用 (g)awk 注释 (c) 源代码中变量的最后使用



我有一个带有c源代码的文件,例如

func2(&x5, &x6, x4, (arg1[3]));
func2(&x7, &x8, x4, (arg1[2]));  
func(&x13, &x14, 0x0, x12, x9);     
func(&x17, &x18, x16, x8, x5);
uint64_t x19 = (x18 + x6); 
func2(&x20, &x21, x11, 0xff);
func2(&x24, &x25, x11, 0xff11));
func(&x26, &x27, 0x0, x25, x22);
uint64_t x28 = (x27 + x23);
func(&x29, &x30, 0x0, x11, x24);
func(&x31, &x32, x30, x13, x26);
func(&x33, &x34, x32, x15, x28);

我想注释变量的最后用法。如:

func2(&x5, &x6, x4, (arg1[3]));
func2(&x7, &x8, x4, (arg1[2]));   // 4,7
func(&x17, &x18, x16, x8, x5);    // 5,8,16,17
uint64_t x19 = (x18 + x6);        // 6,19
func2(&x20, &x21, x11, 0xff);     // 21,20
func2(&x24, &x25, x11, 0xff11));   
func(&x26, &x27, 0x0, x25, x22);  // 25,22
uint64_t x28 = (x27 + x23);       // 23,27,28
func(&x29, &x30, 0x0, x11, x24);  // 24,11,29
func(&x31, &x32, x30, x13, x26);  // 26,13,30,31

注释列出了在该行之后不再被使用的所有变量。(这背后的语义是:这些变量在此之后可以被重用/释放。)

变量名符合正则表达式 /x([0-9]){1,3}/。我尝试先用 tac 把文件按行倒序,再交给 gawk 处理,最后再用 tac 还原顺序,命令如下: tac file.c | gawk ' match($0,/x([0-9]){1,3}/,a) && ! seen[a[0]] {printf "%s// %s\n",$0,a[0];seen[a[0]]=1;}{print}' | tac 其输出为:

func2(&x5, &x6, x4, (arg1[3]));
func2(&x5, &x6, x4, (arg1[3]));// x5
func2(&x7, &x8, x4, (arg1[2]));  
func2(&x7, &x8, x4, (arg1[2]));  // x7
func(&x13, &x14, 0x0, x12, x9);     
func(&x13, &x14, 0x0, x12, x9);     // x13
func(&x17, &x18, x16, x8, x5);
func(&x17, &x18, x16, x8, x5);// x17
uint64_t x19 = (x18 + x6); 
uint64_t x19 = (x18 + x6); // x19
func2(&x20, &x21, x11, 0xff);
func2(&x20, &x21, x11, 0xff);// x20
func2(&x24, &x25, x11, 0xff11));
func2(&x24, &x25, x11, 0xff11));// x24
func(&x26, &x27, 0x0, x25, x22);
func(&x26, &x27, 0x0, x25, x22);// x26
uint64_t x28 = (x27 + x23);
uint64_t x28 = (x27 + x23);// x28
func(&x29, &x30, 0x0, x11, x24);
func(&x29, &x30, 0x0, x11, x24);// x29
func(&x31, &x32, x30, x13, x26);
func(&x31, &x32, x30, x13, x26);// x31
func(&x33, &x34, x32, x15, x28);
func(&x33, &x34, x32, x15, x28);// x33

我已经很接近了,但显然这不是我想要的。

  • 如何让 match 进行全局匹配,使其识别出一行(函数调用)中的所有匹配项,而不仅仅是第一个?

  • 如何避免同一行被打印两次?

最大的问题是match()只找到正则表达式的第一个匹配项。您必须反复遍历每一行才能找到其中的所有变量。

如果先读取一遍文件以查找各变量的使用位置,然后再读取一遍,并根据第一遍收集到的数据打印出最后一次使用的变量,就可以用 gawk 完成。为方便起见,下面的 shell 脚本对此做了封装,这样您就不必手动把源文件指定两次:

#!/bin/sh
# lastvars: annotate each line of a C source file with the variables
# (identifiers of the form x<digits>) whose last use in the file is on
# that line. Annotated lines get a tab and a "// n1,n2,..." comment
# appended; all other lines are printed unchanged.
#
# Usage: lastvars FILE
#
# FILE is handed to gawk twice so the program can read it in two passes.
# Requires gawk: the three-argument match() and the \< \> word-boundary
# operators are gawk extensions.
gawk '
# Pass 1 (NR == FNR is true only while the first file argument is read):
# remember, for every variable number, the last line it appears on.
NR == FNR {
    s = $0
    # match() reports only the leftmost match, so consume the line
    # piece by piece. The word boundaries keep the "x0" inside hex
    # literals such as 0x0 from being mistaken for a variable.
    while (match(s, /\<x([0-9]+)\>/, a)) {
        seen[a[1]] = FNR
        s = substr(s, RSTART + RLENGTH)
    }
    next
}
# Pass 2: re-scan each line and collect the variables whose recorded
# last line is the current one.
{
    s = $0
    lasts = ""
    while (match(s, /\<x([0-9]+)\>/, a)) {
        if (seen[a[1]] == FNR) {
            if (lasts == "")
                lasts = a[1]
            else
                lasts = lasts "," a[1]
        }
        s = substr(s, RSTART + RLENGTH)
    }
    if (lasts == "")
        print $0
    else
        printf "%s\t// %s\n", $0, lasts
}
' "$1" "$1"

用法示例:

$ ./lastvars foo.c
func2(&x5, &x6, x4, (arg1[3]));
func2(&x7, &x8, x4, (arg1[2]));     // 7,4
func(&x13, &x14, 0x0, x12, x9);         // 14,12,9
func(&x17, &x18, x16, x8, x5);  // 17,16,8,5
uint64_t x19 = (x18 + x6);  // 19,18,6
func2(&x20, &x21, x11, 0xff);   // 20,21
func2(&x24, &x25, x11, 0xff11));
func(&x26, &x27, 0x0, x25, x22);    // 25,22
uint64_t x28 = (x27 + x23); // 27,23
func(&x29, &x30, 0x0, x11, x24);    // 29,11,24
func(&x31, &x32, x30, x13, x26);    // 31,30,13,26
func(&x33, &x34, x32, x15, x28);    // 33,34,32,15,28

最新更新