嗨,我有多个文件,它们的列相同但数据不同。我想根据日期和时间(精确到分钟)匹配所有文件中的数据,并输出某些特定列的聚合值(求和)。
文件1
DATE TIME A B C D
20140402 00:15:26 21 50 30 60
20140402 00:20:04 23 54 40 70
20140402 00:25:04 25 52 50 80
文件2
DATE TIME A B C D
20140402 00:15:21 40 60 60 70
20140402 00:20:29 50 61 70 80
20140402 00:25:22 60 63 80 90
20140402 00:30:26 70 70 90 40
20140402 00:35:23 80 80 65 50
20140402 00:40:27 50 65 76 60
文件3
DATE TIME A B C D
20140402 00:15:24 10 50 10 50
20140402 00:20:03 20 60 40 60
20140402 00:25:03 30 70 50 70
20140402 00:30:24 10 80 70 80
20140402 00:35:03 50 90 80 10
输出:
DATE TIME B D
20140402 00:15 160 180
20140402 00:20 175 210
20140402 00:25 185 240
20140402 00:30 150 120
20140402 00:35 170 60
20140402 00:40 65 60
$ gawk '
    # Sum columns B and D per DATE + HH:MM across all input files.
    $1 ~ /^[0-9]+$/ {                    # data rows only: DATE is all digits, so headers and blank lines are skipped
        key = $1 " " substr($2, 1, 5)    # substr() positions start at 1; a start of 0 is not portable
        B[key] += $4                     # 4th field is column B
        D[key] += $6                     # 6th field is column D
    }
    END {
        for (k in B)                     # for-in order is unspecified, hence the final sort
            print k, B[k], D[k]
    }
' ?.txt |
sort
20140402 00:15 160 180
20140402 00:20 175 210
20140402 00:25 185 240
20140402 00:30 150 120
20140402 00:35 170 60
20140402 00:40 65 60
这可能有效:
awk 'FNR>1 {split($2,a,":"); k=$1 FS a[1]":"a[2]; arr_b[k]+=$4; arr_d[k]+=$6} END {for (i in arr_b) print i,arr_b[i],arr_d[i]}' OFS="\t" file? # FNR>1 skips each header; OFS="\t" (with the backslash) gives tab-separated output
20140402 00:25 185 240
20140402 00:35 170 60
20140402 00:20 175 210
20140402 00:30 150 120
20140402 00:40 65 60
20140402 00:15 160 180
排序:
awk 'FNR>1 {split($2,a,":"); k=$1 FS a[1]":"a[2]; arr_b[k]+=$4; arr_d[k]+=$6} END {for (i in arr_b) print i,arr_b[i],arr_d[i]}' OFS="\t" file? | sort -k1,1 -k2,2 # awk for-in order is unspecified, so sort by DATE, then HH:MM
20140402 00:15 160 180
20140402 00:20 175 210
20140402 00:25 185 240
20140402 00:30 150 120
20140402 00:35 170 60
20140402 00:40 65 60
其中 file? 是 shell 通配符,匹配 file1、file2、file3;
也可以直接写成 file1 file2 file3
来代替。
它是如何工作的:
awk '
FNR > 1 {                            # Do this for every line except the header of each file
    split($2, a, ":")                # Split the time field on ":" into array "a": a[1]=HH, a[2]=MM, a[3]=SS
    key = $1 FS a[1] ":" a[2]        # Group key is DATE plus HH:MM; the seconds are dropped
    arr_b[key] += $4                 # Add the "B" column (4th field) to the sum in "arr_b"
    arr_d[key] += $6                 # Add the "D" column (6th field) to the sum in "arr_d"
}
END {
    for (i in arr_b)                 # Loop through all keys of "arr_b"; the order is unspecified
        print i, arr_b[i], arr_d[i]  # Print the key, then both sums, separated by OFS
}
' OFS="\t" file?                     # Set the output field separator to a tab ("\t", not "t") and read the files