===>user.application.2020-01-16-00-00.csv
user1,app1
user1,app2
user2,app1
user3,app1
===>user.application.2020-01-16-00-30.csv
user1,app1
user2,app1
user2,app4
user10,app2
user10,app1
user4,app5
我想要如下输出:每个应用程序后面跟着使用它的不同用户的数量
app1,4
app2,2
app4,1
app5,1
记录每一个(应用程序,用户)组合,然后统计每个应用程序下不同用户的个数并打印。
# Print "app,count" for every application, where count is the number of
# distinct users seen with that application across all CSV files.
# seen[app,user] marks pairs already counted, so each pair adds to
# count[app] only once. Uses only POSIX awk features (the a[$2][$1]
# array-of-arrays form works in gawk 4+ only). Output order is unspecified.
awk -F, '!seen[$2,$1]++ { count[$2]++ } END { for (app in count) print app "," count[app] }' *.csv
我修改了您的输入样本,以便更清楚地展示输入的处理过程;脚本不会对结果自动排序,而是按应用程序首次出现的顺序输出。
#!/bin/sh
# Demo setup: choose the debug level and write the two sample input files
# next to the current directory, named after this script.

# 1 turns on the trace messages of the awk program; any other value is quiet.
DBG=0

# Script name without directory and without a trailing ".sh".
BASE=$(basename "$0" ".sh")
TEST_INPUT1="${BASE}_input_1.txt"
TEST_INPUT2="${BASE}_input_2.txt"

# First sample: one "user,app" record per line.
printf '%s\n' \
    'user1,app1' \
    'user10,app2' \
    'user1,app2' \
    'user2,app1' \
    'user3,app1' >"${TEST_INPUT1}"

# Second sample: repeats some pairs from the first and adds new ones.
printf '%s\n' \
    'user1,app1' \
    'user2,app1' \
    'user2,app5' \
    'user10,app2' \
    'user10,app1' \
    'user4,app4' >"${TEST_INPUT2}"
# Tally distinct users per application from both test inputs, then print one
# report line per application in the order the applications first appeared.
#
# State kept by the awk program (all positions are 1-based):
#   apps[i]       name of the i-th distinct application
#   usage[i,1]    the same application name, used by the report
#   usage[i,2]    count of distinct users recorded for application i
#   users[i,1]    how many user names are stored for application i
#   users[i,k+1]  the k-th distinct user name stored for application i
# Set DBG=1 to trace every decision made while reading the input.
cat "${TEST_INPUT1}" "${TEST_INPUT2}" |
awk -F "," -v dbg="${DBG}" '
BEGIN {
    ### initialize arrays
    split("", apps ) ;
    split("", usage ) ;
    split("", users ) ;
    ### initialize arrays counter
    indexApps = 0 ;
    indexUsers = 0 ;
}
{
    if( dbg == 1 ){ print "\nLINE: ", $0 ; } ;
    hit = 0 ;

    # Search the known applications for the one named in field 2.
    # On the very first record the list is empty and the loop is skipped.
    for( i = 1 ; i <= indexApps ; i++ ){
        if( $2 == apps[i] ){
            hit = 1 ;
            if( dbg == 1 ){ print "\t [1] users[i,1] = ", users[i,1] ; } ;

            # Search the users already stored for this application.
            hitU = 0 ;
            for( j = 1 ; j <= users[i,1] ; j++ ){
                if( users[i,j+1] == $1 ){
                    hitU = 1 ;
                    if( dbg == 1 ){ print "\t [1] exists -> ", users[i,j+1] ; } ;
                    break ;
                } ;
            } ;

            # New user for a known application: bump the count, append the name.
            if( hitU == 0 ){
                if( dbg == 1 ){ print "\t [1] Hit: usage BEFORE ", apps[i], " -> ", usage[i,2] ; } ;
                usage[i,2]++ ;
                if( dbg == 1 ){ print "\t [1] Hit: ", apps[i], " -> ", usage[i,2] ; } ;
                indexUsers = users[i,1] + 1 ;
                if( dbg == 1 ){ print "\t [1] users[i,1] + 1 = ", indexUsers ; } ;
                users[ i, 1 ] = indexUsers ;
                users[ i, indexUsers+1 ] = $1 ;
                if( dbg == 1 ){ print "\t [1] users[", i, " , ", indexUsers+1 " ] -> ", users[ i, indexUsers+1 ] ; } ;
            } ;
            break ;
        } ;
    } ;

    # Application not seen before: register it with this record as first user.
    if( hit == 0 ){
        if( dbg == 1 ){ print "\t [2] NO hit ------------------------------- START" ; } ;
        indexApps++ ;
        apps[ indexApps ] = $2 ;
        if( dbg == 1 ){ print "\t [2] new app -> ", apps[ indexApps ] ; } ;
        usage[ indexApps, 1 ] = $2 ;
        usage[ indexApps, 2 ] = 1 ;
        if( dbg == 1 ){ print "\t [2]", apps[ indexApps ], " -> ", usage[ indexApps, 2 ] ; } ;
        # users[ indexApps, 1 ] is still unset here, so this evaluates to 1.
        indexUsers = users[ indexApps, 1 ] + 1 ;
        if( dbg == 1 ){ print "\t [2] users[ indexApps, 1 ] + 1 = ", indexUsers ; } ;
        users[ indexApps, 1 ] = indexUsers ;
        users[ indexApps, indexUsers+1 ] = $1 ;
        if( dbg == 1 ){ print "\t [2] users[", indexApps, " , ", indexUsers+1 " ] -> ", users[ indexApps, indexUsers+1 ] ; } ;
        if( dbg == 1 ){ print "\t [2] NO hit ------------------------------- END" ; } ;
    } ;
}
END {
    print "Application Usage:" ;
    for( i = 1 ; i <= indexApps ; i++ ){
        printf(" %s = %3d\t", usage[i,1], usage[i,2] ) ;
        for( j = 1 ; j <= users[i,1] ; j++ ){
            printf("\t%s", users[i,j+1] ) ;
            # Stop listing names once the eleventh one has been printed.
            if( j > 10 ){ break ; } ;
        } ;
        print "" ;
    } ;
}'
会话输出如下:
ericthered@OasisMega1:/WORKS$ ./test_47.sh
Application Usage:
app1 = 4 user1 user2 user3 user10
app2 = 2 user10 user1
app5 = 1 user2
app4 = 1 user4
ericthered@OasisMega1:/WORKS$