如何使用AWK命令输出并转换为CSV文件



我想写一个脚本,从 autorep -j $i -q 的输出中查找某些字段,并打印出这些字段的值。脚本执行时会提示用户输入 JOB_NAME 或 %SEARCHSTRING%,然后 autorep -j $i -q 会按以下格式给出作业的详细信息:

/tmp $ autorep -j Test_jobA -q


insert_job: Test_jobA   job_type: CMD
command: echo
machine: machinename
owner: owner
permission:
date_conditions: 1
run_calendar: Autosys_Calendar 
start_times: "09:09"
description: "test discription"
std_out_file: "/tmp/Test_jobA.out"
std_err_file: "/tmp/Test_jobA.err"
alarm_if_fail: 1
alarm_if_terminated: 1

insert_job: Test_JobB   job_type: CMD
command: echo
machine: machinename
owner: owner
permission:
date_conditions: 1
days_of_week: mo,tu,we,th,fr 
start_times: "21:05"
description: "test discription"
std_out_file: "/tmp/Test_JobB.out"
std_err_file: "/tmp/Test_JobB.err"
alarm_if_fail: 1
alarm_if_terminated: 1
insert_job: Test_JobC  job_type: BOX
command: echo
machine: machinename
owner: owner
permission:
date_conditions: 0
description: "test discription"
std_out_file: "/tmp/Test_JobC.out"
std_err_file: "/tmp/Test_JobC.err"
alarm_if_fail: 1
alarm_if_terminated: 1
insert_job: Test_JobD   job_type: CMD
command: echo
machine: machinename
owner: owner
permission:
date_conditions: 1
days_of_week: su 
start_times: "08:50"
description: "test discription"
std_out_file: "/tmp/Test_JobD.out"
std_err_file: "/tmp/Test_JobD.err"
alarm_if_fail: 1
alarm_if_terminated: 1
insert_job: Test_JobE   job_type: CMD
command: echo
machine: machinename
owner: owner
permission:
date_conditions: 1
days_of_week: su 
start_times: "08:20"
description: "test discription"
std_out_file: "/tmp/Test_JobE.out"
std_err_file: "/tmp/Test_JobE.err"
alarm_if_fail: 1
alarm_if_terminated: 1

insert_job: Test_JobF   job_type: CMD
command: echo
machine: machinename
owner: owner
permission:
date_conditions: 1
days_of_week: all 
start_mins: 0,10,20,30,40,50
description: "test discription"
std_out_file: "/tmp/Test_JobF.out"
std_err_file: "/tmp/Test_JobF.err"
alarm_if_fail: 1
alarm_if_terminated: 1

如你所见,如果 date_conditions: 0,作业中可能有 condition:,也可能没有 days_of_week:、start_mins:、run_window:、run_calendar:;如果 date_conditions: 1,同样可能缺少 days_of_week:、start_mins:、run_window:、run_calendar: 中的某些字段。

我有下面的脚本可以很好地进行过滤:

#!/bin/bash
#
# Ask for an AutoSys job name (or a %SEARCHSTRING% wildcard), dump the matching
# job definitions with `autorep -q`, and save a few scheduling attributes as CSV.
#
# Output files:
#   /tmp/test1.txt  one space-separated line per job (intermediate)
#   /tmp/test1.csv  header line plus the same rows, comma-separated
#
# Known limitation: attributes that a job does not define are skipped rather
# than padded, so a row can hold fewer values than the header has columns and
# later values end up under the wrong heading.
TXT=/tmp/test1.txt
CSV=/tmp/test1.csv

echo "Enter the JOB_NAME or %SEARCHSTRING%"

# Start from an empty $TXT so that a failed or empty read never leaves the rows
# of a previous run behind. A single line of input is all that is needed.
: > "$TXT"
if read -r i && [ -n "$i" ]; then
  autorep -j "$i" -q | awk '
    # First line of a job definition: "insert_job: NAME   job_type: TYPE".
    /^insert_job/ {
      if (seen) printf "\n"          # terminate the previous job row
      printf "%s %s", $2, $4
      seen = 1
    }
    /^date_conditions/ { printf " %s", $2 }
    /^condition:|^days_of_week:|^run_calendar:|^start_times:|^start_mins:/ {
      printf " %s", $2
    }
    END { if (seen) printf "\n" }    # terminate the last row as well
  ' > "$TXT"
fi

if [ -s "$TXT" ]; then
  {
    echo "job_name,job_type,Date_Conditions,condition,days_of_week,start_times,Start_mins"
    # -E makes "+" a repetition operator; in basic regex it is a literal "+".
    sed -E 's/ +/,/g' "$TXT"
  } > "$CSV"
else
  echo "Please check the %SEARCHSTRING% or JOB_NAME"
fi

上面脚本中的While循环给出了以下输出:

Test_jobA CMD 1 Autosys_Calendar "09:09"
Test_JobB CMD 1 mo,tu,we,th,fr "21:05"
Test_JobC BOX 0
Test_JobD CMD 1 su "08:50"
Test_JobE CMD 1 su "08:20"
Test_JobF CMD 1 all 0,10,20,30,40,50

上述脚本中的 if 语句用于把 while 循环的输出转换为 .csv 文件,但由于各行的字段数不一致、没有对齐,数据落到了错误的列中。

有没有办法让各列对齐?

编辑:CSV文件所需的输出:

我希望得到下面这样的输出:如果作业缺少某个字段,就打印"NA",这样 CSV 文件中的各列才能对齐。
job_name     job_type  date_conditions   condition run_calendar       days_of_week    start_times    start_mins
Test_jobA      CMD             1            NA     Autosys_Calendar     NA             "09:09"        NA
Test_JobB      CMD             1            NA          NA              mo,tu,we,th,fr "21:05"        NA
Test_JobC      BOX             0            NA          NA              NA             NA             NA
Test_JobD      CMD             1            NA          NA              su             "08:50"        NA
Test_JobE      CMD             1            NA          NA              su             "08:20"        NA
Test_JobF      CMD             1            NA          NA              all             NA          0,10,20,30,40,50

下面的awk可能是您的选择。您可以将输出重定向到.xlsx文件,或者将输出管道到column -t,以获得列格式的输出。

#!/bin/bash
#
# Print one tab-separated row per AutoSys job definition, with "NA" for every
# scheduling attribute the job does not define.
#
# Only the first word of each value is kept, so the output stays aligned when
# it is piped through `column -t`. A row is written when the job's
# "alarm_if_terminated:" line is reached, so a job without that attribute
# produces no row.
#
# Usage: ./script [job_file]          (job_file defaults to job.dat)
#        ./script job.dat | column -t

# jobs_to_tsv [FILE]
#   Writes the header and one row per job to stdout.
#   Returns 1 when FILE (default: job.dat) cannot be read.
jobs_to_tsv() {
  local data_file=${1:-job.dat}

  if [ ! -r "$data_file" ]; then
    printf 'cannot read job file: %s\n' "$data_file" >&2
    return 1
  fi

  awk '
    BEGIN {
      OFS = "\t"
      print "job_name", "job_type", "date_conditions", "condition",
            "run_calendar", "days_of_week", "start_times", "start_mins"
    }

    # Comparing $1 instead of matching a regex anywhere in the line keeps a
    # description that merely mentions "job_type" from starting a new job.
    $1 == "insert_job:" {
      jn = $2; jt = $4
      dc = c = rc = dow = st = sm = "NA"
    }

    # NF > 1: an attribute present without a value keeps its "NA".
    $1 == "date_conditions:" && NF > 1 { dc  = $2 }
    $1 == "condition:"       && NF > 1 { c   = $2 }
    $1 == "run_calendar:"    && NF > 1 { rc  = $2 }
    $1 == "days_of_week:"    && NF > 1 { dow = $2 }
    $1 == "start_times:"     && NF > 1 { st  = $2 }
    $1 == "start_mins:"      && NF > 1 { sm  = $2 }

    $1 == "alarm_if_terminated:" { print jn, jt, dc, c, rc, dow, st, sm }
  ' "$data_file"
}

jobs_to_tsv "$@"

假设所有作业数据都包含在名为job.dat的文件中,下面是示例输出:

./script job.dat | column -t
job_name   job_type  date_conditions  condition  run_calendar      days_of_week    start_times  start_mins
Test_jobA  CMD       1                NA         Autosys_Calendar  NA              "09:09"      NA
Test_JobB  CMD       1                NA         NA                mo,tu,we,th,fr  "21:05"      NA
Test_JobC  BOX       0                NA         NA                NA              NA           NA
Test_JobD  CMD       1                NA         NA                su              "08:50"      NA
Test_JobE  CMD       1                NA         NA                su              "08:20"      NA
Test_JobF  CMD       1                NA         NA                all             NA           0,10,20,30,40,50

生成.csv输出的另一个脚本版本:

#!/bin/bash
#
# Convert AutoSys job definitions ("tag: value" lines, each job starting with
# an "insert_job:" line) into CSV with every field double-quoted. Attributes a
# job does not define are reported as "NA".
#
# A row is written when the job's "alarm_if_terminated:" line is reached, so a
# job without that attribute produces no row.
#
# Usage: ./script job_file

# jobs_to_csv FILE
#   Writes the CSV header and one row per job to stdout.
#   Returns 2 when FILE is not given, 1 when it cannot be read.
jobs_to_csv() {
  local data_file=${1-}

  if [ -z "$data_file" ]; then
    printf 'usage: %s job_file\n' "${0##*/}" >&2
    return 2
  fi
  if [ ! -r "$data_file" ]; then
    printf 'cannot read job file: %s\n' "$data_file" >&2
    return 1
  fi

  awk '
    # Everything after "tag:" on the current line, trimmed; "NA" when empty.
    # Taking the whole remainder (not just $2) keeps multi-word values such as
    # "condition: s(JobA) & s(JobB)" intact.
    function value(   v) {
      v = $0
      sub(/^[ \t]*[^ \t:]+:[ \t]*/, "", v)
      sub(/[ \t]+$/, "", v)
      return (v == "" ? "NA" : v)
    }

    # Quote one CSV field: drop the quotes already surrounding the value,
    # then double any quote left inside it.
    function q(s) {
      if (s ~ /^".*"$/) s = substr(s, 2, length(s) - 2)
      gsub(/"/, "\"\"", s)
      return "\"" s "\""
    }

    function emit(f1, f2, f3, f4, f5, f6, f7, f8) {
      print q(f1) "," q(f2) "," q(f3) "," q(f4) "," \
            q(f5) "," q(f6) "," q(f7) "," q(f8)
    }

    BEGIN {
      emit("job_name", "job_type", "date_conditions", "condition",
           "run_calendar", "days_of_week", "start_times", "start_mins")
    }

    { sub(/\r$/, "") }               # tolerate CRLF line endings

    $1 == "insert_job:" {
      jn = $2; jt = $4
      dc = c = rc = dow = st = sm = "NA"
    }
    $1 == "date_conditions:" { dc  = value() }
    $1 == "condition:"       { c   = value() }
    $1 == "run_calendar:"    { rc  = value() }
    $1 == "days_of_week:"    { dow = value() }
    $1 == "start_times:"     { st  = value() }
    $1 == "start_mins:"      { sm  = value() }

    $1 == "alarm_if_terminated:" { emit(jn, jt, dc, c, rc, dow, st, sm) }
  ' "$data_file"
}

jobs_to_csv "$@"

CSV输出:

"job_name","job_type","date_conditions","condition","run_calendar","days_of_week","start_times","start_mins"
"Test_jobA","CMD","1","NA","Autosys_Calendar","NA","09:09","NA"
"Test_JobB","CMD","1","NA","NA","mo,tu,we,th,fr","21:05","NA"
"Test_JobC","BOX","0","NA","NA","NA","NA","NA"
"Test_JobD","CMD","1","NA","NA","su","08:50","NA"
"Test_JobE","CMD","1","NA","NA","su","08:20","NA"
"Test_JobF","CMD","1","NA","NA","all","NA","0,10,20,30,40,50"

下面是修改后的版本,不再依赖 alarm_if_terminated 行来触发输出:

#!/bin/bash
#
# Convert AutoSys job definitions ("tag: value" lines, each job starting with
# an "insert_job:" line) into CSV with every field double-quoted. Attributes a
# job does not define are reported as "NA".
#
# A job's row is written when the next "insert_job:" line or the end of the
# input is reached, so no particular closing attribute is required.
#
# Usage: ./script job_file

# export_jobs_csv FILE
#   Writes the CSV header and one row per job to stdout.
#   Returns 2 when FILE is not given, 1 when it cannot be read.
export_jobs_csv() {
  local data_file=${1-}

  if [ -z "$data_file" ]; then
    printf 'usage: %s job_file\n' "${0##*/}" >&2
    return 2
  fi
  if [ ! -r "$data_file" ]; then
    printf 'cannot read job file: %s\n' "$data_file" >&2
    return 1
  fi

  awk '
    # Everything after "tag:" on the current line, trimmed; "NA" when empty.
    function value(   v) {
      v = $0
      sub(/^[ \t]*[^ \t:]+:[ \t]*/, "", v)
      sub(/[ \t]+$/, "", v)
      return (v == "" ? "NA" : v)
    }

    # Quote one CSV field: drop the quotes already surrounding the value,
    # then double any quote left inside it.
    function q(s) {
      if (s ~ /^".*"$/) s = substr(s, 2, length(s) - 2)
      gsub(/"/, "\"\"", s)
      return "\"" s "\""
    }

    function emit(f1, f2, f3, f4, f5, f6, f7, f8) {
      print q(f1) "," q(f2) "," q(f3) "," q(f4) "," \
            q(f5) "," q(f6) "," q(f7) "," q(f8)
    }

    # Write the job collected so far. The "seen" flag (rather than NR > 1)
    # avoids an empty row when blank lines precede the first job or when the
    # input holds no job at all.
    function emit_job() {
      if (seen) emit(jn, jt, dc, c, rc, dow, st, sm)
    }

    BEGIN {
      emit("job_name", "job_type", "date_conditions", "condition",
           "run_calendar", "days_of_week", "start_times", "start_mins")
    }

    { sub(/\r$/, "") }               # tolerate CRLF line endings

    $1 == "insert_job:" {
      emit_job()
      seen = 1
      jn = $2; jt = $4
      dc = c = rc = dow = st = sm = "NA"
    }
    $1 == "date_conditions:" { dc  = value() }
    $1 == "condition:"       { c   = value() }
    $1 == "run_calendar:"    { rc  = value() }
    $1 == "days_of_week:"    { dow = value() }
    $1 == "start_times:"     { st  = value() }
    $1 == "start_mins:"      { sm  = value() }

    END { emit_job() }
  ' "$data_file"
}

export_jobs_csv "$@"

我想你会更喜欢用下面这个输出 CSV 的脚本作为起点,而不是一个恰好生成你所说的那种输出的脚本:

$ cat tst.awk
# tst.awk - turn "tag: value" job definitions into double-quoted CSV.
#
# A new record starts at each "insert_job:" line. The header row is built from
# the tags of the first record, in order of first appearance. Records are not
# padded: a job whose set of tags differs from the first one yields a row whose
# columns do not line up with the header.
BEGIN { OFS="," }

!NF { next }                                   # skip blank lines

# "insert_job: NAME   job_type: TYPE" carries two tags on one line: flush the
# previous record, store the insert_job pair, and leave the rest of the line
# in $0 for the generic rule below.
match($0,/^[[:space:]]*insert_job: [^[:space:]]+[[:space:]]+/) {
    prt()
    delete tag2val
    numTags = 0
    set_tag2val(substr($0,1,RLENGTH))
    $0 = substr($0,RSTART+RLENGTH)
}

{ set_tag2val($0) }

END { prt() }

# Split "tag: value" and remember it, recording each new tag in arrival order.
function set_tag2val(str,       tag,val) {
    gsub(/^[[:space:]]+|[[:space:]]+$/,"",str)
    tag = val = str
    sub(/[[:space:]]*:.*/,"",tag)              # keep the text before the first colon
    sub(/[^:]*:[[:space:]]*/,"",val)           # keep the text after it
    if ( !(tag in tag2val) ) {
        tags[++numTags] = tag
    }
    tag2val[tag] = val
}

# Print the current record, preceded by the header row the first time a
# non-empty record is printed.
function prt(   tagNr,tag,val) {
    if ( numTags && !doneHdr++ ) {
        for ( tagNr=1; tagNr<=numTags; tagNr++ ) {
            tag = tags[tagNr]
            printf "\"%s\"%s", tag, (tagNr<numTags ? OFS : ORS)
        }
    }
    for ( tagNr=1; tagNr<=numTags; tagNr++ ) {
        tag = tags[tagNr]
        val = tag2val[tag]
        gsub(/^"|"$/,"",val)                   # drop the quotes around the value
        gsub(/"/,"\"\"",val)                   # double any quote left inside it
        printf "\"%s\"%s", val, (tagNr<numTags ? OFS : ORS)
    }
}

$ awk -f tst.awk file
"insert_job","job_type","command","machine","owner","permission","date_conditions","run_calendar","start_times","description","std_out_file","std_err_file","alarm_if_fail","alarm_if_terminated"
"Test_jobA","CMD","echo","machinename","owner","","1","Autosys_Calendar","09:09","test discription","/tmp/Test_jobA.out","/tmp/Test_jobA.err","1","1"
"Test_JobB","CMD","echo","machinename","owner","","1","mo,tu,we,th,fr","21:05","test discription","/tmp/Test_JobB.out","/tmp/Test_JobB.err","1","1"
"Test_JobC","BOX","echo","machinename","owner","","0","test discription","/tmp/Test_JobC.out","/tmp/Test_JobC.err","1","1"
"Test_JobD","CMD","echo","machinename","owner","","1","su","08:50","test discription","/tmp/Test_JobD.out","/tmp/Test_JobD.err","1","1"
"Test_JobE","CMD","echo","machinename","owner","","1","su","08:20","test discription","/tmp/Test_JobE.out","/tmp/Test_JobE.err","1","1"
"Test_JobF","CMD","echo","machinename","owner","","1","all","0,10,20,30,40,50","test discription","/tmp/Test_JobF.out","/tmp/Test_JobF.err","1","1"

最新更新