代码为
cd /home/XXX/db-new
while read -r line; do
data=$(echo $line | awk -F'"' -v OFS='' '{ for (i=2; i<=NF; i+=2) gsub(",", "", $i) } 1' | awk '{gsub(/"/,"")};1' | tr -d '" )
d2=$(echo $data | awk -F, '{print $2}')
d3=$(echo $data | awk -F, '{print $3}')
d17=$(echo $data | awk -F, '{print $17}')
d4=$(echo $data | awk -F, '{print $4","$5","$6","$7","$8","$9","$10","$11","$12","$13","$14","$15","$16","$17","$18","$19","$20","$21","$22","$23","$24","$25","$26","$27","$28","$29","$30","$31","$32","$33","$34","$35","$36","$37","$38","$39","$40","$45","$46","$47","$48","$49","$50","$51","$52","$53","$54","$55","$56","$57","$58}')
d1=$d2+$d3
d59=$(echo $d2 | cut -d "." -f 2,3)
d60=$(echo $data | awk -F, '{print $19}' | awk 'BEGIN{FS=OFS=","} {gsub(/[[:punct:] ]/,"",$1)} 1' | sed 's/[^0-9]*//g' )
echo $d1,$d2,$d4,$d59,$d17,$d60 >> abc.csv
done < /home/XXX/db-new/2021-09-04.csv
/home/domainsanalytics/db-new/2021-09-04.csv
非常大,所以我只添加了前3行。
head -3 /home/domainsanalytics/db-new/2021-09-04.csv
"num","domain_name","query_time","create_date","update_date","expiry_date","domain_registrar_id","domain_registrar_name","domain_registrar_whois","domain_registrar_url","registrant_name","registrant_company","registrant_address","registrant_city","registrant_state","registrant_zip","registrant_country","registrant_email","registrant_phone","registrant_fax","administrative_name","administrative_company","administrative_address","administrative_city","administrative_state","administrative_zip","administrative_country","administrative_email","administrative_phone","administrative_fax","technical_name","technical_company","technical_address","technical_city","technical_state","technical_zip","technical_country","technical_email","technical_phone","technical_fax","billing_name","billing_company","billing_address","billing_city","billing_state","billing_zip","billing_country","billing_email","billing_phone","billing_fax","name_server_1","name_server_2","name_server_3","name_server_4","domain_status_1","domain_status_2","domain_status_3","domain_status_4"
"1","accounting-fwppool.com","2021-09-04 00:53:04","2021-08-10","2021-08-10","2022-08-10","303","PDR Ltd. d/b/a PublicDomainRegistry.com","whois.publicdomainregistry.com","http://www.publicdomainregistry.com","Micael brown","","4941 Maui Cir Huntington Beach, CA 92649","CA","CA","92649","United States","michbrown7654gh@gmail.com","+1.9169136369","","Micael brown","","4941 Maui Cir Huntington Beach, CA 92649","CA","CA","92649","United States","michbrown7654gh@gmail.com","+1.9169136369","","Micael brown","","4941 Maui Cir Huntington Beach, CA 92649","CA","CA","92649","United States","michbrown7654gh@gmail.com","+1.9169136369","","","","","","","","","","","","ns1.verification-hold.suspended-domain.com","ns2.verification-hold.suspended-domain.com","","","clientTransferProhibited","","",""
"2","xjava.com","2021-09-04 00:53:11","2001-03-06","2021-03-12","2022-03-06","472","Dynadot, LLC","whois.dynadot.com","http://www.dynadot.com","Super Privacy Service LTD c/o Dynadot","","PO Box 701","San Mateo","California","94401","United States","xjava.com@superprivacyservice.com","+1.6505854708","","Super Privacy Service LTD c/o Dynadot","","PO Box 701","San Mateo","California","94401","United States","xjava.com@superprivacyservice.com","+1.6505854708","","Super Privacy Service LTD c/o Dynadot","","PO Box 701","San Mateo","California","94401","United States","xjava.com@superprivacyservice.com","+1.6505854708","","","","","","","","","","","","ns1.sedoparking.com","ns2.sedoparking.com","","","clientTransferProhibited","","",""
我的代码给我的结果很好,但是$59,$17和$60是在新的行…
59美元只是告诉我得到的,
17美元是国家的转载。$60是没有特殊字符的电话号码
我想要的是所有在一行
输出是
domain_name+query_time domain_name create_date update_date expiry_date domain_registrar_id domain_registrar_name domain_registrar_whois domain_registrar_url registrant_name registrant_company registrant_address registrant_city registrant_state registrant_zip registrant_country registrant_email registrant_phone registrant_fax administrative_name administrative_company administrative_address administrative_city administrative_state administrative_zip administrative_country administrative_email administrative_phone administrative_fax technical_name technical_company technical_address technical_city technical_state technical_zip technical_country technical_email technical_phone technical_fax billing_state billing_zip billing_country billing_email billing_phone billing_fax name_server_1 name_server_2 name_server_3 name_server_4 domain_status_1 domain_status_2 domain_status_3 domain_status_4
domain_name registrant_country
accounting-fwppool.com+2021-09-04 00:53:04 accounting-fwppool.com 10/08/21 10/08/21 10/08/22 303 PDR Ltd. d/b/a PublicDomainRegistry.com whois.publicdomainregistry.com http://www.publicdomainregistry.com Micael brown 4941 Maui Cir Huntington Beach CA 92649 CA CA 92649 United States michbrown7654gh@gmail.com 1.916913637 Micael brown 4941 Maui Cir Huntington Beach CA 92649 CA CA 92649 United States michbrown7654gh@gmail.com 1.916913637 Micael brown 4941 Maui Cir Huntington Beach CA 92649 CA CA 92649 United States michbrown7654gh@gmail.com 1.916913637 ns1.verification-hold.suspended-domain.com ns2.verification-hold.suspended-domain.com clientTransferProhibited
com United States 19169136369
xjava.com+2021-09-04 00:53:11 xjava.com 06/03/01 12/03/21 06/03/22 472 Dynadot LLC whois.dynadot.com http://www.dynadot.com Super Privacy Service LTD c/o Dynadot PO Box 701 San Mateo California 94401 United States xjava.com@superprivacyservice.com 1.650585471 Super Privacy Service LTD c/o Dynadot PO Box 701 San Mateo California 94401 United States xjava.com@superprivacyservice.com 1.650585471 Super Privacy Service LTD c/o Dynadot PO Box 701 San Mateo California 94401 United States xjava.com@superprivacyservice.com 1.650585471 ns1.sedoparking.com ns2.sedoparking.com clientTransferProhibited
com United States 16505854708
accuratetactics.com+2021-09-04 00:53:14 accuratetactics.com 26/08/20 30/08/21 26/08/21 1660 Domainshype.com Inc. whois.domainshype.com http://www.domainshype.com This Domain For Sale Worldwide 339 222 5132 Buydomains.com 738 Main Street #389 Waltham Massachusetts 2451 United States brokerage@buydomains.com 1.339222513 1.78183928 This Domain For Sale Worldwide 339 222 5132 Buydomains.com 738 Main Street #389 Waltham Massachusetts 2451 United States brokerage@buydomains.com 1.339222513 1.78183928 This Domain For Sale Worldwide 339 222 5132 Buydomains.com 738 Main Street #389 Waltham Massachusetts 2451 United States brokerage@buydomains.com 1.339222513 1.78183928 dns7.parkpage.foundationapi.com dns8.parkpage.foundationapi.com OK
com United States 13392225132
vej.com+2021-09-04 00:53:16 vej.com 16/09/99 31/08/21 16/09/23 128 DomainRegistry.com Inc. nswhois.domainregistry.com http://www.domainregistry.com Scottcraft Label Co. Scottcraft Label Co. c/o Admin Svcs. PO Box 145 Marlton NJ 8053 United States itadmin@scottcraftlabel.com 1.215870212 IT Admin MS 445 Scottcraft Label Co. c/o Admin Svcs. PO Box 145 Marlton NJ 8053 United States itadmin@scottcraftlabel.com 1.215870212 IT Admin MS 445 Scottcraft Label Co. c/o Admin Svcs. PO Box 145 Marlton NJ 8053 United States itadmin@scottcraftlabel.com 1.215870212 colohost1.domainregistry.com cs03.domainregistry.com clientDeleteProhibited clientTransferProhibited clientUpdateProhibited
com United States 12158702120
accutekware.com+2021-09-04 00:53:24 accutekware.com 26/08/03 26/08/21 26/08/21 303 PDR Ltd. d/b/a PublicDomainRegistry.com whois.publicdomainregistry.com http://www.publicdomainregistry.com R Benedict Accutek Systems Inc PO Box 591125 Houston Texas 77259 United States rbeny09@hotmail.com 1.281461701 R Benedict Accutek Systems Inc PO Box 591125 Houston Texas 77259 United States rbeny09@hotmail.com 1.281461701 R Benedict Accutek Systems Inc PO Box 591125 Houston Texas 77259 United States rbeny09@hotmail.com 1.281461701 dns10.parkpage.foundationapi.com dns11.parkpage.foundationapi.com clientTransferProhibited
com United States 12814617007
crmxon.com+2021-09-04 00:53:27 crmxon.com 04/09/20 04/11/20 04/09/21 303 PDR Ltd. d/b/a PublicDomainRegistry.com whois.publicdomainregistry.com http://www.publicdomainregistry.com GDPR Masked GDPR Masked GDPR Masked GDPR Masked Newcastleupon Tyne(Cityof) GDPR Masked United Kingdom gdpr-masking@gdpr-masked.com GDPR Masked GDPR Masked GDPR Masked GDPR Masked GDPR Masked GDPR Masked GDPR Masked GDPR Masked GDPR Masked gdpr-masking@gdpr-masked.com GDPR Masked GDPR Masked GDPR Masked GDPR Masked GDPR Masked GDPR Masked GDPR Masked GDPR Masked GDPR Masked gdpr-masking@gdpr-masked.com GDPR Masked GDPR Masked ns1.edagent.com ns2.edagent.com ns3.edagent.com ns4.edagent.com clientTransferProhibited
com United Kingdom
预期输出
domain_name+query_time domain_name create_date update_date expiry_date domain_registrar_id domain_registrar_name domain_registrar_whois domain_registrar_url registrant_name registrant_company registrant_address registrant_city registrant_state registrant_zip registrant_country registrant_email registrant_phone registrant_fax administrative_name administrative_company administrative_address administrative_city administrative_state administrative_zip administrative_country administrative_email administrative_phone administrative_fax technical_name technical_company technical_address technical_city technical_state technical_zip technical_country technical_email technical_phone technical_fax billing_state billing_zip billing_country billing_email billing_phone billing_fax name_server_1 name_server_2 name_server_3 name_server_4 domain_status_1 domain_status_2 domain_status_3 domain_status_4 domain_name registrant_country
accounting-fwppool.com+2021-09-04 00:53:04 accounting-fwppool.com 10/08/21 10/08/21 10/08/22 303 PDR Ltd. d/b/a PublicDomainRegistry.com whois.publicdomainregistry.com http://www.publicdomainregistry.com Micael brown 4941 Maui Cir Huntington Beach CA 92649 CA CA 92649 United States michbrown7654gh@gmail.com 1.91691364 Micael brown 4941 Maui Cir Huntington Beach CA 92649 CA CA 92649 United States michbrown7654gh@gmail.com 1.91691364 Micael brown 4941 Maui Cir Huntington Beach CA 92649 CA CA 92649 United States michbrown7654gh@gmail.com 1.91691364 ns1.verification-hold.suspended-domain.com ns2.verification-hold.suspended-domain.com clientTransferProhibited com United States 1.9169E+10
xjava.com+2021-09-04 00:53:11 xjava.com 06/03/01 12/03/21 06/03/22 472 Dynadot LLC whois.dynadot.com http://www.dynadot.com Super Privacy Service LTD c/o Dynadot PO Box 701 San Mateo California 94401 United States xjava.com@superprivacyservice.com 1.65058547 Super Privacy Service LTD c/o Dynadot PO Box 701 San Mateo California 94401 United States xjava.com@superprivacyservice.com 1.65058547 Super Privacy Service LTD c/o Dynadot PO Box 701 San Mateo California 94401 United States xjava.com@superprivacyservice.com 1.65058547 ns1.sedoparking.com ns2.sedoparking.com clientTransferProhibited com United States 1.6506E+10
accuratetactics.com+2021-09-04 00:53:14 accuratetactics.com 26/08/20 30/08/21 26/08/21 1660 Domainshype.com Inc. whois.domainshype.com http://www.domainshype.com This Domain For Sale Worldwide 339 222 5132 Buydomains.com 738 Main Street #389 Waltham Massachusetts 2451 United States brokerage@buydomains.com 1.33922251 1.78183928 This Domain For Sale Worldwide 339 222 5132 Buydomains.com 738 Main Street #389 Waltham Massachusetts 2451 United States brokerage@buydomains.com 1.33922251 1.78183928 This Domain For Sale Worldwide 339 222 5132 Buydomains.com 738 Main Street #389 Waltham Massachusetts 2451 United States brokerage@buydomains.com 1.33922251 1.78183928 dns7.parkpage.foundationapi.com dns8.parkpage.foundationapi.com OK com United States 1.3392E+10
建议使用单个awk
脚本处理所有数据:
以this开头:
script.awk
BEGIN{FS="","|"[[:space:]]*$|^[[:space:]]*""; OFS=" "}
{
$1=$1; # recalculate fields
# num field start from $2
arr[1] = $3 "+" $4;
arr[2] = $4;
arr[4] = $5;
# right append to arr[4] fields 6-41
for (i = 6; i <= 41; i++) arr[4] = arr[4] "," $i;
# right append to arr[4] fields 46-59
for (i = 46; i <= 59; i++) arr[4] = arr[4] "," $i;
arr[17] = $18;
arr[59 ] = $3;
# in 3rd field remove text after first "."
sub(/..*$/,"",arr[59]);
# remove all punctuations and digits from 20th field
gsub(/[[:punct:]]|[[:digit:]]*/,"",$20);
arr[60] = $20;
# output to stdout
print arr[1],arr[2],arr[4],arr[59],arr[17],arr[60];
}
跑步:
awk -f script.awk input.csv > output.csv
没有测试,因为样本数据不包含数值。