我正在尝试为以下文本格式添加一个分隔符(实际文件有更多字段)。
据我观察,每个字段的长度由各列标题下方那一串短横线(------------)的长度决定。
输入:
NAME                  ADDRESS                                                      PHONE
--------------------- ------------------------------------------------------------ ------------
CLARK KENT            344 Clinton Street, Apartment 3D, midtown Metropolis         11111111
TONY STARK            Malibu Point 10880, 902XX                                    22222222
PETER PARKER          15th Street, Queens, New York City, New York                 33333333
所需输出:
NAME |ADDRESS |PHONE
CLARK KENT |344 Clinton Street, Apartment 3D, midtown Metropolis |11111111
TONY STARK |Malibu Point 10880, 902XX |22222222
PETER PARKER |15th Street, Queens, New York City, New York |33333333
到目前为止,我的尝试只是打印出每个字段的长度,但我不知道如何在这些位置插入字段分隔符 |:
$ awk 'FNR == 2 { for (i = 1; i <= NF; i++) print length($i) }' file
21
60
12
请在这个问题上给我一些帮助。
使用 FIELDWIDTHS(GNU awk 特有的变量):
$ awk -v OFS='|' '
    # Line 1: hold the header back until the column widths are known.
    NR == 1 { header = $0; next }

    # Line 2: every dash run is one column; the +1 takes in the blank after it.
    NR == 2 {
        for (col = 1; col <= NF; col++)
            widths = widths FS (length($col) + 1)
        FIELDWIDTHS = widths
        $0 = header          # re-split the saved header at the fixed widths
    }

    # Assigning a field rebuilds the record with OFS between the fields.
    { $1 = $1; print }
' file
NAME |ADDRESS |PHONE
CLARK KENT |344 Clinton Street, Apartment 3D, midtown Metropolis |11111111
TONY STARK |Malibu Point 10880, 902XX |22222222
PETER PARKER |15th Street, Queens, New York City, New York |33333333
使用GNU awk
# Pass 1: turn the dashed ruler on line 2 into a FIELDWIDTHS list.
# Each width is the dash run plus the single blank that follows it.
wid=$(awk '
    NR == 2 {
        for (col = 1; col <= NF; col++)
            printf "%d ", length($col) + 1
        exit
    }
' file)

# Pass 2 (GNU awk): split each line at those widths and join the pieces
# with "|"; the ruler line itself is not printed.
gawk -v FIELDWIDTHS="$wid" '
    NR == 2 { next }
    {
        row = ""
        for (col = 1; col < NF; col++)
            row = row $col "|"
        print row $NF
    }
' file
使用 GNU awk 的 FIELDWIDTHS:
$ cat tst.awk
# tst.awk -- needs GNU awk (uses FIELDWIDTHS).
# Reads a fixed-width table whose second line is a ruler of dash runs and
# prints the header and data rows with OFS ("|") between the columns.
BEGIN { OFS = "|" }

# Line 1: keep the header until the column widths have been read.
NR == 1 { hdr = $0; next }

# Line 2: build FIELDWIDTHS from the dash runs, with a 1-character field
# between neighbouring columns for the blank that separates them
# (e.g. "21 1 60 1 12"). Odd-numbered fields are then the data columns.
NR == 2 {
    nf = split($0, f)
    widths = ""
    for (i = 1; i <= nf; i++)
        widths = widths (i > 1 ? " 1 " : "") length(f[i])
    if (widths != "")
        FIELDWIDTHS = widths
    $0 = hdr                 # re-split the saved header at the new widths
}

# Join the odd-numbered (data) fields. The record is always terminated by
# print, so a short line that ends inside a gap field (even NF) can no
# longer be emitted without its newline.
{
    line = ""
    for (i = 1; i <= NF; i += 2)
        line = line (i > 1 ? OFS : "") $i
    print line
}
$ awk -f tst.awk file
NAME |ADDRESS |PHONE
CLARK KENT |344 Clinton Street, Apartment 3D, midtown Metropolis |11111111
TONY STARK |Malibu Point 10880, 902XX |22222222
PETER PARKER |15th Street, Queens, New York City, New York |33333333
以下 awk 命令适用于任何版本的 awk:
# Portable awk version: read the column layout from the dashed ruler on
# line 2 of ./file, then print the header and every data row with OFS
# inserted after the blank that follows each column except the last.
# The ruler line itself is not printed.
awk -v OFS='|' '
NR == 1 {
    h = $0                   # the header can only be cut once the ruler is read
    next
}
NR == 2 {
    # w[i] = 1-based offset of the blank that ends column i: the widths of
    # columns 1..i plus one separator blank per column.
    n = NF - 1               # kept in a variable; length(array) is not POSIX
    for (i = 1; i <= n; i++)
        w[i] = (i == 1 ? 1 : w[i-1] + 1) + length($i)
    $0 = h                   # swap the ruler for the saved header
}
{
    # Insert from right to left so an inserted delimiter never shifts an
    # offset that is still to be used (left-to-right cuts would land too
    # early and swallow characters from the third column on).
    line = $0
    for (i = n; i >= 1; i--)
        line = substr(line, 1, w[i]) OFS substr(line, w[i] + 1)
    print line
}' file
NAME |ADDRESS |PHONE
CLARK KENT |344 Clinton Street, Apartment 3D, midtown Metropolis |11111111
TONY STARK |Malibu Point 10880, 902XX |22222222
PETER PARKER |15th Street, Queens, New York City, New York |33333333
基于提供的样本数据的旧解决方案
您可以尝试下面这条 sed 命令:它匹配由两个或更多空白字符及紧随其后的一个非空白字符组成的子串,并在二者之间插入 |:
# Skip the dashed ruler; on every other line put "|" between each run of two
# or more blanks and the non-blank character that follows it. With -n, only
# lines where a substitution happened are printed.
sed -nE '/^-{3,}/! {s/([[:blank:]]{2,})([^[:blank:]])/\1|\2/gp;}' file
NAME |ADDRESS |PHONE
CLARK KENT |344 Clinton Street, Apartment 3D, midtown Metropolis |11111111
TONY STARK |Malibu Point 10880, 902XX |22222222
PETER PARKER |15th Street, Queens, New York City, New York |33333333