#!/usr/bin/perl
use strict;
use warnings;
# Intended behaviour of this script. Steps (3) and (4) are not implemented
# below: the code only prints the matched ID and never fills a hash.
# (1) read the input file
# (2) use split to break each line into columns
# (3) load the IDs that begin with FBgn from the 1st column as keys to a hash
# (4) load the gene symbol from the second column as the value in each hash entry
my $infile = "fb_synonym_fb_2013_05.tsv";
# Open the input for reading; abort with the OS error text ($!) on failure.
open( FILE, "<", $infile )
or die "Cannot open file $!";
# Read every line of the file into memory at once.
my @data = <FILE>;
foreach my $line (@data) {
# NOTE(review): as written this pattern means "one or more letter s";
# splitting on whitespace would be /\s+/ -- the backslash looks lost.
my @column = split( /s+/, $line );
#print $column[0],"n";
# Keep the first field of the line for the ID test below.
my $columnID = $column[0];
# NOTE(review): this "my" declares a NEW, undefined $columnID that hides the
# one assigned on the previous line, so the loop body only ever sees undef.
foreach ( my $columnID ) {
# NOTE(review): "while" re-tests the same unchanged string, so it would
# never terminate if the match ever succeeded. The pattern also reads as
# "FBgn" followed by literal letter d (probably meant \d+).
while ( $columnID =~ /(^FBgnd+)/ ) {
my $ID = $1;
# NOTE(review): "n" prints the letter n; a newline would be "\n".
print $ID, "n";
}
}
}
我尝试运行这段代码,但 Perl 警告说 $columnID 是未初始化的值,我卡在了这一步。我需要选出第一列中以"FBgn"开头的 ID。
输入:数据文件非常大,所以这里只给出其中一小段:
##primary_FBid current_symbol current_fullname fullname_synonym(s) symbol_synonym(s)
FBtr0000004 EcollacZ[svp-3]RA lacZ[svp-3]RA
FBtr0000005 EcollacZ[hkb-5953]RA lacZ[5953]RA,lacZ[hkb-5953]RA,hkb- lacZ,5953
FBtr0000007 Mab4A11[+]R2.1
FBtr0000009 EcollacZ[betaTub56D.AS1]RA lacZ[betaTub56D.AS1]RA
FBtr0000010 EcollacZ[betaTub56D.AS2]RA lacZ[betaTub56D.AS2]RA
FBtr0000011 EcollacZ[betaTub56D.AS3]RA lacZ[betaTub56D.AS3]RA
FBtr0000012 EcollacZ[betaTub56D.NN]RA lacZ[betaTub56D.NN]RA
FBtr0000013 EcollacZ[betaTub56D.NC]RA lacZ[betaTub56D.NC]RA
FBtr0000014 EcollacZ[betaTub56D.CP]RA lacZ[betaTub56D.CP]RA
FBtr0000015 EcollacZ[betaTub56D.ACO]RA lacZ[betaTub56D.ACO]RA
FBtr0000016 EcollacZ[betaTub56D.AC3]RA lacZ[betaTub56D.AC3]RA
我怀疑本节存在逻辑问题:
my $columnID = $column[0];
# NOTE(review): "my" here creates a second, undefined $columnID that hides
# the one assigned on the previous line.
foreach ( my $columnID ) {
# NOTE(review): a single truth test ("if") is all that is needed; "while"
# would loop forever on a string that matches.
while ( $columnID =~ /(^FBgnd+)/ ) {
my $ID = $1;
print $ID, "n";
}
}
首先,你遍历每一行,然后把该行拆分成各列。到这里一切正常。
然后你取出第一列,把它存入标量 $columnID,这也没问题。不过值得检查一下它是否已定义(以防遇到空行)。
问题出在下一行:
foreach ( my $columnID )
你到底在迭代什么?这里你用 my 声明了一个新变量(一个新的 $columnID),你绝对不想这样做,因为它会遮蔽你刚刚赋过值的那个 $columnID。总之,这一行对你没有任何帮助。
接下来,您将有:
while ( $columnID =~ /(^FBgnd+)/ ) {
这又是一个循环,但你并不需要它。你只想知道匹配结果是真还是假,所以最好写成:
if ( $columnID =~ ... ) {
# action if true
}
摘要:尝试将代码重写为:
# Print every first-column ID that begins with "FBgn" followed by digits.
# Expects the FILE handle to be already opened for reading.
my @data = <FILE>;
foreach my $line (@data) {
    # Split on runs of whitespace; the first field is the ID to test.
    my @column   = split( /\s+/, $line );
    my $columnID = $column[0];

    # A blank line yields no fields at all, so there is nothing to test.
    next if !defined $columnID;

    # One anchored match is enough; no loop is needed around it.
    if ( $columnID =~ /\A(FBgn\d+)/ ) {
        my $ID = $1;
        print $ID, "\n";
    }
}
你需要在循环之外声明 $columnID 和 $ID,才能给它们赋值。另外,你还用 my 声明了两次 $columnID。
尝试:
# Print every first-column ID that begins with "FBgn" followed by digits.
# Expects the FILE handle to be already opened for reading.
my @data = <FILE>;

# Declared once, outside the loop, and reused for every line.
my @column;
my $columnID;
my $ID;

foreach my $line (@data) {
    # Split on runs of whitespace; the first field is the ID to test.
    @column = split( /\s+/, $line );
    #print $column[0], "\n";
    $columnID = $column[0];

    # A blank line yields no fields, which would leave $columnID undefined
    # and trigger an "uninitialized value" warning in the match below.
    next if !defined $columnID;

    if ( $columnID =~ /\A(FBgn\d+)/ ) {
        $ID = $1;
        print $ID, "\n";
    }
}
虽然来晚了,但还是贴出我的答案:
#!/usr/bin/perl
use strict;
use warnings;
use Data::Dumper;

# Two sample records: an ID followed by whitespace-separated values.
my @data = ( "FBgn34 2.2 3 xxxx\n", "FBgn35 20.2 30 yyyy\n" );

# Maps each first-column ID that starts with FBgn to its second-column value.
my %hash = ();
foreach my $line (@data) {
    # Split on runs of whitespace; column 0 is the ID, column 1 the value.
    my @column = split( /\s+/, $line );

    # A blank line yields no fields, so there is no ID to test.
    next if !defined $column[0];

    if ( $column[0] =~ /\AFBgn\d+/ ) {
        $hash{ $column[0] } = $column[1];
    }
}

# Pass a reference so Dumper prints one hash structure rather than a flat
# list of separate keys and values.
print Dumper( \%hash );
输出:
{
'FBgn35' => '20.2',
'FBgn34' => '2.2'
};