读取多个文件,比较值并创建参数存在或不存在的列表



请我需要这方面的帮助。

我有 12 个文件,我正在尝试比较"key_file.txt"中定义的区域是否存在,并生成一个显示此内容的列表。 我已经编写了以下代码,但出现以下错误。

File "filter_bedtools_all_samples_new.py", line 119, in <module>
start = elems[1]

索引错误:列出索引超出范围

这是代码

import sys
#read each file from the argument list
A1_file = sys.argv[1]
A2_file = sys.argv[2]
A3_file = sys.argv[3]
B1_file = sys.argv[4]
B2_file = sys.argv[5]
B3_file = sys.argv[6]
C1_file = sys.argv[7]
C2_file = sys.argv[8]
C3_file = sys.argv[9]
D1_file = sys.argv[10]
D2_file = sys.argv[11]
D3_file = sys.argv[12]
key_file = sys.argv[13]
offset1 = int(sys.argv[14])
offset2 = int(sys.argv[15])
out_file = sys.argv[16]
#open the output file
outHandle = open(out_file,'w')
#create a class to hold objects
class Island:
def __init__(self, chr, start, end):
self.chr = chr
self.start = start
self.end = end
#start reading files into lists
with open(A1_file) as A1:
list1 = A1.readlines()
with open(A2_file) as A2:
list2 = A2.readlines()
with open(A3_file) as A3:
list3 = A3.readlines()
with open(B1_file) as B1:
list4 = B1.readlines()
with open(B2_file) as B2:
list5 = B2.readlines()
with open(B3_file) as B3:
list6 = B3.readlines()
with open(C1_file) as C1:
list7 = C1.readlines()
with open(C2_file) as C2:
list8 = C2.readlines()
with open(C3_file) as C3:
list9 = C3.readlines()
with open(D1_file) as D1:
list10 = D1.readlines()
with open(D2_file) as D2:
list11 = D2.readlines()
with open(D3_file) as D3:
list12 = D3.readlines()
#create a list containing the filenames
file_list = ["list1","list2","list3","list4","list5","list6","list7","list8","list9","list10","list11","list12"]
#print(len(list1))
key_dict = {}
out_dict = {}
key_list = []
counter = 0
#open key file and read one line at a time
with open(key_file) as kf:
for eachline in kf:
#initialize a dictionary of lists to 0
temp_list = "list_" + str(counter)
temp_list = [0] * 12
out_dict[counter] = temp_list
els = eachline.split("t")
k_chr = els[0]
k_start = els[1]
k_end = els[2]
#create a dictionary of objects Island
temp_obj = Island(k_chr,k_start,k_end)
key_dict[counter] = temp_obj
key_list.append(eachline) #decided to try this out 
counter += 1
#for k,v in key_dict.iteritems():
for v in key_list:
key_elems = v.split("t")
key_chr = key_elems[0]
key_start = key_elems[1]
key_end = key_elems[2].strip(' trn')
for file_name in file_list:
#  for i in range(1,13)
#          file_name = "list" + str(i)
for eachline in file_name:
elems = eachline.split("t")
chr = elems[0]
start = elems[1]
end = elems[2]
island = elems[3]
count = elems[4]
start_diff = abs(int(key_start) - int(start))
end_diff = abs(int(key_end) - int(end))
if (chr == key_chr):
if(((key_start == start) or (0 <= start_diff <= offset1)) and ((key_end == end) or (0 <= end_diff <= offset2))):
temp_list = out_dict[k]
temp_list[i] = count
out_dict[k] = temp_list
else:
continue
else:
continue

for key,value in out_dict.iteritems():
outHandle.write(str(value))
print("Processing completed!")

这是文件 答1

Chromosome01    3187178 3187214 island-16   177976  .   3187178 3187214 iR  bC  bZ  bS
Chromosome01    5042128 5042182 island-32   943 .   5042128 5042182 iR  bC  bZ  bS

A2

Chromosome01    1102995 1103064 island-4    1558    .   1102995 1103064 iR  bC  bZ  bS
Chromosome01    3187178 3187227 island-9    81851   .   3187178 3187227 iR  bC  bZ  bS

A3

Chromosome01    4144298 4144467 island-39   354 .   4144298 4144467 iR  bC  bZ  bS
Chromosome01    4144671 4145103 island-41   344 .   4144671 4145103 iR  bC  bZ  bS

B1

Chromosome01    5042128 5042238 island-15   1250    .   5042128 5042238 iR  bC  bZ  bS
Chromosome01    5042315 5042535 island-16   3256    .   5042315 5042535 iR  bC  bZ  bS

B3

Chromosome01    1102966 1103182 island-2    3910    .   1102966 1103182 iR  bC  bZ  bS
Chromosome01    5042128 5042238 island-19   3488    .   5042128 5042238 iR  bC  bZ  bS

B5

Chromosome01    1102966 1103065 island-3    2462    .   1102966 1103065 iR  bC  bZ  bS
Chromosome01    5042128 5042237 island-20   2592    .   5042128 5042237 iR  bC  bZ  bS

C1

Chromosome01    1102973 1103182 island-4    3950    .   1102973 1103182 iR  bC  bZ  bS
Chromosome01    5042128 5042237 island-22   4965    .   5042128 5042237 iR  bC  bZ  bS

C2

Chromosome01    1102966 1103182 island-5    3697    .   1102966 1103182 iR  bC  bZ  bS
Chromosome01    5042128 5042238 island-29   2730    .   5042128 5042238 iR  bC  bZ  bS

C4

Chromosome01    1102974 1103065 island-6    1673    .   1102974 1103065 iR  bC  bZ  bS
Chromosome01    5042128 5042238 island-28   1857    .   5042128 5042238 iR  bC  bZ  bS

D1

Chromosome01    1102957 1103182 island-5    7654    .   1102957 1103182 iR  bC  bZ  bS
Chromosome01    3187180 3187215 island-21   223953  .   3187180 3187215 iR  bC  bZ  bS

D2

Chromosome01    1102973 1103182 island-5    4847    .   1102973 1103182 iR  bC  bZ  bS
Chromosome01    5042128 5042237 island-24   2300    .   5042128 5042237 iR  bC  bZ  bS

D3

Chromosome01    1102971 1103182 island-6    7091    .   1102971 1103182 iR  bC  bZ  bS
Chromosome01    5042128 5042238 island-30   2509    .   5042128 5042238 iR  bC  bZ  bS

key_list文件为:

Chromosome01    1102966 1103065 Chromosome01    1102966 1103182
Chromosome01    1102995 1103064 Chromosome01    3187178 3187214
Chromosome01    3187178 3187227 Chromosome01    4144298 4144467
Chromosome01    4144671 4145103 Chromosome01    5042128 5042182
Chromosome01    5042128 5042238 Chromosome01    5042315 5042535
Chromosome01    5042495 5042532 Chromosome01    5042663 5043093
Chromosome01    5042726 5043093 Chromosome01    5043238 5043392
Chromosome01    5043292 5043394 Chromosome01    5043520 5043752
Chromosome01    5043523 5043664 Chromosome01    5043547 5043617
Chromosome01    5043549 5043752 Chromosome01    5043902 5043961
Chromosome01    5044239 5044547 Chromosome01    5044462 5044505
Chromosome01    5044679 5044870 Chromosome01    5044679 5045096
Chromosome01    5044719 5044870 Chromosome01    5044946 5045096
Chromosome01    5044946 5045115 Chromosome01    5044946 5045168
Chromosome01    5044993 5045096 Chromosome01    5292510 5292635
Chromosome01    5292577 5292635 Chromosome01    6698849 6698976
Chromosome01    13128763    13128846 Chromosome01   13509086    13509169
Chromosome01    13509086    13509182 Chromosome01   18273293    18273468

感谢您的帮助

如注释中所述,您的代码无法工作"list1"因为list1

。您打开文件的方式太复杂

A1_file = sys.argv[1]
with open(A1_file) as A1:
list1 = A1.readlines()
file_list = ["list1","list2","list3","list4","list5","list6","list7","list8","list9","list10","list11","list12"]
for file_name in file_list:
for eachline in file_name:
do_stuff()

这,乘以您的 12 个文件。

for i in range(1,13):
with open(sys.argv[i]) as f:
lines = f.readlines()
for line in lines:
do_stuff()

在这里,无需创建无法按预期工作的临时file_name

最新更新