二维数组比较和输出文件问题|python



我正在尝试比较两个大小不相等的二维数组。数组1:RNA序列(array1),形状为 [63:3]:

AAA
AAC
AGC
UAC

数组2:RNA_coordinate_source_file(array2),形状为 [128:11]:

ATOM    142 H1' A   5   -1.227  8.932   12.229  1.00    0.00    H
ATOM    142 H1' A   5   -1.227  8.932   12.229  1.00    0.00    H
ATOM    142 H1' G   5   -1.227  8.932   12.229  1.00    0.00    H
ATOM    143 N1  G   5   -0.447  7.106   12.630  1.00    0.00    N
ATOM    143 N1  G   5   -0.447  7.106   12.630  1.00    0.00    N
ATOM    143 N1  C   5   -0.447  7.106   12.630  1.00    0.00    N
ATOM    142 H1' C   5   -1.227  8.932   12.229  1.00    0.00    H
ATOM    143 N1  G   5   -0.447  7.106   12.630  1.00    0.00    N
ATOM    143 N1  G   5   -0.447  7.106   12.630  1.00    0.00    N
ATOM    143 N1  C   5   -0.447  7.106   12.630  1.00    0.00    N

对于RNA序列(array1)中的每一个三元组,我想写出一个新文件:文件以该三元组命名(如 AAG.pdb),并包含 array2 中第4列(残基名)与该三元组各碱基相对应的所有行。

输出文件应从 array2 中提取相应的行:以 AAG 为例,文件先包含 "A" 的所有坐标,接着再是 "A" 的所有坐标,最后是 "G" 的所有坐标。所有三元组都按同样的方式处理,各自写入单独命名的文件。

我发现大小不相等的数组无法直接进行比较,下面的代码也不起作用。我需要帮助,谢谢。

import re
from itertools import product

# Input coordinate file and combined output file (paths unchanged).
PDB_IN = "drive/My Drive/RES/dimeric_force_field/Test/python_read/cropped.pdb"
PDB_OUT = "drive/My Drive/Colab Notebooks/pdb_out.pdb"

# The four RNA bases from which every triplet is built.
myArray = ["A", "C", "G", "U"]

# Index of the residue-name field in a whitespace-split coordinate row.
RESIDUE_NAME_COL = 3


def build_triplets(bases=myArray):
    """Return every 3-letter combination of *bases*, first letter varying slowest."""
    return ["".join(combo) for combo in product(bases, repeat=3)]


def parse_pdb_rows(lines):
    """Split each line of *lines* on whitespace and return the list of field lists.

    ``str.split()`` with no argument ignores leading/trailing whitespace, so a
    final line without a newline keeps its last field and a blank line becomes
    an empty list (its position in the result is preserved).
    """
    return [line.split() for line in lines]


def iter_matches(triplets, rows):
    """Yield ``(triplet_index, row_index, fields)`` for every matching row.

    A row is yielded once for each position of the triplet whose base equals
    the row's residue-name field, so a row with residue "A" is yielded three
    times for the triplet "AAA". Rows too short to have a residue-name field
    are skipped instead of raising IndexError.
    """
    for triplet_index, codon in enumerate(triplets):
        for row_index, fields in enumerate(rows):
            if len(fields) <= RESIDUE_NAME_COL:
                continue
            residue = fields[RESIDUE_NAME_COL]
            for base in codon:
                if base == residue:
                    yield triplet_index, row_index, fields


def format_record(triplet_index, row_index, fields):
    """Return one output line: both indices, then the tab-joined row fields."""
    # A tab now separates the row index from the first field; previously the
    # two were concatenated directly (e.g. "0ATOM").
    return "{}    {}\t{}\n".format(triplet_index, row_index, "\t".join(fields))


def main(pdb_path=PDB_IN, out_path=PDB_OUT):
    """Read the coordinate file, write all triplet matches to *out_path*, echo it."""
    with open(pdb_path, "r") as pdb_file:
        arr = parse_pdb_rows(pdb_file)

    triplet = build_triplets()
    print("triplets=", triplet)
    print("triplet array length=", len(triplet))
    print("arr=", arr)
    print("arr length=", len(arr))

    # Iteration starts at the first triplet and covers every row, including
    # the last one.
    with open(out_path, "w") as fw:
        for i, j, fields in iter_matches(triplet, arr):
            print("\t".join(fields))
            fw.write(format_record(i, j, fields))
        fw.write("TER")

    with open(out_path, "r") as fr:
        print(fr.read())


if __name__ == "__main__":
    main()

下面这段代码可以正常工作:

def render_nucleotide(rows):
    """Return *rows* as text: fields joined by tabs, rows joined by newlines.

    Every field is passed through ``str()``, so numeric fields are accepted.
    No trailing newline is added.
    """
    return "\n".join("\t".join(str(field) for field in row) for row in rows)


def render_triplet(codon, rows_by_base):
    """Return the file content for *codon*: one nucleotide section per letter.

    *rows_by_base* maps "A", "C", "G" and "U" to that nucleotide's rows.
    Any letter other than A, C or G falls back to the "U" rows. Sections are
    separated by a single newline, with none before the first section or
    after the last.
    """
    fallback = rows_by_base["U"]
    return "\n".join(
        render_nucleotide(rows_by_base.get(base, fallback)) for base in codon
    )


if __name__ == "__main__":
    # NOTE(review): triplet and NUC_A/NUC_C/NUC_G/NUC_U are expected to be
    # defined earlier in the notebook; they are not part of this snippet.
    nucleotide_rows = {"A": NUC_A, "C": NUC_C, "G": NUC_G, "U": NUC_U}

    for i, codon in enumerate(triplet):  # indices 0 to 63 for the 64 triplets
        # The three letters are taken from the current triplet on every
        # iteration so that each output file gets its own content.
        s1, s2, s3 = codon
        # n1/n2 mirror the first two letters.
        n1, n2 = s1, s2
        with open("drive/My Drive/Colab Notebooks/test/" + str(codon) + ".pdb", "w") as fw:
            fw.write(render_triplet(codon, nucleotide_rows))

    # NOTE(review): this echoes pdb_out.pdb, which this snippet does not
    # write; confirm that it is the intended file.
    with open("drive/My Drive/Colab Notebooks/pdb_out.pdb", "r") as fr:
        print(fr.read())

最新更新