如何处理 FASTA 编码时出现的错误 "AttributeError: 'Seq' object has no attribute 'tostring'"('Seq' 对象没有 'tostring' 属性)?


import pandas as pd
import numpy as np
from Bio import*
from Bio import SeqIO
import time 
import h5py

def vectorizeSequence(seq):
    """One-hot encode a nucleotide sequence.

    Each letter becomes one 4-element row with columns in the order
    A, C, G, T; 'n' is encoded as an all-zero row.

    Args:
        seq: Iterable of single-character nucleotide letters (for example a
            ``str``). Letters are matched case-insensitively.

    Returns:
        Integer numpy array of shape ``(len(seq), 4)``. An empty sequence
        yields an array of shape ``(0, 4)``.

    Raises:
        KeyError: If a letter other than a/c/g/t/n is encountered.
    """
    # The order of the letters is not arbitrary: flipping the matrix
    # up-down and left-right gives the reverse complement.
    ltrdict = {
        'a': [1, 0, 0, 0],
        'c': [0, 1, 0, 0],
        'g': [0, 0, 1, 0],
        't': [0, 0, 0, 1],
        'n': [0, 0, 0, 0],
    }
    rows = [ltrdict[x.lower()] for x in seq]
    # reshape keeps the (N, 4) layout even when the sequence is empty,
    # where a bare np.array([]) would collapse to shape (0,).
    return np.array(rows, dtype=int).reshape(-1, 4)

# Script body: one-hot encode every record of contigs.fasta and store each
# encoded record as its own dataset in genomeEncoded.h5.
starttime = time.time()

# Open both files in a single `with` so each is closed even if encoding fails.
with open("contigs.fasta") as fasta_handle, h5py.File('genomeEncoded.h5', 'w') as hf:
    fasta_sequences = SeqIO.parse(fasta_handle, 'fasta')
    for fasta in fasta_sequences:
        # Seq.tostring() is not available on current Biopython Seq objects;
        # str() is the supported way to get the sequence as a plain string.
        name, sequence = fasta.id, str(fasta.seq)
        # The record name is not written to a separate file (no such file is
        # ever opened in this script); it is kept as the HDF5 dataset name.
        # encoding scheme
        data = vectorizeSequence(sequence.lower())
        print(name + " is one hot encoded!")
        # write to hdf5
        hf.create_dataset(name, data=data)
        print(name + " is written to dataset")

endtime = time.time()
# Report the elapsed time, not the raw epoch timestamp.
print("Encoding is done in " + str(endtime - starttime))

运行时的错误回溯信息如下:

Traceback (most recent call last):
  File "FASTA_ENCODING4ML.py", line 30, in <module>
    name, sequence = fasta.id, fasta.seq.tostring()
AttributeError: 'Seq' object has no attribute 'tostring'

若要将 Biopython 的 Seq 对象转换为字符串,请使用 str(),而不是 tostring()。

例如:

>>> from Bio.Seq import Seq
>>> str(Seq('ATCGTGC'))
'ATCGTGC'

最新更新