我正在处理这个编程问题。该代码应该返回像本例中那样的团块
输入:CGGACTCGACAGATGTGAAGAACGACAATGTGAAGACTCGACACGACAGAGTGAAGAGAAGAGGAAACATTGTAA 5 50 4
输出:CGACA GAAGA
这是我使用过的代码:
def frequency_table(text, kmer_len):
    """Count substrings of length ``kmer_len`` in ``text`` (code as posted).

    The statements are kept exactly as the asker wrote them; only the
    indentation lost in the paste has been restored so the function parses.

    Args:
        text: String to scan.
        kmer_len: Length of each substring (k-mer) to count.

    Returns:
        A dict mapping each substring that was visited to the number of
        times it was seen.
    """
    freq_map = {}
    nt = len(text)
    nk = kmer_len
    # NOTE(review): start positions come from range(0, nt - nk), so the
    # substring that begins at index nt - nk (the last full k-mer of
    # ``text``) is never visited.
    for i in range(0, nt-nk):
        pattern = text[i : i+nk]
        # First sighting (.get() yields None) starts the count at 1.
        if not freq_map.get(pattern):
            freq_map[pattern] = 1
        else:
            freq_map[pattern] = freq_map[pattern] + 1
    return freq_map
def FindClumps(Text, k, L, t):
    """Collect candidate clump patterns from ``Text`` (code as posted).

    The statements are kept exactly as the asker wrote them; only the
    indentation lost in the paste has been restored so the function parses.

    Args:
        Text: String to search.
        k: Substring length passed to ``frequency_table``.
        L: Window length.
        t: Threshold used in the comparison below.

    Returns:
        A list of substrings; the same substring may appear several times
        because nothing removes duplicates.
    """
    Patterns = []
    n = len(Text)
    for i in range(n - L):
        # NOTE(review): the slice ends at the fixed index L rather than
        # i + L, so the window gets shorter as i grows.
        Window = str(Text[i:L])
        # NOTE(review): list() of a dict keeps only its keys, so the
        # counts are no longer available after this line.
        freqMap = list(frequency_table(Window, k))
        for s in range(len(freqMap)):
            # NOTE(review): this compares the length of the substring
            # itself with t, not how many times it occurred.
            if len(freqMap[s]) >= t:
                Patterns.append(freqMap[s])
    return Patterns
每次我提交答案,都有人说我错了。
我的代码有问题吗?还是有一个我不理解的潜在概念?
我发现了几个错误。下面是经过更正的代码,带有解释更改的内联注释。
def frequency_table(text, kmer_len):
    """Count every substring of length ``kmer_len`` (k-mer) in ``text``.

    Overlapping occurrences are counted separately, because a k-mer is
    read at every start position from 0 through ``len(text) - kmer_len``.

    Args:
        text: String to scan.
        kmer_len: Length of each k-mer; must be at least 1.

    Returns:
        A dict mapping each k-mer found in ``text`` to its number of
        occurrences. Empty when ``text`` is shorter than ``kmer_len``.

    Raises:
        ValueError: If ``kmer_len`` is smaller than 1.
    """
    if kmer_len < 1:
        raise ValueError("kmer_len must be a positive integer")

    freq_map = {}
    # The "+ 1" includes the k-mer that ends on the last character of
    # ``text``; without it the range is short by one.
    for i in range(len(text) - kmer_len + 1):
        pattern = text[i : i + kmer_len]
        freq_map[pattern] = freq_map.get(pattern, 0) + 1
    return freq_map
def FindClumps(Text, k, L, t):
    """Return the k-mers that occur at least ``t`` times in some window.

    Every window of ``L`` consecutive characters of ``Text`` is examined;
    a k-mer is reported when its count inside at least one such window
    reaches ``t``.

    Args:
        Text: String to search.
        k: Length of the k-mers to count.
        L: Window length.
        t: Minimum number of occurrences within one window.

    Returns:
        A list of the distinct qualifying k-mers. The order is unspecified
        because the patterns are collected in a set. Empty when ``Text``
        is shorter than ``L``.
    """
    clumps = set()
    # There are len(Text) - L + 1 window start positions; the "+ 1" makes
    # sure the window ending on the last character is examined too
    # (and that a Text of exactly L characters yields one window).
    for i in range(len(Text) - L + 1):
        # The end of the slice must be relative to i so every window
        # has length L.
        window = str(Text[i : i + L])
        freq_map = frequency_table(window, k)
        # Iterate the dict's items directly so the counts stay available
        # for the threshold comparison.
        clumps.update(
            pattern for pattern, count in freq_map.items() if count >= t
        )
    return list(clumps)
# Run FindClumps on the sample sequence: 5-mers, window length 50,
# at least 4 occurrences; print the resulting list.
print(
    FindClumps(
        Text="CGGACTCGACAGATGTGAAGAACGACAATGTGAAGACTCGACACGACAGAGTGAAGAGAAGAGGAAACATTGTAA",
        k=5,
        L=50,
        t=4,
    )
)
输出
['GAAGA', 'CGACA']