我试图实现Needleman-Wunsch算法来获得全局对齐函数中的最小分数,但当两个序列相等时,我没有获得0的最小分数。
此代码有什么问题?
alphabet = ["A", "C", "G", "T"]
score = [[0, 4, 2, 4, 8],
[4, 0, 4, 2, 8],
[2, 4, 0, 4, 8],
[4, 2, 4, 0, 8],
[8, 8, 8, 8, 8]]
def globalAlignment(x, y):
#Dynamic version very fast
D = []
for i in range(len(x)+1):
D.append([0]* (len(y)+1))
for i in range(1, len(x)+1):
D[i][0] = D[i-1][0] + score[alphabet.index(x[i-1])][-1]
for i in range(len(y)+1):
D[0][i] = D[0][i-1]+ score[-1][alphabet.index(y[i-1])]
for i in range(1, len(x)+1):
for j in range(1, len(y)+1):
distHor = D[i][j-1]+ score[-1][alphabet.index(y[j-1])]
distVer = D[i-1][j]+ score[-1][alphabet.index(x[i-1])]
if x[i-1] == y[j-1]:
distDiag = D[i-1][j-1]
else:
distDiag = D[i-1][j-1] + score[alphabet.index(x[i-1])][alphabet.index(y[j-1])]
D[i][j] = min(distHor, distVer, distDiag)
return D[-1][-1]
x = "ACGTGATGCTAGCAT"
y = "ACGTGATGCTAGCAT"
print(globalAlignment(x, y))
只需更改->for i in range(len(y)+1):
到for i in range(1, len(y) + 1):
和->distVer = D[i-1][j]+ score[-1][alphabet.index(x[i-1])]
到
distVer = D[i - 1][j] + score[alphabet.index(x[i - 1])][-1]
至少
distHor = D[i][j-1]+ score[-1][alphabet.index(y[j-1])]
distVer = D[i-1][j]+ score[-1][alphabet.index(x[i-1])]
是可疑的,因为你在初始化中没有使用相同的[-1]位置,并且两个距离不太可能在权重中使用相同的方向
我想应该是
score[alphabet.index(x[i-1])][-1]
但这可能不是唯一的错误。。。
我通过在最后一个分数列表中放入0而不是8来解决问题;
alphabet = ["A", "C", "G", "T"]
score = [[0, 4, 2, 4, 8],
[4, 0, 4, 2, 8],
[2, 4, 0, 4, 8],
[4, 2, 4, 0, 8],
[0, 0, 0, 0, 0]]
def globalAlignment(x, y):
#Dynamic version very fast
D = []
for i in range(len(x)+1):
D.append([0]* (len(y)+1))
for i in range(1, len(x)+1):
D[i][0] = D[i-1][0] + score[alphabet.index(x[i-1])][-1]
for i in range(len(y)+1):
D[0][i] = D[0][i-1]+ score[-1][alphabet.index(y[i-1])]
for i in range(1, len(x)+1):
for j in range(1, len(y)+1):
distHor = D[i][j-1]+ score[-1][alphabet.index(y[j-1])]
distVer = D[i-1][j]+ score[alphabet.index(x[i-1])][-1]
if x[i-1] == y[j-1]:
distDiag = D[i-1][j-1]
else:
distDiag = D[i-1][j-1] + score[alphabet.index(x[i-1])][alphabet.index(y[j-1])]
D[i][j] = min(distHor, distVer, distDiag)
return D[-1][-1]