我正在使用第一步转换矩阵来生成DNA序列。现在我需要给转移矩阵一个概率,每 1000 步改变一次。假设每 1000 步,转移矩阵有 40% 的概率发生变化。更改后,每一行都应加到 1。现在我不知道如何在python中访问嵌套字典数据中的值,以及如何实现40%的概率变化。我在这里附上了我的代码,任何建议都值得赞赏~
#!/usr/bin/env python
import sys, random
length = 10000
tran_matrix = {'a': {'a':0.495,'c':0.113,'g':0.129,'t':0.263},
'c': {'a':0.129,'c':0.063,'g':0.413,'t':0.395},
't': {'a':0.213,'c':0.495,'g':0.263,'t':0.029},
'g': {'a':0.263,'c':0.129,'g':0.295,'t':0.313}}
initial_p = {'a':0.25,'c':0.25,'t':0.25,'g':0.25}
def choose(dist):
r = random.random()
sum = 0.0
keys = dist.keys()
for k in keys:
sum += dist[k]
if sum > r:
return k
return keys[-1]
c = choose(initial_p)
for i in range(length):
sys.stdout.write(c)
c = choose(tran_matrix[c])
编辑:添加了一些生成新转换频率的代码的快速实现。您可能需要尝试找到最适合您的情况的随机数生成器,并查看是否可以使用随机概率的一些约束来获得更合理的生成。
import sys, random
LENGTH = 10000
CHANGE_EVERY = 1000
CHANGE_PROB = 0.4
tran_matrix = {'a': {'a':0.495,'c':0.113,'g':0.129,'t':0.263},
'c': {'a':0.129,'c':0.063,'g':0.413,'t':0.395},
't': {'a':0.213,'c':0.495,'g':0.263,'t':0.029},
'g': {'a':0.263,'c':0.129,'g':0.295,'t':0.313}}
initial_p = {'a':0.25,'c':0.25,'t':0.25,'g':0.25}
def choose(dist):
r = random.random()
sum = 0.0
keys = dist.keys()
for k in keys:
sum += dist[k]
if sum > r:
return k
return keys[-1]
def new_probs(precision=2):
"""
Generate a dictionary of random transition frequencies, of the form
{'a':0.495,'c':0.113,'g':0.129,'t':0.263}
"""
probs = []
total_prob = 0
# Choose a random probability p1 from a uniform distribution in
# the range (0, 1), then choose p2 in the range (0, 1 - p1), etc.
for i in range(3):
up_to = 1 - total_prob
p = round(random.uniform(0, up_to), precision)
probs.append(p)
total_prob += p
# Final probability is 1 - (sum of first 3 probabilities)
probs.append(1 - total_prob)
# Assign randomly to bases
# If you don't shuffle the order of the bases each time, 't'
# would end up with consistently lower probabilities
bases = ['a', 'c', 'g', 't']
random.shuffle(bases)
new_prob_dict = {}
for base, prob in zip(bases, probs):
new_prob_dict[base] = prob
return new_prob_dict
c = choose(initial_p)
for i in range(LENGTH):
if i % CHANGE_EVERY == 0:
dice_roll = random.random()
if dice_roll < CHANGE_PROB:
for base in tran_matrix:
# Generate a new probability dictionary for each
# base in the transition matrix
tran_matrix[base] = new_probs()
sys.stdout.write(c)
c = choose(tran_matrix[c])