# Fitness score of each individual, keyed by its comma-separated bit string.
population_d = {
    '0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,1': 6,
    '0,0,1,1,1,0,0,1,1,0,1,1,0,0,0,1': 3,
    '0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,0': 5,
    '1,0,0,1,1,1,0,0,1,1,0,1,1,0,0,0': 1,
}
def ProbabilityList(population_d):
    """Return the cumulative relative fitness of each entry as a list.

    Each fitness value is divided by the sum of all fitness values, and the
    resulting fractions are accumulated in the iteration order of
    ``population_d``. Element ``i`` of the result therefore corresponds to
    the ``i``-th entry of the dictionary.

    Args:
        population_d: Mapping whose values are numeric fitness scores.

    Returns:
        A list of running totals, one per entry; empty for an empty mapping.

    Raises:
        ZeroDivisionError: If the mapping is non-empty and its fitness
            values sum to zero.
    """
    fitness = list(population_d.values())
    total_fit = sum(fitness)

    # Carry a single running total instead of re-summing a growing slice for
    # every position, which made the original quadratic in population size.
    probabilities = []
    running = 0
    for f in fitness:
        running += f / total_fit
        probabilities.append(running)
    return probabilities
我想为这种数据结构计算累积概率,但需要保持各个值的原有顺序,以便按相同的位置与另一个列表中对应的个体一一对应。
程序按字典中的顺序依次累加,因此最后一个位置得到的权重最高,而在本例中,最后一个位置恰好是适应度最低的个体。
有谁知道是否有办法以正确的方式(按适应度值的升序)执行累积求和,同时不改变它们在输出列表中的位置?
谢谢!
# Fitness score of each individual, keyed by its comma-separated bit string.
population_d = {
    '0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,1': 6,
    '0,0,1,1,1,0,0,1,1,0,1,1,0,0,0,1': 3,
    '0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,0': 5,
    '1,0,0,1,1,1,0,0,1,1,0,1,1,0,0,0': 1,
}
在您的字典中,您将 fitness(?)值与唯一标识符相关联。据推测,这些标识符来自程序和数据集中的其他位置。我没有依靠字典的构造顺序来保持这种对应关系,而是保留了标识符与值之间的关联,并构建了一个新的字典,其值是将适应度从低到高排序后得到的累积总和。
import operator
def ProbabilityList(population_d):
    """Map each individual to its cumulative relative fitness.

    Entries are ordered from lowest to highest fitness, each fitness is
    divided by the total fitness, and the fractions are accumulated in that
    order. Keeping the individual as the key preserves the association
    between an individual and its cumulative value without relying on list
    positions.

    Args:
        population_d: Mapping of individual identifier to numeric fitness.

    Returns:
        A new dict of identifier -> cumulative relative fitness, filled in
        ascending-fitness order. Entries with equal fitness keep their
        original relative order because ``sorted`` is stable.

    Raises:
        ZeroDivisionError: If the mapping is non-empty and its fitness
            values sum to zero.
    """
    total_fit = sum(population_d.values())

    # (individual, fitness) pairs, lowest fitness first.
    ranked = sorted(population_d.items(), key=operator.itemgetter(1))

    probabilities = {}
    cumsum = 0
    for ind, fit in ranked:
        cumsum += fit / total_fit
        probabilities[ind] = cumsum
    return probabilities
# Compute the cumulative values and show each one next to its original fitness.
d = ProbabilityList(population_d)
for k, v in d.items():
    print('key:{}, fitness:{}, cumsum:{}'.format(k, population_d[k], v))
>>>
key:1,0,0,1,1,1,0,0,1,1,0,1,1,0,0,0, fitness:1, cumsum:0.06666666666666667
key:0,0,1,1,1,0,0,1,1,0,1,1,0,0,0,1, fitness:3, cumsum:0.26666666666666666
key:0,1,1,0,1,1,0,0,1,1,1,0,0,1,0,0, fitness:5, cumsum:0.6
key:0,0,1,0,1,1,0,1,1,1,1,0,0,0,0,1, fitness:6, cumsum:1.0
>>>
希望借助这个字典,您能够在代码的其他部分将累积总和与原始个体对应起来。
我看到您一直在询问与此数据集和项目相关的其他问题。 您可能希望花一些时间学习 Pandas,甚至考虑将数据保存在数据库中,而不是分散在整个项目中的单个容器。