在NetworkX中组合两个加权图



我使用python多处理来创建多个不同的NetworkX图,然后使用下面的函数来组合这些图。然而,虽然此函数适用于小型图,但对于大型图,它使用了大量内存,并挂在我的系统和内存密集的AWS系统上(仅使用系统中总内存的三分之一左右)。是否有更有效的方法来执行以下功能?

def combine_graphs(graph1, graph2, graph2_weight = 1):
'''
Given two graphs of different edge (but same node) structure (and the same type),
combine the two graphs, summing all edge attributes and multiplying the second one's
attributes by the desired weights. 
E.g. if graph1.edge[a][b] = {'a': 1, 'b':2} and 
graph2.edge[a][b] = {'a': 3, 'c': 4}, 
with a weight of 1 the final graph edge should be 
final_graph.edge[a][b] = {'a': 4, 'b': 2, 'c': 4} and with a weight 
of .5 the final graph edge should be {'a': 2.5, 'b': 2, 'c': 2}.
Inputs: Two graphs to be combined and a weight to give to the second graph
'''
if type(graph1) != type(graph2) or len(set(graph2.nodes()) - set(graph1.nodes())) > 0:
raise Exception('Graphs must have the same type and graph 2 cannot have nodes that graph 1 does not have.')
# make a copy of the new graph to ensure that it doesn't change
new_graph = graph1.copy()
# iterate over graph2's edges, adding them to graph1
for node1, node2 in graph2.edges():
# if that edge already exists, now iterate over the attributes
if new_graph.has_edge(node1, node2):
for attr in graph2.edge[node1][node2]:
# if that attribute exists, sum the values, otherwise, simply copy attrs
if new_graph.edge[node1][node2].get(attr) is not None:
# try adding weighted value: if it fails, it's probably not numeric so add the full value (the only other option is a list)
try:
new_graph.edge[node1][node2][attr] += graph2.edge[node1][node2][attr] * graph2_weight
except:
new_graph.edge[node1][node2][attr] += graph2.edge[node1][node2][attr]
else:
try:
new_graph.edge[node1][node2][attr] = graph2.edge[node1][node2][attr] * graph2_weight
except:
new_graph.edge[node1][node2][attr] = graph2.edge[node1][node2][attr]
# otherwise, add the new edge with all its atributes -- first, iterate through those attributes to weight them
else:
attr_dict = graph2.edge[node1][node2]
for item in attr_dict:
try:
attr_dict[item] = attr_dict[item] * graph2_weight
except:
continue
new_graph.add_edge(node1, node2, attr_dict = attr_dict)
return new_graph

代码中有两个地方的内存会扩展:

1) 复印graph1(也许你需要保留一份)

2) 使用graph2.edges()生成内存中所有边的列表,graph2.edges_iter()在不创建新列表的情况下对边进行迭代

您也可以通过不同的方式处理边缘数据来加快速度。您可以在边上迭代时获得数据对象,而不必执行字典查找:

def combined_graphs_edges(G, H, weight = 1.0):
for u,v,hdata in H.edges_iter(data=True):
# multply attributes of H by weight
attr = dict( (key, value*weight) for key,value in hdata.items())
# get data from G or use empty dict if no edge in G
gdata = G[u].get(v,{})
# add data from g
# sum shared items
shared = set(gdata) & set(hdata)
attr.update(dict((key, attr[key] + gdata[key]) for key in shared))
# non shared items
non_shared = set(gdata) - set(hdata)
attr.update(dict((key, gdata[key]) for key in non_shared))
yield u,v,attr
return

if __name__ == '__main__':
import networkx as nx
G = nx.Graph([('a','b', {'a': 1, 'b':2})])
H = nx.Graph([('a','b', {'a': 3, 'c':4})])
print list(combined_graphs_edges(G,H,weight=0.5))
# or to make a new graph 
graph = G.copy()
graph.add_edges_from(combined_graphs_edges(G,H,weight=0.5))

最新更新