我有一个数据帧,如下
Quote ID Quote Date Email Phone VIN
0 1410095416 6/6/2021 DAMIONADAE@GMAIL.COM 4049366688 1C4RJFBG9EC2267
1 1410143058 6/6/2021 BEEZZZHAPPY@YAHOO.COM 3122340791 NaN
2 1408893417 6/3/2021 MONEYKAY38@YAHOO.COM 2149004015 1J8HG48NX6C2470
3 1408764243 6/2/2021 TIFFANYLESTER419@GMAIL.COM 5024647900 JN8AZ08W57W6527
4 1408639003 6/2/2021 MONEYTEAM799@YAHOO.COM 2149001015 1ZVBP8AM3E52605
我正在使用一个函数来生成网络图。下面的函数使用数据帧、节点和边来生成Graph。
def create_network(df, node, column_edge, column_edge1=None, column_edge2=None):
# select columns, remove NaN
df_edge1 = df[[node, column_edge]].dropna(subset=[column_edge]).drop_duplicates()
# To create connections between "node" who have the same "edge",
# join data with itself on the "node" column.
df_edge1 = df_edge1.merge(
df_edge1[[node, column_edge]].rename(columns={node:node+"_2"}),
on=column_edge
)
# By joining the data with itself, node will have a connection with themselves.
# Remove self connections, to keep only connected nodes which are different.
edge1 = df_edge1[~(df_edge1[node]==df_edge1[node+"_2"])].dropna()[[node, node +"_2", column_edge]]
# To avoid counting twice the connections (person 1 connected to person 2 and person 2 connected to person 1)
# we force the first ID to be "lower" then ID_2
edge1.drop(edge1.loc[edge1[node+"_2"]<edge1[node]].index.tolist(), inplace=True)
G = nx.from_pandas_edgelist(df=edge1, source=node, target=node + '_2', edge_attr=column_edge)
G.add_nodes_from(nodes_for_adding=df[node].tolist())
if column_edge1:
df_edge2 = df[[node, column_edge1]].dropna(subset=[column_edge1]).drop_duplicates()
df_edge2 = df_edge2.merge(
df_edge2[[node, column_edge1]].rename(columns={node:node+"_2"}),
on=column_edge1
)
edge2 = df_edge2[~(df_edge2[node]==df_edge2[node+"_2"])].dropna()[[node, node+"_2", column_edge1]]
edge2.drop(edge2.loc[edge2[node+"_2"]<edge2[node]].index.tolist(), inplace=True)
# Create the connections in the graph
links_attributes = {tuple(row[[node, node+"_2"]]): {column_edge1: row[column_edge1]} for i,row in edge2.iterrows()}
# create the connection, without attribute.
G.add_edges_from(links_attributes)
# adds the attribute.
nx.set_edge_attributes(G=G, values=links_attributes)
if column_edge2:
df_edge3 = df[[node, column_edge2]].dropna(subset=[column_edge2]).drop_duplicates()
df_edge3 = df_edge3.merge(
df_edge3[[node, column_edge2]].rename(columns={node:node+"_2"}),
on=column_edge2
)
edge3 = df_edge3[~(df_edge3[node]==df_edge3[node+"_2"])].dropna()[[node, node+"_2", column_edge2]]
edge3.drop(edge3.loc[edge3[node+"_2"]<edge3[node]].index.tolist(), inplace=True)
# Create the connections in the graph
links_attributes2 = {tuple(row[[node, node+"_2"]]): {column_edge2: row[column_edge2]} for i,row in edge3.iterrows()}
# create the connection, without attribute.
G.add_edges_from(links_attributes2)
# adds the attribute.
nx.set_edge_attributes(G=G, values=links_attributes2)
return G
调用上述功能
GE3 = create_network(data, 'Quote ID', "Email", column_edge1="Phone", column_edge2="VIN")
图形信息
Name:
Type: Graph
Number of nodes: 2441
Number of edges: 8374
Average degree: 6.8611
# import pyvis
from pyvis.network import Network
# # create vis network
net = Network(notebook=True, width=1000, height=600)
# load the networkx graph
net.from_nx(GE3)
# show
net.show("pyvis_example.html")
当我尝试使用pyvis
导出图形时。我得到了TypeError:类型为int64的对象不是JSON可序列化的
以下是的完整错误
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-35-b34699ea995d> in <module>()
17 net.from_nx(GE3)
18 # show
---> 19 net.show("pyvis_example.html")
10 frames
/usr/local/lib/python3.7/dist-packages/pyvis/network.py in show(self, name)
474 check_html(name)
475 if self.template is not None:
--> 476 return self.write_html(name, notebook=True)
477 else:
478 self.write_html(name)
/usr/local/lib/python3.7/dist-packages/pyvis/network.py in write_html(self, name, notebook)
457 bgcolor=self.bgcolor,
458 conf=self.conf,
--> 459 tooltip_link=use_link_template)
460
461 with open(name, "w+") as out:
/usr/local/lib/python3.7/dist-packages/jinja2/environment.py in render(self, *args, **kwargs)
1088 return concat(self.root_render_func(self.new_context(vars)))
1089 except Exception:
-> 1090 self.environment.handle_exception()
1091
1092 def render_async(self, *args, **kwargs):
/usr/local/lib/python3.7/dist-packages/jinja2/environment.py in handle_exception(self, source)
830 from .debug import rewrite_traceback_stack
831
--> 832 reraise(*rewrite_traceback_stack(source=source))
833
834 def join_path(self, template, parent):
/usr/local/lib/python3.7/dist-packages/jinja2/_compat.py in reraise(tp, value, tb)
26 def reraise(tp, value, tb=None):
27 if value.__traceback__ is not tb:
---> 28 raise value.with_traceback(tb)
29 raise value
30
<template> in top-level template code()
/usr/local/lib/python3.7/dist-packages/jinja2/filters.py in do_tojson(eval_ctx, value, indent)
1258 options = dict(options)
1259 options["indent"] = indent
-> 1260 return htmlsafe_json_dumps(value, dumper=dumper, **options)
1261
1262
/usr/local/lib/python3.7/dist-packages/jinja2/utils.py in htmlsafe_json_dumps(obj, dumper, **kwargs)
617 dumper = json.dumps
618 rv = (
--> 619 dumper(obj, **kwargs)
620 .replace(u"<", u"\u003c")
621 .replace(u">", u"\u003e")
/usr/lib/python3.7/json/__init__.py in dumps(obj, skipkeys, ensure_ascii, check_circular, allow_nan, cls, indent, separators, default, sort_keys, **kw)
236 check_circular=check_circular, allow_nan=allow_nan, indent=indent,
237 separators=separators, default=default, sort_keys=sort_keys,
--> 238 **kw).encode(obj)
239
240
/usr/lib/python3.7/json/encoder.py in encode(self, o)
197 # exceptions aren't as detailed. The list call should be roughly
198 # equivalent to the PySequence_Fast that ''.join() would do.
--> 199 chunks = self.iterencode(o, _one_shot=True)
200 if not isinstance(chunks, (list, tuple)):
201 chunks = list(chunks)
/usr/lib/python3.7/json/encoder.py in iterencode(self, o, _one_shot)
255 self.key_separator, self.item_separator, self.sort_keys,
256 self.skipkeys, _one_shot)
--> 257 return _iterencode(o, 0)
258
259 def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
/usr/lib/python3.7/json/encoder.py in default(self, o)
177
178 """
--> 179 raise TypeError(f'Object of type {o.__class__.__name__} '
180 f'is not JSON serializable')
181
TypeError: Object of type int64 is not JSON serializable
我认为错误来自Graph网络,但我不确定具体发生在哪里以及如何修复。。。。
有人能为解决这个问题提供一些线索吗?
确保节点索引是int
类型,而不是int64
或int32
。你的边缘构造也是如此。我建议您查看df[node]
熊猫系列的type
。
尝试将所有已知为int的变量强制转换为int。在python中,可以通过以下方式强制转换:int(int64var(。
如果不想强制转换所有变量,可以调试代码,如果变量类型正确,则可以查看每个变量的变量。