Networkx图形导出到Pyvis-TypeError:类型为int64的对象不可JSON序列化



我有一个数据帧,如下

Quote ID    Quote Date       Email                Phone       VIN
0   1410095416  6/6/2021    DAMIONADAE@GMAIL.COM    4049366688  1C4RJFBG9EC2267
1   1410143058  6/6/2021    BEEZZZHAPPY@YAHOO.COM   3122340791  NaN
2   1408893417  6/3/2021    MONEYKAY38@YAHOO.COM    2149004015  1J8HG48NX6C2470
3   1408764243  6/2/2021    TIFFANYLESTER419@GMAIL.COM  5024647900  JN8AZ08W57W6527
4   1408639003  6/2/2021    MONEYTEAM799@YAHOO.COM  2149001015  1ZVBP8AM3E52605

我正在使用一个函数来生成网络图。下面的函数使用数据帧、节点和边来生成Graph。

def create_network(df, node, column_edge, column_edge1=None, column_edge2=None):
#  select columns, remove NaN
df_edge1 = df[[node, column_edge]].dropna(subset=[column_edge]).drop_duplicates()
# To create connections between "node" who have the same "edge",
# join data with itself on the "node" column.
df_edge1 = df_edge1.merge(
df_edge1[[node, column_edge]].rename(columns={node:node+"_2"}), 
on=column_edge
)
# By joining the data with itself, node will have a connection with themselves.
# Remove self connections, to keep only connected nodes which are different.
edge1 = df_edge1[~(df_edge1[node]==df_edge1[node+"_2"])].dropna()[[node, node +"_2", column_edge]]

# To avoid counting twice the connections (person 1 connected to person 2 and person 2 connected to person 1)
# we force the first ID to be "lower" then ID_2
edge1.drop(edge1.loc[edge1[node+"_2"]<edge1[node]].index.tolist(), inplace=True)
G = nx.from_pandas_edgelist(df=edge1, source=node, target=node + '_2', edge_attr=column_edge)
G.add_nodes_from(nodes_for_adding=df[node].tolist())

if column_edge1:

df_edge2 = df[[node, column_edge1]].dropna(subset=[column_edge1]).drop_duplicates()
df_edge2 = df_edge2.merge(
df_edge2[[node, column_edge1]].rename(columns={node:node+"_2"}), 
on=column_edge1
)
edge2 = df_edge2[~(df_edge2[node]==df_edge2[node+"_2"])].dropna()[[node, node+"_2", column_edge1]]
edge2.drop(edge2.loc[edge2[node+"_2"]<edge2[node]].index.tolist(), inplace=True)
# Create the connections in the graph
links_attributes = {tuple(row[[node, node+"_2"]]): {column_edge1: row[column_edge1]} for i,row in edge2.iterrows()}
# create the connection, without attribute.
G.add_edges_from(links_attributes)
# adds the attribute.
nx.set_edge_attributes(G=G, values=links_attributes)

if column_edge2:
df_edge3 = df[[node, column_edge2]].dropna(subset=[column_edge2]).drop_duplicates()
df_edge3 = df_edge3.merge(
df_edge3[[node, column_edge2]].rename(columns={node:node+"_2"}), 
on=column_edge2
)

edge3 = df_edge3[~(df_edge3[node]==df_edge3[node+"_2"])].dropna()[[node, node+"_2", column_edge2]]
edge3.drop(edge3.loc[edge3[node+"_2"]<edge3[node]].index.tolist(), inplace=True)
# Create the connections in the graph
links_attributes2 = {tuple(row[[node, node+"_2"]]): {column_edge2: row[column_edge2]} for i,row in edge3.iterrows()}
# create the connection, without attribute.
G.add_edges_from(links_attributes2) 
# adds the attribute.
nx.set_edge_attributes(G=G, values=links_attributes2)

return G

调用上述功能

GE3 = create_network(data, 'Quote ID', "Email", column_edge1="Phone", column_edge2="VIN")

图形信息

Name: 
Type: Graph
Number of nodes: 2441
Number of edges: 8374
Average degree:   6.8611
# import pyvis
from pyvis.network import Network
# # create vis network
net = Network(notebook=True, width=1000, height=600)
# load the networkx graph
net.from_nx(GE3)
# show
net.show("pyvis_example.html")

当我尝试使用pyvis导出图形时。我得到了TypeError:类型为int64的对象不是JSON可序列化的

以下是的完整错误

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-35-b34699ea995d> in <module>()
17 net.from_nx(GE3)
18 # show
---> 19 net.show("pyvis_example.html")
10 frames
/usr/local/lib/python3.7/dist-packages/pyvis/network.py in show(self, name)
474         check_html(name)
475         if self.template is not None:
--> 476             return self.write_html(name, notebook=True)
477         else:
478             self.write_html(name)
/usr/local/lib/python3.7/dist-packages/pyvis/network.py in write_html(self, name, notebook)
457                                     bgcolor=self.bgcolor,
458                                     conf=self.conf,
--> 459                                     tooltip_link=use_link_template)
460 
461         with open(name, "w+") as out:
/usr/local/lib/python3.7/dist-packages/jinja2/environment.py in render(self, *args, **kwargs)
1088             return concat(self.root_render_func(self.new_context(vars)))
1089         except Exception:
-> 1090             self.environment.handle_exception()
1091 
1092     def render_async(self, *args, **kwargs):
/usr/local/lib/python3.7/dist-packages/jinja2/environment.py in handle_exception(self, source)
830         from .debug import rewrite_traceback_stack
831 
--> 832         reraise(*rewrite_traceback_stack(source=source))
833 
834     def join_path(self, template, parent):
/usr/local/lib/python3.7/dist-packages/jinja2/_compat.py in reraise(tp, value, tb)
26     def reraise(tp, value, tb=None):
27         if value.__traceback__ is not tb:
---> 28             raise value.with_traceback(tb)
29         raise value
30 
<template> in top-level template code()
/usr/local/lib/python3.7/dist-packages/jinja2/filters.py in do_tojson(eval_ctx, value, indent)
1258         options = dict(options)
1259         options["indent"] = indent
-> 1260     return htmlsafe_json_dumps(value, dumper=dumper, **options)
1261 
1262 
/usr/local/lib/python3.7/dist-packages/jinja2/utils.py in htmlsafe_json_dumps(obj, dumper, **kwargs)
617         dumper = json.dumps
618     rv = (
--> 619         dumper(obj, **kwargs)
620         .replace(u"<", u"\u003c")
621         .replace(u">", u"\u003e")
/usr/lib/python3.7/json/__init__.py in dumps(obj, skipkeys, ensure_ascii, check_circular, allow_nan, cls, indent, separators, default, sort_keys, **kw)
236         check_circular=check_circular, allow_nan=allow_nan, indent=indent,
237         separators=separators, default=default, sort_keys=sort_keys,
--> 238         **kw).encode(obj)
239 
240 
/usr/lib/python3.7/json/encoder.py in encode(self, o)
197         # exceptions aren't as detailed.  The list call should be roughly
198         # equivalent to the PySequence_Fast that ''.join() would do.
--> 199         chunks = self.iterencode(o, _one_shot=True)
200         if not isinstance(chunks, (list, tuple)):
201             chunks = list(chunks)
/usr/lib/python3.7/json/encoder.py in iterencode(self, o, _one_shot)
255                 self.key_separator, self.item_separator, self.sort_keys,
256                 self.skipkeys, _one_shot)
--> 257         return _iterencode(o, 0)
258 
259 def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
/usr/lib/python3.7/json/encoder.py in default(self, o)
177 
178         """
--> 179         raise TypeError(f'Object of type {o.__class__.__name__} '
180                         f'is not JSON serializable')
181 
TypeError: Object of type int64 is not JSON serializable

我认为错误来自Graph网络,但我不确定具体发生在哪里以及如何修复。。。。

有人能为解决这个问题提供一些线索吗?

确保节点索引是int类型,而不是int64int32。你的边缘构造也是如此。我建议您查看df[node]熊猫系列的type

尝试将所有已知为int的变量强制转换为int。在python中,可以通过以下方式强制转换:int(int64var(

如果不想强制转换所有变量,可以调试代码,如果变量类型正确,则可以查看每个变量的变量。

最新更新