Python 正确地将复杂对象解析为 JSON 格式



我有以下内容,我想将其解析为JSON。该类还具有项对象列表

class Item(JSONEncoder):
def __init__(self):
self.Type = ''
self.Content = ''
self.N = None
self.Parent = None
self.Items = []
def reprJSON(self):
d = dict()
for a, v in self.__dict__.items():
if (hasattr(v, "reprJSON")):
d[a] = v.reprJSON()
else:
d[a] = v
return d

因此,当我尝试解析 Item 类的实例时,root.reprJSON()得到以下结果。

{'Type': 'root',
'Content': '',
'N': 'root',
'Parent': None,
'Items': [<Item.Item at 0x10575fb3c88>,
<Item.Item at 0x10575fb3e10>,
<Item.Item at 0x10575fb3eb8>,
<Item.Item at 0x10575fbc080>,
<Item.Item at 0x10575fbc2b0>,
<Item.Item at 0x10575fc6a20>,
<Item.Item at 0x10575fc6a58>,
<Item.Item at 0x10575fc6b70>,
<Item.Item at 0x10575fc6be0>,
<Item.Item at 0x10575fc6c50>,
<Item.Item at 0x10575fc6da0>,
<Item.Item at 0x10575fc6fd0>,
<Item.Item at 0x10575fcb128>,
<Item.Item at 0x10575fcb358>,
<Item.Item at 0x10575fcba90>,
<Item.Item at 0x10575fcbb00>,
<Item.Item at 0x10575fcbb70>,
<Item.Item at 0x10575fcbc18>,
<Item.Item at 0x10575fcbda0>,
<Item.Item at 0x10575fcbfd0>,
<Item.Item at 0x10575fd3208>,
<Item.Item at 0x10575fd34a8>,
<Item.Item at 0x10575fd3550>,
<Item.Item at 0x10575fd35c0>,
<Item.Item at 0x10575fd36d8>,
<Item.Item at 0x10575fd37f0>,
<Item.Item at 0x10575fd3898>,
<Item.Item at 0x10575fd3940>,
<Item.Item at 0x10575fd39b0>,
<Item.Item at 0x10575fd3a20>,
<Item.Item at 0x10575fd3ac8>,
<Item.Item at 0x10575fd3b70>,
<Item.Item at 0x10575fd3c88>,
<Item.Item at 0x10575fd3d68>,
<Item.Item at 0x10575fd3dd8>,
<Item.Item at 0x10575fd3e10>,
<Item.Item at 0x10575fd3ef0>,
<Item.Item at 0x10575fdc080>,
<Item.Item at 0x10575fdc0b8>,
<Item.Item at 0x10575fdc128>,
<Item.Item at 0x10575fdc1d0>,
<Item.Item at 0x10575fdc240>,
<Item.Item at 0x10575fdc390>,
<Item.Item at 0x10575fdc438>,
<Item.Item at 0x10575fdc550>,
<Item.Item at 0x10575fdc5c0>,
<Item.Item at 0x10575fdc630>,
<Item.Item at 0x10575fdc6a0>,
<Item.Item at 0x10575fdc6d8>,
<Item.Item at 0x10575fdc780>,
<Item.Item at 0x10575fdc908>,
<Item.Item at 0x10575fdc9e8>,
<Item.Item at 0x10575fdca58>,
<Item.Item at 0x10575fdcac8>,
<Item.Item at 0x10575fdcb00>,
<Item.Item at 0x10575fdcba8>,
<Item.Item at 0x10575fdccc0>,
<Item.Item at 0x10575fdcd30>,
<Item.Item at 0x10575fdcda0>,
<Item.Item at 0x10575fdce48>,
<Item.Item at 0x10575fdceb8>,
<Item.Item at 0x10575fdcf28>,
<Item.Item at 0x10575fe22e8>,
<Item.Item at 0x10575fe2828>,
<Item.Item at 0x10575fe2940>,
<Item.Item at 0x10575fe2b70>,
<Item.Item at 0x10575fe2be0>,
<Item.Item at 0x10575fe2c88>,
<Item.Item at 0x10575fe2cc0>,
<Item.Item at 0x10575fe2cf8>]}

但是我想将这些项的值也放入单个 json 对象中。我不知道该怎么做,希望得到任何帮助。谢谢

编辑

下面的代码创建项类的实例并用数据填充它。

def Crawl(parsedPDF):   
soup = BeautifulSoup(parsedPDF, "html.parser")   

root = Item()
root.Type = "root"
root.N = "root"
parent = root
head = root
body = RemoveEmptyTags(soup.body)

for tag in body:        
elements = RemoveEmptyChild(tag.contents)        
for element in elements:
if element.name == "head":
head = CreateHeading(root, parent, element)
parent = head.Parent  
elif element.name == "p":               
AddParagraph(head, element)
elif element.name == "figure":
pass
elif element.name == "figdesc":
pass     
elif element.name == "table":
#elem = AddElement(head, element)     
pass  
else:
#elem = AddElement(head, element)
pass            
pass   

return root

def AddParagraph(head, element):
# split the paragraph into multiple lines based on alphabetize bullet points
lines = split_with_AplhabetizeBullets(element.text, '.s((.*?)s)')
for line in lines:
item = Item()
item.Content = line
item.Type = element.name    
item.Parent = head
head.Items.append(item)     


def CreateHeading(root, parent, element):
item = Item()
item.Content = element.text
item.Type = element.name    
item.Parent = parent
try:                    
item.N = element["n"]
except:                                   
pass
if item.N is None:        
bracketTextLength = 0
try:
result = re.search(r'(.*?)',item.Content)
bracketTextLength = len(result.group)
except:
pass   
item.N = item.Content
# to check if the heading without 'N' is a heading or its a subheading
if len(item.Content) > 3 and  bracketTextLength == 0:
root.Items.append(item) 
item.Parent = item
pass
else:
parent.Items.append(item)
pass


else: # item.N is not None        
if parent.N is None:
item.Parent = item
parent = item.Parent
pass
#else: # if the new heading sharing the same reference as of its parent then            
if parent.N in item.N[:len(parent.N)]:                
parent.Items.append(item)                
pass
else: # if the new heading has no parent then add it into root
root.Items.append(item)
item.Parent = item                
pass               

return item

查看您的代码,您可以在代码中使用此演示解决方案,因为我将演示类的对象存储在"项目"列表中。您需要在 Items 类中编写serialize()dumper()方法,并且还需要在reprJSON项列表上进行迭代的方法中进行更改。

from json import JSONEncoder
class Demo():
def __init__(self):
self.name = ''
self.demolist = []
class Item(JSONEncoder):
def __init__(self):
# super().__init__()
self.Type = ''
self.Content = ''
self.N = None
self.Parent = None
self.Items = []
def reprJSON(self):
d = {}
for a, v in self.__dict__.items():
if isinstance(v, list):
for i in v:
if d.get(a, []) == []:
d[a] = []
d[a].append(self.dumper(i))
else:
d[a].append(self.dumper(i))
else:
d[a] = v
return d
def serialize(self):
return self.__dict__
@staticmethod
def dumper(obj):
if "serialize" in dir(obj):
return obj.serialize()
return obj.__dict__


itemobj = Item()
d1 = Demo()
d2 = Demo()
d1.name = 'akash'
d1.demolist = [{'good':[4,6,5],'yyy':'why'},{'ho':{'ksks':'333'}}]
d2.name = 'heheh'
d2.demolist = [4,6,1111]
itemobj.Items.extend([d1,d2])
from pprint import pprint
pprint(itemobj.reprJSON())

输出:

{'Content': '',
'Items': [{'demolist': [{'good': [4, 6, 5], 'yyy': 'why'},
{'ho': {'ksks': '333'}}],
'name': 'akash'},
{'demolist': [4, 6, 1111], 'name': 'heheh'}],
'N': None,
'Parent': None,
'Type': ''}```

pip install jsonany

from jsonwhatever import jsonwhatever as jw

class Item():
def __init__(self):
self.Type = ''
self.Content = ''
self.N = None
self.Parent = None #Not to reference father class to avoid infinite recursivity
self.Items = None #You should put None by default to stop recursivity

obj = Item()
obj01 = Item()
obj01.Type = '01'
obj01.Content = 'stuff'
obj01.N = 9
obj01.Parent = None
list_objects = []
list_objects.append(obj01)
obj.Items = list_objects
json_string = jw.jsonwhatever('list_of_items', obj)
print(json_string)

最新更新