使用python从文本文件到JSON文件



假设我有一个文本文件,看起来像这样(缩进是4个空格):

key1=value1
key2
    key2_1=value2_1
    key2_2
        key2_2_1=value2_2_1
    key2_3=value2_3_1,value2_3_2,value2_3_3
key3=value3_1,value3_2,value3_3

我想把它转换成任何有效的json,像这样:

{
    'key1':'value1',
    'key2': {
        'key2_1':'value2_1',
        'key2_2':{
            'key2_2_1':'value2_2_1'
        },
        'key2_3':['value2_3_1','value2_3_2','value2_3_3']
    },
    'key3':['value3_1','value3_2','value3_3']
}

我试过这个(我从另一个帖子得到的):

# helper method to convert equals sign to indentation for easier parsing
def convertIndentation(inputString):
    """Rewrite ``key=value`` lines as a key line plus a deeper-indented value line.

    Args:
        inputString: list of text lines (for example from ``readlines()``).
            The list is updated in place with the expanded lines.

    Returns:
        All expanded lines joined into a single string.
    """
    indentVal = "    "
    expanded = []
    for eachLine in inputString:
        if "=" not in eachLine:
            expanded.append(eachLine)
            continue
        # Split on the first "=" only, so the value may itself contain "=".
        key, value = eachLine.split("=", 1)
        # Count indentation units in the leading whitespace only; spaces
        # inside the value must not change the nesting depth.
        leading = eachLine[:len(eachLine) - len(eachLine.lstrip())]
        prevIndent = leading.count(indentVal)
        # The key line must end with a real newline so that the value lands
        # on its own line, one indentation level deeper than the key.
        expanded.append(key + "\n")
        expanded.append(indentVal * (prevIndent + 1) + value)
    # Keep the in-place update of the caller's list.
    inputString[:] = expanded
    return "".join(expanded)
# helper class for node usage
class Node:
    """One line of the indented text together with the lines nested under it."""

    def __init__(self, indented_line):
        """Store the stripped text and the width of the leading whitespace."""
        without_indent = indented_line.lstrip()
        self.children = []
        # Depth is measured in raw leading-whitespace characters.
        self.level = len(indented_line) - len(without_indent)
        self.text = without_indent.rstrip()

    def add_children(self, nodes):
        """Consume nodes from the front of ``nodes`` that belong under this node.

        The level of the first node defines the child level. Deeper nodes are
        handed to the most recent child; a node at or above this node's own
        level ends the call and is left in ``nodes`` for the caller.

        Raises:
            IndexError: if ``nodes`` is empty.
        """
        child_level = nodes[0].level
        while nodes:
            head = nodes[0]
            if head.level == child_level:
                # direct child of this node
                self.children.append(nodes.pop(0))
            elif head.level > child_level:
                # belongs below the last child; let it consume what it owns
                self.children[-1].add_children(nodes)
            elif head.level <= self.level:
                # sibling or ancestor level: nothing more belongs here
                return
            else:
                # level between this node and its children: discarded
                nodes.pop(0)

    def as_dict(self):
        """Return the text for a leaf, otherwise ``{text: child or list of children}``."""
        count = len(self.children)
        if count == 0:
            return self.text
        if count == 1:
            return {self.text: self.children[0].as_dict()}
        return {self.text: [child.as_dict() for child in self.children]}
# process our file here
with open(filename, 'r') as fh:
    fileContent = fh.readlines()

# convert equals signs to indentation
fileParse = convertIndentation(fileContent)

# build the tree from every non-blank line of the converted text
nodeList = [Node(line) for line in fileParse.splitlines() if line.strip()]
root = Node('root')
root.add_children(nodeList)
d = root.as_dict()['root']

# this variable is storing the json output
jsonOutput = json.dumps(d, indent=4, sort_keys=False)
print(jsonOutput)

将产生以下内容:

[
{
"key1": "value1"
},
{
"key2": [
{
"key2_1": "value2_1"
},
{
"key2_2": {
"key2_2_1": "value2_2_1"
}
},
{
"key2_3": "value2_3_1,value2_3_2,value2_3_3"
},
]
},
{
"key3": "value3_1,value3_2,value3_3"
}
]

但这仍然不是一个有效的JSON文件。

当我尝试使用'json'模块打开输出文件时,我得到了这个可预测的消息:"JSONDecodeError:期望属性名称包含在双引号中:第10行第5列(char 165)"

# Read the generated file back with the json module.
# NOTE(review): the path below appears to have lost its backslash separators
# when this snippet was pasted - confirm the real location before running.
with open(r'C:UsersnigelOneDriveDocumentsLABleansample_01.02_R00.json', 'r', encoding='utf-8') as read_file:
    data = json.load(read_file)

输出:

JSONDecodeError                           Traceback (most recent call last)
Input In [2], in <cell line: 1>()
1 with open(r'C:UsersnigelOneDriveDocumentsLABleansample_01.02_R00.json', 'r', encoding='utf-8') as read_file:
----> 2     data = json.load(read_file)
File ~Anaconda3libjson__init__.py:293, in load(fp, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)
274 def load(fp, *, cls=None, object_hook=None, parse_float=None,
275         parse_int=None, parse_constant=None, object_pairs_hook=None, **kw):
276     """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing
277     a JSON document) to a Python object.
278 
(...)
291     kwarg; otherwise ``JSONDecoder`` is used.
292     """
--> 293     return loads(fp.read(),
294         cls=cls, object_hook=object_hook,
295         parse_float=parse_float, parse_int=parse_int,
296         parse_constant=parse_constant, object_pairs_hook=object_pairs_hook, **kw)
File ~Anaconda3libjson__init__.py:346, in loads(s, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)
341     s = s.decode(detect_encoding(s), 'surrogatepass')
343 if (cls is None and object_hook is None and
344         parse_int is None and parse_float is None and
345         parse_constant is None and object_pairs_hook is None and not kw):
--> 346     return _default_decoder.decode(s)
347 if cls is None:
348     cls = JSONDecoder
File ~Anaconda3libjsondecoder.py:337, in JSONDecoder.decode(self, s, _w)
332 def decode(self, s, _w=WHITESPACE.match):
333     """Return the Python representation of ``s`` (a ``str`` instance
334     containing a JSON document).
335 
336     """
--> 337     obj, end = self.raw_decode(s, idx=_w(s, 0).end())
338     end = _w(s, end).end()
339     if end != len(s):
File ~Anaconda3libjsondecoder.py:353, in JSONDecoder.raw_decode(self, s, idx)
344 """Decode a JSON document from ``s`` (a ``str`` beginning with
345 a JSON document) and return a 2-tuple of the Python
346 representation and the index in ``s`` where the document ended.
(...)
350 
351 """
352 try:
--> 353     obj, end = self.scan_once(s, idx)
354 except StopIteration as err:
355     raise JSONDecodeError("Expecting value", s, err.value) from None
JSONDecodeError: Expecting property name enclosed in double quotes: line 10 column 5 (char 165)

原因是JSON期望找到键(用双引号括起来的字符串),而实际上在它们的位置上找到JSON对象(嵌套字典)。就是这样!

非常感谢任何意见。祝好,

Nigel

对于访问此页面的用户:我无法重现提问者(OP)发布的错误。json.dumps() 几乎不可能输出无效的 JSON。以下内容仅仅是为了帮助提问者。

将字符串拆分为列表

我假设根据你的评论,你的意思是你想要你的字符串,例如,这一行key2_3=value2_3_1,value2_3_2,value2_3_3并将这些值分解成"key2_3": ["value2_3_1", "value2_3_2", "value2_3_3"]

要做到这一点,您必须对提供给您的代码进行以下调整:
def as_dict(self):
    """Return this node as nested data; a leaf is always a list of its comma-separated parts."""
    child_count = len(self.children)
    if child_count == 0:
        # previously returned self.text unchanged; now the leaf text is split
        return self.text.split(",")
    if child_count == 1:
        return {self.text: self.children[0].as_dict()}
    return {self.text: [child.as_dict() for child in self.children]}

字典的字典而不是列表的字典

要让输出成为嵌套字典(字典的字典),并且叶子节点的值可以是列表,例如 {k1: {k2: [1, 2, 3]}},我们需要做两处修改。

  1. 更新 as_dict 方法,对多个子节点使用字典 {} 而不是列表 []。
  2. 添加一个用于压缩(合并)重复键的函数。

这样修改之后,我一开始很难得到正确的数据结构……输出基本上是这样的:{k1: {k1: {k2: {k2: value}}}}。如果在代码中不调用 compress 函数(即把 d = compress(root.as_dict()['root']) 换成 d = root.as_dict()['root']),这一点就会很明显。as_dict 方法从下面的第一个版本改成了第二个版本:
def as_dict(self):
    """Return this node as nested data, grouping several children in a list.

    A leaf yields its text, split into a list when it contains a comma.
    """
    kids = self.children
    if len(kids) > 1:
        return {self.text: [kid.as_dict() for kid in kids]}
    if len(kids) == 1:
        return {self.text: kids[0].as_dict()}
    if "," in self.text:
        return self.text.split(",")
    return self.text

def as_dict(self):
    """Return this node as nested data, keying several children by their text.

    A leaf yields its text, split into a list when it contains a comma.
    """
    kids = self.children
    if not kids:
        return self.text.split(",") if "," in self.text else self.text
    if len(kids) == 1:
        return {self.text: kids[0].as_dict()}
    # Each child is stored under its own text.
    nested = {}
    for kid in kids:
        nested[kid.text] = kid.as_dict()
    return {self.text: nested}

然后我加入了 compress(压缩)函数:

# for merging like sub keys and values
def compress(dictionary):
    """Collapse ``{k: {k: value}}`` into ``{k: value}`` throughout a nested dict.

    Args:
        dictionary: the structure to clean up. It is modified in place;
            anything that is not a dict is returned untouched.

    Returns:
        The same object that was passed in.

    Note:
        A key is collapsed whenever its value is a dict containing that same
        key, so a child that genuinely shares its parent's name cannot be
        told apart from a duplicated key.
    """
    if not isinstance(dictionary, dict):
        return dictionary
    # Iterate over a snapshot of the keys because values are reassigned below.
    for key in list(dictionary):
        value = dictionary[key]
        if isinstance(value, dict) and key in value:
            value = value.pop(key)
            dictionary[key] = value
        # Always descend, so duplicates below a non-duplicated level are
        # collapsed as well.
        compress(value)
    return dictionary

完整代码

如果你把下面的代码放到一个文件中,并从命令行运行它,它应该可以 100% 正常工作。否则,可能是 Anaconda 或 Python 版本的问题(尽管看起来不太可能)。

from io import StringIO
import json
# for merging like sub keys and values
def compress(dictionary):
    """Collapse ``{k: {k: value}}`` into ``{k: value}`` throughout a nested dict.

    Args:
        dictionary: the structure to clean up. It is modified in place;
            anything that is not a dict is returned untouched.

    Returns:
        The same object that was passed in.

    Note:
        A key is collapsed whenever its value is a dict containing that same
        key, so a child that genuinely shares its parent's name cannot be
        told apart from a duplicated key.
    """
    if not isinstance(dictionary, dict):
        return dictionary
    # Iterate over a snapshot of the keys because values are reassigned below.
    for key in list(dictionary):
        value = dictionary[key]
        if isinstance(value, dict) and key in value:
            value = value.pop(key)
            dictionary[key] = value
        # Always descend, so duplicates below a non-duplicated level are
        # collapsed as well.
        compress(value)
    return dictionary
# helper method to convert equals sign to indentation for easier parsing
def convertIndentation(inputString):
    """Rewrite ``key=value`` lines as a key line plus a deeper-indented value line.

    Args:
        inputString: list of text lines (for example from ``readlines()``).
            The list is updated in place with the expanded lines.

    Returns:
        All expanded lines joined into a single string.
    """
    indentVal = "    "
    expanded = []
    for eachLine in inputString:
        if "=" not in eachLine:
            expanded.append(eachLine)
            continue
        # Split on the first "=" only, so the value may itself contain "=".
        key, value = eachLine.split("=", 1)
        # Count indentation units in the leading whitespace only; spaces
        # inside the value must not change the nesting depth.
        leading = eachLine[:len(eachLine) - len(eachLine.lstrip())]
        prevIndent = leading.count(indentVal)
        # The key line must end with a real newline so that the value lands
        # on its own line, one indentation level deeper than the key.
        expanded.append(key + "\n")
        expanded.append(indentVal * (prevIndent + 1) + value)
    # Keep the in-place update of the caller's list.
    inputString[:] = expanded
    return "".join(expanded)

# helper class for node usage
class Node:
    """One line of the indented text together with the lines nested under it."""

    def __init__(self, indented_line):
        """Store the stripped text and the width of the leading whitespace."""
        without_indent = indented_line.lstrip()
        self.children = []
        # Depth is measured in raw leading-whitespace characters.
        self.level = len(indented_line) - len(without_indent)
        self.text = without_indent.rstrip()

    def add_children(self, nodes):
        """Consume nodes from the front of ``nodes`` that belong under this node.

        The level of the first node defines the child level. Deeper nodes are
        handed to the most recent child; a node at or above this node's own
        level ends the call and is left in ``nodes`` for the caller.

        Raises:
            IndexError: if ``nodes`` is empty.
        """
        child_level = nodes[0].level
        while nodes:
            head = nodes[0]
            if head.level == child_level:
                # direct child of this node
                self.children.append(nodes.pop(0))
            elif head.level > child_level:
                # belongs below the last child; let it consume what it owns
                self.children[-1].add_children(nodes)
            elif head.level <= self.level:
                # sibling or ancestor level: nothing more belongs here
                return
            else:
                # level between this node and its children: discarded
                nodes.pop(0)

    def as_dict(self):
        """Return this node as nested data, keying several children by their text.

        A leaf yields its text, split into a list when it contains a comma.
        """
        count = len(self.children)
        if count > 1:
            return {self.text: {child.text: child.as_dict() for child in self.children}}
        if count == 1:
            return {self.text: self.children[0].as_dict()}
        if "," in self.text:
            return self.text.split(",")
        return self.text
if __name__ == "__main__":
    # Sample input. Nesting is expressed with 4-space indentation, which the
    # parser relies on to rebuild the hierarchy.
    s = """
key1=value1
key2
    key2_1=value2_1
    key2_2
        key2_2_1
            key2_2_1_1=value2_2_1_1
    key2_3=value2_3_1,value2_3_2,value2_3_3
key3=value3_1,value3_2,value3_3
"""
    fh = StringIO(s)
    fileContent = fh.readlines()
    # convert equals signs to indentation
    fileParse = convertIndentation(fileContent)
    root = Node('root')
    # blank lines carry no data and are skipped
    root.add_children([Node(line) for line in fileParse.splitlines() if line.strip()])
    d = compress(root.as_dict()['root'])
    # this variable is storing the json output
    jsonOutput = json.dumps(d, indent=4, sort_keys=False)
    f = StringIO(jsonOutput)
    # load the "file" to show that the generated text parses as JSON
    loaded = json.load(f)
    print(s)
    print(jsonOutput)
    print(loaded)

相关内容

  • 没有找到相关文章

最新更新