我需要帮助解决一个与已有 Python 问题(“递归地从路径创建字典”)类似的需求,不同之处在于我的路径中还包含列表:有些路径段带有索引,例如:
PATH VALUE
/a/b/c 'Hi'
/a/b/d 1
/a/c/d 1
/b/c/d 1
/a/e[0]/f 1
/a/e[1]/g 2
/b/x/y[1] 'thank'
/b/x/y[2] 'you'
/b/j/b/c 2
/b/j/b/d 1
/a/e[2]/k[0]/s '2D_1'
/a/e[2]/k[1]/s '2D_2'
我期望得到的输出字典如下:
{
"a": {
"b": {
"c": "Hi",
"d": 1
},
"c": {
"d": 1
},
"e": [
{
"f": 1
},
{
"g": 2
},
{
"k": [
{
"s": "2D_1"
},
{
"s": "2D_2"
}
]
}
]
},
"b": {
"c": {
"d": 1
},
"x": {
"y": [
null,
"thank",
"you"
]
},
"j": {
"b": {
"c": 2,
"d": 1
}
}
}
}
注:路径可以多于 4 段(段数任意)。有没有递归的方式来处理这个问题,并根据路径和值创建字典?
我尝试了下面的示例代码,但卡在了列表的处理上。
import re
# A path node of the form ``name[index]``; whitespace and an optional ``+``
# sign are tolerated inside the brackets, e.g. ``e[ +2 ]``.
_INDEXED_NODE = re.compile(r"(?P<name>[^\[\]]+)\[\s*\+?(?P<index>\d+)\s*\]")


def create_dict(data, path, value):
    """Store *value* in the nested structure *data* at the location *path*.

    *path* is a ``/``-separated string such as ``a/b/c[0]/d``.  A plain
    node addresses a dictionary key; a node written ``name[i]`` addresses
    element ``i`` of the list stored under ``name``.  Missing dictionaries
    and lists are created on the way down, and lists are padded with
    ``None`` so that paths may arrive in any index order.

    Args:
        data: The dictionary to update in place.
        path: Slash-separated path.  It is split on ``/`` as-is, so a
            leading slash produces an empty-string key.
        value: The value to store at the final node.

    Returns:
        The same *data* object, after the update.

    A node that contains brackets but does not match ``name[non-negative
    integer]`` is treated as an ordinary dictionary key.
    """
    nodes = path.split('/')
    last_position = len(nodes) - 1
    current = data

    for position, node in enumerate(nodes):
        is_leaf = position == last_position
        match = _INDEXED_NODE.fullmatch(node)

        if match is None:
            # Plain dictionary key.
            if is_leaf:
                current[node] = value
            else:
                current = current.setdefault(node, {})
            continue

        # List node: make sure the list exists and is long enough.
        index = int(match.group("index"))
        items = current.setdefault(match.group("name"), [])
        if len(items) <= index:
            items.extend([None] * (index + 1 - len(items)))

        if is_leaf:
            items[index] = value
        else:
            # A None slot is only padding; turn it into a dictionary
            # before descending into it.
            if items[index] is None:
                items[index] = {}
            current = items[index]

    return data
# Demo: feed two sample paths into the same dictionary and print the
# accumulated result after each insertion, with a divider in between.
data = {}
for step, (keys, value) in enumerate([
    ('/a/b/c[0]/d/e/f', 123456),
    ('/a/b/c[1]/d/e/g', 'ABCDEFG'),
]):
    if step:
        print('---------------')
    # Drop the leading slash before handing the path to create_dict.
    path = keys[1:]
    print(create_dict(data, path, value))
另外又增加了 2 个路径。注意顺序:k[1] 的路径可能出现在 k[0] 之前。
/a/e[2]/k[1]/s '2D_2'
/a/e[2]/k[0]/s '2D_1'
与链接问题中的代码一样,您给出的预期结果并不是合法的结构,因此我对您的意图做了一两处猜测。
首先,
{'b':
{'c': 'Hi' },
{'d': 1 }
},
是一个语法错误:像这样一个键不能对应两个值。'b'
必须是一个列表或一个字典。既然您已经特意在需求中为列表引入了索引语法,我认为这里应该是一个字典。
{'y' : ['thank' , 'you']}
作为下面两条输入的预期结果,这似乎有些出人意料:
/b/x/y[1] 'thank'
/b/x/y[2] 'you'
因为它们使用的是索引 1 和 2,所以索引 0 处应为空(null)。如果您想要原来的结果,可以改用 .append
(容易令人困惑!),或者修正输入中的索引(不会令人困惑)。
除此之外,要满足新增的列表需求,需要用正则表达式从路径段中提取键名和索引,再用该键和索引进入下一层嵌套。
import json
import re
# A path segment of the form ``key[index]``, e.g. ``e[2]``.
_INDEXED_SEGMENT = re.compile(r"(.+)\[(\d+)\]")


def add_path(d, path, val):
    """Insert *val* into the nested structure *d* at the location *path*.

    *path* is a ``/``-separated string with a leading slash, for example
    ``/a/e[1]/g``.  Everything before the first ``/`` is discarded.  A
    segment written ``key[i]`` addresses element ``i`` of the list stored
    under ``key``; any other segment is a dictionary key.  Missing
    dictionaries and lists are created as needed, and lists are padded
    with ``None`` up to the requested index.

    Args:
        d: The dictionary to update in place.
        path: Slash-separated path beginning with ``/``.
        val: The value to store at the final segment.

    Returns:
        None.  *d* is modified in place.
    """
    segments = path.split("/")[1:]
    last = len(segments) - 1

    for i, segment in enumerate(segments):
        is_leaf = i == last
        match = _INDEXED_SEGMENT.fullmatch(segment)

        if match is None:
            # Plain dictionary key.
            if is_leaf:
                d[segment] = val
            else:
                d = d.setdefault(segment, {})
            continue

        key, idx = match.group(1), int(match.group(2))
        items = d.setdefault(key, [])
        if len(items) <= idx:
            # Pad so that ``items[idx]`` exists.
            items.extend([None] * (idx + 1 - len(items)))

        if is_leaf:
            items[idx] = val
        else:
            # An empty (falsy) slot is replaced by a fresh dictionary
            # before descending into it.
            if not items[idx]:
                items[idx] = {}
            d = items[idx]
if __name__ == "__main__":
    # Sample input: one "<path> <value>" pair per line.
    data = """
/a/b/c 'Hi'
/a/b/d 1
/a/c/d 1
/b/c/d 1
/a/e[0]/f 1
/a/e[1]/g 2
/b/x/y[1] 'thank'
/b/x/y[2] 'you'
/b/j/b/c 2
/b/j/b/d 1
"""
    d = {}

    def clean(x):
        """Return *x* as an int when possible, else stripped of spaces and quotes."""
        try:
            return int(x)
        except ValueError:
            return x.strip(" '")

    for line in data.splitlines():
        if not line.strip():
            continue
        # Split on the first run of whitespace only, so that leading
        # indentation is ignored and a quoted value may contain spaces.
        path, val = (clean(field) for field in line.split(maxsplit=1))
        add_path(d, path, val)

    print(json.dumps(d, indent=4))
输出:
{
"a": {
"b": {
"c": "Hi",
"d": 1
},
"c": {
"d": 1
},
"e": [
{
"f": 1
},
{
"g": 2
}
]
},
"b": {
"c": {
"d": 1
},
"x": {
"y": [
null,
"thank",
"you"
]
},
"j": {
"b": {
"c": 2,
"d": 1
}
}
}
}
进一步整理这段代码就留给读者作为练习了。
哈哈,和 @gorlen 的答案很像。
import re
import pprint
class Parser(object):
    """Build a nested dict/list tree from ``/``-separated paths and values.

    A path segment written ``name[i]`` addresses element ``i`` of the list
    stored under ``name``; any other segment is a dictionary key.
    """

    def __init__(self):
        # Matches a whole segment of the form ``name[index]``.
        self.index_pattern = re.compile(r'([^\[]*)\[(\d+)\]')

    def Add(self, tree, path, value):
        """Store *value* in *tree* at the location given by *path*.

        Args:
            tree: The dictionary to update in place.
            path: A non-empty sequence of path segments.
            value: The value to store at the last segment.
        """
        for seg in path[:-1]:
            match = self.index_pattern.fullmatch(seg)
            if match:
                lst, ix = self.AddList(match, tree, dict)
                # The slot may be None padding left by an earlier leaf
                # write; turn it into a dictionary before descending.
                if lst[ix] is None:
                    lst[ix] = {}
                tree = lst[ix]
            else:
                tree = tree.setdefault(seg, {})

        leaf = path[-1]
        match = self.index_pattern.fullmatch(leaf)
        if match:
            # Leaf lists are padded with None rather than dictionaries.
            lst, ix = self.AddList(match, tree, lambda: None)
            lst[ix] = value
        else:
            tree[leaf] = value

    @staticmethod
    def AddList(match, tree, ctor):
        """Ensure the list named by *match* exists in *tree* and covers its index.

        Args:
            match: A match of the index pattern (group 1 is the name,
                group 2 the index).
            tree: The dictionary that holds, or will hold, the list.
            ctor: Zero-argument callable producing each padding element.

        Returns:
            A ``(list, index)`` tuple.
        """
        name = match.group(1)
        ix = int(match.group(2))
        lst = tree.setdefault(name, [])
        lst.extend(ctor() for _ in range(ix - len(lst) + 1))
        return lst, ix

    def Process(self, data):
        """Return a new tree built from a mapping of path string to value.

        Each path is split on ``/`` and the part before the first slash
        is discarded.
        """
        tree = {}
        for path, value in data.items():
            self.Add(tree, path.split('/')[1:], value)
        return tree
def Run():
    """Build a tree from the sample path/value pairs and pretty-print it."""
    samples = {
        '/a/b/c': 'Hi',
        '/a/b/d': 1,
        '/a/c/d': 1,
        '/b/c/d': 1,
        '/a/e[0]/f': 1,
        '/a/e[1]/g': 2,
        '/b/x/y[1]': 'thank',
        '/b/x/y[2]': 'you',
        '/b/j/b/c': 2,
        '/b/j/b/d': 1,
    }
    parser = Parser()
    tree = parser.Process(samples)
    pprint.pprint(tree)


Run()
输出:
{'a': {'b': {'c': 'Hi', 'd': 1}, 'c': {'d': 1}, 'e': [{'f': 1}, {'g': 2}]},
'b': {'c': {'d': 1},
'j': {'b': {'c': 2, 'd': 1}},
'x': {'y': [None, 'thank', 'you']}}}