我想查询一个 MongoDB 数据库,并按文档中的两个键进行分组,返回另一个 JSON 键的所有取值。下面是我的尝试,但它会抛出:
KeyError: ('test1', 'test2')
def distros(
        mongo_uri: str,
        mongo_source_db: str,
        mongo_source_collection: str) -> Dict[str, List[str]]:
    """Group the ``distro`` values of a collection by their parameter pair.

    Every document in the collection is read, and its normalised
    (stripped, lower-cased) ``distro`` value is collected under the key
    ``"<param_one>, <param_two>"``.

    Args:
        mongo_uri: Connection URI passed to ``pymongo.MongoClient``.
        mongo_source_db: Name of the database to read from.
        mongo_source_collection: Name of the collection to read from.

    Returns:
        A dict mapping ``"<param_one>, <param_two>"`` to the sorted list of
        distinct ``distro`` values seen for that pair. Keys appear in the
        order in which the pairs were first encountered.

    Raises:
        KeyError: If a document lacks ``param_one``, ``param_two`` or
            ``distro``.
    """
    distros_per_param = OrderedDict()
    query = {}
    # The client is used as a context manager so the connection is closed
    # even if iteration fails part-way through.
    with pymongo.MongoClient(mongo_uri) as mongo:
        db = mongo.get_database(mongo_source_db)
        col = db.get_collection(mongo_source_collection)
        total = col.count_documents(query)
        cursor = col.find(query)
        for doc in tqdm(cursor, total=total, desc='distributions'):
            # One combined string key per (param_one, param_two) pair. The
            # same key is used both to create the bucket and to add to it;
            # mixing per-parameter keys with a tuple key is what raised
            # KeyError before.
            key = f"{doc['param_one']}, {doc['param_two']}"
            # The bucket is created on first sight of the pair, even when
            # the distro value turns out to be empty.
            bucket = distros_per_param.setdefault(key, set())
            distro_value = str(doc['distro']).strip().lower()
            if distro_value:
                bucket.add(distro_value)
            # Debug output retained from the original implementation.
            print(doc)
    index = {k: sorted(v) for k, v in distros_per_param.items()}
    return index
数据是从 MongoDB 查询得到的 JSON 文档列表:
# Sample documents as given in the question.
# NOTE(review): every dict literal below repeats the key 'param_one'. Python
# keeps only the last value for a duplicated key, so each dict ends up with
# 'param_one' set to the 'y…' value and the 'x…' value is lost. The second
# key was presumably meant to be 'param_two' — confirm.
sample_data = [{'param_one': 'x1', 'param_one': 'y2', 'distro': 'test1'},
{'param_one': 'x1', 'param_one': 'y2', 'distro': 'test2'},
{'param_one': 'x2', 'param_one': 'y1', 'distro': 'test3'},
{'param_one': 'x2', 'param_one': 'y1', 'distro': 'test4'}]
我期望得到的结果如下:
# Desired output: one single-key dict per "param_one, param_two" pair, each
# mapping to the list of distro values for that pair.
result = [
    {'x1, y2': ['test1', 'test2']},
    {'x2, y1': ['test3', 'test4']},
]
我假设您在 sample_data 中有一处拼写错误:y{1/2} 对应的键应该是 param_two 而不是 param_one(字典中的键必须唯一,而 param_one 已经用于 x{1/2})。
创建 sample_data 变量后,您可以通过以下方式按 param_one / param_two 对值进行分组:
from collections import defaultdict
def groupby_params(sample_data):
    """Group ``distro`` values by their ``param_one`` / ``param_two`` pair.

    Args:
        sample_data: Iterable of mappings, each providing the keys
            ``'param_one'``, ``'param_two'`` and ``'distro'``.

    Returns:
        A list of single-key dicts, one per distinct pair, in the order the
        pairs were first seen. Each key is ``"<param_one>, <param_two>"``
        and each value is the list of ``distro`` values for that pair, in
        input order.

    Raises:
        KeyError: If an item lacks one of the three required keys.
    """
    results = defaultdict(list)
    for s in sample_data:
        # Both parameters are combined into one string key so that rows
        # sharing the same pair land in the same bucket.
        results[f"{s['param_one']}, {s['param_two']}"].append(s["distro"])
    return [{k: v} for k, v in results.items()]
这将为您提供预期的输出:
# Sample documents with distinct 'param_one' and 'param_two' keys.
sample_data = [
    {'param_one': 'x1', 'param_two': 'y2', 'distro': 'test1'},
    {'param_one': 'x1', 'param_two': 'y2', 'distro': 'test2'},
    {'param_one': 'x2', 'param_two': 'y1', 'distro': 'test3'},
    {'param_one': 'x2', 'param_two': 'y1', 'distro': 'test4'},
]
# Run the grouping on sample_data.
groupby_params(sample_data)
# Output shown for the call above (interactive-session style):
[{'x1, y2': ['test1', 'test2']}, {'x2, y1': ['test3', 'test4']}]