我需要将一个包含数百行的CSV文件转换为JSON,并且需要在每个键名前面重复该行的MMSA值。
以下是我用来将其转换为当前状态的内容
#Quest 1
import csv
import json
def make_json(csvFilePath, jsonFilePath, key_column='MMSA'):
    """Convert a CSV file into a JSON object keyed by one of its columns.

    Each CSV row becomes a dict mapping column header to cell text, and the
    rows are gathered into one outer dict keyed by the row's ``key_column``
    value. When several rows share the same key, the last one wins.

    Args:
        csvFilePath: Path of the UTF-8 CSV file to read; its first line
            supplies the column headers.
        jsonFilePath: Path of the JSON file to write (overwritten if it
            already exists).
        key_column: Header of the column whose value identifies each row.

    Raises:
        KeyError: If the CSV has data rows but no ``key_column`` header.
        OSError: If either file cannot be opened.
    """
    # newline='' leaves line-ending handling to the csv module, so line
    # breaks embedded inside quoted fields are preserved verbatim.
    with open(csvFilePath, encoding='utf-8', newline='') as csvf:
        data = {row[key_column]: row for row in csv.DictReader(csvf)}

    # Stream straight into the file instead of building the whole string.
    with open(jsonFilePath, 'w', encoding='utf-8') as jsonf:
        json.dump(data, jsonf, indent=4)
# Input CSV and output JSON locations, then run the conversion.
csvFilePath, jsonFilePath = (
    r'/home/user/Downloads/mmsa-icu-beds.csv',
    r'/home/user/Downloads/mmsa-icu-beds.json',
)
make_json(csvFilePath, jsonFilePath)
我的CSV文件是:当前csv
我现在得到的是这个JSON格式的
{"MMSA": "Manhattan, KS", "total_percent_at_risk": "47.29%", "high_risk_per_ICU_bed": "4489.84875", "high_risk_per_hospital": "8979.6975", "icu_beds": "8", "hospitals": "4", "total_at_risk": "35918.79"}
{"MMSA": "Hilton Head Island-Bluffton-Beaufort, SC", "total_percent_at_risk": "62.72%", "high_risk_per_ICU_bed": "3904.163571", "high_risk_per_hospital": "36438.86", "icu_beds": "28", "hospitals": "3", "total_at_risk": "109316.58"}
{"MMSA": "Kahului-Wailuku-Lahaina, HI", "total_percent_at_risk": "59.13%", "high_risk_per_ICU_bed": "3860.557", "high_risk_per_hospital": "19302.785", "icu_beds": "20", "hospitals": "4", "total_at_risk": "77211.14"}
{"MMSA": "Spartanburg, SC", "total_percent_at_risk": "66.12%", "high_risk_per_ICU_bed": "3786.115556", "high_risk_per_hospital": "85187.6", "icu_beds": "45", "hospitals": "2", "total_at_risk": "170375.2"}
{"MMSA": "Baton Rouge, LA", "total_percent_at_risk": "66.60%", "high_risk_per_ICU_bed": "3459.7325", "high_risk_per_hospital": "39000.62091", "icu_beds": "124", "hospitals": "11", "total_at_risk": "429006.83"}
{"MMSA": "Rockingham County-Strafford County, NH, Metropolitan Division", "total_percent_at_risk": "57.72%", "high_risk_per_ICU_bed": "3365.052", "high_risk_per_hospital": "40380.624", "icu_beds": "60", "hospitals": "5", "total_at_risk": "201903.12"}
{"MMSA": "Salisbury, MD-DE", "total_percent_at_risk": "68.32%", "high_risk_per_ICU_bed": "3292.271176", "high_risk_per_hospital": "37312.40667", "icu_beds": "68", "hospitals": "6", "total_at_risk": "223874.44"}
应为JSON格式:
{"MMSA": "Manhattan, KS", "Manhattan, KS total_percent_at_risk": "47.29%", "Manhattan, KS high_risk_per_ICU_bed": "4489.84875", "Manhattan, KS high_risk_per_hospital": "8979.6975", "Manhattan, KS icu_beds": "8", "Manhattan, KS hospitals": "4", "Manhattan, KS total_at_risk": "35918.79"}
您需要取出字典列表中每一项的MMSA键对应的值,然后构造一个新字典,把该值加到其余每个键的开头:f'{row["MMSA"]} {k}': v
这应该对你有用:
import json
# Rows as returned by the CSV reader: one dict per row, header -> cell text.
csv_data = [
    {
        "MMSA": "Manhattan, KS",
        "total_percent_at_risk": "47.29%",
        "high_risk_per_ICU_bed": "4489.84875",
        "high_risk_per_hospital": "8979.6975",
        "icu_beds": "8",
        "hospitals": "4",
        "total_at_risk": "35918.79",
    },
    {
        "MMSA": "Hilton Head Island-Bluffton-Beaufort, SC",
        "total_percent_at_risk": "62.72%",
        "high_risk_per_ICU_bed": "3904.163571",
        "high_risk_per_hospital": "36438.86",
        "icu_beds": "28",
        "hospitals": "3",
        "total_at_risk": "109316.58",
    },
    {
        "MMSA": "Kahului-Wailuku-Lahaina, HI",
        "total_percent_at_risk": "59.13%",
        "high_risk_per_ICU_bed": "3860.557",
        "high_risk_per_hospital": "19302.785",
        "icu_beds": "20",
        "hospitals": "4",
        "total_at_risk": "77211.14",
    },
    {
        "MMSA": "Spartanburg, SC",
        "total_percent_at_risk": "66.12%",
        "high_risk_per_ICU_bed": "3786.115556",
        "high_risk_per_hospital": "85187.6",
        "icu_beds": "45",
        "hospitals": "2",
        "total_at_risk": "170375.2",
    },
    {
        "MMSA": "Baton Rouge, LA",
        "total_percent_at_risk": "66.60%",
        "high_risk_per_ICU_bed": "3459.7325",
        "high_risk_per_hospital": "39000.62091",
        "icu_beds": "124",
        "hospitals": "11",
        "total_at_risk": "429006.83",
    },
    {
        "MMSA": "Rockingham County-Strafford County, NH, Metropolitan Division",
        "total_percent_at_risk": "57.72%",
        "high_risk_per_ICU_bed": "3365.052",
        "high_risk_per_hospital": "40380.624",
        "icu_beds": "60",
        "hospitals": "5",
        "total_at_risk": "201903.12",
    },
    {
        "MMSA": "Salisbury, MD-DE",
        "total_percent_at_risk": "68.32%",
        "high_risk_per_ICU_bed": "3292.271176",
        "high_risk_per_hospital": "37312.40667",
        "icu_beds": "68",
        "hospitals": "6",
        "total_at_risk": "223874.44",
    },
]


def _prefix_with_mmsa(row):
    """Return a copy of *row* whose non-MMSA keys are prefixed by its MMSA value."""
    mmsa_value = row['MMSA']
    # The MMSA entry goes first and keeps its original key.
    prefixed = {'MMSA': mmsa_value}
    for column, cell in row.items():
        if column != 'MMSA':
            prefixed[f'{mmsa_value} {column}'] = cell
    return prefixed


# One transformed dict per CSV row, in the original row order.
json_data = [_prefix_with_mmsa(row) for row in csv_data]
def write_json(path, data, indent=4):
    """Serialize *data* as JSON into the file at *path*.

    Args:
        path: Destination file; created, or truncated if it already exists.
        data: Any object that ``json.dump`` can serialize.
        indent: Indentation passed through to ``json.dump``.
    """
    # Pin the encoding: open() otherwise falls back to the platform locale.
    with open(path, 'w', encoding='utf-8') as file:
        json.dump(data, file, indent=indent)
# Write the transformed rows next to the script, using the default indent.
write_json(path='./mmsa-icu-beds.json', data=json_data)
json输出:
[
{
"MMSA": "Manhattan, KS",
"Manhattan, KS total_percent_at_risk": "47.29%",
"Manhattan, KS high_risk_per_ICU_bed": "4489.84875",
"Manhattan, KS high_risk_per_hospital": "8979.6975",
"Manhattan, KS icu_beds": "8",
"Manhattan, KS hospitals": "4",
"Manhattan, KS total_at_risk": "35918.79"
},
{
"MMSA": "Hilton Head Island-Bluffton-Beaufort, SC",
"Hilton Head Island-Bluffton-Beaufort, SC total_percent_at_risk": "62.72%",
"Hilton Head Island-Bluffton-Beaufort, SC high_risk_per_ICU_bed": "3904.163571",
"Hilton Head Island-Bluffton-Beaufort, SC high_risk_per_hospital": "36438.86",
"Hilton Head Island-Bluffton-Beaufort, SC icu_beds": "28",
"Hilton Head Island-Bluffton-Beaufort, SC hospitals": "3",
"Hilton Head Island-Bluffton-Beaufort, SC total_at_risk": "109316.58"
},
{
"MMSA": "Kahului-Wailuku-Lahaina, HI",
"Kahului-Wailuku-Lahaina, HI total_percent_at_risk": "59.13%",
"Kahului-Wailuku-Lahaina, HI high_risk_per_ICU_bed": "3860.557",
"Kahului-Wailuku-Lahaina, HI high_risk_per_hospital": "19302.785",
"Kahului-Wailuku-Lahaina, HI icu_beds": "20",
"Kahului-Wailuku-Lahaina, HI hospitals": "4",
"Kahului-Wailuku-Lahaina, HI total_at_risk": "77211.14"
},
{
"MMSA": "Spartanburg, SC",
"Spartanburg, SC total_percent_at_risk": "66.12%",
"Spartanburg, SC high_risk_per_ICU_bed": "3786.115556",
"Spartanburg, SC high_risk_per_hospital": "85187.6",
"Spartanburg, SC icu_beds": "45",
"Spartanburg, SC hospitals": "2",
"Spartanburg, SC total_at_risk": "170375.2"
},
{
"MMSA": "Baton Rouge, LA",
"Baton Rouge, LA total_percent_at_risk": "66.60%",
"Baton Rouge, LA high_risk_per_ICU_bed": "3459.7325",
"Baton Rouge, LA high_risk_per_hospital": "39000.62091",
"Baton Rouge, LA icu_beds": "124",
"Baton Rouge, LA hospitals": "11",
"Baton Rouge, LA total_at_risk": "429006.83"
},
{
"MMSA": "Rockingham County-Strafford County, NH, Metropolitan Division",
"Rockingham County-Strafford County, NH, Metropolitan Division total_percent_at_risk": "57.72%",
"Rockingham County-Strafford County, NH, Metropolitan Division high_risk_per_ICU_bed": "3365.052",
"Rockingham County-Strafford County, NH, Metropolitan Division high_risk_per_hospital": "40380.624",
"Rockingham County-Strafford County, NH, Metropolitan Division icu_beds": "60",
"Rockingham County-Strafford County, NH, Metropolitan Division hospitals": "5",
"Rockingham County-Strafford County, NH, Metropolitan Division total_at_risk": "201903.12"
},
{
"MMSA": "Salisbury, MD-DE",
"Salisbury, MD-DE total_percent_at_risk": "68.32%",
"Salisbury, MD-DE high_risk_per_ICU_bed": "3292.271176",
"Salisbury, MD-DE high_risk_per_hospital": "37312.40667",
"Salisbury, MD-DE icu_beds": "68",
"Salisbury, MD-DE hospitals": "6",
"Salisbury, MD-DE total_at_risk": "223874.44"
}
]
您已经成功地将csv转换为以列标题为键的dict。在下一步中,您可以基于"MMSA"的值来更新其余的键。
你的问题归结为"如何基于dict中某个键的值来更新其他的键"。
这里有一种方法:
# One CSV row as produced by the first step: column header -> cell text.
first_step = {
    "MMSA": "Manhattan, KS",
    "total_percent_at_risk": "47.29%",
    "high_risk_per_ICU_bed": "4489.84875",
    "high_risk_per_hospital": "8979.6975",
    "icu_beds": "8",
    "hospitals": "4",
    "total_at_risk": "35918.79",
}
# The row's MMSA value, used as the prefix for every other key.
mmsa = first_step["MMSA"]
def convert_key(key, prefix=None):
    """Return *key* prefixed with an MMSA value, leaving "MMSA" itself as is.

    Args:
        key: Column name to convert.
        prefix: MMSA value to put in front of the key. When omitted, the
            module-level ``mmsa`` is used, which ties the call to a single
            row; pass the row's own MMSA value when converting many rows.

    Returns:
        ``"MMSA"`` unchanged, otherwise ``"<prefix> <key>"``.
    """
    if key == "MMSA":
        return key
    if prefix is None:
        # Backward-compatible fallback to the single-row global.
        prefix = mmsa
    return prefix + " " + key
# Rebuild the row, passing every key through convert_key and keeping values.
second_step = dict(
    (convert_key(column), cell) for column, cell in first_step.items()
)
>>>> {'MMSA': 'Manhattan, KS', 'Manhattan, KS total_percent_at_risk': '47.29%', 'Manhattan, KS high_risk_per_ICU_bed': '4489.84875', 'Manhattan, KS high_risk_per_hospital': '8979.6975', 'Manhattan, KS icu_beds': '8', 'Manhattan, KS hospitals': '4', 'Manhattan, KS total_at_risk': '35918.79'}