Python:如何正确地将没有标题行的 CSV 转换为 JSON



我需要把许多 CSV 文件转换为 JSON 文件。如下所示,这些 CSV 文件没有标题行。我已经为此写出了下面的代码:

import gspread
import os
from googleapiclient.discovery import build
from googleapiclient.http import MediaFileUpload

import csv
import json
import pandas as pd
from pathlib import Path

# Read a CSV file and hand its rows over for JSON conversion.
def read_CSV(file, json_file, fieldnames=None):
    """Read ``file`` as CSV and write its rows to ``json_file`` as JSON.

    Args:
        file: Path of the CSV file to read.
        json_file: Path of the JSON file to create.
        fieldnames: Optional sequence of column names, for CSV files that
            have no header row. When omitted, ``csv.DictReader`` consumes
            the first row of the file as the header, so a headerless file
            loses its first record (it becomes the dictionary keys).
    """
    # newline="" is what the csv module asks for, so that newlines embedded
    # in quoted fields are interpreted by the reader itself.
    with open(file, newline="") as csvfile:
        reader = csv.DictReader(csvfile, fieldnames=fieldnames)
        # fieldnames is None for an empty file; treat that as "no columns".
        field = reader.fieldnames or []
        # Keep only the named columns, in header order.
        csv_rows = [{name: row[name] for name in field} for row in reader]
    convert_write_json(csv_rows, json_file)  # defined further down in this file
# Convert the CSV rows to JSON and write them out.
def convert_write_json(data, json_file):
    """Serialize ``data`` as pretty-printed JSON into ``json_file``.

    Args:
        data: JSON-serializable object (here: the list of row dictionaries).
        json_file: Path of the file to create or overwrite.
    """
    with open(json_file, "w") as f:
        # Write exactly one JSON document; two documents back to back in
        # the same file cannot be parsed by a JSON reader.
        json.dump(data, f, sort_keys=False, indent=4, separators=(',', ': '))
# Convert every CSV file found in the folder.
pliki = "/users/user/CSVtoGD/"
files = Path(pliki).glob('*.csv')
for csv_path in files:
    # The JSON file goes next to its source, same name with a .json suffix.
    json_target = str(csv_path.with_suffix('.json'))
    read_CSV(csv_path, json_target)

csv如下所示:


Assets/Lakeside/Resources/Graphics/font_ttf/Roboto-Bold_0.ttf,Roboto,Bold,Roboto Bold,Roboto-Bold,Version 2.137; 2017,Roboto Bold,Google,Christian Robertson,Google.com,Roboto is a trademark of Google.,Copyright 2011 Google Inc. All Rights Reserved.,http://www.apache.org/licenses/LICENSE-2.0,,GOOG
Assets/Lakeside/Resources/Graphics/font_ttf/Roboto-Medium_0.ttf,Roboto Medium,Regular,Roboto Medium,Roboto-Medium,Roboto,,Version 2.137; 2017,Roboto Medium,Google,Christian Robertson,Google.com,Roboto is a trademark of Google.,Copyright 2011 Google Inc. All Rights Reserved.,http://www.apache.org/licenses/LICENSE-2.0,,GOOG
(...)

但结果并不理想:

[
{
"Assets/Lakeside/Resources/Graphics/font_ttf/Roboto-Bold_0.ttf": "Assets/Lakeside/Resources/Graphics/font_ttf/Roboto-Medium_0.ttf",
"Roboto": "Roboto Medium",
"Bold": "Regular",
"Roboto Bold": "",
"Roboto-Bold": "Roboto-Medium",
"Version 2.137; 2017": "Roboto",
"Google": "Version 2.137; 2017",
"Christian Robertson": "Roboto Medium",
"Google.com": "Google",
"Roboto is a trademark of Google.": "Christian Robertson",
"Copyright 2011 Google Inc. All Rights Reserved.": "Google.com",
"http://www.apache.org/licenses/LICENSE-2.0": "Roboto is a trademark of Google.",
"": "Copyright 2011 Google Inc. All Rights Reserved.",
"GOOG": "http://www.apache.org/licenses/LICENSE-2.0"
}
][{"Assets/Lakeside/Resources/Graphics/font_ttf/Roboto-Bold_0.ttf": "Assets/Lakeside/Resources/Graphics/font_ttf/Roboto-Medium_0.ttf", "Roboto": "Roboto Medium", "Bold": "Regular", "Roboto Bold": "", "Roboto-Bold": "Roboto-Medium", "Version 2.137; 2017": "Roboto", "Google": "Version 2.137; 2017", "Christian Robertson": "Roboto Medium", "Google.com": "Google", "Roboto is a trademark of Google.": "Christian Robertson", "Copyright 2011 Google Inc. All Rights Reserved.": "Google.com", "http://www.apache.org/licenses/LICENSE-2.0": "Roboto is a trademark of Google.", "": "Copyright 2011 Google Inc. All Rights Reserved.", "GOOG": "http://www.apache.org/licenses/LICENSE-2.0"}]

我不知道该如何组织 JSON 文件中的信息。如您所见,CSV 的每一行只是一个 TTF 字体的数据(如字体族 Family、供应商 Vendor 等)。

编辑:我想要的结果是这样的:

[
{
"Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Family": "Orange Kid",
"Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Subfamily": "Regular",
"Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Full name": "OrangeKid-Regular",
"Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:PostScript name": "OrangeKid-Regular",
"Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Preferred family": "Orange Kid",
"Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Preferred subfamily": "Regular",
"Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Mac font menu name": "OrangeKid-Regular",
"Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Version": "OTF 4.000;PS 001.001;Core 1.0.29",
"Assets/HighroadEngine/MMTools/Achievements/Fonts/orange kid.ttf:Unique ID": ""
}
]
[
{
"nextfont.ttf:Family": "Orange Kid",
"nextfont.ttf:Subfamily": "Regular",
"nextfont.ttf:Full name": "OrangeKid-Regular",
"nextfont.ttf:PostScript name": "OrangeKid-Regular",
(...)
}
]

我不确定你期望得到什么样的结果,但你可以直接提供一个自己定义的标题(字段名)列表,即使用

fields = ["Family", "Vendor", "Other1", "Other2", "Other3", ...]

而不是

field = reader.fieldnames

最小工作代码。

我使用 io 只是为了在内存中模拟文件,这样任何人都可以直接复制并运行这段代码;在实际使用中,你应该改用 open()。

我只发现了一个问题:在您的示例数据中,有些值在不同的行里位于不同的列,例如 Version(版本)。

data = '''Assets/Lakeside/Resources/Graphics/font_ttf/Roboto-Bold_0.ttf,Roboto,Bold,Roboto Bold,Roboto-Bold,Version 2.137; 2017,Roboto Bold,Google,Christian Robertson,Google.com,Roboto is a trademark of Google.,Copyright 2011 Google Inc. All Rights Reserved.,http://www.apache.org/licenses/LICENSE-2.0,,GOOG
Assets/Lakeside/Resources/Graphics/font_ttf/Roboto-Medium_0.ttf,Roboto Medium,Regular,Roboto Medium,Roboto-Medium,Version 2.137; 2017,Roboto Medium,Google,Christian Robertson,Google.com,Roboto is a trademark of Google.,Copyright 2011 Google Inc. All Rights Reserved.,http://www.apache.org/licenses/LICENSE-2.0,,GOOG
'''
import csv
import json
import io
# --- file names and column labels ---
input_filename = 'input.csv'
output_filename = 'output.json'
fields = ["Family", "Vendor", "Other1", "Other2", "Other3", "Other4", "Other5", "Other6", "Other7"]

# --- load CSV ---
# The sample text is wrapped in io.StringIO so the snippet runs without a
# file on disk; for a real file use open(input_filename) instead.
with io.StringIO(data) as fh_in:
    # zip() stops at the shorter sequence, so columns beyond the names
    # listed in `fields` are left out of each dictionary.
    csv_rows = [dict(zip(fields, row)) for row in csv.reader(fh_in)]

# Serialize once; the same text is written to the file and printed.
json_text = json.dumps(csv_rows, indent=4)

# --- write JSON in file ---
with open(output_filename, "w") as fh_out:
    fh_out.write(json_text)

# --- display JSON ---
print(json_text)

结果:

[
{
"Family": "Assets/Lakeside/Resources/Graphics/font_ttf/Roboto-Bold_0.ttf",
"Vendor": "Roboto",
"Other1": "Bold",
"Other2": "Roboto Bold",
"Other3": "Roboto-Bold",
"Other4": "Version 2.137; 2017",
"Other5": "Roboto Bold",
"Other6": "Google",
"Other7": "Christian Robertson"
},
{
"Family": "Assets/Lakeside/Resources/Graphics/font_ttf/Roboto-Medium_0.ttf",
"Vendor": "Roboto Medium",
"Other1": "Regular",
"Other2": "Roboto Medium",
"Other3": "Roboto-Medium",
"Other4": "Version 2.137; 2017",
"Other5": "Roboto Medium",
"Other6": "Google",
"Other7": "Christian Robertson"
}
]