从JSON数据中检索特定值并生成CSV文件



我正在从 API 检索下面这份数据,它以 JSON 格式返回。我只需要其中的一部分,其余数据全部忽略。我期望的最终 CSV 见下文。我只需要 results 键,并且在每条结果中只需要 id、unid 和 userHierarchies 字段。

{
"apiVersion": "3.0",
"loggedInUser": {
"id": "api@api.com",
"unid": "192",
"userHierarchies": [
{
"hierarchyField": "Project",
"value": "Eli-f"
},
{
"hierarchyField": "Division",
"value": "DDD"
},
{
"hierarchyField": "Site",
"value": "RD02"
},
{
"hierarchyField": "Company",
"value": "Core"
},
{
"hierarchyField": "Department",
"value": "Operations"
}
]

},
"results":[
{
"id":"Random_Company_57",
"unid":"75",
"userHierarchies":[
{
"hierarchyField":"Company",
"value":"ABC Company"
},
{
"hierarchyField":"Department",
"value":"gfds"
},
{
"hierarchyField":"Project",
"value":"JKL-SDFGHJW"
},
{
"hierarchyField":"Division",
"value":"Silver RC"
},
{
"hierarchyField":"Site",
"value":"SQ06"
}
],
"preferredLanguage":"en-AU",
"prefName":"Christmas Bells",

},
{
"id":"xyz.abc@safe.net",
"unid":"98",
"userHierarchies":[
{
"hierarchyField":"Company",
"value":"ABC Company"
},
{
"hierarchyField":"Department",
"value":"PUHJ"
},
{
"hierarchyField":"Project",
"value":"RPOJ-SDFGHJW"
},
{
"hierarchyField":"Division",
"value":"Silver RC"
},
{
"hierarchyField":"Site",
"value":"SQ06"
}
],
"preferredLanguage":"en-AU",
"prefName":"Christmas Bells",

}
]
}

我期望的输出 CSV 是这样的:

id,unid,hierarchyField,value
Random_Company_57,75,Company,ABC Company
Random_Company_57,75,Department,gfds
Random_Company_57,75,Project,JKL-SDFGHJW
Random_Company_57,75,Division,Silver RC
Random_Company_57,75,Site,SQ06
xyz.abc@safe.net,98,Company,ABC Company
xyz.abc@safe.net,98,Department,PUHJ
xyz.abc@safe.net,98,Project,RPOJ-SDFGHJW
xyz.abc@safe.net,98,Division,Silver RC

我的 Python 代码如下:

import requests
from pathlib import Path
from pprint import pprint
import pandas as pd
import time
import os
import argparse
# Command-line interface for the export script.
parser = argparse.ArgumentParser(
    description="Download API results and save them as CSV files."
)
# "-path_save" is the original spelling and is kept so existing invocations
# keep working; "--path_save" is the conventional double-dash alias. The
# option is required because the script cannot choose an output folder itself.
parser.add_argument(
    "-path_save",
    "--path_save",
    required=True,
    help="define where to save the file",
)
# nargs="?" lets --verbose be used as a bare flag (value True) while still
# accepting the legacy "--verbose VALUE" form; it is falsy when omitted.
parser.add_argument(
    "--verbose",
    nargs="?",
    const=True,
    default=False,
    help="display processing information",
)
# Timestamp taken when the module is loaded.
start = time.time()
def _flatten_results(results):
    """Return a DataFrame with one row per (result, hierarchy entry) pair.

    Args:
        results: Iterable of dicts, each expected to carry "id", "unid" and
            a "userHierarchies" list of {"hierarchyField", "value"} dicts.

    Returns:
        DataFrame with the columns id, unid, hierarchyField, value. Missing
        keys become empty cells; a result without hierarchy entries
        contributes no rows.
    """
    columns = ["id", "unid", "hierarchyField", "value"]
    rows = [
        {
            "id": record.get("id"),
            "unid": record.get("unid"),
            "hierarchyField": level.get("hierarchyField"),
            "value": level.get("value"),
        }
        for record in results
        # "or []" tolerates a missing or null userHierarchies entry.
        for level in record.get("userHierarchies") or []
    ]
    # Passing columns keeps the header even when there are no rows at all.
    return pd.DataFrame(rows, columns=columns)


def GetData(URL, endPoint, path_save, verbose):
    """Download one endpoint and save its flattened results as CSV.

    The "results" list of the JSON response is expanded so that every
    userHierarchies entry becomes its own row, then written to
    ``<path_save>/<endPoint>.csv`` without the index column.

    Args:
        URL: Full URL of the endpoint to request.
        endPoint: Endpoint name; used for logging and as the CSV file stem.
        path_save: Directory the CSV file is written into.
        verbose: When truthy, the flattened DataFrame is printed as well.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        KeyError: If the response JSON has no "results" key.
    """
    # NOTE(review): the credentials and API key are hard-coded in source;
    # consider loading them from the environment or a secrets store.
    response = requests.get(
        URL,
        auth=('api@api.net', 'uojk00'),
        headers={
            'Content-Type': 'application/json',
            'x-api-key': 'ydVtsni1blwJHb65OJBrrtV',
        },
        # Without a timeout a stalled connection would block forever.
        timeout=30,
    )
    print(endPoint, response)
    # Fail loudly on 4xx/5xx instead of trying to parse an error body.
    response.raise_for_status()
    df = _flatten_results(response.json()["results"])
    if verbose:
        print(df)
    df.to_csv(os.path.join(path_save, f"{endPoint}.csv"), index=False)

# Script entry point: download every configured endpoint into the folder
# given on the command line and report the total elapsed time.
if __name__ == '__main__':
    start = time.time()
    args = parser.parse_args()
    # Path(None) would raise an opaque TypeError; report a usage error instead.
    if args.path_save is None:
        parser.error("-path_save is required")
    path_save = Path(args.path_save)
    # Writing the CSV fails if the target folder is missing, so create it.
    path_save.mkdir(parents=True, exist_ok=True)
    verbose = args.verbose
    endPoint = ['users']
    for endPt in endPoint:
        URL = f"https://api.com/v10/chor/{endPt}"
        GetData(URL, endPt, path_save, verbose)
    print("Processed time:", time.time() - start)  # Total Time

请问该如何生成这样的 CSV?任何帮助都将不胜感激。

如果 data 是你问题中从 API 获取到的数据,可以参考下面的示例,将其按所需格式保存为 CSV:

# Build one row per userHierarchies entry and carry the parent's id and unid
# along as extra columns; `data` is the parsed JSON response.
df = pd.json_normalize(
    data["results"],
    record_path="userHierarchies",
    meta=["id", "unid"],
)
# json_normalize puts the record fields first; restore the requested order.
df = df[["id", "unid", "hierarchyField", "value"]]
df.to_csv("data.csv", index=False)

保存得到的 data.csv 内容如下:

id,unid,hierarchyField,value
Random_Company_57,75,Company,ABC Company
Random_Company_57,75,Department,gfds
Random_Company_57,75,Project,JKL-SDFGHJW
Random_Company_57,75,Division,Silver RC
Random_Company_57,75,Site,SQ06
xyz.abc@safe.net,98,Company,ABC Company
xyz.abc@safe.net,98,Department,PUHJ
xyz.abc@safe.net,98,Project,RPOJ-SDFGHJW
xyz.abc@safe.net,98,Division,Silver RC
xyz.abc@safe.net,98,Site,SQ06

最新更新