Python:Json 字符串通过 Pandas 到 csv --> ValueError:将字典与非系列混合可能会导致不明确的排序



你好,我在把 JSON 字符串转换为 DataFrame 时遇到了问题。

print (json_resp.text)
{
  "error_code": 0,
  "description": "",
  "img_size": { "w": 650, "h": 488 },
  "people": [
  {
    "age": 22,
    "gender": 84,
    "mood": 29,
    "position": { "x": 190, "y": 161, "w": 259, "h": 259 },
    "rotation": { "yaw": -3, "pitch": 3, "roll": -1 },
    "landmarks": { "lefteye": { "x": 371, "y": 233 }, "righteye": { "x": 266, "y": 236 }, "maskpoints": [ { "x": 371, "y": 233 }, { "x": 266, "y": 236 }, { "x": 203, "y": 234 }, { "x": 206, "y": 261 }, { "x": 212, "y": 287 }, { "x": 220, "y": 313 }, { "x": 233, "y": 338 }, { "x": 250, "y": 357 }, { "x": 273, "y": 373 }, { "x": 296, "y": 388 }, { "x": 321, "y": 394 }, { "x": 346, "y": 390 }, { "x": 371, "y": 377 }, { "x": 396, "y": 362 }, { "x": 416, "y": 341 }, { "x": 430, "y": 315 }, { "x": 437, "y": 287 }, { "x": 444, "y": 258 }, { "x": 448, "y": 227 }, { "x": 215, "y": 221 }, { "x": 226, "y": 203 }, { "x": 247, "y": 195 }, { "x": 269, "y": 198 }, { "x": 291, "y": 204 }, { "x": 336, "y": 201 }, { "x": 360, "y": 193 }, { "x": 385, "y": 190 }, { "x": 408, "y": 198 }, { "x": 423, "y": 216 }, { "x": 314, "y": 231 }, { "x": 314, "y": 247 }, { "x": 314, "y": 263 }, { "x": 314, "y": 279 }, { "x": 294, "y": 297 }, { "x": 304, "y": 300 }, { "x": 315, "y": 302 }, { "x": 327, "y": 299 }, { "x": 338, "y": 297 }, { "x": 242, "y": 236 }, { "x": 254, "y": 226 }, { "x": 271, "y": 226 }, { "x": 284, "y": 240 }, { "x": 270, "y": 245 }, { "x": 253, "y": 244 }, { "x": 349, "y": 238 }, { "x": 362, "y": 224 }, { "x": 379, "y": 224 }, { "x": 393, "y": 234 }, { "x": 381, "y": 242 }, { "x": 363, "y": 242 }, { "x": 281, "y": 332 }, { "x": 294, "y": 327 }, { "x": 306, "y": 322 }, { "x": 315, "y": 325 }, { "x": 325, "y": 323 }, { "x": 340, "y": 328 }, { "x": 357, "y": 335 }, { "x": 341, "y": 347 }, { "x": 327, "y": 354 }, { "x": 317, "y": 354 }, { "x": 306, "y": 353 }, { "x": 294, "y": 347 }, { "x": 289, "y": 333 }, { "x": 306, "y": 331 }, { "x": 316, "y": 332 }, { "x": 325, "y": 331 }, { "x": 349, "y": 334 }, { "x": 326, "y": 339 }, { "x": 316, "y": 340 }, { "x": 306, "y": 339 } ] },
    "clothingcolors": [  ],
    "ethnicity": { "african": 83, "asian": 0, "caucasian": 12, "hispanic": 3 },
    "emotions": { "happiness": 1, "surprise": 5, "anger": 2, "disgust": 2, "fear": 1, "sadness": 11 }
  }
  ]
}

但是,当我尝试把这个 JSON 字符串转换为 DataFrame 时,得到了如下错误:

import pandas as pd
df_json = pd.read_json(json_resp.text, typ='frame')
print (df_json)
Traceback (most recent call last):
  File "<input>", line 1, in <module>
  File "C:\Users\uzytkownik\PycharmProjects\Face API\venv\lib\site-packages\pandas\io\json\json.py", line 427, in read_json
    result = json_reader.read()
  File "C:\Users\uzytkownik\PycharmProjects\Face API\venv\lib\site-packages\pandas\io\json\json.py", line 537, in read
    obj = self._get_object_parser(self.data)
  File "C:\Users\uzytkownik\PycharmProjects\Face API\venv\lib\site-packages\pandas\io\json\json.py", line 556, in _get_object_parser
    obj = FrameParser(json, **kwargs).parse()
  File "C:\Users\uzytkownik\PycharmProjects\Face API\venv\lib\site-packages\pandas\io\json\json.py", line 652, in parse
    self._parse_no_numpy()
  File "C:\Users\uzytkownik\PycharmProjects\Face API\venv\lib\site-packages\pandas\io\json\json.py", line 871, in _parse_no_numpy
    loads(json, precise_float=self.precise_float), dtype=None)
  File "C:\Users\uzytkownik\PycharmProjects\Face API\venv\lib\site-packages\pandas\core\frame.py", line 392, in __init__
    mgr = init_dict(data, index, columns, dtype=dtype)
  File "C:\Users\uzytkownik\PycharmProjects\Face API\venv\lib\site-packages\pandas\core\internals\construction.py", line 212, in init_dict
    return arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype)
  File "C:\Users\uzytkownik\PycharmProjects\Face API\venv\lib\site-packages\pandas\core\internals\construction.py", line 51, in arrays_to_mgr
    index = extract_index(arrays)
  File "C:\Users\uzytkownik\PycharmProjects\Face API\venv\lib\site-packages\pandas\core\internals\construction.py", line 320, in extract_index
    raise ValueError('Mixing dicts with non-Series may lead to '
ValueError: Mixing dicts with non-Series may lead to ambiguous ordering.

我该如何更改代码,才能得到一个简单的 DataFrame?

json_normalize 正是您需要的。但是,数据中有一些嵌套列表,这意味着它只会规范化/展平到第一层。

我认为问题出在 landmarks.maskpoints 上:它会生成 70 行、2 列(x 和 y)的数据。因此,试图把这 70 行内容放进同一行里就可能出问题。

如果您动手逐层拆解/展平它,就能明白我的意思。本质上,需要对每个部分分别做规范化,最后再把它们全部合并为一行;但这样您就会看到 maskpoints 带来的问题。

jsonStr = '''
{
  "error_code": 0,
  "description": "",
  "img_size": { "w": 650, "h": 488 },
  "people": [
  {
    "age": 22,
    "gender": 84,
    "mood": 29,
    "position": { "x": 190, "y": 161, "w": 259, "h": 259 },
    "rotation": { "yaw": -3, "pitch": 3, "roll": -1 },
    "landmarks": { "lefteye": { "x": 371, "y": 233 }, "righteye": { "x": 266, "y": 236 }, "maskpoints": [ { "x": 371, "y": 233 }, { "x": 266, "y": 236 }, { "x": 203, "y": 234 }, { "x": 206, "y": 261 }, { "x": 212, "y": 287 }, { "x": 220, "y": 313 }, { "x": 233, "y": 338 }, { "x": 250, "y": 357 }, { "x": 273, "y": 373 }, { "x": 296, "y": 388 }, { "x": 321, "y": 394 }, { "x": 346, "y": 390 }, { "x": 371, "y": 377 }, { "x": 396, "y": 362 }, { "x": 416, "y": 341 }, { "x": 430, "y": 315 }, { "x": 437, "y": 287 }, { "x": 444, "y": 258 }, { "x": 448, "y": 227 }, { "x": 215, "y": 221 }, { "x": 226, "y": 203 }, { "x": 247, "y": 195 }, { "x": 269, "y": 198 }, { "x": 291, "y": 204 }, { "x": 336, "y": 201 }, { "x": 360, "y": 193 }, { "x": 385, "y": 190 }, { "x": 408, "y": 198 }, { "x": 423, "y": 216 }, { "x": 314, "y": 231 }, { "x": 314, "y": 247 }, { "x": 314, "y": 263 }, { "x": 314, "y": 279 }, { "x": 294, "y": 297 }, { "x": 304, "y": 300 }, { "x": 315, "y": 302 }, { "x": 327, "y": 299 }, { "x": 338, "y": 297 }, { "x": 242, "y": 236 }, { "x": 254, "y": 226 }, { "x": 271, "y": 226 }, { "x": 284, "y": 240 }, { "x": 270, "y": 245 }, { "x": 253, "y": 244 }, { "x": 349, "y": 238 }, { "x": 362, "y": 224 }, { "x": 379, "y": 224 }, { "x": 393, "y": 234 }, { "x": 381, "y": 242 }, { "x": 363, "y": 242 }, { "x": 281, "y": 332 }, { "x": 294, "y": 327 }, { "x": 306, "y": 322 }, { "x": 315, "y": 325 }, { "x": 325, "y": 323 }, { "x": 340, "y": 328 }, { "x": 357, "y": 335 }, { "x": 341, "y": 347 }, { "x": 327, "y": 354 }, { "x": 317, "y": 354 }, { "x": 306, "y": 353 }, { "x": 294, "y": 347 }, { "x": 289, "y": 333 }, { "x": 306, "y": 331 }, { "x": 316, "y": 332 }, { "x": 325, "y": 331 }, { "x": 349, "y": 334 }, { "x": 326, "y": 339 }, { "x": 316, "y": 340 }, { "x": 306, "y": 339 } ] },
    "clothingcolors": [  ],
    "ethnicity": { "african": 83, "asian": 0, "caucasian": 12, "hispanic": 3 },
    "emotions": { "happiness": 1, "surprise": 5, "anger": 2, "disgust": 2, "fear": 1, "sadness": 11 }
  }
  ]
  }'''
import json
# NOTE(review): this import path is reportedly deprecated since pandas 1.0 in
# favour of `pandas.json_normalize` -- confirm against the installed version.
from pandas.io.json import json_normalize
# Parse the JSON text into nested Python dicts/lists.
jsonObj = json.loads(jsonStr)
# Flatten the top level only; the `people` column still holds a nested list
# of dictionaries.
df_a = json_normalize(jsonObj)
# Flatten `people` on its own. `clothingcolors` and `landmarks` still contain
# nested values, so each of them is normalized separately below.
df_people = json_normalize(jsonObj['people'])
df_clothingcolors = json_normalize(jsonObj['people'][0]['clothingcolors'])
df_landmarks = json_normalize(jsonObj['people'][0]['landmarks'])

# `landmarks` still carries the `maskpoints` list. Normalizing that list yields
# one row per point (70 rows of x/y), which cannot be merged into a single-row
# frame -- this is the root of the problem.
df_maskpoints = json_normalize(jsonObj['people'][0]['landmarks']['maskpoints'])

因此,如果您查看这些形状:

print (df_a.shape)
(1, 5)
print (df_people.shape)
(1, 26)
print (df_clothingcolors.shape)
(0, 0)
print (df_landmarks.shape)
(1, 5)
print (df_maskpoints.shape)
(70, 2)

...您看到maskPoints形状为70行。

但是,我发现这篇博客很有用。本质上,它会把所有嵌套的列表和字典逐层展开,最终得到一个完全扁平的结构(只有一行)。

jsonStr = '''
{
  "error_code": 0,
  "description": "",
  "img_size": { "w": 650, "h": 488 },
  "people": [
  {
    "age": 22,
    "gender": 84,
    "mood": 29,
    "position": { "x": 190, "y": 161, "w": 259, "h": 259 },
    "rotation": { "yaw": -3, "pitch": 3, "roll": -1 },
    "landmarks": { "lefteye": { "x": 371, "y": 233 }, "righteye": { "x": 266, "y": 236 }, "maskpoints": [ { "x": 371, "y": 233 }, { "x": 266, "y": 236 }, { "x": 203, "y": 234 }, { "x": 206, "y": 261 }, { "x": 212, "y": 287 }, { "x": 220, "y": 313 }, { "x": 233, "y": 338 }, { "x": 250, "y": 357 }, { "x": 273, "y": 373 }, { "x": 296, "y": 388 }, { "x": 321, "y": 394 }, { "x": 346, "y": 390 }, { "x": 371, "y": 377 }, { "x": 396, "y": 362 }, { "x": 416, "y": 341 }, { "x": 430, "y": 315 }, { "x": 437, "y": 287 }, { "x": 444, "y": 258 }, { "x": 448, "y": 227 }, { "x": 215, "y": 221 }, { "x": 226, "y": 203 }, { "x": 247, "y": 195 }, { "x": 269, "y": 198 }, { "x": 291, "y": 204 }, { "x": 336, "y": 201 }, { "x": 360, "y": 193 }, { "x": 385, "y": 190 }, { "x": 408, "y": 198 }, { "x": 423, "y": 216 }, { "x": 314, "y": 231 }, { "x": 314, "y": 247 }, { "x": 314, "y": 263 }, { "x": 314, "y": 279 }, { "x": 294, "y": 297 }, { "x": 304, "y": 300 }, { "x": 315, "y": 302 }, { "x": 327, "y": 299 }, { "x": 338, "y": 297 }, { "x": 242, "y": 236 }, { "x": 254, "y": 226 }, { "x": 271, "y": 226 }, { "x": 284, "y": 240 }, { "x": 270, "y": 245 }, { "x": 253, "y": 244 }, { "x": 349, "y": 238 }, { "x": 362, "y": 224 }, { "x": 379, "y": 224 }, { "x": 393, "y": 234 }, { "x": 381, "y": 242 }, { "x": 363, "y": 242 }, { "x": 281, "y": 332 }, { "x": 294, "y": 327 }, { "x": 306, "y": 322 }, { "x": 315, "y": 325 }, { "x": 325, "y": 323 }, { "x": 340, "y": 328 }, { "x": 357, "y": 335 }, { "x": 341, "y": 347 }, { "x": 327, "y": 354 }, { "x": 317, "y": 354 }, { "x": 306, "y": 353 }, { "x": 294, "y": 347 }, { "x": 289, "y": 333 }, { "x": 306, "y": 331 }, { "x": 316, "y": 332 }, { "x": 325, "y": 331 }, { "x": 349, "y": 334 }, { "x": 326, "y": 339 }, { "x": 316, "y": 340 }, { "x": 306, "y": 339 } ] },
    "clothingcolors": [  ],
    "ethnicity": { "african": 83, "asian": 0, "caucasian": 12, "hispanic": 3 },
    "emotions": { "happiness": 1, "surprise": 5, "anger": 2, "disgust": 2, "fear": 1, "sadness": 11 }
  }
  ]
  }'''

    import json
    from pandas.io.json import json_normalize

    def flatten_json(y):
        """Flatten arbitrarily nested dicts/lists into one single-level dict.

        Keys of the result are the path to each leaf value, joined with
        underscores; list elements contribute their positional index, e.g.
        ``{"a": {"b": [10, 20]}}`` -> ``{"a_b_0": 10, "a_b_1": 20}``.

        Args:
            y: A decoded JSON value (dict, list, or scalar).

        Returns:
            dict: Flat mapping from path string to leaf value. Empty dicts and
            empty lists contribute no keys. A bare scalar input is stored
            under the empty-string key.
        """
        out = {}

        def flatten(x, name=''):
            # isinstance (rather than an exact type check) so that dict/list
            # subclasses such as OrderedDict are descended into instead of
            # being stored as opaque leaf values.
            if isinstance(x, dict):
                for key, value in x.items():
                    # str() keeps non-string keys from raising on concatenation.
                    flatten(value, name + str(key) + '_')
            elif isinstance(x, list):
                for index, item in enumerate(x):
                    flatten(item, name + str(index) + '_')
            else:
                # Strip the trailing '_' separator added by the parent level.
                out[name[:-1]] = x

        flatten(y)
        return out
    # Parse the JSON text into nested Python dicts/lists.
    jsonObj = json.loads(jsonStr)
    # Collapse every nested dict/list into one flat {path: value} dictionary.
    flat = flatten_json(jsonObj)
    # Normalizing the flat dictionary gives a single-row DataFrame with one
    # column per flattened key.
    df = json_normalize(flat)

输出将是一个只有 1 行、共 168 列的 DataFrame。

相关内容

最新更新