我的数据帧如下。我需要从输入数组类型列中提取值。我该如何在Pyspark中实现这一目标?
None
root
|-- input: array (nullable = true)
| |-- element: map (containsNull = true)
| | |-- key: string
| | |-- value: map (valueContainsNull = true)
| | | |-- key: string
| | | |-- value: double (valueContainsNull = true)
|-- A: array (nullable = true)
| |-- element: map (containsNull = true)
| | |-- key: string
| | |-- value: map (valueContainsNull = true)
| | | |-- key: string
| | | |-- value: double (valueContainsNull = true)
|-- B: array (nullable = true)
| |-- element: map (containsNull = true)
| | |-- key: string
| | |-- value: map (valueContainsNull = true)
| | | |-- key: string
| | | |-- value: double (valueContainsNull = true)
|-- C: array (nullable = true)
| |-- element: map (containsNull = true)
| | |-- key: string
| | |-- value: map (valueContainsNull = true)
| | | |-- key: string
| | | |-- value: double (valueContainsNull = true)
|-- D: array (nullable = true)
| |-- element: map (containsNull = true)
| | |-- key: string
| | |-- value: map (valueContainsNull = true)
| | | |-- key: string
| | | |-- value: double (valueContainsNull = true)
|-- E: array (nullable = true)
| |-- element: map (containsNull = true)
| | |-- key: string
| | |-- value: map (valueContainsNull = true)
| | | |-- key: string
| | | |-- value: double (valueContainsNull = true)
|-- timestamp: array (nullable = true)
| |-- element: map (containsNull = true)
| | |-- key: string
| | |-- value: map (valueContainsNull = true)
| | | |-- key: string
| | | |-- value: double (valueContainsNull = true)
from itertools import chain
df.select('input').rdd.flatMap(lambda x: chain(*(x))).map(lambda x: x.values()).collect()