将列表转换为 DataFrame(Spark Scala)



我有一个对象列表,我想将其转换为数据帧

/** Attempts to build a DataFrame from `list` with a hand-assembled schema and display it.
  *
  * NOTE(review): this block is not correct as written; see the inline notes on the
  * unassigned schema slot, the row construction, and the final expression.
  *
  * @param list objects that should become the rows of the DataFrame
  */
def createDf(list: ListBuffer[TestTO]): DataFrame = {
val structArrLength = 6
val cols: Array[StructField] = new Array[StructField](structArrLength)
cols(0) = StructField("a", StringType, nullable=true)
// NOTE(review): index 1 is never assigned, so cols(1) is still null when the
// schema is built below; the array holds six slots but only five fields are set.
cols(2) = StructField("b", StringType, nullable=true)
cols(3) = StructField("c", StringType, nullable=true)
cols(4) = StructField("d", DateType, nullable=true)
cols(5) = StructField("e", BooleanType, nullable=true)
val schema = StructType(cols)
val rddMap=sparkSession.sparkContext.parallelize(list)
// NOTE(review): Row(Seq(x)) yields a Row with ONE field whose value is a Seq
// wrapping the whole object, not one field per schema column. The object's
// individual properties need to be passed to Row(...) in schema order.
val rows = rddMap.map {  x => Row(Seq(x))}
val df = sparkSession.createDataFrame(rows, schema)
// NOTE(review): show(...) returns Unit, so the last expression does not match the
// declared DataFrame return type; `df` itself would have to be the final expression.
df.show(false)
}

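我收到以下错误:Caused by: java.lang.RuntimeException: scala.collection.immutable.$colon$colon is not a valid external type for schema of string(即:scala.collection.immutable.$colon$colon 不是 string 类型 schema 的有效外部类型)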

我是scala和spark世界的新手。如果有人能帮我在spark/scala中将列表转换为数据帧,那将是一个很大的帮助。

谢谢

解决方案:-

/** Converts a buffer of mask-schedule objects into a DataFrame.
  *
  * Every element becomes one `Row(prop1, prop2)`; the value order has to line up
  * with the field order of the schema returned by `createMaskSchema`.
  *
  * @param list objects to convert
  * @return a DataFrame holding one row per element of `list`
  */
def createDf(list: ListBuffer[MaskScheduleTO]): Dataset[Row] = {
  val schema = createMaskSchema
  // Transform directly into rows; no `var` or hand-filled mutable buffer is needed.
  val rows = list.map(x => Row(x.prop1, x.prop2)).toList
  val rowRdd = sparkSession.sparkContext.parallelize(rows)
  sparkSession.createDataFrame(rowRdd, schema)
}

/** Schema used for the mask-schedule DataFrame: nullable string columns `prop1` and `prop2`. */
private def createMaskSchema = {
  val columnNames = List("prop1", "prop2")
  // Both columns share the same type and nullability, so derive them from the names.
  StructType(columnNames.map(name => StructField(name, StringType, nullable = true)))
}

解决方案:-

/** Builds a DataFrame from the given mask-schedule objects.
  *
  * Each object is projected onto `Row(prop1, prop2)`, in the same order as the
  * fields of the schema produced by `createMaskSchema`.
  *
  * @param list objects to convert
  * @return a DataFrame with one row per element of `list`
  */
def createDf(list: ListBuffer[MaskScheduleTO]): Dataset[Row] = {
  // Immutable snapshot of the rows; replaces the original `var` + `foreach` append loop.
  val rows: List[Row] = list.toList.map(item => Row(item.prop1, item.prop2))
  sparkSession.createDataFrame(
    sparkSession.sparkContext.parallelize(rows),
    createMaskSchema
  )
}

/** Returns the two-column schema (`prop1`, `prop2`; both nullable strings) for the mask-schedule rows. */
private def createMaskSchema = {
  // Builder form: each `add` appends one field to the (initially empty) struct.
  new StructType()
    .add("prop1", StringType, nullable = true)
    .add("prop2", StringType, nullable = true)
}

最新更新