从 ORC 文件中获取表 DDL 的最简单方法是什么？



使用spark我可以做例如:

spark.read.orc("/path/to/file").printSchema

但我想在hive中得到类似show create table的输出。有可能吗?

这应该可以处理大多数情况（如果需要，可根据您的具体情况进行定制）：

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.types.{ArrayType, BinaryType, BooleanType, ByteType, DateType, DecimalType, DoubleType, FloatType, IntegerType, LongType, MapType, ShortType, StringType, StructField, StructType, TimestampType}
object Main {

  /**
   * Reads an ORC file's schema with Spark and prints the equivalent Hive
   * `CREATE EXTERNAL TABLE` DDL to stdout.
   *
   * @param args unused
   */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").getOrCreate()
    try {
      val schema = spark.read.orc("/path/to/orc/orc_file.orc").schema
      // Join the column definitions with ",\n" so the LAST column carries no
      // trailing comma — a comma right before ")" is invalid Hive DDL.
      val columns = schema.fields
        .map(field => s"  ${field.name.toLowerCase} ${getType(field.dataType)}")
        .mkString(",\n")
      println("CREATE EXTERNAL TABLE name (")
      println(columns)
      println(")")
    } finally {
      // Always release the SparkSession, even if reading the ORC file fails.
      spark.stop()
    }
  }

  /**
   * Maps a Spark SQL `DataType` (or a `StructField`, when recursing into a
   * struct) to the corresponding Hive type name.
   *
   * Kept as `Any` for source compatibility with the original signature.
   *
   * @throws RuntimeException for Spark types with no mapping defined here
   */
  def getType(typ: Any): String = typ match {
    case StringType    => "string"
    case IntegerType   => "int"
    case DoubleType    => "double"
    case LongType      => "bigint"
    case BooleanType   => "boolean"
    case FloatType     => "float"
    case ShortType     => "smallint"
    case ByteType      => "tinyint"
    case BinaryType    => "binary"
    case DateType      => "date"
    case TimestampType => "timestamp"
    case d: DecimalType => s"decimal(${d.precision},${d.scale})"
    case ArrayType(elementType, _) =>
      s"array<${getType(elementType)}>"
    case MapType(keyType, valueType, _) =>
      s"map<${getType(keyType)},${getType(valueType)}>"
    // Match StructType directly instead of the type-erased (unchecked)
    // `case seq: Seq[StructField]` the original relied on.
    case StructType(fields) =>
      s"struct<${fields.map(getType).mkString(",")}>"
    case StructField(name, dataType, _, _) =>
      s"${name.toLowerCase}:${getType(dataType)}"
    case other =>
      // Fail with a descriptive message rather than a bare MatchError.
      sys.error(s"No Hive mapping defined for Spark type: $other")
  }
}

最新更新