Quasiquotes in Scalafix



Below is Spark 2.4 code using unionAll:

import org.apache.spark.sql.{DataFrame, Dataset}
object UnionRewrite {
  def inSource(
      df1: DataFrame,
      df2: DataFrame,
      df3: DataFrame,
      ds1: Dataset[String],
      ds2: Dataset[String]
  ): Unit = {
    val res1 = df1.unionAll(df2)
    val res2 = df1.unionAll(df2).unionAll(df3)
    val res3 = Seq(df1, df2, df3).reduce(_ unionAll _)
    val res4 = ds1.unionAll(ds2)
    val res5 = Seq(ds1, ds2).reduce(_ unionAll _)
  }
}

In Spark 3.+ unionAll is deprecated. Below is the equivalent code using union:
import org.apache.spark.sql.{DataFrame, Dataset}
object UnionRewrite {
  def inSource(
      df1: DataFrame,
      df2: DataFrame,
      df3: DataFrame,
      ds1: Dataset[String],
      ds2: Dataset[String]
  ): Unit = {
    val res1 = df1.union(df2)
    val res2 = df1.union(df2).union(df3)
    val res3 = Seq(df1, df2, df3).reduce(_ union _)
    val res4 = ds1.union(ds2)
    val res5 = Seq(ds1, ds2).reduce(_ union _)
  }
}

The question: how do you write a Scalafix rule (using quasiquotes) that replaces unionAll with union?

Without quasiquotes I implemented this rule, and it works:

override def fix(implicit doc: SemanticDocument): Patch = {
  def matchOnTree(t: Tree): Patch = {
    t.collect {
      case Term.Apply(
            Term.Select(_, deprecated @ Term.Name(name)),
            _
          ) if config.deprecatedMethod.contains(name) =>
        Patch.replaceTree(
          deprecated,
          config.deprecatedMethod(name)
        )
      case Term.Apply(
            Term.Select(_, _ @ Term.Name(name)),
            List(
              Term.AnonymousFunction(
                Term.ApplyInfix(
                  _,
                  deprecatedAnm @ Term.Name(nameAnm),
                  _,
                  _
                )
              )
            )
          ) if "reduce".contains(name) && config.deprecatedMethod.contains(nameAnm) =>
        Patch.replaceTree(
          deprecatedAnm,
          config.deprecatedMethod(nameAnm)
        )
    }.asPatch
  }
  matchOnTree(doc.tree)
}
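For context, config.deprecatedMethod in the rule above is a Map[String, String] (e.g. unionAll -> union) coming from the rule's configuration. Here is a minimal sketch of how such a configurable rule could be wired with metaconfig, assuming the usual Scalafix withConfiguration pattern; the class and field names below are illustrative, not the original code:

package fix

import metaconfig.{ConfDecoder, Configured}
import metaconfig.generic
import metaconfig.generic.Surface
import scalafix.v1._
import scala.meta._

// Hypothetical config: maps each deprecated method name to its replacement.
case class UnionRewriteConfig(
    deprecatedMethod: Map[String, String] = Map("unionAll" -> "union")
)

object UnionRewriteConfig {
  val default: UnionRewriteConfig = UnionRewriteConfig()
  implicit val surface: Surface[UnionRewriteConfig] =
    generic.deriveSurface[UnionRewriteConfig]
  implicit val decoder: ConfDecoder[UnionRewriteConfig] =
    generic.deriveDecoder(default)
}

class UnionRewrite(config: UnionRewriteConfig)
    extends SemanticRule("UnionRewrite") {
  def this() = this(UnionRewriteConfig.default)

  // Reads the "UnionRewrite" section of .scalafix.conf, falling back to the default.
  override def withConfiguration(conf: Configuration): Configured[Rule] =
    conf.conf
      .getOrElse("UnionRewrite")(this.config)
      .map(newConfig => new UnionRewrite(newConfig))

  // Simplified body; the full matching logic is the one shown above.
  override def fix(implicit doc: SemanticDocument): Patch =
    doc.tree.collect {
      case Term.Select(_, name @ Term.Name(n))
          if config.deprecatedMethod.contains(n) =>
        Patch.replaceTree(name, config.deprecatedMethod(n))
    }.asPatch
}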

Ver: 1

package fix
import scalafix.v1._
import scala.meta._

class RuleQuasiquotesUnionAll extends SemanticRule("RuleQuasiquotesUnionAll") {
  override val description =
    """Quasiquotes in Scalafix. Replacing unionAll with union"""
  override val isRewrite = true

  override def fix(implicit doc: SemanticDocument): Patch = {
    def matchOnTree(t: Tree): Patch = {
      t.collect { case tt: Term =>
        tt match {
          case q"""unionAll""" =>
            Patch.replaceTree(tt, """union""")
          case _ => Patch.empty
        }
      }.asPatch
    }
    matchOnTree(doc.tree)
  }
}
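Note that q"unionAll" in Ver: 1 is a purely syntactic pattern, equivalent to Term.Name("unionAll"), so it matches that identifier wherever it appears, regardless of the receiver's type. A standalone scala.meta check (illustrative only, the object name is mine):

import scala.meta._

object QuasiquoteCheck extends App {
  // q"unionAll" builds (and, used as a pattern, matches) a bare Term.Name.
  val name: Term.Name = q"unionAll"
  println(name.structure) // Term.Name("unionAll")

  // Hence the pattern also fires inside an unrelated definition such as `val unionAll = 42`.
  val defn = "val unionAll = 42".parse[Stat].get
  println(defn.collect { case n @ q"unionAll" => n }) // List(unionAll)
}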

Ver: 2

package fix
import scalafix.v1._
import scala.meta._

class UnionRewriteWithCheckType
    extends SemanticRule("UnionRewriteWithCheckType") {
  override val description = {
    """Replacing unionAll with union only for Dataset and DataFrame"""
    // TODO: add type(s) to config
  }
  override val isRewrite = true

  override def fix(implicit doc: SemanticDocument): Patch = {
    def isDatasetDataFrame(
        tp: String,
        q: Term,
        a: List[Term]
    ): Boolean = {
      if (a.nonEmpty) {
        if (q.toString().indexOf("unionAll") >= 0 && tp == "DataFrame") {
          // Note: for `val res: Dataset[Row] = DataFrame1.unionAll(DataFrame2)`
          // the result type is Dataset[Row]
          (q.symbol.info.get.signature.toString().indexOf("Dataset") >= 0)
            .equals(true) &&
          (a.head.symbol.info.get.signature.toString().indexOf(tp) >= 0)
            .equals(true)
        } else
          (q.symbol.info.get.signature.toString().indexOf(tp) >= 0)
            .equals(true) &&
          (a.head.symbol.info.get.signature.toString().indexOf(tp) >= 0)
            .equals(true)
      } else false
    }

    def matchOnTree(t: Tree): Patch = {
      t collect {
        case meth @ Defn.Def(a1, a2, a3, a4, a5, a6) =>
          a6.collect {
            case ta @ Term.Apply(
                  Term.Select(qual, trm @ q"""unionAll"""),
                  args
                ) =>
              if (
                isDatasetDataFrame(
                  "DataFrame",
                  qual,
                  args
                ) || isDatasetDataFrame("Dataset", qual, args)
              ) {
                Patch.replaceTree(
                  trm,
                  """union"""
                )
              } else Patch.empty
            case tasr @ Term.Apply(
                  Term.Select(qual, tnr @ q"""reduce"""),
                  args @ List(
                    Term.AnonymousFunction(
                      Term.ApplyInfix(_, op @ q"""unionAll""", _, _)
                    )
                  )
                ) =>
              if (
                qual.symbol.info.get.signature
                  .toString()
                  .indexOf("Dataset") >= 0 || qual.symbol.info.get.signature
                  .toString()
                  .indexOf("DataFrame") >= 0
              ) Patch.replaceTree(op, """union""")
              else Patch.empty
            case _ => Patch.empty
          }.asPatch
        case _ => Patch.empty
      }
    }.asPatch
    matchOnTree(doc.tree)
  }
}


Reply to Dmytro Mitin's answer

Check 1. When we use Slick:

def inSourceSlickUnionAll(): Unit = {
  case class Coffee(name: String, price: Double)
  class Coffees(tag: Tag) extends Table[(String, Double)](tag, "COFFEES") {
    def name = column[String]("COF_NAME")
    def price = column[Double]("PRICE")
    def * = (name, price)
  }
  val coffees = TableQuery[Coffees]
  val q1 = coffees.filter(_.price < 8.0)
  val q2 = coffees.filter(_.price > 9.0)
  val unionQuery = q1 union q2
  val unionAllQuery = q1 unionAll q2
  val unionAllQuery1 = q1 ++ q2
}

Result of your rule:

=======
=> Diff
=======
--- obtained
+++ expected
@@ -82,3 +82,3 @@
     val unionQuery = q1 union q2
-    val unionAllQuery = q1 union q2
+    val unionAllQuery = q1 unionAll q2
     val unionAllQuery1 = q1 ++ q2

Try

override def fix(implicit doc: SemanticDocument): Patch = {
  def isDatasetSubtype(expr: Tree): Boolean =
    expr.symbol.info.flatMap(_.signature match {
      case ValueSignature(tpe)        => Some(tpe)
      case MethodSignature(_, _, tpe) => Some(tpe)
      case _                          => None
    }) match {
      case Some(TypeRef(_, symbol, _)) =>
        Seq("package.DataFrame", "Dataset")
          .map(tp => Symbol(s"org/apache/spark/sql/$tp#"))
          .contains(symbol)
      case _ => false
    }
  def mkPatch(ename: Tree): Patch = Patch.replaceTree(ename, "union")
  def matchOnTree(t: Tree): Patch =
    t.collect {
      case q"$expr.${ename @ q"unionAll"}($expr1)" if isDatasetSubtype(expr) =>
        mkPatch(ename)
      // infix application
      case q"$expr ${ename @ q"unionAll"} $expr1" /*if isDatasetSubtype(expr)*/ =>
        mkPatch(ename)
    }.asPatch
  matchOnTree(doc.tree)
}

It transforms the first snippet below into the second:

import org.apache.spark.sql.{DataFrame, Dataset}
object UnionRewrite {
  def inSource(
      df1: DataFrame,
      df2: DataFrame,
      df3: DataFrame,
      ds1: Dataset[String],
      ds2: Dataset[String]
  ): Unit = {
    val res1 = df1.unionAll(df2)
    val res2 = df1.unionAll(df2).unionAll(df3)
    val res3 = Seq(df1, df2, df3).reduce(_ unionAll _)
    val res4 = ds1.unionAll(ds2)
    val res5 = Seq(ds1, ds2).reduce(_ unionAll _)
    val res6 = Seq(ds1, ds2).reduce(_ unionAll (_))
    val unionAll = 42
  }
}

import org.apache.spark.sql.{DataFrame, Dataset}
object UnionRewrite {
  def inSource(
      df1: DataFrame,
      df2: DataFrame,
      df3: DataFrame,
      ds1: Dataset[String],
      ds2: Dataset[String]
  ): Unit = {
    val res1 = df1.union(df2)
    val res2 = df1.union(df2).union(df3)
    val res3 = Seq(df1, df2, df3).reduce(_ union _)
    val res4 = ds1.union(ds2)
    val res5 = Seq(ds1, ds2).reduce(_ union _)
    val res6 = Seq(ds1, ds2).reduce(_ union (_))
    val unionAll = 42
  }
}

https://scalacenter.github.io/scalafix/docs/developers/setup.html

https://scalameta.org/docs/trees/quasiquotes.html

https://scalameta.org/docs/semanticdb/guide.html

Your Ver: 1 implementation incorrectly transforms val unionAll = 42 into val union = 42.

Sadly, the <: Dataset[_] check can't be done for the infix application, because in that case SemanticDB doesn't seem to have type information (for the underscore _ inside the lambda). This looks like a limitation of SemanticDB. If you really need the subtype check in this case, you may need a compiler plugin.
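To see why: the underscore lambda is represented syntactically with placeholder nodes, which carry no symbol that SemanticDB could resolve. A standalone scala.meta sketch (not part of any rule, the object name is mine) showing the shape of the tree:

import scala.meta._

object UnderscoreLambdaShape extends App {
  // Parse the reduce call with an underscore lambda.
  val tree = "Seq(ds1, ds2).reduce(_ unionAll _)".parse[Term].get

  // The argument of reduce is, roughly:
  //   Term.AnonymousFunction(
  //     Term.ApplyInfix(Term.Placeholder(), Term.Name("unionAll"), ..., List(Term.Placeholder()))
  //   )
  // The operands are Term.Placeholder nodes, i.e. there is no named term whose
  // SemanticDB symbol could be checked against Dataset/DataFrame.
  println(tree.structure)
}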


Update. We can use several rules: first apply a rule that replaces underscore lambdas with parameter lambdas:
override def fix(implicit doc: SemanticDocument): Patch = {
  def matchOnTree(t: Tree): Patch =
    t.collect {
      case t1 @ q"_.unionAll(_)" =>
        Patch.replaceTree(t1, "(x, y) => x.unionAll(y)")
      case t1 @ q"_ unionAll _" =>
        Patch.replaceTree(t1, "(x, y) => x unionAll y")
    }.asPatch
  matchOnTree(doc.tree)
}

Then re-compile the code (new .semanticdb files will be generated) and apply a second rule, which replaces unionAll with union (if the types match):

override def fix(implicit doc: SemanticDocument): Patch = {
  def isDatasetSubtype(expr: Tree): Boolean = {
    expr.symbol.info.flatMap(_.signature match {
      case ValueSignature(tpe)        => Some(tpe)
      case MethodSignature(_, _, tpe) => Some(tpe)
      case _                          => None
    }) match {
      case Some(TypeRef(_, symbol, _)) =>
        Seq("package.DataFrame", "Dataset")
          .map(tp => Symbol(s"org/apache/spark/sql/$tp#"))
          .contains(symbol)
      case _ => false
    }
  }
  def mkPatch(ename: Tree): Patch = Patch.replaceTree(ename, "union")
  def matchOnTree(t: Tree): Patch =
    t.collect {
      case q"$expr.${ename @ q"unionAll"}($_)" if isDatasetSubtype(expr) =>
        mkPatch(ename)
      case q"$expr ${ename @ q"unionAll"} $_" if isDatasetSubtype(expr) =>
        mkPatch(ename)
    }.asPatch
  matchOnTree(doc.tree)
}

Then apply a third rule, which replaces the parameter lambdas back with underscore lambdas:

override def fix(implicit doc: SemanticDocument): Patch = {
  def matchOnTree(t: Tree): Patch =
    t.collect {
      case t1 @ q"(x, y) => x.union(y)" =>
        Patch.replaceTree(t1, "_.union(_)")
      case t1 @ q"(x, y) => x union y" =>
        Patch.replaceTree(t1, "_ union _")
    }.asPatch
  matchOnTree(doc.tree)
}

The first and the third rules can be syntactic.
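For instance, the first rule could be declared as a purely syntactic rule, which needs no SemanticDB information and can run before the project is recompiled. A minimal sketch reusing the same patterns as the first rule above (the rule name is illustrative):

package fix

import scalafix.v1._
import scala.meta._

// Syntactic variant of the first rule: it only inspects the parse tree.
class ExpandUnderscoreLambda extends SyntacticRule("ExpandUnderscoreLambda") {
  override def fix(implicit doc: SyntacticDocument): Patch =
    doc.tree.collect {
      case t1 @ q"_.unionAll(_)" =>
        Patch.replaceTree(t1, "(x, y) => x.unionAll(y)")
      case t1 @ q"_ unionAll _" =>
        Patch.replaceTree(t1, "(x, y) => x unionAll y")
    }.asPatch
}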
