类型错误:'JavaPackage'对象不可调用 |将 Java 11 用于 Spark 3.3.0、Sparknlp 4.0.1 和 Sparknlp jar 来自 Spark-NLP-m1_2



spark nlp jar,我从https://jar-download.com/artifacts/com.johnsnowlabs.nlp/spark-nlp-m1_2.12/4.0.1/source-code得到的

JAVA_HOME = C:\Program Files\Java\jdk-18.0.1.1 已同时设置在系统环境变量和用户环境变量中。

"

import pyspark
from pyspark import SparkContext
from pyspark.sql import SparkSession
import sparknlp
from sparknlp.base import *
from sparknlp.annotator import *
from pyspark.ml import Pipeline
import pandas as pd
import warnings

warnings.filterwarnings('ignore')

# Build the session in ONE place. Two fixes versus the original:
# 1. The chained ".config(...)" calls must be wrapped in parentheses --
#    a line starting with a bare ".config(...)" is a SyntaxError in Python.
# 2. Do NOT create a SparkContext before calling SparkSession.builder:
#    if a context already exists, getOrCreate() silently reuses it and the
#    "spark.jars" / extraClassPath settings below are IGNORED, so the
#    Spark NLP jar never reaches the JVM classpath -- which is exactly what
#    produces "TypeError: 'JavaPackage' object is not callable".
spark = (
    SparkSession.builder
    .appName('test')
    .master('local[2]')
    .config("spark.jars", "/Users/Admin/Anaconda3/Lib/site-packages/sparknlp/lib/jar_files.jar")
    .config("spark.driver.extraClassPath", "/Users/Admin/Anaconda3/Lib/site-packages/sparknlp/lib/jar_files.jar")
    .config("spark.executor.extraClassPath", "/Users/Admin/Anaconda3/Lib/site-packages/sparknlp/lib/jar_files.jar")
    .getOrCreate()
)
# Keep the `sc` name available for any later code that expects it.
sc = spark.sparkContext

print("Spark NLP version", sparknlp.version())
print("Apache Spark version:", spark.version)

document = DocumentAssembler().setInputCol("description").setOutputCol("document")

**Executed the above**
**Throws the following error**
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
~AppDataLocalTemp/ipykernel_6864/2064948474.py in <module>
----> 1 **document = DocumentAssembler().setInputCol("description").setOutputCol("document")**
~Anaconda3libsite-packagespyspark__init__.py in wrapper(self, *args, **kwargs)
133             raise TypeError("Method %s forces keyword arguments." % func.__name__)
134         self._input_kwargs = kwargs
--> 135         return func(self, **kwargs)
136 
137     return cast(F, wrapper)
~Anaconda3libsite-packagessparknlpbasedocument_assembler.py in __init__(self)
90     @keyword_only
91     def __init__(self):
---> 92         super(DocumentAssembler, self).__init__(classname="com.johnsnowlabs.nlp.DocumentAssembler")
93         self._setDefault(outputCol="document", cleanupMode='disabled')
94 
~Anaconda3libsite-packagespyspark__init__.py in wrapper(self, *args, **kwargs)
133             raise TypeError("Method %s forces keyword arguments." % func.__name__)
134         self._input_kwargs = kwargs
--> 135         return func(self, **kwargs)
136 
137     return cast(F, wrapper)
~Anaconda3libsite-packagessparknlpinternalannotator_transformer.py in __init__(self, classname)
31         self.setParams(**kwargs)
32         self.__class__._java_class_name = classname
---> 33         self._java_obj = self._new_java_obj(classname, self.uid)
34 
~Anaconda3libsite-packagespysparkmlwrapper.py in _new_java_obj(java_class, *args)
84             java_obj = getattr(java_obj, name)
85         java_args = [_py2java(sc, arg) for arg in args]
---> 86         return java_obj(*java_args)
87 
88     @staticmethod
**TypeError: 'JavaPackage' object is not callable**

"

这些是jar文件中的内容https://i.stack.imgur.com/GlIgD.png

您可以向spark会话提供您的maven坐标,而不是下载然后引用jar。

# Maven coordinates resolved by Spark at session startup -- no manual jar
# download or local classpath wiring required.
SPARK_JARS = [
    "com.google.cloud.spark:spark-bigquery-with-dependencies_2.12:0.26.0",
    "com.johnsnowlabs.nlp:spark-nlp_2.12:4.1.0",
]


def get_spark(master="local[*]", name="Colab"):
    """Create (or reuse) a SparkSession configured for Spark NLP.

    Parameters
    ----------
    master : str
        Spark master URL (default: all local cores).
    name : str
        Application name shown in the Spark UI.

    Returns
    -------
    pyspark.sql.SparkSession
        A session whose ``spark.jars.packages`` pulls Spark NLP (and the
        BigQuery connector) from Maven.
    """
    builder = SparkSession.builder.appName(name)
    # FIX: the original accepted `master` but never applied it to the builder.
    builder.master(master)
    builder.config('spark.ui.port', '4050')
    # Let Spark resolve the jars from Maven instead of shipping local files.
    builder.config('spark.jars.packages', ",".join(SPARK_JARS))
    builder.config("spark.driver.memory", "16G")
    builder.config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
    builder.config("spark.kryoserializer.buffer.max", "2000M")
    builder.config("spark.driver.maxResultSize", "0")
    return builder.getOrCreate()

创建spark会话:

spark = get_spark()

BTW,下面有上述引用的jar:https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp_2.12/4.1.0

最新更新