spark nlp jar,我从https://jar-download.com/artifacts/com.johnsnowlabs.nlp/spark-nlp-m1_2.12/4.0.1/source-code得到的
JAVA_HOME = C:\Program Files\Java\jdk-18.0.1.1 在系统变量和用户管理变量中。
"
import pyspark
from pyspark import SparkContext
from pyspark.sql import SparkSession
import sparknlp
from sparknlp.base import *
from sparknlp.annotator import *
from pyspark.ml import Pipeline
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
# Build the SparkSession FIRST and let it own the SparkContext.
#
# Bug in the original: `sc = SparkContext(master='local[2]')` was created
# before the builder, so getOrCreate() reused that bare context and every
# .config("spark.jars", ...) line was silently ignored — the Spark NLP jar
# never reached the JVM, which is what causes the
# "TypeError: 'JavaPackage' object is not callable" error below.
# The chained calls were also split across lines without parentheses,
# which is a SyntaxError in Python.
jar_path = "/Users/Admin/Anaconda3/Lib/site-packages/sparknlp/lib/jar_files.jar"
spark = (
    SparkSession.builder
    .appName('test')
    .master('local[2]')
    .config("spark.jars", jar_path)
    .config("spark.driver.extraClassPath", jar_path)
    .config("spark.executor.extraClassPath", jar_path)
    .getOrCreate()
)
# Reuse the context the session created instead of making one by hand.
sc = spark.sparkContext
print("Spark NLP version", sparknlp.version())
print("Apache Spark version:", spark.version)
# DocumentAssembler needs the Spark NLP jar on the JVM classpath (see above).
document = DocumentAssembler().setInputCol("description").setOutputCol("document")
**Executed the above**
**Throws the following error**
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
~AppDataLocalTemp/ipykernel_6864/2064948474.py in <module>
----> 1 **document = DocumentAssembler().setInputCol("description").setOutputCol("document")**
~\Anaconda3\lib\site-packages\pyspark\__init__.py in wrapper(self, *args, **kwargs)
133 raise TypeError("Method %s forces keyword arguments." % func.__name__)
134 self._input_kwargs = kwargs
--> 135 return func(self, **kwargs)
136
137 return cast(F, wrapper)
~\Anaconda3\lib\site-packages\sparknlp\base\document_assembler.py in __init__(self)
90 @keyword_only
91 def __init__(self):
---> 92 super(DocumentAssembler, self).__init__(classname="com.johnsnowlabs.nlp.DocumentAssembler")
93 self._setDefault(outputCol="document", cleanupMode='disabled')
94
~\Anaconda3\lib\site-packages\pyspark\__init__.py in wrapper(self, *args, **kwargs)
133 raise TypeError("Method %s forces keyword arguments." % func.__name__)
134 self._input_kwargs = kwargs
--> 135 return func(self, **kwargs)
136
137 return cast(F, wrapper)
~\Anaconda3\lib\site-packages\sparknlp\internal\annotator_transformer.py in __init__(self, classname)
31 self.setParams(**kwargs)
32 self.__class__._java_class_name = classname
---> 33 self._java_obj = self._new_java_obj(classname, self.uid)
34
~\Anaconda3\lib\site-packages\pyspark\ml\wrapper.py in _new_java_obj(java_class, *args)
84 java_obj = getattr(java_obj, name)
85 java_args = [_py2java(sc, arg) for arg in args]
---> 86 return java_obj(*java_args)
87
88 @staticmethod
**TypeError: 'JavaPackage' object is not callable**
"
这些是jar文件中的内容https://i.stack.imgur.com/GlIgD.png
您可以向spark会话提供您的maven坐标,而不是下载然后引用jar。
# Maven coordinates that Spark resolves and downloads at session start —
# no need to download jars by hand and point spark.jars at them.
SPARK_JARS = ["com.google.cloud.spark:spark-bigquery-with-dependencies_2.12:0.26.0",
              "com.johnsnowlabs.nlp:spark-nlp_2.12:4.1.0"]

def get_spark(master="local[*]", name="Colab"):
    """Create (or reuse) a SparkSession configured for Spark NLP.

    Args:
        master: Spark master URL (default: all local cores).
        name: application name shown in the Spark UI.

    Returns:
        The SparkSession from ``getOrCreate()``.
    """
    builder = SparkSession.builder.appName(name)
    # Bug fix: the original accepted `master` but never applied it,
    # leaving the master to whatever spark-defaults resolved to.
    builder.master(master)
    builder.config('spark.ui.port', '4050')
    # spark.jars.packages takes comma-separated Maven coordinates.
    builder.config('spark.jars.packages', ",".join(SPARK_JARS))
    builder.config("spark.driver.memory", "16G")
    # Kryo serializer + large buffer are the Spark NLP recommended settings.
    builder.config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
    builder.config("spark.kryoserializer.buffer.max", "2000M")
    # 0 = unlimited driver result size.
    builder.config("spark.driver.maxResultSize", "0")
    return builder.getOrCreate()
创建spark会话:
spark = get_spark()
BTW,下面有上述引用的jar:https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp_2.12/4.1.0