使用Spacy的电子邮件分类器,在尝试实现BOW时由于版本问题而引发以下错误



我正在尝试创建具有独占类和"弓形;架构,但由于版本问题,它抛出了以下错误,我的python版本是3.8,我的spacy版本是3.2.3,请有人帮助我解决这个

######## Main method ########
def main():
# Load dataset
data = pd.read_csv(data_path, sep='t')
observations = len(data.index)
# print("Dataset Size: {}".format(observations))
# Create an empty spacy model
nlp = spacy.blank("en")
# Create the TextCategorizer with exclusive classes and "bow" architecture
text_cat = nlp.create_pipe(
"textcat",
config={
"exclusive_classes": True,
"architecture": "bow"})
# Adding the TextCategorizer to the created empty model
nlp.add_pipe(text_cat)
# Add labels to text classifier
text_cat.add_label("ham")
text_cat.add_label("spam")
# Split data into train and test datasets
x_train, x_test, y_train, y_test = train_test_split(
data['text'], data['label'], test_size=0.33, random_state=7)
# Create the train and test data for the spacy model
train_lables = [{'cats': {'ham': label == 'ham',
'spam': label == 'spam'}}  for label in y_train]
test_lables = [{'cats': {'ham': label == 'ham',
'spam': label == 'spam'}}  for label in y_test]
# Spacy model data
train_data = list(zip(x_train, train_lables))
test_data = list(zip(x_test, test_lables))
# Model configurations
optimizer = nlp.begin_training()
batch_size = 5
epochs = 10
# Training the model
train_model(nlp, train_data, optimizer, batch_size, epochs)
# Sample predictions
# print(train_data[0])
# sample_test = nlp(train_data[0][0])
# print(sample_test.cats)
# Train and test accuracy
train_predictions = get_predictions(nlp, x_train)
test_predictions = get_predictions(nlp, x_test)
train_accuracy = accuracy_score(y_train, train_predictions)
test_accuracy = accuracy_score(y_test, test_predictions)
print("Train accuracy: {}".format(train_accuracy))
print("Test accuracy: {}".format(test_accuracy))
# Creating the confusion matrix graphs
cf_train_matrix = confusion_matrix(y_train, train_predictions)
plt.figure(figsize=(10,8))
sns.heatmap(cf_train_matrix, annot=True, fmt='d')
cf_test_matrix = confusion_matrix(y_test, test_predictions)
plt.figure(figsize=(10,8))
sns.heatmap(cf_test_matrix, annot=True, fmt='d')

if __name__ == "__main__":
main()

以下是错误

---------------------------------------------------------------------------
ConfigValidationError                     Traceback (most recent call last)
<ipython-input-6-a77bb5692b25> in <module>
72 
73 if __name__ == "__main__":
---> 74     main()
<ipython-input-6-a77bb5692b25> in main()
12 
13     # Create the TextCategorizer with exclusive classes and "bow" architecture
---> 14     text_cat = nlp.add_pipe(
15                   "textcat",
16                   config={
~anaconda3libsite-packagesspacylanguage.py in add_pipe(self, factory_name, name, before, after, first, last, source, config, raw_config, validate)
790                     lang_code=self.lang,
791                 )
--> 792             pipe_component = self.create_pipe(
793                 factory_name,
794                 name=name,
~anaconda3libsite-packagesspacylanguage.py in create_pipe(self, factory_name, name, config, raw_config, validate)
672         # We're calling the internal _fill here to avoid constructing the
673         # registered functions twice
--> 674         resolved = registry.resolve(cfg, validate=validate)
675         filled = registry.fill({"cfg": cfg[factory_name]}, validate=validate)["cfg"]
676         filled = Config(filled)
~anaconda3libsite-packagesthincconfig.py in resolve(cls, config, schema, overrides, validate)
727         validate: bool = True,
728     ) -> Dict[str, Any]:
--> 729         resolved, _ = cls._make(
730             config, schema=schema, overrides=overrides, validate=validate, resolve=True
731         )
~anaconda3libsite-packagesthincconfig.py in _make(cls, config, schema, overrides, resolve, validate)
776         if not is_interpolated:
777             config = Config(orig_config).interpolate()
--> 778         filled, _, resolved = cls._fill(
779             config, schema, validate=validate, overrides=overrides, resolve=resolve
780         )
~anaconda3libsite-packagesthincconfig.py in _fill(cls, config, schema, validate, resolve, parent, overrides)
831                     schema.__fields__[key] = copy_model_field(field, Any)
832                 promise_schema = cls.make_promise_schema(value, resolve=resolve)
--> 833                 filled[key], validation[v_key], final[key] = cls._fill(
834                     value,
835                     promise_schema,
~anaconda3libsite-packagesthincconfig.py in _fill(cls, config, schema, validate, resolve, parent, overrides)
897                 result = schema.parse_obj(validation)
898             except ValidationError as e:
--> 899                 raise ConfigValidationError(
900                     config=config, errors=e.errors(), parent=parent
901                 ) from None
ConfigValidationError: 
Config validation error
textcat -> architecture        extra fields not permitted
textcat -> exclusive_classes   extra fields not permitted
{'nlp': <spacy.lang.en.English object at 0x000001B90CD4BF70>, 'name': 'textcat', 'architecture': 'bow', 'exclusive_classes': True, 'model': {'@architectures': 'spacy.TextCatEnsemble.v2', 'linear_model': {'@architectures': 'spacy.TextCatBOW.v2', 'exclusive_classes': True, 'ngram_size': 1, 'no_output_layer': False}, 'tok2vec': {'@architectures': 'spacy.Tok2Vec.v2', 'embed': {'@architectures': 'spacy.MultiHashEmbed.v2', 'width': 64, 'rows': [2000, 2000, 1000, 1000, 1000, 1000], 'attrs': ['ORTH', 'LOWER', 'PREFIX', 'SUFFIX', 'SHAPE', 'ID'], 'include_static_vectors': False}, 'encode': {'@architectures': 'spacy.MaxoutWindowEncoder.v2', 'width': 64, 'window_size': 1, 'maxout_pieces': 3, 'depth': 2}}}, 'scorer': {'@scorers': 'spacy.textcat_scorer.v1'}, 'threshold': 0.5, '@factories': 'textcat'}

我的Spacy版本

print(spacy.__version__)

3.2.3

我的Python版本

import sys
print(sys.version)

3.8.8(默认,2021年4月13日15:08:03([MSC v.1916 64位(AMD64(]

Tring降级Spacy版本

!conda install -c conda-forge spacy = 2.1.8
Collecting package metadata (current_repodata.json): ...working... done
Solving environment: ...working... failed with initial frozen solve. Retrying with flexible solve.
Collecting package metadata (repodata.json): ...working... done
Solving environment: ...working... failed with initial frozen solve. Retrying with flexible solve.
Solving environment: ...working... 
Building graph of deps:   0%|          | 0/5 [00:00<?, ?it/s]
Examining spacy=2.1.8:   0%|          | 0/5 [00:00<?, ?it/s] 
Examining python=3.8:  20%|##        | 1/5 [00:00<00:00,  4.80it/s]
Examining python=3.8:  40%|####      | 2/5 [00:00<00:00,  9.60it/s]
Examining @/win-64::__cuda==11.6=0:  40%|####      | 2/5 [00:01<00:00,  9.60it/s]
Examining @/win-64::__cuda==11.6=0:  60%|######    | 3/5 [00:01<00:01,  1.97it/s]
Examining @/win-64::__win==0=0:  60%|######    | 3/5 [00:01<00:01,  1.97it/s]    
Examining @/win-64::__archspec==1=x86_64:  80%|########  | 4/5 [00:01<00:00,  1.97it/s]
                 
Determining conflicts:   0%|          | 0/5 [00:00<?, ?it/s]
Examining conflict for spacy python:   0%|          | 0/5 [00:00<?, ?it/s]
    
UnsatisfiableError: The following specifications were found
to be incompatible with the existing python installation in your environment:
Specifications:
- spacy=2.1.8 -> python[version='>=3.6,<3.7.0a0|>=3.7,<3.8.0a0']
Your python: python=3.8
Found conflicts! Looking for incompatible packages.
This can take several minutes.  Press CTRL-C to abort.
failed

If python is on the left-most side of the chain, that's the version you've asked for.
When python appears to the right, that indicates that the thing on the left is somehow
not available for the python version you are constrained to. Note that conda will not
change your python version to a different minor version unless you explicitly specify
that.

请随时发表评论或提问。谢谢

从我理解错误消息的方式来看,它告诉您要安装的spacy版本(2.1.8(与您拥有的python版本(3.8.8(不兼容。它需要python 3.6或3.7。

因此,要么用Python 3.6或3.7创建一个环境(在conda中创建新环境时,很容易指定Python版本(,要么使用更高版本的spacy。如果你只使用最新版本的spacy,你是否已经尝试过代码是否有效?

你使用这个spacy版本的具体原因是什么?如果您正在使用一些不再受支持的方法,那么将代码更新为较新的spacy方法可能更有意义。尤其是如果你这样做是为了了解spacy,那么学习不再支持的方法会适得其反。遗憾的是,许多教程要么没有更新他们的代码,要么至少没有指定他们使用的版本,然后让他们的代码在线多年。

最新更新