在spaCy 3.0中加载管道时出错



更新到 spaCy 3.0.6 后,我无法加载任何一个训练好的管道,尽管两者似乎都已正确安装:

================= Installed pipeline packages (spaCy v3.0.6) =================
ℹ spaCy installation:
/Users/baconbaker/anaconda3/envs/ml/lib/python3.8/site-packages/spacy
NAME              SPACY            VERSION                            
en_core_web_sm    >=3.0.0,<3.1.0   3.0.0   ✔
en_core_web_trf   >=3.0.0,<3.1.0   3.0.0   ✔

当使用 spacy.load() 以及将管道作为模块导入时,都会发生这种情况(以下所有行的错误都相同):

nlp = spacy.load("en_core_web_trf")
nlp = spacy.load("en_core_web_sm")
import en_core_web_sm
nlp = en_core_web_sm.load()
import en_core_web_trf
nlp = en_core_web_trf.load()

我得到的错误如下:

---------------------------------------------------------------------------
ImportError                               Traceback (most recent call last)
<ipython-input-9-b38eb3aae320> in <module>
1 import en_core_web_trf
----> 2 nlp = en_core_web_trf.load()
~/anaconda3/envs/ml/lib/python3.8/site-packages/en_core_web_trf/__init__.py in load(**overrides)
8 
9 def load(**overrides):
---> 10     return load_model_from_init_py(__file__, **overrides)
~/anaconda3/envs/ml/lib/python3.8/site-packages/spacy/util.py in load_model_from_init_py(init_file, vocab, disable, exclude, config)
514     if not model_path.exists():
515         raise IOError(Errors.E052.format(path=data_path))
--> 516     return load_model_from_path(
517         data_path,
518         vocab=vocab,
~/anaconda3/envs/ml/lib/python3.8/site-packages/spacy/util.py in load_model_from_path(model_path, meta, vocab, disable, exclude, config)
389     config_path = model_path / "config.cfg"
390     config = load_config(config_path, overrides=dict_to_dot(config))
--> 391     nlp = load_model_from_config(config, vocab=vocab, disable=disable, exclude=exclude)
392     return nlp.from_disk(model_path, exclude=exclude)
393 
~/anaconda3/envs/ml/lib/python3.8/site-packages/spacy/util.py in load_model_from_config(config, vocab, disable, exclude, auto_fill, validate)
426     # registry, including custom subclasses provided via entry points
427     lang_cls = get_lang_class(nlp_config["lang"])
--> 428     nlp = lang_cls.from_config(
429         config,
430         vocab=vocab,
~/anaconda3/envs/ml/lib/python3.8/site-packages/spacy/language.py in from_config(cls, config, vocab, disable, exclude, meta, auto_fill, validate)
1637         # then we would load them twice at runtime: once when we make from config,
1638         # and then again when we load from disk.
-> 1639         nlp = lang_cls(vocab=vocab, create_tokenizer=create_tokenizer, meta=meta)
1640         if after_creation is not None:
1641             nlp = after_creation(nlp)
~/anaconda3/envs/ml/lib/python3.8/site-packages/spacy/language.py in __init__(self, vocab, max_length, meta, create_tokenizer, batch_size, **kwargs)
148         # points. The factory decorator applied to these functions takes care
149         # of the rest.
--> 150         util.registry._entry_point_factories.get_all()
151 
152         self._config = DEFAULT_CONFIG.merge(self.default_config)
~/anaconda3/envs/ml/lib/python3.8/site-packages/catalogue/__init__.py in get_all(self)
106         result = {}
107         if self.entry_points:
--> 108             result.update(self.get_entry_points())
109         for keys, value in REGISTRY.items():
110             if len(self.namespace) == len(keys) - 1 and all(
~/anaconda3/envs/ml/lib/python3.8/site-packages/catalogue/__init__.py in get_entry_points(self)
121         result = {}
122         for entry_point in AVAILABLE_ENTRY_POINTS.get(self.entry_point_namespace, []):
--> 123             result[entry_point.name] = entry_point.load()
124         return result
125 
~/anaconda3/envs/ml/lib/python3.8/importlib/metadata.py in load(self)
75         """
76         match = self.pattern.match(self.value)
---> 77         module = import_module(match.group('module'))
78         attrs = filter(None, (match.group('attr') or '').split('.'))
79         return functools.reduce(getattr, attrs, module)
~/anaconda3/envs/ml/lib/python3.8/importlib/__init__.py in import_module(name, package)
125                 break
126             level += 1
--> 127     return _bootstrap._gcd_import(name[level:], package, level)
128 
129 
~/anaconda3/envs/ml/lib/python3.8/importlib/_bootstrap.py in _gcd_import(name, package, level)
~/anaconda3/envs/ml/lib/python3.8/importlib/_bootstrap.py in _find_and_load(name, import_)
~/anaconda3/envs/ml/lib/python3.8/importlib/_bootstrap.py in _find_and_load_unlocked(name, import_)
~/anaconda3/envs/ml/lib/python3.8/importlib/_bootstrap.py in _call_with_frames_removed(f, *args, **kwds)
~/anaconda3/envs/ml/lib/python3.8/importlib/_bootstrap.py in _gcd_import(name, package, level)
~/anaconda3/envs/ml/lib/python3.8/importlib/_bootstrap.py in _find_and_load(name, import_)
~/anaconda3/envs/ml/lib/python3.8/importlib/_bootstrap.py in _find_and_load_unlocked(name, import_)
~/anaconda3/envs/ml/lib/python3.8/importlib/_bootstrap.py in _load_unlocked(spec)
~/anaconda3/envs/ml/lib/python3.8/importlib/_bootstrap_external.py in exec_module(self, module)
~/anaconda3/envs/ml/lib/python3.8/importlib/_bootstrap.py in _call_with_frames_removed(f, *args, **kwds)
~/anaconda3/envs/ml/lib/python3.8/site-packages/spacy_transformers/__init__.py in <module>
----> 1 from . import architectures
2 from . import annotation_setters
3 from . import span_getters
4 from .layers import TransformerModel
5 from .pipeline_component import Transformer, install_extensions
~/anaconda3/envs/ml/lib/python3.8/site-packages/spacy_transformers/architectures.py in <module>
3 from thinc.types import Ragged, Floats2d
4 from spacy.tokens import Doc
----> 5 from .layers import TransformerModel, TransformerListener
6 from .layers import trfs2arrays, split_trf_batch
7 from .util import registry
~/anaconda3/envs/ml/lib/python3.8/site-packages/spacy_transformers/layers/__init__.py in <module>
----> 1 from .listener import TransformerListener
2 from .transformer_model import TransformerModel
3 from .split_trf import split_trf_batch
4 from .trfs2arrays import trfs2arrays
5 
~/anaconda3/envs/ml/lib/python3.8/site-packages/spacy_transformers/layers/listener.py in <module>
2 from thinc.api import Model
3 from spacy.tokens import Doc
----> 4 from ..data_classes import TransformerData
5 
6 
~/anaconda3/envs/ml/lib/python3.8/site-packages/spacy_transformers/data_classes.py in <module>
9 import srsly
10 
---> 11 from .util import transpose_list
12 from .align import get_token_positions
13 
~/anaconda3/envs/ml/lib/python3.8/site-packages/spacy_transformers/util.py in <module>
2 from pathlib import Path
3 import random
----> 4 from transformers import AutoModel, AutoTokenizer
5 from transformers.tokenization_utils import BatchEncoding
6 from transformers.tokenization_utils_fast import PreTrainedTokenizerFast
~/anaconda3/envs/ml/lib/python3.8/site-packages/transformers/__init__.py in <module>
624 
625     # Trainer
--> 626     from .trainer import Trainer
627     from .trainer_pt_utils import torch_distributed_zero_first
628 else:
~/anaconda3/envs/ml/lib/python3.8/site-packages/transformers/trainer.py in <module>
67     TrainerState,
68 )
---> 69 from .trainer_pt_utils import (
70     DistributedTensorGatherer,
71     SequentialDistributedSampler,
~/anaconda3/envs/ml/lib/python3.8/site-packages/transformers/trainer_pt_utils.py in <module>
38     SAVE_STATE_WARNING = ""
39 else:
---> 40     from torch.optim.lr_scheduler import SAVE_STATE_WARNING
41 
42 logger = logging.get_logger(__name__)
ImportError: cannot import name 'SAVE_STATE_WARNING' from 'torch.optim.lr_scheduler' (/Users/baconbaker/anaconda3/envs/ml/lib/python3.8/site-packages/torch/optim/lr_scheduler.py)

将 torch 从当前的稳定版本 1.8.1 降级到 1.4.0 可以解决这个问题,但我不想这样做。

有其他解决方案吗?

在较新版本的 transformers 中,此问题似乎已得到修复(https://github.com/huggingface/transformers/pull/8979)。请尝试同时升级 transformers 和 spacy-transformers。

最新更新