在 Google Colaboratory(Colab)上,我尝试了全部 3 种运行时:CPU、GPU 和 TPU,都出现了相同的错误。
单元格(Cell)代码:
# NB: Only run in TPU environment
!pip install cloud-tpu-client==0.10 https://storage.googleapis.com/tpu-pytorch/wheels/torch_xla-1.8-cp37-cp37m-linux_x86_64.whl
!pip -q install pytorch-lightning==1.2.7 transformers torchmetrics awscli mlflow boto3 pycm
import os
import sys
import logging
from pytorch_lightning import LightningDataModule
错误:
ImportError Traceback (most recent call last)
<ipython-input-6-09509a67016b> in <module>()
3 import logging
4
----> 5 from pytorch_lightning import LightningDataModule
6 from torch.utils.data import DataLoader, Dataset
7 from transformers import AutoTokenizer
/usr/local/lib/python3.7/dist-packages/pytorch_lightning/__init__.py in <module>()
26 _PROJECT_ROOT = os.path.dirname(_PACKAGE_ROOT)
27
---> 28 from pytorch_lightning import metrics # noqa: E402
29 from pytorch_lightning.callbacks import Callback # noqa: E402
30 from pytorch_lightning.core import LightningDataModule, LightningModule # noqa: E402
/usr/local/lib/python3.7/dist-packages/pytorch_lightning/metrics/__init__.py in <module>()
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
---> 14 from pytorch_lightning.metrics.classification import ( # noqa: F401
15 Accuracy,
16 AUC,
/usr/local/lib/python3.7/dist-packages/pytorch_lightning/metrics/classification/__init__.py in <module>()
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
---> 14 from pytorch_lightning.metrics.classification.accuracy import Accuracy # noqa: F401
15 from pytorch_lightning.metrics.classification.auc import AUC # noqa: F401
16 from pytorch_lightning.metrics.classification.auroc import AUROC # noqa: F401
/usr/local/lib/python3.7/dist-packages/pytorch_lightning/metrics/classification/accuracy.py in <module>()
16 import torch
17
---> 18 from pytorch_lightning.metrics.functional.accuracy import _accuracy_compute, _accuracy_update
19 from pytorch_lightning.metrics.metric import Metric
20
/usr/local/lib/python3.7/dist-packages/pytorch_lightning/metrics/functional/__init__.py in <module>()
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
---> 14 from pytorch_lightning.metrics.functional.accuracy import accuracy # noqa: F401
15 from pytorch_lightning.metrics.functional.auc import auc # noqa: F401
16 from pytorch_lightning.metrics.functional.auroc import auroc # noqa: F401
/usr/local/lib/python3.7/dist-packages/pytorch_lightning/metrics/functional/accuracy.py in <module>()
16 import torch
17
---> 18 from pytorch_lightning.metrics.classification.helpers import _input_format_classification, DataType
19
20
/usr/local/lib/python3.7/dist-packages/pytorch_lightning/metrics/classification/helpers.py in <module>()
17 import torch
18
---> 19 from pytorch_lightning.metrics.utils import select_topk, to_onehot
20 from pytorch_lightning.utilities import LightningEnum
21
/usr/local/lib/python3.7/dist-packages/pytorch_lightning/metrics/utils.py in <module>()
16 import torch
17
---> 18 from pytorch_lightning.utilities import rank_zero_warn
19
20 METRIC_EPS = 1e-6
/usr/local/lib/python3.7/dist-packages/pytorch_lightning/utilities/__init__.py in <module>()
46 )
47 from pytorch_lightning.utilities.parsing import AttributeDict, flatten_dict, is_picklable # noqa: F401
---> 48 from pytorch_lightning.utilities.xla_device import XLADeviceUtils # noqa: F401
49
50 _TPU_AVAILABLE = XLADeviceUtils.tpu_device_exists()
/usr/local/lib/python3.7/dist-packages/pytorch_lightning/utilities/xla_device.py in <module>()
21
22 if _XLA_AVAILABLE:
---> 23 import torch_xla.core.xla_model as xm
24
25 #: define waiting time got checking TPU available in sec
/usr/local/lib/python3.7/dist-packages/torch_xla/__init__.py in <module>()
126 import torch
127 from ._patched_functions import _apply_patches
--> 128 import _XLAC
129
130
ImportError: /usr/local/lib/python3.7/dist-packages/_XLAC.cpython-37m-x86_64-linux-gnu.so: undefined symbol: _ZN2at11result_typeERKNS_6TensorEN3c106ScalarE
我在网上搜索过,同一个错误似乎可能由很多不同的原因引起。
在我的情况下,在 Google Colaboratory 的运行时设置中将硬件加速器(Accelerator)设置为 None 解决了这个问题。
当 torch 和 torch-xla 的版本不匹配时,就会出现此错误。您可以使用 pip list | grep torch 查看已安装的版本。
似乎是在使用 GPU 运行时的同时安装了 cloud-tpu-client,导致 Google Colab 出现混乱。我卸载了这个包之后,一切就正常了。
!pip uninstall cloud-tpu-client