使用 TensorFlow Datasets 下载 COCO 时出现 "Error while extracting" 错误



我想在 COCO 上训练一个 TensorFlow 图像分割模型,并想利用已经内置的数据集构建器。下载似乎已完成,但在解压 zip 文件时崩溃。

在 conda 环境下的 Jupyter 笔记本中使用 TF 2.0.0 运行。计算机是 64 位 Windows 10。官方图像分割教程中使用的 Oxford-IIIT Pet 数据集工作正常。

下面是错误消息(我的本地用户名已替换为 %user%)。

---------------------------------------------------------------------------
OutOfRangeError                           Traceback (most recent call last)
~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_datasets\core\download\extractor.py in _sync_extract(self, from_path, method, to_path)
88     try:
---> 89       for path, handle in iter_archive(from_path, method):
90         path = tf.compat.as_text(path)
~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_datasets\core\download\extractor.py in iter_zip(arch_f)
176   with _open_or_pass(arch_f) as fobj:
--> 177     z = zipfile.ZipFile(fobj)
178     for member in z.infolist():
~\.conda\envs\tf-tutorial\lib\zipfile.py in __init__(self, file, mode, compression, allowZip64)
1130             if mode == 'r':
-> 1131                 self._RealGetContents()
1132             elif mode in ('w', 'x'):
~\.conda\envs\tf-tutorial\lib\zipfile.py in _RealGetContents(self)
1193         try:
-> 1194             endrec = _EndRecData(fp)
1195         except OSError:
~\.conda\envs\tf-tutorial\lib\zipfile.py in _EndRecData(fpin)
263     # Determine file size
--> 264     fpin.seek(0, 2)
265     filesize = fpin.tell()
~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_core\python\util\deprecation.py in new_func(*args, **kwargs)
506                 instructions)
--> 507       return func(*args, **kwargs)
508 
~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_core\python\lib\io\file_io.py in seek(self, offset, whence, position)
166       elif whence == 2:
--> 167         offset += self.size()
168       else:
~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_core\python\lib\io\file_io.py in size(self)
101     """Returns the size of the file."""
--> 102     return stat(self.__name).length
103 
~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_core\python\lib\io\file_io.py in stat(filename)
726   """
--> 727   return stat_v2(filename)
728 
~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_core\python\lib\io\file_io.py in stat_v2(path)
743   file_statistics = pywrap_tensorflow.FileStatistics()
--> 744   pywrap_tensorflow.Stat(compat.as_bytes(path), file_statistics)
745   return file_statistics
OutOfRangeError: C:\Users\%user%\tensorflow_datasets\downloads\images.cocodataset.org_zips_train20147eQIfmQL3bpVDgkOrnAQklNLVUtCsFrDPwMAuYSzF3U.zip; Unknown error
During handling of the above exception, another exception occurred:
ExtractError                              Traceback (most recent call last)
<ipython-input-27-887fa0198611> in <module>
1 cocoBuilder = tfds.builder('coco')
2 info = cocoBuilder.info
----> 3 cocoBuilder.download_and_prepare()
~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_datasets\core\api_utils.py in disallow_positional_args_dec(fn, instance, args, kwargs)
50     _check_no_positional(fn, args, ismethod, allowed=allowed)
51     _check_required(fn, kwargs)
---> 52     return fn(*args, **kwargs)
53 
54   return disallow_positional_args_dec(wrapped)  # pylint: disable=no-value-for-parameter
~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_datasets\core\dataset_builder.py in download_and_prepare(self, download_dir, download_config)
285         self._download_and_prepare(
286             dl_manager=dl_manager,
--> 287             download_config=download_config)
288 
289         # NOTE: If modifying the lines below to put additional information in
~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_datasets\core\dataset_builder.py in _download_and_prepare(self, dl_manager, download_config)
946     super(GeneratorBasedBuilder, self)._download_and_prepare(
947         dl_manager=dl_manager,
--> 948         max_examples_per_split=download_config.max_examples_per_split,
949     )
950 
~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_datasets\core\dataset_builder.py in _download_and_prepare(self, dl_manager, **prepare_split_kwargs)
802     # Generating data for all splits
803     split_dict = splits_lib.SplitDict()
--> 804     for split_generator in self._split_generators(dl_manager):
805       if splits_lib.Split.ALL == split_generator.split_info.name:
806         raise ValueError(
~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_datasets\image\coco.py in _split_generators(self, dl_manager)
237     root_url = 'http://images.cocodataset.org/'
238     extracted_paths = dl_manager.download_and_extract({
--> 239         key: root_url + url for key, url in urls.items()
240     })
241 
~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_datasets\core\download\download_manager.py in download_and_extract(self, url_or_urls)
357     with self._downloader.tqdm():
358       with self._extractor.tqdm():
--> 359         return _map_promise(self._download_extract, url_or_urls)
360 
361   @property
~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_datasets\core\download\download_manager.py in _map_promise(map_fn, all_inputs)
393   """Map the function into each element and resolve the promise."""
394   all_promises = utils.map_nested(map_fn, all_inputs)  # Apply the function
--> 395   res = utils.map_nested(_wait_on_promise, all_promises)
396   return res
~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_datasets\core\utils\py_utils.py in map_nested(function, data_struct, dict_only, map_tuple)
127     return {
128         k: map_nested(function, v, dict_only, map_tuple)
--> 129         for k, v in data_struct.items()
130     }
131   elif not dict_only:
~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_datasets\core\utils\py_utils.py in <dictcomp>(.0)
127     return {
128         k: map_nested(function, v, dict_only, map_tuple)
--> 129         for k, v in data_struct.items()
130     }
131   elif not dict_only:
~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_datasets\core\utils\py_utils.py in map_nested(function, data_struct, dict_only, map_tuple)
141         return tuple(mapped)
142   # Singleton
--> 143   return function(data_struct)
144 
145 
~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_datasets\core\download\download_manager.py in _wait_on_promise(p)
377 
378   def _wait_on_promise(p):
--> 379     return p.get()
380 
381 else:
~\.conda\envs\tf-tutorial\lib\site-packages\promise\promise.py in get(self, timeout)
508         target = self._target()
509         self._wait(timeout or DEFAULT_TIMEOUT)
--> 510         return self._target_settled_value(_raise=True)
511 
512     def _target_settled_value(self, _raise=False):
~\.conda\envs\tf-tutorial\lib\site-packages\promise\promise.py in _target_settled_value(self, _raise)
512     def _target_settled_value(self, _raise=False):
513         # type: (bool) -> Any
--> 514         return self._target()._settled_value(_raise)
515 
516     _value = _reason = _target_settled_value
~\.conda\envs\tf-tutorial\lib\site-packages\promise\promise.py in _settled_value(self, _raise)
222             if _raise:
223                 raise_val = self._fulfillment_handler0
--> 224                 reraise(type(raise_val), raise_val, self._traceback)
225             return self._fulfillment_handler0
226 
~\.conda\envs\tf-tutorial\lib\site-packages\six.py in reraise(tp, value, tb)
694             if value.__traceback__ is not tb:
695                 raise value.with_traceback(tb)
--> 696             raise value
697         finally:
698             value = None
~\.conda\envs\tf-tutorial\lib\site-packages\promise\promise.py in handle_future_result(future)
840         # type: (Any) -> None
841         try:
--> 842             resolve(future.result())
843         except Exception as e:
844             tb = exc_info()[2]
~\.conda\envs\tf-tutorial\lib\concurrent\futures\_base.py in result(self, timeout)
423                 raise CancelledError()
424             elif self._state == FINISHED:
--> 425                 return self.__get_result()
426 
427             self._condition.wait(timeout)
~\.conda\envs\tf-tutorial\lib\concurrent\futures\_base.py in __get_result(self)
382     def __get_result(self):
383         if self._exception:
--> 384             raise self._exception
385         else:
386             return self._result
~\.conda\envs\tf-tutorial\lib\concurrent\futures\thread.py in run(self)
54 
55         try:
---> 56             result = self.fn(*self.args, **self.kwargs)
57         except BaseException as exc:
58             self.future.set_exception(exc)
~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_datasets\core\download\extractor.py in _sync_extract(self, from_path, method, to_path)
92     except BaseException as err:
93       msg = 'Error while extracting %s to %s : %s' % (from_path, to_path, err)
---> 94       raise ExtractError(msg)
95     # `tf.io.gfile.Rename(overwrite=True)` doesn't work for non empty
96     # directories, so delete destination first, if it already exists.
ExtractError: Error while extracting C:\Users\%user%\tensorflow_datasets\downloads\images.cocodataset.org_zips_train20147eQIfmQL3bpVDgkOrnAQklNLVUtCsFrDPwMAuYSzF3U.zip to C:\Users\%user%\tensorflow_datasets\downloads\extracted\ZIP.images.cocodataset.org_zips_train20147eQIfmQL3bpVDgkOrnAQklNLVUtCsFrDPwMAuYSzF3U.zip : C:\Users\%user%\tensorflow_datasets\downloads\images.cocodataset.org_zips_train20147eQIfmQL3bpVDgkOrnAQklNLVUtCsFrDPwMAuYSzF3U.zip; Unknown error

这条消息对我来说似乎很神秘。笔记本启动时,它试图解压到的文件夹并不存在——该文件夹由 TensorFlow 创建,并且仅在执行这行命令时才会创建。我当然也尝试过将其完全删除后重新运行,但没有效果。

导致错误的代码如下(直到最后一行之前一切正常):

import tensorflow as tf
from __future__ import absolute_import, division, print_function, unicode_literals
from tensorflow_examples.models.pix2pix import pix2pix
import tensorflow_datasets as tfds
from IPython.display import clear_output
import matplotlib.pyplot as plt
dataset, info = tfds.load('coco', with_info=True)

我还尝试将最后一条命令拆分为先创建 tfds.builder 对象、再运行 download_and_prepare,结果再次收到相同的错误。

磁盘空间充足——下载后仍有 50+GB 可用空间,而该数据集最大的版本(2014 年)应为 37GB。

我在 Windows 10 上使用 COCO 2017 时也遇到了类似的问题。我的解决办法很简单:按照错误消息中给出的文件夹路径,手动解压 ZIP 文件。

相关内容

最新更新