我保存了一个使用 gzip 压缩的 Parquet 文件:
df.to_parquet("meth_450_clin_all_kipan.parquet.gz", compression="gzip")
然后我想把它加载为矩阵:
matrix = pd.read_table('../input/meth-clin-kipan/meth_450_clin_all_kipan.parquet.gz')
错误回溯(Traceback):
---------------------------------------------------------------------------
OSError Traceback (most recent call last)
/tmp/ipykernel_18/3894087199.py in <module>
----> 4 matrix = pd.read_table('../input/meth-clin-kipan/meth_450_clin_all_kipan.parquet.gz')
/opt/conda/lib/python3.7/site-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
309 stacklevel=stacklevel,
310 )
--> 311 return func(*args, **kwargs)
312
313 return wrapper
/opt/conda/lib/python3.7/site-packages/pandas/io/parsers/readers.py in read_table(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, dialect, error_bad_lines, warn_bad_lines, on_bad_lines, encoding_errors, delim_whitespace, low_memory, memory_map, float_precision)
681 kwds.update(kwds_defaults)
682
--> 683 return _read(filepath_or_buffer, kwds)
684
685
/opt/conda/lib/python3.7/site-packages/pandas/io/parsers/readers.py in _read(filepath_or_buffer, kwds)
480
481 # Create the parser.
--> 482 parser = TextFileReader(filepath_or_buffer, **kwds)
483
484 if chunksize or iterator:
/opt/conda/lib/python3.7/site-packages/pandas/io/parsers/readers.py in __init__(self, f, engine, **kwds)
809 self.options["has_index_names"] = kwds["has_index_names"]
810
--> 811 self._engine = self._make_engine(self.engine)
812
813 def close(self):
/opt/conda/lib/python3.7/site-packages/pandas/io/parsers/readers.py in _make_engine(self, engine)
1038 )
1039 # error: Too many arguments for "ParserBase"
-> 1040 return mapping[engine](self.f, **self.options) # type: ignore[call-arg]
1041
1042 def _failover_to_python(self):
/opt/conda/lib/python3.7/site-packages/pandas/io/parsers/c_parser_wrapper.py in __init__(self, src, **kwds)
67 kwds["dtype"] = ensure_dtype_objs(kwds.get("dtype", None))
68 try:
---> 69 self._reader = parsers.TextReader(self.handles.handle, **kwds)
70 except Exception:
71 self.handles.close()
/opt/conda/lib/python3.7/site-packages/pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader.__cinit__()
/opt/conda/lib/python3.7/site-packages/pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader._get_header()
/opt/conda/lib/python3.7/site-packages/pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader._tokenize_rows()
/opt/conda/lib/python3.7/site-packages/pandas/_libs/parsers.pyx in pandas._libs.parsers.raise_parser_error()
/opt/conda/lib/python3.7/_compression.py in readinto(self, b)
66 def readinto(self, b):
67 with memoryview(b) as view, view.cast("B") as byte_view:
---> 68 data = self.read(len(byte_view))
69 byte_view[:len(data)] = data
70 return len(data)
/opt/conda/lib/python3.7/gzip.py in read(self, size)
472 # jump to the next member, if there is one.
473 self._init_read()
--> 474 if not self._read_gzip_header():
475 self._size = self._pos
476 return b""
/opt/conda/lib/python3.7/gzip.py in _read_gzip_header(self)
420
421 if magic != b'\037\213':
--> 422 raise OSError('Not a gzipped file (%r)' % magic)
423
424 (method, flag,
OSError: Not a gzipped file (b'PA')
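解决方案是使用 read_parquet 读取文件,然后将其转换为 numpy 数组。
原因:to_parquet 的 compression="gzip" 只是在 Parquet 文件内部对数据做 gzip 压缩,文件本身仍然是 Parquet 格式(以魔数 PAR1 开头,因此报错信息里出现 b'PA'),并不是一个 gzip 流。而 read_table 会根据 .gz 后缀推断压缩方式,把整个文件当作 gzip 压缩的文本来解压,于是抛出 "Not a gzipped file"。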
matrix = pd.read_parquet('../input/meth-clin-kipan/meth_450_clin_all_kipan.parquet.gz').to_numpy()