写入 CSV:"UnicodeDecodeError: 'charmap' codec can't decode byte 0x9d"



我正在尝试将一个大的csv文件拆分为多个文件,我为此使用了这个代码片段。 我使用的是Python 3.7.7,使用的是Windows操作系统。我尝试添加utf8编码,但仍然不起作用。你知道为什么吗?

这是我的代码:

import os
def split(filehandler, delimiter=',', row_limit=125000, output_name_template='jokes_%s.csv', output_path='.', keep_headers=True):
"""
Splits a CSV file into multiple pieces.
A quick bastardization of the Python CSV library.
Arguments:
`row_limit`: The number of rows you want in each output file. 10,000 by default.
`output_name_template`: A %s-style template for the numbered output files.
`output_path`: Where to stick the output files.
`keep_headers`: Whether or not to print the headers in each output file.
Example usage:
>> from toolbox import csv_splitter;
>> csv_splitter.split(open('/home/ben/input.csv', 'r'));
"""
import csv
reader = csv.reader(filehandler,  delimiter=delimiter)
current_piece = 1
current_out_path = os.path.join(
output_path,
output_name_template  % current_piece
)
print(current_out_path)
current_out_writer = csv.writer(open(current_out_path, 'w', encoding='utf8', newline=''), delimiter=delimiter)
current_limit = row_limit
if keep_headers:
headers = next(reader)
current_out_writer.writerow(headers)
for i, row in enumerate(reader):
if i + 1 > current_limit:
current_piece += 1
current_limit = row_limit * current_piece
current_out_path = os.path.join(
output_path,
output_name_template  % current_piece
)
print(current_out_path)
current_out_writer = csv.writer(open(current_out_path, 'w', encoding='utf8', newline=''), delimiter=delimiter)
if keep_headers:
current_out_writer.writerow(headers)
current_out_writer.writerow(row)
split(open('jokes.csv', 'r'))

这是错误消息:

File "csv_cutter.py", line 47, in <module>
split(open('jokes.csv', 'r'))
File "csv_cutter.py", line 33, in split
for i, row in enumerate(reader):
File "C:Program FilesPython37libencodingscp1252.py", line 23, in decode
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
UnicodeDecodeError: 'charmap' codec can't decode byte 0x9d in position 6409: character maps to <undefined>

您可以将split(open('jokes.csv', 'r'))更改为split(open('jokes.csv', 'r', encoding="utf8"))并尝试一下。

相关内容

最新更新