import re, os
from multiprocessing import Pool
# Folder that holds the input text files. A raw string literal cannot end in
# a single backslash (it would escape the closing quote), so the separators
# are written as escaped backslashes in a normal string instead.
directory = 'F:\\data\\'
# Bare entry names found in that folder,
# e.g. ['a.txt', 'b.txt', 'c.txt', 'd.txt', 'e.txt']
raw_files = os.listdir(directory)
# TARGET FUNCTION
def print_result(raw_files):
    """Print the text of every listed file that contains a match for 'target'.

    Each element of ``raw_files`` is appended to the module-level
    ``directory`` string and the resulting path is read as UTF-16 text.

    NOTE: ``Pool.map`` in the ``__main__`` section hands this function one
    element of its iterable per call, so ``raw_files`` is then a single
    file-name string and the loop below walks over its characters.
    """
    for name in raw_files:
        # Observed failure when run through Pool.map:
        # FileNotFoundError: [Errno 2] No such file or directory: 'F:\corpus_duplicated\\2'
        with open(directory + name, 'r', encoding='utf-16') as handle:
            text = handle.read()
        if re.search('target', text) is None:
            continue
        print(text)
if __name__ == '__main__':
    # Show the first three listed names as a quick sanity check,
    # e.g. ['a.txt', 'b.txt', 'c.txt'].
    print(raw_files[0:3])
    # Four worker processes; map() blocks until the result is ready.
    workers = Pool(4)
    workers.map(print_result, raw_files)
    # Stop accepting new work, then wait for the workers to exit.
    workers.close()
    workers.join()
我希望 `directory+raw_file` 得到的是 `F:\data\a.txt`,
但实际得到的却是错误消息中看到的那种路径(例如 `F:\corpus_duplicated\\2`)。
我想我还不太理解多进程(multiprocessing),搜索之后仍然不明白原因。
谢谢你的帮助。(上面的代码是经过精简的版本。)
应该修改函数 `print_result`,使其每次只处理一个文件。
您把文件名列表交给 `map`,它会把列表中的各项分块,
分发给各个独立的进程,并对每一项分别调用一次 `print_result`:
import re, os
from multiprocessing import Pool
# Input folder that is scanned for files to search.
directory = 'F:/data/'
# Names (not full paths) of the entries inside the input folder.
raw_files = os.listdir(path=directory)
# TARGET FUNCTION
# input to this function should be a single file
def print_result(raw_file):
    """Print the full text of one file if it contains a match for 'target'.

    Meant to be called once per file name, which is how ``Pool.map``
    invokes it in the ``__main__`` section.

    Args:
        raw_file: Bare file name, resolved against the module-level
            ``directory``.

    Raises:
        OSError: If the file cannot be opened (for example, it does not
            exist).
        UnicodeError: If the content cannot be decoded as UTF-16.
    """
    # os.path.join inserts a separator only when one is needed, so this works
    # whether or not ``directory`` ends with a slash.
    path = os.path.join(directory, raw_file)
    with open(path, 'r', encoding='utf-16') as f:
        raw = f.read()
    # The file is already closed here; only the in-memory text is searched.
    if re.search('target', raw):
        print(raw)
if __name__ == '__main__':
    # Quick look at the first few names, e.g. ['a.txt', 'b.txt', 'c.txt'].
    print(raw_files[:3])
    pool = Pool(processes=4)
    try:
        # One call of print_result per file name, spread over the workers;
        # map() blocks until the result is ready.
        pool.map(print_result, raw_files)
    finally:
        # Always shut the pool down, even if a worker call raised, so
        # close()/join() are not skipped. close() + join() lets the workers
        # exit normally instead of being terminated.
        pool.close()
        pool.join()
您可能需要将 `directory` 中的正斜杠(/)改为反斜杠(\)。