from num2words import num2words
import re
from googletrans import Translator
import time, os, glob
import concurrent.futures
# Wall-clock reference taken at import time; clean_file prints elapsed time against it.
start_time = time.time()
# Shared googletrans client; n2w uses it to translate number words (dest='hi').
translator = Translator()
# Directory that the main section changes into and scans for *.txt input files.
src_dir="/home/lol/patrika1"
# Output file that the main section opens in append mode for the cleaned text.
dest_file="/home/lol/df.txt"
# Running counter incremented by clean_file after each file; note it starts at 1, not 0.
counter=1
def n2w(match):
    """Spell out the number captured by a regex match and translate it to Hindi.

    Presumably meant to be passed as the replacement callback of ``re.sub``
    (it takes a match object and returns a string) -- confirm against the
    elided cleaning logic.

    Args:
        match: Match object whose group 1 holds a string that ``int`` can parse.

    Returns:
        The ``.text`` of the translation (``dest='hi'``) that the module-level
        ``translator`` produces for the ``num2words`` spelling of the number.

    Raises:
        ValueError: If group 1 is not a valid integer literal.
    """
    number = int(match.group(1))
    spelled_out = num2words(number)
    return translator.translate(spelled_out, dest='hi').text
def clean_file(file_name):
    """Clean one input file and append the result to the shared output file.

    Reads ``file_name``, runs the (elided) cleaning logic, writes the result
    to the module-level ``cleaned`` file object and prints progress.

    The ``counter`` increment only affects the process this function runs in.
    When it is executed by a ``ProcessPoolExecutor`` worker, the parent
    process never sees the change, so the parent must do its own counting.

    Args:
        file_name: Path of the text file to clean.

    Returns:
        None.
    """
    global counter
    # Context manager closes the handle even when read() raises.
    with open(file_name, 'r') as fil:
        lines = fil.read()
    # more logic
    # NOTE(review): ``result2`` is not defined in the visible code; presumably
    # the elided logic above derives it from ``lines``.
    # NOTE(review): as written this collapses runs of the letter "n". It may
    # originally have been r'[\n]+' -> '\n' with the backslashes lost; confirm.
    result = re.sub(r'[n]+', 'n', result2)
    counter += 1
    print(counter)
    print(file_name)
    cleaned.write(result)
    # Flush immediately: a pool worker may exit without flushing its own copy
    # of the buffered file object, which would silently drop the text.
    cleaned.flush()
    print("--- %s seconds ---" % (time.time() - start_time))
if __name__ == '__main__':
    # Entry point: clean the first ten *.txt files of src_dir in a process pool
    # and report how many were processed.
    os.chdir(src_dir)
    file_list = glob.glob("*.txt")
    # ``cleaned`` must be a module-level name because clean_file writes to it.
    # Workers only inherit it when they are forked from this process; with the
    # spawn/forkserver start methods the name does not exist in the workers and
    # clean_file raises NameError.
    with open(dest_file, 'a') as cleaned:
        with concurrent.futures.ProcessPoolExecutor() as executor:
            # Count in the parent: each worker increments its own copy of
            # ``counter``, so the parent's copy never changes. Consuming the
            # map results also re-raises any worker exception here instead of
            # silently discarding it.
            processed = sum(1 for _ in executor.map(clean_file, file_list[:10]))
    print("finish " + str(processed))
在主程序中打印 counter 时,它的值始终是 1。怎样才能统计该函数已经处理了多少个文件?
通常不建议使用全局变量(它统计的是什么?谁修改了它?如果把两个脚本合并,而它们在两种不同的场景下使用同一个"计数"变量,会怎样?)。您可以改用下面这种结构:
class FileCleaner:
    """Sketch of a file cleaner that keeps its own count of processed files.

    The count lives on the class instead of in a module-level global, so it is
    read as ``FileCleaner.Counter``. Like any class attribute it is local to
    the process: increments made inside pool worker processes are not visible
    to the parent process.
    """

    # Number of ``clean`` calls completed in the current process.
    Counter = 0

    @classmethod
    def clean(cls, file_name):
        """Clean ``file_name`` (logic elided) and record one processed file."""
        ...
        cls.Counter += 1
        ...
然后通过 FileCleaner.Counter 读取计数。
也许在使用 ProcessPoolExecutor 模型之前,应该先把代码整理清楚,因为现在的代码不容易阅读(希望子解释器 subinterpreters 很快能在这方面帮到您)。您需要拆分文件列表,在 try/except/else 中调用 executor,并在成功时把计数加 1——这些都应该在文件清理池类的内部完成,而不是在主程序中完成。
import concurrent.futures
import threading
import math
# Sample inputs handed to PrimePoolTester.execute; 112272535095293 is listed twice.
PRIMES = [
112272535095293,
112582705942171,
112272535095293,
115280095190773,
115797848077099,
1099726899285419]
class PrimePoolTester:
    """Run primality checks on a process pool and count the reported results."""

    # Number of results reported so far by ``execute`` in this process.
    Counter = 0
    # Guards ``Counter``: ``execute`` is called from several threads at once,
    # and ``+=`` on a class attribute is a non-atomic read-modify-write.
    _counter_lock = threading.Lock()

    @classmethod
    def is_prime(cls, n):
        """Return True when ``n`` is prime, using trial division by odd numbers.

        Args:
            n: Integer to test. Values below 2 are never prime.

        Returns:
            bool: Whether ``n`` is prime.
        """
        if n < 2:
            # Also keeps math.sqrt away from negative input.
            return False
        if n == 2:
            # The only even prime; the parity check below would reject it.
            return True
        if n % 2 == 0:
            return False
        sqrt_n = int(math.floor(math.sqrt(n)))
        for i in range(3, sqrt_n + 1, 2):
            if n % i == 0:
                return False
        return True

    @classmethod
    def execute(cls, primes):
        """Test every number in ``primes`` on a process pool and print results.

        The pool workers only compute ``is_prime``; counting and printing
        happen in the calling thread, so ``Counter`` is updated in the process
        that called ``execute``.

        Args:
            primes: Sequence of integers to test.
        """
        with concurrent.futures.ProcessPoolExecutor() as executor:
            for number, prime in zip(primes, executor.map(cls.is_prime, primes)):
                with cls._counter_lock:
                    cls.Counter += 1
                    # Snapshot under the lock so the printed value belongs to
                    # this result even if another thread increments next.
                    count = cls.Counter
                print('(%s)-%d : %d is prime: %s' % (threading.current_thread().name, count, number, prime))
class Runner_interpreter:
    """Start a fixed number of daemon threads that all run the same callable."""

    def __init__(self, thread_count, worker):
        """Create and immediately start ``thread_count`` threads.

        Args:
            thread_count: Number of threads to start.
            worker: Zero-argument callable each thread runs once.
        """
        # Started threads, kept so that join() can wait for them.
        self.thr = []
        for _ in range(thread_count):
            t = threading.Thread(target=worker, daemon=True)
            t.start()
            self.thr.append(t)

    def join(self):
        """Block until every started thread has finished."""
        for th in self.thr:
            th.join()
if __name__ == '__main__':
    # Entry point: run one PRIMES batch per thread, each batch on its own
    # process pool.

    # Single source of truth for both the thread count and the number of
    # batches: every job() call pops exactly one batch, so the two must match
    # or pop(0) raises IndexError on an empty list.
    THREAD_COUNT = 4

    def job():
        """Take the next batch off the shared worklist and test it."""
        PrimePoolTester.execute(worklist.pop(0))

    worklist = [PRIMES] * THREAD_COUNT
    #use 4 "core"
    Runner_interpreter(THREAD_COUNT, job).join()