# Sequential baseline: fetch each fund page one after another and collect
# the stripped text of every matched cell whose text is not a single space.
returns = []
for fund_id in range(40, 80):
    page = requests.get(
        f'https://www.mutualfundindia.com/MF/Performance/Details?id={fund_id}'
    )
    document = html.fromstring(page.content)
    cells = document.xpath('//*[@id="collPerformanceAnalysis"]/div/div[3]/div[7]')
    # A cell whose text is exactly one space is treated as blank and skipped.
    returns.extend(str.strip(cell.text) for cell in cells if cell.text != ' ')
目前获取40个结果需要60秒。我在网上看到可以用多进程(multiprocessing)来加快速度。我看了很多视频,但始终没能让它正常运行。请帮帮我。
下面是使用 multiprocessing.Pool 的解决方案。您可以进一步调整传给 Pool 的进程数(此处为 multiprocessing.cpu_count())以找到最佳值。目前,它创建的进程数与机器上可用的 CPU 核心数相同。
import multiprocessing
import sys
# Other imports here...
# Module-level accumulator holding the scraped values from every page.
returns = []


def callback(result):
    """Merge one page's values into the module-level ``returns`` list.

    Args:
        result: Iterable of values produced for a single page; its items
            are appended to ``returns`` in iteration order.
    """
    returns.extend(result)
def f(x, timeout=30):
    """Fetch the performance page for fund id ``x`` and return its cell texts.

    Args:
        x: Value substituted into the ``id`` query parameter of the URL.
        timeout: Seconds passed to ``requests.get`` as its timeout. Without
            one, a single stalled connection would block the caller
            indefinitely.

    Returns:
        A list with the whitespace-stripped text of every element matched by
        the XPath expression, skipping elements that have no text or whose
        text is exactly a single space. Empty when nothing matches.

    Raises:
        Whatever ``requests.get`` raises on connection failure or timeout.
    """
    url = f'https://www.mutualfundindia.com/MF/Performance/Details?id={x}'
    r = requests.get(url, timeout=timeout)
    # NOTE(review): `html` is presumably lxml.html (imported outside this
    # block) -- confirm.
    tree = html.fromstring(r.content)
    inception = tree.xpath('//*[@id="collPerformanceAnalysis"]/div/div[3]/div[7]')
    # An element without leading text reports `.text` as None (lxml
    # behaviour); str.strip(None) would raise TypeError, so skip those too.
    return [
        i.text.strip()
        for i in inception
        if i.text is not None and i.text != ' '
    ]
if __name__ == "__main__":
    # One worker process per CPU core. The context manager guarantees the
    # workers are torn down even if this block is interrupted.
    with multiprocessing.Pool(multiprocessing.cpu_count()) as pool:
        # Keep every AsyncResult handle: a bare apply_async() without an
        # error_callback or a later .get() silently discards any exception
        # raised in the worker.
        pending = [(i, pool.apply_async(f, args=(i,))) for i in range(40, 80)]
        pool.close()
        # Merge in submission (id) order so `returns` is ordered the same way
        # on every run instead of by whichever request finished first.
        for fund_id, handle in pending:
            try:
                callback(handle.get())
            except Exception as exc:
                # Best effort: report the failed page and carry on.
                print(f"id={fund_id} failed: {exc!r}", file=sys.stderr)
        pool.join()
    print(returns)