Run functions in parallel and save the return values in a list using Python



I have a function that scrapes the href links from a particular page and returns the result. I want to call this function in parallel to save time. I have seen this question, Running the same function for multiple files in parallel in python, but the challenge is that I need to save the returned elements in a list. How can I do that? Here is my code snippet.

import re
import threading

import requests
from bs4 import BeautifulSoup

url = "https://www.programmableweb.com/category/all/apis"
response = requests.get(url)
data = response.text
soup = BeautifulSoup(data, 'html.parser')

# function to scrape individual pages
def scrap_api_url(i):
    print(i)
    page_url = "https://www.programmableweb.com" + mid_url + '=' + str(i)
    response = requests.get(page_url)
    data = response.text
    soup = BeautifulSoup(data, 'html.parser')
    all_api = soup.find_all('tr', class_=re.compile('^(even|odd)$'))
    return all_api

url_tag = soup.find('a', {'title': 'Go to next page'})
mid_url = url_tag.get('href').split('=')[0]

threads = []

# calling functions
if __name__ == '__main__':
    inputs = [i for i in range(851)]
    for item in inputs:
        print('Thread Started :: ', item)
        t = threading.Thread(target=scrap_api_url, args=(item,))
        threads.append(t)
        t.start()

    h = []
    for t in threads:
        h.append(t.join())

Thread.join() returns None rather than the target function's return value, so h ends up as a list of Nones. You can use the map method of ThreadPoolExecutor instead: it runs the calls on a pool of worker threads and yields each call's return value, in input order:

import re
from concurrent.futures import ThreadPoolExecutor

import requests
from bs4 import BeautifulSoup

def main():
    url = "https://www.programmableweb.com/category/all/apis"
    response = requests.get(url)
    data = response.text
    soup = BeautifulSoup(data, 'html.parser')
    url_tag = soup.find('a', {'title': 'Go to next page'})
    mid_url = url_tag.get('href').split('=')[0]

    # function to scrape individual pages
    def scrap_api_url(i):
        print(i)
        page_url = "https://www.programmableweb.com" + mid_url + '=' + str(i)
        response = requests.get(page_url)
        data = response.text
        soup = BeautifulSoup(data, 'html.parser')
        all_api = soup.find_all('tr', class_=re.compile('^(even|odd)$'))
        return all_api

    inputs = [i for i in range(851)]
    with ThreadPoolExecutor() as executor:
        future_results = executor.map(scrap_api_url, inputs)
        results = [result for result in future_results]
    print(results)

# calling functions
if __name__ == '__main__':
    main()
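If you also want to cap the number of concurrent requests and keep going when an individual page fails, a variant with executor.submit and as_completed works too. This is a minimal sketch, not part of the original answer: the fetch_page and scrape_all names and the max_workers=20 cap are illustrative choices, and it assumes the same mid_url page scheme as above.

import re
from concurrent.futures import ThreadPoolExecutor, as_completed

import requests
from bs4 import BeautifulSoup

def fetch_page(page_url):
    # illustrative helper: fetch one listing page and return its table rows
    response = requests.get(page_url)
    soup = BeautifulSoup(response.text, 'html.parser')
    return soup.find_all('tr', class_=re.compile('^(even|odd)$'))

def scrape_all(mid_url, page_count):
    results = []
    # max_workers is an illustrative cap on concurrent requests
    with ThreadPoolExecutor(max_workers=20) as executor:
        # map each future back to its page number so failures can be reported
        futures = {
            executor.submit(
                fetch_page,
                "https://www.programmableweb.com" + mid_url + '=' + str(i)): i
            for i in range(page_count)
        }
        for future in as_completed(futures):
            page = futures[future]
            try:
                # future.result() re-raises any exception from the worker
                results.extend(future.result())
            except requests.RequestException as exc:
                print('page', page, 'failed:', exc)
    return results

Note that unlike executor.map, as_completed yields futures in completion order, not page order, so keep the futures-to-page mapping if ordering matters to you.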
