用Python在Excel中保存字典



我需要你帮忙在Excel中保存数据。我已经解析了一些网站,我需要在Excel中输入字典。

from scrapingbee import ScrapingBeeClient
import requests
from bs4 import BeautifulSoup
import pandas as pd
SCRAPINGBEE_API_KEY = "bzzzz"
endpoint = "https://app.scrapingbee.com/api/v1"
pages = [
'https://www.businesslist.com.ng/category/restaurants/1/city:lagos',
'https://www.businesslist.com.ng/category/restaurants/2/city:lagos',
'https://www.businesslist.com.ng/category/restaurants/3/city:lagos',
'https://www.businesslist.com.ng/category/restaurants/4/city:lagos',
'https://www.businesslist.com.ng/category/restaurants/5/city:lagos',
'https://www.businesslist.com.ng/category/restaurants/6/city:lagos',
'https://www.businesslist.com.ng/category/restaurants/7/city:lagos',
'https://www.businesslist.com.ng/category/restaurants/8/city:lagos',
'https://www.businesslist.com.ng/category/restaurants/9/city:lagos',
'https://www.businesslist.com.ng/category/restaurants/10/city:lagos',
'https://www.businesslist.com.ng/category/restaurants/11/city:lagos',
'https://www.businesslist.com.ng/category/restaurants/12/city:lagos',
'https://www.businesslist.com.ng/category/restaurants/13/city:lagos',
'https://www.businesslist.com.ng/category/restaurants/14/city:lagos',
'https://www.businesslist.com.ng/category/restaurants/15/city:lagos'
]
rest = []
#GET_LINKS
for url in pages[:1]:
params = {
'api_key': SCRAPINGBEE_API_KEY,
'url': url}
response = requests.get(endpoint, params=params)
soup = BeautifulSoup(response.content, 'html.parser')
body = soup.find_all('h4')
for items in body:
item = items.find('a').get('href')
item_link = 'https://www.businesslist.com.ng' + item
rest.append(item_link)
#GET_REST
for url in rest[:2]:
params = {
'api_key': SCRAPINGBEE_API_KEY,
'url': url}
info = {}
response = requests.get(endpoint, params=params)
soup = BeautifulSoup(response.content, 'html.parser')
restaraunt_name = soup.find('b', {'id':'company_name'}).text
info.update({'Restaraunt':restaraunt_name})
location = soup.find('div', {'class':'text location'}).text.split('View Map')[0]
info.update({'Location':location})
phone = soup.find('div', {'class':'text phone'}).text[:11]
info.update({'Phone':phone})    
web = soup.find('div', {'class':'text weblinks'}).text
info.update({'web':web})
df = pd.DataFrame(info)
df.to_excel('./Lagos.xlsx') 

我从列表'rest'中获得解析链接,然后从该链接获取数据。然后我想保存每个项目从所有链接到字典'info'。然后保存到Excel文件。但是代码只保存一行,而不是全部。我错过了什么。

你在循环中保存df与相同的名称,它将只创建一个字典(意味着excel中的每个循环值)。所以你最好在循环外创建一个空的数据框,并在循环执行完成后将其保存到excel文件中。

修改后的代码将像

all_info = pd.DataFrame()
for url in rest[:2]:
params = {
'api_key': SCRAPINGBEE_API_KEY,
'url': url}
info = {}
response = requests.get(endpoint, params=params)
soup = BeautifulSoup(response.content, 'html.parser')
restaraunt_name = soup.find('b', {'id':'company_name'}).text
info.update({'Restaraunt':restaraunt_name})
location = soup.find('div', {'class':'text location'}).text.split('View Map')[0]
info.update({'Location':location})
phone = soup.find('div', {'class':'text phone'}).text[:11]
info.update({'Phone':phone})
web = soup.find('div', {'class':'text weblinks'}).text
info.update({'web':web})
if len(all_info) == 0:
all_info = pd.DataFrame(info, index=range(len(info)))
else:
all_info = all_info.append(pd.DataFrame(info))
all_info.to_excel('./Lagos.xlsx')

创建一个包含所有数据的列表,然后将其转换为数据框,然后输出到Excel文件。

from scrapingbee import ScrapingBeeClient
import requests
from bs4 import BeautifulSoup
import pandas as pd
SCRAPINGBEE_API_KEY = "zzzzzzzzz"
endpoint = "https://app.scrapingbee.com/api/v1"
pages = [
'https://www.businesslist.com.ng/category/restaurants/1/city:lagos',
'https://www.businesslist.com.ng/category/restaurants/2/city:lagos',
'https://www.businesslist.com.ng/category/restaurants/3/city:lagos',
'https://www.businesslist.com.ng/category/restaurants/4/city:lagos',
'https://www.businesslist.com.ng/category/restaurants/5/city:lagos',
'https://www.businesslist.com.ng/category/restaurants/6/city:lagos',
'https://www.businesslist.com.ng/category/restaurants/7/city:lagos',
'https://www.businesslist.com.ng/category/restaurants/8/city:lagos',
'https://www.businesslist.com.ng/category/restaurants/9/city:lagos',
'https://www.businesslist.com.ng/category/restaurants/10/city:lagos',
'https://www.businesslist.com.ng/category/restaurants/11/city:lagos',
'https://www.businesslist.com.ng/category/restaurants/12/city:lagos',
'https://www.businesslist.com.ng/category/restaurants/13/city:lagos',
'https://www.businesslist.com.ng/category/restaurants/14/city:lagos',
'https://www.businesslist.com.ng/category/restaurants/15/city:lagos'
]
rest = []
#GET_LINKS
for url in pages[:1]:
params = {
'api_key': SCRAPINGBEE_API_KEY,
'url': url}
response = requests.get(endpoint, params=params)
soup = BeautifulSoup(response.content, 'html.parser')
body = soup.find_all('h4')
for items in body:
item = items.find('a').get('href')
item_link = 'https://www.businesslist.com.ng' + item
rest.append(item_link)
#GET_REST
data = []
for url in rest[:2]:
params = {
'api_key': SCRAPINGBEE_API_KEY,
'url': url}
info = {}
response = requests.get(endpoint, params=params)
soup = BeautifulSoup(response.content, 'html.parser')
restaraunt_name = soup.find('b', {'id':'company_name'}).text
info.update({'Restaraunt':restaraunt_name})
location = soup.find('div', {'class':'text location'}).text.split('View Map')[0]
info.update({'Location':location})
phone = soup.find('div', {'class':'text phone'}).text[:11]
info.update({'Phone':phone})    
web = soup.find('div', {'class':'text weblinks'}).text
info.update({'web':web})
data.append(info)
df = pd.DataFrame(data)
df.to_excel('./Lagos.xlsx')

最新更新