从wunderground中抓取历史天气数据



我是数据抓取的新手。最近,我尝试用 Python 的 selenium 库从 wunderground.com 上抓取数据。但是,我发现 Selenium WebDriver 有时无法成功打开网页。我认为这个问题可能与网站使用的 JavaScript 有一定的关系,但不确定具体是哪个部分出了问题。有人知道怎么解决吗?提前谢谢。

下面是正确显示的示例:正确显示的示例

这里是有问题的示例:有问题的示例

我的代码在这里,这是一个非常简单的 selenium 调用:
import requests                                                                      
from bs4 import BeautifulSoup 
import pandas as pd
import numpy
from selenium import webdriver
from selenium.webdriver.support.ui import Select
from selenium.webdriver import ChromeOptions
from selenium.webdriver import ActionChains
import time
# Target page: the daily-history view for station KSAN (San Diego), 2021-02-01.
url = "https://www.wunderground.com/history/daily/us/ca/san-diego/KSAN/date/2021-2-1"

# Chrome options used to launch the Selenium-driven browser.
option = webdriver.ChromeOptions()

# Experimental options, applied in the same order as before.
for _opt_name, _opt_value in (
    ('excludeSwitches', ['enable-automation']),
    ('useAutomationExtension', False),
    # JavaScript content setting: 1 enables it, 2 disables it.
    ("prefs", {'profile.managed_default_content_settings.javascript': 1}),
):
    option.add_experimental_option(_opt_name, _opt_value)

# Command-line switches handed to Chrome, in the same order as before.
for _switch in (
    '--disable-gpu',
    "--disable-blink-features",
    "--disable-blink-features=AutomationControlled",
    "--enable-javascript",
):
    option.add_argument(_switch)

# Start the browser and navigate to the page.
driver = webdriver.Chrome(options=option)
driver.get(url)

# Fixed five-second pause to let the page finish loading before any scraping.
time.sleep(5)

页面发送HTTP GET到:https://api.weather.com/v1/location/KSAN:9:US/observations/historical.json?apiKey=e1f10a1e78da46f5b10a1e78da96f525&units=e&startDate=20210201
这个调用的响应是一个巨大的JSON,其中包含您正在寻找的数据。(下面是一个子集)

{
"metadata": {
"language": "en-US",
"transaction_id": "1631220781880:2112944028",
"version": "1",
"location_id": "KSAN:9:US",
"units": "e",
"expire_time_gmt": 1631224381,
"status_code": 200
},
"observations": [
{
"key": "KSAN",
"class": "observation",
"expire_time_gmt": 1612176660,
"obs_id": "KSAN",
"obs_name": "San Diego",
"valid_time_gmt": 1612169460,
"day_ind": "N",
"temp": 59,
"wx_icon": 27,
"icon_extd": 2700,
"wx_phrase": "Mostly Cloudy",
"pressure_tend": 2,
"pressure_desc": "Falling",
"dewPt": 45,
"heat_index": 59,
"rh": 60,
"pressure": 30.04,
"vis": 10,
"wc": 59,
"wdir": null,
"wdir_cardinal": "CALM",
"gust": null,
"wspd": 0,
"max_temp": null,
"min_temp": null,
"precip_total": null,
"precip_hrly": 0,
"snow_hrly": null,
"uv_desc": "Low",
"feels_like": 59,
"uv_index": 0,
"qualifier": null,
"qualifier_svrty": null,
"blunt_phrase": null,
"terse_phrase": null,
"clds": "BKN",
"water_temp": null,
"primary_wave_period": null,
"primary_wave_height": null,
"primary_swell_period": null,
"primary_swell_height": null,
"primary_swell_direction": null,
"secondary_swell_period": null,
"secondary_swell_height": null,
"secondary_swell_direction": null
},
{
"key": "KSAN",
"class": "observation",
"expire_time_gmt": 1612180260,
"obs_id": "KSAN",
"obs_name": "San Diego",
"valid_time_gmt": 1612173060,
"day_ind": "N",
"temp": 59,
"wx_icon": 27,
"icon_extd": 2700,
"wx_phrase": "Mostly Cloudy",
"pressure_tend": null,
"pressure_desc": null,
"dewPt": 47,
"heat_index": 59,
"rh": 64,
"pressure": 30.04,
"vis": 10,
"wc": 59,
"wdir": 260,
"wdir_cardinal": "W",
"gust": null,
"wspd": 5,
"max_temp": null,
"min_temp": null,
"precip_total": null,
"precip_hrly": 0,
"snow_hrly": null,
"uv_desc": "Low",
"feels_like": 59,
"uv_index": 0,
"qualifier": null,
"qualifier_svrty": null,
"blunt_phrase": null,
"terse_phrase": null,
"clds": "BKN",
"water_temp": null,
"primary_wave_period": null,
"primary_wave_height": null,
"primary_swell_period": null,
"primary_swell_height": null,
"primary_swell_direction": null,
"secondary_swell_period": null,
"secondary_swell_height": null,
"secondary_swell_direction": null
} ]

相关内容

  • 没有找到相关文章

最新更新