Google Finance 错误:invalid literal(无效的字面量)



我正在为学校做一个个人项目(股票市场预测),结果谷歌金融又出问题了……

我知道谷歌金融在过去的一年里完全是垃圾,但直到今天早上之前,它似乎还能正常工作。尽管代码昨天运行得很好,我今天第一次运行时却遇到了一个错误。

所以我试着从实际的库页面运行一个示例代码:https://pypi.org/project/googlefinance.client/

!pip install googlefinance.client
from googlefinance.client import get_price_data, get_prices_data, get_prices_time_data
# Reproduction script copied from the library's PyPI page: first a
# single-symbol request, then a multi-symbol request.
# Dow Jones
param = {
'q': ".DJI", # Stock symbol (ex: "AAPL")
'i': "86400", # Interval size in seconds ("86400" = 1 day intervals)
'x': "INDEXDJX", # Stock exchange symbol on which stock is traded (ex: "NASD")
'p': "1Y" # Period (Ex: "1Y" = 1 year)
}
# get price data (return pandas dataframe)
# NOTE: this is the call that raises the ValueError shown in the traceback.
df = get_price_data(param)
print(df)
# Multi-symbol request: each dict names only the symbol ('q') and the
# exchange ('x'); the period is passed to get_prices_data separately.
params = [
# Dow Jones
{
'q': ".DJI",
'x': "INDEXDJX",
},
# NYSE COMPOSITE (DJ)
{
'q': "NYA",
'x': "INDEXNYSEGIS",
},
# S&P 500
{
'q': ".INX",
'x': "INDEXSP",
}
]
period = "1Y"
# get open, high, low, close, volume data (return pandas dataframe)
df = get_prices_data(params, period)
print(df)

仍然得到

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-2-df3429694fd0> in <module>()
9 }
10 # get price data (return pandas dataframe)
---> 11 df = get_price_data(param)
12 print(df)
13 
/usr/local/lib/python3.6/dist-packages/googlefinance/client.py in get_price_data(query)
13                 cols = price.split(",")
14                 if cols[0][0] == 'a':
---> 15                         basetime = int(cols[0][1:])
16                         index.append(datetime.fromtimestamp(basetime))
17                         data.append([float(cols[4]), float(cols[2]), float(cols[3]), float(cols[1]), int(cols[5])])
ValueError: invalid literal for int() with base 10: 'nd&nbsp;...</span><br></div></div><div class="g"><h3 class="r"><a href="/url?q=https://en.wikipedia.org/wiki/DJI_(company)&amp;sa=U&amp;ved=0ahUKEwiB-e_gjMzcAhUpwlkKHTTUC74QFghGMAw&amp;usg=AOvVaw1ugw

以前有没有人遇到过这种情况,知道出了什么问题或如何解决?

或者,另一方面,有人知道谷歌金融的好替代方案吗?

这是示例代码的问题。如果你去该项目的 GitHub 主页,就能获得最新版本——其中包含了哪怕是很小的更新。

我稍微修改了client.py,输出没有问题。

#!/usr/bin/env python
# coding: utf-8
import requests
from datetime import datetime
import pandas as pd

def get_price_data(query):
    """Download the price history for one symbol as a DataFrame.

    ``query`` is sent unchanged as the URL parameters of the Google Finance
    ``getprices`` endpoint.  Rows whose first field starts with ``'a'`` carry
    an absolute Unix timestamp; rows whose first field is a plain number are
    offsets from the most recent ``'a'`` row, measured in intervals of
    ``query['i']`` seconds.  Every other line (headers, blank lines) is
    ignored.

    Args:
        query: Mapping of request parameters, e.g.
            ``{'q': '.DJI', 'i': '86400', 'x': 'INDEXDJX', 'p': '1Y'}``.
            ``'i'`` is required as soon as the response contains offset rows.

    Returns:
        A ``pandas.DataFrame`` indexed by ``datetime`` (converted with
        ``datetime.fromtimestamp``, i.e. local time) with the columns
        ``Open``, ``High``, ``Low``, ``Close`` and ``Volume``.

    Raises:
        ValueError: If a matching row does not contain parsable numbers, for
            example when the server answers with HTML instead of price rows.
        requests.exceptions.Timeout: If the server does not answer in time.
    """
    r = requests.get(
        "https://finance.google.com/finance/getprices", params=query,
        timeout=30)  # without a timeout a stalled server blocks forever
    data = []
    index = []
    basetime = 0
    for price in r.text.splitlines():
        cols = price.split(",")
        # Slicing yields '' for a blank line where indexing would raise
        # IndexError.
        marker = cols[0][:1]
        if marker == 'a':
            basetime = int(cols[0][1:])
            timestamp = basetime
        elif marker.isdigit():
            timestamp = basetime + (int(cols[0]) * int(query['i']))
        else:
            continue
        index.append(datetime.fromtimestamp(timestamp))
        # Fields 1..5 are read as close, high, low, open, volume.
        data.append([float(cols[4]), float(cols[2]), float(cols[3]),
                     float(cols[1]), int(cols[5])])
    return pd.DataFrame(
        data, index=index,
        columns=['Open', 'High', 'Low', 'Close', 'Volume'])

def get_closing_data(queries, period):
    """Download daily closing prices for several symbols.

    One request is made per query with the interval fixed to 86400 seconds
    (one day) and the given period.  The caller's query dicts are not
    modified; the extra parameters are added to a copy.

    Args:
        queries: Iterable of parameter dicts; each needs at least ``'q'``
            (used as the column name) and normally ``'x'``.
        period: Value for the ``'p'`` request parameter, e.g. ``"1Y"``.

    Returns:
        A ``pandas.DataFrame`` with one column per query, named after
        ``query['q']`` and indexed by ``date``.  When several rows fall on
        the same date only the last one is kept.  An empty DataFrame is
        returned when ``queries`` is empty.

    Raises:
        ValueError: If a matching row does not contain parsable numbers.
        requests.exceptions.Timeout: If the server does not answer in time.
    """
    closing_data = []
    for query in queries:
        # Copy instead of writing 'i'/'p' into the caller's dict.
        request = dict(query, i=86400, p=period)
        r = requests.get(
            "https://finance.google.com/finance/getprices", params=request,
            timeout=30)  # without a timeout a stalled server blocks forever
        data = []
        index = []
        basetime = 0
        for price in r.text.splitlines():
            cols = price.split(",")
            # Slicing yields '' for a blank line where indexing would raise
            # IndexError.
            marker = cols[0][:1]
            if marker == 'a':
                basetime = int(cols[0][1:])
                date = basetime
            elif marker.isdigit():
                date = basetime + (int(cols[0]) * int(request['i']))
            else:
                continue
            data.append(float(cols[1]))
            index.append(datetime.fromtimestamp(date).date())
        s = pd.Series(data, index=index, name=query['q'])
        closing_data.append(s[~s.index.duplicated(keep='last')])
    if not closing_data:
        # pd.concat([]) raises; match the sibling functions, which return an
        # empty frame when there is nothing to fetch.
        return pd.DataFrame()
    return pd.concat(closing_data, axis=1)

def get_open_close_data(queries, period):
    """Download daily open and close prices for several symbols.

    One request is made per query with the interval fixed to 86400 seconds
    (one day) and the given period.  The caller's query dicts are not
    modified; the extra parameters are added to a copy.

    Args:
        queries: Iterable of parameter dicts; each needs at least ``'q'``
            (used as the column-name prefix) and normally ``'x'``.
        period: Value for the ``'p'`` request parameter, e.g. ``"1Y"``.

    Returns:
        A ``pandas.DataFrame`` indexed by ``date`` with the columns
        ``<q>_Open`` and ``<q>_Close`` for every query.  When several rows
        fall on the same date only the last one is kept.  The frame is empty
        when ``queries`` is empty.

    Raises:
        ValueError: If a matching row does not contain parsable numbers.
        requests.exceptions.Timeout: If the server does not answer in time.
    """
    open_close_data = pd.DataFrame()
    for query in queries:
        # Copy instead of writing 'i'/'p' into the caller's dict.
        request = dict(query, i=86400, p=period)
        r = requests.get(
            "https://finance.google.com/finance/getprices", params=request,
            timeout=30)  # without a timeout a stalled server blocks forever
        data = []
        index = []
        basetime = 0
        for price in r.text.splitlines():
            cols = price.split(",")
            # Slicing yields '' for a blank line where indexing would raise
            # IndexError.
            marker = cols[0][:1]
            if marker == 'a':
                basetime = int(cols[0][1:])
                date = basetime
            elif marker.isdigit():
                date = basetime + (int(cols[0]) * int(request['i']))
            else:
                continue
            # Field 4 is read as the open, field 1 as the close.
            data.append([float(cols[4]), float(cols[1])])
            index.append(datetime.fromtimestamp(date).date())
        df = pd.DataFrame(data, index=index, columns=[
            query['q'] + '_Open', query['q'] + '_Close'])
        open_close_data = pd.concat(
            [open_close_data, df[~df.index.duplicated(keep='last')]], axis=1)
    return open_close_data

def get_prices_data(queries, period):
    """Download daily open/high/low/close/volume data for several symbols.

    One request is made per query with the interval fixed to 86400 seconds
    (one day) and the given period.  The caller's query dicts are not
    modified; the extra parameters are added to a copy.

    Args:
        queries: Iterable of parameter dicts; each needs at least ``'q'``
            (used as the column-name prefix) and normally ``'x'``.
        period: Value for the ``'p'`` request parameter, e.g. ``"1Y"``.

    Returns:
        A ``pandas.DataFrame`` indexed by ``date`` with the columns
        ``<q>_Open``, ``<q>_High``, ``<q>_Low``, ``<q>_Close`` and
        ``<q>_Volume`` for every query.  When several rows fall on the same
        date only the last one is kept.  The frame is empty when ``queries``
        is empty.

    Raises:
        ValueError: If a matching row does not contain parsable numbers.
        requests.exceptions.Timeout: If the server does not answer in time.
    """
    prices_data = pd.DataFrame()
    for query in queries:
        # Copy instead of writing 'i'/'p' into the caller's dict.
        request = dict(query, i=86400, p=period)
        r = requests.get(
            "https://finance.google.com/finance/getprices", params=request,
            timeout=30)  # without a timeout a stalled server blocks forever
        data = []
        index = []
        basetime = 0
        for price in r.text.splitlines():
            cols = price.split(",")
            # Slicing yields '' for a blank line where indexing would raise
            # IndexError.
            marker = cols[0][:1]
            if marker == 'a':
                basetime = int(cols[0][1:])
                date = basetime
            elif marker.isdigit():
                date = basetime + (int(cols[0]) * int(request['i']))
            else:
                continue
            # Fields 1..5 are read as close, high, low, open, volume.
            data.append([float(cols[4]), float(cols[2]), float(cols[3]),
                         float(cols[1]), int(cols[5])])
            index.append(datetime.fromtimestamp(date).date())
        df = pd.DataFrame(data, index=index, columns=[
            query['q'] + '_Open', query['q'] + '_High', query['q'] + '_Low',
            query['q'] + '_Close', query['q'] + '_Volume'])
        prices_data = pd.concat(
            [prices_data, df[~df.index.duplicated(keep='last')]], axis=1)
    return prices_data

def get_prices_time_data(queries, period, interval):
    """Download open/high/low/close/volume data at a chosen interval.

    One request is made per query using the given interval and period.  The
    caller's query dicts are not modified; the extra parameters are added to
    a copy.

    Args:
        queries: Iterable of parameter dicts; each needs at least ``'q'``
            (used as the column-name prefix) and normally ``'x'``.
        period: Value for the ``'p'`` request parameter, e.g. ``"1Y"``.
        interval: Interval size in seconds, sent as the ``'i'`` request
            parameter and used to turn row offsets into timestamps.

    Returns:
        A ``pandas.DataFrame`` indexed by ``datetime`` (converted with
        ``datetime.fromtimestamp``, i.e. local time) with the columns
        ``<q>_Open``, ``<q>_High``, ``<q>_Low``, ``<q>_Close`` and
        ``<q>_Volume`` for every query.  Rows with a duplicated timestamp
        keep only the last occurrence.  The frame is empty when ``queries``
        is empty.

    Raises:
        ValueError: If a matching row does not contain parsable numbers.
        requests.exceptions.Timeout: If the server does not answer in time.
    """
    prices_time_data = pd.DataFrame()
    for query in queries:
        # Copy instead of writing 'i'/'p' into the caller's dict.
        request = dict(query, i=interval, p=period)
        r = requests.get(
            "https://finance.google.com/finance/getprices", params=request,
            timeout=30)  # without a timeout a stalled server blocks forever
        data = []
        index = []
        basetime = 0
        for price in r.text.splitlines():
            cols = price.split(",")
            # Slicing yields '' for a blank line where indexing would raise
            # IndexError.
            marker = cols[0][:1]
            if marker == 'a':
                basetime = int(cols[0][1:])
                date = basetime
            elif marker.isdigit():
                date = basetime + (int(cols[0]) * int(request['i']))
            else:
                continue
            # Fields 1..5 are read as close, high, low, open, volume.
            data.append([float(cols[4]), float(cols[2]), float(cols[3]),
                         float(cols[1]), int(cols[5])])
            index.append(datetime.fromtimestamp(date))
        df = pd.DataFrame(data, index=index, columns=[
            query['q'] + '_Open', query['q'] + '_High', query['q'] + '_Low',
            query['q'] + '_Close', query['q'] + '_Volume'])
        prices_time_data = pd.concat(
            [prices_time_data, df[~df.index.duplicated(keep='last')]], axis=1)
    return prices_time_data

代码段

# Ask for one year of daily price bars for the Dow Jones index and print
# the resulting DataFrame.
params = {
    # Stock symbol (ex: "AAPL")
    "q": ".DJI",
    # Interval size in seconds ("86400" = 1 day intervals)
    "i": "86400",
    # Stock exchange symbol on which stock is traded (ex: "NASD")
    "x": "INDEXDJX",
    # Period (Ex: "1Y" = 1 year)
    "p": "1Y",
}
df = get_price_data(params)
print(df)

输出

                         Open      High  ...     Close     Volume
2017-08-01 15:00:00  21961.42  21990.96  ...  21963.92  328405532
2017-08-02 15:00:00  22004.36  22036.10  ...  22016.24  328405532
2017-08-03 15:00:00  22007.58  22044.85  ...  22026.10  336824836
2017-08-04 15:00:00  22058.39  22092.81  ...  22092.81  278731064
2017-08-07 15:00:00  22100.20  22121.15  ...  22118.42  253635270
2017-08-08 15:00:00  22095.14  22179.11  ...  22085.34   23012378

在过去的48小时左右,".INX" 在我的 Google 表格中一直没有更新。.DJI 和 .IXIC 仍在更新,不过我认为其中一个最近也没有更新。

抓取 Google Finance 时,除非你确实想用,否则没有必要使用任何 API。使用 BeautifulSoup 网页抓取库就足够了:借助它,你可以从股票行情页面抓取所需的全部信息,几乎什么都能抓到。

在线IDE中检查代码。


from bs4 import BeautifulSoup
import requests, lxml, json
from itertools import zip_longest

def scrape_google_finance(ticker: str):
    """Scrape headline figures from a Google Finance quote page.

    The page is located purely through CSS class names, so individual
    fields can go missing when the page markup changes; missing fields are
    reported as ``None`` instead of aborting the whole scrape.

    Args:
        ticker: Quote identifier inserted into the URL path, e.g.
            ``"GOOGL:NASDAQ"``.

    Returns:
        A dict with two entries:

        * ``"right_panel_data"``: maps each label found in the side panel
          (lower-cased, spaces replaced by underscores) to its displayed
          text.  Labels and values are paired positionally; an unpaired
          trailing label or value is dropped.
        * ``"ticker_info"``: ``{"title": ..., "current_price": ...}``, each
          either the element text or ``None`` when the element is absent.

    Raises:
        requests.exceptions.HTTPError: If the server answers with an error
            status.
        requests.exceptions.Timeout: If the server does not answer within
            30 seconds.
    """
    # https://docs.python-requests.org/en/master/user/quickstart/#passing-parameters-in-urls
    params = {
        "hl": "en"  # language
    }
    # https://docs.python-requests.org/en/master/user/quickstart/#custom-headers
    # https://www.whatismybrowser.com/detect/what-is-my-user-agent
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.60 Safari/537.36",
    }
    html = requests.get(f"https://www.google.com/finance/quote/{ticker}", params=params, headers=headers, timeout=30)
    # Fail loudly on 4xx/5xx instead of parsing an error page into a result
    # full of None values.
    html.raise_for_status()
    soup = BeautifulSoup(html.text, "lxml")

    def text_or_none(selector):
        # select_one returns None when nothing matches the selector.
        node = soup.select_one(selector)
        return None if node is None else node.text

    ticker_data = {"right_panel_data": {},
                   "ticker_info": {}}

    ticker_data["ticker_info"]["title"] = text_or_none(".zzDege")
    ticker_data["ticker_info"]["current_price"] = text_or_none(".AHmHk .fxKbKc")

    right_panel_keys = soup.select(".gyFHrc .mfs7Fc")
    right_panel_values = soup.select(".gyFHrc .P6K39c")

    # zip, not zip_longest: padding the shorter list with None would crash
    # on the `.text` access below.
    for key, value in zip(right_panel_keys, right_panel_values):
        key_value = key.text.lower().replace(" ", "_")
        ticker_data["right_panel_data"][key_value] = value.text

    return ticker_data

# Scrape the quote page for the "GOOGL" ticker on the NASDAQ exchange.
data = scrape_google_finance(ticker="GOOGL:NASDAQ")
# Pretty-print the nested result dict as JSON with two-space indentation.
print(json.dumps(data, indent=2))

示例输出

{
"right_panel_data": {
"previous_close": "$118.84",
"day_range": "$119.46 - $120.56",
"year_range": "$101.88 - $151.55",
"market_cap": "1.57T USD",
"avg_volume": "34.44M",
"p/e_ratio": "22.76",
"dividend_yield": "-",
"primary_exchange": "NASDAQ",
"ceo": "Sundar Pichai",
"founded": "Oct 2, 2015",
"headquarters": "Mountain View, CaliforniaUnited States",
"website": "abc.xyz",
"employees": "174,014"
},
"ticker_info": {
"title": "Alphabet Inc Class A",
"current_price": "$120.11"
}
}

如果你需要从谷歌金融中抓取更多数据,可以参考一篇关于用 Python 抓取 Google Finance 股票行情(Ticker Quote)数据的博客文章。

相关内容

最新更新