我目前正在学习Python,并尝试借鉴别人的代码来做自己的项目;我还在学习阶段,所以请多包涵。
我从tickers.csv中获取股票列表,并抓取了一个网站以获取部门(sector)和行业(industry),再写入.csv文件。
问题是我一次只能把部门或行业其中之一(二选一)写入stocks.csv,做法如下:
if __name__ == '__main__':
# Export one field per run: sectors here, or industries via the line below.
to_csv([get_sector(ticker) for ticker in get_stocks()])
# to_csv([get_industry(ticker) for ticker in get_stocks()])
我想同时获取部门和行业。以下是完整代码:
# dependencies
import pandas as pd
import requests
from bs4 import BeautifulSoup as bs
# URL prefix of the csimarket.com "at a glance" page; a ticker symbol is
# appended to it as the value of the `code` query parameter.
LSE = 'https://csimarket.com/stocks/at_glance.php?code='
def get_stocks():
    """Return the ``ticker`` column of ``watchlist/tickers.csv`` as a list."""
    return list(pd.read_csv('watchlist/tickers.csv')['ticker'])
def to_csv(stocks):
    """Write ``stocks`` to ``stocks.csv`` as a table without the index column.

    Args:
        stocks: Data accepted by ``pd.DataFrame``; the caller in this file
            passes a list of per-ticker dicts, one per row.
    """
    table = pd.DataFrame(stocks)
    table.to_csv(path_or_buf='stocks.csv', index=False)
def get_soup(url):
    """Download ``url`` and return its body parsed with ``'html.parser'``."""
    response = requests.get(url)
    markup = response.text
    return bs(markup, 'html.parser')
def get_sector(ticker):
    """Scrape the sector name for ``ticker`` from its at-a-glance page.

    Args:
        ticker: Ticker symbol appended to ``LSE`` to build the page URL.

    Returns:
        A dict with the keys ``'ticker'`` and ``'sector'``. ``'sector'`` is
        an empty string when the page has no sector entry.
    """
    soup = get_soup(LSE + ticker)
    try:
        link = soup.find('span', text='Sector').find_next('a')
        # Remove line breaks and the bullet separator around the link text.
        # The newline must be written as '\n'; replacing a bare 'n' would
        # delete that letter from names such as "Financial".
        sector = link.text.replace('\n', '').replace('•', '').strip()
    except AttributeError:
        # find()/find_next() give None when the label or the link is absent,
        # so the attribute access above fails with AttributeError.
        print('No sector information availible for ', ticker)
        return {'ticker': ticker, 'sector': ''}
    print(ticker, sector)
    return {'ticker': ticker, 'sector': sector}
def get_industry(ticker):
    """Scrape the industry name for ``ticker`` from its at-a-glance page.

    Args:
        ticker: Ticker symbol appended to ``LSE`` to build the page URL.

    Returns:
        A dict with the keys ``'ticker'`` and ``'industry'``. ``'industry'``
        is an empty string when the page has no industry entry.
    """
    soup = get_soup(LSE + ticker)
    try:
        link = soup.find('span', text='Industry').find_next('a')
        # Remove line breaks and the bullet separator around the link text.
        # The newline must be written as '\n'; replacing a bare 'n' would
        # delete that letter from names such as "Aluminum".
        industry = link.text.replace('\n', '').replace('•', '').strip()
    except AttributeError:
        # find()/find_next() give None when the label or the link is absent,
        # so the attribute access above fails with AttributeError.
        print('No industry information availible for ', ticker)
        return {'ticker': ticker, 'industry': ''}
    print(ticker, industry)
    return {'ticker': ticker, 'industry': industry}
if __name__ == '__main__':
    # One page request per ticker; each returned dict becomes one CSV row.
    to_csv([get_sector(ticker) for ticker in get_stocks()])
    # Alternative export: industries instead of sectors.
    # to_csv([get_industry(ticker) for ticker in get_stocks()])
这里是tickers.csv
ticker,
A
AA
AADI
AAIC
AAL
AAN
AAOI
AAON
AAP
AAPL
AAT
AAU
AAWW
AB
ABB
ABBV
ABC
ABCB
ABCL
ABEO
ABEV
ABG
ABIO
ABM
ABMD
ABNB
ABOS
ABR
ABSI
ABST
ABT
ABTX
ABUS
ACA
ACAD
ACB
ACC
ACCD
ACCO
ACEL
ACER
ACET
ACEV
ACGL
ACH
ACHC
ACHR
ACHV
ACI
ACIU
这是获取部门(sector)时得到的stocks.csv
ticker,sector
A,Healthcare
AA,Basic Materials
AADI,
AAIC,Services
AAL,Transportation
AAN,Services
AAOI,Technology
AAON,Capital Goods
AAP,Retail
AAPL,Technology
AAT,Financial
AAU,Basic Materials
AAWW,Transportation
AB,Financial
ABB,Consumer Discretionary
ABBV,Healthcare
ABC,Retail
ABCB,Financial
ABCL,Healthcare
ABEO,Healthcare
ABEV,Consumer Non Cyclical
ABG,Retail
ABIO,Healthcare
ABM,Services
ABMD,Healthcare
ABNB,Services
ABOS,Healthcare
ABR,Financial
ABSI,Healthcare
ABST,
ABT,Healthcare
ABTX,Financial
ABUS,Healthcare
ACA,Basic Materials
ACAD,Healthcare
ACB,
ACC,Financial
ACCD,Financial
ACCO,Basic Materials
ACEL,Services
ACER,Healthcare
ACET,Retail
ACEV,Technology
ACGL,Financial
ACH,Basic Materials
ACHC,Healthcare
ACHR,Capital Goods
ACHV,Healthcare
ACI,Energy
ACIU,
这是获取行业(industry)时得到的stocks.csv
ticker,industry
A,Laboratory Analytical Instruments
AA,Aluminum
AADI,
AAIC,Real Estate Operations
AAL,Airline
AAN,Rental & Leasing
AAOI,Computer Networks
AAON,Industrial Machinery and Components
AAP,Automotive Aftermarket
AAPL,Computer Hardware
AAT,Real Estate Investment Trusts
AAU,Metal Mining
AAWW,Special Transportation Services
AB,Investment Services
ABB,Electric & Wiring Equipment
ABBV,Biotechnology & Pharmaceuticals
ABC,Pharmacy Services & Retail Drugstore
ABCB,Regional Banks
ABCL,Major Pharmaceutical Preparations
ABEO,Major Pharmaceutical Preparations
ABEV,Nonalcoholic Beverages
ABG,Automotive Aftermarket
ABIO,In Vitro & In Vivo Diagnostic Substances
ABM,Professional Services
ABMD,Medical Equipment & Supplies
ABNB,Real Estate Operations
ABOS,Biotechnology & Pharmaceuticals
ABR,Real Estate Investment Trusts
ABSI,Medical Laboratories
ABST,
ABT,Major Pharmaceutical Preparations
ABTX,Commercial Banks
ABUS,Major Pharmaceutical Preparations
ACA,Miscellaneous Fabricated Products
ACAD,Major Pharmaceutical Preparations
ACB,
ACC,Real Estate Investment Trusts
ACCD,Blank Checks
ACCO,Paper & Paper Products
ACEL,Casinos & Gaming
ACER,Major Pharmaceutical Preparations
ACET,Pharmacy Services & Retail Drugstore
ACEV,Semiconductors
ACGL,Property & Casualty Insurance
ACH,Aluminum
ACHC,Healthcare Facilities
ACHR,Aerospace & Defense
ACHV,In Vitro & In Vivo Diagnostic Substances
ACI,Coal Mining
ACIU,
只需将现有的两个函数合并为一个,用同一个soup对象解析,并一次性返回部门和行业两项结果:
import pandas as pd
import requests
from bs4 import BeautifulSoup as bs
# URL prefix of the csimarket.com "at a glance" page; a ticker symbol is
# appended to it as the value of the `code` query parameter.
LSE = 'https://csimarket.com/stocks/at_glance.php?code='
def get_stocks():
    """Read ``watchlist/tickers.csv`` and return its ``ticker`` column as a list."""
    ticker_column = pd.read_csv('watchlist/tickers.csv')['ticker']
    return list(ticker_column)
def to_csv(stocks):
    """Write ``stocks`` to ``stocks.csv`` (UTF-8 with BOM), omitting the index.

    Args:
        stocks: Data accepted by ``pd.DataFrame``; the caller in this file
            passes a list of per-ticker dicts, one per row.
    """
    table = pd.DataFrame(stocks)
    table.to_csv(path_or_buf='stocks.csv', index=False, encoding='utf-8-sig')
def get_soup(url):
    """Download ``url`` with a browser-like User-Agent and parse the HTML.

    The response body is parsed with ``'html.parser'``.
    """
    request_headers = {'User-Agent': 'Mozilla/5.0'}
    response = requests.get(url, headers=request_headers)
    return bs(response.text, 'html.parser')
def get_data(ticker):
    """Scrape both sector and industry for ``ticker`` from a single page load.

    Args:
        ticker: Ticker symbol appended to ``LSE`` to build the page URL.

    Returns:
        A dict that always has the keys ``'ticker'``, ``'sector'`` and
        ``'industry'``. A field that is not present on the page is an empty
        string, so every row written to the CSV has the same columns and a
        missing sector no longer discards the industry (or vice versa).
    """
    soup = get_soup(LSE + ticker)
    row = {'ticker': ticker}
    # (result key, label text of the <span> that precedes the value link)
    for key, label in (('sector', 'Sector'), ('industry', 'Industry')):
        try:
            link = soup.find('span', text=label).find_next('a')
            # Remove line breaks and the bullet separator around the text.
            # The newline must be '\n'; replacing a bare 'n' would delete
            # that letter from names such as "Financial".
            value = link.text.replace('\n', '').replace('•', '').strip()
        except AttributeError:
            # find()/find_next() give None when the label or link is absent.
            print('No {} information availible for '.format(key), ticker)
            value = ''
        else:
            print(ticker, value)
        row[key] = value
    return row
if __name__ == '__main__':
    # One page request per ticker; each returned dict becomes one CSV row.
    to_csv([get_data(ticker) for ticker in get_stocks()])