有人能向我解释一下,我该如何重写下面这段代码,以便使用池(pool)让它运行得更快吗?很抱歉问这个问题,但我是初学者,花了很多时间尝试,可惜仍然没能弄清楚。
# Download each constituent's adjusted close and derive its daily market cap.
# The per-ticker frames are collected in lists and joined once after the loop.
# The earlier bare ``except`` around ``pd.concat`` was meant to detect "table
# does not exist yet", but it also swallowed every other error.
prc_frames = []
mcap_frames = []
for i in constituents:
    print(i)  # print out the ticker so we know the downloading progress
    prc = yf.download(i, interval="1d", start=start_date, end=end_date)
    prc = pd.DataFrame(prc['Adj Close'])  # select adjusted close price only
    prc.columns = [i]  # rename the column with the ticker of the stock
    prc_frames.append(prc)

    # Read the metadata once. ``.get`` yields None for a missing key, which
    # replaces the try/except blocks that wrote None back into ``stk.info``.
    stk = yf.Ticker(i)
    info = stk.info
    shares = info.get('floatShares') or info.get('sharesOutstanding')
    if not shares:
        # Neither share count is usable: derive one from market cap and price.
        shares = info['marketCap'] / info['previousClose']
    mcap = prc * shares
    mcap_frames.append(mcap)

# One column per ticker, aligned on the row index.
df_prc = pd.concat(prc_frames, axis=1) if prc_frames else pd.DataFrame()
df_mcap = pd.concat(mcap_frames, axis=1) if mcap_frames else pd.DataFrame()
此外,为了更清楚地说明我的问题,下面给出在上述代码之前运行的代码:
import yfinance as yf
import pandas as pd
# Sampling period passed to the yf.download call below.
start_date = "2021-01-04"
end_date = "2021-11-29"

# Daily adjusted close of the index, kept as a one-column frame named after the ticker.
idx = "^STOXX50E"
Index = yf.download(idx,  # ticker
                    interval="1d",  # daily frequency
                    start=start_date, end=end_date)  # sampling period
Index = pd.DataFrame(Index['Adj Close'].rename(idx))  # select adjusted close price

# Constituent tickers are read from the table at position 2 of the Wikipedia page.
page = pd.read_html('https://en.wikipedia.org/wiki/EURO_STOXX_50')
constituents = page[2]['Ticker']  # we only need tickers
# UMG.AS is removed because otherwise the for loop produces an error.
# Filtering on the ticker value, instead of ``pop(46)``, still removes the
# right row if the order of the Wikipedia table changes.
constituents = constituents[constituents != 'UMG.AS']
我使用以下代码将运行时间从 386 秒减少到 17 秒。请注意,我必须导入 `ssl` 模块并执行 `ssl._create_default_https_context = ssl._create_unverified_context`,以绕过调用 `pd.read_html()` 方法时出现的 SSL 证书错误。
import yfinance as yf
import pandas as pd
from multiprocessing.pool import ThreadPool
from functools import partial
import ssl
# NOTE(security): replaces the default HTTPS context factory with the
# unverified one, so certificates are no longer checked. This was added to get
# past an SSL certificate error raised by pd.read_html(); prefer fixing the
# local certificate store where possible.
ssl._create_default_https_context = ssl._create_unverified_context
# Sampling period, passed as start=/end= to the yf.download calls.
start_date = "2021-01-04"
end_date = "2021-11-29"
def process_constituent(data, constituent, *, info=None):
    """Return the daily market capitalisation of one ticker.

    The adjusted close is multiplied by a share count chosen in this order:
    ``floatShares``, then ``sharesOutstanding``, then
    ``marketCap / previousClose``.

    Args:
        data: Object indexable as ``data[constituent]['Adj Close']``; in this
            file it is the result of ``yf.download(..., group_by='ticker')``.
        constituent: Ticker symbol; also used as the output column label.
        info: Optional metadata mapping for the ticker. When omitted it is
            read from ``yf.Ticker(constituent).info``. Passing it avoids that
            lookup when the metadata has already been fetched.

    Returns:
        A one-column DataFrame named ``constituent``.

    Raises:
        KeyError: If neither share count is usable and ``marketCap`` or
            ``previousClose`` is missing from the metadata.
    """
    prc = pd.DataFrame(data[constituent]['Adj Close'])  # select adjusted close price only
    prc.columns = [constituent]  # rename the column with the ticker of the stock

    if info is None:
        # Read the metadata once instead of on every lookup below.
        info = yf.Ticker(constituent).info

    # ``.get`` returns None for a missing key, so no try/except is needed and
    # the metadata mapping is no longer modified.
    shares = info.get('floatShares') or info.get('sharesOutstanding')
    if not shares:
        shares = info['marketCap'] / info['previousClose']
    return prc * shares
def process_constituents(constituents):
    """Download prices for all tickers and return their daily market caps.

    All tickers are fetched in one ``yf.download`` call. The per-ticker
    metadata lookups done by ``process_constituent`` then run in a thread
    pool, one worker per ticker.

    Args:
        constituents: Iterable of ticker symbols.

    Returns:
        A DataFrame with one market-cap column per ticker, in input order.
        An empty DataFrame is returned when ``constituents`` is empty.
    """
    tickers = list(constituents)
    if not tickers:
        # ThreadPool(0) raises ValueError and there is nothing to download.
        return pd.DataFrame()

    # Download all the tickers:
    data = yf.download(
        tickers=' '.join(tickers),
        interval='1d',
        start=start_date,
        end=end_date,
        group_by='ticker',
        # The keyword is ``auto_adjust``; ``adjust`` is not a download()
        # parameter. Unadjusted data keeps the 'Adj Close' column used later.
        auto_adjust=False,
        threads=True,
        proxy=None,
    )

    # The context manager shuts the worker threads down once results are in.
    with ThreadPool(len(tickers)) as pool:
        mcaps = pool.map(partial(process_constituent, data), tickers)

    # A single concat instead of growing the frame one column at a time.
    return pd.concat(mcaps, axis=1)
def main():
    """Download the index and its constituents, then print the market-cap table."""
    idx = "^STOXX50E"
    # The index frame is downloaded here but not used further in this function.
    Index = yf.download(idx,  # ticker
                        interval="1d",  # daily frequency
                        start=start_date, end=end_date)  # sampling period
    Index = pd.DataFrame(Index['Adj Close'].rename(idx))  # select adjusted close price

    page = pd.read_html('https://en.wikipedia.org/wiki/EURO_STOXX_50')
    constituents = page[2]['Ticker']  # we only need tickers
    # UMG.AS is removed because it makes the processing fail. Filtering on the
    # ticker value, instead of ``pop(46)``, still removes the right row if the
    # order of the Wikipedia table changes.
    constituents = constituents[constituents != 'UMG.AS']

    df_mcap = process_constituents(constituents)
    print(df_mcap)
# Run the pipeline only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
打印:
[*********************100%***********************] 1 of 1 completed
[*********************100%***********************] 49 of 49 completed
ADS.DE ADYEN.AS AD.AS AI.PA ... TTE.PA DG.PA VOW.DE VNA.DE
Date ...
2021-01-04 5.083322e+10 4.880933e+10 2.414267e+10 6.200227e+10 ... 8.326552e+10 3.746300e+10 3.842743e+10 3.322534e+10
2021-01-05 4.983515e+10 4.800875e+10 2.403104e+10 6.134340e+10 ... 8.545638e+10 3.682896e+10 3.849667e+10 3.338207e+10
2021-01-06 5.019652e+10 4.548888e+10 2.411223e+10 6.147971e+10 ... 8.921219e+10 3.824197e+10 3.886594e+10 3.203872e+10
2021-01-07 4.964585e+10 4.500328e+10 2.407163e+10 6.195684e+10 ... 9.018724e+10 3.830537e+10 3.946601e+10 3.183722e+10
2021-01-08 5.078160e+10 4.610573e+10 2.400059e+10 6.232034e+10 ... 9.024743e+10 3.879449e+10 3.893518e+10 3.225142e+10
... ... ... ... ... ... ... ... ... ...
2021-11-22 4.851034e+10 6.454539e+10 3.178177e+10 7.108912e+10 ... 1.073903e+11 4.175263e+10 6.518791e+10 2.937961e+10
2021-11-23 4.727562e+10 6.298360e+10 3.187473e+10 7.017166e+10 ... 1.086315e+11 4.224230e+10 6.532881e+10 2.843048e+10
2021-11-24 4.667566e+10 6.206490e+10 3.153388e+10 7.028287e+10 ... 1.092141e+11 4.271798e+10 6.326233e+10 2.985586e+10
2021-11-25 4.659740e+10 6.453227e+10 3.159068e+10 7.013459e+10 ... 1.091381e+11 4.279726e+10 6.298054e+10 3.005144e+10
2021-11-26 4.405841e+10 6.358732e+10 3.132214e+10 6.882791e+10 ... 1.026661e+11 3.918302e+10 6.072620e+10 2.859604e+10
[233 rows x 49 columns]
import yfinance as yf
import pandas as pd
from multiprocessing.pool import ThreadPool
from functools import partial
import ssl
# NOTE(security): replaces the default HTTPS context factory with the
# unverified one, so certificates are no longer checked. It is here only to
# get past an SSL certificate error from pd.read_html(); prefer fixing the
# local certificate store where possible.
ssl._create_default_https_context = ssl._create_unverified_context

# Sampling period passed to the yf.download calls below.
start_date = "2021-01-04"
end_date = "2021-11-29"

# download data
idx = "^STOXX50E"
Index = yf.download(idx,  # ticker
                    interval="1d",  # daily frequency
                    start=start_date, end=end_date)  # sampling period
Index = pd.DataFrame(Index['Adj Close'].rename(idx))  # select adjusted close price

page = pd.read_html('https://en.wikipedia.org/wiki/EURO_STOXX_50')
constituents = page[2]['Ticker']  # we only need tickers
# UMG.AS is removed because it makes the processing fail. Filtering on the
# ticker value, instead of ``pop(46)``, still removes the right row if the
# order of the Wikipedia table changes.
constituents = constituents[constituents != 'UMG.AS']

# One request for every ticker; columns are grouped per ticker.
data = yf.download(
    tickers=' '.join(constituents),
    interval='1d',
    start=start_date,
    end=end_date,
    group_by='ticker',
    # The keyword is ``auto_adjust``; ``adjust`` is not a download()
    # parameter. Unadjusted data keeps the 'Adj Close' column used later.
    auto_adjust=False,
    threads=True,
    proxy=None,
)
def process_prc(data, constituent):
    """Return one ticker's adjusted close prices as a single-column DataFrame.

    Args:
        data: Object indexable as ``data[constituent]['Adj Close']``; in this
            file it is the result of ``yf.download(..., group_by='ticker')``.
        constituent: Ticker symbol; also used as the output column label.

    Returns:
        A DataFrame with exactly one column, named ``constituent``.
    """
    prc = pd.DataFrame(data[constituent]['Adj Close'])  # select adjusted close price only
    prc.columns = [constituent]  # rename the column with the ticker of the stock
    return prc
def process_constituent(data, constituent, *, info=None):
    """Return the daily market capitalisation of one ticker.

    The adjusted close is multiplied by a share count chosen in this order:
    ``floatShares``, then ``sharesOutstanding``, then
    ``marketCap / previousClose``.

    Args:
        data: Object indexable as ``data[constituent]['Adj Close']``; in this
            file it is the result of ``yf.download(..., group_by='ticker')``.
        constituent: Ticker symbol; also used as the output column label.
        info: Optional metadata mapping for the ticker. When omitted it is
            read from ``yf.Ticker(constituent).info``. Passing it avoids that
            lookup when the metadata has already been fetched.

    Returns:
        A one-column DataFrame named ``constituent``.

    Raises:
        KeyError: If neither share count is usable and ``marketCap`` or
            ``previousClose`` is missing from the metadata.
    """
    prc = pd.DataFrame(data[constituent]['Adj Close'])  # select adjusted close price only
    prc.columns = [constituent]  # rename the column with the ticker of the stock

    if info is None:
        # Read the metadata once instead of on every lookup below.
        info = yf.Ticker(constituent).info

    # ``.get`` returns None for a missing key, so no try/except is needed and
    # the metadata mapping is no longer modified.
    shares = info.get('floatShares') or info.get('sharesOutstanding')
    if not shares:
        shares = info['marketCap'] / info['previousClose']
    return prc * shares
def process_dfprc(constituents):
    """Build the adjusted-close price table for the given tickers.

    Each ticker's column is extracted from the module-level ``data`` by
    ``process_prc``, and the columns are joined side by side.

    Args:
        constituents: Iterable of ticker symbols present in ``data``.

    Returns:
        A DataFrame with one price column per ticker, in input order.
        An empty DataFrame is returned when ``constituents`` is empty.
    """
    tickers = list(constituents)
    if not tickers:
        # ThreadPool(0) raises ValueError and there is nothing to join.
        return pd.DataFrame()

    # The context manager shuts the worker threads down once results are in.
    with ThreadPool(len(tickers)) as pool:
        frames = pool.map(partial(process_prc, data), tickers)

    # A single concat replaces the try/except that relied on ``df_prc`` being
    # unbound on the first pass and hid any other error.
    return pd.concat(frames, axis=1)
def process_constituents(constituents):
    """Build the market-capitalisation table for the given tickers.

    Prices come from the module-level ``data``. The per-ticker metadata
    lookups done by ``process_constituent`` run in a thread pool, one worker
    per ticker.

    Args:
        constituents: Iterable of ticker symbols present in ``data``.

    Returns:
        A DataFrame with one market-cap column per ticker, in input order.
        An empty DataFrame is returned when ``constituents`` is empty.
    """
    tickers = list(constituents)
    if not tickers:
        # ThreadPool(0) raises ValueError and there is nothing to join.
        return pd.DataFrame()

    # The context manager shuts the worker threads down once results are in.
    with ThreadPool(len(tickers)) as pool:
        mcaps = pool.map(partial(process_constituent, data), tickers)

    # A single concat replaces the try/except that relied on ``df_mcap`` being
    # unbound on the first pass and hid any other error.
    return pd.concat(mcaps, axis=1)
# Read the constituent tickers from the table at position 2 of the Wikipedia page.
page = pd.read_html('https://en.wikipedia.org/wiki/EURO_STOXX_50')
constituents = page[2]['Ticker']  # we only need tickers
# UMG.AS is removed because it makes the processing fail. Filtering on the
# ticker value, instead of ``pop(46)``, still removes the right row if the
# order of the Wikipedia table changes.
constituents = constituents[constituents != 'UMG.AS']
计算df_mcap:
if __name__ == '__main__':
    df_mcap = process_constituents(constituents)
# Bare expression so the frame is displayed when this is the last statement of
# a notebook cell. NOTE(review): placed outside the guard on that assumption;
# the original indentation was lost, so confirm.
df_mcap
打印:
ADS.DE ADYEN.AS AD.AS AI.PA AIR.PA ALV.DE ABI.BR ASML.AS CS.PA BAS.DE ... SAF.PA SAN.PA SAP.DE SU.PA SIE.DE STLA.MI TTE.PA DG.PA VOW.DE VNA.DE
Date
2021-01-04 5.083322e+10 4.880933e+10 2.414267e+10 6.200227e+10 6.001130e+10 7.936595e+10 5.306992e+10 1.647683e+11 3.507867e+10 5.679746e+10 ... 4.005571e+10 8.460465e+10 1.080643e+11 6.363292e+10 9.197665e+10 2.174885e+10 8.763420e+10 3.746300e+10 3.842743e+10 3.316382e+10
2021-01-05 4.983515e+10 4.800875e+10 2.403104e+10 6.134340e+10 5.997124e+10 7.855080e+10 5.304209e+10 1.650319e+11 3.506423e+10 5.636858e+10 ... 4.014193e+10 8.459394e+10 1.077770e+11 6.303187e+10 9.178897e+10 2.169038e+10 8.994003e+10 3.682896e+10 3.849667e+10 3.332026e+10
2021-01-06 5.019652e+10 4.548888e+10 2.411223e+10 6.147971e+10 6.019823e+10 8.263458e+10 5.451703e+10 1.633893e+11 3.656208e+10 5.896818e+10 ... 4.010744e+10 8.407989e+10 1.082285e+11 6.478275e+10 9.530789e+10 2.123436e+10 9.389289e+10 3.824197e+10 3.886594e+10 3.197940e+10
2021-01-07 4.964585e+10 4.500328e+10 2.407163e+10 6.195684e+10 5.983105e+10 8.195529e+10 5.417381e+10 1.638151e+11 3.678766e+10 5.987848e+10 ... 3.993501e+10 8.323385e+10 1.072435e+11 6.611552e+10 9.720029e+10 2.161438e+10 9.491911e+10 3.830537e+10 3.946601e+10 3.177828e+10
2021-01-08 5.078160e+10 4.610573e+10 2.400059e+10 6.232034e+10 6.015150e+10 8.221501e+10 5.367288e+10 1.687430e+11 3.675157e+10 6.002728e+10 ... 4.012468e+10 8.437975e+10 1.089467e+11 6.682110e+10 9.696569e+10 2.121390e+10 9.498246e+10 3.879449e+10 3.893518e+10 3.219170e+10
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2021-11-22 4.851034e+10 6.454539e+10 3.178177e+10 7.108912e+10 7.262242e+10 8.481195e+10 4.905974e+10 3.047505e+11 4.871214e+10 5.686775e+10 ... 3.862038e+10 9.936060e+10 1.269160e+11 8.638617e+10 1.251472e+11 3.187450e+10 1.074238e+11 4.175263e+10 6.518791e+10 2.937961e+10
2021-11-23 4.727562e+10 6.298360e+10 3.187473e+10 7.017166e+10 7.361048e+10 8.489549e+10 4.868553e+10 2.879491e+11 4.890396e+10 5.780438e+10 ... 3.883494e+10 9.827092e+10 1.247044e+11 8.521374e+10 1.230764e+11 3.105655e+10 1.086654e+11 4.224230e+10 6.532881e+10 2.843048e+10
2021-11-24 4.667566e+10 6.206490e+10 3.153388e+10 7.028287e+10 7.441161e+10 8.483284e+10 4.900361e+10 2.891317e+11 4.899028e+10 5.683102e+10 ... 3.892492e+10 9.708117e+10 1.240785e+11 8.322063e+10 1.219527e+11 3.068774e+10 1.092482e+11 4.271798e+10 6.326233e+10 2.985586e+10
2021-11-25 4.659740e+10 6.453227e+10 3.159068e+10 7.013459e+10 7.494570e+10 8.464487e+10 5.084663e+10 2.899473e+11 4.885600e+10 5.657391e+10 ... 3.898721e+10 9.634731e+10 1.249965e+11 8.351906e+10 1.232691e+11 3.050516e+10 1.091722e+11 4.279726e+10 6.298054e+10 3.005144e+10
2021-11-26 4.405841e+10 6.358732e+10 3.132214e+10 6.882791e+10 6.633355e+10 7.996257e+10 4.789032e+10 2.795891e+11 4.646787e+10 5.317635e+10 ... 3.498675e+10 9.452378e+10 1.201978e+11 8.077986e+10 1.165751e+11 2.842012e+10 1.026981e+11 3.918302e+10 6.072620e+10 2.859604e+10
233 rows × 49 columns
计算df_prc:
if __name__ == '__main__':
    df_prc = process_dfprc(constituents)
# Bare expression so the frame is displayed when this is the last statement of
# a notebook cell. NOTE(review): placed outside the guard on that assumption;
# the original indentation was lost, so confirm.
df_prc
打印:
ADS.DE ADYEN.AS AD.AS AI.PA AIR.PA ALV.DE ABI.BR ASML.AS CS.PA BAS.DE ... SAF.PA SAN.PA SAP.DE SU.PA SIE.DE STLA.MI TTE.PA DG.PA VOW.DE VNA.DE
Date
2021-01-04 292.307343 1859.5 23.374092 133.809341 89.889999 190.011627 56.726482 404.039764 18.287489 61.853455 ... 115.747612 76.089233 103.588997 119.404495 114.593018 11.912073 34.584999 80.331764 163.641632 57.650417
2021-01-05 286.568085 1829.0 23.266014 132.387405 89.830002 188.060059 56.696735 404.686218 18.279963 61.386387 ... 115.996742 76.079597 103.313599 118.276649 114.359184 11.880051 35.494999 78.972191 163.936478 57.922352
2021-01-06 288.646088 1733.0 23.344616 132.681595 90.169998 197.837112 58.273296 400.658264 19.060837 64.217407 ... 115.897095 75.617287 103.746368 121.562111 118.743378 11.630281 37.055000 82.002113 165.509003 55.591476
2021-01-07 285.479584 1714.5 23.305313 133.711288 89.620003 196.210800 57.906425 401.702545 19.178438 65.208740 ... 115.398827 74.856400 102.802139 124.062988 121.101105 11.838422 37.459999 82.138069 168.064377 55.241844
2021-01-08 292.010498 1756.5 23.236538 134.495789 90.099998 196.832611 57.370987 413.786438 19.159622 65.370781 ... 115.946915 75.886971 104.434860 125.386978 120.808823 11.619075 37.485001 83.186890 165.803864 55.960529
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2021-11-22 278.950012 2459.0 30.770000 153.419998 108.779999 203.050003 52.439999 747.299988 25.395000 61.930000 ... 111.599998 89.360001 121.660004 162.100006 155.919998 17.458000 42.395000 89.529999 277.600006 51.072109
2021-11-23 271.850006 2399.5 30.860001 151.440002 110.260002 203.250000 52.040001 706.099976 25.495001 62.950001 ... 112.220001 88.379997 119.540001 159.899994 153.339996 17.010000 42.884998 90.580002 278.200012 49.422203
2021-11-24 268.399994 2364.5 30.530001 151.679993 111.459999 203.100006 52.380001 709.000000 25.540001 61.889999 ... 112.480003 87.309998 118.940002 156.160004 151.940002 16.808001 43.115002 91.599998 269.399994 51.900002
2021-11-25 267.950012 2458.5 30.584999 151.360001 112.260002 202.649994 54.349998 711.000000 25.469999 61.610001 ... 112.660004 86.650002 119.820000 156.720001 153.580002 16.708000 43.084999 91.769997 268.200012 52.240002
2021-11-26 253.350006 2422.5 30.325001 148.539993 99.360001 191.440002 51.189999 685.599976 24.225000 57.910000 ... 101.099998 85.010002 115.220001 151.580002 145.240005 15.566000 40.529999 84.019997 258.600006 49.709999
233 rows × 49 columns