如何在使用 pivot_table 和 MultiIndex 操作数据帧后编写函数以绘制一段时间内的变量



以下代码完美运行并创建数据帧

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import matplotlib as mpl
sns.set()
# Toy data set: 27 rows with three randomly chosen categorical keys
# (scenario, region, variable) and one column of standard-normal draws for
# each year from 2015 to 2021.
df = pd.DataFrame(
    {
        "scenario": np.random.choice(["BAU", "ETS", "ESD"], 27),
        "region": np.random.choice(["Italy", "France"], 27),
        "variable": np.random.choice(["GDP", "GHG"], 27),
        # Year columns are keyed by strings, in ascending order.
        **{str(year): np.random.randn(27) for year in range(2015, 2022)},
    }
)

# Wide -> long: the year columns collapse into a "year" / "value" pair.
df2 = df.melt(id_vars=["scenario", "region", "variable"], var_name="year")

# Sorted four-level index so a (scenario, region, variable) key selects the
# per-year rows directly.
index_levels = ["scenario", "region", "variable", "year"]
all_names_index = df2.set_index(index_levels).sort_index()

然后我使用一个函数来迭代绘制:

def name_plot(scenario, region, variable):
    """Add one scenario's line to the current Matplotlib figure.

    Selects the rows of the module-level ``all_names_index`` frame that match
    ``(scenario, region, variable)`` and plots their ``value`` column against
    the remaining index, using the scenario as the legend label.
    """
    key = (scenario, region, variable)
    series = all_names_index.loc[key]
    plt.plot(series.index, series["value"], label=f"{scenario}")
# Bold 13 pt text for every figure drawn from here on.  The family must be a
# real font name or one of Matplotlib's generic families; 'normal' is neither
# and only triggers a "findfont: Font family ['normal'] not found" warning
# before falling back to the default font, so request 'sans-serif' directly.
font = {
    'family': 'sans-serif',
    'weight': 'bold',
    'size': 13,
}
plt.rc('font', **font)

# Overlay the Italian GHG series of every scenario on one figure.
names = ['BAU', 'ETS', 'ESD']
for scenario in names:
    name_plot(scenario, 'Italy', 'GHG')

plt.xlabel('Years')
plt.ylabel('MtCO2e')
plt.title('Emissions Pathways')
plt.legend()
plt.savefig('EMIp.png')
# Clear the figure so later plots start from an empty canvas.
plt.clf()

由于我需要创建一个新的区域“EU”(欧盟),其值为两个国家之和,因此我创建了一个 pivot_table:

# Each aggregate region maps to the member regions whose values are summed.
map_eu = {
    'EU': ['Italy', 'France'],
}

# One column per region; rows are keyed by (scenario, variable, year).
# pivot_table aggregates duplicate keys with its default function (the mean).
df3 = pd.pivot_table(df2, 'value', ['scenario', 'variable', 'year'], 'region')
for k, v in map_eu.items():
    # Row-wise sum over the member-region columns.
    df3[k] = df3[v].sum(axis=1)

# Move the regions into the row index, then spread the variables over columns.
df3 = df3.stack(0).unstack(1)
# ``axis`` is keyword-only in pandas 2.x, so the positional form
# ``sort_index(0, ...)`` raises TypeError there.
df3.sort_index(axis=0, inplace=True)

如何使用之前定义的函数 name_plot 来绘制 df3?我不明白如何重新排列 pivot_table 的结果,以获得与 df2 相同的结构。

考虑调整绘图函数,使其接收数据框作为参数,而不是依赖全局环境中的数据框(即 all_names_index)。此外,在函数内部执行所有绘图操作,以便更好地组织代码。请务必动态设置标题和文件名。

def name_plot(df, scenario, region, variable):
    """Plot one scenario/region/variable series and save it as a PNG.

    Args:
        df: Frame indexed so that ``(scenario, region, variable)`` selects the
            rows to plot; it must expose a ``value`` column.
        scenario: Scenario label; used as the legend entry and in the file name.
        region: Region label; used in the title and in the file name.
        variable: Variable label; used in the title and in the file name.

    The figure is written to ``'<scenario> <region> <variable> EMP.png'`` and
    then cleared, so every call produces its own image.
    """
    selection = df.loc[(scenario, region, variable)]

    axes = plt.gca()
    axes.plot(selection.index, selection['value'], label=scenario)
    axes.set_xlabel('Years')
    axes.set_ylabel('MtCO2e')
    axes.set_title(f'{region} {variable} - Emissions Pathways')
    axes.legend()

    plt.tight_layout()
    plt.savefig(f'{scenario} {region} {variable} EMP.png')
    plt.clf()

然后,对 pivot_table 生成的数据框再次执行 melt:

### ITALY PLOTS
names = ['BAU', 'ETS', 'ESD']
for scenario in names:
    name_plot(all_names_index, scenario, 'Italy', 'GHG')

### EU PLOTS
# One column per region; rows are keyed by (scenario, variable, year).
df3 = pd.pivot_table(df2, 'value', ['scenario', 'variable', 'year'], 'region')
map_eu = {'EU': ['Italy', 'France']}
for k, v in map_eu.items():
    # reindex (rather than df3[v]) tolerates a member region that is missing
    # from the pivot: it becomes an all-NaN column instead of a KeyError.
    df3[k] = df3.reindex(v, axis='columns').sum(axis=1)

# Back to the long layout of all_names_index: melt the region columns into a
# "region"/"value" pair and rebuild the same four-level index.  Sorting keeps
# it consistent with all_names_index and avoids the PerformanceWarning pandas
# emits for partial-key .loc lookups on an unsorted MultiIndex.
df3 = (
    df3.reset_index()
    .melt(id_vars=['scenario', 'variable', 'year'], var_name='region')
    .set_index(['scenario', 'region', 'variable', 'year'])
    .sort_index()
)
for scenario in names:
    name_plot(df3, scenario, 'EU', 'GHG')

最新更新