cols = Germandata.columns
percentage_list = [0.05,0.01,0.1]
for i in range(len(Germandata)) :
for percentage in percentage_list:
columns_n = 3
random_columns = np.random.choice(cols, columns_n, replace=False)
local_data = Germandata.copy()
remove_n = int(round(local_data.shape[0] * percentage, 0))
for column_name in random_columns:
drop_indices = np.random.choice(local_data.index, remove_n, replace=False)
local_data.loc[drop_indices, column_name] = np.nan
此处的代码随机选择列,并从数据中删除一定百分比的观测值,并将其替换为 NN。这里的问题是在运行循环后,我将在百分比列表中获得最终的百分比删除数据帧,因为它在每次迭代后都会覆盖。如何在每次迭代后使用 nans 存储数据帧。?理想情况下,我应该删除三个数据帧,删除不同百分比的数据。
试试这个
df_list = []
cols = Germandata.columns
percentage_list = [0.05,0.01,0.1]
for percentage in percentage_list:
columns_n = 3
random_columns = np.random.choice(cols, columns_n, replace=False)
local_data = Germandata.copy()
remove_n = int(round(local_data.shape[0] * percentage, 0))
for column_name in random_columns:
drop_indices = np.random.choice(local_data.index, remove_n, replace=False)
local_data.loc[drop_indices, column_name] = np.nan
local_data['percentage'] = percentage # optional
df_list.append(local_data)
df_05 = df_list[0]
df_01 = df_list[1]
df_1 = df_list[2]
或者,您可以使用字典
df_dict = {}
cols = Germandata.columns
percentage_list = [0.05,0.01,0.1]
for percentage in percentage_list:
columns_n = 3
random_columns = np.random.choice(cols, columns_n, replace=False)
local_data = Germandata.copy()
remove_n = int(round(local_data.shape[0] * percentage, 0))
for column_name in random_columns:
drop_indices = np.random.choice(local_data.index, remove_n, replace=False)
local_data.loc[drop_indices, column_name] = np.nan
local_data['percentage'] = percentage # optional
df_dict[str(percentage)] = local_data
df_05 = df_dict['0.05']
df_01 = df_dict['0.01']
df_1 = df_dict['0.1']