k近邻模型不工作.不再支持向.loc或[]传递带有任何缺失标签的类列表



数据集可以在这里找到:https://gofile.io/d/f8nBLL

我正在尝试运行k近邻模型,但我面临以下问题

KeyError: '不再支持将list-like传递给.loc或[],任何缺少标签,请参阅https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike'

我使用的代码如下:

def knn_train_test(variable_columns,target_columns,df,hyp_test=False):
# Randomizing the Data
np.random.seed(1)
df=df.loc[np.random.permutation(len(df))]
df = df.reset_index(drop=True)
# Dividing Training 75% / Test 25% Dataset 
rows=round(df.shape[0]*0.75)
training=df[:rows]
test=df[rows:]
x_training=training[variable_columns]
y_training=training[target_columns]
x_test=test[variable_columns]
y_test=test[target_columns]
#Testing Multiple Hyper Parameters and Accuracy
if test==True:
hyper_params = [x for x in range(1,21)]
# Append the first model's MSE values to this list.
two_mse_values = list()
# Append the second model's MSE values to this list.
two_hyp_mse = dict()
for hp in hyper_params:
knn = KNeighborsRegressor(n_neighbors=hp) #Selecting the Model
knn.fit(x_training, y_training)
predictions = knn.predict(x_test)
rmse = mean_squared_error(y_test, predictions,squared=False)
two_mse_values.append(rmse)
two_lowest_mse = two_mse_values[0]
two_lowest_k = 1
for k,mse in enumerate(two_mse_values):
if rmse < two_lowest_mse:
two_lowest_mse = rmse
two_lowest_k = k + 1
two_hyp_mse[two_lowest_k] = two_lowest_mse
return (f'k:{two_lowest_k}, RMSE:{two_lowest_mse}')
else:
knn = KNeighborsRegressor() #Selecting the Model
knn.fit(x_training, y_training)
predictions = knn.predict(x_test)
rmse = mean_squared_error(y_test, predictions)
return rmse
variables=numeric_cars.drop('price',axis=1)
target=numeric_cars['price']
# For each column (minus `price`), train a model, return RMSE value
# and add to the dictionary `rmse_results`.
for col in variables.columns:
rmse_val = knn_train_test(col, 'price', numeric_cars)
rmse_results[col] = rmse_val
# Create a Series object from the dictionary so 
# we can easily view the results, sort, etc
rmse_results_series = pd.Series(rmse_results)
rmse_results_series.sort_values()

这段代码为我工作

import pandas as pd
import numpy as np
import sklearn
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error
assert(pd.__version__ == '1.2.1')
assert(np.__version__ == '1.18.5')
assert(sklearn.__version__ == '0.23.1')
def knn_train_test(variable_columns,target_columns,df,hyp_test=False):
# Randomizing the Data
np.random.seed(1)
df=df.loc[np.random.permutation(len(df))]
df = df.reset_index(drop=True)
# Dividing Training 75% / Test 25% Dataset 
rows=round(df.shape[0]*0.75)
training=df[:rows]
test=df[rows:]
x_training=training[variable_columns]
y_training=training[target_columns]
x_test=test[variable_columns]
y_test=test[target_columns]
#Testing Multiple Hyper Parameters and Accuracy
if hyp_test==True:
hyper_params = [x for x in range(1,21)]
# Append the first model's MSE values to this list.
two_mse_values = list()
# Append the second model's MSE values to this list.
two_hyp_mse = dict()
for hp in hyper_params:
knn = KNeighborsRegressor(n_neighbors=hp) #Selecting the Model
knn.fit(x_training, y_training)
predictions = knn.predict(x_test)
rmse = mean_squared_error(y_test, predictions,squared=False)
two_mse_values.append(rmse)
two_lowest_mse = two_mse_values[0]
two_lowest_k = 1
for k,mse in enumerate(two_mse_values):
if rmse < two_lowest_mse:
two_lowest_mse = rmse
two_lowest_k = k + 1
two_hyp_mse[two_lowest_k] = two_lowest_mse
return (f'k:{two_lowest_k}, RMSE:{two_lowest_mse}')
else:
knn = KNeighborsRegressor() #Selecting the Model
knn.fit(x_training, y_training)
predictions = knn.predict(x_test)
rmse = mean_squared_error(y_test, predictions)
return rmse
variables=numeric_cars.drop('price',axis=1)
target=numeric_cars['price']
# For each column (minus `price`), train a model, return RMSE value
# and add to the dictionary `rmse_results`.
for col in variables.columns:
rmse_val = knn_train_test(col, 'price', numeric_cars)
rmse_results[col] = rmse_val
# Create a Series object from the dictionary so 
# we can easily view the results, sort, etc
rmse_results_series = pd.Series(rmse_results)
rmse_results_series.sort_values()
file = '~/Downloads/numeric_cars.csv'
df = pd.read_csv(file)
variable_columns = ['normalized-losses', 'wheel-base']
target_columns = ['price']

print(knn_train_test(variable_columns, target_columns, df))

相关内容

最新更新