PyTorch NN not as good as sklearn MLP



I am comparing the accuracy of sklearn's MLPRegressor against an equivalent network in PyTorch, but the PyTorch model always performs much worse. I can't figure out why. My code is below.

import torch
from torch import nn
from torch.utils.data import DataLoader
from tqdm import tqdm
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import r2_score

poly = PolynomialFeatures(2, interaction_only=True)
X_train, X_test, y_train, y_test = train_test_split(poly.fit_transform(X), y.ravel(),
                                                    test_size=0.15,
                                                    random_state=0, shuffle=True)
layers = (78, 22, 8, 3, 3, 1)
regr_nn = MLPRegressor(hidden_layer_sizes=layers, random_state=0, max_iter=20000,
                       solver='lbfgs',
                       activation='tanh', alpha=1e-5)
regr_nn.fit(X_train, y_train)
y_predict_test_nn = regr_nn.predict(X_test)    # predict takes features, not targets
y_predict_train_nn = regr_nn.predict(X_train)
test_score = regr_nn.score(X_test, y_test)
train_score = regr_nn.score(X_train, y_train)

# Same split again for the PyTorch model (shuffle=True is the default)
poly = PolynomialFeatures(2, interaction_only=True)
X_train, X_test, y_train, y_test = train_test_split(poly.fit_transform(X),
                                                    y.ravel(), test_size=0.15,
                                                    random_state=0)
# Convert the numpy arrays to torch tensors
x_test, y_test = torch.from_numpy(X_test.astype('float')), torch.from_numpy(y_test)
y_test = y_test.reshape((y_test.shape[0], 1))
x_train, y_train = torch.from_numpy(X_train.astype('float')), torch.from_numpy(y_train)
y_train = y_train.reshape((y_train.shape[0], 1))
class Train_set(torch.utils.data.Dataset):
    def __init__(self, X, y):
        if not torch.is_tensor(X) and not torch.is_tensor(y):
            self.X = torch.from_numpy(X)
            self.y = torch.from_numpy(y)
        else:
            self.X = X
            self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, i):
        return self.X[i], self.y[i]
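# Note: torch.utils.data.TensorDataset gives the same behavior as this custom
# Dataset; train_set = torch.utils.data.TensorDataset(x_train, y_train) would
# be an equivalent drop-in alternative.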

class Net(torch.nn.Module):
    def __init__(self, n_feature):
        super(Net, self).__init__()
        # tanh activations on every hidden layer, mirroring the sklearn model
        self.regress = nn.Sequential(nn.Linear(n_feature, 78), nn.Tanh(),
                                     nn.Linear(78, 22), nn.Tanh(),
                                     nn.Linear(22, 8), nn.Tanh(),
                                     nn.Linear(8, 3), nn.Tanh(),
                                     nn.Linear(3, 3), nn.Tanh(),
                                     nn.Linear(3, 1))

    def forward(self, x):
        return self.regress(x.float())  # cast inputs to float32 to match the layer weights
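# Note: nn.Linear weights default to float32, hence the x.float() cast above;
# calling net.double() instead would keep float64 end to end, closer to
# sklearn's default precision (an optional tweak, not from the original code).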

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
net = Net(n_feature=x_train.size(1))
net.to(device)
optimizer = torch.optim.LBFGS(net.parameters(), max_iter=20000, lr=1e-5,
                              tolerance_grad=1e-07, tolerance_change=1e-05)
loss_func = torch.nn.MSELoss()  # mean squared error loss for regression
train_set = Train_set(x_train, y_train)
trainloader = DataLoader(train_set, batch_size=10, shuffle=True)
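# Note: batch_size=10 makes this mini-batch training, while sklearn's
# solver='lbfgs' optimizes over the full training set (see the answer below).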
CL = []
# Train the network
for t in tqdm(range(10)):
    for i, data in enumerate(trainloader, 0):
        def closure():
            # Get and prepare inputs
            inputs, targets = data
            inputs, targets = inputs.float(), targets.float()
            inputs, targets = inputs.to(device), targets.to(device)
            targets = targets.reshape((targets.shape[0], 1))
            # Zero the gradients
            optimizer.zero_grad()
            # Perform forward pass
            outputs = net(inputs)
            # Compute loss
            loss = loss_func(outputs, targets)
            # Perform backward pass
            loss.backward()
            return loss
        optimizer.step(closure)  # apply gradients

with torch.no_grad():  # evaluation only; move predictions back to the CPU for numpy
    prediction_train = net(x_train.to(device))
    prediction_test = net(x_test.to(device))
train_score = r2_score(y_train.numpy(), prediction_train.cpu().numpy())
test_score = r2_score(y_test.numpy(), prediction_test.cpu().numpy())

sklearn's R^2 score is above 0.9 and its parity plot looks like a straight line, but the PyTorch score is close to zero and its parity plot looks bad. [Images: sklearn results, PyTorch results.] I would appreciate any help. Thank you very much.

I think your closure function needs to be inside the trainloader loop:

for t in tqdm(range(10)):
    for i, data in enumerate(trainloader, 0):
        def closure():
            # Get and prepare inputs
            inputs, targets = data
            inputs, targets = inputs.float(), targets.float()
            inputs, targets = inputs.to(device), targets.to(device)
            targets = targets.reshape((targets.shape[0], 1))
            # Zero the gradients
            optimizer.zero_grad()
            # Perform forward pass
            outputs = net(inputs)
            # Compute loss
            loss = loss_func(outputs, targets)
            # Perform backward pass
            loss.backward()
            return loss
        optimizer.step(closure)  # <<< this is applied to the END of `closure`

I can't say for sure, since I haven't used LBFGS much, but I believe your current approach only takes one step per epoch.
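For what it's worth, sklearn's solver='lbfgs' runs full-batch L-BFGS over the whole training set, so a closer PyTorch equivalent calls the optimizer once on all the training data instead of on mini-batches of 10. Here is a minimal sketch, reusing net, loss_func, device, x_train, and y_train from above; the lr, max_iter, history_size, and line-search settings are illustrative choices, not values from your code:

# Full-batch L-BFGS: one optimizer.step(closure) internally runs up to
# max_iter L-BFGS iterations over the entire training set, much like
# sklearn's lbfgs solver does.
optimizer = torch.optim.LBFGS(net.parameters(), lr=1.0, max_iter=500,
                              history_size=10, line_search_fn='strong_wolfe')

x_all = x_train.float().to(device)   # full training set, not mini-batches
y_all = y_train.float().to(device)   # already shaped (N, 1) above

def closure():
    optimizer.zero_grad()
    loss = loss_func(net(x_all), y_all)
    loss.backward()
    return loss

optimizer.step(closure)

A single call like this (or a few of them) should behave much more like MLPRegressor's lbfgs solver than stepping once per mini-batch does.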