Why do different initial points lead to different results in Lasso optimization (which is convex)?



I am trying to run Lasso optimization on data with 950 samples and about 5000 features. The Lasso objective is $\frac{1}{2\,n_{\text{samples}}}\|y - Xw\|_2^2 + \alpha\|w\|_1$. Once I run the minimization with an initialization, I get a completely different $w$. This is strange, because the Lasso is convex, so the initialization should not affect the result. Below are the results of the Lasso with and without initialization. tol is the tolerance; convergence is declared once the change in $w$ falls below the tolerance.

tol=0.00000001 
#####  lasso model errors  ##### 

gene: 5478 matrix error: 0.069611732213 
with initialization: alpha: 1e-20 promotion: -3.58847815733e-13 
coef: [-0.00214732 -0.00509795  0.00272167 -0.00651548 -0.00164646 -0.00115342 
  0.00553346  0.01047653  0.00139832] 
without initialization: alpha: 1e-20  promotion: -19.0735249749 
coef: [-0.03650629  0.08992003 -0.01287155  0.03203973  0.1567577  -0.03708655 
-0.13710957 -0.01252736 -0.21710334] 

with initialization: alpha: 1e-15 promotion: 1.06179081478e-10 
coef: [-0.00214732 -0.00509795  0.00272167 -0.00651548 -0.00164646 -0.00115342 
  0.00553346  0.01047653  0.00139832] 
without initialization: alpha: 1e-15  promotion: -19.0735249463 
coef: [-0.03650629  0.08992003 -0.01287155  0.03203973  0.1567577  -0.03708655 
-0.13710957 -0.01252736 -0.21710334] 

Warning (from warnings module): 
  File "/usr/local/lib/python2.7/site-packages/sklearn/linear_model/coordinate_descent.py", line 491 
    ConvergenceWarning) 
ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Fitting data with very small alpha may cause precision problems. 
with initialization: alpha: 1e-10  promotion: 0.775144987537 
coef: [-0.00185139 -0.0048819   0.00218349 -0.00622618 -0.00145647 -0.00115857 
  0.0055919   0.01072924  0.00043773] 
without initialization: alpha: 1e-10 promotion: -17.8649603301 
coef: [-0.03581581  0.0892119  -0.01232829  0.03151441  0.15606195 -0.03734093 
-0.13604286 -0.01247732 -0.21233529] 

with initialization: alpha: 1e-08 promotion: -5.87121366314 
coef: [-0.          0.         -0.         -0.01064477  0.         -0.00116167 
-0.          0.01114746  0.        ] 
without initialization: alpha: 1e-08  promotion: 4.05593555389 
coef: [ 0.          0.04505117  0.00668611  0.          0.07731668 -0.03537848 
-0.03151995  0.         -0.00310122] 

max promote: 
4.05593555389 

For the implementation I used the Lasso class from the Python package sklearn.linear_model. I also tried different data, and on the new data the results again changed with the initialization. I find this strange, but I have not been able to analyze it and find an explanation.

Here is the part of my code that concerns the Lasso. My data is gene expression. I tested the code on both normalized and non-normalized data; on both, the initial point makes a difference.

    alpha_lasso = [1e-20,1e-15, 1e-10, 1e-8, 1e-7,1e-6,1e-5,1e-4, 1e-3,1e-2, 1, 5 ,20]
    lassoreg = Lasso(alpha=alpha_lasso[i],warm_start=True,tol=0.00000001,max_iter=100000)
    lassoreg.coef_ = mybeta[:,j-c]
    lassoreg.fit(train[:,predictors],train[:,y])
    y_train_pred = lassoreg.predict(A)#train[:,predictors])
    y_test_pred = lassoreg.predict(C)#test[:,predictors])
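
The initialization mechanism here is sklearn's warm start: with warm_start=True, fit() reuses whatever is currently stored in coef_ as its starting point. For reference, a minimal, self-contained version of the comparison on synthetic data (shapes shrunk for speed; my real run uses the gene-expression matrix):

    import numpy as np
    from sklearn.linear_model import Lasso

    rng = np.random.RandomState(0)
    X = rng.randn(95, 500)   # fewer samples than features, like my real data
    y = rng.randn(95)

    # "with initialization": warm_start=True makes fit() start from coef_
    with_init = Lasso(alpha=1e-8, warm_start=True, tol=0.00000001, max_iter=100000)
    with_init.coef_ = rng.randn(500)   # hand-set starting point
    with_init.fit(X, y)

    # "without initialization": coordinate descent starts from all zeros
    no_init = Lasso(alpha=1e-8, tol=0.00000001, max_iter=100000)
    no_init.fit(X, y)

    # large differences remain when neither run has truly converged
    print(np.abs(with_init.coef_ - no_init.coef_).max())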

And here is my whole code:

import pandas as pd
import random
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os
from GEOparse.GEOTypes import (GSE, GSM, GPL, GDS,
                               GDSSubset, GEODatabase,
                               DataIncompatibilityException,
                               NoMetadataException,
                               )
import GEOparse as GEO
import copy
import sys
import math
from sklearn import linear_model
from sklearn.linear_model import Lasso
from sklearn.linear_model import LassoLars
from sklearn.linear_model import MultiTaskLassoCV
from sklearn.linear_model import coordinate_descent
from sklearn.linear_model import lasso_path, enet_path

from sklearn.base import BaseEstimator, RegressorMixin
from copy import deepcopy
miss_percent = 0.1
alpha_lasso = [1e-20,1e-15, 1e-10, 1e-8, 1e-7,1e-6,1e-5,1e-4, 1e-3,1e-2, 1, 5 ,20]
mins=[]
maxs=[]
mean_err=[]
alphas=[]
mins1=[]
maxs1=[]
mean_err1=[]
alphas1=[]
#mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True)
def getdata(percent):
    # download GEO dataset GDS4971, replace 'null' entries with NaN and
    # drop every probe (table row) that still has a missing value
    gsd = GEO.get_GEO(geo="GDS4971")
    ngsd = gsd.table.replace('null', np.NaN)
    ngsd = ngsd.dropna(axis=0, how='any')
    ngsd = ngsd.transpose()
    dataarray = ngsd.values
    # after the transpose the first two rows hold ID_REF/IDENTIFIER
    # annotations; drop them and cast the expression values to float
    data = np.delete(dataarray, [0,1], 0)
    x = data.astype(np.float)
    r_df = x.shape[0]
    c_df = x.shape[1]
    # split indices for samples (r) and genes (c), derived from the
    # requested missing-data percentage
    r = int(r_df-math.sqrt((1-percent)*r_df))
    c = int(c_df-math.sqrt((1-percent)*c_df))
    train = x[0:r,:]
    test = x[r:r_df,:]
    return x,train,test,r_df,c_df,r,c

genedata,train,test,r_df,c_df,r,c = getdata(miss_percent)
predictors = range(0,c)
promotion =[[0.001 for x in range(len(alpha_lasso))] for y in range(c_df-c)]
promotion = np.asmatrix(promotion)
#error of ax-b 
error_aw_b = [[0.001 for x in range(len(alpha_lasso))] for y in range(c_df-c)]
error_aw_b = np.asmatrix(error_aw_b)
#error of cw-x
error_cw_x = [[0.001 for x in range(len(alpha_lasso))] for y in range(c_df-c)]
error_cw_x = np.asmatrix(error_cw_x)
#error of lasso function
error_lasso = [[0.001 for x in range(len(alpha_lasso))] for y in range(c_df-c)]
error_lasso = np.asmatrix(error_lasso)
promotion1 =[[0.001 for x in range(len(alpha_lasso))] for y in range(c_df-c)]
promotion1 = np.asmatrix(promotion1)
#error of ax-b 
error_aw_b1 = [[0.001 for x in range(len(alpha_lasso))] for y in range(c_df-c)]
error_aw_b1 = np.asmatrix(error_aw_b1)
#error of cw-x
error_cw_x1 = [[0.001 for x in range(len(alpha_lasso))] for y in range(c_df-c)]
error_cw_x1 = np.asmatrix(error_cw_x1)
#error of lasso function
error_lasso1 = [[0.001 for x in range(len(alpha_lasso))] for y in range(c_df-c)]
error_lasso1 = np.asmatrix(error_lasso1)

mybeta = np.random.randn(c, c_df-c)  # any initialization (placeholder: random start, shape = predictors x target genes)
######################              LASSO              #####################
print("#####  lasso model errors  #####")
for j in range(c,c+1):
    mean_err=[]
    print("n")
    y=j
    eachMeanError= math.sqrt((np.power(errorC[:,j-c],2)).sum()/(r_df-r))  # errorC: baseline residual matrix computed elsewhere (not shown)
    print("gene: "+str(j)+ " matrix error: "+ str(eachMeanError))
    for i in range(0,4):#len(alpha_lasso)):
        lassoreg = Lasso(alpha=alpha_lasso[i],warm_start=True,tol=0.00000001,max_iter=100000)
        lassoreg.coef_ = mybeta[:,j-c]
        lassoreg.fit(train[:,predictors],train[:,y])
        y_train_pred = lassoreg.predict(A)#train[:,predictors])
        y_test_pred = lassoreg.predict(C)#test[:,predictors])
        y_lasso_func = (1.0/(2*r))*sum((y_train_pred-train[:,y])**2)+alpha_lasso[i]*sum(abs(lassoreg.coef_))  # lasso objective: (1/(2m))*||y-Xw||_2^2 + alpha*||w||_1
        ##################      RMS     ##################
        error_aw_b[j-c,i] = math.sqrt(sum((y_train_pred-train[:,y])**2)/r) 
        error_lasso[j-c,i] = y_lasso_func
        error_cw_x[j-c,i] = math.sqrt(sum((y_test_pred-test[:,y])**2)/(r_df-r)) 
        mins.extend([(error_cw_x.min())])
        maxs.extend([(error_cw_x.max())])
        promotion[j-c,i] = (((eachMeanError-error_cw_x[j-c,i])/eachMeanError)*100)
        print("alpha: "+str(alpha_lasso[i])+ " error_aw_b: "+str(error_aw_b[j-c,i]) + " error_cw_x: " + str(error_cw_x[j-c,i])+" error_lasso: "+str(error_lasso[j-c,i]) + " promotion: " + str(promotion[j-c,i]) )
        print("coef: " + str(lassoreg.coef_[1:10]))
        lassoreg1 = Lasso(alpha=alpha_lasso[i],tol=0.00000001,max_iter=100000)
        lassoreg1.fit(train[:,predictors],train[:,y])
        y_train_pred1 = lassoreg1.predict(A)#train[:,predictors])
        y_test_pred1 = lassoreg1.predict(C)#test[:,predictors])
        y_lasso_func1 = (1.0/(2*r))*sum((y_train_pred1-train[:,y])**2)+alpha_lasso[i]*sum(abs(lassoreg1.coef_))  # same lasso objective as above
        ##################      RMS     ##################
        error_aw_b1[j-c,i] = math.sqrt(sum((y_train_pred1-train[:,y])**2)/r) 
        error_lasso1[j-c,i] = y_lasso_func1
        error_cw_x1[j-c,i] = math.sqrt(sum((y_test_pred1-test[:,y])**2)/(r_df-r)) 
        mins1.extend([(error_cw_x1.min())])
        maxs1.extend([(error_cw_x1.max())])
        promotion1[j-c,i] = (((eachMeanError-error_cw_x1[j-c,i])/eachMeanError)*100)
        print("alpha: "+str(alpha_lasso[i])+ " error_aw_b: "+str(error_aw_b1[j-c,i]) + " error_cw_x: " + str(error_cw_x1[j-c,i])+" error_lasso: "+str(error_lasso1[j-c,i]) + " promotion: " + str(promotion1[j-c,i]) )
        print("coef: " + str(lassoreg1.coef_[1:10]))
        print("n")
    print("max promote:")
    print((promotion[j-c,:].max()))
f = open('analyse_col', 'wb')
np.save(f, [promotion,alphas,error_cw_x,mins,maxs])
f.close()
plt.plot(promotion[:,j-c])
plt.ylabel('coef for ')
plt.xlabel('each gene')
plt.show()

You have m samples and n features, with m = 950, n = 5000.

The key point here is: when p > n, the lasso criterion is not strictly convex, and hence it may not have a unique minimum. (Reference.)
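
One concrete way to see the non-uniqueness (a toy sketch, synthetic data): if two columns of X are identical, coefficient mass can be shifted between them without changing the lasso objective at all, so the criterion has flat directions; with p > n, such flat directions can pass through the set of minimizers:

    import numpy as np

    rng = np.random.RandomState(0)
    n, p = 20, 5
    X = rng.randn(n, p)
    X = np.hstack([X, X[:, [0]]])        # last column duplicates column 0
    y = rng.randn(n)
    alpha = 0.1

    def lasso_objective(w):
        return np.sum((y - X.dot(w))**2) / (2.0*n) + alpha*np.abs(w).sum()

    w1 = rng.rand(p + 1)                 # nonnegative weights (same sign)
    w2 = w1.copy()
    w2[0], w2[-1] = w1[0] + w1[-1], 0.0  # move mass between twin columns

    print(lasso_objective(w1), lasso_objective(w2))   # identical objectives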

This complicates the optimization a bit (remember: it is not the easiest of problems to begin with!).

In your case, there is a clear warning with a recommendation: increase the number of iterations! And make sure your alphas are not too small. I am not sure how you chose the latter, but if those 1e-15-like magnitudes are hand-crafted, rethink your problem formulation!
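
One way to act on that warning programmatically (a sketch; in recent sklearn versions ConvergenceWarning lives in sklearn.exceptions):

    import warnings
    import numpy as np
    from sklearn.exceptions import ConvergenceWarning
    from sklearn.linear_model import Lasso

    rng = np.random.RandomState(0)
    X, y = rng.randn(95, 500), rng.randn(95)

    model = Lasso(alpha=0.1, tol=0.00000001, max_iter=100000)

    # escalate the warning so a non-converged fit cannot pass silently
    with warnings.catch_warnings():
        warnings.simplefilter("error", category=ConvergenceWarning)
        model.fit(X, y)

    # n_iter_ close to max_iter is a red flag even without a warning
    print("iterations used:", model.n_iter_)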

That warning alone is reason enough not to treat these vectors as solutions of the optimization problem (so "my lasso behaves differently for different inits" is technically incorrect; it only describes the behavior of your approximate, non-converged solutions).
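
Accordingly, compare the two runs by objective value rather than by coefficient vector (a sketch using the objective from the question; the names coef_with_init / coef_without_init are placeholders for your two fitted coefficient vectors):

    import numpy as np

    def lasso_objective(X, y, w, alpha):
        # (1 / (2 * n_samples)) * ||y - Xw||_2^2 + alpha * ||w||_1
        n = X.shape[0]
        return np.sum((y - X.dot(w))**2) / (2.0*n) + alpha*np.abs(w).sum()

    # near-equal objectives despite different coefficients indicate a flat
    # (non-unique) optimum; clearly different objectives mean at least one
    # run stopped before converging.
    # obj_warm = lasso_objective(X, y, coef_with_init, alpha)
    # obj_cold = lasso_objective(X, y, coef_without_init, alpha)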
