我正在试验LSTM,特别是将一个序列输入到LSTM中,将状态转移到另一个LSTM中,然后解码序列。我在两个 LSTM 之间添加了一个自动编码器,通过较低维的潜在空间对传输的状态进行编码和解码。
当我创建模型并拟合它时,这工作正常。但是,如果我保存此模型,然后尝试继续训练它,或者甚至只是使用它而不进行其他训练,则模型不会运行,并且我会收到以下警告:
Traceback (most recent call last):
File "s2s_AE_2.py", line 140, in <module>
model.fit_generator(train_generator(),callbacks=[checkpointer], steps_per_epoch=30, epochs=2000, verbose=1,validation_data=val_generator(),validation_steps=30)
File "C:ProgramDataAnaconda3libsite-packageskeraslegacyinterfaces.py", line 91, in wrapper
return func(*args, **kwargs)
File "C:ProgramDataAnaconda3libsite-packageskerasenginetraining.py", line 2224, in fit_generator
class_weight=class_weight)
File "C:ProgramDataAnaconda3libsite-packageskerasenginetraining.py", line 1877, in train_on_batch
class_weight=class_weight)
File "C:ProgramDataAnaconda3libsite-packageskerasenginetraining.py", line 1476, in _standardize_user_data
exception_prefix='input')
File "C:ProgramDataAnaconda3libsite-packageskerasenginetraining.py", line 86, in _standardize_input_data
str(len(data)) + ' arrays: ' + str(data)[:200] + '...')
ValueError: Error when checking model input: the list of Numpy arrays that you are passing to your model is not the size the model expected. Expected to see 1 array(s), but instead got the following list of 2 arrays: [array([[[ 0.47338937, 0.75865918, 0.37731877, 0.63840222,
0.14653083],
[ 0.52119932, 0.78308798, 0.45885839, 0.66738276,
0.20393343],
[ 0.5674261 , 0.806364...
我的代码如下:
from keras.models import Model
from keras.layers import Input, LSTM, Dense, TimeDistributed,Lambda, Dropout, Activation ,RepeatVector
from keras.callbacks import ModelCheckpoint
import numpy as np
from keras.layers import Lambda, Concatenate
from keras import backend as K
from keras.models import load_model
import os
seq_length=150
features_num=5
LSTM_latent_dim=40
AE_latent_dim=10
encoder_inputs = Input(shape=(seq_length, features_num))
encoder = LSTM(LSTM_latent_dim, return_state=True)
encoder_outputs, state_h, state_c = encoder(encoder_inputs)
merged_encoder_states = Concatenate(axis=-1)([state_h, state_c])
encoded_states=Dense(AE_latent_dim,activation='relu')(merged_encoder_states)
decoded_states=Dense(LSTM_latent_dim*2, activation='relu')(encoded_states)
decoder_inputs=Input(shape=(1, features_num))
decoder_lstm = LSTM(LSTM_latent_dim, return_sequences=True, return_state=True)
decoder_dense = Dense(features_num)
all_outputs = []
inputs = decoder_inputs
states=[decoded_states[:,:LSTM_latent_dim],decoded_states[:,LSTM_latent_dim:]]
for _ in range(seq_length):
# Run the decoder on one timestep
outputs, state_h, state_c = decoder_lstm(inputs, initial_state=states)
outputs = decoder_dense(outputs)
# Store the current prediction (we will concatenate all predictions later)
all_outputs.append(outputs)
# Reinject the outputs as inputs for the next loop iteration
# as well as update the states
inputs = outputs
states = [state_h, state_c]
# Concatenate all predictions
decoder_outputs = Lambda(lambda x: K.concatenate(x, axis=1))(all_outputs)
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
#model = load_model('pre_model.h5')
filepath_for_w= 'AE2_p2p_s2s_model.h5'
try:
model = load_model(filepath_for_w) # if model was previouslly run, continue from it
print("loaded model")
except: print("new model")
print(model.summary())
model.compile(loss='mean_squared_error', optimizer='adam')
def create_wavelength(min_wavelength, max_wavelength, fluxes_in_wavelength, category ) :
#category :: 0 - train ; 2 - validate ; 4- test. 1;3;5 - dead space
c=(category+np.random.random())/6
k = fluxes_in_wavelength
#
base= (np.trunc(k*np.random.random()*(max_wavelength-min_wavelength)) +k*min_wavelength) /k
answer=base+c/k
return (answer)
def make_line(length,category):
shift= np.random.random()
wavelength = create_wavelength(30,10,1,category)
a=np.arange(length)
answer=np.sin(a/wavelength+shift)
return answer
def make_data(seq_num,seq_len,dim,category):
data=np.array([]).reshape(0,seq_len,dim)
for i in range (seq_num):
mini_data=np.array([]).reshape(0,seq_len)
for j in range (dim):
line = make_line(seq_len,category)
line=line.reshape(1,seq_len)
mini_data=np.append(mini_data,line,axis=0)
mini_data=np.swapaxes(mini_data,1,0)
mini_data=mini_data.reshape(1,seq_len,dim)
data=np.append(data,mini_data,axis=0)
return (data)
def train_generator():
while True:
sequence_length = seq_length+1
data=make_data(1000,sequence_length,features_num,0) # category=0 in train
#
encoder_input_data =data[:,1:,:] # all
#
decoder_input_data = data[:,0,:] # the first value in the sequence
decoder_input_data=decoder_input_data.reshape((decoder_input_data.shape[0],1,decoder_input_data.shape[1]))
#
#
decoder_target_data = encoder_input_data
yield [encoder_input_data, decoder_input_data], decoder_target_data
def val_generator():
while True:
sequence_length =seq_length+1
data=make_data(1000,sequence_length,features_num,2) # category=2 in val
#
#
# # decoder_target_data is the same as decoder_input_data but offset by one timestep
#
encoder_input_data =data[:,1:,:] # all
#
decoder_input_data = data[:,0,:] # the one before the last one.
decoder_input_data=decoder_input_data.reshape((decoder_input_data.shape[0],1,decoder_input_data.shape[1]))
#
#
decoder_target_data = encoder_input_data
yield [encoder_input_data, decoder_input_data], decoder_target_data
checkpointer=ModelCheckpoint(filepath_for_w, monitor='val_loss', verbose=0, save_best_only=True, mode='auto', period=1)
model.fit_generator(train_generator(),callbacks=[checkpointer], steps_per_epoch=30, epochs=2000, verbose=1,validation_data=val_generator(),validation_steps=30)
model.save(filepath_for_w)
def predict_wave(input_wave,input_for_decoder): # input wave= x[n,:,:], ie points except the last seq_length; each wave has feature_num features. run this function for all such instances (=n)
#print (input_wave.shape)
#print (input_for_decoder.shape)
pred= model.predict([input_wave,input_for_decoder])
#
return pred
def predict_many_waves_from_input(x):
x, x2=x # x == encoder_input_data ; x==2 decoder_input_data
#
instance_num= x.shape[0]
#
#
multi_predict_collection=np.zeros((x.shape[0],seq_length,x.shape[2]))
#
for n in range(instance_num):
input_wave=x[n,:,:].reshape(1,x.shape[1],x.shape[2])
input_for_decoder=x2[n,:,:].reshape(1,x2.shape[1],x2.shape[2])
wave_prediction=predict_wave(input_wave,input_for_decoder)
multi_predict_collection[n,:,:]=wave_prediction
return (multi_predict_collection)
def test_maker():
if True:
sequence_length = seq_length +1
data=make_data(470,sequence_length,features_num,4) # category=4 in test
#
encoder_input_data =data[:,1:,:] # all
#
decoder_input_data = data[:,0,:] # the first value
decoder_input_data=decoder_input_data.reshape((decoder_input_data.shape[0],1,decoder_input_data.shape[1]))
#
#
decoder_target_data = encoder_input_data
return [encoder_input_data, decoder_input_data], decoder_target_data
x,y= test_maker()
a=predict_many_waves_from_input (x)
x=x[0] # keep the wave (generated data except last seq_length time points)
print (x.shape)
print (y.shape)
print (a.shape)
np.save ('a.npy',a)
np.save ('y.npy',y)
np.save ('x.npy',x)
print (np.mean(np.absolute(y[:,:,0]-a[:,:,0])))
print (np.mean(np.absolute(y[:,:,1]-a[:,:,1])))
print (np.mean(np.absolute(y[:,:,2]-a[:,:,2])))
print (np.mean(np.absolute(y[:,:,3]-a[:,:,3])))
print (np.mean(np.absolute(y[:,:,4]-a[:,:,4])))
罪魁祸首可能是这句话:
states=[decoded_states[:,:LSTM_latent_dim],decoded_states[:,LSTM_latent_dim:]]
在组合编码 LSTM 的状态并通过自动编码器传递它们后,我将它们拆分回c
和h
(分别为单元状态和隐藏状态(,并将它们馈送到解码器 LSTM 中。
在我看来,当使用初始模型时,此步骤正确发生,但不知何故错误地保存到模型文件中(或错误地从模型文件加载(,导致加载模型有缺陷,这似乎是合理的。
在我看来,进一步支持我的评估的是,当这一行被替换为
states= [state_h, state_c]
,加载的模型能够正确运行(拟合和预测(,但这当然取消了状态自动编码器,因此除了放大错误之外,我无法使用它。
因此,我请您帮助解决两个问题:
为什么会出现此问题?
我该如何解决?
一个可能的部分解决方案是放弃整个模型的保存,只保存(和加载(模型的权重。
替换线条
model = load_model(filepath_for_w)
...
checkpointer=ModelCheckpoint(filepath_for_w, monitor='val_loss', verbose=0, save_best_only=True, mode='auto', period=1)
...
model.save(filepath_for_w)
跟
model.load_weights(filepath_for_w)
...
checkpointer=ModelCheckpoint(filepath_for_w, save_weights_only=True, monitor='val_loss', verbose=0, save_best_only=True, mode='auto', period=1)
...
model.save_weights(filepath_for_w)
能解决问题。可以加载模型以进行进一步拟合和预测。
但是,这不允许保存整个模型;我仍然需要在代码中保留架构,以便用权重填充它。它也没有解释为什么会出现这个问题。