微调MNIST的深度自动编码器模型



我为MNIST数据集开发了一个3层深的自动编码器模型。我是微调范式的初学者,所以只是在这个玩具数据集上进行练习。

以下是代码

from keras import  layers
from keras.layers import Input, Dense
from keras.models import Model,Sequential
from keras.datasets import mnist
import numpy as np
# Deep Autoencoder
# Builds a 784-128-64-32-64-128-784 dense autoencoder on MNIST, trains it to
# reconstruct its input, then tries to reuse the encoder half as the base of
# a 10-class classifier.

# this is the size of our encoded representations
encoding_dim = 32   # 32 floats -> compression factor 24.5, assuming the input is 784 floats
# this is our input placeholder; 784 = 28 x 28
input_img = Input(shape=(784, ))
# number of training epochs passed to autoencoder.fit() below
my_epochs = 100
# "encoded" is the encoded representation of the inputs (128 -> 64 -> 32 units)
encoded = Dense(encoding_dim * 4, activation='relu')(input_img)
encoded = Dense(encoding_dim * 2, activation='relu')(encoded)
encoded = Dense(encoding_dim, activation='relu')(encoded)
# "decoded" is the lossy reconstruction of the input (64 -> 128 -> 784 units)
decoded = Dense(encoding_dim * 2, activation='relu')(encoded)
decoded = Dense(encoding_dim * 4, activation='relu')(decoded)
decoded = Dense(784, activation='sigmoid')(decoded)
# this model maps an input to its reconstruction
autoencoder = Model(input_img, decoded)
# Separate Encoder model
# this model maps an input to its encoded representation; it is built from the
# same layer objects as the autoencoder
encoder = Model(input_img, encoded)
# Separate Decoder model
# create a placeholder for an encoded (32-dimensional) input
encoded_input = Input(shape=(encoding_dim, ))
# retrieve the last three layers of the autoencoder, i.e. the three decoder
# Dense layers created above
decoder_layer1 = autoencoder.layers[-3]
decoder_layer2 = autoencoder.layers[-2]
decoder_layer3 = autoencoder.layers[-1]
# create the decoder model by chaining those three layers on the new input
decoder = Model(encoded_input, decoder_layer3(decoder_layer2(decoder_layer1(encoded_input))))
# Train to reconstruct MNIST digits
# configure model to use a per-pixel binary crossentropy loss, and the Adadelta optimizer
autoencoder.compile(optimizer='adadelta', loss='binary_crossentropy')
# prepare input data (the labels y_train / y_test are not used by the autoencoder)
(x_train, y_train), (x_test, y_test) = mnist.load_data()
# normalize all values between 0 and 1 and flatten the 28x28 images into vectors of size 784
x_train = x_train.astype('float32') / 255.
x_test = x_test.astype('float32') / 255.
x_train = x_train.reshape((len(x_train), np.prod(x_train.shape[1:])))
x_test = x_test.reshape((len(x_test), np.prod(x_test.shape[1:])))
# Train the autoencoder for my_epochs (= 100) epochs; the input is also the
# target, and the test set is used as validation data
autoencoder.fit(x_train, x_train, epochs=my_epochs, batch_size=256, shuffle=True, validation_data=(x_test, x_test),
                verbose=2)
# after 100 epochs the autoencoder seems to reach a stable train/test loss value
# Visualize the reconstructed encoded representations
# encode the train and test digits, then decode the encoded *test* digits
encodedTrainImages=encoder.predict(x_train)
encoded_imgs = encoder.predict(x_test)
decoded_imgs = decoder.predict(encoded_imgs)


# From here I want to fine tune just the encoder model
model=Sequential()
# NOTE(review): duplicate of the previous line; the first Sequential instance
# is simply discarded
model=Sequential()
# reuse the encoder's layer objects (not copies) in the new model
for layer in encoder.layers:
  model.add(layer)
# NOTE: this is the call that raises the ValueError quoted in the question:
# Flatten expects min_ndim=3 but the encoder output has ndim=2
model.add(layers.Flatten())
# classification head: 20 hidden units, dropout, 10-way softmax
model.add(layers.Dense(20, activation='relu'))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(10, activation='softmax'))

以下是我要微调的编码器模型。

encoder.summary()
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_1 (InputLayer)         (None, 784)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 128)               100480    
_________________________________________________________________
dense_2 (Dense)              (None, 64)                8256      
_________________________________________________________________
dense_3 (Dense)              (None, 32)                2080      
=================================================================
Total params: 110,816
Trainable params: 110,816
Non-trainable params: 0
_________________________________________________________________

问题:1

构建自动编码器模型后,我只想使用Encoder模型并将其微调用于MNIST数据集中的分类任务,但是我会遇到错误。

错误:

Traceback (most recent call last):
  File "C:\Users\samer\Anaconda3\envs\tensorflow-gpu\lib\site-packages\IPython\core\interactiveshell.py", line 3267, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-528c079e5325>", line 3, in <module>
    model.add(layers.Flatten())
  File "C:\Users\samer\Anaconda3\envs\tensorflow-gpu\lib\site-packages\keras\engine\sequential.py", line 181, in add
    output_tensor = layer(self.outputs[0])
  File "C:\Users\samer\Anaconda3\envs\tensorflow-gpu\lib\site-packages\keras\engine\base_layer.py", line 414, in __call__
    self.assert_input_compatibility(inputs)
  File "C:\Users\samer\Anaconda3\envs\tensorflow-gpu\lib\site-packages\keras\engine\base_layer.py", line 327, in assert_input_compatibility
    str(K.ndim(x)))
ValueError: Input 0 is incompatible with layer flatten_4: expected min_ndim=3, found ndim=2

问题2:

同样,我稍后会使用预训练的模型,其中每个自动编码器将以贪婪的方式进行训练,然后将对最终模型进行微调。有人可以指导我如何在这两个任务中进一步进行。

问候

问题1

问题在于您正试图展平一个已经是扁平的层:您的编码器由一维的Dense层组成,其输出形状为(batch_size, dim)。

Flatten层期望输入至少是二维的,即张量形状至少有3个维度(batch_size, dim1, dim2)(例如Conv2D层的输出)。去掉Flatten层后,模型即可正确构建:

# Rebuild the encoder (784 -> 128 -> 64 -> 32) and stack a classification head
# on top of it, this time without a Flatten layer.
encoding_dim = 32
input_img = layers.Input(shape=(784, ))
encoded = layers.Dense(encoding_dim * 4, activation='relu')(input_img)
encoded = layers.Dense(encoding_dim * 2, activation='relu')(encoded)
encoded = layers.Dense(encoding_dim, activation='relu')(encoded)
encoder = Model(input_img, encoded)
# placeholder for elided code (presumably the autoencoder construction and
# training from the question -- confirm against the original post)
[...]
model = Sequential()
# add the encoder's own layer objects to the new model, printing each name
for layer in encoder.layers:
    print(layer.name)
    model.add(layer)
# no Flatten here: the last encoder layer already outputs shape (None, 32)
model.add(layers.Dense(20, activation='relu'))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(10, activation='softmax'))
model.summary()

输出如下:

input_1
dense_1
dense_2
dense_3
Model: "sequential_1"
________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense_1 (Dense)              (None, 128)               100480    
_________________________________________________________________
dense_2 (Dense)              (None, 64)                8256      
_________________________________________________________________
dense_3 (Dense)              (None, 32)                2080      
_________________________________________________________________
dense_4 (Dense)              (None, 20)                660       
_________________________________________________________________
dropout_1 (Dropout)          (None, 20)                0         
_________________________________________________________________
dense_5 (Dense)              (None, 10)                210       
=================================================================
Total params: 111,686
Trainable params: 111,686
Non-trainable params: 0
_________________________________________________________________

___

编辑:在评论中整合问题的答案

问:我如何确保新模型将使用与以前训练的Encoder相同的权重?

答:在您的代码中,您遍历编码器内部包含的各层,并把它们逐个传给model.add()。这里传递的是对每个层对象的引用,因此新模型使用的正是同一批层(权重也随之共享)。下面用层的名称做一个概念验证:

# Proof of concept: the autoencoder, the encoder and a new Sequential model
# all hold references to the same layer objects.
encoding_dim = 32
input_img = Input(shape=(784, ))
encoded = Dense(encoding_dim * 4, activation='relu')(input_img)
encoded = Dense(encoding_dim * 2, activation='relu')(encoded)
encoded = Dense(encoding_dim, activation='relu')(encoded)
decoded = Dense(encoding_dim * 2, activation='relu')(encoded)
decoded = Dense(encoding_dim * 4, activation='relu')(decoded)
decoded = Dense(784, activation='sigmoid')(decoded)
autoencoder = Model(input_img, decoded)
# index 1 is the first Dense layer (index 0 is the input layer)
print("autoencoder first Dense layer reference:", autoencoder.layers[1])
encoder = Model(input_img, encoded)
print("encoder first Dense layer reference:", encoder.layers[1])
new_model = Sequential()
for i, layer in enumerate(encoder.layers):
  print("Before: ", layer.name)
  new_model.add(layer)
  # skip the input layer (i == 0); only the Dense layers are renamed
  if i != 0:
    # NOTE(review): the i-1 offset suggests new_model.layers does not list the
    # input layer -- confirm for the Keras version in use. Assigning to
    # `.name` also assumes the attribute is writable in that version.
    new_model.layers[i-1].name = "new_model_"+layer.name
    # printing the encoder's layer shows the rename made through new_model
    print("After: ", layer.name)

输出:

autoencoder first Dense layer reference: <keras.layers.core.Dense object at 
0x7fb5f138e278>
encoder first Dense layer reference: <keras.layers.core.Dense object at 
0x7fb5f138e278>
Before:  input_1
Before:  dense_1
After:  new_model_dense_1
Before:  dense_2
After:  new_model_dense_2
Before:  dense_3
After:  new_model_dense_3

您可以看到,编码器和自动编码器中的层引用是相同的。此外,在新模型中更改层的名称时,编码器中相应层的名称也会随之改变。有关Python按引用传递参数的更多细节,请参阅此答案。


问:我的数据是否需要独热(one-hot)编码?如果需要,该怎么做?

答:您确实需要独热编码,因为您处理的是多类别(multi-class)分类问题。只需使用Keras提供的便捷函数即可完成编码:

# Convert the class labels into one-hot vectors with Keras' to_categorical.
# y_train is assumed to be the label array returned by mnist.load_data().
from keras.utils import np_utils
one_hot = np_utils.to_categorical(y_train)

这是文档的链接。

___


问题2

关于您的第二个问题,目前还不太清楚您的目标是什么,但在我看来,您是想构建这样一种架构:其中包含几个并行的自动编码器,分别专门处理不同的任务,然后再添加一些最终的公共层,把它们的输出合并起来。

在任何情况下,到目前为止,我能做的就是建议您研究本指南,该指南解释了如何构建多输入和多输出模型,并将其用作基线以从您的自定义实现开始。

___

编辑2:问题2答案集成

关于贪婪训练任务,其做法是每次只训练一层,并在训练完成后再追加新的层。下面是一个3(+1)层网络的贪婪训练示例,训练得到的网络随后被用作新模型的基础:

# Greedy layer-wise pre-training of a 256-64-32 dense stack on MNIST, followed
# by fine-tuning of a new classifier built on top of the pre-trained stack.
#
# Names used here that the earlier snippets never import:
from keras.layers import Dropout
from keras.optimizers import SGD
from keras.utils import np_utils

(x_train, y_train), (x_test, y_test) = mnist.load_data()
# One-hot encode the labels for the categorical cross-entropy loss.
y_train = np_utils.to_categorical(y_train)
y_test = np_utils.to_categorical(y_test)
# Flatten each 28x28 image to a 784-vector and scale pixels to [0, 1], the
# same preprocessing the autoencoder script applies.
x_train = np.reshape(x_train, (x_train.shape[0], -1)).astype("float32") / 255.
x_test = np.reshape(x_test, (x_test.shape[0], -1)).astype("float32") / 255.

# Stage 1: train the first hidden layer together with a temporary softmax head.
model = Sequential()
model.add(Dense(256, activation="relu", kernel_initializer="he_uniform", input_shape=(28*28,)))
model.add(Dense(10, activation="softmax"))
model.compile(optimizer=SGD(lr=0.01, momentum=0.9), loss="categorical_crossentropy", metrics=["accuracy"])
model.fit(x_train, y_train, batch_size=64, epochs=50, verbose=1)

# Stages 2 and 3: replace the head with a new hidden layer plus a new head,
# training only the newly added layers.
for units in (64, 32):
    # Remove the temporary classification layer
    model.pop()
    # 'Freeze' previous layers, so to single-train the new one
    for layer in model.layers:
        layer.trainable = False
    # Append new layer + classification layer
    model.add(Dense(units, activation="relu", kernel_initializer="he_uniform"))
    model.add(Dense(10, activation="softmax"))
    # Changes to `trainable` and to the layer stack only take effect once the
    # model is compiled again, so recompile before every training stage.
    model.compile(optimizer=SGD(lr=0.01, momentum=0.9), loss="categorical_crossentropy", metrics=["accuracy"])
    model.fit(x_train, y_train, batch_size=64, epochs=50, verbose=0)

# Create new model which will use the pre-trained layers
new_model = Sequential()
# Discard the last (classification) layer from the previous model
model.pop()
# Optional: you can decide to set the pre-trained layers as trainable, in
# which case it would be like having initialized their weights, or not.
for l in model.layers:
    l.trainable = True
new_model.add(model)
new_model.add(Dense(20, activation='relu'))
new_model.add(Dropout(0.5))
new_model.add(Dense(10, activation='softmax'))
# Compiled after the trainable flags were set, so they are honoured here.
new_model.compile(optimizer=SGD(lr=0.01, momentum=0.9), loss="categorical_crossentropy", metrics=["accuracy"])
new_model.fit(x_train, y_train, batch_size=64, epochs=100, verbose=1)

大致就是这样。不过我必须说,贪婪逐层训练可能已不再是合适的方案:如今有了ReLU、Dropout和其他正则化技术,贪婪逐层训练已成为一种过时且耗时的权重初始化方法,因此在采用贪婪训练之前,您可能还想了解一下其他可行的方案。

___

相关内容

  • 没有找到相关文章

最新更新