I am developing an autoencoder on the dataset https://www.kaggle.com/jessicali9530/celeba-dataset.
import tensorflow
tensorflow.__version__
Output:
'2.2.0-rc3'
from tensorflow.keras.preprocessing import image
data_gen = image.ImageDataGenerator(rescale=1.0/255)
batch_size = 20
train_data_gen = data_gen.flow_from_directory(directory=train_dest_path,
                                              target_size=(256, 256),
                                              batch_size=batch_size,
                                              shuffle=True,
                                              class_mode='input')
test_data_gen = data_gen.flow_from_directory(directory=test_dest_path,
                                             target_size=(256, 256),
                                             batch_size=batch_size,
                                             shuffle=True,
                                             class_mode='input')
# autoencoder
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D
from tensorflow.keras import Model
from tensorflow.keras.optimizers import Adam, SGD
# parameters
inchannel = 3
x, y = 256, 256
input_img = Input(shape=(x,y,inchannel))
def autoencoder_model(input_img):
    # encoder
    conv1 = Conv2D(32, kernel_size=(3,3), activation='relu', padding='same')(input_img)
    pool1 = MaxPooling2D(pool_size=(2,2))(conv1)
    conv2 = Conv2D(64, kernel_size=(3,3), activation='relu', padding='same')(pool1)
    pool2 = MaxPooling2D(pool_size=(2,2))(conv2)
    conv3 = Conv2D(128, kernel_size=(3,3), activation='relu', padding='same')(pool2)
    # decoder
    conv4 = Conv2D(128, kernel_size=(3,3), activation='relu', padding='same')(conv3)
    pool3 = UpSampling2D(size=(2,2))(conv4)
    conv5 = Conv2D(64, kernel_size=(3,3), activation='relu', padding='same')(pool3)
    pool4 = UpSampling2D(size=(2,2))(conv5)
    decoded = Conv2D(3, kernel_size=(3,3), activation='relu', padding='same')(pool4)
    return decoded
model = Model(inputs=input_img, outputs=autoencoder_model(input_img))
model.compile(loss='mean_squared_error', optimizer=Adam())
model.summary()
Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
input_1 (InputLayer)         [(None, 256, 256, 3)]     0
_________________________________________________________________
conv2d (Conv2D)              (None, 256, 256, 32)      896
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 128, 128, 32)      0
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 128, 128, 64)      18496
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 64, 64, 64)        0
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 64, 64, 128)       73856
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 64, 64, 128)       147584
_________________________________________________________________
up_sampling2d (UpSampling2D) (None, 128, 128, 128)     0
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 128, 128, 64)      73792
_________________________________________________________________
up_sampling2d_1 (UpSampling2 (None, 256, 256, 64)      0
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 256, 256, 3)       1731
=================================================================
Total params: 316,355
Trainable params: 316,355
Non-trainable params: 0
from tensorflow.keras.callbacks import ModelCheckpoint
epochs = 2
num_training_steps = train_data_gen.samples/batch_size
checkpoint_directory = '/gdrive/My Drive/Colab Notebooks'
checkpoint = ModelCheckpoint(checkpoint_directory, verbose=1, save_weights_only=False, save_freq='epoch')
model.fit(train_data_gen, epochs=epochs, verbose=1, callbacks=[checkpoint])
Output:
Epoch 1/2
103/Unknown - 8s 80ms/step - loss: 0.0175
After spending a lot of time on this, I still don't understand why I get "Unknown" in the output of model.fit(). Also, even though I only pulled 1000 images from the training dataset with flow_from_directory(), model.fit() just keeps running: the step counter goes past 1000 and I don't understand why.
When you call model.fit with a generator as input, you must set the steps_per_epoch argument. With a generator there is no way to know how many images it will yield (in this case it keeps going forever), so set steps_per_epoch to the number of images in your dataset divided by your batch size.
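For example, a minimal sketch of the corrected fit call, reusing the train_data_gen, batch_size, epochs, and checkpoint objects defined above (integer division so the step count is a whole number):

# Tell Keras how many generator batches make up one epoch,
# e.g. 1000 images with a batch_size of 20 gives 50 steps per epoch.
steps_per_epoch = train_data_gen.samples // batch_size

model.fit(train_data_gen,
          epochs=epochs,
          steps_per_epoch=steps_per_epoch,
          verbose=1,
          callbacks=[checkpoint])

With steps_per_epoch set, the progress bar shows a fixed total (e.g. 50/50) instead of "Unknown", and each epoch ends after exactly one pass over the images found by flow_from_directory.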