将图像的原始训练数据集与增强后的训练数据集拼接起来



我编写了以下代码用于加载训练数据和测试数据。我已经对训练数据集做了数据增强,但我想把原始训练数据集与增强后的数据集拼接(concatenate)起来。我该怎么做呢?

from tensorflow.keras.preprocessing.image import ImageDataGenerator
# Augmenting generator: used ONLY for the training subset.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=5,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
    validation_split=0.2,
)
# Validation images must not be randomly rotated/zoomed/shifted, otherwise the
# validation metrics are measured on distorted data.  The generator therefore
# only rescales, but keeps the same validation_split so that the 'validation'
# subset is the complement of the 'training' subset selected above.
val_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)
# Test images are only rescaled as well.
test_datagen = ImageDataGenerator(rescale=1./255)

train_dir = 'train_separated'
test_dir = 'test_separated'
batch_size = 128
img_height = 100
img_width = 100
num_classes = 10

# load train and test data
train_data = train_datagen.flow_from_directory(
    train_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    subset='training',
)
# train_data yields augmented images only; the open question is how to
# concatenate these augmented images with the original training images.
val_data = val_datagen.flow_from_directory(
    train_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    subset='validation',
)
test_data = test_datagen.flow_from_directory(
    test_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
)

我期望我的训练数据同时包含增强后的训练数据和原始数据。

我发现了一个方法。这里我给你一个例子:

import tensorflow as tf
# Image source directory plus the resize / batching configuration.
train_dir = "images/"
img_height = 32
img_width = 32
batch_size = 16

# Options shared by both directory iterators below, so the plain and the
# augmented stream are read with identical size, batching and label settings.
_flow_options = dict(
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='binary',
    shuffle=True,
)

# Plain generator: rescale only, no augmentation.
train_data = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255)
train_generator = train_data.flow_from_directory(train_dir, **_flow_options)

# Augmenting generator: same rescale plus small random rotation, zoom and
# horizontal/vertical shifts.
aug_train_data = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
    rotation_range=5,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
)
aug_train_generator = aug_train_data.flow_from_directory(
    train_dir, **_flow_options
)
# now let's combine these
# Keras directory iterators yield batches forever.  Each dataset is therefore
# cut to exactly one pass over its generator (len(generator) batches) with
# take(); without this the first dataset would never end and concatenate()
# would never reach the augmented one.
train_ds = tf.data.Dataset.from_generator(
    lambda: train_generator,
    output_types=(tf.float32, tf.float32),
    # (batch, height, width, channels) images and (batch,) binary labels
    output_shapes=([None, img_height, img_width, 3], [None]),
).take(len(train_generator))
aug_train_ds = tf.data.Dataset.from_generator(
    lambda: aug_train_generator,
    output_types=(tf.float32, tf.float32),
    output_shapes=([None, img_height, img_width, 3], [None]),
).take(len(aug_train_generator))
# concatenate the two datasets: one pass of original batches followed by one
# pass of augmented batches
train_ds = train_ds.concatenate(aug_train_ds)
# shuffle (operates on whole batches, within a small buffer)
train_ds = train_ds.shuffle(buffer_size=5)
# repeat so that training with an explicit steps_per_epoch can run for several
# epochs without the finite dataset running out of data
train_ds = train_ds.repeat()
#classification example
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, GlobalMaxPool2D
# Small CNN for binary classification: three conv stages, global max pooling,
# then a dense head ending in a single sigmoid unit.
model = Sequential()
model.add(
    Conv2D(4, (3, 3), activation='relu',
           input_shape=(img_height, img_width, 3))
)
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(8, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(16, (3, 3), activation='relu'))
model.add(GlobalMaxPool2D())
model.add(Dense(16, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()
# One epoch = one pass worth of batches from each generator.  The step count
# is mandatory: the underlying generators keep looping, so training needs an
# explicit epoch length.
steps_per_epoch = sum(
    len(generator) for generator in (train_generator, aug_train_generator)
)
model.fit(train_ds, epochs=5, steps_per_epoch=steps_per_epoch)

最新更新