CNN - training an image network that takes image-group classification into account



I am trying to create a CNN that classifies images. I know that every group of images (a group can contain 2-10 images) is classified to the same label.

So, if I predict that 8 of the images in a group belong to class A and 2 belong to class B, I want the network to predict that all 10 images belong to class A.

So far I have built a network that classifies each image on its own, without taking into account the fact that the images are grouped.

The output is currently a prediction per image rather than a prediction per group of images. I thought about adding a step that takes the majority label within each group and assigns that label to every image in the group. Can I do this without building a new network, and if so, how?
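In other words, the post-processing I have in mind would look roughly like the sketch below (group_label_by_majority and probs are just illustrative names, not existing code; the 6-class output shape matches the model further down):

import numpy as np

# probs: output of model.predict() on all images of one group, shape (group_size, 6)
def group_label_by_majority(probs):
    per_image = np.argmax(probs, axis=-1)                      # predicted class per image
    votes = np.bincount(per_image, minlength=probs.shape[-1])  # how often each class was predicted
    return int(np.argmax(votes))                               # class predicted most often in the group

# every image in the group would then get the same label:
# group_label = group_label_by_majority(probs)
# labels = np.full(len(probs), group_label)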

I used the following network (the input is all of the images, each with shape (pixels, pixels, 1)):

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, LeakyReLU

def model():
    model = Sequential()
    model.add(Conv2D(32, kernel_size=(3, 3), activation='linear', input_shape=(pixels, pixels, 1), padding='same'))
    model.add(LeakyReLU(alpha=0.1))
    model.add(MaxPooling2D((2, 2), padding='same'))
    model.add(Conv2D(64, (3, 3), activation='linear', padding='same'))
    model.add(LeakyReLU(alpha=0.1))
    model.add(MaxPooling2D(pool_size=(2, 2), padding='same'))
    model.add(Conv2D(128, (3, 3), activation='linear', padding='same'))
    model.add(LeakyReLU(alpha=0.1))
    model.add(MaxPooling2D(pool_size=(2, 2), padding='same'))
    model.add(Flatten())
    model.add(Dense(128, activation='linear'))
    model.add(LeakyReLU(alpha=0.1))
    model.add(Dense(6, activation='softmax'))
    return model

Thanks!

Edit - I tried to make the change with a callback during training.

import tensorflow as tf

class CustomCallback(tf.keras.callbacks.Callback):
    def __init__(self, train_df, val_df, save_file, model, features):
        self.train_df = train_df
        self.val_df = val_df
        self.model = model
        self.features = features

    def on_epoch_end(self, epoch, logs=None):
        pass
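The body of on_epoch_end is where the relabeling would happen. A rough sketch of one way it could look, assuming train_df is a pandas DataFrame with a 'group_id' and a 'label' column and that self.features is the column holding the image arrays (these names are assumptions for illustration, not part of the code above):

import numpy as np

# inside CustomCallback:
def on_epoch_end(self, epoch, logs=None):
    for group_id, group in self.train_df.groupby('group_id'):
        imgs = np.stack(group[self.features].to_numpy())      # (n_images, pixels, pixels, 1)
        probs = self.model.predict(imgs, verbose=0)           # per-image softmax outputs
        majority = int(np.bincount(np.argmax(probs, axis=-1)).argmax())
        self.train_df.loc[group.index, 'label'] = majority    # relabel the whole group with the majority class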

Example code that trains an MNIST classifier and applies group prediction:

import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras import models, layers
import numpy as np

BATCH_SIZE = 32
LATENT_DEM = 128

def _normalize_img(img, label):
    img = tf.cast(img, tf.float32) / 255.
    return (img, label)

train_dataset, test_dataset = tfds.load(name="mnist", split=['train', 'test'], as_supervised=True)

# Build your input pipelines
train_dataset = train_dataset.shuffle(1024).batch(BATCH_SIZE)
train_dataset = train_dataset.map(_normalize_img)
test_dataset = test_dataset.batch(BATCH_SIZE)
test_dataset = test_dataset.map(_normalize_img)

def build_model():
    model = models.Sequential()
    model.add(layers.Conv2D(32, kernel_size=(3, 3), activation='linear', input_shape=(28, 28, 1), padding='same'))
    model.add(layers.LeakyReLU(alpha=0.1))
    model.add(layers.MaxPooling2D((2, 2), padding='same'))
    model.add(layers.Conv2D(64, (3, 3), activation='linear', padding='same'))
    model.add(layers.LeakyReLU(alpha=0.1))
    model.add(layers.MaxPooling2D(pool_size=(2, 2), padding='same'))
    model.add(layers.Conv2D(128, (3, 3), activation='linear', padding='same'))
    model.add(layers.LeakyReLU(alpha=0.1))
    model.add(layers.MaxPooling2D(pool_size=(2, 2), padding='same'))
    model.add(layers.Flatten())
    model.add(layers.Dense(128, activation='linear'))
    model.add(layers.LeakyReLU(alpha=0.1))
    model.add(layers.Dense(10, activation='softmax'))
    return model

model = build_model()

# Compile the model
model.compile(
    optimizer=tf.keras.optimizers.Adam(0.001),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=['accuracy'])

# Train the network
history = model.fit(
    train_dataset,
    epochs=3)

# Take group_num images with the same class_index, e.g. take 10 images of the handwritten digit 0
same_class_imgs = []
class_index = 0
group_num = 10
for data in train_dataset.as_numpy_iterator():
    for i in range(len(data[1])):  # the last batch may be smaller than BATCH_SIZE
        if data[1][i] == class_index:
            same_class_imgs.append(data[0][i])
    if len(same_class_imgs) >= group_num:
        same_class_imgs = same_class_imgs[:group_num]
        break

predictions = model.predict(np.array(same_class_imgs))
class_predictions = np.argmax(predictions, axis=-1)

# Get every class with the maximum count among the per-image predictions.
# If the model predicts e.g. Class 0: 4, Class 2: 4, Class 6: 2 for the 10 images, the result will be [0, 2]
u, c = np.unique(class_predictions, return_counts=True)
group_predictions = u[c == c.max()]

print("Model outputs:\n{}\nClass predictions for each:\n{}\nGroup predictions:\n{}\n".format(predictions, class_predictions, group_predictions))

Output:

Epoch 1/3
1875/1875 [==============================] - 25s 13ms/step - loss: 0.1151 - accuracy: 0.9636
Epoch 2/3
1875/1875 [==============================] - 26s 14ms/step - loss: 0.0373 - accuracy: 0.9887
Epoch 3/3
1875/1875 [==============================] - 27s 15ms/step - loss: 0.0276 - accuracy: 0.9918
Model outputs:
[[9.99998093e-01 4.44641879e-09 3.59849594e-07 1.63150361e-11
5.72367154e-10 1.30235627e-08 2.22519478e-08 2.74096346e-09
1.35854119e-08 1.50564949e-06]
[9.99993443e-01 4.39691888e-07 3.04043715e-06 6.97965232e-13
2.71462852e-10 1.95115590e-11 2.83727672e-06 1.77562035e-10
8.74899158e-08 4.50125714e-09]
[9.99771059e-01 7.68102666e-08 1.70877684e-04 5.31888533e-09
1.18625154e-09 1.55892820e-07 5.27070406e-05 1.39944865e-08
1.69486464e-06 3.51022732e-06]
[9.97043431e-01 4.56344900e-08 4.86983163e-06 5.02740682e-10
1.26683688e-07 2.26395724e-09 2.95107951e-03 3.57114269e-11
3.71302264e-07 1.21673409e-07]
[9.99990821e-01 3.98899509e-08 1.80275507e-07 9.63127977e-09
6.10572037e-10 1.61982803e-06 8.47767296e-08 3.85314580e-09
3.89608113e-06 3.24082566e-06]
[9.99009490e-01 4.41650997e-07 3.74904607e-06 1.01597159e-07
1.90468157e-07 5.84926011e-06 1.67861625e-04 6.61629773e-09
1.19658958e-04 6.92503527e-04]
[9.99953389e-01 7.85282879e-08 1.22815834e-06 3.36330047e-10
9.96207630e-11 7.86476903e-06 3.04561325e-07 2.15532872e-08
3.20932595e-05 5.02664489e-06]
[9.99999881e-01 8.90605922e-10 1.58526674e-07 1.08922321e-14
5.17258491e-13 4.65424088e-11 2.56549915e-09 1.93356355e-11
1.11570158e-08 6.98878289e-09]
[9.99998450e-01 4.95704171e-08 1.38500775e-06 7.40387501e-14
5.10533109e-12 5.59060465e-10 6.39804432e-10 4.76541073e-09
4.51906317e-08 9.17944476e-08]
[9.99998093e-01 4.56725751e-08 7.49201376e-08 3.43272437e-12
5.57741457e-11 2.99988500e-07 1.19160859e-08 2.33604425e-08
1.12858947e-06 3.71466854e-07]]
Class predictions for each:
[0 0 0 0 0 0 0 0 0 0]
Group predictions:
[0]
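If you need a single label for every image in the group even when group_predictions contains more than one class (a tie), one simple option, not shown above, is to break the tie with the summed probabilities and then broadcast that label to all images:

# Pick one group label: fall back to the highest summed probability on a tie
if len(group_predictions) == 1:
    group_label = int(group_predictions[0])
else:
    group_label = int(np.argmax(predictions.sum(axis=0)))

# Override every per-image prediction with the group label
final_labels = np.full_like(class_predictions, group_label)
print("Final labels for the group:", final_labels)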
