Why does my test accuracy drop when I train my CNN for more epochs?



I have a problem with my CNN. I trained my model for 50 epochs (with batch normalization and dropout) and got 92% test accuracy. After that I trained the exact same network again, with the same tuning and regularization techniques, but for 100 epochs, and the accuracy on my test set dropped to 79%. Since my dataset is small, I used data augmentation (horizontal and vertical flips). I can't explain this, can anyone help?

import numpy as np
import tensorflow as tf
from numpy.random import seed

# Fix the random seeds for (partial) reproducibility
seed(1)
tf.compat.v1.set_random_seed(2)

from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

# Use the TF1-compat session API consistently; a single interactive session is enough,
# with GPU memory growth enabled and device placement logging turned on
gpu_options = tf.compat.v1.GPUOptions(allow_growth=True)
session = tf.compat.v1.InteractiveSession(
    config=tf.compat.v1.ConfigProto(gpu_options=gpu_options, log_device_placement=True))

import os
os.environ['KERAS_BACKEND'] = 'tensorflow'

from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import Callback, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, BatchNormalization
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.optimizers import SGD, Adam
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from matplotlib import pyplot as plt
from sklearn.metrics import confusion_matrix
import itertools

# Note: this only creates an initializer object and discards it; it does not
# change the initializer used by the layers defined below.
keras.initializers.glorot_normal(seed=42)
train_path = 'C:/Users/Panagiotis Gkanos/Desktop/dataset/40X/train'
train_batches = ImageDataGenerator(rescale=1./255, horizontal_flip=True,
                                   vertical_flip=True).flow_from_directory(
    train_path,
    target_size=[400, 400],
    classes=['malignant', 'benign'],
    class_mode='categorical', batch_size=40)

valid_path = 'C:/Users/Panagiotis Gkanos/Desktop/dataset/40X/valid'
valid_batches = ImageDataGenerator(rescale=1./255).flow_from_directory(
    valid_path,
    target_size=[400, 400],
    classes=['malignant', 'benign'],
    class_mode='categorical', batch_size=20)

test_path = 'C:/Users/Panagiotis Gkanos/Desktop/dataset/40X/test'
test_batches = ImageDataGenerator(rescale=1./255).flow_from_directory(
    test_path,
    target_size=[400, 400],
    classes=['malignant', 'benign'],
    class_mode='categorical', batch_size=20)

model=Sequential()
model.add(Conv2D(16,(3,3),strides=2,padding='same',input_shape=(400,400,3)))
model.add(Activation('relu'))
model.add(BatchNormalization())
model.add(Conv2D(16,(3,3),strides=1,padding='same'))
model.add(Activation('relu'))
model.add(BatchNormalization())
model.add(Conv2D(16,(3,3),strides=1,padding='same'))
model.add(Activation('relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2),strides=2))

model.add(Conv2D(32,(3,3),strides=1,padding='same'))
model.add(Activation('relu'))
model.add(BatchNormalization())
model.add(Conv2D(32,(3,3),strides=1,padding='same'))
model.add(Activation('relu'))
model.add(BatchNormalization())
model.add(Conv2D(32,(3,3),strides=1,padding='same'))
model.add(Activation('relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2),strides=2))

model.add(Conv2D(64,(3,3),padding='same'))
model.add(Activation('relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2),strides=2))

model.add(Conv2D(128,(3,3),padding='same'))
model.add(Activation('relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2),strides=2))

model.add(Flatten())
model.add(Dense(256,activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(128,activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(2,activation='softmax'))
model.summary()
#learn_control = ReduceLROnPlateau(monitor='val_acc', patience=5,
# verbose=1,factor=0.2, min_lr=1e-7)
model.compile(optimizer=Adam(lr=0.001),loss='categorical_crossentropy',metrics=['accuracy'])
history = model.fit_generator(train_batches, steps_per_epoch=20,
                              validation_data=valid_batches,
                              validation_steps=8, epochs=100)
#,callbacks=[learn_control])
model.evaluate(test_batches)
def plot_loss(history):
    train_loss = history.history['loss']
    val_loss = history.history['val_loss']
    x = list(range(1, len(val_loss) + 1))
    plt.plot(x, val_loss, color='red', label='validation loss')
    plt.plot(x, train_loss, label='training loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title('Loss vs. Epoch')
    plt.legend()
    plt.show()

def plot_accuracy(history):
    train_acc = history.history['acc']
    val_acc = history.history['val_acc']
    x = list(range(1, len(val_acc) + 1))
    plt.plot(x, val_acc, color='red', label='validation acc')
    plt.plot(x, train_acc, label='training acc')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.title('Accuracy vs. Epoch')
    plt.legend()
    plt.show()

plot_loss(history)
plot_accuracy(history)

This is what is known as "overfitting". If your network had better test performance at 50 epochs, you probably want to stop there (one way to keep the better model without retraining is sketched just below).
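A practical way to do that is a checkpoint callback that only saves the weights of the best validation epoch, so you don't have to guess the right epoch count up front. A minimal sketch, reusing the ModelCheckpoint class your code already imports (the file name best_model.h5 is just a placeholder):

from tensorflow.keras.callbacks import ModelCheckpoint

# Save the weights of the epoch with the lowest validation loss seen so far.
# 'best_model.h5' is an arbitrary output path chosen for this sketch.
checkpoint = ModelCheckpoint('best_model.h5',
                             monitor='val_loss',
                             save_best_only=True,
                             verbose=1)

# Then pass it to your existing training call, e.g.:
# history = model.fit_generator(train_batches, steps_per_epoch=20,
#                               validation_data=valid_batches,
#                               validation_steps=8, epochs=100,
#                               callbacks=[checkpoint])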

In this case it is likely due to the small dataset: the network cannot find general patterns that cover all cases, so instead it fits small details that happen to recur in your training data.

For example, suppose you train a CNN to classify animals as either birds or dogs, but your training set contains only blue birds and dogs of all sorts of colors. If you then test the network on a photo of a bird of any color other than blue, it will most likely be classified as a dog, because the network has learned "blue means bird, everything else means dog" rather than learning the features specific to birds and the features specific to dogs.

In short, you probably just need a larger and more diverse dataset. You could also implement early stopping, which halts training before the network starts to overfit the data (see the sketch below). Otherwise, you can try other forms of regularization, but this is a hard problem to get around.
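A minimal sketch of early stopping with Keras's built-in EarlyStopping callback; the patience of 10 epochs is an illustrative assumption you would tune against your own validation curves:

from tensorflow.keras.callbacks import EarlyStopping

# Stop training once the validation loss has not improved for 10 epochs
# and roll back to the best weights observed during training.
early_stop = EarlyStopping(monitor='val_loss',
                           patience=10,
                           restore_best_weights=True,
                           verbose=1)

# history = model.fit_generator(train_batches, steps_per_epoch=20,
#                               validation_data=valid_batches,
#                               validation_steps=8, epochs=100,
#                               callbacks=[early_stop])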
