我尝试做一个简单的模型来识别动物的图像(3类),使用tf.keras.preprocessing函数。image_dataset_from_directory,当我用训练和验证数据集拟合模型时,它似乎有效,但在评估和预测上,它发现总是相同的类,我不知道为什么,这是我的代码:
import matplotlib.pyplot as plt
import numpy as np
import PIL
import tensorflow as tf
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow import keras
import os
i = 0
def fast_scandir(dirname):
subfolders= [f.path for f in os.scandir(dirname) if f.is_dir()]
for dirname in list(subfolders):
subfolders.extend(fast_scandir(dirname))
return subfolders
classnames = fast_scandir("/home/someone/Documents/machinelearning/path/testimage")
for name in classnames:
classnames[i] = classnames[i].replace("/home/someone/Documents/machinelearning/path/images/","")
i+=1
i=0
classesnum = len(classnames)
#print(classesnum)
img_height = 256
img_width = 256
batch_size = 16
IMAGE_SIZE = 256
data_dir = "/home/someone/Documents/machinelearning/path/testimage"
test_dir = "/home/someone/Documents/machinelearning/path/testimage"
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
data_dir,
validation_split=0.2,
subset="training",
label_mode="categorical",
seed=123,
shuffle=False,
image_size=(img_height, img_width),
batch_size=batch_size,
color_mode='rgb')
print(train_ds)
'''
test_ds = tf.keras.preprocessing.image_dataset_from_directory(
test_dir,
label_mode="categorical",
seed=123,
shuffle=False,
image_size=(img_height, img_width),
batch_size=batch_size,
color_mode='rgb')
'''
val_ds = tf.keras.preprocessing.image_dataset_from_directory(
data_dir,
validation_split=0.2,
subset="validation",
label_mode="categorical",
seed=123,
shuffle=False,
image_size=(img_height, img_width),
batch_size=batch_size,
color_mode='rgb')
'''
#####debug######
for images,labels in train_ds.take(1):
for i in range(len(labels)):
img = plt.imshow(images[i].numpy().astype("uint8"))
plt.title(str(labels[i]))
plt.show()
'''
#tf.data.experimental.cardinality(train_ds)
#print(val_ds)
#print(np.shape(val_ds))
#print(type(val_ds))
import matplotlib.pyplot as plt
pretrained_model= tf.keras.applications.ResNet50(include_top=False,
input_shape=(img_width,img_height,3),
pooling='avg',classes=classesnum,
weights='imagenet')
for layer in pretrained_model.layers:
layer.trainable=False
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras import optimizers
model = Sequential()
model.add(Conv2D(32,(3,3),input_shape=(IMAGE_SIZE,IMAGE_SIZE,3),activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Conv2D(64,(3,3),activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Conv2D(96,(3,3),activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Conv2D(128,(3,3),activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Flatten())
model.add(Dense(512,activation='relu'))
model.add(Dense(3,activation='softmax'))
model.summary()
model.compile(loss='categorical_crossentropy',
optimizer=keras.optimizers.Adam(learning_rate=3e-4),
metrics=['accuracy'])
history = model.fit(train_ds, validation_data=val_ds, epochs=10)
fig1 = plt.gcf()
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.axis(ymin=0.4,ymax=1)
plt.grid()
plt.title('Model Accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epochs')
plt.legend(['train', 'validation'])
plt.show()
model.save("/home/someone/Documents/machinelearning/path/model.h5")
'''
evaluation = model.evaluate(test_ds, return_dict=True)
for name, value in evaluation.items():
print(f"{name}: {value:.4f}")
'''
我认为这是设置错误的东西,就像某个地方的类数量不好…
似乎在数据集中设置shuffle=True,解决了这个问题,因为在我的模型中只使用了数据集的一部分。