我正在尝试进行图像分类以区分猫和狗。我想通过训练一个一类神经网络来做到这一点。我在VGG16模型中尝试过这种方法,但现在我想使用ResNet50,因为它更小,因此希望更快。对于VGG16,以下方法有效,但对于ResNet50无效。
import os
import random
import itertools
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import tensorflow as tf
from tensorflow.keras.models import *
from tensorflow.keras.layers import *
from tensorflow.keras.callbacks import *
from tensorflow.keras.optimizers import *
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import numpy as np
import matplotlib.pyplot as plt
import os
import cv2
from tqdm import tqdm
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.preprocessing import image
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D,MaxPool2D
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
from keras.optimizers import Adam
import keras
import random
import requests
import keras
from keras.models import Model
from keras.layers import Dense
from keras import optimizers
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing import image
from keras.applications.vgg16 import decode_predictions
from keras import applications
from keras.optimizers import RMSprop
import pickle
import cv2
base_path = "datafolder/"
SHAPE = (224,224,3)
batch_size = 10
def to_grayscale_then_rgb(image):
image = tf.image.rgb_to_grayscale(image)
image = tf.image.grayscale_to_rgb(image)
return image
def wrap_generator(generator):
while True:
x,y = next(generator)
y = tf.keras.utils.to_categorical(y)
zeros = tf.zeros_like(y) + tf.constant([1.,0.])
y = tf.concat([y,zeros], axis=0)
yield x,y
def set_seed(seed):
tf.random.set_seed(seed)
os.environ['PYTHONHASHSEED'] = str(seed)
np.random.seed(seed)
random.seed(seed)
def get_model(train=True):
set_seed(33)
pre_process = Lambda(tf.keras.applications.resnet50.preprocess_input)
vgg = applications.ResNet50(weights = 'imagenet', include_top = True, input_shape = SHAPE)
vgg = Model(vgg.input, vgg.layers[-3].output)
vgg.trainable = False
inp = Input(SHAPE)
vgg_16_process = pre_process(GaussianNoise(0.1)(inp))
vgg_out = vgg(vgg_16_process)
noise = Lambda(tf.zeros_like)(vgg_out)
noise = GaussianNoise(0.1)(noise)
if train:
x = Lambda(lambda z: tf.concat(z, axis=0))([vgg_out,noise])
x = Activation('relu')(x)
else:
x = vgg_out
x = Dense(512, activation='relu')(x)
x = Dense(128, activation='relu')(x)
out = Dense(2, activation='softmax')(x)
model = Model(inp, out)
model.compile(Adam(lr=1e-4), loss='binary_crossentropy')
return model
### FLOW GENERATORS ###
train_generator = train_datagen.flow_from_directory(
base_path + 'training_set/training_set/',
target_size = (SHAPE[0], SHAPE[1]),
batch_size = batch_size,
class_mode = 'categorical',
shuffle = True,
seed = 33,
classes = ['dogs']
)
model = get_model()
model.summary()
model.fit(wrap_generator(train_generator), steps_per_epoch=train_generator.samples/train_generator.batch_size, epochs=30)
对于VGG16模型,这是有效的,但是当尝试实现ResNet50模型时,我得到了以下错误:
ValueError: logits and labels must have the same shape ((None, 7, 7, 2) vs (None, None))
我怀疑这与神经网络的结构有关,如下所示:
Model: "model_57"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_59 (InputLayer) [(None, 224, 224, 3) 0
__________________________________________________________________________________________________
gaussian_noise_56 (GaussianNois (None, 224, 224, 3) 0 input_59[0][0]
__________________________________________________________________________________________________
lambda_98 (Lambda) (None, 224, 224, 3) 0 gaussian_noise_56[0][0]
__________________________________________________________________________________________________
model_56 (Functional) (None, 7, 7, 2048) 23587712 lambda_98[0][0]
__________________________________________________________________________________________________
lambda_99 (Lambda) (None, 7, 7, 2048) 0 model_56[0][0]
__________________________________________________________________________________________________
gaussian_noise_57 (GaussianNois (None, 7, 7, 2048) 0 lambda_99[0][0]
__________________________________________________________________________________________________
lambda_100 (Lambda) (None, 7, 7, 2048) 0 model_56[0][0]
gaussian_noise_57[0][0]
__________________________________________________________________________________________________
activation_686 (Activation) (None, 7, 7, 2048) 0 lambda_100[0][0]
__________________________________________________________________________________________________
dense_84 (Dense) (None, 7, 7, 512) 1049088 activation_686[0][0]
__________________________________________________________________________________________________
dense_85 (Dense) (None, 7, 7, 128) 65664 dense_84[0][0]
__________________________________________________________________________________________________
dense_86 (Dense) (None, 7, 7, 2) 258 dense_85[0][0]
==================================================================================================
Total params: 24,702,722
Trainable params: 1,115,010
Non-trainable params: 23,587,712
最后一层包含两个7,而VGG16则没有。我尝试更改代码中的多个内容,但问题似乎并没有消失。我怀疑get_model
函数有问题。是什么原因导致了这个问题?
简单更改:
vgg = Model(vgg.input, vgg.layers[-3].output)
进入:
vgg = Model(vgg.input, vgg.layers[-2].output)
应该有效。
这是因为我们需要一个输出2D张量的层。当使用VGG16
时,[-3]
位置的层是Flatten
层,所以可以。当使用RESNET50
时,我们需要使用[-2]
位置的层(GlobalPooling
(才能获得2D输出