Loss function for a Siamese neural network

I am trying to train a Siamese neural network for face recognition. Many resources use this function as the loss:

def contrastive_loss(y_true, y_pred):
    margin = 1
    return K.mean(y_true * K.square(y_pred) + (1 - y_true) * K.square(K.maximum(margin - y_pred, 0)))

I have trained several networks with different architectures. For some of them this function does not work correctly (it returns nan), so those networks do not train at all.

My code:

#Models.py
from keras.models import Sequential, Model
from keras.layers import Input, Conv2D, MaxPooling2D, Dense, Dropout, Flatten, Lambda, BatchNormalization, Activation
from keras.optimizers import RMSprop
from keras import backend as K

def euclidean_distance(vects):
    x, y = vects
    return K.sqrt(K.sum(K.square(x - y), axis=1, keepdims=True))

def eucl_dist_output_shape(shapes):
    shape1, shape2 = shapes
    return (shape1[0], 1)

def contrastive_loss(y_true, y_pred):
    margin = 1
    return K.mean(y_true * K.square(y_pred) + (1 - y_true) * K.square(K.maximum(margin - y_pred, 0)))

def accuracy(y_true, y_pred):
    return K.mean(K.equal(y_true, K.cast(y_pred < 0.5, y_true.dtype)))

def TestModel(input_shape):
    model = Sequential()
    model.add(Conv2D(filters=96, kernel_size=3, strides=3, activation='relu', input_shape=input_shape, padding='valid'))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Dropout(.25))
    model.add(Conv2D(filters=256, kernel_size=3, strides=3, activation='relu', padding='valid'))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Dropout(.25))
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dropout(0.1))
    model.add(Dense(128, activation='relu'))
    return model

def Net_Definition(input_shape):
    model = Sequential()
    model.add(Conv2D(filters=96, kernel_size=7, strides=4, activation='relu', padding='valid', input_shape=input_shape))
    model.add(MaxPooling2D(pool_size=3, strides=2, padding='valid'))
    model.add(BatchNormalization())
    model.add(Conv2D(filters=256, kernel_size=5, strides=1, activation='relu', padding='same'))
    model.add(MaxPooling2D(pool_size=3, strides=2, padding='valid'))
    model.add(BatchNormalization())
    model.add(Conv2D(filters=384, kernel_size=3, strides=1, activation='relu', padding='same'))
    model.add(MaxPooling2D(pool_size=3, strides=2, padding='valid'))
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dropout(.5))
    model.add(Dense(512, activation='relu'))
    model.add(Dropout(.5))
    model.add(Dense(128, activation='softmax'))
    return model

def CreateModel(name, input_shape):
    global network
    if name == 'test':
        network = TestModel(input_shape)
    elif name == 'net_definition':
        network = Net_Definition(input_shape)
    else:
        print('Invalid model name!')
        exit(0)
    input_a = Input(shape=input_shape)
    input_b = Input(shape=input_shape)
    processed_a = network(input_a)
    processed_b = network(input_b)
    distance = Lambda(euclidean_distance, output_shape=eucl_dist_output_shape)([processed_a, processed_b])
    model = Model(inputs=[input_a, input_b], outputs=distance)
    opt = RMSprop()
    model.compile(loss=contrastive_loss, optimizer=opt, metrics=[accuracy])
    return model

#Training script
from keras.utils import Sequence
import numpy as np
import Models
from keras.callbacks import CSVLogger

class MyGenerator(Sequence):
    def __init__(self, filenames, labels, batch_size):
        self.filenames = filenames
        self.labels = labels
        self.batch_size = batch_size

    def __len__(self):
        return (np.ceil(len(self.filenames) / float(self.batch_size))).astype(np.int32)

    def __getitem__(self, item):
        batch_x = self.filenames[item * self.batch_size:(item + 1) * self.batch_size]
        batch_y = self.labels[item * self.batch_size:(item + 1) * self.batch_size]
        x1 = []
        x2 = []
        for i, files in enumerate(batch_x):
            pair = np.load(files).astype(np.float32)
            x1.append(pair[0] / 255)
            x2.append(pair[1] / 255)
        x1 = np.asarray(x1)
        x2 = np.asarray(x2)
        return (x1, x2), np.array(batch_y).astype(np.float32)

# path_to_folder = 'Datasets/test/pairs/224/'
path_to_folder = 'Datasets/6. Pairs/224/'
input_shape = (224, 224, 3)
batch_size = 128
x_train_file = open(path_to_folder + 'X_Train.txt', 'r')
y_train_file = open(path_to_folder + 'Y_Train.txt', 'r')
x_val_file = open(path_to_folder + 'X_Val.txt', 'r')
y_val_file = open(path_to_folder + 'Y_Val.txt', 'r')
x_train = x_train_file.read().splitlines()
y_train = y_train_file.read().splitlines()
x_val = x_val_file.read().splitlines()
y_val = y_val_file.read().splitlines()
csv_logger = CSVLogger('logs.log')
train_generator = MyGenerator(x_train, y_train, batch_size)
val_generator = MyGenerator(x_val, y_val, batch_size)
model = Models.CreateModel('test', input_shape)
history = model.fit(train_generator, epochs=10, verbose=1, validation_data=val_generator, callbacks=[csv_logger])
model.save_weights('my_checkpoint')

With TestModel everything works fine, but with Net_Definition the loss returns nan and the network does not train. How can I fix this? Is there perhaps another loss function I should use?

I can see a couple of errors here:

  1. The y_true and 1 - y_true terms in the contrastive loss should be swapped.

You can take inspiration from this:

import tensorflow as tf

def loss(margin=1):
    """Provides 'contrastive_loss' an enclosing scope with variable 'margin'.

    Arguments:
        margin: Integer, defines the baseline for distance for which pairs
                should be classified as dissimilar. - (default is 1).

    Returns:
        'contrastive_loss' function with data ('margin') attached.
    """

    # Contrastive loss = mean( (1-true_value) * square(prediction) +
    #                          true_value * square( max(margin-prediction, 0) ))
    def contrastive_loss(y_true, y_pred):
        """Calculates the contrastive loss.

        Arguments:
            y_true: List of labels, each label is of type float32.
            y_pred: List of predictions of same length as of y_true,
                    each label is of type float32.

        Returns:
            A tensor containing contrastive loss as floating point value.
        """
        square_pred = tf.math.square(y_pred)
        margin_square = tf.math.square(tf.math.maximum(margin - (y_pred), 0))
        return tf.math.reduce_mean(
            (1 - y_true) * square_pred + (y_true) * margin_square
        )

    return contrastive_loss
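
Note that loss here is a factory: calling loss(margin=1) returns the actual loss function, and it is that return value you pass to compile. For example:

model.compile(loss=loss(margin=1), optimizer=opt, metrics=[accuracy])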

Source
  2. The output of the Siamese network in this case should be a probability (a value between 0 and 1), since y_true is 0 or 1. Here, however, CreateModel builds a Siamese network whose output is the euclidean_distance between the two embedding vectors, which is not a probability; the Euclidean distance can be greater than 1. It would be better to add an activation such as sigmoid to the last layer of the Siamese model, as sketched below.
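
A minimal sketch of that suggestion, written against the CreateModel code above (an illustration, not the original implementation): squash the distance through a sigmoid so the model's output is bounded.

# Inside CreateModel, after computing the distance
# (Activation is already imported in Models.py):
distance = Lambda(euclidean_distance, output_shape=eucl_dist_output_shape)([processed_a, processed_b])
output = Activation('sigmoid')(distance)  # a non-negative distance maps into [0.5, 1)
model = Model(inputs=[input_a, input_b], outputs=output)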
