如何为每批隐含不同的损失函数

在得到这个问题的回答后，我意识到我有一个不同的问题。

我希望根据我在训练步骤中通过的批处理有一个不同的目标组件。假设我的批处理大小为1，并且我将每个训练数据与两个不属于训练步骤的支持向量相关联。所以我需要找出输入向量的哪一部分正在被处理。

import numpy as np
import keras.backend as K
from keras.layers import Dense, Input
from keras.models import Model
features = np.random.rand(100, 5)
labels = np.random.rand(100, 2)
holder =  np.random.rand(200, 5) # each feature gets two supporter. 
iter = np.arange(start=1, stop=features.shape[0], step=1)
supporters = {}
for i,j in zip(iter, holder): #(i, i+1) represent the ith training data
supporters[i]=j

例如，supporters的前两行是feature中的第一个点。

features[0]  [0.71444629 0.77256729 0.95375736 0.18759234 0.8207317 ]

有以下两种支持者:

1: array([0.76281692, 0.18698215, 0.11687052, 0.78084761, 0.10293403]), 
2: array([0.98229912, 0.08784577, 0.08109571, 0.23665783, 0.52587238])

现在，我创建一个简单的模型

# Simple neural net with three outputs
input_layer = Input((5,))
hidden_layer = Dense(16)(input_layer)
output_layer = Dense(2)(hidden_layer)

# Model
model = Model(inputs=input_layer, outputs=output_layer)

我的目标是创建一个损失函数

def custom_loss(y_true, y_pred):
# Normal MSE loss
mse = K.mean(K.square(y_true-y_pred), axis=-1)
#Assume that I properly pass model object into the method use the predict method
#to use the current network weights
new_constraint = K.sum(y_pred -  model.predict(supporters)) 
return(mse+new_constraint)

然后，我继续编译我的模型。

model.compile(loss=custom_loss, optimizer='sgd')
model.fit(features, labels, epochs=1, ,batch_size=1)

问题是，由于批大小为1，我想确保损失函数只考虑当前训练输入的支持者。例如，如果我正在训练features中的第三个点，那么我想在创建new_constraint时使用第五个和第六个向量。我怎样才能做到这一点呢?

你可以这样实现它(我已经使用了基于TensorFlow的Keras api，但这并不重要)

import numpy as np
import tensorflow as tf
from tensorflow.keras import Input, layers, Model
from tensorflow.keras import backend as K
features = np.random.rand(100, 5)
labels = np.random.rand(100, 2)
supporters =  np.random.rand(200, 5) # each feature gets two supporter. 

# I will get both support vectors to iterate over
supporters_1 = supporters[::2, :]
supporters_2 = supporters[1::2, :]
print(supporters_1.shape, supporters_2.shape)
# Result -> ((100, 5), (100, 5))
# Create a tf dataset to use in training
dataset = tf.data.Dataset.from_tensor_slices(((features, supporters_1, supporters_2), labels)).batch(1)
# A look at what it returns
for i in dataset:
print(i)
break
'''
Result:
((<tf.Tensor: shape=(1, 5), dtype=float64, numpy=array([[0.42834492, 0.01041871, 0.53058175, 0.69453215, 0.83901092]])>, 
<tf.Tensor: shape=(1, 5), dtype=float64, numpy=array([[0.1724601 , 0.14386688, 0.49018201, 0.13565471, 0.35159235]])>, 
<tf.Tensor: shape=(1, 5), dtype=float64, numpy=array([[0.87243349, 0.98779049, 0.98405784, 0.74069913, 0.25763667]])>), 
<tf.Tensor: shape=(1, 2), dtype=float64, numpy=array([[0.20993531, 0.70153453]])>)
'''
#=========================================================
# Creating the model (Input size is 5 and not 2 in your sample so I changed it)
# Same for the label shape
input_layer = Input((5,))
hidden_layer = layers.Dense(16)(input_layer)
output_layer = layers.Dense(2)(hidden_layer)
# Model
model = Model(inputs=input_layer, outputs=output_layer)
#=========================================================

# Implementing the custom loss
# Without the `K.abs` the result can be negative and hence the `K.abs`
def custom_loss(y_true, y_pred, support_pred_1, support_pred_2):
mse = tf.keras.losses.mse(y_true, y_pred)
new_constraint = K.abs(K.sum(y_pred -  [support_pred_1, support_pred_2]))
return (mse+new_constraint)
# Instantiate an optimizer.
optimizer = tf.keras.optimizers.SGD(learning_rate=1e-3)

'''
Now we create a custom training loop. In this we will get the logits
of all the inputs and then compute loss using the custom loss
function and then optimize on that loss.
'''
epochs = 10
for epoch in range(epochs):
print("Start of epoch %d" % (epoch,))
for step, ((features, support_1, support_2), labels) in enumerate(dataset):
with tf.GradientTape() as tape:
logits = model(features, training=True)
logits_1 = model(support_1, training=True)
logits_2 = model(support_2, training=True)

loss_value = custom_loss(labels, logits, logits_1, logits_2)
grads = tape.gradient(loss_value, model.trainable_weights)
optimizer.apply_gradients(zip(grads, model.trainable_weights))
print('loss_value: ', loss_value)

编辑:还有另一种方法可以做到这一点。如下:

# Everthing same till the supporters_1, supporters_2
def combine(inputs, targets):
features = inputs[0]
supports1 = inputs[1]
supports2 = inputs[2]
# Stack the inputs as a batch
final = tf.stack((features, support_1, support_2))
final = tf.reshape(final, (3,5))
return final, targets

# Creating the dataset
dataset = tf.data.Dataset.from_tensor_slices(((features, supporters_1, supporters_2), labels)).batch(1)
dataset = dataset.map(combine, num_parallel_calls=-1)
# Check the output
for i in dataset:
print(i)
break
'''
(<tf.Tensor: shape=(3, 5), dtype=float64, numpy=
array([[0.35641985, 0.93025517, 0.72874829, 0.81810538, 0.46682277],
[0.95497516, 0.71722253, 0.10608685, 0.37267656, 0.94748968],
[0.04822454, 0.00480376, 0.08479184, 0.51133809, 0.38242403]])>, <tf.Tensor: shape=(1, 2), dtype=float64, numpy=array([[0.21399956, 0.97149716]])>)
'''
#================MODEL=================
input_layer = Input((5,))
hidden_layer = layers.Dense(16)(input_layer)
output_layer = layers.Dense(2)(hidden_layer)
# Model
model = Model(inputs=input_layer, outputs=output_layer)
#=======================================
# change the loss function accordingly
'''
The first row in the y_pred will be the prediction corresponding to
actual features and the rest will be predictions corresponding to
supports and hence you can change the loss function as below.
'''
def custom_loss(y_true, y_pred):
mse = tf.keras.losses.mse(y_true, y_pred[0, :])
new_constraint = K.abs(K.sum(y_pred[0, :] -  y_pred[1:, :]))
return (mse+new_constraint)

# Compile
model.compile(loss=custom_loss, optimizer='adam')
# train
model.fit(dataset, epochs=5)

相关内容

最新更新

热门标签：