Implementing a subclassed model with the loss function as a layer



I want to implement a subclassed model with multiple inputs. First, here is my loss layer.

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers import (Conv2D, MaxPooling2D, Dense, Dropout,
                                     Reshape, Bidirectional, LSTM, Input)
from tensorflow.keras.models import Model


class CTCLayer(layers.Layer):
    '''
    Implementation of the loss layer.

    Attributes
    ----------
    name : str
        Name of the layer.

    Methods
    -------
    call(y_true, y_pred)
    '''
    def __init__(self, name: str = None):
        super().__init__(name=name)
        self.loss_fn = keras.backend.ctc_batch_cost

    def call(self, y_true, y_pred):
        # Compute the training-time loss value and add it
        # to the layer using `self.add_loss()`.
        batch_len = tf.cast(tf.shape(y_true)[0], dtype="int64")
        input_length = tf.cast(tf.shape(y_pred)[1], dtype="int64")
        label_length = tf.cast(tf.shape(y_true)[1], dtype="int64")
        input_length = input_length * tf.ones(shape=(batch_len, 1), dtype="int64")
        label_length = label_length * tf.ones(shape=(batch_len, 1), dtype="int64")
        loss = self.loss_fn(y_true, y_pred, input_length, label_length)
        self.add_loss(loss)

        # At test time, just return the computed predictions.
        return y_pred
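
For reference, a quick check with dummy tensors (shapes chosen arbitrarily, not from my real data) shows what the layer does: it records the CTC loss via add_loss and passes the predictions through unchanged.

import numpy as np
import tensorflow as tf

ctc = CTCLayer(name="ctc_loss")
y_true = tf.constant(np.random.randint(1, 10, size=(2, 5)), dtype=tf.int64)  # (batch, label_length)
y_pred = tf.random.uniform((2, 50, 11))                                      # (batch, time_steps, vocab + blank)
out = ctc(y_true, y_pred)
print(out.shape)   # (2, 50, 11) -- predictions are returned unchanged
print(ctc.losses)  # contains the CTC loss tensor for this batch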

So my loss is implemented as a layer of the model, and the model's output comes from this loss layer. These are the building blocks of the model:

class ConvBlock(layers.Layer):
    '''
    Convolution block with Conv2D and MaxPooling2D.

    Attributes
    ----------
    n_filter : int
        Number of filters of Conv2D.
    activation : str
        Activation used in Conv2D.
    kernel_initializer : str
        Name of the distribution used to initialize the weights.

    Methods
    -------
    call(inputs)
    '''
    def __init__(self, n_filter: int, activation: str, kernel_initializer: str):
        super(ConvBlock, self).__init__()
        self.conv2d = Conv2D(n_filter, (3, 3), activation=activation,
                             kernel_initializer=kernel_initializer,
                             padding="same")
        self.maxpool = MaxPooling2D()

    def call(self, inputs):
        x = self.conv2d(inputs)
        x = self.maxpool(x)
        return x

class RNNBlock(layers.Layer):
    '''
    RNN block with two bidirectional LSTM layers.

    Attributes
    ----------
    unit_rnn_1 : int
        Number of units of the first LSTM layer.
    drop_rnn_1 : float
        Dropout rate of the first LSTM layer.
    unit_rnn_2 : int
        Number of units of the second LSTM layer.
    drop_rnn_2 : float
        Dropout rate of the second LSTM layer.

    Methods
    -------
    call(inputs)
    '''
    def __init__(self, unit_rnn_1: int, drop_rnn_1: float, unit_rnn_2: int, drop_rnn_2: float):
        super(RNNBlock, self).__init__()
        self.rnn_1 = Bidirectional(LSTM(unit_rnn_1, return_sequences=True, dropout=drop_rnn_1))
        self.rnn_2 = Bidirectional(LSTM(unit_rnn_2, return_sequences=True, dropout=drop_rnn_2))

    def call(self, inputs):
        x = self.rnn_1(inputs)
        x = self.rnn_2(x)
        return x

class OCRModel(Model):
    '''
    Model for the OCR problem.
    The model takes two inputs (image and label) and has a CTCLayer as its last layer.

    Attributes
    ----------
    block_1 : int
        Number of filters of the first ConvBlock.
    act : str
        Activation used in the Conv2D and Dense layers.
    init_kernel : str
        Kernel initializer of the Conv2D layers.
    block_2 : int
        Number of filters of the second ConvBlock.
    img_width : int
        Width of the input images.
    img_height : int
        Height of the input images.
    unit_1 : int
        Units of the Dense layer before the RNN block.
    drop : float
        Dropout rate applied after that Dense layer.
    unit_rnn_1 : int
        Units of the first LSTM layer.
    drop_rnn_1 : float
        Dropout rate of the first LSTM layer.
    unit_rnn_2 : int
        Units of the second LSTM layer.
    drop_rnn_2 : float
        Dropout rate of the second LSTM layer.
    char_to_num : object
        Lookup layer mapping characters to integers (must provide get_vocabulary()).

    Methods
    -------
    call(inputs)
    '''
    def __init__(self, block_1: int = 32, act: str = 'relu', init_kernel: str = 'he_normal',
                 block_2: int = 64, img_width: int = 200, img_height: int = 50, unit_1: int = 64,
                 drop: float = 0.2, unit_rnn_1: int = 128, drop_rnn_1: float = 0.25,
                 unit_rnn_2: int = 64, drop_rnn_2: float = 0.25, char_to_num: object = None):
        super(OCRModel, self).__init__()
        # CNNs
        self.convb1 = ConvBlock(n_filter=block_1, activation=act, kernel_initializer=init_kernel)
        self.convb2 = ConvBlock(n_filter=block_2, activation=act, kernel_initializer=init_kernel)
        # Factor by which the image is downsampled by the convolutional blocks.
        # We use two convolution blocks, each with a pooling layer that
        # downsamples the features by a factor of 2, so the total
        # downsampling factor is 4.
        down_sampling = 4
        self.new_shape = ((img_width // down_sampling), (img_height // down_sampling) * block_2)
        self.reshape = Reshape(target_shape=self.new_shape)
        self.dense1 = Dense(units=unit_1, activation=act)
        self.drop = Dropout(rate=drop)
        # RNNs
        self.rnn = RNNBlock(unit_rnn_1=unit_rnn_1, drop_rnn_1=drop_rnn_1,
                            unit_rnn_2=unit_rnn_2, drop_rnn_2=drop_rnn_2)
        # Output layer
        self.dense2 = Dense(units=len(char_to_num.get_vocabulary()) + 1,
                            activation="softmax", name='dense2')
        # CTC loss
        self.ctc = CTCLayer()

    def call(self, inputs):
        # inputs = [image, label]
        x = self.convb1(inputs[0])
        x = self.convb2(x)
        x = self.reshape(x)
        x = self.dense1(x)
        x = self.drop(x)
        x = self.rnn(x)
        x = self.dense2(x)
        # The CTC layer takes the labels (y_true) and the predictions (y_pred).
        output = self.ctc(inputs[1], x)
        return output
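
As a quick sanity check of the shape bookkeeping behind new_shape (dummy data, default hyperparameters): the two ConvBlocks halve each spatial dimension twice, so a (200, 50, 1) image becomes (50, 12, 64), and the Reshape folds the last two axes into (50, 12 * 64) = (50, 768), i.e. 50 time steps for the RNN and CTC.

import tensorflow as tf

x = tf.random.uniform((1, 200, 50, 1))                  # (batch, width, height, channels)
x = ConvBlock(n_filter=32, activation="relu", kernel_initializer="he_normal")(x)
x = ConvBlock(n_filter=64, activation="relu", kernel_initializer="he_normal")(x)
print(x.shape)                                          # (1, 50, 12, 64)
print(tf.keras.layers.Reshape((50, 12 * 64))(x).shape)  # (1, 50, 768)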

This is my model: a couple of conv blocks, an RNN block, and a loss layer (the CTC loss above). I build and compile it like this:

img_width = 200
img_height = 50

# Inputs
img_input = Input(shape=(img_width, img_height, 1), name='image')
label_input = Input(shape=(None,), name='label')

# Hidden layers
model = OCRModel(char_to_num=char_to_num)

# Output
output = model([img_input, label_input])

model = Model(inputs=[img_input, label_input], outputs=output)
model.compile(optimizer='adam')

The model is compiled with the Adam optimizer; no loss is passed to compile() because the CTC loss is already added inside CTCLayer via self.add_loss().
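For completeness, training would then look roughly like this (a sketch only: train_ds and val_ds are assumed to be tf.data.Dataset objects yielding dicts keyed by the Input names 'image' and 'label'):

# Sketch only -- `train_ds` / `val_ds` are assumed, not defined above.
# Each batch is a dict: {"image": float32 (batch, 200, 50, 1), "label": padded integer labels}.
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=100,
)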

# Get the prediction model by extracting layers till the output layer
prediction_model = keras.models.Model(
    model.get_layer(name="image").input, model.get_layer(name="dense_2").output
)
prediction_model.summary()

I want to use the "dense_2" layer of the model, but I get this error: ValueError: No such layer: dense_2. Existing layers are: ['image', 'label', 'ocr_model'].

You cannot directly access the layers inside the inner OCRModel. However, you can give that model a name and access its layers through it:

model = OCRModel()
model._name = "my_model"
...
prediction_model = keras.models.Model(
    model.get_layer(name="image").input,
    model.get_layer(name="my_model").get_layer(name="dense_2").output
)
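
(Note that in the OCRModel posted above the output Dense layer is explicitly created with name='dense2', so the inner get_layer call has to use that exact name.) Once the prediction model exists, its softmax output can be decoded with keras.backend.ctc_decode; a minimal sketch, assuming num_to_char is the inverse lookup of char_to_num, max_length is the label length, and image_batch is a preprocessed batch of images:

import numpy as np
import tensorflow as tf
from tensorflow import keras

def decode_batch_predictions(pred, max_length=10):
    # pred: (batch, time_steps, vocab_size + 1) softmax scores from the prediction model.
    input_len = np.ones(pred.shape[0]) * pred.shape[1]
    # Greedy CTC decoding; keep at most `max_length` characters per sample.
    results = keras.backend.ctc_decode(pred, input_length=input_len, greedy=True)[0][0][:, :max_length]
    # `num_to_char` is assumed to be the inverse StringLookup of `char_to_num`.
    return [
        tf.strings.reduce_join(num_to_char(res)).numpy().decode("utf-8")
        for res in results
    ]

preds = prediction_model.predict(image_batch)  # `image_batch` is a placeholder here
print(decode_batch_predictions(preds))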
