How do I load and predict with a TensorFlow model saved via save_weights?



I am running a fairly customized TensorFlow model based on the following repository:

https://github.com/strongio/keras-bert/blob/master/keras-bert.py

# Initialize session
sess = tf.Session()

# # Load all files from a directory in a DataFrame.
# def load_directory_data(directory):
#     data = {}
#     data["sentence"] = []
#     data["sentiment"] = []
#     for file_path in os.listdir(directory):
#         with tf.gfile.GFile(os.path.join(directory, file_path), "r") as f:
#             data["sentence"].append(f.read())
#             data["sentiment"].append(re.match(r"\d+_(\d+)\.txt", file_path).group(1))
#     return pd.DataFrame.from_dict(data)

# # Merge positive and negative examples, add a polarity column and shuffle.
# def load_dataset(directory):
#     pos_df = load_directory_data(os.path.join(directory, "pos"))
#     neg_df = load_directory_data(os.path.join(directory, "neg"))
#     pos_df["polarity"] = 1
#     neg_df["polarity"] = 0
#     return pd.concat([pos_df, neg_df]).sample(frac=1).reset_index(drop=True)

# # Download and process the dataset files.
# def download_and_load_datasets(force_download=False):
#     dataset = tf.keras.utils.get_file(
#         fname="aclImdb.tar.gz",
#         origin="http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz",
#         extract=True,
#     )
#     train_df = load_dataset(os.path.join(os.path.dirname(dataset), "aclImdb", "train"))
#     test_df = load_dataset(os.path.join(os.path.dirname(dataset), "aclImdb", "test"))
#     return train_df, test_df

class PaddingInputExample(object):
    """Fake example so the num input examples is a multiple of the batch size.
    When running eval/predict on the TPU, we need to pad the number of examples
    to be a multiple of the batch size, because the TPU requires a fixed batch
    size. The alternative is to drop the last batch, which is bad because it means
    the entire output data won't be generated.
    We use this class instead of `None` because treating `None` as padding
    batches could cause silent errors.
    """

class InputExample(object):
    """A single training/test example for simple sequence classification."""

    def __init__(self, guid, text_a, text_b=None, label=None):
        """Constructs an InputExample.
        Args:
            guid: Unique id for the example.
            text_a: string. The untokenized text of the first sequence. For single
                sequence tasks, only this sequence must be specified.
            text_b: (Optional) string. The untokenized text of the second sequence.
                Only must be specified for sequence pair tasks.
            label: (Optional) string. The label of the example. This should be
                specified for train and dev examples, but not for test examples.
        """
        self.guid = guid
        self.text_a = text_a
        self.text_b = text_b
        self.label = label

def create_tokenizer_from_hub_module(bert_path):
    """Get the vocab file and casing info from the Hub module."""
    bert_module = hub.Module(bert_path)
    tokenization_info = bert_module(signature="tokenization_info", as_dict=True)
    vocab_file, do_lower_case = sess.run(
        [tokenization_info["vocab_file"], tokenization_info["do_lower_case"]]
    )
    return FullTokenizer(vocab_file=vocab_file, do_lower_case=do_lower_case)

def convert_single_example(tokenizer, example, max_seq_length=256):
    """Converts a single `InputExample` into a single `InputFeatures`."""
    if isinstance(example, PaddingInputExample):
        input_ids = [0] * max_seq_length
        input_mask = [0] * max_seq_length
        segment_ids = [0] * max_seq_length
        label = 0
        return input_ids, input_mask, segment_ids, label

    tokens_a = tokenizer.tokenize(example.text_a)
    if len(tokens_a) > max_seq_length - 2:
        tokens_a = tokens_a[0 : (max_seq_length - 2)]

    tokens = []
    segment_ids = []
    tokens.append("[CLS]")
    segment_ids.append(0)
    for token in tokens_a:
        tokens.append(token)
        segment_ids.append(0)
    tokens.append("[SEP]")
    segment_ids.append(0)

    input_ids = tokenizer.convert_tokens_to_ids(tokens)

    # The mask has 1 for real tokens and 0 for padding tokens. Only real
    # tokens are attended to.
    input_mask = [1] * len(input_ids)

    # Zero-pad up to the sequence length.
    while len(input_ids) < max_seq_length:
        input_ids.append(0)
        input_mask.append(0)
        segment_ids.append(0)

    assert len(input_ids) == max_seq_length
    assert len(input_mask) == max_seq_length
    assert len(segment_ids) == max_seq_length

    return input_ids, input_mask, segment_ids, example.label

def convert_examples_to_features(tokenizer, examples, max_seq_length=256):
    """Convert a set of `InputExample`s to a list of `InputFeatures`."""
    input_ids, input_masks, segment_ids, labels = [], [], [], []
    for example in tqdm(examples, desc="Converting examples to features"):
        input_id, input_mask, segment_id, label = convert_single_example(
            tokenizer, example, max_seq_length
        )
        input_ids.append(input_id)
        input_masks.append(input_mask)
        segment_ids.append(segment_id)
        labels.append(label)
    return (
        np.array(input_ids),
        np.array(input_masks),
        np.array(segment_ids),
        np.array(labels).reshape(-1, 1),
    )

def convert_text_to_examples(texts, labels):
    """Create InputExamples"""
    InputExamples = []
    for text, label in zip(texts, labels):
        InputExamples.append(
            InputExample(guid=None, text_a=" ".join(text), text_b=None, label=label)
        )
    return InputExamples

class BertLayer(tf.keras.layers.Layer):
    def __init__(
        self,
        n_fine_tune_layers=10,
        pooling="mean",
        bert_path="https://tfhub.dev/google/bert_uncased_L-12_H-768_A-12/1",
        **kwargs,
    ):
        self.n_fine_tune_layers = n_fine_tune_layers
        self.trainable = True
        self.output_size = 768
        self.pooling = pooling
        self.bert_path = bert_path
        if self.pooling not in ["first", "mean"]:
            raise NameError(
                f"Undefined pooling type (must be either first or mean, but is {self.pooling})"
            )
        super(BertLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        self.bert = hub.Module(
            self.bert_path, trainable=self.trainable, name=f"{self.name}_module"
        )
        # Remove unused layers
        trainable_vars = self.bert.variables
        if self.pooling == "first":
            trainable_vars = [var for var in trainable_vars if not "/cls/" in var.name]
            trainable_layers = ["pooler/dense"]
        elif self.pooling == "mean":
            trainable_vars = [
                var
                for var in trainable_vars
                if not "/cls/" in var.name and not "/pooler/" in var.name
            ]
            trainable_layers = []
        else:
            raise NameError(
                f"Undefined pooling type (must be either first or mean, but is {self.pooling})"
            )

        # Select how many layers to fine tune
        for i in range(self.n_fine_tune_layers):
            trainable_layers.append(f"encoder/layer_{str(11 - i)}")

        # Update trainable vars to contain only the specified layers
        trainable_vars = [
            var
            for var in trainable_vars
            if any([l in var.name for l in trainable_layers])
        ]

        # Add to trainable weights
        for var in trainable_vars:
            self._trainable_weights.append(var)
        for var in self.bert.variables:
            if var not in self._trainable_weights:
                self._non_trainable_weights.append(var)

        super(BertLayer, self).build(input_shape)

    def call(self, inputs):
        inputs = [K.cast(x, dtype="int32") for x in inputs]
        input_ids, input_mask, segment_ids = inputs
        bert_inputs = dict(
            input_ids=input_ids, input_mask=input_mask, segment_ids=segment_ids
        )
        if self.pooling == "first":
            pooled = self.bert(inputs=bert_inputs, signature="tokens", as_dict=True)[
                "pooled_output"
            ]
        elif self.pooling == "mean":
            result = self.bert(inputs=bert_inputs, signature="tokens", as_dict=True)[
                "sequence_output"
            ]
            mul_mask = lambda x, m: x * tf.expand_dims(m, axis=-1)
            masked_reduce_mean = lambda x, m: tf.reduce_sum(mul_mask(x, m), axis=1) / (
                tf.reduce_sum(m, axis=1, keepdims=True) + 1e-10
            )
            input_mask = tf.cast(input_mask, tf.float32)
            pooled = masked_reduce_mean(result, input_mask)
        else:
            raise NameError(
                f"Undefined pooling type (must be either first or mean, but is {self.pooling})"
            )
        return pooled

    def compute_output_shape(self, input_shape):
        return (input_shape[0], self.output_size)

    def get_config(self):
        config = super().get_config().copy()
        config.update({
            'n_fine_tune_layers': self.n_fine_tune_layers,
            'trainable': self.trainable,
            'output_size': self.output_size,
            'pooling': self.pooling,
            'bert_path': self.bert_path,
            'dropout': self.dropout,
        })
        return config

# Build model
def build_model(max_seq_length):
    in_id = tf.keras.layers.Input(shape=(max_seq_length,), name="input_ids")
    in_mask = tf.keras.layers.Input(shape=(max_seq_length,), name="input_masks")
    in_segment = tf.keras.layers.Input(shape=(max_seq_length,), name="segment_ids")
    bert_inputs = [in_id, in_mask, in_segment]

    bert_output = BertLayer(n_fine_tune_layers=3)(bert_inputs)
    dense = tf.keras.layers.Dense(256, activation="relu")(bert_output)
    pred = tf.keras.layers.Dense(1, activation="sigmoid")(dense)

    # embedding_size = 768
    # bert_output = BertLayer(n_fine_tune_layers=3)(bert_inputs)
    # # Reshape bert_output before passing it to the GRU
    # bert_output_ = tf.keras.layers.Reshape((max_seq_length, embedding_size))(bert_output)
    # gru_out = tf.keras.layers.GRU(100, activation='sigmoid')(bert_output_)
    # dense = tf.keras.layers.Dense(256, activation="relu")(gru_out)
    # pred = tf.keras.layers.Dense(1, activation="sigmoid")(dense)

    model = tf.keras.models.Model(inputs=bert_inputs, outputs=pred)
    model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])
    model.summary()
    return model

def initialize_vars(sess):
    sess.run(tf.local_variables_initializer())
    sess.run(tf.global_variables_initializer())
    sess.run(tf.tables_initializer())
    K.set_session(sess)

def main():
    # Params for bert model and tokenization
    bert_path = "https://tfhub.dev/google/bert_uncased_L-12_H-768_A-12/1"
    max_seq_length = 256
    train_df, test_df = master_df[:round(len(master_df)*.8)], master_df[round(len(master_df)*.8):]

    # Create datasets (Only take up to max_seq_length words for memory)
    train_text = train_df["words"].tolist()
    train_text = [" ".join(t.split()[0:max_seq_length]) for t in train_text]
    train_text = np.array(train_text, dtype=object)[:, np.newaxis]
    train_label = train_df["new_grouping"].tolist()

    test_text = test_df["words"].tolist()
    test_text = [" ".join(t.split()[0:max_seq_length]) for t in test_text]
    test_text = np.array(test_text, dtype=object)[:, np.newaxis]
    test_label = test_df["new_grouping"].tolist()

    # Instantiate tokenizer
    tokenizer = create_tokenizer_from_hub_module(bert_path)

    # Convert data to InputExample format
    train_examples = convert_text_to_examples(train_text, train_label)
    test_examples = convert_text_to_examples(test_text, test_label)

    # Convert to features
    (
        train_input_ids,
        train_input_masks,
        train_segment_ids,
        train_labels,
    ) = convert_examples_to_features(
        tokenizer, train_examples, max_seq_length=max_seq_length
    )
    (
        test_input_ids,
        test_input_masks,
        test_segment_ids,
        test_labels,
    ) = convert_examples_to_features(
        tokenizer, test_examples, max_seq_length=max_seq_length
    )

    model = build_model(max_seq_length)

    # Instantiate variables
    initialize_vars(sess)

    checkpoint_path = "bert_dir/cp.ckpt"
    checkpoint_dir = os.path.dirname(checkpoint_path)

    # Create a callback that saves the model's weights
    cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                     save_weights_only=True,
                                                     verbose=1)

    history = model.fit(
        [train_input_ids, train_input_masks, train_segment_ids],
        train_labels,
        validation_data=(
            [test_input_ids, test_input_masks, test_segment_ids],
            test_labels,
        ),
        epochs=1,
        batch_size=32,
        callbacks=[cp_callback]
    )

    model.save('bert_1.h5')

    return history


if __name__ == "__main__":
    history = main()

Edit 1

After adding get_config to the BertLayer class (see the edit above), I get this error:


---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-11-ac0d0189838a> in <module>()
383 
384 if __name__ == "__main__":
--> 385    history, train_df, val_df, model = main()
6 frames
<ipython-input-11-ac0d0189838a> in main()
377     )
378 
--> 379     model.save('bert_1.h5')
380 
381 
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/network.py in save(self, filepath, overwrite, include_optimizer, save_format, signatures)
1169     """
1170     saving.save_model(self, filepath, overwrite, include_optimizer, save_format,
-> 1171                       signatures)
1172 
1173   def save_weights(self, filepath, overwrite=True, save_format=None):
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/saving/save.py in save_model(model, filepath, overwrite, include_optimizer, save_format, signatures)
107           'or using `save_weights`.')
108     hdf5_format.save_model_to_hdf5(
--> 109         model, filepath, overwrite, include_optimizer)
110   else:
111     saved_model_save.save(model, filepath, overwrite, include_optimizer,
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/saving/hdf5_format.py in save_model_to_hdf5(model, filepath, overwrite, include_optimizer)
91 
92   try:
---> 93     model_metadata = saving_utils.model_metadata(model, include_optimizer)
94     for k, v in model_metadata.items():
95       if isinstance(v, (dict, list, tuple)):
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/saving/saving_utils.py in model_metadata(model, include_optimizer, require_config)
155   model_config = {'class_name': model.__class__.__name__}
156   try:
--> 157     model_config['config'] = model.get_config()
158   except NotImplementedError as e:
159     if require_config:
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/network.py in get_config(self)
884     for layer in self.layers:  # From the earliest layers on.
885       layer_class_name = layer.__class__.__name__
--> 886       layer_config = layer.get_config()
887 
888       filtered_inbound_nodes = []
<ipython-input-11-ac0d0189838a> in get_config(self)
246     def get_config(self):
247 
--> 248         config = super(BertLayer).get_config().copy()
249         config.update({
250             'n_fine_tune_layers': self.n_fine_tune_layers,
AttributeError: 'super' object has no attribute 'get_config'

Edit 2:

When I remove BertLayer as the argument to super() inside get_config(), the model saves. However, when I then try to load it with tensorflow.keras.load_model, I get this error:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-5-fc5e61399702> in <module>()
1 from tensorflow.keras.models import load_model
2 
----> 3 model_ = load_model('path.h5',custom_objects={'BertLayer':BertLayer})
13 frames
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/saving/save.py in load_model(filepath, custom_objects, compile)
141   if (h5py is not None and (
142       isinstance(filepath, h5py.File) or h5py.is_hdf5(filepath))):
--> 143     return hdf5_format.load_model_from_hdf5(filepath, custom_objects, compile)
144 
145   if isinstance(filepath, six.string_types):
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/saving/hdf5_format.py in load_model_from_hdf5(filepath, custom_objects, compile)
160     model_config = json.loads(model_config.decode('utf-8'))
161     model = model_config_lib.model_from_config(model_config,
--> 162                                                custom_objects=custom_objects)
163 
164     # set weights
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/saving/model_config.py in model_from_config(config, custom_objects)
53                     '`Sequential.from_config(config)`?')
54   from tensorflow.python.keras.layers import deserialize  # pylint: disable=g-import-not-at-top
---> 55   return deserialize(config, custom_objects=custom_objects)
56 
57 
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/layers/serialization.py in deserialize(config, custom_objects)
103       module_objects=globs,
104       custom_objects=custom_objects,
--> 105       printable_module_name='layer')
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/utils/generic_utils.py in deserialize_keras_object(identifier, module_objects, custom_objects, printable_module_name)
189             custom_objects=dict(
190                 list(_GLOBAL_CUSTOM_OBJECTS.items()) +
--> 191                 list(custom_objects.items())))
192       with CustomObjectScope(custom_objects):
193         return cls.from_config(cls_config)
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/network.py in from_config(cls, config, custom_objects)
1069     # First, we create all layers and enqueue nodes to be processed
1070     for layer_data in config['layers']:
-> 1071       process_layer(layer_data)
1072     # Then we process nodes in order of layer depth.
1073     # Nodes that cannot yet be processed (if the inbound node
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/network.py in process_layer(layer_data)
1053       from tensorflow.python.keras.layers import deserialize as deserialize_layer  # pylint: disable=g-import-not-at-top
1054 
-> 1055       layer = deserialize_layer(layer_data, custom_objects=custom_objects)
1056       created_layers[layer_name] = layer
1057 
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/layers/serialization.py in deserialize(config, custom_objects)
103       module_objects=globs,
104       custom_objects=custom_objects,
--> 105       printable_module_name='layer')
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/utils/generic_utils.py in deserialize_keras_object(identifier, module_objects, custom_objects, printable_module_name)
191                 list(custom_objects.items())))
192       with CustomObjectScope(custom_objects):
--> 193         return cls.from_config(cls_config)
194     else:
195       # Then `cls` may be a function returning a class.
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/base_layer.py in from_config(cls, config)
599         A layer instance.
600     """
--> 601     return cls(**config)
602 
603   def compute_output_shape(self, input_shape):
<ipython-input-4-7942232d57db> in __init__(self, n_fine_tune_layers, pooling, bert_path, **kwargs)
170             )
171 
--> 172         super(BertLayer, self).__init__(**kwargs)
173 
174     def build(self, input_shape):
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/training/tracking/base.py in _method_wrapper(self, *args, **kwargs)
455     self._self_setattr_tracking = False  # pylint: disable=protected-access
456     try:
--> 457       result = method(self, *args, **kwargs)
458     finally:
459       self._self_setattr_tracking = previous_value  # pylint: disable=protected-access
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/base_layer.py in __init__(self, trainable, name, dtype, dynamic, **kwargs)
298     }
299     # Validate optional keyword arguments.
--> 300     generic_utils.validate_kwargs(kwargs, allowed_kwargs)
301 
302     # Mutable properties
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/utils/generic_utils.py in validate_kwargs(kwargs, allowed_kwargs, error_message)
597   for kwarg in kwargs:
598     if kwarg not in allowed_kwargs:
--> 599       raise TypeError(error_message, kwarg)
TypeError: ('Keyword argument not understood:', 'output_size')

The get_config code:

def get_config(self):
    config = super(BertLayer, self).get_config().copy()
    config.update({
        'n_fine_tune_layers': self.n_fine_tune_layers,
        'trainable': self.trainable,
        'output_size': self.output_size,
        'pooling': self.pooling,
        'bert_path': self.bert_path,
        'dropout': self.dropout,
    })
    return config

However, I am able to save the model weights with model.save_weights. I understand that this only saves the weights, not the actual model. How can I save the whole model? And while I can use load_weights() to load the weights back, how do I rebuild the entire model so I can predict on new data?

Well, you literally rebuild the whole model, exactly the way you built it the first time. It looks like build_model contains all of that.

Then you call model.load_weights(path).
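As a concrete, minimal sketch of that predict path, reusing the functions from your own question (build_model, initialize_vars, create_tokenizer_from_hub_module, convert_text_to_examples, convert_examples_to_features) and assuming the checkpoint path you pass to ModelCheckpoint in main():

# Rebuild exactly the same graph as at training time, then restore the weights.
max_seq_length = 256
bert_path = "https://tfhub.dev/google/bert_uncased_L-12_H-768_A-12/1"

model = build_model(max_seq_length)
initialize_vars(sess)
model.load_weights("bert_dir/cp.ckpt")  # the path used with save_weights / ModelCheckpoint

# New texts must go through the same preprocessing as the training data
# (labels are dummies; convert_text_to_examples just needs something to zip with).
tokenizer = create_tokenizer_from_hub_module(bert_path)
new_texts = ["some new document to classify"]
new_texts = np.array([" ".join(t.split()[0:max_seq_length]) for t in new_texts],
                     dtype=object)[:, np.newaxis]
examples = convert_text_to_examples(new_texts, [0] * len(new_texts))
input_ids, input_masks, segment_ids, _ = convert_examples_to_features(
    tokenizer, examples, max_seq_length=max_seq_length
)

predictions = model.predict([input_ids, input_masks, segment_ids])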

However, this approach does not save the optimizer state. If you want to "continue" training the loaded model, it is better to save the optimizer as well.
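For reference, a short sketch of the difference (the file names here are placeholders, not taken from your code):

# Weights only: neither the architecture nor the optimizer state is stored.
model.save_weights("bert_weights.ckpt")

# Full model: architecture, weights and, by default, the optimizer state,
# so training can be resumed after load_model. This is what requires a
# working get_config on BertLayer.
model.save("bert_full.h5")  # include_optimizer=True is the default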

To use model.save, you just need to write a get_config method for BertLayer. You can find plenty of examples of how to write it by looking at how Keras implements it in its own layers, for instance:

  • Dense: https://github.com/keras-team/keras/blob/master/keras/layers/core.py#L796
  • Conv: https://github.com/keras-team/keras/blob/master/keras/layers/convolutional.py#L361
  • etc.: https://keras.io/layers/core/

Keep in mind that the model loader does not know about your custom layer; you have to tell it about it:

load_model(...., custom_objects={'BertLayer':BertLayer})
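About the TypeError in Edit 2: when loading, Keras calls BertLayer(**config), so every key you put into the config dict must be accepted by __init__. A minimal sketch that only serializes the arguments your __init__ actually takes (output_size, trainable and dropout are left out precisely because __init__ does not accept them):

def get_config(self):
    config = super(BertLayer, self).get_config()
    config.update({
        'n_fine_tune_layers': self.n_fine_tune_layers,
        'pooling': self.pooling,
        'bert_path': self.bert_path,
    })
    return config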
