model_main.py 无法训练 MobileNet SSD v2 —— TensorFlow 对象检测 API



我正在使用TensorFlow 1.15,并尝试使用我自己的数据集的TensorFlow对象检测API来微调mobilenetSSDv2。

我按照 TensorFlow 仓库中描述的方式创建了 TFRecord 文件,并像下面这样读取图像:

# Load the raw JPEG bytes that are later stored in the tf.Example.
image_path = folder_path+"temp.jpeg"
with tf.gfile.GFile(image_path, 'rb') as fid:
    encoded_image_data = fid.read()

我已按要求将标注点的坐标分别除以图像的宽度和高度(即归一化),并调整了配置以匹配我的类别数量,但运行训练时仍然遇到下面这个错误(我尝试了很多方法都没有成功):

...

...

tensorflow.python.framework.errors_impl.InvalidArgumentError: {{function_node Dataset_map_transform_and_pad_input_data_fn_423}} assertion failed: [[0.576413691][0.335303724][0.766369045]...] [[0.155026451][0.439418][0.299206346]...]     [[{{node Assert/AssertGuard/Assert}}]]      [[IteratorGetNext]]

During handling of the above exception, another exception occurred:

Traceback (most recent call last):   File "./object_detection/model_main.py", line 108, in <module>
tf.app.run()   File "/home/mai/anaconda3/envs/tf/lib/python3.6/site-packages/tensorflow_core/python/platform/app.py", line 40, in run
_run(main=main, argv=argv, flags_parser=_parse_flags_tolerate_undef)   File "/home/mai/anaconda3/envs/tf/lib/python3.6/site-packages/absl/app.py", line 299, in run
_run_main(main, args)   File "/home/mai/anaconda3/envs/tf/lib/python3.6/site-packages/absl/app.py", line 250, in _run_main
sys.exit(main(argv))   File "./object_detection/model_main.py", line 104, in main
tf.estimator.train_and_evaluate(estimator, train_spec, eval_specs[0])   File "/home/mai/anaconda3/envs/tf/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/training.py", line 473, in train_and_evaluate
return executor.run()   File "/home/mai/anaconda3/envs/tf/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/training.py", line 613, in run
return self.run_local()   File "/home/mai/anaconda3/envs/tf/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/training.py", line 714, in run_local
saving_listeners=saving_listeners)   File "/home/mai/anaconda3/envs/tf/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 370, in train
loss = self._train_model(input_fn, hooks, saving_listeners)   File "/home/mai/anaconda3/envs/tf/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 1161, in _train_model
return self._train_model_default(input_fn, hooks, saving_listeners)   File "/home/mai/anaconda3/envs/tf/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 1195, in _train_model_default
saving_listeners)   File "/home/mai/anaconda3/envs/tf/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 1494, in _train_with_estimator_spec
_, loss = mon_sess.run([estimator_spec.train_op, estimator_spec.loss])   File "/home/mai/anaconda3/envs/tf/lib/python3.6/site-packages/tensorflow_core/python/training/monitored_session.py", line 754, in run
run_metadata=run_metadata)   File "/home/mai/anaconda3/envs/tf/lib/python3.6/site-packages/tensorflow_core/python/training/monitored_session.py", line 1259, in run
run_metadata=run_metadata)   File "/home/mai/anaconda3/envs/tf/lib/python3.6/site-packages/tensorflow_core/python/training/monitored_session.py", line 1360, in run
raise six.reraise(*original_exc_info)   File "/home/mai/anaconda3/envs/tf/lib/python3.6/site-packages/six.py", line 703, in reraise
raise value   File "/home/mai/anaconda3/envs/tf/lib/python3.6/site-packages/tensorflow_core/python/training/monitored_session.py", line 1345, in run
return self._sess.run(*args, **kwargs)   File "/home/mai/anaconda3/envs/tf/lib/python3.6/site-packages/tensorflow_core/python/training/monitored_session.py", line 1418, in run
run_metadata=run_metadata)   File "/home/mai/anaconda3/envs/tf/lib/python3.6/site-packages/tensorflow_core/python/training/monitored_session.py", line 1176, in run
return self._sess.run(*args, **kwargs)   File "/home/mai/anaconda3/envs/tf/lib/python3.6/site-packages/tensorflow_core/python/client/session.py", line 956, in run
run_metadata_ptr)   File "/home/mai/anaconda3/envs/tf/lib/python3.6/site-packages/tensorflow_core/python/client/session.py", line 1180, in _run
feed_dict_tensor, options, run_metadata)   File "/home/mai/anaconda3/envs/tf/lib/python3.6/site-packages/tensorflow_core/python/client/session.py", line 1359, in _do_run
run_metadata)   File "/home/mai/anaconda3/envs/tf/lib/python3.6/site-packages/tensorflow_core/python/client/session.py", line 1384, in _do_call
raise type(e)(node_def, op, message) 
tensorflow.python.framework.errors_impl.InvalidArgumentError:  assertion failed: [[0.576413691][0.335303724][0.766369045]...] [[0.155026451][0.439418][0.299206346]...]      [[{{node Assert/AssertGuard/Assert}}]]      [[IteratorGetNext]]

我的配置文件和pbtxt

# SSD with Mobilenet v2 configuration for MSCOCO Dataset.
# Users should configure the fine_tune_checkpoint field in the train config as
# well as the label_map_path and input_path fields in the train_input_reader and
# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
# should be configured.
# Detection model definition: an SSD meta-architecture using the
# 'ssd_mobilenet_v2' feature extractor and a convolutional box predictor.
model {
  ssd {
    # Five classes, matching the five items of the label map.
    num_classes: 5
    box_coder {
      faster_rcnn_box_coder {
        y_scale: 10.0
        x_scale: 10.0
        height_scale: 5.0
        width_scale: 5.0
      }
    }
    matcher {
      argmax_matcher {
        matched_threshold: 0.5
        unmatched_threshold: 0.5
        ignore_thresholds: false
        negatives_lower_than_unmatched: true
        force_match_for_each_row: true
      }
    }
    similarity_calculator {
      iou_similarity {
      }
    }
    anchor_generator {
      ssd_anchor_generator {
        num_layers: 6
        min_scale: 0.2
        max_scale: 0.95
        aspect_ratios: 1.0
        aspect_ratios: 2.0
        aspect_ratios: 0.5
        aspect_ratios: 3.0
        aspect_ratios: 0.3333
      }
    }
    # Every input image is resized to a fixed 300x300 shape.
    image_resizer {
      fixed_shape_resizer {
        height: 300
        width: 300
      }
    }
    box_predictor {
      convolutional_box_predictor {
        min_depth: 0
        max_depth: 0
        num_layers_before_predictor: 0
        use_dropout: false
        dropout_keep_probability: 0.8
        kernel_size: 1
        box_code_size: 4
        apply_sigmoid_to_scores: false
        conv_hyperparams {
          activation: RELU_6
          regularizer {
            l2_regularizer {
              weight: 0.00004
            }
          }
          initializer {
            truncated_normal_initializer {
              stddev: 0.03
              mean: 0.0
            }
          }
          batch_norm {
            train: true
            scale: true
            center: true
            decay: 0.9997
            epsilon: 0.001
          }
        }
      }
    }
    feature_extractor {
      type: 'ssd_mobilenet_v2'
      min_depth: 16
      depth_multiplier: 1.0
      # Same hyperparameters as the box predictor's conv_hyperparams.
      conv_hyperparams {
        activation: RELU_6
        regularizer {
          l2_regularizer {
            weight: 0.00004
          }
        }
        initializer {
          truncated_normal_initializer {
            stddev: 0.03
            mean: 0.0
          }
        }
        batch_norm {
          train: true
          scale: true
          center: true
          decay: 0.9997
          epsilon: 0.001
        }
      }
    }
    loss {
      classification_loss {
        weighted_sigmoid {
        }
      }
      localization_loss {
        weighted_smooth_l1 {
        }
      }
      hard_example_miner {
        num_hard_examples: 3000
        iou_threshold: 0.99
        loss_type: CLASSIFICATION
        max_negatives_per_positive: 3
        min_negatives_per_image: 3
      }
      classification_weight: 1.0
      localization_weight: 1.0
    }
    normalize_loss_by_num_matches: true
    post_processing {
      batch_non_max_suppression {
        score_threshold: 1e-8
        iou_threshold: 0.6
        max_detections_per_class: 100
        max_total_detections: 100
      }
      score_converter: SIGMOID
    }
  }
}
# Training hyperparameters, fine-tuning checkpoint and data augmentation.
train_config {
  batch_size: 32
  optimizer {
    rms_prop_optimizer {
      learning_rate {
        exponential_decay_learning_rate {
          initial_learning_rate: 0.004
          decay_steps: 800720
          decay_factor: 0.95
        }
      }
      momentum_optimizer_value: 0.9
      decay: 0.9
      epsilon: 1.0
    }
  }
  # NOTE(review): the checkpoint directory is named ssdlite_mobilenet_v2
  # while the feature extractor type is 'ssd_mobilenet_v2' -- confirm the
  # checkpoint matches the model defined in this config.
  fine_tune_checkpoint: "/home/mai/Downloads/ssdlite_mobilenet_v2_coco_2018_05_09/checkpoints/model.ckpt"
  from_detection_checkpoint: true  # added
  fine_tune_checkpoint_type: "detection"
  # Note: num_steps limits training to 10000 steps. Since decay_steps
  # (800720) is far larger than that, the learning-rate schedule has
  # essentially no effect within this run. Remove the line below to train
  # indefinitely.
  num_steps: 10000
  data_augmentation_options {
    random_horizontal_flip {
    }
  }
  data_augmentation_options {
    ssd_random_crop {
    }
  }
}
# Training input: the TFRecord file and the label map naming its classes.
# Structured like eval_input_reader: the record path is the input_path of a
# tf_record_input_reader block, and label_map_path sits inside the reader so
# that the braces balance.
train_input_reader {
  tf_record_input_reader {
    input_path: "pathto/train_608.record"
  }
  label_map_path: "pathto/vehicle_label_map.pbtxt"
}
# Evaluation settings.
eval_config {
  num_examples: 100
  # Note: max_evals caps the evaluation process at 10 evaluations.
  # Remove the line below to evaluate indefinitely.
  max_evals: 10
  metrics_set: "coco_detection_metrics"
}
# Evaluation input: read the eval TFRecord in order with a single reader.
eval_input_reader {
  tf_record_input_reader {
    input_path: "pathto/frames/eval_608.record"
  }
  label_map_path: "pathto/vehicle_label_map.pbtxt"
  shuffle: false
  num_readers: 1
}
# Label map (.pbtxt): one item per class, with ids running from 1 to 5.
item {
  name: "car"
  id: 1
  display_name: "car"
}
item {
  name: "motorbike"
  id: 2
  display_name: "motorbike"
}
item {
  name: "bus"
  id: 3
  display_name: "bus"
}
item {
  name: "truck"
  id: 4
  display_name: "truck"
}
item {
  name: "van"
  id: 5
  display_name: "van"
}

编辑:以下是将数据转换为 TFRecord 的代码

def create_tf_example(image_prop_dict):
    """Build a ``tf.train.Example`` describing one annotated image.

    :param image_prop_dict: dict with the keys ``im_height``, ``im_width``,
        ``im_name``, ``encoded_image``, ``x_mins``, ``x_maxs``, ``y_mins``,
        ``y_maxs``, ``classes_labels`` and ``classes_ints``. The four
        coordinate lists and the two class lists hold one entry per box.
    :return: the populated ``tf.train.Example``.
    """
    height = image_prop_dict['im_height']
    width = image_prop_dict['im_width']
    filename = image_prop_dict['im_name']  # Filename of the image. Empty if image is not from file
    encoded_image_data = image_prop_dict['encoded_image']  # Encoded image bytes
    image_format = b'jpeg'  # b'jpeg' or b'png'

    # Normalized box edges, one list entry per box.
    xmins = image_prop_dict['x_mins']  # left x coordinates
    xmaxs = image_prop_dict['x_maxs']  # right x coordinates
    # The top edges must be read from 'y_mins'. Reading 'x_mins' here stored
    # x coordinates as ymin, so a box could end up with ymin > ymax.
    ymins = image_prop_dict['y_mins']  # top y coordinates
    ymaxs = image_prop_dict['y_maxs']  # bottom y coordinates

    classes_text = image_prop_dict['classes_labels']  # class names as bytes (1 per box)
    classes = image_prop_dict['classes_ints']  # integer class ids (1 per box)

    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_image_data),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))


def convert_jsons_in_folder(folder_path, classes_dict):
    """Collect image properties and bounding boxes from the JSON labels in a folder.

    Every ``.json`` file in ``folder_path`` produces one dictionary holding the
    image size, image name, encoded image bytes and, per box, the coordinates
    divided by the image width/height together with the class label and id.

    :param folder_path: str path to the folder containing the json files
    :param classes_dict: dict [class name] = class number (zero-based; the
        stored class id is this number plus one)
    :return: list of dictionaries, one per json file, with the keys
        ``im_height``, ``im_width``, ``im_name``, ``encoded_image``,
        ``x_mins``, ``x_maxs``, ``y_mins``, ``y_maxs``, ``classes_labels``
        and ``classes_ints``
    """
    json_name_list = [
        file_name for file_name in os.listdir(folder_path)
        if file_name.endswith(".json")
    ]
    image_dictionaries = []

    for json_file_name in tqdm(json_name_list):
        json_path = os.path.join(folder_path, json_file_name)

        # NOTE(review): the same "temp.jpeg" path is read for every json file;
        # presumably a step not shown here writes the current image there --
        # confirm, otherwise all examples share one image.
        with tf.gfile.GFile(folder_path+"temp.jpeg", 'rb') as fid:
            encoded_image_data = fid.read()

        with open(json_path) as json_file_r:
            json_data = json.load(json_file_r)

        im_width = json_data["imageWidth"]
        im_height = json_data["imageHeight"]
        image_dictionary = {
            'im_height': im_height,
            'im_width': im_width,
            'im_name': bytes(json_file_name.replace(".json", ".jpg"), 'utf-8'),
            'encoded_image': encoded_image_data,
            'x_mins': [],
            'x_maxs': [],
            'y_mins': [],
            'y_maxs': [],
            'classes_labels': [],
            'classes_ints': [],
        }

        for labelme_detection in json_data["shapes"]:
            points = labelme_detection["points"]
            # A box is built from the first two points, so shapes with fewer
            # than two points are skipped instead of raising IndexError.
            if len(points) < 2:
                continue

            class_label = labelme_detection["label"]
            first, second = points[0], points[1]
            # Relative coordinates use the original width and height, because
            # the boxes were drawn on the original image.
            image_dictionary['x_mins'].append(min(first[0], second[0]) / im_width)
            image_dictionary['x_maxs'].append(max(first[0], second[0]) / im_width)
            image_dictionary['y_mins'].append(min(first[1], second[1]) / im_height)
            image_dictionary['y_maxs'].append(max(first[1], second[1]) / im_height)
            image_dictionary['classes_labels'].append(bytes(class_label, 'utf-8'))
            # Stored class ids are one-based.
            image_dictionary['classes_ints'].append(classes_dict[class_label] + 1)

        image_dictionaries.append(image_dictionary)

    return image_dictionaries

# ..
# ..
# main  
examples = convert_list_of_folders(args.source, classes_dict)
# ..
# ..
# ..
# Serialize each collected image dictionary and append it to the record file.
for image_properties in examples:
    serialized_example = create_tf_example(image_properties).SerializeToString()
    eval_writer.write(serialized_example)

问题确实出在数据上。为了修复该错误,我改用此仓库(repo)将我的数据转换为 TFRecord

数据需要先转换为 YOLO 格式,不过这一步非常简单

最新更新