At the end of the car-detection assignment in the CNN course, I tried processing a video file instead of still images. Unfortunately, the bounding boxes are out of sync with the actual car positions, offset by several points on both the X and Y axes... It looks to me as if freezing a "currentFrame" and feeding it to preprocessing somehow garbles the frame width and height along the way. Any ideas? I don't want to paste the whole project, so here is just the part where I replaced the predict function with a loop over the video frames.
import cv2
import numpy as np
from tqdm import tqdm
import imghdr
from numpy import expand_dims
from keras.preprocessing.image import img_to_array

video_out = "nb_images/out1.mp4"
video_reader = cv2.VideoCapture("nb_images/road_video_trim2.mp4")
nb_frames = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))
frame_h = int(video_reader.get(cv2.CAP_PROP_FRAME_HEIGHT))
frame_w = int(video_reader.get(cv2.CAP_PROP_FRAME_WIDTH))
video_writer = cv2.VideoWriter(video_out,
                               cv2.VideoWriter_fourcc(*'MPEG'),
                               50.0,
                               (frame_w, frame_h))
batch_size = 1
images = []
start_point = 0 #%
show_window = False
for i in tqdm(range(nb_frames)):
    _, image = video_reader.read()
    #blob = cv2.dnn.blobFromImage(image, 1 / 255.0, (frame_w, frame_h), swapRB=True, crop=False)
    cv2.imwrite("currentFrame.jpg", image)
    image, image_data = preprocess_image("currentFrame.jpg", model_image_size=(608, 608))
    out_scores, out_boxes, out_classes = sess.run([scores, boxes, classes],
                                                  feed_dict={yolo_model.input: image_data,
                                                             K.learning_phase(): 0})
    #out_scores, out_boxes, out_classes, output_image = predict2(sess, "currentFrame.jpg")
    colors = generate_colors(class_names)
    #draw_boxes(img, out_scores, out_boxes, out_classes, class_names, colors)
    draw_boxes(image, out_scores, out_boxes, out_classes, class_names, colors)
    #video_writer.write(images[i])
    imshow(image)
    video_writer.write(np.uint8(image))
    images = []
if show_window: cv2.destroyAllWindows()
video_reader.release()
video_writer.release()
#else: # do detection on an image or a set of images
image_paths = []
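For context, here is the arithmetic behind my suspicion, assuming the source video is 1280x720 (as the course's commented-out image_shape = (720., 1280.) suggests). The model sees a 608x608 square, and the horizontal and vertical stretch factors between the two spaces are different, so nothing drawn in one space lines up in the other:

```python
# Sizes from the pipeline above: the capture reports 1280x720,
# preprocess_image feeds the model a 608x608 square.
frame_w, frame_h = 1280, 720
model_w, model_h = 608, 608

# The horizontal and vertical stretch factors are NOT equal,
# so the squash distorts X and Y by different amounts:
stretch_x = frame_w / model_w
stretch_y = frame_h / model_h
print(round(stretch_x, 3), round(stretch_y, 3))  # 2.105 1.184
```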
So, here is my guess at what went wrong... The snippet that does the initialization used a different image shape. First I changed this:
class_names = read_classes("model_data/coco_classes.txt")
anchors = read_anchors("model_data/yolo_anchors.txt")
#image_shape = (720., 1280.)
image_shape = (608., 608.)
Then YOLO is called...
yolo_outputs = yolo_head(yolo_model.output, anchors, len(class_names))
scores, boxes, classes = yolo_eval(yolo_outputs, image_shape)
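The X/Y offset makes sense once you look at what yolo_eval does with image_shape: the boxes coming out of yolo_head are (roughly) fractions of the image, and yolo_eval's last step multiplies them by image_shape to get pixel coordinates. A numpy sketch of that scaling (my paraphrase of the course's scale_boxes step, not the exact code):

```python
import numpy as np

def scale_boxes(boxes, image_shape):
    # boxes: (N, 4) as (y1, x1, y2, x2) in fractions of the image;
    # image_shape: the (height, width) that yolo_eval was told about.
    h, w = image_shape
    return boxes * np.array([h, w, h, w])

box = np.array([[0.25, 0.25, 0.75, 0.75]])  # centered half-size box
print(scale_boxes(box, (608., 608.)))       # [[152. 152. 456. 456.]]
print(scale_boxes(box, (720., 1280.)))      # [[180. 320. 540. 960.]]
```

So if image_shape says (720., 1280.) while the boxes are drawn onto the 608x608 preprocessed image, every coordinate is stretched, which matches the offsets I was seeing.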
Now, in my loop, I made these small changes...
for i in tqdm(range(nb_frames)):
    _, image = video_reader.read()
    #blob = cv2.dnn.blobFromImage(image, 1 / 255.0, (frame_w, frame_h), swapRB=True, crop=False)
    image = cv2.resize(image, (608, 608))
    cv2.imwrite("currentFrame.jpg", image)
    image, image_data = preprocess_image("currentFrame.jpg", model_image_size=(608, 608))
    out_scores, out_boxes, out_classes = sess.run([scores, boxes, classes],
                                                  feed_dict={yolo_model.input: image_data,
                                                             K.learning_phase(): 0})
    colors = generate_colors(class_names)
    draw_boxes(image, out_scores, out_boxes, out_classes, class_names, colors)
    image = cv2.resize(np.array(image), (frame_w, frame_h))
    video_writer.write(np.uint8(image))
    imshow(image)
I think initializing the shape to (608, 608) plus the resizes above is why it now works. The last frame comes out like this: [screenshot of the final frame]
Just closing the loop here for completeness. The answer is the second half of the edit above. From where I started, I knew roughly what was going wrong. To be precise, I had to initialize the shape as:
class_names = read_classes("model_data/coco_classes.txt")
anchors = read_anchors("model_data/yolo_anchors.txt")
#image_shape = (720., 1280.)
image_shape = (608., 608.)
Then I had to "resize" the frame both before sending it off for processing and again before writing it back as part of the updated video. Other snippets I've seen don't actually do this, so I don't really know why I need this "patch" as a fix, but it works :(
for i in tqdm(range(nb_frames)):
    _, image = video_reader.read()
    #blob = cv2.dnn.blobFromImage(image, 1 / 255.0, (frame_w, frame_h), swapRB=True, crop=False)
    image = cv2.resize(image, (608, 608))
    cv2.imwrite("currentFrame.jpg", image)
    image, image_data = preprocess_image("currentFrame.jpg", model_image_size=(608, 608))
    out_scores, out_boxes, out_classes = sess.run([scores, boxes, classes],
                                                  feed_dict={yolo_model.input: image_data,
                                                             K.learning_phase(): 0})
    colors = generate_colors(class_names)
    draw_boxes(image, out_scores, out_boxes, out_classes, class_names, colors)
    image = cv2.resize(np.array(image), (frame_w, frame_h))
    video_writer.write(np.uint8(image))
    imshow(image)
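For what it's worth, an alternative that avoids resizing every frame twice (and the quality loss that comes with it) is to keep image_shape = (608., 608.), run the model on the resized copy, but map the boxes back to frame coordinates yourself and draw on the untouched frame. A minimal sketch (rescale_boxes is my own helper, assuming the (y1, x1, y2, x2) box layout that the course's yolo_eval returns):

```python
import numpy as np

def rescale_boxes(boxes, model_size, frame_w, frame_h):
    # Map (y1, x1, y2, x2) boxes predicted in model_size x model_size
    # space back to the original frame's pixel space.
    sy = frame_h / float(model_size)
    sx = frame_w / float(model_size)
    return boxes * np.array([sy, sx, sy, sx])

# A box found on the 608x608 model input...
boxes_608 = np.array([[152.0, 152.0, 456.0, 456.0]])
# ...lands here on the original 1280x720 frame:
print(rescale_boxes(boxes_608, 608, frame_w=1280, frame_h=720))
# [[180. 320. 540. 960.]]
```

Then cv2.rectangle can draw the rescaled coordinates straight onto the frame that came out of video_reader.read(), and video_writer.write gets a frame that was never resized.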