How to detect a given shape in a video and draw a rectangle around it



I'm having some trouble with OpenCV and Python...

I was given 4 pictures of a chainsaw and a video containing the same chainsaw. My task is to draw a rectangle around the chainsaw in the video using OpenCV. I tried doing it like this:

import numpy as np
import cv2
import matplotlib.pyplot as plt

def show_img(img, bw=False):
    fig = plt.figure(figsize=(13, 13))
    ax = fig.gca()
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)
    ax.imshow(img, cmap='Greys_r' if bw else None)
    plt.show()

img1 = cv2.imread("saw1.jpg")
img2 = cv2.imread("saw2.jpg")
img3 = cv2.imread("saw3.jpg")
img4 = cv2.imread("saw4.jpg")

gray1 = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
gray2 = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)
gray3 = cv2.cvtColor(img3, cv2.COLOR_BGR2GRAY)
gray4 = cv2.cvtColor(img4, cv2.COLOR_BGR2GRAY)

orb = cv2.ORB_create(nfeatures=1000)
kp1, des1 = orb.detectAndCompute(gray1, None)
kp2, des2 = orb.detectAndCompute(gray2, None)
kp3, des3 = orb.detectAndCompute(gray3, None)
kp4, des4 = orb.detectAndCompute(gray4, None)

matcher = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)

cap = cv2.VideoCapture('sawmovie.mp4')
while cap.isOpened():
    ret, frame = cap.read()
    if ret:
        frameGray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        kp5, des5 = orb.detectAndCompute(frameGray, None)
        matches = matcher.match(des1, des5)
        matches = sorted(matches, key=lambda x: x.distance)
        good_matches = matches[:10]

        list_kp1 = []
        list_kp2 = []

        for mat in good_matches:
            # Get the matching keypoints for each of the images
            img1_idx = mat.queryIdx
            img2_idx = mat.trainIdx
            (x1, y1) = kp1[img1_idx].pt
            (x2, y2) = kp5[img2_idx].pt
            list_kp1.append((x1, y1))
            list_kp2.append((x2, y2))

        # collect the x/y coordinates of the matched frame keypoints
        x_values = []
        y_values = []
        for kp in list_kp2:
            x_values.append(kp[0])
            y_values.append(kp[1])

        x_max = int(max(x_values))
        x_min = int(min(x_values))
        y_max = int(max(y_values))
        y_min = int(min(y_values))

        starting_point = (x_max, y_max)
        ending_point = (x_min, y_min)
        thickness = 2
        color = (255, 0, 0)

        final_image = cv2.rectangle(frame, starting_point, ending_point, color, thickness)

        cv2.imshow('Frame', final_image)

        if cv2.waitKey(25) & 0xFF == ord('q'):
            break
    else:
        break

cap.release()
cv2.destroyAllWindows()

I tried this, but the rectangle ends up in a random place. I'm stuck and can't get any further.

Sample images and a frame from the video: https://drive.google.com/drive/u/0/folders/1lD97uXttSUUUc2R76nXsbQVxNfONKnp8

What I want is to use these sample images to draw a rectangle around the saw on every frame of the video.
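
For reference, one common reason a min/max box over the top 10 matches lands in a random spot is that even the "best" matches can hit the background. A minimal sketch of filtering the ORB matches with a RANSAC homography and projecting the sample image's corners into the frame instead; it reuses orb, matcher, gray1, kp1, des1 from the code above, and the helper name locate_object is made up for illustration:

import cv2
import numpy as np

def locate_object(query_gray, kp_q, des_q, frame_gray, orb, matcher, min_inliers=10):
    """Return an axis-aligned (x, y, w, h) box for the query object in the frame, or None."""
    kp_f, des_f = orb.detectAndCompute(frame_gray, None)
    if des_f is None or len(kp_f) < min_inliers:
        return None
    matches = matcher.match(des_q, des_f)
    if len(matches) < min_inliers:
        return None
    # matched point pairs: sample image -> frame
    src = np.float32([kp_q[m.queryIdx].pt for m in matches]).reshape(-1, 1, 2)
    dst = np.float32([kp_f[m.trainIdx].pt for m in matches]).reshape(-1, 1, 2)
    # RANSAC throws out matches that don't agree with a single perspective transform
    H, inlier_mask = cv2.findHomography(src, dst, cv2.RANSAC, 5.0)
    if H is None or inlier_mask.sum() < min_inliers:
        return None
    # project the corners of the sample image into the frame and box them
    h, w = query_gray.shape[:2]
    corners = np.float32([[0, 0], [w, 0], [w, h], [0, h]]).reshape(-1, 1, 2)
    projected = cv2.perspectiveTransform(corners, H)
    return cv2.boundingRect(projected.astype(np.int32))

Inside the frame loop this would be used as box = locate_object(gray1, kp1, des1, frameGray, orb, matcher), drawing the rectangle only when a box comes back instead of on every frame.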

I'm really sorry. I got called in to work over the weekend and it doesn't look like I'll be getting off today. I'll post the rough code I have so far.

I split it into two files. One saves all of the matched points to a text file, and the other reads those points back and draws a box around them. The feature detector takes a long time to run, which is why I save the points to a file and just replay that file.

matcher.py

import cv2
import numpy as np
import time

# sample images
samples = []
for a in range(1, 5):
    samples.append(cv2.imread("saw" + str(a) + ".jpg"))

# test video
cap = cv2.VideoCapture("sawmovie.mp4")

# make detector and matcher
sift = cv2.SIFT_create()
orb = cv2.ORB_create()
bfm = cv2.BFMatcher()

# text file to save feature points (so we don't have to redo the slow part every time)
file = open("matched_points.txt", 'w')

# skip empty frames
frame = 0
skip = 0
for a in range(skip):
    _, _ = cap.read()
    frame += 1

# go until video is finished
while True:
    # get frame
    ret, test = cap.read()
    if not ret:
        break

    # count
    print("Frame: " + str(frame))

    # create a fresh mask
    mask = np.zeros(test.shape[:2], np.uint8)

    # do keypoint matching with each sample
    for sample in samples:
        # get features
        kp1, des1 = sift.detectAndCompute(sample, None)
        kp2, des2 = sift.detectAndCompute(test, None)

        # match
        matches = bfm.knnMatch(des1, des2, k=2)

        # lowe's ratio test
        good = []
        for a, b in matches:
            if a.distance < 0.75 * b.distance:
                good.append(a)

        # pull position of matches
        points = []
        for m in good:
            test_index = m.trainIdx
            x, y = kp2[test_index].pt
            points.append((int(x), int(y)))

        # add to log, one "frame x y" per line
        for point in points:
            x, y = point
            out_str = ""
            out_str += str(frame) + " "
            out_str += str(x) + " "
            out_str += str(y) + "\n"
            file.write(out_str)

    # show
    cv2.imshow("Test", test)
    cv2.imshow("Mask", mask)
    key = cv2.waitKey(1)
    if key == ord('q'):
        break

    # increment frame counter
    frame += 1

file.close()
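
The slow part can be trimmed without changing the output: matcher.py recomputes the frame's SIFT features once per sample and the samples' features once per frame. A hedged sketch of the same loop with each set of features computed only once (variable names follow matcher.py above; the logging step is left as a comment):

import cv2

# precompute SIFT features for the four sample images a single time
sift = cv2.SIFT_create()
samples = [cv2.imread("saw" + str(a) + ".jpg") for a in range(1, 5)]
sample_features = [sift.detectAndCompute(s, None) for s in samples]  # list of (kp, des)

cap = cv2.VideoCapture("sawmovie.mp4")
bfm = cv2.BFMatcher()
frame = 0
while True:
    ret, test = cap.read()
    if not ret:
        break
    # features for this frame, computed once instead of once per sample
    kp2, des2 = sift.detectAndCompute(test, None)
    for kp1, des1 in sample_features:
        matches = bfm.knnMatch(des1, des2, k=2)
        good = [a for a, b in matches if a.distance < 0.75 * b.distance]
        # ... log the good matches exactly as in matcher.py
    frame += 1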

playback.py

import cv2
import numpy as np
import time

# rescale helper
def rescale(img, scale):
    h, w = img.shape[:2]
    h = int(h * scale)
    w = int(w * scale)
    return cv2.resize(img, (w, h))

# test video
cap = cv2.VideoCapture("sawmovie.mp4")

# text file with the saved feature points (so we don't have to redo the slow part every time)
file = open("matched_points.txt", 'r')
points = []  # [frame, x, y]
for line in file:
    frame, x, y = [int(a) for a in line.split(' ')]
    points.append([frame, x, y])
file.close()

# group by frame
fps = []
last_frame = -1
group = []
for point in points:
    # unpack
    frame, x, y = point
    if last_frame == -1:
        last_frame = frame
    # check for new group
    if last_frame != frame:
        fps.append(group[:])
        group = []
        last_frame = frame
    # add to group
    group.append((x, y))
# don't drop the final frame's group
if group:
    fps.append(group[:])

# make videowriter
res = (1920, 1080)
four_cc = cv2.VideoWriter_fourcc(*'DIVX')  # this is windows-specific
writer = cv2.VideoWriter("marked.avi", four_cc, 20, res)

# skip empty frames
frame = 0
skip = 0
for a in range(skip):
    _, _ = cap.read()
    frame += 1

# go until video is finished
while True:
    # get frame
    ret, test = cap.read()
    if not ret:
        break

    # count
    print("Frame: " + str(frame))

    # create a fresh mask
    mask = np.zeros(test.shape[:2], np.uint8)

    # grab points (stop if the log doesn't cover this frame)
    if frame - skip >= len(fps):
        break
    points = fps[frame - skip]

    # draw points
    # for point in points:
    #     cv2.circle(test, point, 50, (0,0,255), -1)

    # grow points to try and link up points that are closer than "dist"
    dist = 40
    for point in points:
        cv2.circle(mask, point, dist, 255, -1)

    # get contours and find biggest by area
    contours, hierarchy = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    biggest = None
    biggest_area = -1
    for con in contours:
        area = cv2.contourArea(con)
        if area > biggest_area:
            biggest = con
            biggest_area = area

    # bounding box
    x, y, w, h = cv2.boundingRect(biggest)
    test = cv2.rectangle(test, (x, y), (x + w, y + h), (0, 255, 0), 4)

    # rescale to fit screen
    # test = rescale(test, 0.5)
    # print(test.shape)

    # save
    writer.write(test)

    # show
    cv2.imshow("Test", test)
    # cv2.imshow("Mask", mask)
    key = cv2.waitKey(1)
    if key == ord('q'):
        break

    # increment frame counter
    frame += 1

writer.release()
