Real-time yolov5 detection with the desktop screen as input



I have a script that grabs screenshots of an application and displays them. It runs well on my machine, like a video at around 60 FPS.

import os
from PIL import ImageGrab, Image
import numpy as np
import cv2
import pyautogui
import win32gui
import time
from mss import mss
import tempfile

os.system('calc')
sct = mss()
xx = 1
tstart = time.time()
while xx < 10000:
    hwnd = win32gui.FindWindow(None, 'Calculator')
    left_x, top_y, right_x, bottom_y = win32gui.GetWindowRect(hwnd)
    # screen = np.array(ImageGrab.grab(bbox=(left_x, top_y, right_x, bottom_y)))
    bbox = {'top': top_y, 'left': left_x, 'width': right_x - left_x, 'height': bottom_y - top_y}
    screen = sct.grab(bbox)
    scr = np.array(screen)

    cv2.imshow('window', scr)
    if cv2.waitKey(25) & 0xFF == ord('q'):
        cv2.destroyAllWindows()
        break
    xx += 1
cv2.destroyAllWindows()
tend = time.time()
print(xx / (tend - tstart))
print(tend - tstart)
os.system('taskkill /f /im calculator.exe')

I want to run yolov5's detect.py on this scr image without constantly saving it to disk. I also want to display the images with bounding boxes and save their coordinates somewhere.

My Python isn't strong enough for this: I tried importing detect and passing arguments, but it doesn't seem to accept function parameters, only command-line arguments.

Maybe I should adjust this line, or use OpenCV?

parser.add_argument('--source', type=str, default='data/images', help='source')  # file/folder, 0 for webcam
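
One workaround I considered was building detect.py's opt namespace by hand before calling detect(), since detect() reads the module-level opt. A rough, untested sketch (assuming detect.py is importable from the yolov5 repo; 'screen.jpg' is just a stand-in saved frame, which is exactly the disk round-trip I want to avoid):

import argparse
import detect  # yolov5's detect.py, on the import path

# detect() reads a module-level `opt`, so populate it manually instead of via sys.argv.
# Values mirror the argparse defaults below; 'screen.jpg' is a hypothetical file source.
detect.opt = argparse.Namespace(
    weights='yolov5s.pt', source='screen.jpg', img_size=640,
    conf_thres=0.25, iou_thres=0.45, device='',
    view_img=True, save_txt=True, save_conf=False, classes=None,
    agnostic_nms=False, augment=False, update=False,
    project='runs/detect', name='exp', exist_ok=False)
detect.detect()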

Any ideas? Thanks. (This is yolov5's detect.py file:)

import argparse
import time
from pathlib import Path

import cv2
import torch
import torch.backends.cudnn as cudnn
from numpy import random

from models.experimental import attempt_load
from utils.datasets import LoadStreams, LoadImages
from utils.general import check_img_size, non_max_suppression, apply_classifier, scale_coords, xyxy2xywh, \
    strip_optimizer, set_logging, increment_path
from utils.plots import plot_one_box
from utils.torch_utils import select_device, load_classifier, time_synchronized


def detect(save_img=False):
    source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
        ('rtsp://', 'rtmp://', 'http://'))

    # Directories
    save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model']).to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = Path(path[i]), '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = Path(path), '', im0s

            save_path = str(save_dir / p.name)
            txt_path = str(save_dir / 'labels' / p.stem) + ('_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if opt.save_conf else (cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")

    print('Done. (%.3fs)' % (time.time() - t0))


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', nargs='+', type=str, default='yolov5s.pt', help='model.pt path(s)')
    parser.add_argument('--source', type=str, default='data/images', help='source')  # file/folder, 0 for webcam
    parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
    parser.add_argument('--conf-thres', type=float, default=0.25, help='object confidence threshold')
    parser.add_argument('--iou-thres', type=float, default=0.45, help='IOU threshold for NMS')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--view-img', action='store_true', help='display results')
    parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
    parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
    parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3')
    parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
    parser.add_argument('--augment', action='store_true', help='augmented inference')
    parser.add_argument('--update', action='store_true', help='update all models')
    parser.add_argument('--project', default='runs/detect', help='save results to project/name')
    parser.add_argument('--name', default='exp', help='save results to project/name')
    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    opt = parser.parse_args()
    print(opt)

    with torch.no_grad():
        if opt.update:  # update all models (to fix SourceChangeWarning)
            for opt.weights in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt']:
                detect()
                strip_optimizer(opt.weights)
        else:
            detect()

EDIT: I already have weights saved somewhere and am able to run detect on images saved to disk; I just want to skip that step to keep the FPS. The yolov5 repo is here.

For standalone inference in third-party projects or repos, importing the model into your Python workspace with PyTorch Hub is recommended. See the YOLOv5 PyTorch Hub tutorial, specifically the section on loading custom models: https://github.com/ultralytics/yolov5#tutorials

Custom models

This example loads a custom 20-class VOC-trained YOLOv5s model, 'yolov5s_voc_best.pt', with PyTorch Hub:

import torch
model = torch.hub.load('ultralytics/yolov5', 'custom', path_or_model='yolov5s_voc_best.pt')
model = model.autoshape()  # for PIL/cv2/np inputs and NMS

Then, once the model is loaded:

from PIL import Image
# Images
img1 = Image.open('zidane.jpg')
img2 = Image.open('bus.jpg')
imgs = [img1, img2]  # batched list of images
# Inference
result = model(imgs, size=640)  # includes NMS
result.print()
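
The returned object also exposes the raw box coordinates, which covers the "save their coordinates somewhere" part of the question. A minimal sketch using the Detections API's xyxy attribute (the file name is arbitrary):

# Each row of result.xyxy[i] is [x1, y1, x2, y2, confidence, class] for image i
for i, det in enumerate(result.xyxy):
    with open(f'detections_{i}.txt', 'a') as f:
        for x1, y1, x2, y2, conf, cls in det.tolist():
            f.write(f'{int(cls)} {x1:.1f} {y1:.1f} {x2:.1f} {y2:.1f} {conf:.2f}\n')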
Here is a complete example that runs a locally loaded hub model on the desktop screen:

import cv2
import torch
import numpy as np
from mss import mss
from PIL import Image

model = torch.hub.load("/yolov5", 'custom', path="yolov5/best.pt", source='local')
sct = mss()

while True:
    w, h = 1920, 1080
    monitor = {'top': 0, 'left': 0, 'width': w, 'height': h}
    img = Image.frombytes('RGB', (w, h), sct.grab(monitor).rgb)
    screen = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
    # run the model on the captured screen
    result = model(screen, size=640)
    cv2.imshow('Screen', result.render()[0])
    if cv2.waitKey(25) & 0xFF == ord('q'):
        cv2.destroyAllWindows()
        break
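
If you only want one application window rather than the whole desktop, the win32gui lookup from the question's script drops straight into the same loop. A sketch under that assumption (continuing from the code above, with the same 'Calculator' window title):

import win32gui

# Look up the target window's bounding box each frame, as in the question's script
hwnd = win32gui.FindWindow(None, 'Calculator')
left_x, top_y, right_x, bottom_y = win32gui.GetWindowRect(hwnd)
monitor = {'top': top_y, 'left': left_x,
           'width': right_x - left_x, 'height': bottom_y - top_y}
img = Image.frombytes('RGB', (monitor['width'], monitor['height']), sct.grab(monitor).rgb)
result = model(cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR), size=640)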

An example of using the desktop screen for inference can be found on yolov5's GitHub page: https://github.com/ultralytics/yolov5/issues/36

import cv2
import numpy
import torch
from mss import mss
from PIL import ImageGrab

im = numpy.array(ImageGrab.grab(bbox=(0, 0, 1920, 1080)))
model = torch.hub.load('ultralytics/yolov5', 'yolov5s')
model.conf = 0.6
image = r'D:\itest\yolov5-master(original)\yolov5-master\data\images\zidane.jpg'  # example path, unused below
results = model(im)
results.print()
results.show()
print(results.pandas().xyxy[0])

I found that mss().grab() has an RGB channel-order problem, so I used PIL instead.
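
For what it's worth, mss returns BGRA pixel data, so if you would rather keep mss for speed, an explicit conversion should work too. A sketch, assuming the same hub-loaded model as above (the autoshape wrapper expects RGB arrays per the hub tutorial):

import cv2
import numpy as np
from mss import mss

sct = mss()
shot = np.array(sct.grab({'top': 0, 'left': 0, 'width': 1920, 'height': 1080}))
im = cv2.cvtColor(shot, cv2.COLOR_BGRA2RGB)  # mss frames are BGRA; convert to RGB
results = model(im)
results.print()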
