Swift iOS Vision矩形检测结果不准确



xcode: Version 12.5.1;ios: 14.7.1,设备:iPhone 12 Pro

你好,需要帮助!

iOS Vision框架返回不准确的矩形结果:正确结果不正确

识别的矩形(红色)与实际矩形(黑色)不匹配。它不是方形的,比真正的窄。我不明白为什么会这样。如果画观察到的矩形边界框,得到同样不准确的结果。

完整代码:

class ViewController: UIViewController, AVCaptureVideoDataOutputSampleBufferDelegate {

private let captureSession = AVCaptureSession()
private lazy var previewLayer = AVCaptureVideoPreviewLayer(session: self.captureSession)
private let videoDataOutput = AVCaptureVideoDataOutput()
private var maskLayer = CAShapeLayer()
override func viewDidLoad() {
super.viewDidLoad()
self.setCameraInput()
self.showCameraFeed()
self.setCameraOutput()
self.videoDataOutput.setSampleBufferDelegate(self, queue: DispatchQueue(label: "camera_frame_processing_queue"))
self.captureSession.startRunning()
}
override func viewDidLayoutSubviews() {
super.viewDidLayoutSubviews()
self.previewLayer.frame = self.view.frame
}

func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
guard let frame = CMSampleBufferGetImageBuffer(sampleBuffer) else {
debugPrint("unable to get image from sample buffer")
return
}
self.detectRectangle(in: frame)
}

private func setCameraInput() {
guard let device = AVCaptureDevice.DiscoverySession(
deviceTypes: [.builtInWideAngleCamera, .builtInDualCamera, .builtInTrueDepthCamera],
mediaType: .video,
position: .back).devices.first else {
fatalError("No back camera device found.")
}
let cameraInput = try! AVCaptureDeviceInput(device: device)
self.captureSession.addInput(cameraInput)
}

private func showCameraFeed() {
self.previewLayer.videoGravity = .resizeAspectFill
self.view.layer.addSublayer(self.previewLayer)
self.previewLayer.frame = self.view.frame
}

private func setCameraOutput() {
self.videoDataOutput.videoSettings = [(kCVPixelBufferPixelFormatTypeKey as NSString) : NSNumber(value: kCVPixelFormatType_32BGRA)] as [String : Any]
self.videoDataOutput.alwaysDiscardsLateVideoFrames = true
self.videoDataOutput.setSampleBufferDelegate(self, queue: DispatchQueue(label: "camera_frame_processing_queue"))
self.captureSession.addOutput(self.videoDataOutput)
guard let connection = self.videoDataOutput.connection(with: AVMediaType.video),
connection.isVideoOrientationSupported else { return }
connection.videoOrientation = .portrait
}

private func detectRectangle(in image: CVPixelBuffer) {
let request = VNDetectRectanglesRequest(completionHandler: { (request: VNRequest, error: Error?) in
DispatchQueue.main.async {
guard let results = request.results as? [VNRectangleObservation] else { return }
self.maskLayer.removeFromSuperlayer()
guard let rect = results.first else{return}
self.drawBoundingBox(rect: rect)
}
})
request.minimumAspectRatio = VNAspectRatio(1.3)
request.maximumAspectRatio = VNAspectRatio(1.6)
request.minimumSize = Float(0.5)
request.maximumObservations = 1
let imageRequestHandler = VNImageRequestHandler(cvPixelBuffer: image, options: [:])
try? imageRequestHandler.perform([request])
}

func drawBoundingBox(rect : VNRectangleObservation) {
let transform = CGAffineTransform(scaleX: 1, y: -1).translatedBy(x: 0, y: -self.previewLayer.frame.height)
let scale = CGAffineTransform.identity.scaledBy(x: self.previewLayer.frame.width, y: self.previewLayer.frame.height)

let path = UIBezierPath()
path.move(to: CGPoint(x: rect.bottomLeft.x, y: rect.bottomLeft.y))
path.addLine(to: CGPoint(x: rect.bottomRight.x, y: rect.bottomRight.y))
path.addLine(to: CGPoint(x: rect.topRight.x, y: rect.topRight.y))
path.addLine(to: CGPoint(x: rect.topLeft.x, y: rect.topLeft.y))
path.addLine(to: CGPoint(x: rect.bottomLeft.x, y: rect.bottomLeft.y))
path.apply(scale)
path.apply(transform)
path.close()

maskLayer = CAShapeLayer()
maskLayer.fillColor = UIColor.clear.cgColor
maskLayer.lineWidth = 5
maskLayer.strokeColor = UIColor.red.cgColor
maskLayer.path = path.cgPath

previewLayer.insertSublayer(maskLayer, at: 1)
}
}
extension CGPoint {
func scaled(to size: CGSize) -> CGPoint {
return CGPoint(x: self.x * size.width,
y: self.y * size.height)
}
}

上面的代码是从tutorial: rectangle detection tutorial

更改的版本

这是我的代码示例。

///SET THE VALUE FOR THE DETECTED RECTANGLE
detectRectanglesRequest.minimumAspectRatio = VNAspectRatio(0.3)
detectRectanglesRequest.maximumAspectRatio = VNAspectRatio(0.9)
detectRectanglesRequest.minimumSize = Float(0.4)
detectRectanglesRequest.maximumObservations = 0
detectRectanglesRequest.minimumConfidence = 0.2
detectRectanglesRequest.quadratureTolerance = 2
detectRectanglesRequest.revision = VNDetectRectanglesRequestRevision1
detectRectanglesRequest.preferBackgroundProcessing = true

"try"最好这样使用:

///SEND THE REQUESTS TO THE REQUEST HANDLER
DispatchQueue.global(qos: .userInteractive).async {
do {
try imageRequestHandler.perform([detectRectanglesRequest])
} catch let error as NSError {
print("Failed to perform image request: (error)")
//                self.presentAlert("Image Request Failed", error: error)
return
}
}

…最后一个:

private func drawBoundingBox(rect: VNRectangleObservation) {

CATransaction.begin()

let transform = CGAffineTransform(scaleX: 1, y: -1)
.translatedBy(x: 0, y: -scanCam.videoPreviewLayer.bounds.height)

let scale = CGAffineTransform.identity
.scaledBy(x: scanCam.videoPreviewLayer.bounds.width,
y: scanCam.videoPreviewLayer.bounds.height)

let currentBounds = rect.boundingBox
.applying(scale).applying(transform)

createLayer(in: currentBounds)

CATransaction.commit()

//viewModel.cameraDetectRectFrame = currentBounds
}
private func createLayer(in rect: CGRect) {
maskLayer = CAShapeLayer()
maskLayer.frame = rect
maskLayer.opacity = 1
maskLayer.borderColor = UIColor.blue.cgColor ///for visual test
maskLayer.borderWidth = 2
scanCam.videoPreviewLayer.insertSublayer(maskLayer, at: 1)
}

最新更新