在使用iOS Vision Framework扫描文档时提高文本识别的准确性



我正在尝试构建一个能够读取任何文档/卡片上文本的文档扫描仪。然而,它有时很难正确识别信用卡上的文本。准确度还不错,但肯定还有改进的空间。我使用了Vision框架的文本识别功能,并采用了所有标准设置,据我所知这就是配置文本识别的正确方式。

下面是我用来设置文本识别请求的代码:

// Build the text-recognition request. The completion handler collects the best
// candidate string of every observation into one newline-separated string.
textRecognitionRequest = VNRecognizeTextRequest(completionHandler: { (request, error) in
    // Nothing to do when the request produced no text observations.
    guard let requestResults = request.results as? [VNRecognizedTextObservation],
          !requestResults.isEmpty else { return }

    var foundText = ""
    for observation in requestResults {
        // Skip observations that have no candidate instead of aborting the scan.
        guard let candidate = observation.topCandidates(1).first else { continue }
        foundText.append(candidate.string + "\n")
    }
    // `foundText` now holds one recognized line per observation.
})
// Favor accuracy over speed and let Vision apply language correction.
textRecognitionRequest.recognitionLevel = .accurate
textRecognitionRequest.usesLanguageCorrection = true

对于通过对扫描结果进行预处理或后处理、以编程方式提高识别准确率,有人有什么建议吗?

更新:我做了一个完全开源的项目,可以帮助您完成所需的任务。看看:https://github.com/ethanwa/credit-card-scanner-and-validator

**

除了添加一些预设的词汇(例如VNRecognizeTextRequest的customWords)让识别器专门查找之外,你很难再做什么来提高准确性;而这种做法对信用卡号来说没有意义,所以我就不展示那段代码了。随着iOS的迭代,真正的改进只能依靠苹果改进他们的文本识别模型。

在此期间,我建议你可以做以下两件事:

  1. 对您认为收到的信用卡号进行验证。例如,Visa以4开头,MasterCard以5开头,Discover以6开头,Amex以3开头,等等。它们有特定的长度等等。请参阅此处:https://www.freeformatter.com/credit-card-number-generator-validator.html

  2. 在相机画面上不断迭代,直到得到一个有效的号码。我不确定你目前是否只是拍一张卡的照片并处理该图像(听起来你是这么做的),但你应该每秒处理多帧图像,直到得到一个有效的信用卡号。苹果在手机上通过Apple Pay添加卡时,或者银行应用程序在以数字方式存入支票(需要找到有效的路由号码和账号)时,很可能就是这么做的。

下面举一个例子来说明我的意思……

我写的这段代码可以在任何给定的文本中挑选并验证ISBN编号(基本上是用于图书编目的10位或13位数字,其中包含一个用于验证的校验位),并且会持续查找,直到找到完整的编号并通过验证。它工作得非常好,速度也非常快。请看这段Swift 5.3代码:

import UIKit
import Vision
import Photos
import AVFoundation
/// Scans a live camera feed (or a photo picked from the library) for text and
/// shows the first token that passes ISBN validation in `numberLabel`.
class ViewController: UIViewController, AVCaptureVideoDataOutputSampleBufferDelegate {

    /// Text of every observation from the most recent recognition pass, space-separated.
    var recognizedText = ""
    /// The validated ISBN found in the most recent pass, or "" when none was found.
    var finalText = ""
    /// The image handed to the most recent recognition pass.
    var image: UIImage?
    /// True while a frame is being analysed; camera frames arriving meanwhile are dropped.
    var processing = false

    @IBOutlet weak var nameLabel: UILabel!
    @IBOutlet weak var setLabel: UILabel!
    @IBOutlet weak var numberLabel: UILabel!

    /// The Vision request, created once and reused for every image.
    lazy var textDetectionRequest: VNRecognizeTextRequest = {
        // Capture self weakly: the request is stored on self, so a strong
        // capture in its completion handler would form a retain cycle.
        let request = VNRecognizeTextRequest { [weak self] finishedRequest, error in
            self?.handleDetectedText(request: finishedRequest, error: error)
        }
        request.recognitionLevel = .accurate
        // Language correction is disabled because the targets are digit strings, not words.
        request.usesLanguageCorrection = false
        return request
    }()

    private let videoOutput = AVCaptureVideoDataOutput()
    private let captureSession = AVCaptureSession()
    /// Reused for every frame; creating a CIContext per frame is expensive.
    private let ciContext = CIContext(options: nil)
    private lazy var previewLayer: AVCaptureVideoPreviewLayer = {
        let preview = AVCaptureVideoPreviewLayer(session: self.captureSession)
        preview.videoGravity = .resizeAspect
        return preview
    }()

    // MARK: AV

    override func viewDidLoad() {
        super.viewDidLoad()
        addCameraInput()
        addVideoOutput()
    }

    /// Attaches the default video camera to the capture session.
    /// Logs and returns instead of crashing when no camera is available.
    private func addCameraInput() {
        guard let device = AVCaptureDevice.default(for: .video) else {
            debugPrint("no video capture device available")
            return
        }
        do {
            let cameraInput = try AVCaptureDeviceInput(device: device)
            guard captureSession.canAddInput(cameraInput) else {
                debugPrint("unable to add camera input to the capture session")
                return
            }
            captureSession.addInput(cameraInput)
        } catch {
            debugPrint("unable to create camera input: \(error)")
        }
    }

    override func viewDidLayoutSubviews() {
        super.viewDidLayoutSubviews()
        previewLayer.frame = view.bounds
    }

    /// Configures the BGRA video output and registers self as the frame delegate
    /// on a dedicated serial queue.
    private func addVideoOutput() {
        videoOutput.videoSettings = [(kCVPixelBufferPixelFormatTypeKey as NSString) : NSNumber(value: kCVPixelFormatType_32BGRA)] as [String : Any]
        videoOutput.setSampleBufferDelegate(self, queue: DispatchQueue(label: "my.image.handling.queue"))
        guard captureSession.canAddOutput(videoOutput) else {
            debugPrint("unable to add video output to the capture session")
            return
        }
        captureSession.addOutput(videoOutput)
    }

    /// Receives each camera frame and starts a recognition pass on it, unless
    /// the previous pass is still running.
    func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
        guard !processing else { return }
        guard let frame = CMSampleBufferGetImageBuffer(sampleBuffer) else {
            debugPrint("unable to get image from sample buffer")
            return
        }
        print("did receive image frame")

        processing = true
        image = convert(cmage: CIImage(cvPixelBuffer: frame))
        processImage()
    }

    /// Converts a CIImage to a CGImage-backed UIImage.
    ///
    /// - Parameter cmage: The Core Image frame to render.
    /// - Returns: The rendered image, or an empty `UIImage` (whose `cgImage` is
    ///   nil) when rendering fails, so `processImage()` skips the frame.
    func convert(cmage: CIImage) -> UIImage {
        guard let cgImage = ciContext.createCGImage(cmage, from: cmage.extent) else {
            return UIImage()
        }
        return UIImage(cgImage: cgImage)
    }

    // AV

    /// Clears the labels and runs text recognition on `image` off the main thread.
    func processImage() {
        DispatchQueue.main.async {
            self.nameLabel.text = ""
            self.setLabel.text = ""
            self.numberLabel.text = ""
        }

        guard let image = image, let cgImage = image.cgImage else {
            // Release the busy flag, otherwise no further frame would ever be processed.
            processing = false
            return
        }

        let requests = [textDetectionRequest]
        let imageRequestHandler = VNImageRequestHandler(cgImage: cgImage, orientation: .right, options: [:])
        DispatchQueue.global(qos: .userInitiated).async {
            do {
                try imageRequestHandler.perform(requests)
            } catch let error {
                print("Error: \(error)")
                // The completion handler may not run when perform throws.
                self.processing = false
            }
        }
    }

    /// Completion handler of `textDetectionRequest`: joins the recognized text,
    /// looks for the first valid ISBN, and stops the camera once one is found.
    ///
    /// - Parameters:
    ///   - request: The finished Vision request carrying the observations.
    ///   - error: The failure reported by Vision, if any.
    fileprivate func handleDetectedText(request: VNRequest?, error: Error?) {
        // Release the busy flag on every exit path so scanning can continue.
        defer { processing = false }
        finalText = ""

        if let error = error {
            print(error.localizedDescription)
            return
        }
        guard let results = request?.results, !results.isEmpty else {
            print("No text was found.")
            return
        }
        guard let observations = results as? [VNRecognizedTextObservation] else { return }

        recognizedText = ""
        for observation in observations {
            // Skip observations without a candidate rather than aborting the pass.
            guard let candidate = observation.topCandidates(1).first else { continue }
            recognizedText += candidate.string
            recognizedText += " "
        }

        // Drop hyphens (ISBNs are often printed with them) and stray line
        // breaks/tabs, then examine each space-separated token.
        let cleaned = recognizedText
            .replacingOccurrences(of: "-", with: "")
            .filter { !$0.isNewline && $0 != "\t" }

        for token in cleaned.components(separatedBy: " ") {
            let candidate = token.trimmingCharacters(in: .whitespacesAndNewlines)
            guard (candidate.count == 10 || candidate.count == 13),
                  candidate.containsISBNnums,
                  Validate.isbn(candidate) else { continue }

            finalText += candidate
            print(candidate)
            captureSession.stopRunning()
            DispatchQueue.main.async {
                self.previewLayer.removeFromSuperlayer()
            }
            break
        }

        // Snapshot the value so the main queue does not read `finalText` while
        // a later pass is rewriting it.
        let isbn = finalText
        DispatchQueue.main.async {
            self.numberLabel.text = isbn
        }
    }

    // MARK: Buttons

    /// Shows the live camera preview and starts the capture session.
    @IBAction func takePhoto(_ sender: Any) {
        view.layer.addSublayer(previewLayer)
        // NOTE(review): startRunning() blocks the calling thread until the
        // session has started; consider moving it to a background queue.
        captureSession.startRunning()
    }

    /// Lets the user pick an existing photo to scan.
    @IBAction func choosePhoto(_ sender: Any) {
        presentPhotoPicker(type: .photoLibrary)
    }

    /// Presents an image picker for the given source type with self as delegate.
    fileprivate func presentPhotoPicker(type: UIImagePickerController.SourceType) {
        let controller = UIImagePickerController()
        controller.sourceType = type
        controller.delegate = self
        present(controller, animated: true, completion: nil)
    }
}
// MARK: - UIImagePickerControllerDelegate
extension ViewController: UIImagePickerControllerDelegate, UINavigationControllerDelegate {

    /// Closes the picker when the user backs out without choosing a photo.
    func imagePickerControllerDidCancel(_ picker: UIImagePickerController) {
        dismiss(animated: true)
    }

    /// Closes the picker, stores the chosen original image (nil when the entry
    /// is missing or is not a `UIImage`) and runs text recognition on it.
    func imagePickerController(_ picker: UIImagePickerController,
                               didFinishPickingMediaWithInfo info: [UIImagePickerController.InfoKey: Any]) {
        dismiss(animated: true)

        let pickedImage = info[.originalImage] as? UIImage
        image = pickedImage
        processImage()
    }
}
extension String {
    /// True when the string is non-empty and consists solely of the characters
    /// that may appear in an ISBN: the digits 0–9 and an uppercase "X".
    var containsISBNnums: Bool {
        if isEmpty { return false }
        let allowedCharacters = "0123456789X"
        return allSatisfy { allowedCharacters.contains($0) }
    }
}

最新更新