将Tesseract.js的logger输出渲染到组件中(使用React)会导致速度变慢



我想在Tesseract.js日志中添加一个进度指示器。文档中的示例工作得很好,直到在记录器中设置状态挂钩:

// Question snippet: the Tesseract.js logger callback stores every log message
// in React state via setProgress, and that state is passed to <LogComponent>.
// Lines marked "//new" are the additions made on top of the documentation example.
const worker = createWorker({
logger: (m) => {
setProgress(m) //new
}});
...
const [ocr, setOcr] = useState('Recognizing...');
const [progress, setProgress] = useState(null); //new
...
return (
<div className="App">
<p>           
<LogComponent progress={progress}/> //new
</p>
</div>);

这会导致浏览器速度显著减慢(可能是由于React在每次状态更新时重新渲染的方式)。有办法绕过这个吗?也许使用React.memo?

您可以将它设计成一个钩子,这样整个组件就不会重新渲染。下面是我创建的一个useTesseract钩子,您可以使用它:https://gist.github.com/KevinDanikowski/25cdcdda2ef4750bcf443f2027cc375a

复制并粘贴:

import { useState, useEffect } from 'react'
import { createWorker } from 'tesseract.js'
/**
 * React hook that creates a Tesseract.js worker, loads the requested language
 * model, and exposes a function to run OCR on an image.
 *
 * @param {Object} options
 * @param {string} [options.tesseractLanguage='eng'] - Language passed to
 *   `loadLanguage` / `initialize`. Changing it re-runs the loading effect.
 * @param {boolean} [options.log=false] - When true, every worker log message
 *   is also written with `console.info`.
 * @returns {{
 *   imgResults: Object,
 *   loadingModel: boolean,
 *   processing: boolean,
 *   modelError: boolean,
 *   progress: number,
 *   extractTextFromImage: function(string): void
 * }} `imgResults` is `{ html, text }` after a successful recognition or
 *   `{ error: true }` after a failed one.
 */
export default function useTesseract({ tesseractLanguage = 'eng', log = false }) {
  const [tesseractWorker, setTesseractWorker] = useState(null)
  const [loadingModel, setLoadingModel] = useState(true)
  const [modelError, setModelError] = useState(false)
  const [imgResults, setImgResults] = useState({})
  const [processing, setProcessing] = useState(false)
  const [progress, setProgress] = useState(0)

  /**
   * Runs OCR on `imageUrl` and stores the outcome in `imgResults`.
   * If the model is still loading, the attempt is deferred by 400 ms.
   */
  const extractTextFromImage = (imageUrl) => {
    const recognize = async () => {
      setProcessing(true)
      try {
        const {
          data: {
            hocr: htmlOutput,
            text,
            // tsv, box, unlv
          },
        } = await tesseractWorker.recognize(imageUrl)
        setImgResults({ html: htmlOutput, text })
      } catch (e) {
        // The failure happens asynchronously, so it has to be caught here:
        // a try/catch around the un-awaited call would never see it.
        console.error('Tesseract Error:', e.message)
        setImgResults({ error: true })
      } finally {
        // Always clear the flag so the UI cannot get stuck in "processing".
        setProcessing(false)
      }
    }

    if (loadingModel) {
      setTimeout(recognize, 400)
    } else {
      recognize()
    }
  }

  // Forwards worker progress into state; optionally mirrors the raw message to the console.
  const logger = (m) => {
    setProgress(m.progress)
    if (log) {
      console.info(m)
    }
  }

  useEffect(() => {
    const loadTesseract = async () => {
      // Reuse the existing worker when only the language changed.
      let worker = tesseractWorker
      if (!worker) {
        worker = createWorker({
          logger,
          // specify paths because sometimes the free CDN goes down
          // corePath: '/static/tesseract-core.wasm.2.2.0.js',
          // workerPath: '/static/tesseract-worker.v2.1.4.min.js',
        })
        setTesseractWorker(worker)
        await worker.load()
      }
      await worker.loadLanguage(tesseractLanguage)
      await worker.initialize(tesseractLanguage)
      console.info(`INFO: loaded ${tesseractLanguage} tesseract model`)
      // Success path: clear the error flag (it was previously set to true here).
      setModelError(false)
      setLoadingModel(false)
    }

    loadTesseract().catch((e) => {
      console.error(`ERROR: Failed to load tesseract model`, e.message)
      setModelError(true)
      setLoadingModel(false)
    })
    // TODO: Have to add a ref to reference the latest tesseractWorker in order to terminate
    // return () => tesseractWorker.terminate()
  }, [tesseractLanguage])

  return {
    imgResults,
    loadingModel,
    processing,
    modelError,
    progress,
    extractTextFromImage,
  }
}

我通过将我的应用程序渲染器放在一个类中并使用setState方法来实现和渲染tesseract工作程序的进度:

class App extends React.Component {
constructor(props){
super(props)
this.state = {
file: null
}
this.handleChange = this.handleChange.bind(this)
}
setProgress(m) {

if (m.progress !== 0 && m.progress !== 0.5 && m.progress !== 1){

var prog = "Progress: " + Math.round(m.progress*100) + "%"
this.setState({progress: prog})
}
}

worker = createWorker({
logger: m => this.setProgress(m),
});

doOCR = async () => {
await this.worker.load();
await this.worker.loadLanguage('eng');
await this.worker.initialize('eng');
const { data: { text } } = await this.worker.recognize(this.state.file);
this.setState({text: extractTotal(text),
progress: ""});
};
handleChange(event) {

this.setState({text: placeholder});
this.setState({
file: URL.createObjectURL(event.target.files[0]),

})
this.doOCR()
}
setText(input){
if (!input) {
return "Please select a receipt"   
} 
else { 
return input
}
}
render() {
console.log("Text: " + this.state.text) 
return (
<div className="container">
<p>{this.setText(this.state.text)}</p>
<p>{this.state.progress}</p>
<input type="file" onChange={this.handleChange}/>
<img src={this.state.file} className='logo' alt=""/>

</div>
);
}
}

最新更新