使用HTK模型时Sphinx-4的识别性能



我使用Sphinx-4配合HTK模型构建了一个在线手写识别系统。当我使用HTK自带的解码器时,识别率是89%。然而,使用相同HTK模型的Sphinx-4系统只达到了略高于50%的识别率。

这正常吗?使用HTK模型,Sphinx-4的精度还有提高的空间吗?

感谢

附加信息:

我基于LatticeDemo示例构建了Sphinx-4系统,并遵循了这篇文章中的步骤:http://nshmyrev.blogspot.com.tr/2009/09/using-htk-models-in-sphinx4.html 。HTK系统的配置文件如下:

# HTK configuration used for the HTK baseline system described above.
TARGETKIND = MFCC
TARGETRATE = 1
NUMCEPS = 12
ENORMALISE = F
NATURALBYTEORDER = TRUE
NONUMESCAPES = TRUE

Sphinx-4的config.xml如下所示:

<?xml version="1.0" encoding="UTF-8"?>
<!-- ******************************************************** -->
<!--  biship  configuration file                              -->
<!-- ******************************************************** -->
<config>        
    <!-- ******************************************************** -->
    <!-- frequently tuned properties                              -->
    <!-- ******************************************************** --> 
    <!-- Passed to the activeList component through ${...} references. -->
    <property name="absoluteBeamWidth" value="0"/>
    <property name="relativeBeamWidth" value="1E-50"/>
    <!-- Not referenced through ${...} by any component in this file. -->
    <property name="absoluteWordBeamWidth" value="20"/>
    <property name="relativeWordBeamWidth" value="1E-60"/>
    <!-- Passed to the flatLinguist component through ${...} references. -->
    <property name="wordInsertionProbability" value="1E-16"/>
    <property name="languageWeight" value="15"/>
    <!-- Not referenced through ${...} by any component in this file. -->
    <property name="silenceInsertionProbability" value=".0"/>
    <!-- Component names that other components look up via ${frontend}
         and ${recognizer}. -->
    <property name="frontend" value="epFrontEnd"/>
    <property name="recognizer" value="recognizer"/>
    <!-- Not referenced through ${...} by any component in this file. -->
    <property name="showCreations" value="false"/>
    
    <!-- ******************************************************** -->
    <!-- word recognizer configuration                            -->
    <!-- ******************************************************** --> 
    
    <!-- Top-level recognizer: points at the decoder component and lists the
         four monitor components defined in the monitors section. -->
    <component name="recognizer"
               type="edu.cmu.sphinx.recognizer.Recognizer">
        <property name="decoder" value="decoder"/>

        <propertylist name="monitors">
            <item>accuracyTracker </item>
            <item>speedTracker </item>
            <item>memoryTracker </item>
            <item>recognizerMonitor </item>
        </propertylist>
    </component>
    
    <!-- ******************************************************** -->
    <!-- The Decoder   configuration                              -->
    <!-- ******************************************************** --> 
    
    <!-- Decoder: delegates the search to the searchManager component. -->
    <component name="decoder" type="edu.cmu.sphinx.decoder.Decoder">
        <property name="searchManager" value="searchManager"/>
        <!-- Disabled setting. This line arrived as an active property with a
             blank name followed by a stray comment terminator, i.e. a
             commented-out property whose opening marker was lost.
             NOTE(review): the name is presumed to be featureBlockSize;
             confirm before re-enabling. -->
        <!-- <property name="featureBlockSize" value="50"/> -->
    </component>
    
    <!-- ******************************************************** -->
    <!-- The Search Manager                                       -->
    <!-- ******************************************************** --> 
      
    <!-- Search manager: every property value below is the name of another
         component defined in this file. -->
    <component name="searchManager"
               type="edu.cmu.sphinx.decoder.search.SimpleBreadthFirstSearchManager">
        <property name="logMath"           value="logMath"/>
        <property name="linguist"          value="flatLinguist"/>
        <property name="pruner"            value="trivialPruner"/>
        <property name="scorer"            value="threadedScorer"/>
        <property name="activeListFactory" value="activeList"/>
    </component>
    
    <!-- ******************************************************** -->
    <!-- The Active Lists                                         -->
    <!-- ******************************************************** --> 
        
    <!-- Active-list factory: takes both beam widths from the global
         properties at the top of this file. -->
    <component name="activeList"
               type="edu.cmu.sphinx.decoder.search.PartitionActiveListFactory">
        <property name="logMath"           value="logMath"/>
        <property name="absoluteBeamWidth" value="${absoluteBeamWidth}"/>
        <property name="relativeBeamWidth" value="${relativeBeamWidth}"/>
    </component>
    
    <!-- ******************************************************** -->
    <!-- The Pruner                                               -->
    <!-- ******************************************************** --> 
    <!-- Pruner referenced by the searchManager; it takes no properties here. -->
    <component name="trivialPruner" type="edu.cmu.sphinx.decoder.pruner.SimplePruner"/>
    
    <!-- ******************************************************** -->
    <!-- The Scorer                                               -->
    <!-- ******************************************************** --> 
    <!-- Acoustic scorer: reads from the frontend named by the global
         "frontend" property. -->
    <component name="threadedScorer"
               type="edu.cmu.sphinx.decoder.scorer.ThreadedAcousticScorer">
        <property name="frontend" value="${frontend}"/>
    </component>
    
    <!-- ******************************************************** -->
    <!-- The linguist  configuration                              -->
    <!-- ******************************************************** -->
    
 
    <!-- Linguist: combines the jsgfGrammar and the "wsj" acoustic model;
         insertion probability and language weight come from the global
         properties. -->
    <component name="flatLinguist"
               type="edu.cmu.sphinx.linguist.flat.FlatLinguist">
        <property name="logMath"                  value="logMath"/>
        <property name="grammar"                  value="jsgfGrammar"/>
        <property name="acousticModel"            value="wsj"/>
        <property name="wordInsertionProbability" value="${wordInsertionProbability}"/>
        <property name="languageWeight"           value="${languageWeight}"/>
        <property name="unitManager"              value="unitManager"/>
    </component>
    
   <!-- JSGF grammar: grammar "word" is looked up under grammarLocation;
        silence and filler words are both switched off. -->
   <component name="jsgfGrammar"
              type="edu.cmu.sphinx.jsgf.JSGFGrammar">
       <property name="grammarLocation" value="/home/efbilgin/HMM-Exp/HTK-9_Feats_5v/sphinx/"/>
       <property name="dictionary" value="dictionary"/>
       <property name="grammarName" value="word"/>
       <property name="logMath" value="logMath"/>
       <property name="addSilenceWords" value="false"/>
       <property name="addFillerWords" value="false"/>
   </component>
    
    <!-- ******************************************************** -->
    <!-- The Dictionary configuration                            -->
    <!-- ******************************************************** -->
    <!-- Pronunciation dictionary used by the jsgfGrammar component. -->
    <component name="dictionary"
               type="edu.cmu.sphinx.linguist.dictionary.FastDictionary">
        <property name="dictionaryPath" value="/home/efbilgin/HMM-Exp/HTK-9_Feats_5v/sphinx/7266.dic"/>
        <!-- The leading slash was missing, which made this a relative path
             even though it points into the same directory as dictionaryPath. -->
        <property name="fillerPath" value="/home/efbilgin/HMM-Exp/HTK-9_Feats_5v/sphinx/7266.filler"/>
        <property name="addSilEndingPronunciation" value="false"/>
        <property name="wordReplacement" value="&lt;sil&gt;"/>
        <property name="allowMissingWords" value="true"/>
    </component>
        
    
    <!-- ******************************************************** -->
    <!-- The acoustic model configuration                         -->
    <!-- ******************************************************** -->
    <!-- Acoustic model referenced by flatLinguist; its data comes from the
         wsjLoader component. -->
    <component name="wsj"
               type="edu.cmu.sphinx.linguist.acoustic.tiedstate.TiedStateAcousticModel">
        <property name="loader" value="wsjLoader"/>
        <property name="unitManager" value="unitManager"/>
    </component>
    <!-- Model loader: uses HTKLoader (a Sphinx3Loader type was left commented
         out on this tag) and reads the HTK hmmdefs file given below. -->
    <component name="wsjLoader"
               type="edu.cmu.sphinx.linguist.acoustic.tiedstate.HTKLoader">
        <property name="logMath" value="logMath"/>
        <property name="unitManager" value="unitManager"/>
        <property name="modelDefinition" value="/home/efbilgin/HMM-Exp/UNIPEN_1000_10_Eq_J-1/hmm7/hmmdefs"/>
    </component>
    <!-- ******************************************************** -->
    <!-- The unit manager configuration                           -->
    <!-- ******************************************************** -->
    <!-- Shared unit manager referenced by flatLinguist, wsj and wsjLoader. -->
    <component name="unitManager" type="edu.cmu.sphinx.linguist.acoustic.UnitManager"/>
    
    <!-- ******************************************************** -->
    <!-- The frontend configuration                               -->
    <!-- ******************************************************** -->
    
    <!-- Frontend pipeline selected by the global "frontend" property.
         NOTE(review): streamHTKSource is listed last, after all of the
         audio-processing stages. Its name suggests a data source, so confirm
         this ordering is intended. -->
    <component name="epFrontEnd"
               type="edu.cmu.sphinx.frontend.FrontEnd">
        <propertylist name="pipeline">
            <!-- audio input and endpointing -->
            <item>audioFileDataSource </item>
            <item>dataBlocker </item>
            <item>speechClassifier </item>
            <item>speechMarker </item>
            <item>nonSpeechDataFilter </item>
            <!-- signal processing and feature computation -->
            <item>preemphasizer </item>
            <item>windower </item>
            <item>fft </item>
            <item>melFilterBank </item>
            <item>dct </item>
            <item>liveCMN </item>
            <item>featureExtraction </item>
            <!-- HTK cepstrum stream -->
            <item>streamHTKSource</item>
        </propertylist>
    </component>
	<!-- Final item of the epFrontEnd pipeline; configured with a cepstrum
	     length of 39. -->
	<component name="streamHTKSource"
	           type="edu.cmu.sphinx.frontend.util.StreamHTKCepstrum">
		<property name="cepstrumLength" value="39"/>
	</component>
    <!-- Input source listed first in the epFrontEnd pipeline. -->
    <component name="audioFileDataSource"
               type="edu.cmu.sphinx.frontend.util.AudioFileDataSource"/>

    <!-- Defined here but not listed in the epFrontEnd pipeline. -->
    <component name="microphone"
               type="edu.cmu.sphinx.frontend.util.Microphone">
        <property name="closeBetweenUtterances" value="false"/>
    </component>

    <component name="dataBlocker"
               type="edu.cmu.sphinx.frontend.DataBlocker"/>

    <component name="speechClassifier"
               type="edu.cmu.sphinx.frontend.endpoint.SpeechClassifier">
        <property name="threshold" value="1"/>
    </component>

    <component name="nonSpeechDataFilter"
               type="edu.cmu.sphinx.frontend.endpoint.NonSpeechDataFilter"/>

    <component name="speechMarker"
               type="edu.cmu.sphinx.frontend.endpoint.SpeechMarker">
        <property name="speechTrailer" value="50"/>
    </component>
    
    <!-- Signal-processing stages of the epFrontEnd pipeline, declared in
         pipeline order; none of them sets any property here. -->
    <component name="preemphasizer"
               type="edu.cmu.sphinx.frontend.filter.Preemphasizer"/>

    <component name="windower"
               type="edu.cmu.sphinx.frontend.window.RaisedCosineWindower"/>

    <component name="fft"
               type="edu.cmu.sphinx.frontend.transform.DiscreteFourierTransform"/>

    <component name="melFilterBank"
               type="edu.cmu.sphinx.frontend.frequencywarp.MelFrequencyFilterBank"/>

    <component name="dct"
               type="edu.cmu.sphinx.frontend.transform.DiscreteCosineTransform"/>

    <component name="liveCMN"
               type="edu.cmu.sphinx.frontend.feature.LiveCMN"/>

    <component name="featureExtraction"
               type="edu.cmu.sphinx.frontend.feature.DeltasFeatureExtractor"/>
    
    <!-- ******************************************************* -->
    <!--  monitors                                               -->
    <!-- ******************************************************* -->
    
    <!-- Monitors attached to the recognizer named by the global "recognizer"
         property; every show* switch is set to false. -->
    <component name="accuracyTracker"
               type="edu.cmu.sphinx.instrumentation.BestPathAccuracyTracker">
        <property name="recognizer" value="${recognizer}"/>
        <property name="showRawResults" value="false"/>
        <property name="showAlignedResults" value="false"/>
    </component>

    <component name="memoryTracker"
               type="edu.cmu.sphinx.instrumentation.MemoryTracker">
        <property name="recognizer" value="${recognizer}"/>
        <property name="showDetails" value="false"/>
        <property name="showSummary" value="false"/>
    </component>

    <component name="speedTracker"
               type="edu.cmu.sphinx.instrumentation.SpeedTracker">
        <property name="recognizer" value="${recognizer}"/>
        <property name="frontend" value="${frontend}"/>
        <property name="showDetails" value="false"/>
    </component>

    <!-- Also lists configMonitor as an allocated monitor. -->
    <component name="recognizerMonitor"
               type="edu.cmu.sphinx.instrumentation.RecognizerMonitor">
        <property name="recognizer" value="${recognizer}"/>
        <propertylist name="allocatedMonitors">
            <item>configMonitor </item>
        </propertylist>
    </component>

    <component name="configMonitor"
               type="edu.cmu.sphinx.instrumentation.ConfigMonitor">
        <property name="showConfig" value="false"/>
    </component>
    
    
    <!-- ******************************************************* -->
    <!--  Miscellaneous components                               -->
    <!-- ******************************************************* -->
    
    <!-- Shared log-math component referenced by the search manager, active
         list, linguist, grammar and model loader. -->
    <component name="logMath"
               type="edu.cmu.sphinx.util.LogMath">
        <property name="logBase" value="1.0001"/>
        <property name="useAddTable" value="true"/>
    </component>
</config>

性能问题往往很难定位,你可以尝试以下几件事:

1) 增大搜索束宽(beam),以确保问题不是由剪枝造成的

2) 分析单词错误:错误主要是插入还是删除?最常被混淆的单词有哪些?

3) Sphinx4是一个语音识别器,例如,它会自动向语法中添加填充词(filler words),最新版本中还带有背景噪声循环。请确保你已经禁用了这些功能。例如,Sphinx4的HTK加载器要求模型使用特定的符号表示。

4) 确保s4和HTK中的特征提取是相同的。

相关内容

  • 没有找到相关文章

最新更新