Microsoft语音转文本 SDK JS 不接受具有长字节数组的文件



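我正在使用 Microsoft 的 Azure 语音转文本 SDK，通过 JavaScript 从 .wav 文件中获取文本。问题是，识别器不接受 File 对象，并返回错误"未捕获的范围错误：源数组太长"（Uncaught RangeError: Source array is too long）。在用于创建 File 对象的 blob 上调用 .slice(0, 2248) 可以正常工作，并返回 .wav 文件中正确的前几个词。但是，如果我尝试将 blob 切片为类似 (2249, 4497) 的块，则会返回错误"未捕获的范围错误：偏移量在 DataView 的边界之外"（Uncaught RangeError: Offset is outside the bounds of the DataView）。我不知道如何 a) 让识别器接受具有长源数组的 blob，或者 b) 将 blob 分解为不越界的块。为了匿名，.wav 的 URL 已被替换为短划线，请忽略。欢迎提供任何解决方案！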

JS:
<script>
//get wav file from url, create File object with it
/**
 * Downloads a WAV file, wraps it in a File object and runs continuous speech
 * recognition on it with the Speech SDK, logging every event to the console.
 *
 * Relies on the browser globals `fetch`, `File` and `SpeechSDK`.
 *
 * @returns {Promise<void>} Settles once recognition has been started, or once
 *   a download/setup error has been logged. It never rejects.
 */
function fromFile() {
  return fetch("http://www.-----------.com/prod/wp-content/uploads/2020/12/cutafew.wav")
    .then((response) => {
      // fetch() only rejects on network failure; an HTTP error page must not
      // be handed to the recognizer as if it were audio data.
      if (!response.ok) {
        throw new Error(`Failed to download audio file: HTTP ${response.status}`);
      }
      return response.blob();
    })
    .then((blob) => {
      const file = new File([blob], "http://www.---------.com/prod/wp-content/uploads/2020/12/cutafew.wav", {
        type: "audio/x-wav",
        lastModified: new Date().getTime(),
      });

      // NOTE(review): the subscription key is hard-coded here; secrets should
      // be injected at runtime rather than committed to source.
      const speechConfig = SpeechSDK.SpeechConfig.fromSubscription("f6abc3bfabc64f0d820d537c0d738788", "centralus");
      const audioConfig = SpeechSDK.AudioConfig.fromWavFileInput(file);
      const recognizer = new SpeechSDK.SpeechRecognizer(speechConfig, audioConfig);

      // Intermediate hypotheses while audio is still being processed.
      recognizer.recognizing = (s, e) => {
        console.log(e.result);
        console.log(`RECOGNIZING: Text=${e.result.text}`);
      };

      // Final result for one utterance. The enums live on the SpeechSDK
      // namespace; referencing them as bare globals throws a ReferenceError.
      recognizer.recognized = (s, e) => {
        if (e.result.reason === SpeechSDK.ResultReason.RecognizedSpeech) {
          console.log(`RECOGNIZED: Text=${e.result.text}`);
        } else if (e.result.reason === SpeechSDK.ResultReason.NoMatch) {
          console.log("NOMATCH: Speech could not be recognized.");
        }
      };

      // Recognition was canceled (end of stream or an error); stop the session.
      recognizer.canceled = (s, e) => {
        console.log(`CANCELED: Reason=${e.reason}`);
        if (e.reason === SpeechSDK.CancellationReason.Error) {
          console.log(`CANCELED: ErrorCode=${e.errorCode}`);
          console.log(`CANCELED: ErrorDetails=${e.errorDetails}`);
          console.log("CANCELED: Did you update the subscription info?");
        }
        recognizer.stopContinuousRecognitionAsync();
      };

      recognizer.sessionStopped = (s, e) => {
        console.log("\n    Session stopped event.");
        recognizer.stopContinuousRecognitionAsync();
      };

      recognizer.startContinuousRecognitionAsync();
    })
    // Download or recognizer-setup failures end up here.
    .catch((err) => console.error(err));
}
fromFile();
</script>

您可以参考"从内存流中识别"的示例，改用推送流（push stream）向识别器输入音频：

// Node.js file-system module; used below to stream the audio file from disk.
const fs = require('fs');
// Speech SDK package for Node.js.
const sdk = require("microsoft-cognitiveservices-speech-sdk");
// Shared speech configuration; replace both placeholders with a real key and region.
const speechConfig = sdk.SpeechConfig.fromSubscription("<paste-your-speech-key-here>", "<paste-your-speech-location/region-here>");
/**
 * Streams "YourAudioFile.wav" from disk into a Speech SDK push stream and
 * runs a single-shot recognition on it, logging the recognized text.
 *
 * Uses the module-level `fs`, `sdk` and `speechConfig` bindings.
 */
function fromStream() {
  const pushStream = sdk.AudioInputStream.createPushStream();

  fs.createReadStream("YourAudioFile.wav")
    .on('data', function (chunk) {
      // The read stream emits Node Buffers, and Buffer#slice() only returns
      // another view. Copy exactly this chunk's bytes into a standalone
      // ArrayBuffer, which is the type pushStream.write() is declared to take.
      pushStream.write(chunk.buffer.slice(chunk.byteOffset, chunk.byteOffset + chunk.byteLength));
    })
    .on('end', function () {
      pushStream.close();
    })
    .on('error', function (err) {
      // Without this handler an unreadable file crashes the process and the
      // push stream is never closed, leaving the recognizer waiting for audio.
      console.error(err);
      pushStream.close();
    });

  const audioConfig = sdk.AudioConfig.fromStreamInput(pushStream);
  const recognizer = new sdk.SpeechRecognizer(speechConfig, audioConfig);
  recognizer.recognizeOnceAsync(
    (result) => {
      console.log(`RECOGNIZED: Text=${result.text}`);
      recognizer.close();
    },
    (err) => {
      // Release the recognizer on failure as well.
      console.error(err);
      recognizer.close();
    }
  );
}
fromStream();

最新更新