Node.js Google Speech API Stream Stops

I've lifted the following sample code (recognize.js; authentication required) from https://github.com/GoogleCloudPlatform/nodejs-docs-samples/tree/master/speech:

/**
 * Copyright 2016, Google, Inc.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 * This application demonstrates how to perform basic recognize operations
 * with the Google Cloud Speech API.
 *
 * For more information, see the README.md under /speech and the documentation
 * at https://cloud.google.com/speech/docs.
 */
'use strict';
function syncRecognize (filename, encoding, sampleRate) {
  // [START speech_sync_recognize]
  // Imports the Google Cloud client library
  const Speech = require('@google-cloud/speech');
  // Instantiates a client
  const speech = Speech();
  // The path to the local file on which to perform speech recognition, e.g. /path/to/audio.raw
  // const filename = '/path/to/audio.raw';
  // The encoding of the audio file, e.g. 'LINEAR16'
  // const encoding = 'LINEAR16';
  // The sample rate of the audio file, e.g. 16000
  // const sampleRate = 16000;
  const request = {
    encoding: encoding,
    sampleRate: sampleRate
  };
  // Detects speech in the audio file
  speech.recognize(filename, request)
    .then((results) => {
      const transcription = results[0];
      console.log(`Transcription: ${transcription}`);
    });
  // [END speech_sync_recognize]
}
function syncRecognizeGCS (gcsUri, encoding, sampleRate) {
  // [START speech_sync_recognize_gcs]
  // Imports the Google Cloud client library
  const Speech = require('@google-cloud/speech');
  // Instantiates a client
  const speech = Speech();
  // The Google Cloud Storage URI of the file on which to perform speech recognition, e.g. gs://my-bucket/audio.raw
  // const gcsUri = 'gs://my-bucket/audio.raw';
  // The encoding of the audio file, e.g. 'LINEAR16'
  // const encoding = 'LINEAR16';
  // The sample rate of the audio file, e.g. 16000
  // const sampleRate = 16000;
  const request = {
    encoding: encoding,
    sampleRate: sampleRate
  };
  // Detects speech in the audio file
  speech.recognize(gcsUri, request)
    .then((results) => {
      const transcription = results[0];
      console.log(`Transcription: ${transcription}`);
    });
  // [END speech_sync_recognize_gcs]
}
function asyncRecognize (filename, encoding, sampleRate) {
  // [START speech_async_recognize]
  // Imports the Google Cloud client library
  const Speech = require('@google-cloud/speech');
  // Instantiates a client
  const speech = Speech();
  // The path to the local file on which to perform speech recognition, e.g. /path/to/audio.raw
  // const filename = '/path/to/audio.raw';
  // The encoding of the audio file, e.g. 'LINEAR16'
  // const encoding = 'LINEAR16';
  // The sample rate of the audio file, e.g. 16000
  // const sampleRate = 16000;
  const request = {
    encoding: encoding,
    sampleRate: sampleRate
  };
  // Detects speech in the audio file. This creates a recognition job that you
  // can wait for now, or get its result later.
  speech.startRecognition(filename, request)
    .then((results) => {
      const operation = results[0];
      // Get a Promise representation of the final result of the job
      return operation.promise();
    })
    .then((transcription) => {
      console.log(`Transcription: ${transcription}`);
    });
  // [END speech_async_recognize]
}
function asyncRecognizeGCS (gcsUri, encoding, sampleRate) {
  // [START speech_async_recognize_gcs]
  // Imports the Google Cloud client library
  const Speech = require('@google-cloud/speech');
  // Instantiates a client
  const speech = Speech();
  // The Google Cloud Storage URI of the file on which to perform speech recognition, e.g. gs://my-bucket/audio.raw
  // const gcsUri = 'gs://my-bucket/audio.raw';
  // The encoding of the audio file, e.g. 'LINEAR16'
  // const encoding = 'LINEAR16';
  // The sample rate of the audio file, e.g. 16000
  // const sampleRate = 16000;
  const request = {
    encoding: encoding,
    sampleRate: sampleRate
  };
  // Detects speech in the audio file. This creates a recognition job that you
  // can wait for now, or get its result later.
  speech.startRecognition(gcsUri, request)
    .then((results) => {
      const operation = results[0];
      // Get a Promise representation of the final result of the job
      return operation.promise();
    })
    .then((transcription) => {
      console.log(`Transcription: ${transcription}`);
    });
  // [END speech_async_recognize_gcs]
}
function streamingRecognize (filename, encoding, sampleRate) {
  // [START speech_streaming_recognize]
  const fs = require('fs');
  // Imports the Google Cloud client library
  const Speech = require('@google-cloud/speech');
  // Instantiates a client
  const speech = Speech();
  // The path to the local file on which to perform speech recognition, e.g. /path/to/audio.raw
  // const filename = '/path/to/audio.raw';
  // The encoding of the audio file, e.g. 'LINEAR16'
  // const encoding = 'LINEAR16';
  // The sample rate of the audio file, e.g. 16000
  // const sampleRate = 16000;
  const request = {
    config: {
      encoding: encoding,
      sampleRate: sampleRate
    }
  };
  // Stream the audio to the Google Cloud Speech API
  const recognizeStream = speech.createRecognizeStream(request)
    .on('error', console.error)
    .on('data', (data) => {
      console.log('Data received: %j', data);
    });
  // Stream an audio file from disk to the Speech API, e.g. "./resources/audio.raw"
  fs.createReadStream(filename).pipe(recognizeStream);
  // [END speech_streaming_recognize]
}
function streamingMicRecognize (encoding, sampleRate) {
  // [START speech_streaming_mic_recognize]
  const record = require('node-record-lpcm16');
  // Imports the Google Cloud client library
  const Speech = require('@google-cloud/speech');
  // Instantiates a client
  const speech = Speech();
  // The encoding of the audio file, e.g. 'LINEAR16'
  // const encoding = 'LINEAR16';
  // The sample rate of the audio file, e.g. 16000
  // const sampleRate = 16000;
  const request = {
    config: {
      encoding: encoding,
      sampleRate: sampleRate
    }
  };
  // Create a recognize stream
  const recognizeStream = speech.createRecognizeStream(request)
    .on('error', console.error)
    .on('data', (data) => process.stdout.write(data.results));
  // Start recording and send the microphone input to the Speech API
  record.start({
    sampleRate: sampleRate,
    threshold: 0
  }).pipe(recognizeStream);
  console.log('Listening, press Ctrl+C to stop.');
  // [END speech_streaming_mic_recognize]
}
require(`yargs`)
  .demand(1)
  .command(
    `sync <filename>`,
    `Detects speech in a local audio file.`,
    {},
    (opts) => syncRecognize(opts.filename, opts.encoding, opts.sampleRate)
  )
  .command(
    `sync-gcs <gcsUri>`,
    `Detects speech in an audio file located in a Google Cloud Storage bucket.`,
    {},
    (opts) => syncRecognizeGCS(opts.gcsUri, opts.encoding, opts.sampleRate)
  )
  .command(
    `async <filename>`,
    `Creates a job to detect speech in a local audio file, and waits for the job to complete.`,
    {},
    (opts) => asyncRecognize(opts.filename, opts.encoding, opts.sampleRate)
  )
  .command(
    `async-gcs <gcsUri>`,
    `Creates a job to detect speech in an audio file located in a Google Cloud Storage bucket, and waits for the job to complete.`,
    {},
    (opts) => asyncRecognizeGCS(opts.gcsUri, opts.encoding, opts.sampleRate)
  )
  .command(
    `stream <filename>`,
    `Detects speech in a local audio file by streaming it to the Speech API.`,
    {},
    (opts) => streamingRecognize(opts.filename, opts.encoding, opts.sampleRate)
  )
  .command(
    `listen`,
    `Detects speech in a microphone input stream.`,
    {},
    (opts) => streamingMicRecognize(opts.encoding, opts.sampleRate)
  )
  .options({
    encoding: {
      alias: 'e',
      default: 'LINEAR16',
      global: true,
      requiresArg: true,
      type: 'string'
    },
    sampleRate: {
      alias: 'r',
      default: 16000,
      global: true,
      requiresArg: true,
      type: 'number'
    }
  })
  .example(`node $0 sync ./resources/audio.raw -e LINEAR16 -r 16000`)
  .example(`node $0 async-gcs gs://my-bucket/audio.raw -e LINEAR16 -r 16000`)
  .example(`node $0 stream ./resources/audio.raw -e LINEAR16 -r 16000`)
  .example(`node $0 listen`)
  .wrap(120)
  .recommendCommands()
  .epilogue(`For more information, see https://cloud.google.com/speech/docs`)
  .help()
  .strict()
  .argv;

I run it with: node recognize.js listen

The sample works at first: I speak into the microphone, and it logs what I say to the console. The problem is that if I stay silent for a few seconds, it stops listening/recognizing/printing. I've been digging through the various docs, but I can't figure out why.

I'm not 100% sure, but it sounds like there could be several explanations:

  1. The request is asking for a "single utterance" (StreamingRecognitionConfig).

    This seems unlikely, since the default appears to be false. Still, it wouldn't hurt to state it explicitly in the request (const request = { singleUtterance: false, config: {...} }); see the sketch after this list.

  2. You're running into a client-side defined timeout (createRecognizeStream).

    This also seems a little odd, since I'd guess you aren't sitting there speaking continuously for 60 seconds and only going quiet after that long.

  3. The microphone is closing its stream, and the close is propagating back to the speech client.

    This seems somewhat more plausible, but I'm not 100% confident about it.
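
As mentioned in item 1, here is a minimal sketch of making singleUtterance explicit. It assumes the pre-1.0 @google-cloud/speech client used in the question; verify the option names against the version you actually have installed:

const Speech = require('@google-cloud/speech');
const speech = Speech();

// Explicitly disable single-utterance mode so the recognizer does not
// end the stream after the first detected pause in speech.
// interimResults is optional; it surfaces partial hypotheses as you talk.
const request = {
  singleUtterance: false,
  interimResults: true,
  config: {
    encoding: 'LINEAR16',
    sampleRate: 16000
  }
};

const recognizeStream = speech.createRecognizeStream(request)
  .on('error', console.error)
  .on('data', (data) => process.stdout.write(data.results));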

If you can record what is being said into the microphone and reproduce the problem with that file sent through the createRecognizeStream method (rather than a live audio stream), you can rule out the last item and make this much easier to diagnose.
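
For example, here is a sketch of capturing the raw microphone bytes for later replay, built from the same record/fs calls already used above (debug.raw is just a hypothetical path):

const fs = require('fs');
const record = require('node-record-lpcm16');

// 1) Record a session that includes the silence which triggers the
//    problem, keeping the raw bytes on disk.
record.start({ sampleRate: 16000, threshold: 0 })
  .pipe(fs.createWriteStream('debug.raw'));

// 2) Later, replay exactly the same audio through the recognizer,
//    taking the live microphone out of the equation:
//    fs.createReadStream('debug.raw').pipe(recognizeStream);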

LATEST UPDATE