Nodejs Google Speech API Stream Stops

我从 Google 的示例中借用了以下示例代码(recognize.js,需要身份验证):

 * Copyright 2016, Google, Inc.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * This application demonstrates how to perform basic recognize operations
 * with the Google Cloud Speech API.
 * For more information, see the under /speech and the documentation
 * at
'use strict';
/**
 * Runs a synchronous recognize request on a local audio file and logs the
 * resulting transcription.
 *
 * @param {string} filename Path to the local audio file, e.g. /path/to/audio.raw
 * @param {string} encoding Encoding of the audio file, e.g. 'LINEAR16'
 * @param {number} sampleRate Sample rate of the audio file, e.g. 16000
 */
function syncRecognize (filename, encoding, sampleRate) {
  // [START speech_sync_recognize]
  // Imports the Google Cloud client library
  const Speech = require('@google-cloud/speech');

  // Instantiates a client
  const speech = Speech();

  // The path to the local file on which to perform speech recognition, e.g. /path/to/audio.raw
  // const filename = '/path/to/audio.raw';

  // The encoding of the audio file, e.g. 'LINEAR16'
  // const encoding = 'LINEAR16';

  // The sample rate of the audio file, e.g. 16000
  // const sampleRate = 16000;

  const request = {
    encoding: encoding,
    sampleRate: sampleRate
  };

  // Detects speech in the audio file
  speech.recognize(filename, request)
    .then((results) => {
      // The first element of the resolved array is the transcription
      const transcription = results[0];
      console.log(`Transcription: ${transcription}`);
    })
    .catch((err) => {
      // Report request failures instead of leaving the rejection unhandled
      console.error('ERROR:', err);
    });
  // [END speech_sync_recognize]
}
/**
 * Runs a synchronous recognize request on an audio file referenced by a
 * Google Cloud Storage URI and logs the resulting transcription.
 *
 * @param {string} gcsUri Cloud Storage URI of the audio file, e.g. gs://my-bucket/audio.raw
 * @param {string} encoding Encoding of the audio file, e.g. 'LINEAR16'
 * @param {number} sampleRate Sample rate of the audio file, e.g. 16000
 */
function syncRecognizeGCS (gcsUri, encoding, sampleRate) {
  // [START speech_sync_recognize_gcs]
  // Imports the Google Cloud client library
  const Speech = require('@google-cloud/speech');

  // Instantiates a client
  const speech = Speech();

  // The Google Cloud Storage URI of the file on which to perform speech recognition, e.g. gs://my-bucket/audio.raw
  // const gcsUri = 'gs://my-bucket/audio.raw';

  // The encoding of the audio file, e.g. 'LINEAR16'
  // const encoding = 'LINEAR16';

  // The sample rate of the audio file, e.g. 16000
  // const sampleRate = 16000;

  const request = {
    encoding: encoding,
    sampleRate: sampleRate
  };

  // Detects speech in the audio file
  speech.recognize(gcsUri, request)
    .then((results) => {
      // The first element of the resolved array is the transcription
      const transcription = results[0];
      console.log(`Transcription: ${transcription}`);
    })
    .catch((err) => {
      // Report request failures instead of leaving the rejection unhandled
      console.error('ERROR:', err);
    });
  // [END speech_sync_recognize_gcs]
}
/**
 * Starts a recognition job for a local audio file, waits for the job to
 * finish, and logs the resulting transcription.
 *
 * @param {string} filename Path to the local audio file, e.g. /path/to/audio.raw
 * @param {string} encoding Encoding of the audio file, e.g. 'LINEAR16'
 * @param {number} sampleRate Sample rate of the audio file, e.g. 16000
 */
function asyncRecognize (filename, encoding, sampleRate) {
  // [START speech_async_recognize]
  // Imports the Google Cloud client library
  const Speech = require('@google-cloud/speech');

  // Instantiates a client
  const speech = Speech();

  // The path to the local file on which to perform speech recognition, e.g. /path/to/audio.raw
  // const filename = '/path/to/audio.raw';

  // The encoding of the audio file, e.g. 'LINEAR16'
  // const encoding = 'LINEAR16';

  // The sample rate of the audio file, e.g. 16000
  // const sampleRate = 16000;

  const request = {
    encoding: encoding,
    sampleRate: sampleRate
  };

  // Detects speech in the audio file. This creates a recognition job that you
  // can wait for now, or get its result later.
  speech.startRecognition(filename, request)
    .then((results) => {
      const operation = results[0];
      // Get a Promise representation of the final result of the job
      return operation.promise();
    })
    .then((transcription) => {
      console.log(`Transcription: ${transcription}`);
    })
    .catch((err) => {
      // Covers both a failed job start and a failed job result
      console.error('ERROR:', err);
    });
  // [END speech_async_recognize]
}
/**
 * Starts a recognition job for an audio file referenced by a Google Cloud
 * Storage URI, waits for the job to finish, and logs the resulting
 * transcription.
 *
 * @param {string} gcsUri Cloud Storage URI of the audio file, e.g. gs://my-bucket/audio.raw
 * @param {string} encoding Encoding of the audio file, e.g. 'LINEAR16'
 * @param {number} sampleRate Sample rate of the audio file, e.g. 16000
 */
function asyncRecognizeGCS (gcsUri, encoding, sampleRate) {
  // [START speech_async_recognize_gcs]
  // Imports the Google Cloud client library
  const Speech = require('@google-cloud/speech');

  // Instantiates a client
  const speech = Speech();

  // The Google Cloud Storage URI of the file on which to perform speech recognition, e.g. gs://my-bucket/audio.raw
  // const gcsUri = 'gs://my-bucket/audio.raw';

  // The encoding of the audio file, e.g. 'LINEAR16'
  // const encoding = 'LINEAR16';

  // The sample rate of the audio file, e.g. 16000
  // const sampleRate = 16000;

  const request = {
    encoding: encoding,
    sampleRate: sampleRate
  };

  // Detects speech in the audio file. This creates a recognition job that you
  // can wait for now, or get its result later.
  speech.startRecognition(gcsUri, request)
    .then((results) => {
      const operation = results[0];
      // Get a Promise representation of the final result of the job
      return operation.promise();
    })
    .then((transcription) => {
      console.log(`Transcription: ${transcription}`);
    })
    .catch((err) => {
      // Covers both a failed job start and a failed job result
      console.error('ERROR:', err);
    });
  // [END speech_async_recognize_gcs]
}
/**
 * Streams a local audio file to the Speech API through a recognize stream and
 * logs every data event the stream emits. Stream errors go to console.error.
 *
 * @param {string} filename Path to the local audio file, e.g. /path/to/audio.raw
 * @param {string} encoding Encoding of the audio file, e.g. 'LINEAR16'
 * @param {number} sampleRate Sample rate of the audio file, e.g. 16000
 */
function streamingRecognize (filename, encoding, sampleRate) {
  // [START speech_streaming_recognize]
  const fs = require('fs');

  // Imports the Google Cloud client library
  const Speech = require('@google-cloud/speech');

  // Instantiates a client
  const speech = Speech();

  // The path to the local file on which to perform speech recognition, e.g. /path/to/audio.raw
  // const filename = '/path/to/audio.raw';

  // The encoding of the audio file, e.g. 'LINEAR16'
  // const encoding = 'LINEAR16';

  // The sample rate of the audio file, e.g. 16000
  // const sampleRate = 16000;

  // Unlike the non-streaming calls, the audio settings are nested under `config`
  const request = {
    config: {
      encoding: encoding,
      sampleRate: sampleRate
    }
  };

  // Stream the audio to the Google Cloud Speech API
  const recognizeStream = speech.createRecognizeStream(request)
    .on('error', console.error)
    .on('data', (data) => {
      console.log('Data received: %j', data);
    });

  // Stream an audio file from disk to the Speech API, e.g. "./resources/audio.raw"
  fs.createReadStream(filename).pipe(recognizeStream);
  // [END speech_streaming_recognize]
}
/**
 * Records from the microphone and pipes the audio into a Speech API recognize
 * stream, writing each data event's `results` to stdout. Stream errors go to
 * console.error.
 *
 * @param {string} encoding Encoding of the recorded audio, e.g. 'LINEAR16'
 * @param {number} sampleRate Sample rate of the recorded audio, e.g. 16000
 */
function streamingMicRecognize (encoding, sampleRate) {
  // [START speech_streaming_mic_recognize]
  const record = require('node-record-lpcm16');

  // Imports the Google Cloud client library
  const Speech = require('@google-cloud/speech');

  // Instantiates a client
  const speech = Speech();

  // The encoding of the audio file, e.g. 'LINEAR16'
  // const encoding = 'LINEAR16';

  // The sample rate of the audio file, e.g. 16000
  // const sampleRate = 16000;

  const request = {
    config: {
      encoding: encoding,
      sampleRate: sampleRate
    }
  };

  // Create a recognize stream
  const recognizeStream = speech.createRecognizeStream(request)
    .on('error', console.error)
    .on('data', (data) => process.stdout.write(data.results));

  // Start recording and send the microphone input to the Speech API
  record.start({
    sampleRate: sampleRate,
    threshold: 0
  }).pipe(recognizeStream);

  console.log('Listening, press Ctrl+C to stop.');
  // [END speech_streaming_mic_recognize]
}
    // NOTE(review): from here to the end of the sample only fragments of the
    // command-line definition are visible. The call these arguments belong to,
    // its closing parentheses/commas, and the braces around the option objects
    // are absent from this view. The `$0`, `global`, `requiresArg`, `.example`
    // and `.epilogue` usage looks like a yargs-style builder chain; TODO confirm
    // against the upstream sample before repairing.
    //
    // Each group below is: command signature, help text, handler. Every handler
    // forwards the parsed options to one of the recognize functions defined above.
    `sync <filename>`,
    `Detects speech in a local audio file.`,
    (opts) => syncRecognize(opts.filename, opts.encoding, opts.sampleRate)
    `sync-gcs <gcsUri>`,
    `Detects speech in an audio file located in a Google Cloud Storage bucket.`,
    (opts) => syncRecognizeGCS(opts.gcsUri, opts.encoding, opts.sampleRate)
    `async <filename>`,
    `Creates a job to detect speech in a local audio file, and waits for the job to complete.`,
    (opts) => asyncRecognize(opts.filename, opts.encoding, opts.sampleRate)
    `async-gcs <gcsUri>`,
    `Creates a job to detect speech in an audio file located in a Google Cloud Storage bucket, and waits for the job to complete.`,
    (opts) => asyncRecognizeGCS(opts.gcsUri, opts.encoding, opts.sampleRate)
    `stream <filename>`,
    `Detects speech in a local audio file by streaming it to the Speech API.`,
    (opts) => streamingRecognize(opts.filename, opts.encoding, opts.sampleRate)
    // NOTE(review): the command signature for this entry is missing; the
    // `node $0 listen` example further down suggests it was `listen`. TODO confirm.
    `Detects speech in a microphone input stream.`,
    (opts) => streamingMicRecognize(opts.encoding, opts.sampleRate)
    // Option definitions: `encoding` (alias `e`, string, default 'LINEAR16') and
    // `sampleRate` (alias `r`, number, default 16000). Their closing braces are
    // not visible here.
    encoding: {
      alias: 'e',
      default: 'LINEAR16',
      global: true,
      requiresArg: true,
      type: 'string'
    sampleRate: {
      alias: 'r',
      default: 16000,
      global: true,
      requiresArg: true,
      type: 'number'
  // Usage examples, one per invocation style.
  .example(`node $0 sync ./resources/audio.raw -e LINEAR16 -r 16000`)
  .example(`node $0 async-gcs gs://my-bucket/audio.raw -e LINEAR16 -r 16000`)
  .example(`node $0 stream ./resources/audio.raw  -e LINEAR16 -r 16000`)
  .example(`node $0 listen`)
  // NOTE(review): the epilogue sentence ends abruptly; a URL appears to have
  // been dropped. TODO restore from the upstream sample.
  .epilogue(`For more information, see`)



我不是 100% 确定,但听起来可能有多种解释:

  1. 请求中要求了"单个话语"(single utterance,见流式识别配置 StreamingRecognitionConfig)。

    这似乎不太可能,因为默认值似乎是 false。不过,在请求中明确指定也没有什么坏处(const request = { singleUtterance: false, config: {...} })。

  2. 您遇到了客户端定义的超时(createRecognizeStream)。


  3. 麦克风关闭了音频流,而这一关闭又传播回了语音客户端的流。

    这似乎更合理一些,但我不是 100% 有信心。

