import pyaudio
from ibm_watson import SpeechToTextV1
from ibm_watson.websocket import RecognizeCallback, AudioSource
from threading import Thread
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
try:
from Queue import Queue, Full
except ImportError:
from queue import Queue, Full
###############################################
#### Initalize queue to store the recordings ##
###############################################
CHUNK = 1024
# Note: It will discard if the websocket client can't consumme fast enough
# So, increase the max size as per your choice
BUF_MAX_SIZE = CHUNK * 10
# Buffer to store audio
q = Queue(maxsize=int(round(BUF_MAX_SIZE / CHUNK)))
# Create an instance of AudioSource
audio_source = AudioSource(q, True, True)
###############################################
#### Prepare Speech to Text Service ########
###############################################
# initialize speech to text service
authenticator = IAMAuthenticator('apikey')
speech_to_text = SpeechToTextV1(authenticator=authenticator)
#speech_to_text.set_service_url('https://api.us-south.speech-to-text.watson.cloud.ibm.com/instances/62a2f19f-959f-4c3c-a276-27ab0e458341/v1/recognize')
speech_to_text.set_service_url('https://stream.watsonplatform.net/speech-to-text/api')
# define callback for the speech to text service
class MyRecognizeCallback(RecognizeCallback):
def __init__(self):
RecognizeCallback.__init__(self)
def on_transcription(self, transcript):
print(transcript)
def on_connected(self):
print('Connection was successful')
def on_error(self, error):
print('Error received: {}'.format(error))
def on_inactivity_timeout(self, error):
print('Inactivity timeout: {}'.format(error))
def on_listening(self):
print('Service is listening')
def on_hypothesis(self, hypothesis):
print(hypothesis)
def on_data(self, data):
print(data)
def on_close(self):
print("Connection closed")
# this function will initiate the recognize service and pass in the AudioSource
def recognize_using_weboscket(*args):
mycallback = MyRecognizeCallback()
speech_to_text.recognize_using_websocket(audio=audio_source,
content_type='audio/l16; rate=44100',
recognize_callback=mycallback,
interim_results=True)
###############################################
#### Prepare the for recording using Pyaudio ##
###############################################
# Variables for recording the speech
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 44100
# define callback for pyaudio to store the recording in queue
def pyaudio_callback(in_data, frame_count, time_info, status):
try:
q.put(in_data)
except Full:
pass # discard
return (None, pyaudio.paContinue)
# instantiate pyaudio
audio = pyaudio.PyAudio()
# open stream using callback
stream = audio.open(
format=FORMAT,
channels=CHANNELS,
rate=RATE,
input=True,
frames_per_buffer=CHUNK,
stream_callback=pyaudio_callback,
start=False
)
#########################################################################
#### Start the recording and start service to recognize the stream ######
#########################################################################
print("Enter CTRL+C to end recording...")
stream.start_stream()
try:
recognize_thread = Thread(target=recognize_using_weboscket, args=())
recognize_thread.start()
while True:
pass
except KeyboardInterrupt:
# stop recording
stream.stop_stream()
stream.close()
audio.terminate()
audio_source.completed_recording()
这是使用麦克风作为输入的IBM语音到文本服务的代码。我可以知道这个程序的输出是什么吗?这是我得到的输出:
Enter CTRL+C to end recording...
Connection was successful
Service is listening
File "C:Users---AppDataLocalProgramsPythonPython38-32libsite-packageswebsocket_app.py", line 320, in _callback
callback(self, *args)
File "C:Users---AppDataLocalProgramsPythonPython38-32libsite-packagesibm_watsonwebsocketrecognize_listener.py", line 199, in on_data
hypothesis = json_object['results'][0]['alternatives'][0][
Connection closed
当我用无线耳机麦克风进行测试时,它突然工作起来。但不确定原因,因为这两种设备都运行良好。输出是控制台中的文字记录。
我也遇到了这种情况,我认为问题的原因是您发送到websocket的音频可能很难识别,因此websocket响应为none/null,当假设函数试图获得答案时,会出现错误,因为结果不存在。
hyposesis函数(def-hypesis(的输出将是一个字符串,其中包含转录音频文件的结果,而data函数(def-data(的输出则是一个json,如下所示:
{'result_index': 0, 'results': [{'final': True, 'alternatives': [{'transcript': 'hello ', 'confidence': 0.66}], 'keywords_result': {}}]}