是否可以在 Watson Java API 中获取从文本到语音的单词计时?



我的老师举了这个 Java 示例,介绍如何从文本生成语音并保存到 Wav 文件。他要求我们修改它以将单词计时保存到磁盘。我在 SynthesizeOptions (http://watson-developer-cloud.github.io/java-sdk/docs/java-sdk-7.2.0/com/ibm/watson/text_to_speech/v1/model/SynthesizeOptions.Builder.html) 中看不到任何执行此操作的选项,即使 API 文档说这是可能的:https://cloud.ibm.com/docs/services/text-to-speech?topic=text-to-speech-timing#timingRequest

// Authenticate against IBM Cloud IAM using an API key.
Authenticator authenticator = new IamAuthenticator("api_key");
TextToSpeech textToSpeech = new TextToSpeech(authenticator);

try {
// Build the synthesis request. The `timings(...)` option asks the service to
// emit word-timing markers; per IBM's docs this is honored only over the
// WebSocket interface, which is why synthesizeUsingWebSocket() is used below.
SynthesizeOptions synthesizeOptions = new SynthesizeOptions.Builder()
.text(text)
.accept("audio/wav")
.voice("pt-BR_IsabelaV3Voice")
.timings(words)
.build();
// a callback is defined to handle certain events, like an audio transmission or a timing marker
// in this case, we'll build up a byte array of all the received bytes to build the resulting file
final ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
textToSpeech.synthesizeUsingWebSocket(synthesizeOptions, new BaseSynthesizeCallback() {
@Override
public void onAudioStream(byte[] bytes) {
// append to our byte array
try {
byteArrayOutputStream.write(bytes);
} catch (IOException e) {
e.printStackTrace();
}
}
});

// quick way to wait for synthesis to complete, since synthesizeUsingWebSocket() runs asynchronously
// NOTE(review): a fixed sleep races against synthesis — if the service takes
// longer than 5s the file below will be incomplete.
try {
Thread.sleep(5000);
} catch (InterruptedException e) {
e.printStackTrace();
}
// create file with audio data
String filename = id + ".wav";
OutputStream fileOutputStream = new FileOutputStream(filename);
byteArrayOutputStream.writeTo(fileOutputStream);
// clean up
byteArrayOutputStream.close();
fileOutputStream.close();

} catch (IOException e) {
e.printStackTrace();
}

你需要跳出框框思考:语音转文本服务确实提供单词时间戳功能,可以把生成的音频文件再送去识别来取得计时;不过如下面的代码所示,文本转语音的 WebSocket 接口本身也支持通过 timings 参数直接返回单词计时。

package com.watsontest;
import java.io.*;
//import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;

import com.google.gson.Gson;
import com.ibm.cloud.sdk.core.http.HttpMediaType;
import com.ibm.cloud.sdk.core.security.Authenticator;
import com.ibm.cloud.sdk.core.security.IamAuthenticator;
import com.ibm.watson.speech_to_text.v1.SpeechToText;
import com.ibm.watson.speech_to_text.v1.model.RecognizeOptions;
import com.ibm.watson.speech_to_text.v1.model.SpeechRecognitionResults;
import com.ibm.watson.text_to_speech.v1.TextToSpeech;
import com.ibm.watson.text_to_speech.v1.model.SynthesizeOptions;
import com.ibm.watson.text_to_speech.v1.model.Timings;
import com.ibm.watson.text_to_speech.v1.websocket.BaseSynthesizeCallback;

public class Main {

    /**
     * Synthesizes {@code text} to a WAV file named {@code <id>.wav} and writes the
     * word-timing markers reported by the Text to Speech WebSocket interface to
     * {@code timings.json}.
     *
     * @param id    base name of the output audio file ({@code id + ".wav"})
     * @param text  the text to synthesize
     * @param words the words to request timings for; when {@code null} or empty,
     *              the service wildcard {@code "words"} is sent, which requests a
     *              timing marker for every word (matches the original behavior)
     */
    public void geraVoz(String id, String text, ArrayList<String> words) {
        Authenticator authenticator = new IamAuthenticator("API_KEY_HERE");
        TextToSpeech textToSpeech = new TextToSpeech(authenticator);

        // "words" is the service's wildcard value: emit a timing for every word.
        final List<String> timingRequest =
                (words == null || words.isEmpty()) ? Collections.singletonList("words") : words;

        // Timing markers arrive on the WebSocket callback thread; use a
        // synchronized list so the collection is safe to read afterwards.
        final List<Timings> receivedTimings =
                Collections.synchronizedList(new ArrayList<Timings>());

        // Released in onDisconnected() so we wait exactly until synthesis is done
        // instead of racing a fixed Thread.sleep().
        final CountDownLatch done = new CountDownLatch(1);

        SynthesizeOptions synthesizeOptions = new SynthesizeOptions.Builder()
                .text(text)
                .accept("audio/wav")
                .voice("pt-BR_IsabelaV3Voice")
                .timings(timingRequest)
                .build();

        // Accumulates the audio chunks streamed back by the service.
        final ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
        textToSpeech.synthesizeUsingWebSocket(synthesizeOptions, new BaseSynthesizeCallback() {
            @Override
            public void onAudioStream(byte[] bytes) {
                // Audio arrives in chunks; append each one.
                try {
                    byteArrayOutputStream.write(bytes);
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }

            @Override
            public void onTimings(Timings timings) {
                receivedTimings.add(timings);
            }

            @Override
            public void onDisconnected() {
                System.out.println("disconnected!");
                // Persist all collected timing markers as a JSON array.
                String json = new Gson().toJson(receivedTimings);
                try (PrintWriter out = new PrintWriter("timings.json")) {
                    out.println(json);
                } catch (Exception e) {
                    System.out.println(e);
                }
                // Signal the waiting thread that synthesis is complete.
                done.countDown();
            }
        });

        try {
            // Bounded wait: never hang forever if the service misbehaves.
            if (!done.await(30, TimeUnit.SECONDS)) {
                System.out.println("timed out waiting for synthesis to finish");
            }
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt(); // preserve the interrupt status
        }

        // Write the accumulated audio; try-with-resources closes on all paths.
        String filename = id + ".wav";
        try (OutputStream fileOutputStream = new FileOutputStream(filename)) {
            byteArrayOutputStream.writeTo(fileOutputStream);
            System.out.println("recorded file");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    public static void main(String[] args) {
        new Main().geraVoz("id1", "testando transcrição de voz. Olá isso é um teste", null);
    }
}