这是原始代码:
#include <stdio.h>
#include <string.h>
#include <assert.h>
#if defined(_WIN32) && !defined(__CYGWIN__)
#include <windows.h>
#else
#include <sys/select.h>
#endif
#include <sphinxbase/err.h>
#include <sphinxbase/ad.h>
#include "pocketsphinx.h"
static const arg_t cont_args_def[] = {
POCKETSPHINX_OPTIONS,
/* Argument file. */
{"-argfile",
ARG_STRING,
NULL,
"Argument file giving extra arguments."},
{"-adcdev",
ARG_STRING,
NULL,
"Name of audio device to use for input."},
{"-infile",
ARG_STRING,
NULL,
"Audio file to transcribe."},
{"-inmic",
ARG_BOOLEAN,
"no",
"Transcribe audio from microphone."},
{"-time",
ARG_BOOLEAN,
"no",
"Print word times in file transcription."},
CMDLN_EMPTY_OPTION
};
static ps_decoder_t *ps;
static cmd_ln_t *config;
static FILE *rawfd;
static void
print_word_times()
{
int frame_rate = cmd_ln_int32_r(config, "-frate");
ps_seg_t *iter = ps_seg_iter(ps);
while (iter != NULL) {
int32 sf, ef, pprob;
float conf;
ps_seg_frames(iter, &sf, &ef);
pprob = ps_seg_prob(iter, NULL, NULL, NULL);
conf = logmath_exp(ps_get_logmath(ps), pprob);
printf("%s %.3f %.3f %fn", ps_seg_word(iter), ((float)sf / frame_rate),
((float) ef / frame_rate), conf);
iter = ps_seg_next(iter);
}
}
static int
check_wav_header(char *header, int expected_sr)
{
int sr;
if (header[34] != 0x10) {
E_ERROR("Input audio file has [%d] bits per sample instead of 16n", header[34]);
return 0;
}
if (header[20] != 0x1) {
E_ERROR("Input audio file has compression [%d] and not required PCMn", header[20]);
return 0;
}
if (header[22] != 0x1) {
E_ERROR("Input audio file has [%d] channels, expected single channel monon", header[22]);
return 0;
}
sr = ((header[24] & 0xFF) | ((header[25] & 0xFF) << 8) | ((header[26] & 0xFF) << 16) | ((header[27] & 0xFF) << 24));
if (sr != expected_sr) {
E_ERROR("Input audio file has sample rate [%d], but decoder expects [%d]n", sr, expected_sr);
return 0;
}
return 1;
}
/*
* Continuous recognition from a file
*/
static void
recognize_from_file()
{
int16 adbuf[2048];
const char *fname;
const char *hyp;
int32 k;
uint8 utt_started, in_speech;
int32 print_times = cmd_ln_boolean_r(config, "-time");
fname = cmd_ln_str_r(config, "-infile");
if ((rawfd = fopen(fname, "rb")) == NULL) {
E_FATAL_SYSTEM("Failed to open file '%s' for reading",
fname);
}
if (strlen(fname) > 4 && strcmp(fname + strlen(fname) - 4, ".wav") == 0) {
char waveheader[44];
fread(waveheader, 1, 44, rawfd);
if (!check_wav_header(waveheader, (int)cmd_ln_float32_r(config, "-samprate")))
E_FATAL("Failed to process file '%s' due to format mismatch.n", fname);
}
if (strlen(fname) > 4 && strcmp(fname + strlen(fname) - 4, ".mp3") == 0) {
E_FATAL("Can not decode mp3 files, convert input file to WAV 16kHz 16-bit mono before decoding.n");
}
ps_start_utt(ps);
utt_started = FALSE;
while ((k = fread(adbuf, sizeof(int16), 2048, rawfd)) > 0) {
ps_process_raw(ps, adbuf, k, FALSE, FALSE);
in_speech = ps_get_in_speech(ps);
if (in_speech && !utt_started) {
utt_started = TRUE;
}
if (!in_speech && utt_started) {
ps_end_utt(ps);
hyp = ps_get_hyp(ps, NULL);
if (hyp != NULL)
printf("%sn", hyp);
if (print_times)
print_word_times();
fflush(stdout);
ps_start_utt(ps);
utt_started = FALSE;
}
}
ps_end_utt(ps);
if (utt_started) {
hyp = ps_get_hyp(ps, NULL);
if (hyp != NULL) {
printf("%sn", hyp);
if (print_times) {
print_word_times();
}
}
}
fclose(rawfd);
}
/* Sleep for specified msec */
static void
sleep_msec(int32 ms)
{
#if (defined(_WIN32) && !defined(GNUWINCE)) || defined(_WIN32_WCE)
Sleep(ms);
#else
/* ------------------- Unix ------------------ */
struct timeval tmo;
tmo.tv_sec = 0;
tmo.tv_usec = ms * 1000;
select(0, NULL, NULL, NULL, &tmo);
#endif
}
/*
* Main utterance processing loop:
* for (;;) {
* start utterance and wait for speech to process
* decoding till end-of-utterance silence will be detected
* print utterance result;
* }
*/
static void
recognize_from_microphone()
{
ad_rec_t *ad;
int16 adbuf[2048];
uint8 utt_started, in_speech;
int32 k;
char const *hyp;
if ((ad = ad_open_dev(cmd_ln_str_r(config, "-adcdev"),
(int) cmd_ln_float32_r(config,
"-samprate"))) == NULL)
E_FATAL("Failed to open audio devicen");
if (ad_start_rec(ad) < 0)
E_FATAL("Failed to start recordingn");
if (ps_start_utt(ps) < 0)
E_FATAL("Failed to start utterancen");
utt_started = FALSE;
E_INFO("Ready....n");
for (;;) {
if ((k = ad_read(ad, adbuf, 2048)) < 0)
E_FATAL("Failed to read audion");
ps_process_raw(ps, adbuf, k, FALSE, FALSE);
in_speech = ps_get_in_speech(ps);
if (in_speech && !utt_started) {
utt_started = TRUE;
E_INFO("Listening...n");
}
if (!in_speech && utt_started) {
/* speech -> silence transition, time to start new utterance */
ps_end_utt(ps);
hyp = ps_get_hyp(ps, NULL );
if (hyp != NULL) {
printf("%sn", hyp);
fflush(stdout);
}
if (ps_start_utt(ps) < 0)
E_FATAL("Failed to start utterancen");
utt_started = FALSE;
E_INFO("Ready....n");
}
sleep_msec(100);
}
ad_close(ad);
}
int
main(int argc, char *argv[])
{
char const *cfg;
config = cmd_ln_parse_r(NULL, cont_args_def, argc, argv, TRUE);
/* Handle argument file as -argfile. */
if (config && (cfg = cmd_ln_str_r(config, "-argfile")) != NULL) {
config = cmd_ln_parse_file_r(config, cont_args_def, cfg, FALSE);
}
if (config == NULL || (cmd_ln_str_r(config, "-infile") == NULL && cmd_ln_boolean_r(config, "-inmic") == FALSE)) {
E_INFO("Specify '-infile <file.wav>' to recognize from file or '-inmic yes' to recognize from microphone.n");
cmd_ln_free_r(config);
return 1;
}
ps_default_search_args(config);
ps = ps_init(config);
if (ps == NULL) {
cmd_ln_free_r(config);
return 1;
}
E_INFO("%s COMPILED ON: %s, AT: %snn", argv[0], __DATE__, __TIME__);
if (cmd_ln_str_r(config, "-infile") != NULL) {
recognize_from_file();
} else if (cmd_ln_boolean_r(config, "-inmic")) {
recognize_from_microphone();
}
ps_free(ps);
cmd_ln_free_r(config);
return 0;
}
#if defined(_WIN32_WCE)
#pragma comment(linker,"/entry:mainWCRTStartup")
#include <windows.h>
//Windows Mobile has the Unicode main only
int
wmain(int32 argc, wchar_t * wargv[])
{
char **argv;
size_t wlen;
size_t len;
int i;
argv = malloc(argc * sizeof(char *));
for (i = 0; i < argc; i++) {
wlen = lstrlenW(wargv[i]);
len = wcstombs(NULL, wargv[i], wlen);
argv[i] = malloc(len + 1);
wcstombs(argv[i], wargv[i], wlen);
}
//assuming ASCII parameters
return main(argc, argv);
}
#endif
我可以通过此命令进行编译:
g++ -o output continuous.cpp -DMODELDIR="`pkg-config --variable=modeldir pocketsphinx`" `pkg-config --cflags --libs pocketsphinx sphinxbase`
并通过此命令运行: output -inmic yes
。
但是我喜欢转换代码,因为它不需要获取inmic yes
,并且会自动从麦克风启动程序。但是当我更改这些部分时,我会出现segmentation fault(core dumped)
错误:
static const arg_t cont_args_def= {"-inmic",
ARG_BOOLEAN,
"no",
"Transcribe audio from microphone."};
int main(int argc, char *argv[])
{
config = cmd_ln_parse_r(NULL, cont_args_def, argc, argv, TRUE);
if (cmd_ln_boolean_r(config, "-inmic")) {
recognize_from_microphone();
}
// recognize_from_microphone();
ps_free(ps);
cmd_ln_free_r(config);
return 0;
}
我搜索了很多东西,并将文档红色,但不了解问题是什么?
将最后一个参数从TRUE
更改为CC_5到FALSE
。这与严格的检查有关。我通过在sphinxbase代码中读取cmd_ln.c的源代码来弄清楚这一点。
我还将cont_args_def
中-inmic
的布尔值从"否"更改为"是"。