Speex回声消除配置



我正在制作一个安卓到安卓的VoIP(扬声器外放)应用程序,使用安卓的AudioRecord和AudioTrack类,并通过NDK调用Speex进行回声消除。我能够成功地向Speex的speex_echo_cancellation()函数传入数据并从中取回数据,但回声仍然存在。

以下是录制/发送和接收/播放音频的相关安卓线程代码:

/**
 * Creates the audio thread.
 *
 * Stores the datagram socket used by run() and forwards the Speex
 * parameters to the native echo-canceller initialiser.
 *
 * @param socket       socket used to send and receive audio packets
 * @param frameSize    frame size passed to nativeMethod_initEchoState
 * @param filterLength filter length passed to nativeMethod_initEchoState
 */
public MyThread(DatagramSocket socket, int frameSize, int filterLength){
    this.socket = socket;
    nativeMethod_initEchoState(frameSize, filterLength);
}
/**
 * Audio loop: records from the microphone, sends the samples over the
 * socket, receives remote samples, plays them, and passes the recorded and
 * played frames through the native Speex echo canceller before sending.
 *
 * Only the samples actually returned by AudioRecord.read() are copied into
 * the outgoing packet; copying the whole array into a buffer sized from
 * shortsRead would overflow on a short read. A negative read result (an
 * AudioRecord error code) ends the loop instead of being used as a size.
 */
public void run(){
    short[] audioShorts, recvShorts, recordedShorts, filteredShorts;
    byte[] audioBytes, recvBytes;
    int shortsRead;
    DatagramPacket packet;
    //initialize recorder and player
    int samplingRate = 8000;
    int managerBufferSize = 2000;
    AudioTrack player = new AudioTrack(AudioManager.STREAM_MUSIC, samplingRate, AudioFormat.CHANNEL_OUT_MONO, AudioFormat.ENCODING_PCM_16BIT, managerBufferSize, AudioTrack.MODE_STREAM);
    recorder = new AudioRecord(MediaRecorder.AudioSource.MIC, samplingRate, AudioFormat.CHANNEL_IN_MONO, AudioFormat.ENCODING_PCM_16BIT, managerBufferSize);
    recorder.startRecording();
    player.play();
    try {
        //record first packet
        audioShorts = new short[1000];
        shortsRead = recorder.read(audioShorts, 0, audioShorts.length);
        if (shortsRead < 0) {
            //read failed: nothing to send
            return;
        }
        //convert only the shorts that were read to bytes to send
        audioBytes = new byte[shortsRead*2];
        ByteBuffer.wrap(audioBytes).order(ByteOrder.LITTLE_ENDIAN).asShortBuffer().put(audioShorts, 0, shortsRead);
        //send bytes
        packet = new DatagramPacket(audioBytes, audioBytes.length);
        socket.send(packet);
        while (!this.isInterrupted()){
            //receive packet/bytes (received audio data should have echo cancelled already)
            recvBytes = new byte[2000];
            packet = new DatagramPacket(recvBytes, recvBytes.length);
            socket.receive(packet);
            //convert bytes to shorts
            recvShorts = new short[packet.getLength()/2];
            ByteBuffer.wrap(packet.getData(), 0, packet.getLength()).order(ByteOrder.LITTLE_ENDIAN).asShortBuffer().get(recvShorts);
            //play shorts
            player.write(recvShorts, 0, recvShorts.length);
            //record shorts
            recordedShorts = new short[1000];
            shortsRead = recorder.read(recordedShorts, 0, recordedShorts.length);
            if (shortsRead < 0) {
                //read failed: stop instead of allocating a negative-sized buffer
                break;
            }
            //send played and recorded shorts into speex,
            //returning audio data with the echo removed
            //NOTE(review): presumably the native side expects both arrays to hold a full frame - confirm
            filteredShorts = nativeMethod_speexEchoCancel(recordedShorts, recvShorts);
            //convert filtered shorts to bytes (only as many as were recorded)
            audioBytes = new byte[shortsRead*2];
            ByteBuffer.wrap(audioBytes).order(ByteOrder.LITTLE_ENDIAN).asShortBuffer().put(filteredShorts, 0, shortsRead);
            //send off bytes
            packet = new DatagramPacket(audioBytes, audioBytes.length);
            socket.send(packet);
        }//end of while loop
    } finally {
        //the player is local to this method, so release it here;
        //recorder is a field and its lifecycle is left to its owner
        player.release();
    }
}

这是相关的NDK/JNI代码:

/**
 * JNI entry point: creates a Speex echo-canceller state from the given
 * frame size and filter length and stores it in echo_state.
 *
 * @param env          JNI environment (unused)
 * @param jobj         calling Java object (unused)
 * @param frameSize    first argument to speex_echo_state_init
 * @param filterLength second argument to speex_echo_state_init
 */
void nativeMethod_initEchoState(JNIEnv *env, jobject jobj, jint frameSize, jint filterLength)
{
    echo_state = speex_echo_state_init(frameSize, filterLength);
}
/**
 * JNI entry point: runs speex_echo_cancellation() on one frame.
 *
 * @param env         JNI environment
 * @param jObj        calling Java object (unused)
 * @param input_frame recorded samples passed as the second argument of
 *                    speex_echo_cancellation
 * @param echo_frame  played samples passed as the third argument of
 *                    speex_echo_cancellation
 * @return a new short array with the same length as input_frame holding the
 *         canceller output, or NULL on failure. On failure a Java exception
 *         is pending when an array is too short or a JNI allocation failed.
 *
 * The output is written straight into the returned array's elements, so no
 * intermediate array is needed. The input arrays are not modified and are
 * released with JNI_ABORT to skip the copy-back.
 */
jshortArray nativeMethod_speexEchoCancel(JNIEnv *env, jobject jObj, jshortArray input_frame, jshortArray echo_frame){
    jshortArray output_shorts = NULL;
    jshort *native_input_frame = NULL;
    jshort *native_echo_frame = NULL;
    jshort *native_output_frame = NULL;
    jint length;
    int frame_size = 0;
    int ok;

    (void)jObj;

    if (echo_state == NULL || input_frame == NULL || echo_frame == NULL) {
        return NULL;
    }

    /* The canceller processes exactly one frame from each buffer; arrays
     * shorter than that would be read or written out of bounds. */
    speex_echo_ctl(echo_state, SPEEX_ECHO_GET_FRAME_SIZE, &frame_size);
    length = (*env)->GetArrayLength(env, input_frame);
    if (length < frame_size || (*env)->GetArrayLength(env, echo_frame) < frame_size) {
        jclass iae = (*env)->FindClass(env, "java/lang/IllegalArgumentException");
        if (iae != NULL) {
            (*env)->ThrowNew(env, iae, "input_frame and echo_frame must each hold at least one Speex frame");
        }
        return NULL;
    }

    /* New arrays are zero-filled, so samples past frame_size stay 0. */
    output_shorts = (*env)->NewShortArray(env, length);
    if (output_shorts == NULL) {
        return NULL;
    }

    native_input_frame = (*env)->GetShortArrayElements(env, input_frame, NULL);
    native_echo_frame = (*env)->GetShortArrayElements(env, echo_frame, NULL);
    native_output_frame = (*env)->GetShortArrayElements(env, output_shorts, NULL);
    ok = native_input_frame != NULL && native_echo_frame != NULL && native_output_frame != NULL;

    if (ok) {
        speex_echo_cancellation(echo_state, native_input_frame, native_echo_frame, native_output_frame);
    }

    /* Mode 0 commits the output into the Java array; JNI_ABORT discards. */
    if (native_output_frame != NULL) {
        (*env)->ReleaseShortArrayElements(env, output_shorts, native_output_frame, ok ? 0 : JNI_ABORT);
    }
    if (native_echo_frame != NULL) {
        (*env)->ReleaseShortArrayElements(env, echo_frame, native_echo_frame, JNI_ABORT);
    }
    if (native_input_frame != NULL) {
        (*env)->ReleaseShortArrayElements(env, input_frame, native_input_frame, JNI_ABORT);
    }
    return ok ? output_shorts : NULL;
}

这些代码运行良好,音频数据确实能在两台安卓设备之间完成发送/接收/处理/播放。在8000 Hz的音频采样率、每个数据包2000字节(即1000个short)的条件下,我发现需要将frameSize设为1000才能使播放的音频流畅。filterLength(根据Speex文档,也称为尾部长度)的大多数取值都能运行,但似乎对回声消除没有任何影响。

有人对AEC有足够的了解,可以为我提供一些关于实现或配置Speex的建议吗?感谢阅读。

您的代码是对的,但本机代码中缺少一些东西。我修改了init方法,并在回声消除之后添加了Speex预处理,之后您的代码运行良好(我在Windows中尝试过)。这是本机代码:

#include <jni.h>
#include "speex/speex_echo.h"
#include "speex/speex_preprocess.h"
#include "EchoCanceller_jniHeader.h"
/* Echo-canceller state: created in Java_speex_EchoCanceller_open, used in
 * Java_speex_EchoCanceller_process, destroyed in Java_speex_EchoCanceller_close. */
SpeexEchoState *st;
/* Preprocessor state: created in Java_speex_EchoCanceller_open, run on each
 * output frame in Java_speex_EchoCanceller_process, destroyed in
 * Java_speex_EchoCanceller_close. */
SpeexPreprocessState *den;
/**
 * JNI entry point: creates the echo-canceller state (st) and the
 * preprocessor state (den) and links them together.
 *
 * @param env         JNI environment (unused)
 * @param jObj        calling Java object (unused)
 * @param jSampleRate sampling rate given to both Speex states
 * @param jBufSize    frame size given to both state initialisers
 * @param jTotalSize  second argument to speex_echo_state_init
 *
 * NOTE(review): st and den are overwritten unconditionally, so calling this
 * twice without an intervening close drops the earlier states.
 */
JNIEXPORT void JNICALL Java_speex_EchoCanceller_open
(JNIEnv *env, jobject jObj, jint jSampleRate, jint jBufSize, jint jTotalSize)
{
    /* Speex ctl calls take a pointer to int, so copy the jint first. */
    int rate = jSampleRate;

    /* Echo canceller and its sampling rate. */
    st = speex_echo_state_init(jBufSize, jTotalSize);
    speex_echo_ctl(st, SPEEX_ECHO_SET_SAMPLING_RATE, &rate);

    /* Preprocessor, attached to the echo canceller. */
    den = speex_preprocess_state_init(jBufSize, rate);
    speex_preprocess_ctl(den, SPEEX_PREPROCESS_SET_ECHO_STATE, st);
}
/**
 * JNI entry point: runs echo cancellation followed by the Speex
 * preprocessor on one frame.
 *
 * @param env         JNI environment
 * @param jObj        calling Java object (unused)
 * @param input_frame recorded samples passed as the second argument of
 *                    speex_echo_cancellation
 * @param echo_frame  played samples passed as the third argument of
 *                    speex_echo_cancellation
 * @return a new short array with the same length as input_frame holding the
 *         processed output, or NULL on failure. On failure a Java exception
 *         is pending when an array is too short or a JNI allocation failed.
 *
 * The output is written straight into the returned array's elements, so no
 * intermediate array is needed. The input arrays are not modified and are
 * released with JNI_ABORT to skip the copy-back.
 */
JNIEXPORT jshortArray JNICALL Java_speex_EchoCanceller_process
(JNIEnv * env, jobject jObj, jshortArray input_frame, jshortArray echo_frame)
{
    jshortArray output_shorts = NULL;
    jshort *native_input_frame = NULL;
    jshort *native_echo_frame = NULL;
    jshort *native_output_frame = NULL;
    jint length;
    int frame_size = 0;
    int ok;

    (void)jObj;

    /* st/den are NULL until open has been called. */
    if (st == NULL || den == NULL || input_frame == NULL || echo_frame == NULL) {
        return NULL;
    }

    /* The canceller processes exactly one frame from each buffer; arrays
     * shorter than that would be read or written out of bounds. */
    speex_echo_ctl(st, SPEEX_ECHO_GET_FRAME_SIZE, &frame_size);
    length = (*env)->GetArrayLength(env, input_frame);
    if (length < frame_size || (*env)->GetArrayLength(env, echo_frame) < frame_size) {
        jclass iae = (*env)->FindClass(env, "java/lang/IllegalArgumentException");
        if (iae != NULL) {
            (*env)->ThrowNew(env, iae, "input_frame and echo_frame must each hold at least one Speex frame");
        }
        return NULL;
    }

    /* New arrays are zero-filled, so samples past frame_size stay 0. */
    output_shorts = (*env)->NewShortArray(env, length);
    if (output_shorts == NULL) {
        return NULL;
    }

    native_input_frame = (*env)->GetShortArrayElements(env, input_frame, NULL);
    native_echo_frame = (*env)->GetShortArrayElements(env, echo_frame, NULL);
    native_output_frame = (*env)->GetShortArrayElements(env, output_shorts, NULL);
    ok = native_input_frame != NULL && native_echo_frame != NULL && native_output_frame != NULL;

    if (ok) {
        /* call echo cancellation */
        speex_echo_cancellation(st, native_input_frame, native_echo_frame, native_output_frame);
        /* preprocess output frame */
        speex_preprocess_run(den, native_output_frame);
    }

    /* Mode 0 commits the output into the Java array; JNI_ABORT discards. */
    if (native_output_frame != NULL) {
        (*env)->ReleaseShortArrayElements(env, output_shorts, native_output_frame, ok ? 0 : JNI_ABORT);
    }
    if (native_echo_frame != NULL) {
        (*env)->ReleaseShortArrayElements(env, echo_frame, native_echo_frame, JNI_ABORT);
    }
    if (native_input_frame != NULL) {
        (*env)->ReleaseShortArrayElements(env, input_frame, native_input_frame, JNI_ABORT);
    }
    return ok ? output_shorts : NULL;
}
/**
 * JNI entry point: destroys the echo-canceller and preprocessor states.
 *
 * Each pointer is checked before it is destroyed and reset to NULL
 * afterwards, so calling close twice, or before open, neither passes a NULL
 * state to Speex nor frees the same state twice.
 *
 * @param env  JNI environment (unused)
 * @param jObj calling Java object (unused)
 */
JNIEXPORT void JNICALL Java_speex_EchoCanceller_close
(JNIEnv *env, jobject jObj)
{
    (void)env;
    (void)jObj;

    if (st != NULL) {
        speex_echo_state_destroy(st);
        st = NULL;
    }
    if (den != NULL) {
        speex_preprocess_state_destroy(den);
        den = NULL;
    }
}

您可以在speex库的源代码中找到有用的示例,如编码、解码、回声消除(http://www.speex.org/downloads/)

您是否正确地对齐了远端信号(您称之为recv)和近端信号(您也称之为record)?总有一些回放/记录延迟需要考虑。这通常需要将远端信号在环形缓冲器中缓冲一段特定的时间。在PC上,这通常是50-120毫秒。在安卓系统上,我怀疑它要高得多。可能在150-400毫秒的范围内。我建议使用带有speex的100ms尾部长度,并调整远端缓冲区的大小,直到AEC收敛。这些变化应该允许AEC收敛,独立于预处理器的包含,这在这里是不需要的。

相关内容

  • 没有找到相关文章

最新更新