输入信号恒定时,快速傅里叶变换(FFT)得到的幅度值却不一致



在我的节拍检测中,我使用快速傅里叶变换来检测音频信号中的低音。我录制的是单独的底鼓声,声源位置和音量都保持不变。把幅度值随时间绘制出来后,我得到的却不是恒定值,而且它们之间的差异非常大。也许你知道为什么会这样?我只能猜测,也许是我没有为 FFT 使用正确的缓冲区大小或窗口大小?

下面是绘制出的图形和源代码:

私有类 RecordingThread 扩展 Thread {

    private boolean mShallContinue = true;
    @Override
    public void run() {
// Compute the minimum required audio buffer size and allocate the
    // buffer.
    mBufferSize = 4096;// AudioRecord.getMinBufferSize(SAMPLING_RATE,
                        // //4096;//
                        // AudioFormat.CHANNEL_IN_MONO,
    mAudioBuffer = new short[1024];// [mBufferSize / 2];
    bufferDouble2 = new int[mBufferSize / 2];
    bufferDouble = new int[(blockSize - 1) * 2];
    camera = Camera.open();
}
        AudioRecord record = new AudioRecord(
                MediaRecorder.AudioSource.DEFAULT, SAMPLING_RATE,
                AudioFormat.CHANNEL_IN_MONO,
                AudioFormat.ENCODING_PCM_16BIT, mBufferSize);
        short[] buffer = new short[blockSize];
        double[] audioDataDoubles = new double[(blockSize * 2)];
        double[] re = new double[blockSize];
        double[] im = new double[blockSize];
        double[] magnitude = new double[blockSize];
        // start collecting data
        record.startRecording();
        DoubleFFT_1D fft = new DoubleFFT_1D(blockSize);
        synchronized (this) {
            while (shallContinue()) {
                /** decibels */
                record.read(mAudioBuffer, 0, 1024);
                // updateDecibelLevel();
                /** frequency */
                // /windowing!?
                for (int i = 0; i < mAudioBuffer.length; i++) {
                    bufferDouble2[i] = (int) mAudioBuffer[i];
                }
                for (int i = 0; i < blockSize - 1; i++) {
                    double x = -Math.PI + 2 * i * (Math.PI / blockSize);
                    double winValue = (1 + Math.cos(x)) / 2.0;
                    bufferDouble[i] = (int) (bufferDouble2[i] * winValue);
                }
                int bufferReadResult = record.read(buffer, 0, blockSize);
                // Read in the data from the mic to the array
                for (int i = 0; i < blockSize && i < bufferReadResult; i++) {
                    audioDataDoubles[2 * i] = (double) buffer[i] / 32768.0; // signed
                                                                            // 16
                                                                            // bit
                    audioDataDoubles[(2 * i) + 1] = 0.0;
                }
                // audiodataDoubles now holds data to work with
                fft.complexForward(audioDataDoubles); // complexForward
                for (int i = 0; i < blockSize; i++) {
                    // real is stored in first part of array
                    re[i] = audioDataDoubles[i * 2];
                    // imaginary is stored in the sequential part
                    im[i] = audioDataDoubles[(i * 2) + 1];
                    // magnitude is calculated by the square root of
                    // (imaginary^2 + real^2)
                    magnitude[i] = Math.sqrt((re[i] * re[i])
                            + (im[i] * im[i]));
                }
                magnitude[0] = 0.0;
                magnitude2 = magnitude[2];
                magnitude3 = magnitude[3];
                magnitude4 = magnitude[4];
                updateShortBuffer();
                bufferCount++;
                updateLongBuffer();
                // if (detectedRoomRMS == 200)
                updateFrequency();
                System.out.println(System.currentTimeMillis() + " M2: "
                        + magnitude2 + " M3: " + magnitude3 + " M4: "
                        + magnitude4 + " M5: " + magnitude[5] + " M10: "
                        + magnitude[10] + " M20: " + magnitude[20] + " M24: "
                        + magnitude[24] + " M48: " + magnitude[48] + " LONG20: "
                        + rms_Long_Buffer_five + " LONNG: "
                        + rms_Long_Buffer);
            }
            record.stop(); // stop recording please.
            record.release(); // Destroy the recording, PLEASE!
        }
    }
    /**
     * true if the thread should continue running or false if it should stop
     */
    private synchronized boolean shallContinue() {
        return mShallContinue;
    }
    /**
     * Notifies the thread that it should stop running at the next
     * opportunity.
     */
    private synchronized void stopRunning() {
        mShallContinue = false;
    }
}
/**
 * Posts a UI update through {@code tvfreq.post(...)}.
 *
 * <p>While {@code RMSMessureDone} is false the frequency view shows a
 * placeholder: {@code "..."} once {@code rmsCounter} exceeds 10, otherwise
 * {@code ".."}. Afterwards {@code BPM} is rounded to one decimal via
 * {@code round(BPM, 1)}, stored back, and shown with a {@code " bpm"} suffix,
 * and {@code tvdb} shows {@code detectedRoomRMS}.
 */
private void updateFrequency() {
    tvfreq.post(new Runnable() {
        @Override
        public void run() {
            if (!RMSMessureDone) {
                // Measurement still in progress: show a placeholder only.
                tvfreq.setTextColor(Color.WHITE);
                tvfreq.setText(rmsCounter > 10 ? "..." : "..");
                return;
            }

            // Measurement finished: publish the tempo and the room level.
            BPM = round(BPM, 1);
            final String bpmLabel = Double.toString(BPM) + " bpm";
            tvfreq.setTextColor(Color.WHITE);
            tvfreq.setText(bpmLabel);

            final String roomLevel = Double.toString(detectedRoomRMS);
            tvdb.setText(roomLevel);
        }
    });
}

我认为你看到的差异,与音符起始点(onset)相对于 FFT 窗口的位置有关。

从根本上说,您使用的方法对于此问题是错误的:

1:信号的性质:来自低音鼓(我猜你指的是底鼓?)的信号起音很陡(因为刚被重重敲击),并且衰减迅速。初始尖峰在很宽的频带上是不相干的,本质上就是白噪声。虽然其中会有大量低频成分,但低频并不占主导地位。初始起音之后,鼓皮以其固有频率振动,输出幅度远低于初始尖峰。

2:透过矩形窗观察:您当前对样本使用的是矩形窗函数。这不是一个好的选择,因为它会把能量泄漏到你不希望出现的频率上(即频谱泄漏)。汉明(Hamming)窗和布莱克曼(Blackman)窗是 FFT 的常见选择。

3:分辨率:使用 FFT 的根本缺陷在于它是加窗的。DFT 的结果只是每个频率区间(bin)在窗口时间内的贡献。窗口周期限制了您的时间分辨率(您只知道某个频率范围内的事件发生在窗口内的某个时刻)。另一方面,如果想从 FFT 的低频区间中获得有意义的结果,奈奎斯特定理同样适用于窗口频率与被测信号频率之间的关系。假设您以 44.1kHz 的频率采样,这意味着如果想在 50Hz 处获得有意义的结果,就需要 2048 点的 DFT。这样每个窗口的周期为 0.047 秒(约 1/20 秒),这就是您每次时间测量的误差范围。

目前有多种时域起始点(onset)检测算法,它们通常用于节拍检测。如果还想判断信号的可能来源,可以同时配合使用频域方法。

最新更新