Extracting features from a blob with Librosa

I am trying to record a voice message on the frontend and send it to a Django backend, where it is tested against an ML algorithm for voice gender recognition. On the frontend I record the voice with videojs and send the blob to the backend with AJAX, like this:

{% extends 'base.html' %}
{% load static %}
{% block title %}Voice Detector{% endblock %}
{% block extracss %}
<link href="{% static 'css/voice_detector.css' %}" rel="stylesheet" />
<link href="{% static 'css/video-js.css' %}" rel="stylesheet" />
<link href="{% static 'css/all.min.css' %}" rel="stylesheet" />
<link href="{% static 'css/videojs.wavesurfer.min.css' %}" rel="stylesheet" />
<link href="{% static 'css/videojs.record.css' %}" rel="stylesheet" />
{% endblock %}
{% block content %}
<div class="banner">
  <div class="max-width">
    <div class="banner-content">
      <p class="motto">Test your voice right now!</p>
      <p class="description">
        Register your voice while reading the text below and our program will
        detect your gender in a few seconds!
      </p>
    </div>
  </div>
</div>
<div class="details">
  <section class="section">
    <div class="container">
      <div class="columns">
        <div class="column is-offset-4 is-4">
          <h1 class="title">Record audio</h1>
          <article class="message is-success" id="alert">
            <div class="message-header">
              <p>Recorded successfully!</p>
              <button class="delete" aria-label="delete"></button>
            </div>
            <div class="message-body">
              You have successfully recorded your message. You can now click on
              the Submit button to post it.
            </div>
          </article>
          <div class="field">
            <div
              class="control has-icons-left has-icons-right"
              style="margin-top: 1rem"
            >
              <audio id="recordAudio" class="video-js vjs-default-skin"></audio>
            </div>
            <div class="control" style="margin-top: 1rem">
              <button class="home-btn" id="submit" disabled>Submit</button>
            </div>
          </div>
        </div>
      </div>
    </div>
  </section>
</div>
{% endblock %}
{% block extrajs %}
<script src="{% static 'js/video.min.js' %}"></script>
<script src="{% static 'js/RecordRTC.js' %}"></script>
<script src="{% static 'js/adapter-latest.js' %}"></script>
<script src="{% static 'js/wavesurfer.js' %}"></script>
<script src="{% static 'js/wavesurfer.microphone.min.js' %}"></script>
<script src="{% static 'js/videojs.wavesurfer.min.js' %}"></script>
<script src="{% static 'js/videojs.record.min.js' %}"></script>
<script src="{% static 'js/browser-workaround.js' %}"></script>
<script>
  // First, hide the success message
  document.getElementById("alert").style.display = "none";
  // Next, declare the options that will be passed into the recording constructor
  const options = {
    controls: true,
    bigPlayButton: false,
    width: 600,
    height: 300,
    fluid: true, // this ensures that it's responsive
    plugins: {
      wavesurfer: {
        backend: "WebAudio",
        waveColor: "#f7fff7", // change the wave color here; the background color was set in the CSS above
        progressColor: "#ffe66d",
        displayMilliseconds: true,
        debug: true,
        cursorWidth: 1,
        hideScrollbar: true,
        plugins: [
          // enable the microphone plugin
          WaveSurfer.microphone.create({
            bufferSize: 4096,
            numberOfInputChannels: 1,
            numberOfOutputChannels: 1,
            constraints: {
              video: false,
              audio: true,
            },
          }),
        ],
      },
      record: {
        audio: true, // only audio is turned on
        video: false, // you can turn this on as well if you prefer video recording
        maxLength: 180, // maximum recording length in seconds
        displayMilliseconds: true,
        debug: true,
      },
    },
  };
  // apply audio workarounds for certain browsers
  applyAudioWorkaround();
  // create the player, passing the id of the audio element created above
  var player = videojs("recordAudio", options, function () {
    // print version information at startup
    var msg =
      "Using video.js " +
      videojs.VERSION +
      " with videojs-record " +
      videojs.getPluginVersion("record") +
      ", videojs-wavesurfer " +
      videojs.getPluginVersion("wavesurfer") +
      ", wavesurfer.js " +
      WaveSurfer.VERSION +
      " and recordrtc " +
      RecordRTC.version;
    videojs.log(msg);
  });
  // error handling
  player.on("deviceError", function () {
    console.log("device error:", player.deviceErrorCode);
  });
  player.on("error", function (element, error) {
    console.error(error);
  });
  // user clicked the record button and started recording
  player.on("startRecord", function () {
    console.log("started recording!");
    $("#submit").prop("disabled", true);
  });
  // user completed recording and the stream is available
  player.on("finishRecord", function () {
    const audioFile = player.recordedData;
    console.log("finished recording: ", audioFile);
    $("#submit").prop("disabled", false);
    document.getElementById("alert").style.display = "block";
  });
  // attach a click handler to the submit button
  $("#submit").on("click", function (event) {
    event.preventDefault();
    let btn = $(this);
    // change the button text and disable it
    btn.html("Submitting...").prop("disabled", true).addClass("disable-btn");
    // create a new File from the recordedData blob and give it a name
    const recordedFile = new File([player.recordedData], `test.wav`);
    // initialize an empty FormData
    let data = new FormData();
    // append the recorded file
    data.append("file", recordedFile);
    // post it to the url endpoint
    $.ajax({
      url: "{% url 'detector' %}",
      method: "POST",
      data: data,
      dataType: "json",
      success: function (response) {
        if (response.success) {
          document.getElementById("alert").style.display = "block";
          window.location.href = `${response.url}`;
        } else {
          btn.html("Error").prop("disabled", false);
        }
      },
      error: function (error) {
        console.error(error);
      },
      cache: false,
      processData: false,
      contentType: false,
    });
  });
</script>
{% endblock %}

On the backend, I tried to save the file as a valid .wav using wave, like this:

def post(self, request):
    f = request.FILES['file']
    with open('file.wav', 'wb+') as destination:
        for chunk in f.chunks():
            destination.write(chunk)
    with open('file.wav', 'rb') as file:
        file_content = file.read()
    audio = wave.open('test.wav', 'wb')
    audio.setnchannels(1)
    audio.setnframes(1)
    audio.setsampwidth(1)
    audio.setframerate(8000)
    audio.writeframes(file_content)
    audio.close()
    (prediction, probability) = predict('test.wav')
    context["prediction"] = prediction
    context["probability"] = probability * 100
    os.remove(file_name)
    return render(request, self.template_name, context=context)

I tried two things here. The first attempt was to save the blob directly to a file with open in 'wb' mode; the problem with that approach is that librosa complains that the file type is not recognized. The other attempt was to use wave, but no matter what I tried, the file saved with wave came out as noise, and the prediction algorithm failed on it.
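
One likely reason for the noise (a diagnostic note, not from the original question): wave.writeframes expects raw PCM samples, and browser MediaRecorder blobs are typically a WebM/Opus container, so writing the blob's compressed bytes as frames produces static, and the same container format is why librosa rejects the file saved with plain open. A minimal sketch for checking what the browser actually sent, based on the file's magic bytes (sniff_container is a hypothetical helper; 1A 45 DF A3 is the EBML signature of WebM/Matroska, while a real WAV starts with RIFF):

def sniff_container(path):
    # Peek at the first four bytes to see what the browser really uploaded.
    with open(path, 'rb') as fh:
        magic = fh.read(4)
    if magic == b'RIFF':
        return 'wav'
    if magic == b'\x1a\x45\xdf\xa3':  # EBML header -> WebM/Matroska
        return 'webm'
    return 'unknown'

print(sniff_container('file.wav'))  # a typical Chrome recording prints 'webm'

Here is how I want to use the file with the recorded voice to make the prediction: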

def predict(file_name):
    # construct the model
    model = create_model()
    # load the saved/trained weights
    model.load_weights('model.h5')
    # extract features and reshape them
    features = extract_feature(file_name, mel=True).reshape(1, -1)
    # predict the gender!
    male_prob = model.predict(features)[0][0]
    female_prob = 1 - male_prob
    gender = "male" if male_prob > female_prob else "female"
    if gender == "male":
        return (gender, male_prob)
    return (gender, female_prob)

And here is the extract_feature function, where I load the file and process it:

def extract_feature(file_name, **kwargs):
    mfcc = kwargs.get("mfcc")
    chroma = kwargs.get("chroma")
    mel = kwargs.get("mel")
    contrast = kwargs.get("contrast")
    tonnetz = kwargs.get("tonnetz")
    X, sample_rate = librosa.core.load(file_name)
    if chroma or contrast:
        stft = np.abs(librosa.stft(X))
    result = np.array([])
    if mfcc:
        mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
        result = np.hstack((result, mfccs))
    if chroma:
        chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
        result = np.hstack((result, chroma))
    if mel:
        mel = np.mean(librosa.feature.melspectrogram(X, sr=sample_rate).T, axis=0)
        result = np.hstack((result, mel))
    if contrast:
        contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)
        result = np.hstack((result, contrast))
    if tonnetz:
        tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T, axis=0)
        result = np.hstack((result, tonnetz))
    return result
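
For reference (my note, based on librosa defaults): with mel=True only, the returned vector is the time-averaged mel spectrogram, one value per mel band, so with librosa's default n_mels=128 the reshaped model input in predict is (1, 128). A quick sanity check, assuming a readable test.wav:

features = extract_feature('test.wav', mel=True)
print(features.shape)  # (128,) with librosa's default n_mels=128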

What am I doing wrong? Is there a way to create a valid WAV file with the same content as the blob I record on the frontend? Or is there a way to use the blob I receive from the frontend directly?

After some digging, I discovered the ffmpeg tool on Linux and used it from Python to convert the file from WebM to WAV format. Here is the solution:

def post(self, request):
    webm_file = str(datetime.datetime.now()) + ".webm"
    wav_file = str(datetime.datetime.now()) + ".wav"
    f = request.FILES['file']
    with open(webm_file, 'wb+') as destination:
        for chunk in f.chunks():
            destination.write(chunk)
    # convert the file from WebM to WAV format
    subprocess.run(["ffmpeg", "-i", webm_file, "-vn", wav_file])
    (prediction, probability) = predict(wav_file)
    context["prediction"] = prediction
    context["probability"] = probability * 100
    os.remove(webm_file)
    os.remove(wav_file)
    return render(request, self.template_name, context=context)
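
A possible refinement (my own sketch, not part of the original answer): ffmpeg can read the blob from stdin and write raw PCM to stdout, which avoids both temporary files; extract_feature would then have to accept a waveform instead of a file name. blob_to_waveform is a hypothetical helper, and sr=22050 matches librosa.core.load's default sample rate:

import subprocess

import numpy as np

def blob_to_waveform(django_file, sr=22050):
    # Decode an uploaded WebM blob straight to a mono float32 waveform:
    # ffmpeg reads the container from stdin ("pipe:0") and writes raw
    # 32-bit float samples to stdout ("pipe:1"), so nothing touches disk.
    out = subprocess.run(
        ["ffmpeg", "-i", "pipe:0",
         "-f", "f32le", "-acodec", "pcm_f32le",  # raw little-endian floats
         "-ac", "1", "-ar", str(sr),             # mono, resampled to sr
         "pipe:1"],
        input=django_file.read(),
        stdout=subprocess.PIPE,
        check=True,  # raise CalledProcessError if the conversion fails
    ).stdout
    return np.frombuffer(out, dtype=np.float32), sr

Passing check=True also surfaces conversion failures that the plain subprocess.run call in the view above would silently ignore.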
