为什么在这次Unity尝试中,audioClip的唇形同步对不上



我正在使用谷歌云文本转语音(Text-to-Speech)来实时获取音频剪辑。目标是通过改变附着在身体上的一个简单Transform的Y轴缩放来驱动嘴唇运动。然而,在播放剪辑时,嘴唇动作的时机似乎对不上。有什么方法可以纠正吗?谢谢

using UnityEngine;
/// <summary>
/// Sets this transform's local Y scale from the loudness of the clip that an
/// <see cref="AudioSource"/> is currently playing. The audio source is handed in
/// through the onStarts/onEnds callbacks of the TextToSpeechVoice found in a parent.
/// </summary>
public class SyncMouthToAudio : MonoBehaviour
{
// Lower and upper bounds of the Y scale that SetScaleByLoudness may apply.
const float scaleYMin = 0.01f;
const float scaleYMax = 0.05f;
TextToSpeechVoice voice = null;
// Non-null only between OnVoiceStarts and OnVoiceEnds.
AudioSource audioSource = null;
// Sample buffer filled by AudioClip.GetData; allocated in OnVoiceStarts.
float[] clipData = null;
// Not referenced anywhere in this class.
const float updateStep = 0.1f;
// Smallest and largest loudness values measured so far; they are never reset here,
// so they carry over from one clip to the next.
float detectedLoudnessMin = Mathf.Infinity;
float detectedLoudnessMax = 0f;
// Interval, in seconds, between two loudness checks, and the time accumulated
// since the last one.
const float updateSeconds = 0.096f;
float updateTime = 0f;
// Looks up the TextToSpeechVoice in the parents and subscribes to its start/end callbacks.
void Start()
{
voice = GetComponentInParent<TextToSpeechVoice>();
voice.onStarts += OnVoiceStarts;
voice.onEnds += OnVoiceEnds;
}
// Accumulates frame time and runs CheckLoudness once updateSeconds has elapsed.
void Update()
{
updateTime += Time.deltaTime;
if (updateTime >= updateSeconds)
{
updateTime = 0f;
CheckLoudness();
}
}
// Measures loudness as the sum of absolute sample values in clipData, updates the
// running min/max, and applies the resulting scale. When nothing is playing the
// loudness stays 0, which makes SetScaleByLoudness apply scaleYMin.
void CheckLoudness()
{
float loudness = 0f;
if (audioSource != null && audioSource.isPlaying && audioSource.timeSamples > 0)
{
// NOTE(review): clipData is allocated with clip.samples elements in OnVoiceStarts,
// so this requests a whole clip's worth of samples starting at the playback position.
// Unity's AudioClip.GetData documentation says a read running past the clip end wraps
// around to the start, which would make this sum cover the entire clip on every call
// rather than a short window around the current position - confirm against the docs.
audioSource.clip.GetData(clipData, audioSource.timeSamples);
foreach (var sample in clipData)
{
loudness += Mathf.Abs(sample);
}
// Because of the else-if, a measurement that sets a new minimum is never also
// recorded as a maximum (this includes the very first measurement).
if      (loudness < detectedLoudnessMin) { detectedLoudnessMin = loudness; }
else if (loudness > detectedLoudnessMax) { detectedLoudnessMax = loudness; }
}
SetScaleByLoudness(loudness);
}
// Maps loudness onto a Y scale clamped to [scaleYMin, scaleYMax] and applies it.
// The scale stays at scaleYMin unless a non-empty min..max range has been detected
// and loudness reaches at least 30% of the way into that range.
void SetScaleByLoudness(float loudness)
{
const float visibilityMultiplier = 15f;
float scaleY = scaleYMin;
bool detectedLoudness = loudness > 0f && detectedLoudnessMin < Mathf.Infinity &&
detectedLoudnessMax > 0f && detectedLoudnessMin < detectedLoudnessMax;
if (detectedLoudness)
{
float range = detectedLoudnessMax - detectedLoudnessMin;
float threshold = detectedLoudnessMin + range * 0.3f;
bool loudnessIsRelevantEnough = loudness >= threshold;
if (loudnessIsRelevantEnough)
{
float scaleRange = scaleYMax - scaleYMin;
float loudnessRange = detectedLoudnessMax - detectedLoudnessMin;
float scaleToLoudnessRatio = scaleRange / loudnessRange;
// Linear mapping of loudness into the scale range, additionally multiplied by
// scaleYMax * visibilityMultiplier, then clamped on the next line.
scaleY = scaleYMin + (loudness - detectedLoudnessMin) * scaleToLoudnessRatio * scaleYMax * visibilityMultiplier;
scaleY = Mathf.Clamp(scaleY, scaleYMin, scaleYMax);
}
}

// SetLocalScaleY is not defined in this file; presumably a project extension
// method that sets only the Y component of localScale.
transform.SetLocalScaleY(scaleY);
}
// Remembers the audio source and allocates a buffer with one element per clip sample.
void OnVoiceStarts(AudioSource audioSource)
{
this.audioSource = audioSource;
clipData = new float[this.audioSource.clip.samples];
}
// Forgets the audio source so that CheckLoudness reports a loudness of 0 again.
void OnVoiceEnds()
{
this.audioSource = null;
}
}

上面的一些注意事项:

  • 我尝试过updateSeconds的各种取值,也尝试过改用InvokeRepeating,但都没有用
  • 我已经尝试了各种阈值(目标是让嘴唇在近乎静音的情况下闭上),也试过完全取消阈值检查,但这无济于事
  • 该代码试图自动确定典型的最小和最大响度(以便不依赖具体音频,在最大范围内显示嘴巴的张合)
  • 我已经将audioSource优先级设置为0,即最高优先级
  • 音频剪辑的内容事先不知道,因为它是由GPT-3 AI根据用户的要求实时编写的

感谢derHugo的帮助,下面的代码通过给clipData使用一个固定的、更小的数组大小(如512)解决了问题,这样它在调用GetData时就不会一次抓取太多采样:

using UnityEngine;
using System.Linq;
/// <summary>
/// Sets this transform's local Y scale from the loudness of a short window of
/// samples around the current playback position of an <see cref="AudioSource"/>.
/// The audio source is handed in through the onStarts/onEnds callbacks of the
/// TextToSpeechVoice found in a parent; while a voice is active the loudness is
/// polled every <c>checkEveryNSeconds</c> seconds via InvokeRepeating.
/// </summary>
public class SyncMouthToAudio : MonoBehaviour
{
    // Lower and upper bounds of the Y scale that SetScaleByLoudness may apply.
    const float scaleYMin = 0.01f;
    const float scaleYMax = 0.05f;

    // Interval between two loudness checks; also the length of the analysed window.
    const float checkEveryNSeconds = 0.05f;

    TextToSpeechVoice voice = null;

    // Non-null only between OnVoiceStarts and OnVoiceEnds.
    AudioSource audioSource = null;

    // Reusable window buffer filled by AudioClip.GetData.
    float[] clipData = null;
    int samplesPerCheck = 0;

    // Smallest and largest loudness values measured so far. They are intentionally
    // kept across clips so the mouth range calibrates itself over time.
    float detectedLoudnessMin = Mathf.Infinity;
    float detectedLoudnessMax = 0f;

    /// <summary>Subscribes to the start/end callbacks of the parent voice, if there is one.</summary>
    void Start()
    {
        voice = GetComponentInParent<TextToSpeechVoice>();
        if (voice == null)
        {
            Debug.LogWarning("SyncMouthToAudio: no TextToSpeechVoice found in parents.", this);
            return;
        }

        voice.onStarts += OnVoiceStarts;
        voice.onEnds += OnVoiceEnds;
    }

    /// <summary>Removes the handlers added in Start so the voice does not call into a destroyed object.</summary>
    void OnDestroy()
    {
        if (voice != null)
        {
            voice.onStarts -= OnVoiceStarts;
            voice.onEnds -= OnVoiceEnds;
        }
    }

    /// <summary>
    /// Measures the mean absolute sample value in a window centred on the playback
    /// position, updates the running min/max and applies the resulting scale.
    /// Invoked by name through InvokeRepeating, so the method name must not change.
    /// </summary>
    void CheckLoudness()
    {
        float loudness = 0f;

        // Keep the "!= null" comparisons: for Unity objects they also detect destroyed instances.
        bool canSample = audioSource != null && audioSource.isPlaying && audioSource.timeSamples > 0
            && audioSource.clip != null && clipData != null && clipData.Length > 0;

        if (canSample)
        {
            AudioClip clip = audioSource.clip;
            int halfWindow = clipData.Length / 2;

            // The last valid start leaves room for the whole window. Unity's GetData
            // documentation describes reads past the clip end as wrapping to the start,
            // which would mix the beginning of the clip into the final windows.
            int lastStart = Mathf.Max(0, clip.samples - clipData.Length);
            int startSample = Mathf.Clamp(audioSource.timeSamples - halfWindow, 0, lastStart);
            clip.GetData(clipData, startSample);

            // Plain loop instead of LINQ: this runs every 50 ms and should not allocate.
            double sum = 0.0;
            for (int i = 0; i < clipData.Length; i++)
            {
                sum += Mathf.Abs(clipData[i]);
            }
            loudness = (float) (sum / clipData.Length);

            // Two independent checks: a measurement may be both a new minimum and a
            // new maximum (e.g. the very first one).
            if (loudness < detectedLoudnessMin) { detectedLoudnessMin = loudness; }
            if (loudness > detectedLoudnessMax) { detectedLoudnessMax = loudness; }
        }

        SetScaleByLoudness(loudness);
    }

    /// <summary>
    /// Maps <paramref name="loudness"/> onto a Y scale clamped to
    /// [scaleYMin, scaleYMax] and applies it. The scale is scaleYMin when loudness
    /// is 0 or no non-empty min..max range has been detected yet.
    /// </summary>
    void SetScaleByLoudness(float loudness)
    {
        const float visibilityMultiplier = 15f;
        float scaleY = scaleYMin;

        if (loudness > 0f && detectedLoudnessMin < Mathf.Infinity &&
            detectedLoudnessMax > 0f && detectedLoudnessMin < detectedLoudnessMax)
        {
            float scaleRange = scaleYMax - scaleYMin;
            float loudnessRange = detectedLoudnessMax - detectedLoudnessMin;
            float scaleToLoudnessRatio = scaleRange / loudnessRange;

            // Linear mapping into the scale range, with the original tuning factor
            // (scaleYMax * visibilityMultiplier) kept as-is, then clamped.
            scaleY = scaleYMin + (loudness - detectedLoudnessMin) *
                scaleToLoudnessRatio * scaleYMax * visibilityMultiplier;
            scaleY = Mathf.Clamp(scaleY, scaleYMin, scaleYMax);
        }

        // SetLocalScaleY is not defined in this file; presumably a project extension
        // method that sets only the Y component of localScale.
        transform.SetLocalScaleY(scaleY);
    }

    /// <summary>
    /// Stores the audio source, sizes the window buffer to checkEveryNSeconds worth
    /// of samples and (re)starts the periodic loudness check.
    /// </summary>
    void OnVoiceStarts(AudioSource audioSource)
    {
        CancelInvoke();
        this.audioSource = audioSource;

        if (audioSource == null || audioSource.clip == null)
        {
            // Nothing to analyse; CheckLoudness is simply not scheduled.
            return;
        }

        AudioClip clip = audioSource.clip;

        // samples / (length / interval) equals frequency * interval, but this form
        // cannot divide by zero for an empty clip. The window never exceeds the clip.
        int wanted = Mathf.RoundToInt(clip.frequency * checkEveryNSeconds);
        samplesPerCheck = Mathf.Clamp(wanted, 1, Mathf.Max(1, clip.samples));

        if (clipData == null || clipData.Length != samplesPerCheck)
        {
            clipData = new float[samplesPerCheck];
        }

        InvokeRepeating(nameof(CheckLoudness), checkEveryNSeconds * 0.5f, checkEveryNSeconds);
    }

    /// <summary>Stops the periodic check, forgets the audio source and closes the mouth.</summary>
    void OnVoiceEnds()
    {
        CancelInvoke();
        this.audioSource = null;

        // Without this the mouth would stay frozen at whatever scale the last check applied.
        transform.SetLocalScaleY(scaleYMin);
    }
}

相关内容

  • 没有找到相关文章

最新更新