AudioStream: Split into source and backend streams

This commit is contained in:
Stenzek
2025-12-22 14:09:19 +10:00
parent 471fba1445
commit abcaceeb80
16 changed files with 1312 additions and 1203 deletions

View File

@@ -2527,7 +2527,7 @@ void FullscreenUI::DrawCoverDownloaderWindow()
// TODO: Remove release once using move_only_function
std::unique_ptr<ProgressCallback> progress = OpenModalProgressDialog(FSUI_STR("Cover Downloader"), 1000.0f);
Host::QueueAsyncTask([progress = progress.release(), urls = StringUtil::SplitNewString(template_urls, '\n'),
use_serial_names = use_serial_names]() {
use_serial_names = use_serial_names]() {
Error error;
if (!GameList::DownloadCovers(
urls, use_serial_names, progress, &error, [](const GameList::Entry* entry, std::string save_path) {
@@ -4573,8 +4573,9 @@ void FullscreenUI::DrawAudioSettingsPage()
&AudioStream::GetBackendDisplayName, AudioBackend::Count);
DrawEnumSetting(bsi, FSUI_ICONVSTR(ICON_PF_SFX_SOUND_EFFECT_NOISE, "Stretch Mode"),
FSUI_CSTR("Determines quality of audio when not running at 100% speed."), "Audio", "StretchMode",
AudioStreamParameters::DEFAULT_STRETCH_MODE, &AudioStream::ParseStretchMode,
&AudioStream::GetStretchModeName, &AudioStream::GetStretchModeDisplayName, AudioStretchMode::Count);
AudioStreamParameters::DEFAULT_STRETCH_MODE, &CoreAudioStream::ParseStretchMode,
&CoreAudioStream::GetStretchModeName, &CoreAudioStream::GetStretchModeDisplayName,
AudioStretchMode::Count);
DrawIntRangeSetting(bsi, FSUI_ICONVSTR(ICON_FA_BUCKET, "Buffer Size"),
FSUI_VSTR("Determines the amount of audio buffered before being pulled by the host API."),
"Audio", "BufferMS", AudioStreamParameters::DEFAULT_BUFFER_MS, 10, 500, FSUI_CSTR("%d ms"));

View File

@@ -540,7 +540,7 @@ DEFINE_HOTKEY("AudioMute", TRANSLATE_NOOP("Hotkeys", "Audio"), TRANSLATE_NOOP("H
{
g_settings.audio_output_muted = !g_settings.audio_output_muted;
const s32 volume = System::GetAudioOutputVolume();
SPU::GetOutputStream()->SetOutputVolume(volume);
SPU::GetOutputStream().SetOutputVolume(volume);
if (g_settings.audio_output_muted)
{
Host::AddIconOSDMessage(OSDMessageType::Quick, "AudioControlHotkey", ICON_EMOJI_MUTED_SPEAKER,
@@ -576,7 +576,7 @@ DEFINE_HOTKEY("AudioVolumeUp", TRANSLATE_NOOP("Hotkeys", "Audio"), TRANSLATE_NOO
Truncate8(std::min<s32>(static_cast<s32>(System::GetAudioOutputVolume()) + 10, 200));
g_settings.audio_output_volume = volume;
g_settings.audio_fast_forward_volume = volume;
SPU::GetOutputStream()->SetOutputVolume(volume);
SPU::GetOutputStream().SetOutputVolume(volume);
Host::AddIconOSDMessage(OSDMessageType::Quick, "AudioControlHotkey", ICON_EMOJI_HIGH_VOLUME_SPEAKER,
fmt::format(TRANSLATE_FS("OSDMessage", "Volume: {}%"), volume));
}
@@ -590,7 +590,7 @@ DEFINE_HOTKEY("AudioVolumeDown", TRANSLATE_NOOP("Hotkeys", "Audio"), TRANSLATE_N
const u8 volume = Truncate8(std::max<s32>(static_cast<s32>(System::GetAudioOutputVolume()) - 10, 0));
g_settings.audio_output_volume = volume;
g_settings.audio_fast_forward_volume = volume;
SPU::GetOutputStream()->SetOutputVolume(volume);
SPU::GetOutputStream().SetOutputVolume(volume);
Host::AddIconOSDMessage(OSDMessageType::Quick, "AudioControlHotkey", ICON_EMOJI_MEDIUM_VOLUME_SPEAKER,
fmt::format(TRANSLATE_FS("OSDMessage", "Volume: {}%"), volume));
}

View File

@@ -5,7 +5,7 @@
#include "types.h"
#include "util/audio_stream.h"
#include "util/core_audio_stream.h"
#include "common/small_string.h"

View File

@@ -365,10 +365,8 @@ static void ManualTransferWrite(u16 value);
static void UpdateTransferEvent();
static void UpdateDMARequest();
static void CreateOutputStream();
namespace {
struct SPUState
struct ALIGN_TO_CACHE_LINE SPUState
{
TimingEvent transfer_event{"SPU Transfer", TRANSFER_TICKS_PER_HALFWORD, TRANSFER_TICKS_PER_HALFWORD,
&SPU::ExecuteTransfer, nullptr};
@@ -416,15 +414,15 @@ struct SPUState
std::array<std::array<s16, 64>, 2> reverb_upsample_buffer;
s32 reverb_resample_buffer_position = 0;
s16 last_reverb_input[2];
s32 last_reverb_output[2];
bool audio_output_muted = false;
ALIGN_TO_CACHE_LINE std::array<Voice, NUM_VOICES> voices{};
InlineFIFOQueue<u16, FIFO_SIZE_IN_HALFWORDS> transfer_fifo;
std::unique_ptr<AudioStream> audio_stream;
s16 last_reverb_input[2];
s32 last_reverb_output[2];
bool audio_output_muted = false;
CoreAudioStream audio_stream;
#ifdef SPU_DUMP_ALL_VOICES
// +1 for reverb output
@@ -440,7 +438,7 @@ struct SPUState
};
} // namespace
ALIGN_TO_CACHE_LINE static SPUState s_state;
static SPUState s_state;
ALIGN_TO_CACHE_LINE static std::array<u8, RAM_SIZE> s_ram{};
ALIGN_TO_CACHE_LINE static std::array<s16, (44100 / 60) * 2> s_muted_output_buffer{};
@@ -503,13 +501,11 @@ void SPU::CreateOutputStream()
AudioStream::GetBackendName(g_settings.audio_backend), static_cast<u32>(SAMPLE_RATE),
g_settings.audio_stream_parameters.buffer_ms, g_settings.audio_stream_parameters.output_latency_ms,
g_settings.audio_stream_parameters.output_latency_minimal ? " (or minimal)" : "",
AudioStream::GetStretchModeName(g_settings.audio_stream_parameters.stretch_mode));
CoreAudioStream::GetStretchModeName(g_settings.audio_stream_parameters.stretch_mode));
Error error;
s_state.audio_stream =
AudioStream::CreateStream(g_settings.audio_backend, SAMPLE_RATE, g_settings.audio_stream_parameters,
g_settings.audio_driver.c_str(), g_settings.audio_output_device.c_str(), &error);
if (!s_state.audio_stream)
if (!s_state.audio_stream.Initialize(g_settings.audio_backend, SAMPLE_RATE, g_settings.audio_stream_parameters,
g_settings.audio_driver.c_str(), g_settings.audio_output_device.c_str(), &error))
{
Host::AddIconOSDMessage(
OSDMessageType::Error, "SPUAudioStream", ICON_EMOJI_WARNING,
@@ -517,19 +513,13 @@ void SPU::CreateOutputStream()
TRANSLATE_FS("SPU",
"Failed to create or configure audio stream, falling back to null output. The error was:\n{}"),
error.GetDescription()));
s_state.audio_stream.reset();
s_state.audio_stream = AudioStream::CreateNullStream(SAMPLE_RATE, g_settings.audio_stream_parameters.buffer_ms);
s_state.audio_stream.Initialize(AudioBackend::Null, SAMPLE_RATE, g_settings.audio_stream_parameters, nullptr,
nullptr, nullptr);
}
s_state.audio_stream->SetOutputVolume(System::GetAudioOutputVolume());
s_state.audio_stream->SetNominalRate(System::GetAudioNominalRate());
s_state.audio_stream->SetPaused(System::IsPaused());
}
void SPU::RecreateOutputStream()
{
s_state.audio_stream.reset();
CreateOutputStream();
s_state.audio_stream.SetOutputVolume(System::GetAudioOutputVolume());
s_state.audio_stream.SetNominalRate(System::GetAudioNominalRate());
s_state.audio_stream.SetPaused(System::IsPaused());
}
void SPU::CPUClockChanged()
@@ -550,7 +540,7 @@ void SPU::Shutdown()
s_state.tick_event.Deactivate();
s_state.transfer_event.Deactivate();
s_state.audio_stream.reset();
s_state.audio_stream.Destroy();
}
void SPU::Reset()
@@ -1674,9 +1664,9 @@ void SPU::SetAudioOutputMuted(bool muted)
s_state.audio_output_muted = muted;
}
AudioStream* SPU::GetOutputStream()
CoreAudioStream& SPU::GetOutputStream()
{
return s_state.audio_stream.get();
return s_state.audio_stream;
}
void SPU::Voice::KeyOn()
@@ -2410,7 +2400,7 @@ void SPU::Execute(void* param, TickCount ticks, TickCount ticks_late)
if (!s_state.audio_output_muted) [[likely]]
{
output_frame_space = remaining_frames;
s_state.audio_stream->BeginWrite(&output_frame_start, &output_frame_space);
s_state.audio_stream.BeginWrite(&output_frame_start, &output_frame_space);
}
else
{
@@ -2536,7 +2526,7 @@ void SPU::Execute(void* param, TickCount ticks, TickCount ticks_late)
}
#ifndef __ANDROID__
if (MediaCapture* cap = System::GetMediaCapture(); cap && !s_state.audio_output_muted) [[unlikely]]
if (MediaCapture* cap = System::GetMediaCapture()) [[unlikely]]
{
if (!cap->DeliverAudioFrames(output_frame_start, frames_in_this_batch))
System::StopMediaCapture();
@@ -2544,7 +2534,7 @@ void SPU::Execute(void* param, TickCount ticks, TickCount ticks_late)
#endif
if (!s_state.audio_output_muted) [[likely]]
s_state.audio_stream->EndWrite(frames_in_this_batch);
s_state.audio_stream.EndWrite(frames_in_this_batch);
remaining_frames -= frames_in_this_batch;
}
}
@@ -2554,7 +2544,7 @@ void SPU::UpdateEventInterval()
// Don't generate more than the audio buffer since in a single slice, otherwise we'll both overflow the buffers when
// we do write it, and the audio thread will underflow since it won't have enough data it the game isn't messing with
// the SPU state.
const u32 max_slice_frames = s_state.audio_stream->GetBufferSize();
const u32 max_slice_frames = s_state.audio_stream.GetBufferSize();
// TODO: Make this predict how long until the interrupt will be hit instead...
const u32 interval = (s_state.SPUCNT.enable && s_state.SPUCNT.irq9_enable) ? 1 : max_slice_frames;

View File

@@ -9,7 +9,7 @@
class StateWrapper;
class AudioStream;
class CoreAudioStream;
namespace SPU {
@@ -47,7 +47,7 @@ std::array<u8, RAM_SIZE>& GetWritableRAM();
bool IsAudioOutputMuted();
void SetAudioOutputMuted(bool muted);
AudioStream* GetOutputStream();
void RecreateOutputStream();
CoreAudioStream& GetOutputStream();
void CreateOutputStream();
}; // namespace SPU

View File

@@ -1639,7 +1639,7 @@ void System::PauseSystem(bool paused)
return;
s_state.state = (paused ? State::Paused : State::Running);
SPU::GetOutputStream()->SetPaused(paused);
SPU::GetOutputStream().SetPaused(paused);
GPUThread::RunOnThread([paused]() { GPUThread::SetRunIdleReason(GPUThread::RunIdleReason::SystemPaused, paused); });
if (paused)
@@ -1905,7 +1905,7 @@ bool System::BootSystem(SystemBootParameters parameters, Error* error)
// Good to go.
s_state.state = State::Running;
std::atomic_thread_fence(std::memory_order_release);
SPU::GetOutputStream()->SetPaused(false);
SPU::GetOutputStream().SetPaused(false);
// Immediately pausing?
const bool start_paused = (ShouldStartPaused() || parameters.override_start_paused.value_or(false));
@@ -3666,9 +3666,9 @@ void System::AccumulatePreFrameSleepTime(Timer::Value current_time)
void System::FormatLatencyStats(SmallStringBase& str)
{
AudioStream* audio_stream = SPU::GetOutputStream();
CoreAudioStream& audio_stream = SPU::GetOutputStream();
const u32 audio_latency =
AudioStream::GetMSForBufferSize(audio_stream->GetSampleRate(), audio_stream->GetBufferedFramesRelaxed());
CoreAudioStream::GetMSForBufferSize(audio_stream.GetSampleRate(), audio_stream.GetBufferedFramesRelaxed());
const u32 queued_frame_count = GPUBackend::GetQueuedFrameCount();
const double active_frame_time = std::ceil(Timer::ConvertValueToMilliseconds(s_state.last_active_frame_time));
@@ -3725,13 +3725,13 @@ void System::UpdateSpeedLimiterState()
VERBOSE_LOG("Preset timing: {}", s_state.optimal_frame_pacing ? "consistent" : "immediate");
// Update audio output.
AudioStream* stream = SPU::GetOutputStream();
stream->SetOutputVolume(GetAudioOutputVolume());
stream->SetNominalRate(GetAudioNominalRate());
CoreAudioStream& stream = SPU::GetOutputStream();
stream.SetOutputVolume(GetAudioOutputVolume());
stream.SetNominalRate(GetAudioNominalRate());
// Only empty stretch buffers when we're decreasing speed.
if (s_state.target_speed != prev_speed && (prev_speed > s_state.target_speed || prev_speed == 0.0f))
stream->EmptyStretchBuffers();
stream.EmptyStretchBuffers();
UpdateThrottlePeriod();
ResetThrottler();
@@ -4494,7 +4494,8 @@ void System::CheckForSettingsChanges(const Settings& old_settings)
if (g_settings.audio_backend != old_settings.audio_backend ||
g_settings.audio_driver != old_settings.audio_driver ||
g_settings.audio_output_device != old_settings.audio_output_device)
g_settings.audio_output_device != old_settings.audio_output_device ||
g_settings.audio_stream_parameters != old_settings.audio_stream_parameters)
{
if (g_settings.audio_backend != old_settings.audio_backend)
{
@@ -4503,14 +4504,7 @@ void System::CheckForSettingsChanges(const Settings& old_settings)
AudioStream::GetBackendDisplayName(g_settings.audio_backend)));
}
SPU::RecreateOutputStream();
}
if (g_settings.audio_stream_parameters.stretch_mode != old_settings.audio_stream_parameters.stretch_mode)
SPU::GetOutputStream()->SetStretchMode(g_settings.audio_stream_parameters.stretch_mode);
if (g_settings.audio_stream_parameters != old_settings.audio_stream_parameters)
{
SPU::RecreateOutputStream();
UpdateSpeedLimiterState();
SPU::CreateOutputStream();
}
if (g_settings.emulation_speed != old_settings.emulation_speed)
@@ -4553,7 +4547,7 @@ void System::CheckForSettingsChanges(const Settings& old_settings)
InterruptExecution();
}
SPU::GetOutputStream()->SetOutputVolume(GetAudioOutputVolume());
SPU::GetOutputStream().SetOutputVolume(GetAudioOutputVolume());
// CPU side GPU settings
if (g_settings.display_deinterlacing_mode != old_settings.display_deinterlacing_mode ||
@@ -5432,7 +5426,7 @@ void System::UpdateVolume()
if (!IsValid())
return;
SPU::GetOutputStream()->SetOutputVolume(GetAudioOutputVolume());
SPU::GetOutputStream().SetOutputVolume(GetAudioOutputVolume());
}
std::string System::GetScreenshotPath(const char* extension)

View File

@@ -27,8 +27,9 @@ AudioSettingsWidget::AudioSettingsWidget(SettingsWindow* dialog, QWidget* parent
sif, m_ui.audioBackend, "Audio", "Backend", &AudioStream::ParseBackendName, &AudioStream::GetBackendName,
&AudioStream::GetBackendDisplayName, AudioStream::DEFAULT_BACKEND, AudioBackend::Count);
SettingWidgetBinder::BindWidgetToEnumSetting(
sif, m_ui.stretchMode, "Audio", "StretchMode", &AudioStream::ParseStretchMode, &AudioStream::GetStretchModeName,
&AudioStream::GetStretchModeDisplayName, AudioStreamParameters::DEFAULT_STRETCH_MODE, AudioStretchMode::Count);
sif, m_ui.stretchMode, "Audio", "StretchMode", &CoreAudioStream::ParseStretchMode,
&CoreAudioStream::GetStretchModeName, &CoreAudioStream::GetStretchModeDisplayName,
AudioStreamParameters::DEFAULT_STRETCH_MODE, AudioStretchMode::Count);
SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.bufferMS, "Audio", "BufferMS",
AudioStreamParameters::DEFAULT_BUFFER_MS);
SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.outputLatencyMS, "Audio", "OutputLatencyMS",
@@ -116,10 +117,10 @@ AudioSettingsWidget::~AudioSettingsWidget() = default;
void AudioSettingsWidget::onStretchModeChanged()
{
const AudioStretchMode stretch_mode =
AudioStream::ParseStretchMode(
CoreAudioStream::ParseStretchMode(
m_dialog
->getEffectiveStringValue("Audio", "StretchMode",
AudioStream::GetStretchModeName(AudioStreamParameters::DEFAULT_STRETCH_MODE))
CoreAudioStream::GetStretchModeName(AudioStreamParameters::DEFAULT_STRETCH_MODE))
.c_str())
.value_or(AudioStreamParameters::DEFAULT_STRETCH_MODE);
m_ui.stretchSettings->setEnabled(stretch_mode != AudioStretchMode::Off);
@@ -219,7 +220,7 @@ void AudioSettingsWidget::updateLatencyLabel()
m_ui.bufferMSLabel->setText(tr("%1 ms").arg(config_buffer_ms));
const u32 output_latency_ms = minimal_output ?
AudioStream::GetMSForBufferSize(SPU::SAMPLE_RATE, m_output_device_latency) :
CoreAudioStream::GetMSForBufferSize(SPU::SAMPLE_RATE, m_output_device_latency) :
config_output_latency_ms;
if (output_latency_ms > 0)
{

View File

@@ -17,6 +17,8 @@ add_library(util
cd_image_ppf.cpp
compress_helpers.cpp
compress_helpers.h
core_audio_stream.cpp
core_audio_stream.h
cue_parser.cpp
cue_parser.h
dyn_shaderc.h

View File

@@ -4,25 +4,11 @@
#include "audio_stream.h"
#include "translation.h"
#include "common/align.h"
#include "common/assert.h"
#include "common/error.h"
#include "common/gsvector.h"
#include "common/log.h"
#include "common/settings_interface.h"
#include "common/timer.h"
#include "soundtouch/SoundTouch.h"
#include "soundtouch/SoundTouchDLL.h"
AudioStream::AudioStream() = default;
#include <algorithm>
#include <cmath>
#include <cstring>
#include <limits>
LOG_CHANNEL(AudioStream);
static constexpr bool LOG_TIMESTRETCH_STATS = false;
AudioStream::~AudioStream() = default;
AudioStream::DeviceInfo::DeviceInfo(std::string name_, std::string display_name_, u32 minimum_latency_)
: name(std::move(name_)), display_name(std::move(display_name_)), minimum_latency_frames(minimum_latency_)
@@ -31,174 +17,6 @@ AudioStream::DeviceInfo::DeviceInfo(std::string name_, std::string display_name_
AudioStream::DeviceInfo::~DeviceInfo() = default;
void AudioStreamParameters::Load(const SettingsInterface& si, const char* section)
{
stretch_mode =
AudioStream::ParseStretchMode(
si.GetStringValue(section, "StretchMode", AudioStream::GetStretchModeName(DEFAULT_STRETCH_MODE)).c_str())
.value_or(DEFAULT_STRETCH_MODE);
output_latency_ms = static_cast<u16>(std::min<u32>(
si.GetUIntValue(section, "OutputLatencyMS", DEFAULT_OUTPUT_LATENCY_MS), std::numeric_limits<u16>::max()));
output_latency_minimal = si.GetBoolValue(section, "OutputLatencyMinimal", DEFAULT_OUTPUT_LATENCY_MINIMAL);
buffer_ms = static_cast<u16>(
std::min<u32>(si.GetUIntValue(section, "BufferMS", DEFAULT_BUFFER_MS), std::numeric_limits<u16>::max()));
stretch_sequence_length_ms =
static_cast<u16>(std::min<u32>(si.GetUIntValue(section, "StretchSequenceLengthMS", DEFAULT_STRETCH_SEQUENCE_LENGTH),
std::numeric_limits<u16>::max()));
stretch_seekwindow_ms = static_cast<u16>(std::min<u32>(
si.GetUIntValue(section, "StretchSeekWindowMS", DEFAULT_STRETCH_SEEKWINDOW), std::numeric_limits<u16>::max()));
stretch_overlap_ms = static_cast<u16>(std::min<u32>(
si.GetUIntValue(section, "StretchOverlapMS", DEFAULT_STRETCH_OVERLAP), std::numeric_limits<u16>::max()));
stretch_use_quickseek = si.GetBoolValue(section, "StretchUseQuickSeek", DEFAULT_STRETCH_USE_QUICKSEEK);
stretch_use_aa_filter = si.GetBoolValue(section, "StretchUseAAFilter", DEFAULT_STRETCH_USE_AA_FILTER);
}
void AudioStreamParameters::Save(SettingsInterface& si, const char* section) const
{
  // Persists every tunable to the given config section. Writes the same keys
  // that Load() reads; the stretch mode is stored by name, not numeric value.
  si.SetStringValue(section, "StretchMode", AudioStream::GetStretchModeName(stretch_mode));
  si.SetUIntValue(section, "BufferMS", buffer_ms);
  si.SetUIntValue(section, "OutputLatencyMS", output_latency_ms);
  si.SetBoolValue(section, "OutputLatencyMinimal", output_latency_minimal);
  si.SetUIntValue(section, "StretchSequenceLengthMS", stretch_sequence_length_ms);
  si.SetUIntValue(section, "StretchSeekWindowMS", stretch_seekwindow_ms);
  si.SetUIntValue(section, "StretchOverlapMS", stretch_overlap_ms);
  si.SetBoolValue(section, "StretchUseQuickSeek", stretch_use_quickseek);
  si.SetBoolValue(section, "StretchUseAAFilter", stretch_use_aa_filter);
}
void AudioStreamParameters::Clear(SettingsInterface& si, const char* section)
{
  // Removes every key owned by this struct so the section reverts to defaults
  // on the next Load(). Note "ExpansionMode" is deleted but never written by
  // Save() — presumably an obsolete key being cleaned up; confirm.
  static constexpr const char* keys_to_remove[] = {
    "StretchMode",
    "ExpansionMode",
    "BufferMS",
    "OutputLatencyMS",
    "OutputLatencyMinimal",
    "StretchSequenceLengthMS",
    "StretchSeekWindowMS",
    "StretchOverlapMS",
    "StretchUseQuickSeek",
    "StretchUseAAFilter",
  };
  for (const char* key : keys_to_remove)
    si.DeleteValue(section, key);
}
// Equality/inequality via raw memcmp over the whole struct.
// NOTE(review): this assumes AudioStreamParameters contains no padding bytes
// (or that padding is always zero-initialized) — confirm against the struct
// definition, since uninitialized padding would make equal parameter sets
// compare unequal.
bool AudioStreamParameters::operator!=(const AudioStreamParameters& rhs) const
{
  return (std::memcmp(this, &rhs, sizeof(*this)) != 0);
}

bool AudioStreamParameters::operator==(const AudioStreamParameters& rhs) const
{
  return (std::memcmp(this, &rhs, sizeof(*this)) == 0);
}
// Records the output sample rate and a copy of the stream parameters.
// Buffers and the stretcher are not created until BaseInitialize() runs.
AudioStream::AudioStream(u32 sample_rate, const AudioStreamParameters& parameters)
  : m_sample_rate(sample_rate), m_parameters(parameters)
{
}

AudioStream::~AudioStream()
{
  // Tear down the SoundTouch instance before the sample buffers it feeds.
  StretchDestroy();
  DestroyBuffer();
}
std::unique_ptr<AudioStream> AudioStream::CreateNullStream(u32 sample_rate, u32 buffer_ms)
{
  // Stretching is pointless when nothing is audible, so force it off; only
  // the buffer size is honoured from the caller.
  AudioStreamParameters params;
  params.buffer_ms = static_cast<u16>(buffer_ms);
  params.stretch_mode = AudioStretchMode::Off;

  std::unique_ptr<AudioStream> ret(new AudioStream(sample_rate, params));
  ret->BaseInitialize();
  return ret;
}
std::vector<std::pair<std::string, std::string>> AudioStream::GetDriverNames(AudioBackend backend)
{
  // Only the Cubeb backend exposes selectable drivers; every other backend
  // (and all backends on Android) reports an empty list.
#ifndef __ANDROID__
  if (backend == AudioBackend::Cubeb)
    return GetCubebDriverNames();
#endif

  return {};
}
std::vector<AudioStream::DeviceInfo> AudioStream::GetOutputDevices(AudioBackend backend, const char* driver,
                                                                   u32 sample_rate)
{
  // Device enumeration is only implemented for Cubeb; other backends (and
  // all backends on Android) have no enumerable outputs.
#ifndef __ANDROID__
  if (backend == AudioBackend::Cubeb)
    return GetCubebOutputDevices(driver, sample_rate);
#endif

  return {};
}
// Factory for the platform audio backends. Cubeb/SDL are compiled in on
// desktop builds only, AAudio/OpenSLES on Android only; Null is always
// available. Returns nullptr and sets error for an unrecognized backend.
std::unique_ptr<AudioStream> AudioStream::CreateStream(AudioBackend backend, u32 sample_rate,
                                                       const AudioStreamParameters& parameters, const char* driver_name,
                                                       const char* device_name, Error* error /* = nullptr */)
{
  switch (backend)
  {
#ifndef __ANDROID__
    case AudioBackend::Cubeb:
      return CreateCubebAudioStream(sample_rate, parameters, driver_name, device_name, error);

    case AudioBackend::SDL:
      return CreateSDLAudioStream(sample_rate, parameters, error);
#else
    case AudioBackend::AAudio:
      return CreateAAudioAudioStream(sample_rate, parameters, error);

    case AudioBackend::OpenSLES:
      return CreateOpenSLESAudioStream(sample_rate, parameters, error);
#endif

    case AudioBackend::Null:
      // Null stream only uses the buffer size from the parameters.
      return CreateNullStream(sample_rate, parameters.buffer_ms);

    default:
      Error::SetStringView(error, "Unknown audio backend.");
      return nullptr;
  }
}
// Rounds a frame count up to a whole number of chunks, so ring-buffer reads
// and writes never need to split a chunk.
u32 AudioStream::GetAlignedBufferSize(u32 size)
{
  static_assert(Common::IsPow2(CHUNK_SIZE));
  return Common::AlignUpPow2(size, CHUNK_SIZE);
}

// Converts a duration in milliseconds to a chunk-aligned frame count.
u32 AudioStream::GetBufferSizeForMS(u32 sample_rate, u32 ms)
{
  return GetAlignedBufferSize((ms * sample_rate) / 1000u);
}

// Converts a frame count to milliseconds. Aligns first, so the result is the
// duration of the buffer that would actually be allocated for this size.
u32 AudioStream::GetMSForBufferSize(u32 sample_rate, u32 buffer_size)
{
  buffer_size = GetAlignedBufferSize(buffer_size);
  return (buffer_size * 1000u) / sample_rate;
}
static constexpr const std::array s_backend_names = {
"Null",
#ifndef __ANDROID__
@@ -244,669 +62,75 @@ const char* AudioStream::GetBackendDisplayName(AudioBackend backend)
return Host::TranslateToCString("AudioStream", s_backend_display_names[static_cast<int>(backend)]);
}
static constexpr const std::array s_stretch_mode_names = {
"None",
"Resample",
"TimeStretch",
};
static constexpr const std::array s_stretch_mode_display_names = {
TRANSLATE_DISAMBIG_NOOP("Settings", "Off (Noisy)", "AudioStretchMode"),
TRANSLATE_DISAMBIG_NOOP("Settings", "Resampling (Pitch Shift)", "AudioStretchMode"),
TRANSLATE_DISAMBIG_NOOP("Settings", "Time Stretch (Tempo Change, Best Sound)", "AudioStretchMode"),
};
const char* AudioStream::GetStretchModeName(AudioStretchMode mode)
u32 AudioStream::FramesToMS(u32 sample_rate, u32 frames)
{
return (static_cast<size_t>(mode) < s_stretch_mode_names.size()) ? s_stretch_mode_names[static_cast<size_t>(mode)] :
"";
return (frames * 1000) / sample_rate;
}
const char* AudioStream::GetStretchModeDisplayName(AudioStretchMode mode)
std::vector<std::pair<std::string, std::string>> AudioStream::GetDriverNames(AudioBackend backend)
{
return (static_cast<size_t>(mode) < s_stretch_mode_display_names.size()) ?
Host::TranslateToCString("Settings", s_stretch_mode_display_names[static_cast<size_t>(mode)],
"AudioStretchMode") :
"";
}
std::optional<AudioStretchMode> AudioStream::ParseStretchMode(const char* name)
{
for (size_t i = 0; i < static_cast<u8>(AudioStretchMode::Count); i++)
std::vector<std::pair<std::string, std::string>> ret;
switch (backend)
{
if (std::strcmp(name, s_stretch_mode_names[i]) == 0)
return static_cast<AudioStretchMode>(i);
}
return std::nullopt;
}
// Approximate number of frames currently queued in the ring buffer.
// Relaxed loads are deliberate: callers only need a snapshot for sizing
// decisions, not synchronization with the other thread's data.
u32 AudioStream::GetBufferedFramesRelaxed() const
{
  const u32 rpos = m_rpos.load(std::memory_order_relaxed);
  const u32 wpos = m_wpos.load(std::memory_order_relaxed);
  return (wpos + m_buffer_size - rpos) % m_buffer_size;
}
// Consumer side of the ring buffer: copies up to num_frames interleaved
// stereo frames into samples. Shortfalls are covered by a crude linear
// spread of whatever was read (or silence when nothing is buffered), and the
// output is scaled by the current volume. Intended to be called from the
// audio device thread — assumes a single consumer; confirm against backends.
void AudioStream::ReadFrames(SampleType* samples, u32 num_frames)
{
  const u32 available_frames = GetBufferedFramesRelaxed();
  u32 frames_to_read = num_frames;
  u32 silence_frames = 0;

  if (m_filling)
  {
    // Recovering from an underrun: keep emitting silence until the buffer has
    // refilled to a threshold. The refill target is a larger fraction of the
    // buffer when not time-stretching.
    u32 toFill = m_buffer_size / ((m_parameters.stretch_mode != AudioStretchMode::TimeStretch) ? 32 : 400);
    toFill = GetAlignedBufferSize(toFill);

    if (available_frames < toFill)
    {
      silence_frames = num_frames;
      frames_to_read = 0;
    }
    else
    {
      m_filling = false;
      VERBOSE_LOG("Underrun compensation done ({} frames buffered)", toFill);
    }
  }

  if (available_frames < frames_to_read)
  {
    // Not enough data: consume what exists, pad the rest, and enter fill mode.
    silence_frames = frames_to_read - available_frames;
    frames_to_read = available_frames;
    m_filling = true;

    // Let the time stretcher know so it can compensate.
    if (m_parameters.stretch_mode == AudioStretchMode::TimeStretch)
      StretchUnderrun();
  }

  if (frames_to_read > 0)
  {
    u32 rpos = m_rpos.load(std::memory_order_acquire);

    u32 end = m_buffer_size - rpos;
    if (end > frames_to_read)
      end = frames_to_read;

    // towards the end of the buffer
    if (end > 0)
    {
      std::memcpy(samples, &m_buffer[rpos * NUM_CHANNELS], end * NUM_CHANNELS * sizeof(SampleType));
      rpos += end;
      rpos = (rpos == m_buffer_size) ? 0 : rpos;
    }

    // after wrapping around
    const u32 start = frames_to_read - end;
    if (start > 0)
    {
      std::memcpy(&samples[end * NUM_CHANNELS], &m_buffer[0], start * NUM_CHANNELS * sizeof(SampleType));
      rpos = start;
    }

    m_rpos.store(rpos, std::memory_order_release);
  }

  if (silence_frames > 0)
  {
    if (frames_to_read > 0)
    {
      // super basic resampler - spread the input samples evenly across the output samples. will sound like ass and have
      // aliasing, but better than popping by inserting silence.
      const u32 increment =
        static_cast<u32>(65536.0f * (static_cast<float>(frames_to_read) / static_cast<float>(num_frames)));

      // Stack copy of the frames just read; they get overwritten as we expand
      // them in place across the full output range.
      SampleType* resample_ptr = static_cast<SampleType*>(alloca(frames_to_read * NUM_CHANNELS * sizeof(SampleType)));
      std::memcpy(resample_ptr, samples, frames_to_read * NUM_CHANNELS * sizeof(SampleType));

      SampleType* out_ptr = samples;
      const u32 copy_stride = sizeof(SampleType) * NUM_CHANNELS;
      u32 resample_subpos = 0; // 16.16 fixed-point position within the source
      for (u32 i = 0; i < num_frames; i++)
      {
        std::memcpy(out_ptr, resample_ptr, copy_stride);
        out_ptr += NUM_CHANNELS;
        resample_subpos += increment;
        resample_ptr += (resample_subpos >> 16) * NUM_CHANNELS;
        resample_subpos %= 65536u;
      }

      VERBOSE_LOG("Audio buffer underflow, resampled {} frames to {}", frames_to_read, num_frames);
    }
    else
    {
      // no data, fall back to silence
      std::memset(samples + (frames_to_read * NUM_CHANNELS), 0, silence_frames * NUM_CHANNELS * sizeof(s16));
    }
  }

  if (m_volume != 100)
  {
    // Volume scaling: the vector loop handles 8 samples per iteration via
    // s16 -> s32 -> float widening; the scalar loop finishes the tail.
    u32 num_samples = num_frames * NUM_CHANNELS;
    const u32 aligned_samples = Common::AlignDownPow2(num_samples, 8);
    num_samples -= aligned_samples;

    const float volume_mult = static_cast<float>(m_volume) / 100.0f;
    const GSVector4 volume_multv = GSVector4(volume_mult);
    const SampleType* const aligned_samples_end = samples + aligned_samples;
    for (; samples != aligned_samples_end; samples += 8)
    {
      GSVector4i iv = GSVector4i::load<false>(samples); // [0, 1, 2, 3, 4, 5, 6, 7]
      GSVector4i iv1 = iv.upl16(iv);                    // [0, 0, 1, 1, 2, 2, 3, 3]
      GSVector4i iv2 = iv.uph16(iv);                    // [4, 4, 5, 5, 6, 6, 7, 7]
      iv1 = iv1.sra32<16>();                            // [0, 1, 2, 3]
      iv2 = iv2.sra32<16>();                            // [4, 5, 6, 7]
      GSVector4 fv1 = GSVector4(iv1);                   // [f0, f1, f2, f3]
      GSVector4 fv2 = GSVector4(iv2);                   // [f4, f5, f6, f7]
      fv1 = fv1 * volume_multv;                         // [f0, f1, f2, f3]
      fv2 = fv2 * volume_multv;                         // [f4, f5, f6, f7]
      iv1 = GSVector4i(fv1);                            // [0, 1, 2, 3]
      iv2 = GSVector4i(fv2);                            // [4, 5, 6, 7]
      iv = iv1.ps32(iv2);                               // [0, 1, 2, 3, 4, 5, 6, 7]
      GSVector4i::store<false>(samples, iv);
    }

    while (num_samples > 0)
    {
      *samples = static_cast<s16>(std::clamp(static_cast<float>(*samples) * volume_mult, -32768.0f, 32767.0f));
      samples++;
      num_samples--;
    }
  }
}
// Producer side of the ring buffer: commits num_frames interleaved stereo
// frames. On overrun, time-stretch mode notifies the stretcher via
// StretchOverrun() (and still writes); otherwise the chunk is dropped.
void AudioStream::InternalWriteFrames(s16* data, u32 num_frames)
{
  const u32 free = m_buffer_size - GetBufferedFramesRelaxed();
  if (free <= num_frames)
  {
    if (m_parameters.stretch_mode == AudioStretchMode::TimeStretch)
    {
      StretchOverrun();
    }
    else
    {
      DEBUG_LOG("Buffer overrun, chunk dropped");
      return;
    }
  }

  u32 wpos = m_wpos.load(std::memory_order_acquire);

  // wrapping around the end of the buffer?
  if ((m_buffer_size - wpos) <= num_frames)
  {
    // needs to be written in two parts
    const u32 end = m_buffer_size - wpos;
    const u32 start = num_frames - end;

    // start is zero when this chunk reaches exactly the end
    std::memcpy(&m_buffer[wpos * NUM_CHANNELS], data, end * NUM_CHANNELS * sizeof(SampleType));
    if (start > 0)
      std::memcpy(&m_buffer[0], data + end * NUM_CHANNELS, start * NUM_CHANNELS * sizeof(SampleType));

    wpos = start;
  }
  else
  {
    // no split
    std::memcpy(&m_buffer[wpos * NUM_CHANNELS], data, num_frames * NUM_CHANNELS * sizeof(SampleType));
    wpos += num_frames;
  }

  // Release store publishes the copied samples to the consumer thread.
  m_wpos.store(wpos, std::memory_order_release);
}
// Common second-stage setup shared by all backends: sizes/allocates the ring
// buffer and creates the SoundTouch stretcher (a no-op when stretching is off).
void AudioStream::BaseInitialize()
{
  AllocateBuffer();
  StretchAllocate();
}
void AudioStream::AllocateBuffer()
{
  // Stretcher can produce a large amount of samples from few samples when running slow, so allocate a larger buffer.
  // In most cases it's not going to be used, but better to have a larger buffer and not need it than overrun.
  const u32 multiplier = (m_parameters.stretch_mode == AudioStretchMode::TimeStretch) ?
                           16 :
                           ((m_parameters.stretch_mode == AudioStretchMode::Off) ? 1 : 2);
  m_buffer_size = GetAlignedBufferSize(((m_parameters.buffer_ms * multiplier) * m_sample_rate) / 1000);
  // Target size is the unscaled configured latency, used as the fill goal.
  m_target_buffer_size = GetAlignedBufferSize((m_sample_rate * m_parameters.buffer_ms) / 1000u);

  // All three buffers are vector-aligned for the SIMD conversion/volume paths.
  m_buffer = Common::make_unique_aligned_for_overwrite<s16[]>(VECTOR_ALIGNMENT, m_buffer_size * NUM_CHANNELS);
  m_staging_buffer = Common::make_unique_aligned_for_overwrite<s16[]>(VECTOR_ALIGNMENT, CHUNK_SIZE * NUM_CHANNELS);
  m_float_buffer = Common::make_unique_aligned_for_overwrite<float[]>(VECTOR_ALIGNMENT, CHUNK_SIZE * NUM_CHANNELS);

  DEV_LOG("Allocated buffer of {} frames for buffer of {} ms [stretch {}, target size {}].", m_buffer_size,
          m_parameters.buffer_ms, GetStretchModeName(m_parameters.stretch_mode), m_target_buffer_size);
}
// Frees all sample storage and resets the ring buffer cursors to empty.
// Callers are expected to have stopped the consumer first (see SetStretchMode,
// which pauses around buffer resizing).
void AudioStream::DestroyBuffer()
{
  m_staging_buffer.reset();
  m_float_buffer.reset();
  m_buffer.reset();

  m_buffer_size = 0;
  m_wpos.store(0, std::memory_order_release);
  m_rpos.store(0, std::memory_order_release);
}
// Discards all buffered audio, including anything queued inside SoundTouch,
// and restores the nominal tempo when time-stretching.
void AudioStream::EmptyBuffer()
{
  if (IsStretchEnabled())
  {
    soundtouch_clear(m_soundtouch);
    if (m_parameters.stretch_mode == AudioStretchMode::TimeStretch)
      soundtouch_setTempo(m_soundtouch, m_nominal_rate);
  }

  // Snap the write cursor onto the read cursor, leaving zero queued frames.
  m_wpos.store(m_rpos.load(std::memory_order_acquire), std::memory_order_release);
}
void AudioStream::SetNominalRate(float tempo)
{
  // Records the target playback rate (1.0 = realtime) and forwards it to
  // SoundTouch: as a resample rate in Resample mode, or as a tempo in
  // TimeStretch mode unless the stretcher is currently inactive.
  m_nominal_rate = tempo;

  switch (m_parameters.stretch_mode)
  {
    case AudioStretchMode::Resample:
      soundtouch_setRate(m_soundtouch, tempo);
      break;

    case AudioStretchMode::TimeStretch:
      if (!m_stretch_inactive)
        soundtouch_setTempo(m_soundtouch, tempo);
      break;

    default:
      break;
  }
}
// Switches the stretch mode at runtime. Buffer sizing depends on the mode
// (see AllocateBuffer), so the buffers and stretcher are torn down and
// rebuilt, with output paused around the swap.
void AudioStream::SetStretchMode(AudioStretchMode mode)
{
  if (m_parameters.stretch_mode == mode)
    return;

  // can't resize the buffers while paused
  bool paused = m_paused;
  if (!paused)
    SetPaused(true);

  DestroyBuffer();
  StretchDestroy();
  m_parameters.stretch_mode = mode;

  AllocateBuffer();
  if (m_parameters.stretch_mode != AudioStretchMode::Off)
    StretchAllocate();

  if (!paused)
    SetPaused(false);
}
// Pauses/resumes output. The base implementation only records the flag;
// presumably backend subclasses extend this to start/stop the device — confirm.
void AudioStream::SetPaused(bool paused)
{
  m_paused = paused;
}

// Sets output volume in percent (100 = unity). Applied during ReadFrames();
// note EndWrite() drops committed frames entirely while volume is 0.
void AudioStream::SetOutputVolume(u32 volume)
{
  m_volume = volume;
}
// Hands the producer a pointer into the staging chunk plus the number of
// frames remaining in it. The write is committed by EndWrite().
void AudioStream::BeginWrite(SampleType** buffer_ptr, u32* num_frames)
{
  // TODO: Write directly to buffer when not using stretching.
  *buffer_ptr = &m_staging_buffer[m_staging_buffer_pos];
  *num_frames = CHUNK_SIZE - (m_staging_buffer_pos / NUM_CHANNELS);
}
// Converts interleaved s16 samples to float (scaled by 1/32767), eight
// samples per iteration. num_samples is rounded up to a multiple of 8, so
// both buffers must be padded accordingly and vector-aligned.
static void S16ChunkToFloat(const s16* src, float* dst, u32 num_samples)
{
  constexpr GSVector4 S16_TO_FLOAT_V = GSVector4::cxpr(1.0f / 32767.0f);

  const u32 iterations = (num_samples + 7) / 8;
  for (u32 i = 0; i < iterations; i++)
  {
    const GSVector4i sv = GSVector4i::load<true>(src);
    src += 8;

    // Widen s16 -> s32 with sign extension: self-interleave then arithmetic
    // shift right by 16.
    GSVector4i iv1 = sv.upl16(sv);  // [0, 0, 1, 1, 2, 2, 3, 3]
    GSVector4i iv2 = sv.uph16(sv);  // [4, 4, 5, 5, 6, 6, 7, 7]
    iv1 = iv1.sra32<16>();          // [0, 1, 2, 3]
    iv2 = iv2.sra32<16>();          // [4, 5, 6, 7]
    GSVector4 fv1 = GSVector4(iv1); // [f0, f1, f2, f3]
    GSVector4 fv2 = GSVector4(iv2); // [f4, f5, f6, f7]
    fv1 = fv1 * S16_TO_FLOAT_V;
    fv2 = fv2 * S16_TO_FLOAT_V;

    GSVector4::store<true>(dst + 0, fv1);
    GSVector4::store<true>(dst + 4, fv2);
    dst += 8;
  }
}
// Converts float samples back to interleaved s16 (scaled by 32767), eight
// samples per iteration. Same rounding-up/padding/alignment requirements as
// S16ChunkToFloat.
static void FloatChunkToS16(s16* dst, const float* src, u32 num_samples)
{
  const GSVector4 FLOAT_TO_S16_V = GSVector4::cxpr(32767.0f);

  const u32 iterations = (num_samples + 7) / 8;
  for (u32 i = 0; i < iterations; i++)
  {
    GSVector4 fv1 = GSVector4::load<true>(src + 0);
    GSVector4 fv2 = GSVector4::load<true>(src + 4);
    src += 8;

    fv1 = fv1 * FLOAT_TO_S16_V;
    fv2 = fv2 * FLOAT_TO_S16_V;
    GSVector4i iv1 = GSVector4i(fv1);
    GSVector4i iv2 = GSVector4i(fv2);

    // s32 -> s16 pack; presumably saturating (packss semantics) so
    // out-of-range floats clamp rather than wrap — confirm in GSVector.
    const GSVector4i iv = iv1.ps32(iv2);
    GSVector4i::store<true>(dst, iv);
    dst += 8;
  }
}
// Commits frames previously written via BeginWrite(). Frames accumulate in
// the staging buffer until a full CHUNK_SIZE-frame chunk is ready, which is
// then pushed either straight into the ring buffer or through the stretcher.
void AudioStream::EndWrite(u32 num_frames)
{
  // don't bother committing anything when muted
  if (m_volume == 0)
    return;

  // Position is tracked in samples, not frames.
  m_staging_buffer_pos += num_frames * NUM_CHANNELS;
  DebugAssert(m_staging_buffer_pos <= (CHUNK_SIZE * NUM_CHANNELS));
  if ((m_staging_buffer_pos / NUM_CHANNELS) < CHUNK_SIZE)
    return;

  m_staging_buffer_pos = 0;

  if (!IsStretchEnabled())
  {
    InternalWriteFrames(m_staging_buffer.get(), CHUNK_SIZE);
    return;
  }

  // SoundTouch only accepts float input, so convert the chunk first.
  S16ChunkToFloat(m_staging_buffer.get(), m_float_buffer.get(), CHUNK_SIZE * NUM_CHANNELS);
  StretchWriteBlock(m_float_buffer.get());
}
// Time stretching algorithm based on PCSX2 implementation.

// Inclusive range check: true when min <= val <= max. Written with <= so that
// a NaN input always yields false (both comparisons fail on NaN).
template<class T>
ALWAYS_INLINE static bool IsInRange(const T& val, const T& min, const T& max)
{
  return (min <= val && val <= max);
}
// Creates and configures the SoundTouch instance for the current parameters,
// and resets all stretch-tracking state. No-op when stretching is off.
void AudioStream::StretchAllocate()
{
  if (m_parameters.stretch_mode == AudioStretchMode::Off)
    return;

  m_soundtouch = soundtouch_createInstance();
  soundtouch_setSampleRate(m_soundtouch, m_sample_rate);
  soundtouch_setChannels(m_soundtouch, NUM_CHANNELS);

  soundtouch_setSetting(m_soundtouch, SETTING_USE_QUICKSEEK, m_parameters.stretch_use_quickseek);
  soundtouch_setSetting(m_soundtouch, SETTING_USE_AA_FILTER, m_parameters.stretch_use_aa_filter);
  soundtouch_setSetting(m_soundtouch, SETTING_SEQUENCE_MS, m_parameters.stretch_sequence_length_ms);
  soundtouch_setSetting(m_soundtouch, SETTING_SEEKWINDOW_MS, m_parameters.stretch_seekwindow_ms);
  soundtouch_setSetting(m_soundtouch, SETTING_OVERLAP_MS, m_parameters.stretch_overlap_ms);

  // Resample changes pitch with speed; time-stretch keeps pitch constant.
  if (m_parameters.stretch_mode == AudioStretchMode::Resample)
    soundtouch_setRate(m_soundtouch, m_nominal_rate);
  else
    soundtouch_setTempo(m_soundtouch, m_nominal_rate);

  // Start at the reset threshold so the first tempo update begins from a
  // clean baseline.
  m_stretch_reset = STRETCH_RESET_THRESHOLD;
  m_stretch_inactive = false;
  m_stretch_ok_count = 0;
  m_dynamic_target_usage = 0.0f;

  m_average_position = 0;
  m_average_available = 0;

  m_staging_buffer_pos = 0;
}
// Releases the SoundTouch instance, if any. Safe to call repeatedly.
void AudioStream::StretchDestroy()
{
  if (!m_soundtouch)
    return;

  soundtouch_destroyInstance(m_soundtouch);
  m_soundtouch = nullptr;
}
// Feeds one float chunk through SoundTouch (or converts it straight back to
// s16 when stretching is disabled) and drains all produced output into the
// ring buffer.
void AudioStream::StretchWriteBlock(const float* block)
{
  if (IsStretchEnabled())
  {
    soundtouch_putSamples(m_soundtouch, block, CHUNK_SIZE);

    // Drain everything SoundTouch has ready; each batch is converted back to
    // s16 in the staging buffer, then committed. Note the comma operator:
    // the loop runs until receiveSamples() returns zero.
    u32 tempProgress;
    while (tempProgress = soundtouch_receiveSamples(m_soundtouch, m_float_buffer.get(), CHUNK_SIZE), tempProgress != 0)
    {
      FloatChunkToS16(m_staging_buffer.get(), m_float_buffer.get(), tempProgress * NUM_CHANNELS);
      InternalWriteFrames(m_staging_buffer.get(), tempProgress);
    }

    if (m_parameters.stretch_mode == AudioStretchMode::TimeStretch)
      UpdateStretchTempo();
  }
  else
  {
    FloatChunkToS16(m_staging_buffer.get(), block, CHUNK_SIZE * NUM_CHANNELS);
    InternalWriteFrames(m_staging_buffer.get(), CHUNK_SIZE);
  }
}
float AudioStream::AddAndGetAverageTempo(float val)
{
static constexpr u32 AVERAGING_WINDOW = 50;
// Build up a circular buffer for tempo averaging to prevent rapid tempo oscillations.
if (m_average_available < AVERAGING_BUFFER_SIZE)
m_average_available++;
m_average_fullness[m_average_position] = val;
m_average_position = (m_average_position + 1U) % AVERAGING_BUFFER_SIZE;
// The + AVERAGING_BUFFER_SIZE ensures we don't go negative when using modulo arithmetic.
const u32 actual_window = std::min<u32>(m_average_available, AVERAGING_WINDOW);
u32 index = (m_average_position - actual_window + AVERAGING_BUFFER_SIZE) % AVERAGING_BUFFER_SIZE;
float sum = 0.0f;
u32 count = 0;
#ifdef CPU_ARCH_SIMD
GSVector4 vsum = GSVector4::zero();
const u32 vcount = Common::AlignDownPow2(actual_window, 4);
for (; count < vcount; count += 4)
{
if ((index + 4) > AVERAGING_BUFFER_SIZE)
{
// wraparound
for (u32 i = 0; i < 4; i++)
{
sum += m_average_fullness[index];
index = (index + 1) % AVERAGING_BUFFER_SIZE;
}
}
else
{
vsum += GSVector4::load<false>(&m_average_fullness[index]);
index = (index + 4) % AVERAGING_BUFFER_SIZE;
}
}
sum += vsum.addv();
#ifndef __ANDROID__
case AudioBackend::Cubeb:
ret = GetCubebDriverNames();
break;
#endif
for (; count < actual_window; count++)
{
sum += m_average_fullness[index];
index = (index + 1) % AVERAGING_BUFFER_SIZE;
}
sum /= static_cast<float>(actual_window);
return (sum != 0.0f) ? sum : 1.0f;
default:
break;
}
return ret;
}
void AudioStream::UpdateStretchTempo()
std::vector<AudioStream::DeviceInfo> AudioStream::GetOutputDevices(AudioBackend backend, const char* driver,
u32 sample_rate)
{
static constexpr float MIN_TEMPO = 0.05f;
static constexpr float MAX_TEMPO = 500.0f;
// Hysteresis thresholds to prevent stretcher from constantly toggling on/off.
// i.e. this is the range we will run in 1:1 mode for.
static constexpr float INACTIVE_GOOD_FACTOR = 1.04f;
static constexpr float INACTIVE_BAD_FACTOR = 1.2f;
// Require sustained good performance before deactivating.
static constexpr u32 INACTIVE_MIN_OK_COUNT = 50;
static constexpr u32 COMPENSATION_DIVIDER = 100;
// Controls how aggressively we adjust the dynamic target. We want to keep the same target size regardless
// of the target speed, but need additional buffering when intentionally running below 100%.
float base_target_usage = static_cast<float>(m_target_buffer_size) / std::min(m_nominal_rate, 1.0f);
// tempo = current_buffer / target_buffer.
const u32 ibuffer_usage = GetBufferedFramesRelaxed();
float buffer_usage = static_cast<float>(ibuffer_usage);
float tempo = buffer_usage / m_dynamic_target_usage;
// Prevents the system from getting stuck in a bad state due to accumulated errors.
if (m_stretch_reset >= STRETCH_RESET_THRESHOLD)
std::vector<AudioStream::DeviceInfo> ret;
switch (backend)
{
VERBOSE_LOG("___ Stretcher is being reset.");
m_stretch_inactive = false;
m_stretch_ok_count = 0;
m_dynamic_target_usage = base_target_usage;
m_average_available = 0;
m_average_position = 0;
m_stretch_reset = 0;
tempo = m_nominal_rate;
}
else if (m_stretch_reset > 0)
{
// Back off resets if enough time has passed. That way a very occasional lag/overflow
// doesn't cascade into unnecessary tempo adjustment.
const u64 now = Timer::GetCurrentValue();
if (Timer::ConvertValueToSeconds(now - m_stretch_reset_time) >= 2.0f)
{
m_stretch_reset--;
m_stretch_reset_time = now;
}
#ifndef __ANDROID__
case AudioBackend::Cubeb:
ret = GetCubebOutputDevices(driver, sample_rate);
break;
#endif
default:
break;
}
// Apply temporal smoothing to prevent rapid tempo changes that cause artifacts.
tempo = AddAndGetAverageTempo(tempo);
// Apply non-linear dampening when close to target to reduce oscillation.
if (tempo < 2.0f)
tempo = std::sqrt(tempo);
tempo = std::clamp(tempo, MIN_TEMPO, MAX_TEMPO);
if (tempo < 1.0f)
base_target_usage /= std::sqrt(tempo);
// Gradually adjust our dynamic target toward what would give us the desired tempo.
m_dynamic_target_usage +=
static_cast<float>(base_target_usage / tempo - m_dynamic_target_usage) / static_cast<float>(COMPENSATION_DIVIDER);
// Snap back to baseline if we're very close.
if (IsInRange(tempo, 0.9f, 1.1f) &&
IsInRange(m_dynamic_target_usage, base_target_usage * 0.9f, base_target_usage * 1.1f))
{
m_dynamic_target_usage = base_target_usage;
}
// Are we changing the active state?
if (!m_stretch_inactive)
{
if (IsInRange(tempo, 1.0f / INACTIVE_GOOD_FACTOR, INACTIVE_GOOD_FACTOR))
m_stretch_ok_count++;
else
m_stretch_ok_count = 0;
if (m_stretch_ok_count >= INACTIVE_MIN_OK_COUNT)
{
VERBOSE_LOG("=== Stretcher is now inactive.");
m_stretch_inactive = true;
}
}
else if (!IsInRange(tempo, 1.0f / INACTIVE_BAD_FACTOR, INACTIVE_BAD_FACTOR))
{
VERBOSE_LOG("~~~ Stretcher is now active @ tempo {}.", tempo);
m_stretch_inactive = false;
m_stretch_ok_count = 0;
}
// If we're inactive, we don't want to change the tempo.
if (m_stretch_inactive)
tempo = m_nominal_rate;
if constexpr (LOG_TIMESTRETCH_STATS)
{
static float min_tempo = 0.0f;
static float max_tempo = 0.0f;
static float acc_tempo = 0.0f;
static u32 acc_cnt = 0;
acc_tempo += tempo;
acc_cnt++;
min_tempo = std::min(min_tempo, tempo);
max_tempo = std::max(max_tempo, tempo);
static int iterations = 0;
static u64 last_log_time = 0;
const u64 now = Timer::GetCurrentValue();
if (Timer::ConvertValueToSeconds(now - last_log_time) > 1.0f)
{
const float avg_tempo = (acc_cnt > 0) ? (acc_tempo / static_cast<float>(acc_cnt)) : 0.0f;
VERBOSE_LOG("{:3d} ms ({:3.0f}%), tempo: avg={:.2f} min={:.2f} max={:.2f}, comp: {:2.3f}, iters: {}, reset:{}",
(ibuffer_usage * 1000u) / m_sample_rate, 100.0f * buffer_usage / base_target_usage, avg_tempo,
min_tempo, max_tempo, m_dynamic_target_usage / base_target_usage, iterations, m_stretch_reset);
last_log_time = now;
iterations = 0;
min_tempo = std::numeric_limits<float>::max();
max_tempo = std::numeric_limits<float>::min();
acc_tempo = 0.0f;
acc_cnt = 0;
}
iterations++;
}
soundtouch_setTempo(m_soundtouch, tempo);
return ret;
}
void AudioStream::StretchUnderrun()
std::unique_ptr<AudioStream> AudioStream::CreateStream(AudioBackend backend, u32 sample_rate, u32 channels,
u32 output_latency_frames, bool output_latency_minimal,
const char* driver_name, const char* device_name,
AudioStreamSource* source, bool auto_start, Error* error)
{
// Didn't produce enough frames in time.
m_stretch_reset++;
if (m_stretch_reset < STRETCH_RESET_THRESHOLD)
m_stretch_reset_time = Timer::GetCurrentValue();
}
void AudioStream::StretchOverrun()
{
// Produced more frames than can fit in the buffer.
m_stretch_reset++;
if (m_stretch_reset < STRETCH_RESET_THRESHOLD)
m_stretch_reset_time = Timer::GetCurrentValue();
// Drop two packets to give the time stretcher a bit more time to slow things down.
// This prevents a cascading overrun situation where each overrun makes the next one more likely.
const u32 discard = CHUNK_SIZE * 2;
m_rpos.store((m_rpos.load(std::memory_order_acquire) + discard) % m_buffer_size, std::memory_order_release);
}
void AudioStream::EmptyStretchBuffers()
{
if (!IsStretchEnabled())
return;
m_stretch_reset = STRETCH_RESET_THRESHOLD;
// Wipe soundtouch samples. If we don't do this and we're switching from a high tempo to low,
// we'll still have quite a large buffer of samples that will be played back at a low tempo,
// resulting in a long delay before the audio starts playing at the new tempo.
soundtouch_clear(m_soundtouch);
switch (backend)
{
#ifndef __ANDROID__
case AudioBackend::Cubeb:
return CreateCubebAudioStream(sample_rate, channels, output_latency_frames, output_latency_minimal, driver_name,
device_name, source, auto_start, error);
case AudioBackend::SDL:
return CreateSDLAudioStream(sample_rate, channels, output_latency_frames, output_latency_minimal, source,
auto_start, error);
#else
case AudioBackend::AAudio:
return CreateAAudioAudioStream(sample_rate, channels, output_latency_frames, output_latency_minimal, source,
auto_start, error);
case AudioBackend::OpenSLES:
return CreateOpenSLESAudioStream(sample_rate, channels, output_latency_frames, output_latency_minimal, source,
auto_start, error);
#endif
default:
Error::SetStringView(error, "Unknown audio backend.");
return nullptr;
}
}

View File

@@ -1,24 +1,13 @@
// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
// SPDX-FileCopyrightText: 2019-2025 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: CC-BY-NC-ND-4.0
#pragma once
#include "common/align.h"
#include "common/types.h"
#include <array>
#include <atomic>
#include <memory>
#include <optional>
#include <string>
#include <vector>
class Error;
class SettingsInterface;
namespace soundtouch {
class SoundTouch;
}
enum class AudioBackend : u8
{
@@ -33,59 +22,18 @@ enum class AudioBackend : u8
Count
};
enum class AudioStretchMode : u8
class AudioStreamSource
{
Off,
Resample,
TimeStretch,
Count
};
public:
using SampleType = s16;
struct AudioStreamParameters
{
AudioStretchMode stretch_mode = DEFAULT_STRETCH_MODE;
bool output_latency_minimal = DEFAULT_OUTPUT_LATENCY_MINIMAL;
u16 output_latency_ms = DEFAULT_OUTPUT_LATENCY_MS;
u16 buffer_ms = DEFAULT_BUFFER_MS;
u16 stretch_sequence_length_ms = DEFAULT_STRETCH_SEQUENCE_LENGTH;
u16 stretch_seekwindow_ms = DEFAULT_STRETCH_SEEKWINDOW;
u16 stretch_overlap_ms = DEFAULT_STRETCH_OVERLAP;
bool stretch_use_quickseek = DEFAULT_STRETCH_USE_QUICKSEEK;
bool stretch_use_aa_filter = DEFAULT_STRETCH_USE_AA_FILTER;
static constexpr AudioStretchMode DEFAULT_STRETCH_MODE = AudioStretchMode::TimeStretch;
#ifndef __ANDROID__
static constexpr u16 DEFAULT_BUFFER_MS = 50;
static constexpr u16 DEFAULT_OUTPUT_LATENCY_MS = 20;
#else
static constexpr u16 DEFAULT_BUFFER_MS = 100;
static constexpr u16 DEFAULT_OUTPUT_LATENCY_MS = 20;
#endif
static constexpr bool DEFAULT_OUTPUT_LATENCY_MINIMAL = false;
static constexpr u16 DEFAULT_STRETCH_SEQUENCE_LENGTH = 30;
static constexpr u16 DEFAULT_STRETCH_SEEKWINDOW = 20;
static constexpr u16 DEFAULT_STRETCH_OVERLAP = 10;
static constexpr bool DEFAULT_STRETCH_USE_QUICKSEEK = false;
static constexpr bool DEFAULT_STRETCH_USE_AA_FILTER = false;
void Load(const SettingsInterface& si, const char* section);
void Save(SettingsInterface& si, const char* section) const;
void Clear(SettingsInterface& si, const char* section);
bool operator==(const AudioStreamParameters& rhs) const;
bool operator!=(const AudioStreamParameters& rhs) const;
virtual void ReadFrames(SampleType* samples, u32 num_frames) = 0;
};
class AudioStream
{
public:
using SampleType = s16;
static constexpr u32 NUM_CHANNELS = 2;
static constexpr u32 CHUNK_SIZE = 64;
using SampleType = AudioStreamSource::SampleType;
#ifndef __ANDROID__
static constexpr AudioBackend DEFAULT_BACKEND = AudioBackend::Cubeb;
@@ -103,129 +51,52 @@ public:
~DeviceInfo();
};
public:
virtual ~AudioStream();
static u32 GetAlignedBufferSize(u32 size);
static u32 GetBufferSizeForMS(u32 sample_rate, u32 ms);
static u32 GetMSForBufferSize(u32 sample_rate, u32 buffer_size);
static std::optional<AudioBackend> ParseBackendName(const char* str);
static const char* GetBackendName(AudioBackend backend);
static const char* GetBackendDisplayName(AudioBackend backend);
static const char* GetStretchModeName(AudioStretchMode mode);
static const char* GetStretchModeDisplayName(AudioStretchMode mode);
static std::optional<AudioStretchMode> ParseStretchMode(const char* name);
static u32 FramesToMS(u32 sample_rate, u32 frames);
ALWAYS_INLINE u32 GetSampleRate() const { return m_sample_rate; }
ALWAYS_INLINE u32 GetBufferSize() const { return m_buffer_size; }
ALWAYS_INLINE u32 GetTargetBufferSize() const { return m_target_buffer_size; }
ALWAYS_INLINE u32 GetOutputVolume() const { return m_volume; }
ALWAYS_INLINE float GetNominalTempo() const { return m_nominal_rate; }
ALWAYS_INLINE bool IsPaused() const { return m_paused; }
/// Returns a list of available driver names for the specified backend.
static std::vector<std::pair<std::string, std::string>> GetDriverNames(AudioBackend backend);
u32 GetBufferedFramesRelaxed() const;
/// Returns a list of available output devices for the specified backend and driver.
static std::vector<DeviceInfo> GetOutputDevices(AudioBackend backend, const char* driver, u32 sample_rate);
/// Creates an audio stream with the specified parameters.
static std::unique_ptr<AudioStream> CreateStream(AudioBackend backend, u32 sample_rate, u32 channels,
u32 output_latency_frames, bool output_latency_minimal,
const char* driver_name, const char* device_name,
AudioStreamSource* source, bool auto_start, Error* error);
/// Starts the stream, allowing it to request data.
virtual bool Start(Error* error) = 0;
/// Temporarily pauses the stream, preventing it from requesting data.
virtual void SetPaused(bool paused);
void SetOutputVolume(u32 volume);
void BeginWrite(SampleType** buffer_ptr, u32* num_frames);
void EndWrite(u32 num_frames);
void EmptyBuffer();
/// Nominal rate is used for both resampling and timestretching, input samples are assumed to be this amount faster
/// than the sample rate.
void SetNominalRate(float tempo);
void SetStretchMode(AudioStretchMode mode);
/// Wipes out the time stretching buffer, call when reducing target speed.
void EmptyStretchBuffers();
static std::vector<std::pair<std::string, std::string>> GetDriverNames(AudioBackend backend);
static std::vector<DeviceInfo> GetOutputDevices(AudioBackend backend, const char* driver, u32 sample_rate);
static std::unique_ptr<AudioStream> CreateStream(AudioBackend backend, u32 sample_rate,
const AudioStreamParameters& parameters, const char* driver_name,
const char* device_name, Error* error = nullptr);
static std::unique_ptr<AudioStream> CreateNullStream(u32 sample_rate, u32 buffer_ms);
virtual bool Stop(Error* error) = 0;
protected:
AudioStream(u32 sample_rate, const AudioStreamParameters& parameters);
void BaseInitialize();
void ReadFrames(SampleType* samples, u32 num_frames);
u32 m_sample_rate = 0;
u32 m_volume = 100;
AudioStreamParameters m_parameters;
bool m_stretch_inactive = false;
bool m_filling = false;
bool m_paused = false;
AudioStream();
private:
static constexpr u32 AVERAGING_BUFFER_SIZE = 256;
static constexpr u32 STRETCH_RESET_THRESHOLD = 5;
#ifndef __ANDROID__
static std::vector<std::pair<std::string, std::string>> GetCubebDriverNames();
static std::vector<DeviceInfo> GetCubebOutputDevices(const char* driver, u32 sample_rate);
static std::unique_ptr<AudioStream> CreateCubebAudioStream(u32 sample_rate, const AudioStreamParameters& parameters,
const char* driver_name, const char* device_name,
Error* error);
static std::unique_ptr<AudioStream> CreateSDLAudioStream(u32 sample_rate, const AudioStreamParameters& parameters,
Error* error);
static std::unique_ptr<AudioStream> CreateCubebAudioStream(u32 sample_rate, u32 channels, u32 output_latency_frames,
bool output_latency_minimal, const char* driver_name,
const char* device_name, AudioStreamSource* source,
bool auto_start, Error* error);
static std::unique_ptr<AudioStream> CreateSDLAudioStream(u32 sample_rate, u32 channels, u32 output_latency_frames,
bool output_latency_minimal, AudioStreamSource* source,
bool auto_start, Error* error);
#else
static std::unique_ptr<AudioStream> CreateAAudioAudioStream(u32 sample_rate, const AudioStreamParameters& parameters,
Error* error);
static std::unique_ptr<AudioStream> CreateOpenSLESAudioStream(u32 sample_rate,
const AudioStreamParameters& parameters, Error* error);
static std::unique_ptr<AudioStream> CreateAAudioAudioStream(u32 sample_rate, u32 output_latency_frames,
bool output_latency_minimal, AudioStreamSource* source,
bool auto_start, Error* error);
static std::unique_ptr<AudioStream> CreateOpenSLESAudioStream(u32 sample_rate, u32 output_latency_frames,
bool output_latency_minimal, AudioStreamSource* source,
bool auto_start, Error* error);
#endif
ALWAYS_INLINE bool IsStretchEnabled() const { return m_parameters.stretch_mode != AudioStretchMode::Off; }
void AllocateBuffer();
void DestroyBuffer();
void InternalWriteFrames(SampleType* samples, u32 num_frames);
void StretchAllocate();
void StretchDestroy();
void StretchWriteBlock(const float* block);
void StretchUnderrun();
void StretchOverrun();
float AddAndGetAverageTempo(float val);
void UpdateStretchTempo();
u32 m_buffer_size = 0;
Common::unique_aligned_ptr<s16[]> m_buffer;
std::atomic<u32> m_rpos{0};
std::atomic<u32> m_wpos{0};
void* m_soundtouch = nullptr;
u32 m_target_buffer_size = 0;
u32 m_stretch_reset = STRETCH_RESET_THRESHOLD;
u64 m_stretch_reset_time = 0;
u32 m_stretch_ok_count = 0;
float m_nominal_rate = 1.0f;
float m_dynamic_target_usage = 0.0f;
u32 m_average_position = 0;
u32 m_average_available = 0;
u32 m_staging_buffer_pos = 0;
std::array<float, AVERAGING_BUFFER_SIZE> m_average_fullness = {};
// temporary staging buffer, used for timestretching
Common::unique_aligned_ptr<s16[]> m_staging_buffer;
// float buffer, soundtouch only accepts float samples as input
Common::unique_aligned_ptr<float[]> m_float_buffer;
};

View File

@@ -0,0 +1,832 @@
// SPDX-FileCopyrightText: 2019-2025 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: CC-BY-NC-ND-4.0
#include "core_audio_stream.h"
#include "translation.h"
#include "common/align.h"
#include "common/assert.h"
#include "common/error.h"
#include "common/gsvector.h"
#include "common/log.h"
#include "common/settings_interface.h"
#include "common/timer.h"
#include "soundtouch/SoundTouch.h"
#include "soundtouch/SoundTouchDLL.h"
#include <algorithm>
#include <cmath>
#include <cstring>
#include <limits>
// Log channel used by all messages in this file.
LOG_CHANNEL(AudioStream);

// Compile-time switch for the time-stretch statistics logging path.
static constexpr bool LOG_TIMESTRETCH_STATS = false;
void AudioStreamParameters::Load(const SettingsInterface& si, const char* section)
{
stretch_mode =
CoreAudioStream::ParseStretchMode(
si.GetStringValue(section, "StretchMode", CoreAudioStream::GetStretchModeName(DEFAULT_STRETCH_MODE)).c_str())
.value_or(DEFAULT_STRETCH_MODE);
output_latency_ms = static_cast<u16>(std::min<u32>(
si.GetUIntValue(section, "OutputLatencyMS", DEFAULT_OUTPUT_LATENCY_MS), std::numeric_limits<u16>::max()));
output_latency_minimal = si.GetBoolValue(section, "OutputLatencyMinimal", DEFAULT_OUTPUT_LATENCY_MINIMAL);
buffer_ms = static_cast<u16>(
std::min<u32>(si.GetUIntValue(section, "BufferMS", DEFAULT_BUFFER_MS), std::numeric_limits<u16>::max()));
stretch_sequence_length_ms =
static_cast<u16>(std::min<u32>(si.GetUIntValue(section, "StretchSequenceLengthMS", DEFAULT_STRETCH_SEQUENCE_LENGTH),
std::numeric_limits<u16>::max()));
stretch_seekwindow_ms = static_cast<u16>(std::min<u32>(
si.GetUIntValue(section, "StretchSeekWindowMS", DEFAULT_STRETCH_SEEKWINDOW), std::numeric_limits<u16>::max()));
stretch_overlap_ms = static_cast<u16>(std::min<u32>(
si.GetUIntValue(section, "StretchOverlapMS", DEFAULT_STRETCH_OVERLAP), std::numeric_limits<u16>::max()));
stretch_use_quickseek = si.GetBoolValue(section, "StretchUseQuickSeek", DEFAULT_STRETCH_USE_QUICKSEEK);
stretch_use_aa_filter = si.GetBoolValue(section, "StretchUseAAFilter", DEFAULT_STRETCH_USE_AA_FILTER);
}
// Writes all audio parameters to the given settings section, mirroring the
// keys read by Load().
void AudioStreamParameters::Save(SettingsInterface& si, const char* section) const
{
  si.SetStringValue(section, "StretchMode", CoreAudioStream::GetStretchModeName(stretch_mode));
  si.SetUIntValue(section, "BufferMS", buffer_ms);
  si.SetUIntValue(section, "OutputLatencyMS", output_latency_ms);
  si.SetBoolValue(section, "OutputLatencyMinimal", output_latency_minimal);
  si.SetUIntValue(section, "StretchSequenceLengthMS", stretch_sequence_length_ms);
  si.SetUIntValue(section, "StretchSeekWindowMS", stretch_seekwindow_ms);
  si.SetUIntValue(section, "StretchOverlapMS", stretch_overlap_ms);
  si.SetBoolValue(section, "StretchUseQuickSeek", stretch_use_quickseek);
  si.SetBoolValue(section, "StretchUseAAFilter", stretch_use_aa_filter);
}
// Removes every key written by Save() from the given section, resetting the
// user's audio configuration to defaults.
void AudioStreamParameters::Clear(SettingsInterface& si, const char* section)
{
  si.DeleteValue(section, "StretchMode");
  // NOTE(review): "ExpansionMode" is not written by Save() — presumably a
  // legacy key being cleaned up; confirm before removing this line.
  si.DeleteValue(section, "ExpansionMode");
  si.DeleteValue(section, "BufferMS");
  si.DeleteValue(section, "OutputLatencyMS");
  si.DeleteValue(section, "OutputLatencyMinimal");
  si.DeleteValue(section, "StretchSequenceLengthMS");
  si.DeleteValue(section, "StretchSeekWindowMS");
  si.DeleteValue(section, "StretchOverlapMS");
  si.DeleteValue(section, "StretchUseQuickSeek");
  si.DeleteValue(section, "StretchUseAAFilter");
}
// Inequality is simply the negation of equality.
bool AudioStreamParameters::operator!=(const AudioStreamParameters& rhs) const
{
  return !(*this == rhs);
}
// Field-by-field comparison. The previous memcmp() also compared padding
// bytes, whose values are unspecified and can differ even when all fields are
// equal, causing spurious "changed" results.
bool AudioStreamParameters::operator==(const AudioStreamParameters& rhs) const
{
  return (stretch_mode == rhs.stretch_mode && output_latency_minimal == rhs.output_latency_minimal &&
          output_latency_ms == rhs.output_latency_ms && buffer_ms == rhs.buffer_ms &&
          stretch_sequence_length_ms == rhs.stretch_sequence_length_ms &&
          stretch_seekwindow_ms == rhs.stretch_seekwindow_ms && stretch_overlap_ms == rhs.stretch_overlap_ms &&
          stretch_use_quickseek == rhs.stretch_use_quickseek && stretch_use_aa_filter == rhs.stretch_use_aa_filter);
}
CoreAudioStream::CoreAudioStream() = default;

// Ensures the backend stream, stretcher, and buffers are released.
CoreAudioStream::~CoreAudioStream()
{
  Destroy();
}
// Sets up the sample buffers, the stretcher, and (for non-null backends) the
// platform output stream. Returns false, with everything torn down again, if
// backend stream creation fails.
bool CoreAudioStream::Initialize(AudioBackend backend, u32 sample_rate, const AudioStreamParameters& params,
                                 const char* driver_name, const char* device_name, Error* error /* = nullptr */)
{
  Destroy();

  m_sample_rate = sample_rate;
  m_volume = 100;
  m_parameters = params;
  m_filling = false;
  m_paused = false;
  AllocateBuffer();
  StretchAllocate();

  // When no explicit output latency is configured, fall back to the buffer size.
  const u32 output_latency_frames =
    GetBufferSizeForMS(sample_rate, (params.output_latency_ms != 0) ? params.output_latency_ms : params.buffer_ms);
  if (backend != AudioBackend::Null)
  {
    if (!(m_stream =
            AudioStream::CreateStream(backend, sample_rate, NUM_CHANNELS, output_latency_frames,
                                      params.output_latency_minimal, driver_name, device_name, this, true, error)))
    {
      Destroy();
      return false;
    }
  }
  else
  {
    // no point stretching with no output
    // NOTE(review): AllocateBuffer()/StretchAllocate() above already ran with
    // the caller's stretch mode before it is forced to Off here — verify the
    // leftover stretcher/buffer sizing is intentional for the null backend.
    m_parameters = AudioStreamParameters();
    m_parameters.stretch_mode = AudioStretchMode::Off;
    m_parameters.buffer_ms = params.buffer_ms;

    // always paused to avoid output
    m_paused = true;
  }

  return true;
}
// Tears down the stretcher, buffers, and backend stream, returning the object
// to its default (inactive) state. Safe to call repeatedly.
void CoreAudioStream::Destroy()
{
  // Stretcher first (it feeds the buffer), then the buffer, then the backend.
  StretchDestroy();
  DestroyBuffer();
  m_stream.reset();
  m_sample_rate = 0;
  m_parameters = AudioStreamParameters();
  m_volume = 0;
  m_filling = false;
  m_paused = true;
}
// Rounds a frame count up to a whole number of chunks, so the read/write
// paths can always operate on CHUNK_SIZE-frame blocks.
u32 CoreAudioStream::GetAlignedBufferSize(u32 size)
{
  static_assert(Common::IsPow2(CHUNK_SIZE));
  return Common::AlignUpPow2(size, CHUNK_SIZE);
}
// Converts a duration in milliseconds to a chunk-aligned frame count.
u32 CoreAudioStream::GetBufferSizeForMS(u32 sample_rate, u32 ms)
{
  // Widen the intermediate product to 64 bits: ms * sample_rate can exceed
  // u32 for large durations at high sample rates.
  return GetAlignedBufferSize(static_cast<u32>((static_cast<u64>(ms) * sample_rate) / 1000u));
}
// Converts a frame count (chunk-aligned first) back to milliseconds.
u32 CoreAudioStream::GetMSForBufferSize(u32 sample_rate, u32 buffer_size)
{
  buffer_size = GetAlignedBufferSize(buffer_size);
  // Widen to 64 bits so buffer_size * 1000 cannot overflow u32.
  return static_cast<u32>((static_cast<u64>(buffer_size) * 1000u) / sample_rate);
}
// Short names as stored in the settings file; index order must match the
// AudioStretchMode enum.
static constexpr const std::array s_stretch_mode_names = {
  "None",
  "Resample",
  "TimeStretch",
};

// Translatable UI names; index order must match the AudioStretchMode enum.
static constexpr const std::array s_stretch_mode_display_names = {
  TRANSLATE_DISAMBIG_NOOP("Settings", "Off (Noisy)", "AudioStretchMode"),
  TRANSLATE_DISAMBIG_NOOP("Settings", "Resampling (Pitch Shift)", "AudioStretchMode"),
  TRANSLATE_DISAMBIG_NOOP("Settings", "Time Stretch (Tempo Change, Best Sound)", "AudioStretchMode"),
};
// Returns the config-file name for a stretch mode, or "" when the value is
// out of range (e.g. a corrupted setting).
const char* CoreAudioStream::GetStretchModeName(AudioStretchMode mode)
{
  const size_t index = static_cast<size_t>(mode);
  if (index >= s_stretch_mode_names.size())
    return "";

  return s_stretch_mode_names[index];
}
// Returns the translated, human-readable name for the UI, or "" when the mode
// is out of range.
const char* CoreAudioStream::GetStretchModeDisplayName(AudioStretchMode mode)
{
  return (static_cast<size_t>(mode) < s_stretch_mode_display_names.size()) ?
           Host::TranslateToCString("Settings", s_stretch_mode_display_names[static_cast<size_t>(mode)],
                                    "AudioStretchMode") :
           "";
}
// Parses a config-file stretch mode name; returns nullopt for unknown strings.
std::optional<AudioStretchMode> CoreAudioStream::ParseStretchMode(const char* name)
{
  // Bound by the name table itself rather than the enum, so an out-of-sync
  // table can never be indexed out of range.
  for (size_t i = 0; i < s_stretch_mode_names.size(); i++)
  {
    if (std::strcmp(name, s_stretch_mode_names[i]) == 0)
      return static_cast<AudioStretchMode>(i);
  }

  return std::nullopt;
}
// Returns the approximate number of frames queued in the ring buffer.
// Relaxed ordering: callers tolerate racing with the producer/consumer and
// only need a best-effort value.
u32 CoreAudioStream::GetBufferedFramesRelaxed() const
{
  const u32 rpos = m_rpos.load(std::memory_order_relaxed);
  const u32 wpos = m_wpos.load(std::memory_order_relaxed);
  return (wpos + m_buffer_size - rpos) % m_buffer_size;
}
// Consumer side: fills `samples` with num_frames of interleaved stereo audio
// from the ring buffer. Handles underruns (refill phase plus a crude stretch
// of whatever data is available), and applies the output volume.
void CoreAudioStream::ReadFrames(SampleType* samples, u32 num_frames)
{
  const u32 available_frames = GetBufferedFramesRelaxed();
  u32 frames_to_read = num_frames;
  u32 silence_frames = 0;

  if (m_filling)
  {
    // After an underrun, output silence until the buffer has refilled to a
    // fraction of its size (larger fraction when not time-stretching).
    u32 toFill = m_buffer_size / ((m_parameters.stretch_mode != AudioStretchMode::TimeStretch) ? 32 : 400);
    toFill = GetAlignedBufferSize(toFill);

    if (available_frames < toFill)
    {
      silence_frames = num_frames;
      frames_to_read = 0;
    }
    else
    {
      m_filling = false;
      VERBOSE_LOG("Underrun compensation done ({} frames buffered)", toFill);
    }
  }

  if (available_frames < frames_to_read)
  {
    // Underrun: take what we have, enter the refill phase, and tell the
    // time stretcher so it can slow down.
    silence_frames = frames_to_read - available_frames;
    frames_to_read = available_frames;
    m_filling = true;

    if (m_parameters.stretch_mode == AudioStretchMode::TimeStretch)
      StretchUnderrun();
  }

  if (frames_to_read > 0)
  {
    u32 rpos = m_rpos.load(std::memory_order_acquire);

    u32 end = m_buffer_size - rpos;
    if (end > frames_to_read)
      end = frames_to_read;

    // towards the end of the buffer
    if (end > 0)
    {
      std::memcpy(samples, &m_buffer[rpos * NUM_CHANNELS], end * NUM_CHANNELS * sizeof(SampleType));
      rpos += end;
      rpos = (rpos == m_buffer_size) ? 0 : rpos;
    }

    // after wrapping around
    const u32 start = frames_to_read - end;
    if (start > 0)
    {
      std::memcpy(&samples[end * NUM_CHANNELS], &m_buffer[0], start * NUM_CHANNELS * sizeof(SampleType));
      rpos = start;
    }

    m_rpos.store(rpos, std::memory_order_release);
  }

  if (silence_frames > 0)
  {
    if (frames_to_read > 0)
    {
      // super basic resampler - spread the input samples evenly across the output samples. will sound like ass and have
      // aliasing, but better than popping by inserting silence.
      const u32 increment =
        static_cast<u32>(65536.0f * (static_cast<float>(frames_to_read) / static_cast<float>(num_frames)));

      SampleType* resample_ptr = static_cast<SampleType*>(alloca(frames_to_read * NUM_CHANNELS * sizeof(SampleType)));
      std::memcpy(resample_ptr, samples, frames_to_read * NUM_CHANNELS * sizeof(SampleType));

      SampleType* out_ptr = samples;
      const u32 copy_stride = sizeof(SampleType) * NUM_CHANNELS;
      u32 resample_subpos = 0;  // 16.16 fixed-point position into the source
      for (u32 i = 0; i < num_frames; i++)
      {
        std::memcpy(out_ptr, resample_ptr, copy_stride);
        out_ptr += NUM_CHANNELS;

        resample_subpos += increment;
        resample_ptr += (resample_subpos >> 16) * NUM_CHANNELS;
        resample_subpos %= 65536u;
      }

      VERBOSE_LOG("Audio buffer underflow, resampled {} frames to {}", frames_to_read, num_frames);
    }
    else
    {
      // no data, fall back to silence
      std::memset(samples + (frames_to_read * NUM_CHANNELS), 0, silence_frames * NUM_CHANNELS * sizeof(s16));
    }
  }

  if (m_volume != 100)
  {
    // Scale all output samples by volume/100: SIMD over groups of 8 samples,
    // scalar (with clamping) for the tail.
    u32 num_samples = num_frames * NUM_CHANNELS;

    const u32 aligned_samples = Common::AlignDownPow2(num_samples, 8);
    num_samples -= aligned_samples;

    const float volume_mult = static_cast<float>(m_volume) / 100.0f;
    const GSVector4 volume_multv = GSVector4(volume_mult);
    const SampleType* const aligned_samples_end = samples + aligned_samples;
    for (; samples != aligned_samples_end; samples += 8)
    {
      GSVector4i iv = GSVector4i::load<false>(samples); // [0, 1, 2, 3, 4, 5, 6, 7]
      GSVector4i iv1 = iv.upl16(iv);                    // [0, 0, 1, 1, 2, 2, 3, 3]
      GSVector4i iv2 = iv.uph16(iv);                    // [4, 4, 5, 5, 6, 6, 7, 7]
      iv1 = iv1.sra32<16>();                            // [0, 1, 2, 3]
      iv2 = iv2.sra32<16>();                            // [4, 5, 6, 7]
      GSVector4 fv1 = GSVector4(iv1);                   // [f0, f1, f2, f3]
      GSVector4 fv2 = GSVector4(iv2);                   // [f4, f5, f6, f7]
      fv1 = fv1 * volume_multv;                         // [f0, f1, f2, f3]
      fv2 = fv2 * volume_multv;                         // [f4, f5, f6, f7]
      iv1 = GSVector4i(fv1);                            // [0, 1, 2, 3]
      iv2 = GSVector4i(fv2);                            // [4, 5, 6, 7]
      iv = iv1.ps32(iv2);                               // [0, 1, 2, 3, 4, 5, 6, 7]
      GSVector4i::store<false>(samples, iv);
    }

    while (num_samples > 0)
    {
      *samples = static_cast<s16>(std::clamp(static_cast<float>(*samples) * volume_mult, -32768.0f, 32767.0f));
      samples++;
      num_samples--;
    }
  }
}
// Producer side: copies num_frames of interleaved stereo samples into the
// ring buffer, splitting the copy at the wrap point. On overrun, time-stretch
// mode drops old data via StretchOverrun(); other modes drop the new chunk.
void CoreAudioStream::InternalWriteFrames(s16* data, u32 num_frames)
{
  const u32 free = m_buffer_size - GetBufferedFramesRelaxed();
  if (free <= num_frames)
  {
    if (m_parameters.stretch_mode == AudioStretchMode::TimeStretch)
    {
      // Advances the read pointer to make room, and nudges the stretcher.
      StretchOverrun();
    }
    else
    {
      DEBUG_LOG("Buffer overrun, chunk dropped");
      return;
    }
  }

  u32 wpos = m_wpos.load(std::memory_order_acquire);

  // wrapping around the end of the buffer?
  if ((m_buffer_size - wpos) <= num_frames)
  {
    // needs to be written in two parts
    const u32 end = m_buffer_size - wpos;
    const u32 start = num_frames - end;

    // start is zero when this chunk reaches exactly the end
    std::memcpy(&m_buffer[wpos * NUM_CHANNELS], data, end * NUM_CHANNELS * sizeof(SampleType));
    if (start > 0)
      std::memcpy(&m_buffer[0], data + end * NUM_CHANNELS, start * NUM_CHANNELS * sizeof(SampleType));

    wpos = start;
  }
  else
  {
    // no split
    std::memcpy(&m_buffer[wpos * NUM_CHANNELS], data, num_frames * NUM_CHANNELS * sizeof(SampleType));
    wpos += num_frames;
  }

  m_wpos.store(wpos, std::memory_order_release);
}
// Allocates the ring buffer plus the staging/float chunk buffers, sized from
// the configured buffer duration and the current stretch mode.
void CoreAudioStream::AllocateBuffer()
{
  // Stretcher can produce a large amount of samples from few samples when running slow, so allocate a larger buffer.
  // In most cases it's not going to be used, but better to have a larger buffer and not need it than overrun.
  const u32 multiplier = (m_parameters.stretch_mode == AudioStretchMode::TimeStretch) ?
                           16 :
                           ((m_parameters.stretch_mode == AudioStretchMode::Off) ? 1 : 2);
  m_buffer_size = GetAlignedBufferSize(((m_parameters.buffer_ms * multiplier) * m_sample_rate) / 1000);
  m_target_buffer_size = GetAlignedBufferSize((m_sample_rate * m_parameters.buffer_ms) / 1000u);

  // Buffers are vector-aligned; staging/float buffers hold exactly one chunk.
  m_buffer = Common::make_unique_aligned_for_overwrite<s16[]>(VECTOR_ALIGNMENT, m_buffer_size * NUM_CHANNELS);
  m_staging_buffer = Common::make_unique_aligned_for_overwrite<s16[]>(VECTOR_ALIGNMENT, CHUNK_SIZE * NUM_CHANNELS);
  m_float_buffer = Common::make_unique_aligned_for_overwrite<float[]>(VECTOR_ALIGNMENT, CHUNK_SIZE * NUM_CHANNELS);

  DEV_LOG("Allocated buffer of {} frames for buffer of {} ms [stretch {}, target size {}].", m_buffer_size,
          m_parameters.buffer_ms, GetStretchModeName(m_parameters.stretch_mode), m_target_buffer_size);
}
void CoreAudioStream::DestroyBuffer()
{
  // Release all sample storage and reset the ring buffer cursors back to empty.
  m_buffer.reset();
  m_float_buffer.reset();
  m_staging_buffer.reset();

  m_buffer_size = 0;
  m_rpos.store(0, std::memory_order_release);
  m_wpos.store(0, std::memory_order_release);
}
void CoreAudioStream::EmptyBuffer()
{
if (IsStretchEnabled())
{
soundtouch_clear(m_soundtouch);
if (m_parameters.stretch_mode == AudioStretchMode::TimeStretch)
soundtouch_setTempo(m_soundtouch, m_nominal_rate);
}
m_wpos.store(m_rpos.load(std::memory_order_acquire), std::memory_order_release);
}
void CoreAudioStream::SetNominalRate(float tempo)
{
  // Record how much faster (or slower) than the sample rate frames are expected to arrive.
  m_nominal_rate = tempo;

  switch (m_parameters.stretch_mode)
  {
    case AudioStretchMode::Resample:
      soundtouch_setRate(m_soundtouch, tempo);
      break;

    case AudioStretchMode::TimeStretch:
      // While the stretcher is inactive, UpdateStretchTempo() owns the tempo; don't override it.
      if (!m_stretch_inactive)
        soundtouch_setTempo(m_soundtouch, tempo);
      break;

    default:
      break;
  }
}
void CoreAudioStream::SetStretchMode(AudioStretchMode mode)
{
  if (m_parameters.stretch_mode == mode)
    return;

  // Buffers can't be resized while the stream is pulling data, so pause around the swap.
  const bool was_paused = m_paused;
  if (!was_paused)
    SetPaused(true);

  // Tear down the old buffers and stretcher, then rebuild both for the new mode.
  DestroyBuffer();
  StretchDestroy();
  m_parameters.stretch_mode = mode;
  AllocateBuffer();
  if (m_parameters.stretch_mode != AudioStretchMode::Off)
    StretchAllocate();

  if (!was_paused)
    SetPaused(false);
}
void CoreAudioStream::SetPaused(bool paused)
{
// force state to always be paused if we're a null output
if (m_paused == paused || !m_stream)
return;
Error error;
if (!(paused ? m_stream->Stop(&error) : m_stream->Start(&error)))
ERROR_LOG("Failed to {} stream: {}", paused ? "pause" : "restart", error.GetDescription());
else
m_paused = paused;
}
void CoreAudioStream::SetOutputVolume(u32 volume)
{
  // Volume is applied downstream; a value of 0 also makes EndWrite() drop incoming frames.
  m_volume = volume;
}
void CoreAudioStream::BeginWrite(SampleType** buffer_ptr, u32* num_frames)
{
  // Hands the producer a pointer into the staging buffer, plus the number of frames left in the
  // current chunk. The producer fills it and commits via EndWrite().
  // TODO: Write directly to buffer when not using stretching.
  *buffer_ptr = &m_staging_buffer[m_staging_buffer_pos];
  *num_frames = CHUNK_SIZE - (m_staging_buffer_pos / NUM_CHANNELS);
}
// Converts num_samples interleaved s16 samples to floats scaled by 1/32767.
// Processes 8 samples per iteration (num_samples is rounded up), so both buffers must be sized
// to a multiple of 8 samples - CHUNK_SIZE * NUM_CHANNELS satisfies this.
static void S16ChunkToFloat(const s16* src, float* dst, u32 num_samples)
{
  constexpr GSVector4 S16_TO_FLOAT_V = GSVector4::cxpr(1.0f / 32767.0f);

  const u32 iterations = (num_samples + 7) / 8;
  for (u32 i = 0; i < iterations; i++)
  {
    const GSVector4i sv = GSVector4i::load<true>(src);
    src += 8;

    // Widen 8 s16s to two vectors of 4 s32s; the duplicate-then-arithmetic-shift sequence
    // performs the sign extension.
    GSVector4i iv1 = sv.upl16(sv); // [0, 0, 1, 1, 2, 2, 3, 3]
    GSVector4i iv2 = sv.uph16(sv); // [4, 4, 5, 5, 6, 6, 7, 7]
    iv1 = iv1.sra32<16>();         // [0, 1, 2, 3]
    iv2 = iv2.sra32<16>();         // [4, 5, 6, 7]
    GSVector4 fv1 = GSVector4(iv1); // [f0, f1, f2, f3]
    GSVector4 fv2 = GSVector4(iv2); // [f4, f5, f6, f7]
    fv1 = fv1 * S16_TO_FLOAT_V;
    fv2 = fv2 * S16_TO_FLOAT_V;

    GSVector4::store<true>(dst + 0, fv1);
    GSVector4::store<true>(dst + 4, fv2);
    dst += 8;
  }
}
// Converts num_samples floats (nominally in [-1, 1]) back to interleaved s16 samples by scaling
// with 32767. Processes 8 samples per iteration (num_samples is rounded up), so buffers must be
// sized to a multiple of 8 samples.
static void FloatChunkToS16(s16* dst, const float* src, u32 num_samples)
{
  const GSVector4 FLOAT_TO_S16_V = GSVector4::cxpr(32767.0f);

  const u32 iterations = (num_samples + 7) / 8;
  for (u32 i = 0; i < iterations; i++)
  {
    GSVector4 fv1 = GSVector4::load<true>(src + 0);
    GSVector4 fv2 = GSVector4::load<true>(src + 4);
    src += 8;

    fv1 = fv1 * FLOAT_TO_S16_V;
    fv2 = fv2 * FLOAT_TO_S16_V;
    GSVector4i iv1 = GSVector4i(fv1);
    GSVector4i iv2 = GSVector4i(fv2);

    // Pack the two s32 vectors down to 8 s16s. NOTE(review): assumes ps32() follows SSE packs
    // semantics (signed saturation), so out-of-range inputs clamp rather than wrap - confirm.
    const GSVector4i iv = iv1.ps32(iv2);
    GSVector4i::store<true>(dst, iv);
    dst += 8;
  }
}
void CoreAudioStream::EndWrite(u32 num_frames)
{
  // Commits num_frames written via BeginWrite(). Frames accumulate in the staging buffer and
  // are only pushed downstream once a full chunk is available.
  // don't bother committing anything when muted
  if (m_volume == 0 || m_paused)
    return;

  m_staging_buffer_pos += num_frames * NUM_CHANNELS;
  DebugAssert(m_staging_buffer_pos <= (CHUNK_SIZE * NUM_CHANNELS));
  if ((m_staging_buffer_pos / NUM_CHANNELS) < CHUNK_SIZE)
    return;

  // Full chunk accumulated - start the next one from scratch.
  m_staging_buffer_pos = 0;

  if (!IsStretchEnabled())
  {
    // No stretching: the chunk goes straight into the ring buffer.
    InternalWriteFrames(m_staging_buffer.get(), CHUNK_SIZE);
    return;
  }

  // Stretching: convert to float (the stretcher's input format) and run it through.
  S16ChunkToFloat(m_staging_buffer.get(), m_float_buffer.get(), CHUNK_SIZE * NUM_CHANNELS);
  StretchWriteBlock(m_float_buffer.get());
}
// Time stretching algorithm based on PCSX2 implementation.
/// Returns true when val lies within [min, max], inclusive on both ends.
template<class T>
ALWAYS_INLINE static bool IsInRange(const T& val, const T& min, const T& max)
{
  const bool at_least_min = (min <= val);
  const bool at_most_max = (val <= max);
  return at_least_min && at_most_max;
}
void CoreAudioStream::StretchAllocate()
{
  // Creates and configures the soundtouch instance, and resets all stretcher bookkeeping.
  if (m_parameters.stretch_mode == AudioStretchMode::Off)
    return;

  m_soundtouch = soundtouch_createInstance();
  soundtouch_setSampleRate(m_soundtouch, m_sample_rate);
  soundtouch_setChannels(m_soundtouch, NUM_CHANNELS);

  // Apply the user-tunable soundtouch parameters.
  soundtouch_setSetting(m_soundtouch, SETTING_USE_QUICKSEEK, m_parameters.stretch_use_quickseek);
  soundtouch_setSetting(m_soundtouch, SETTING_USE_AA_FILTER, m_parameters.stretch_use_aa_filter);
  soundtouch_setSetting(m_soundtouch, SETTING_SEQUENCE_MS, m_parameters.stretch_sequence_length_ms);
  soundtouch_setSetting(m_soundtouch, SETTING_SEEKWINDOW_MS, m_parameters.stretch_seekwindow_ms);
  soundtouch_setSetting(m_soundtouch, SETTING_OVERLAP_MS, m_parameters.stretch_overlap_ms);

  // Resample mode pins a fixed rate; timestretch starts at the nominal tempo and is adjusted
  // dynamically by UpdateStretchTempo().
  if (m_parameters.stretch_mode == AudioStretchMode::Resample)
    soundtouch_setRate(m_soundtouch, m_nominal_rate);
  else
    soundtouch_setTempo(m_soundtouch, m_nominal_rate);

  // Start at the reset threshold so the first tempo update performs a full reset.
  m_stretch_reset = STRETCH_RESET_THRESHOLD;
  m_stretch_inactive = false;
  m_stretch_ok_count = 0;
  m_dynamic_target_usage = 0.0f;
  m_average_position = 0;
  m_average_available = 0;
  m_staging_buffer_pos = 0;
}
void CoreAudioStream::StretchDestroy()
{
  // Safe to call when no stretcher has been allocated.
  if (!m_soundtouch)
    return;

  soundtouch_destroyInstance(m_soundtouch);
  m_soundtouch = nullptr;
}
void CoreAudioStream::StretchWriteBlock(const float* block)
{
  // Pushes one CHUNK_SIZE block of float frames through the stretcher, or straight to the ring
  // buffer when stretching is disabled.
  if (IsStretchEnabled())
  {
    soundtouch_putSamples(m_soundtouch, block, CHUNK_SIZE);

    // Drain everything the stretcher has produced so far, one chunk at a time, converting each
    // chunk back to s16 via the staging buffer.
    u32 tempProgress;
    while (tempProgress = soundtouch_receiveSamples(m_soundtouch, m_float_buffer.get(), CHUNK_SIZE), tempProgress != 0)
    {
      FloatChunkToS16(m_staging_buffer.get(), m_float_buffer.get(), tempProgress * NUM_CHANNELS);
      InternalWriteFrames(m_staging_buffer.get(), tempProgress);
    }

    if (m_parameters.stretch_mode == AudioStretchMode::TimeStretch)
      UpdateStretchTempo();
  }
  else
  {
    FloatChunkToS16(m_staging_buffer.get(), block, CHUNK_SIZE * NUM_CHANNELS);
    InternalWriteFrames(m_staging_buffer.get(), CHUNK_SIZE);
  }
}
float CoreAudioStream::AddAndGetAverageTempo(float val)
{
  // Records val and returns the mean of the most recent (up to) AVERAGING_WINDOW samples.
  static constexpr u32 AVERAGING_WINDOW = 50;

  // Build up a circular buffer for tempo averaging to prevent rapid tempo oscillations.
  if (m_average_available < AVERAGING_BUFFER_SIZE)
    m_average_available++;
  m_average_fullness[m_average_position] = val;
  m_average_position = (m_average_position + 1U) % AVERAGING_BUFFER_SIZE;

  // Walk backwards from the write position over the last actual_window entries.
  // The + AVERAGING_BUFFER_SIZE ensures we don't go negative when using modulo arithmetic.
  const u32 actual_window = std::min<u32>(m_average_available, AVERAGING_WINDOW);
  u32 index = (m_average_position - actual_window + AVERAGING_BUFFER_SIZE) % AVERAGING_BUFFER_SIZE;

  float sum = 0.0f;
  u32 count = 0;

#ifdef CPU_ARCH_SIMD
  // Sum four entries per iteration, falling back to scalar adds when the window straddles the
  // end of the circular buffer (the vector load cannot wrap).
  GSVector4 vsum = GSVector4::zero();
  const u32 vcount = Common::AlignDownPow2(actual_window, 4);
  for (; count < vcount; count += 4)
  {
    if ((index + 4) > AVERAGING_BUFFER_SIZE)
    {
      // wraparound
      for (u32 i = 0; i < 4; i++)
      {
        sum += m_average_fullness[index];
        index = (index + 1) % AVERAGING_BUFFER_SIZE;
      }
    }
    else
    {
      vsum += GSVector4::load<false>(&m_average_fullness[index]);
      index = (index + 4) % AVERAGING_BUFFER_SIZE;
    }
  }
  sum += vsum.addv();
#endif

  // Scalar tail, and the entire loop when SIMD is unavailable.
  for (; count < actual_window; count++)
  {
    sum += m_average_fullness[index];
    index = (index + 1) % AVERAGING_BUFFER_SIZE;
  }

  sum /= static_cast<float>(actual_window);

  // Treat an all-zero average as neutral tempo so callers can safely divide by the result.
  return (sum != 0.0f) ? sum : 1.0f;
}
void CoreAudioStream::UpdateStretchTempo()
{
  // Computes a new soundtouch tempo from current buffer occupancy, with smoothing, dampening,
  // hysteresis-based deactivation around 1:1 speed, and periodic reset on repeated errors.
  static constexpr float MIN_TEMPO = 0.05f;
  static constexpr float MAX_TEMPO = 500.0f;

  // Hysteresis thresholds to prevent stretcher from constantly toggling on/off.
  // i.e. this is the range we will run in 1:1 mode for.
  static constexpr float INACTIVE_GOOD_FACTOR = 1.04f;
  static constexpr float INACTIVE_BAD_FACTOR = 1.2f;

  // Require sustained good performance before deactivating.
  static constexpr u32 INACTIVE_MIN_OK_COUNT = 50;
  static constexpr u32 COMPENSATION_DIVIDER = 100;

  // Controls how aggressively we adjust the dynamic target. We want to keep the same target size regardless
  // of the target speed, but need additional buffering when intentionally running below 100%.
  float base_target_usage = static_cast<float>(m_target_buffer_size) / std::min(m_nominal_rate, 1.0f);

  // tempo = current_buffer / target_buffer.
  const u32 ibuffer_usage = GetBufferedFramesRelaxed();
  float buffer_usage = static_cast<float>(ibuffer_usage);
  float tempo = buffer_usage / m_dynamic_target_usage;

  // Prevents the system from getting stuck in a bad state due to accumulated errors.
  if (m_stretch_reset >= STRETCH_RESET_THRESHOLD)
  {
    VERBOSE_LOG("___ Stretcher is being reset.");
    m_stretch_inactive = false;
    m_stretch_ok_count = 0;
    m_dynamic_target_usage = base_target_usage;
    m_average_available = 0;
    m_average_position = 0;
    m_stretch_reset = 0;
    tempo = m_nominal_rate;
  }
  else if (m_stretch_reset > 0)
  {
    // Back off resets if enough time has passed. That way a very occasional lag/overflow
    // doesn't cascade into unnecessary tempo adjustment.
    const u64 now = Timer::GetCurrentValue();
    if (Timer::ConvertValueToSeconds(now - m_stretch_reset_time) >= 2.0f)
    {
      m_stretch_reset--;
      m_stretch_reset_time = now;
    }
  }

  // Apply temporal smoothing to prevent rapid tempo changes that cause artifacts.
  tempo = AddAndGetAverageTempo(tempo);

  // Apply non-linear dampening when close to target to reduce oscillation.
  if (tempo < 2.0f)
    tempo = std::sqrt(tempo);

  tempo = std::clamp(tempo, MIN_TEMPO, MAX_TEMPO);

  // Running below 100% needs extra headroom in the target.
  if (tempo < 1.0f)
    base_target_usage /= std::sqrt(tempo);

  // Gradually adjust our dynamic target toward what would give us the desired tempo.
  m_dynamic_target_usage +=
    static_cast<float>(base_target_usage / tempo - m_dynamic_target_usage) / static_cast<float>(COMPENSATION_DIVIDER);

  // Snap back to baseline if we're very close.
  if (IsInRange(tempo, 0.9f, 1.1f) &&
      IsInRange(m_dynamic_target_usage, base_target_usage * 0.9f, base_target_usage * 1.1f))
  {
    m_dynamic_target_usage = base_target_usage;
  }

  // Are we changing the active state?
  if (!m_stretch_inactive)
  {
    if (IsInRange(tempo, 1.0f / INACTIVE_GOOD_FACTOR, INACTIVE_GOOD_FACTOR))
      m_stretch_ok_count++;
    else
      m_stretch_ok_count = 0;

    if (m_stretch_ok_count >= INACTIVE_MIN_OK_COUNT)
    {
      VERBOSE_LOG("=== Stretcher is now inactive.");
      m_stretch_inactive = true;
    }
  }
  else if (!IsInRange(tempo, 1.0f / INACTIVE_BAD_FACTOR, INACTIVE_BAD_FACTOR))
  {
    VERBOSE_LOG("~~~ Stretcher is now active @ tempo {}.", tempo);
    m_stretch_inactive = false;
    m_stretch_ok_count = 0;
  }

  // If we're inactive, we don't want to change the tempo.
  if (m_stretch_inactive)
    tempo = m_nominal_rate;

  if constexpr (LOG_TIMESTRETCH_STATS)
  {
    // Start min/max at the extremes so the first logged window reports real values instead of a
    // min of 0.0. Note: lowest() (not min(), which is the smallest positive value) is the
    // correct "no maximum yet" sentinel for floats.
    static float min_tempo = std::numeric_limits<float>::max();
    static float max_tempo = std::numeric_limits<float>::lowest();
    static float acc_tempo = 0.0f;
    static u32 acc_cnt = 0;
    acc_tempo += tempo;
    acc_cnt++;
    min_tempo = std::min(min_tempo, tempo);
    max_tempo = std::max(max_tempo, tempo);

    static int iterations = 0;
    static u64 last_log_time = 0;
    const u64 now = Timer::GetCurrentValue();
    if (Timer::ConvertValueToSeconds(now - last_log_time) > 1.0f)
    {
      const float avg_tempo = (acc_cnt > 0) ? (acc_tempo / static_cast<float>(acc_cnt)) : 0.0f;
      VERBOSE_LOG("{:3d} ms ({:3.0f}%), tempo: avg={:.2f} min={:.2f} max={:.2f}, comp: {:2.3f}, iters: {}, reset:{}",
                  (ibuffer_usage * 1000u) / m_sample_rate, 100.0f * buffer_usage / base_target_usage, avg_tempo,
                  min_tempo, max_tempo, m_dynamic_target_usage / base_target_usage, iterations, m_stretch_reset);
      last_log_time = now;
      iterations = 0;
      min_tempo = std::numeric_limits<float>::max();
      max_tempo = std::numeric_limits<float>::lowest();
      acc_tempo = 0.0f;
      acc_cnt = 0;
    }

    iterations++;
  }

  soundtouch_setTempo(m_soundtouch, tempo);
}
void CoreAudioStream::StretchUnderrun()
{
// Didn't produce enough frames in time.
m_stretch_reset++;
if (m_stretch_reset < STRETCH_RESET_THRESHOLD)
m_stretch_reset_time = Timer::GetCurrentValue();
}
void CoreAudioStream::StretchOverrun()
{
  // Produced more frames than can fit in the buffer. Count it towards a stretcher reset.
  m_stretch_reset++;
  if (m_stretch_reset < STRETCH_RESET_THRESHOLD)
    m_stretch_reset_time = Timer::GetCurrentValue();

  // Drop two packets to give the time stretcher a bit more time to slow things down.
  // This prevents a cascading overrun situation where each overrun makes the next one more likely.
  // Advancing the read cursor (with wraparound) discards the oldest buffered frames.
  const u32 discard = CHUNK_SIZE * 2;
  m_rpos.store((m_rpos.load(std::memory_order_acquire) + discard) % m_buffer_size, std::memory_order_release);
}
void CoreAudioStream::EmptyStretchBuffers()
{
  // Drops any samples buffered inside soundtouch and forces a full reset on the next tempo update.
  if (!IsStretchEnabled())
    return;

  m_stretch_reset = STRETCH_RESET_THRESHOLD;

  // Wipe soundtouch samples. If we don't do this and we're switching from a high tempo to low,
  // we'll still have quite a large buffer of samples that will be played back at a low tempo,
  // resulting in a long delay before the audio starts playing at the new tempo.
  soundtouch_clear(m_soundtouch);
}

View File

@@ -0,0 +1,173 @@
// SPDX-FileCopyrightText: 2019-2025 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: CC-BY-NC-ND-4.0
#pragma once
#include "audio_stream.h"
#include "common/align.h"
#include <array>
#include <atomic>
#include <memory>
#include <optional>
class Error;
class SettingsInterface;
// How audio is adjusted when the system runs at a speed other than 100%.
enum class AudioStretchMode : u8
{
  Off,         // No adjustment; buffer overruns drop chunks.
  Resample,    // Fixed-rate resampling at the nominal rate.
  TimeStretch, // Dynamic tempo adjustment driven by buffer occupancy.
  Count
};
// User-configurable audio stream parameters, persisted via SettingsInterface.
struct AudioStreamParameters
{
  AudioStretchMode stretch_mode = DEFAULT_STRETCH_MODE;
  // Request the backend's minimum output latency instead of output_latency_ms.
  bool output_latency_minimal = DEFAULT_OUTPUT_LATENCY_MINIMAL;
  u16 output_latency_ms = DEFAULT_OUTPUT_LATENCY_MS;
  // Amount of audio buffered before being pulled by the host API.
  u16 buffer_ms = DEFAULT_BUFFER_MS;

  // SoundTouch tuning parameters (sequence/seekwindow/overlap, quickseek, AA filter).
  u16 stretch_sequence_length_ms = DEFAULT_STRETCH_SEQUENCE_LENGTH;
  u16 stretch_seekwindow_ms = DEFAULT_STRETCH_SEEKWINDOW;
  u16 stretch_overlap_ms = DEFAULT_STRETCH_OVERLAP;
  bool stretch_use_quickseek = DEFAULT_STRETCH_USE_QUICKSEEK;
  bool stretch_use_aa_filter = DEFAULT_STRETCH_USE_AA_FILTER;

  static constexpr AudioStretchMode DEFAULT_STRETCH_MODE = AudioStretchMode::TimeStretch;

// Android defaults to a larger buffer.
#ifndef __ANDROID__
  static constexpr u16 DEFAULT_BUFFER_MS = 50;
  static constexpr u16 DEFAULT_OUTPUT_LATENCY_MS = 20;
#else
  static constexpr u16 DEFAULT_BUFFER_MS = 100;
  static constexpr u16 DEFAULT_OUTPUT_LATENCY_MS = 20;
#endif
  static constexpr bool DEFAULT_OUTPUT_LATENCY_MINIMAL = false;

  static constexpr u16 DEFAULT_STRETCH_SEQUENCE_LENGTH = 30;
  static constexpr u16 DEFAULT_STRETCH_SEEKWINDOW = 20;
  static constexpr u16 DEFAULT_STRETCH_OVERLAP = 10;
  static constexpr bool DEFAULT_STRETCH_USE_QUICKSEEK = false;
  static constexpr bool DEFAULT_STRETCH_USE_AA_FILTER = false;

  // Settings persistence.
  void Load(const SettingsInterface& si, const char* section);
  void Save(SettingsInterface& si, const char* section) const;
  void Clear(SettingsInterface& si, const char* section);

  bool operator==(const AudioStreamParameters& rhs) const;
  bool operator!=(const AudioStreamParameters& rhs) const;
};
// Core-side audio stream. Buffers frames produced by the emulated system in a lock-free ring
// buffer, optionally time-stretches/resamples them via soundtouch, and feeds the backend
// AudioStream through the AudioStreamSource::ReadFrames() callback.
class CoreAudioStream final : private AudioStreamSource
{
public:
  using SampleType = AudioStreamSource::SampleType;

  static constexpr u32 NUM_CHANNELS = 2; // Interleaved stereo.
  static constexpr u32 CHUNK_SIZE = 64;  // Frames per staging chunk.

  CoreAudioStream();
  ~CoreAudioStream();

  // Buffer size helpers; sizes are in frames, durations in milliseconds.
  static u32 GetAlignedBufferSize(u32 size);
  static u32 GetBufferSizeForMS(u32 sample_rate, u32 ms);
  static u32 GetMSForBufferSize(u32 sample_rate, u32 buffer_size);

  // Stretch mode <-> string conversion, used by settings and UI.
  static const char* GetStretchModeName(AudioStretchMode mode);
  static const char* GetStretchModeDisplayName(AudioStretchMode mode);
  static std::optional<AudioStretchMode> ParseStretchMode(const char* name);

  ALWAYS_INLINE u32 GetSampleRate() const { return m_sample_rate; }
  ALWAYS_INLINE u32 GetBufferSize() const { return m_buffer_size; }
  ALWAYS_INLINE u32 GetTargetBufferSize() const { return m_target_buffer_size; }
  ALWAYS_INLINE u32 GetOutputVolume() const { return m_volume; }
  ALWAYS_INLINE float GetNominalTempo() const { return m_nominal_rate; }
  ALWAYS_INLINE bool IsPaused() const { return m_paused; }

  u32 GetBufferedFramesRelaxed() const;

  /// Creation/destruction.
  bool Initialize(AudioBackend backend, u32 sample_rate, const AudioStreamParameters& params, const char* driver_name,
                  const char* device_name, Error* error);
  void Destroy();

  /// Temporarily pauses the stream, preventing it from requesting data.
  void SetPaused(bool paused);
  void SetOutputVolume(u32 volume);

  // Producer interface: BeginWrite() hands out staging space, EndWrite() commits frames.
  void BeginWrite(SampleType** buffer_ptr, u32* num_frames);
  void EndWrite(u32 num_frames);

  // Discards all buffered frames.
  void EmptyBuffer();

  /// Nominal rate is used for both resampling and timestretching, input samples are assumed to be this amount faster
  /// than the sample rate.
  void SetNominalRate(float tempo);
  void SetStretchMode(AudioStretchMode mode);

  /// Wipes out the time stretching buffer, call when reducing target speed.
  void EmptyStretchBuffers();

private:
  static constexpr u32 AVERAGING_BUFFER_SIZE = 256;
  static constexpr u32 STRETCH_RESET_THRESHOLD = 5;

  ALWAYS_INLINE bool IsStretchEnabled() const { return m_parameters.stretch_mode != AudioStretchMode::Off; }

  // Ring buffer management.
  void AllocateBuffer();
  void DestroyBuffer();
  void InternalWriteFrames(SampleType* samples, u32 num_frames);

  // Stretcher lifecycle and tempo control.
  void StretchAllocate();
  void StretchDestroy();
  void StretchWriteBlock(const float* block);
  void StretchUnderrun();
  void StretchOverrun();
  float AddAndGetAverageTempo(float val);
  void UpdateStretchTempo();

  // AudioStreamSource: called by the backend stream to pull frames.
  void ReadFrames(SampleType* samples, u32 num_frames) override;

  // Backend output stream (null when using a null output).
  std::unique_ptr<AudioStream> m_stream;

  u32 m_sample_rate = 0;
  u32 m_volume = 0;
  AudioStreamParameters m_parameters;
  bool m_stretch_inactive = false;
  bool m_filling = false;
  bool m_paused = false;

  // Main ring buffer of interleaved s16 frames, plus read/write cursors (in frames).
  u32 m_buffer_size = 0;
  Common::unique_aligned_ptr<s16[]> m_buffer;
  // temporary staging buffer, used for timestretching
  Common::unique_aligned_ptr<s16[]> m_staging_buffer;
  // float buffer, soundtouch only accepts float samples as input
  Common::unique_aligned_ptr<float[]> m_float_buffer;
  std::atomic<u32> m_rpos{0};
  std::atomic<u32> m_wpos{0};

  // Opaque soundtouch instance handle.
  void* m_soundtouch = nullptr;

  // Tempo controller state (see UpdateStretchTempo()).
  u32 m_target_buffer_size = 0;
  u32 m_stretch_reset = STRETCH_RESET_THRESHOLD;
  u64 m_stretch_reset_time = 0;
  u32 m_stretch_ok_count = 0;
  float m_nominal_rate = 1.0f;
  float m_dynamic_target_usage = 0.0f;

  // Circular buffer used for tempo averaging.
  u32 m_average_position = 0;
  u32 m_average_available = 0;
  u32 m_staging_buffer_pos = 0;
  std::array<float, AVERAGING_BUFFER_SIZE> m_average_fullness = {};
};

View File

@@ -19,15 +19,18 @@ LOG_CHANNEL(CubebAudioStream);
namespace {
class CubebAudioStream : public AudioStream
class CubebAudioStream final : public AudioStream
{
public:
CubebAudioStream(u32 sample_rate, const AudioStreamParameters& parameters);
~CubebAudioStream();
CubebAudioStream();
~CubebAudioStream() override;
void SetPaused(bool paused) override;
bool Initialize(u32 sample_rate, u32 channels, u32 output_latency_frames, bool output_latency_minimal,
const char* driver_name, const char* device_name, AudioStreamSource* source, bool auto_start,
Error* error);
bool Initialize(const char* driver_name, const char* device_name, Error* error);
bool Start(Error* error) override;
bool Stop(Error* error) override;
private:
static void LogCallback(const char* fmt, ...);
@@ -35,20 +38,18 @@ private:
long nframes);
static void StateCallback(cubeb_stream* stream, void* user_ptr, cubeb_state state);
void DestroyContextAndStream();
cubeb* m_context = nullptr;
cubeb_stream* stream = nullptr;
};
} // namespace
static TinyString GetCubebErrorString(int rv)
static void FormatCubebError(Error* error, const char* prefix, int rv)
{
TinyString ret;
const char* str;
switch (rv)
{
// clang-format off
#define C(e) case e: ret.assign(#e); break
#define C(e) case e: str = #e; break
// clang-format on
C(CUBEB_OK);
@@ -59,37 +60,18 @@ static TinyString GetCubebErrorString(int rv)
C(CUBEB_ERROR_DEVICE_UNAVAILABLE);
default:
ret = "CUBEB_ERROR_UNKNOWN";
str = "CUBEB_ERROR_UNKNOWN";
break;
#undef C
}
ret.append_format(" ({})", rv);
return ret;
Error::SetStringFmt(error, "{}: {} ({})", prefix, str, rv);
}
CubebAudioStream::CubebAudioStream(u32 sample_rate, const AudioStreamParameters& parameters)
: AudioStream(sample_rate, parameters)
{
}
CubebAudioStream::CubebAudioStream() = default;
CubebAudioStream::~CubebAudioStream()
{
DestroyContextAndStream();
}
void CubebAudioStream::LogCallback(const char* fmt, ...)
{
LargeString str;
std::va_list ap;
va_start(ap, fmt);
str.vsprintf(fmt, ap);
va_end(ap);
DEV_LOG(str);
}
void CubebAudioStream::DestroyContextAndStream()
{
if (stream)
{
@@ -105,63 +87,71 @@ void CubebAudioStream::DestroyContextAndStream()
}
}
bool CubebAudioStream::Initialize(const char* driver_name, const char* device_name, Error* error)
void CubebAudioStream::LogCallback(const char* fmt, ...)
{
LargeString str;
std::va_list ap;
va_start(ap, fmt);
str.vsprintf(fmt, ap);
va_end(ap);
DEV_LOG(str);
}
bool CubebAudioStream::Initialize(u32 sample_rate, u32 channels, u32 output_latency_frames, bool output_latency_minimal,
const char* driver_name, const char* device_name, AudioStreamSource* source,
bool auto_start, Error* error)
{
cubeb_set_log_callback(CUBEB_LOG_NORMAL, LogCallback);
int rv =
cubeb_init(&m_context, "DuckStation", g_settings.audio_driver.empty() ? nullptr : g_settings.audio_driver.c_str());
int rv = cubeb_init(&m_context, "DuckStation", (driver_name && *driver_name != '\0') ? driver_name : nullptr);
if (rv != CUBEB_OK)
{
Error::SetStringFmt(error, "Could not initialize cubeb context: {}", GetCubebErrorString(rv));
FormatCubebError(error, "Could not initialize cubeb context: ", rv);
return false;
}
cubeb_stream_params params = {};
params.format = CUBEB_SAMPLE_S16LE;
params.rate = m_sample_rate;
params.channels = NUM_CHANNELS;
params.rate = sample_rate;
params.channels = channels;
params.layout = CUBEB_LAYOUT_STEREO;
params.prefs = CUBEB_STREAM_PREF_NONE;
u32 latency_frames = GetBufferSizeForMS(
m_sample_rate, (m_parameters.output_latency_ms == 0) ? m_parameters.buffer_ms : m_parameters.output_latency_ms);
u32 min_latency_frames = 0;
rv = cubeb_get_min_latency(m_context, &params, &min_latency_frames);
if (rv == CUBEB_ERROR_NOT_SUPPORTED)
{
DEV_LOG("Cubeb backend does not support latency queries, using latency of {} ms ({} frames).",
m_parameters.buffer_ms, latency_frames);
FramesToMS(sample_rate, output_latency_frames), output_latency_frames);
}
else
{
if (rv != CUBEB_OK)
{
Error::SetStringFmt(error, "cubeb_get_min_latency() failed: {}", GetCubebErrorString(rv));
DestroyContextAndStream();
FormatCubebError(error, "cubeb_get_min_latency() failed: {}", rv);
return false;
}
const u32 minimum_latency_ms = GetMSForBufferSize(m_sample_rate, min_latency_frames);
DEV_LOG("Minimum latency: {} ms ({} audio frames)", minimum_latency_ms, min_latency_frames);
if (m_parameters.output_latency_minimal)
if (output_latency_minimal)
{
// use minimum
latency_frames = min_latency_frames;
output_latency_frames = min_latency_frames;
}
else if (minimum_latency_ms > m_parameters.output_latency_ms)
else if (min_latency_frames > output_latency_frames)
{
WARNING_LOG("Minimum latency is above requested latency: {} vs {}, adjusting to compensate.", min_latency_frames,
latency_frames);
latency_frames = min_latency_frames;
output_latency_frames);
output_latency_frames = min_latency_frames;
}
}
DEV_LOG("Output latency: {} ms ({} audio frames)", FramesToMS(sample_rate, output_latency_frames),
min_latency_frames);
cubeb_devid selected_device = nullptr;
const std::string& selected_device_name = g_settings.audio_output_device;
cubeb_device_collection devices;
bool devices_valid = false;
if (!selected_device_name.empty())
if (device_name && *device_name != '\0')
{
rv = cubeb_enumerate_devices(m_context, CUBEB_DEVICE_TYPE_OUTPUT, &devices);
devices_valid = (rv == CUBEB_OK);
@@ -170,7 +160,7 @@ bool CubebAudioStream::Initialize(const char* driver_name, const char* device_na
for (size_t i = 0; i < devices.count; i++)
{
const cubeb_device_info& di = devices.device[i];
if (di.device_id && selected_device_name == di.device_id)
if (di.device_id && std::strcmp(device_name, di.device_id) == 0)
{
INFO_LOG("Using output device '{}' ({}).", di.device_id, di.friendly_name ? di.friendly_name : di.device_id);
selected_device = di.devid;
@@ -180,41 +170,41 @@ bool CubebAudioStream::Initialize(const char* driver_name, const char* device_na
if (!selected_device)
{
Host::AddOSDMessage(
OSDMessageType::Error,
fmt::format("Requested audio output device '{}' not found, using default.", selected_device_name));
Host::AddOSDMessage(OSDMessageType::Error,
fmt::format("Requested audio output device '{}' not found, using default.", device_name));
}
}
else
{
WARNING_LOG("cubeb_enumerate_devices() returned {}, using default device.", GetCubebErrorString(rv));
Error enumerate_error;
FormatCubebError(&enumerate_error, "cubeb_enumerate_devices() failed: ", rv);
WARNING_LOG("{}, using default device.", enumerate_error.GetDescription());
}
}
BaseInitialize();
char stream_name[32];
std::snprintf(stream_name, sizeof(stream_name), "%p", this);
rv = cubeb_stream_init(m_context, &stream, stream_name, nullptr, nullptr, selected_device, &params, latency_frames,
&CubebAudioStream::DataCallback, StateCallback, this);
rv = cubeb_stream_init(m_context, &stream, stream_name, nullptr, nullptr, selected_device, &params,
output_latency_frames, &CubebAudioStream::DataCallback, StateCallback, source);
if (devices_valid)
cubeb_device_collection_destroy(m_context, &devices);
if (rv != CUBEB_OK)
{
Error::SetStringFmt(error, "cubeb_stream_init() failed: {}", GetCubebErrorString(rv));
DestroyContextAndStream();
FormatCubebError(error, "cubeb_stream_init() failed: ", rv);
return false;
}
rv = cubeb_stream_start(stream);
if (rv != CUBEB_OK)
if (auto_start)
{
Error::SetStringFmt(error, "cubeb_stream_start() failed: {}", GetCubebErrorString(rv));
DestroyContextAndStream();
return false;
rv = cubeb_stream_start(stream);
if (rv != CUBEB_OK)
{
FormatCubebError(error, "cubeb_stream_start() failed: ", rv);
return false;
}
}
return true;
@@ -228,33 +218,47 @@ void CubebAudioStream::StateCallback(cubeb_stream* stream, void* user_ptr, cubeb
long CubebAudioStream::DataCallback(cubeb_stream* stm, void* user_ptr, const void* input_buffer, void* output_buffer,
long nframes)
{
static_cast<CubebAudioStream*>(user_ptr)->ReadFrames(static_cast<s16*>(output_buffer), static_cast<u32>(nframes));
static_cast<AudioStreamSource*>(user_ptr)->ReadFrames(static_cast<s16*>(output_buffer), static_cast<u32>(nframes));
return nframes;
}
void CubebAudioStream::SetPaused(bool paused)
bool CubebAudioStream::Start(Error* error)
{
if (paused == m_paused || !stream)
return;
const int rv = paused ? cubeb_stream_stop(stream) : cubeb_stream_start(stream);
const int rv = cubeb_stream_start(stream);
if (rv != CUBEB_OK)
{
ERROR_LOG("Could not {} stream: {}", paused ? "pause" : "resume", rv);
return;
FormatCubebError(error, "cubeb_stream_start() failed: ", rv);
return false;
}
m_paused = paused;
return true;
}
std::unique_ptr<AudioStream> AudioStream::CreateCubebAudioStream(u32 sample_rate,
const AudioStreamParameters& parameters,
bool CubebAudioStream::Stop(Error* error)
{
const int rv = cubeb_stream_stop(stream);
if (rv != CUBEB_OK)
{
FormatCubebError(error, "cubeb_stream_stop() failed: ", rv);
return false;
}
return true;
}
std::unique_ptr<AudioStream> AudioStream::CreateCubebAudioStream(u32 sample_rate, u32 channels,
u32 output_latency_frames, bool output_latency_minimal,
const char* driver_name, const char* device_name,
AudioStreamSource* source, bool auto_start,
Error* error)
{
std::unique_ptr<CubebAudioStream> stream = std::make_unique<CubebAudioStream>(sample_rate, parameters);
if (!stream->Initialize(driver_name, device_name, error))
std::unique_ptr<CubebAudioStream> stream = std::make_unique<CubebAudioStream>();
if (!stream->Initialize(sample_rate, channels, output_latency_frames, output_latency_minimal, driver_name,
device_name, source, auto_start, error))
{
stream.reset();
}
return stream;
}
@@ -271,6 +275,8 @@ std::vector<std::pair<std::string, std::string>> AudioStream::GetCubebDriverName
std::vector<AudioStream::DeviceInfo> AudioStream::GetCubebOutputDevices(const char* driver, u32 sample_rate)
{
Error error;
std::vector<AudioStream::DeviceInfo> ret;
ret.emplace_back(std::string(), TRANSLATE_STR("AudioStream", "Default"), 0);
@@ -278,7 +284,8 @@ std::vector<AudioStream::DeviceInfo> AudioStream::GetCubebOutputDevices(const ch
int rv = cubeb_init(&context, "DuckStation", (driver && *driver) ? driver : nullptr);
if (rv != CUBEB_OK)
{
ERROR_LOG("cubeb_init() failed: {}", GetCubebErrorString(rv));
FormatCubebError(&error, "cubeb_init() failed: ", rv);
ERROR_LOG(error.GetDescription());
return ret;
}
@@ -288,7 +295,8 @@ std::vector<AudioStream::DeviceInfo> AudioStream::GetCubebOutputDevices(const ch
rv = cubeb_enumerate_devices(context, CUBEB_DEVICE_TYPE_OUTPUT, &devices);
if (rv != CUBEB_OK)
{
ERROR_LOG("cubeb_enumerate_devices() failed: {}", GetCubebErrorString(rv));
FormatCubebError(&error, "cubeb_enumerate_devices() failed: ", rv);
ERROR_LOG(error.GetDescription());
return ret;
}

View File

@@ -16,18 +16,21 @@ namespace {
class SDLAudioStream final : public AudioStream
{
public:
SDLAudioStream(u32 sample_rate, const AudioStreamParameters& parameters);
~SDLAudioStream();
SDLAudioStream(AudioStreamSource* source, u32 channels);
~SDLAudioStream() override;
void SetPaused(bool paused) override;
bool Initialize(u32 sample_rate, u32 channels, u32 output_latency_frames, bool output_latency_minimal,
bool auto_start, Error* error);
bool OpenDevice(Error* error);
void CloseDevice();
bool Start(Error* error) override;
bool Stop(Error* error) override;
protected:
static void AudioCallback(void* userdata, SDL_AudioStream* stream, int additional_amount, int total_amount);
AudioStreamSource* m_source;
SDL_AudioStream* m_sdl_stream = nullptr;
u32 m_channels;
};
} // namespace
@@ -50,60 +53,11 @@ static bool InitializeSDLAudio(Error* error)
return true;
}
SDLAudioStream::SDLAudioStream(u32 sample_rate, const AudioStreamParameters& parameters)
: AudioStream(sample_rate, parameters)
SDLAudioStream::SDLAudioStream(AudioStreamSource* source, u32 channels) : m_source(source), m_channels(channels)
{
}
SDLAudioStream::~SDLAudioStream()
{
SDLAudioStream::CloseDevice();
}
std::unique_ptr<AudioStream> AudioStream::CreateSDLAudioStream(u32 sample_rate, const AudioStreamParameters& parameters,
Error* error)
{
if (!InitializeSDLAudio(error))
return {};
std::unique_ptr<SDLAudioStream> stream = std::make_unique<SDLAudioStream>(sample_rate, parameters);
if (!stream->OpenDevice(error))
stream.reset();
return stream;
}
bool SDLAudioStream::OpenDevice(Error* error)
{
DebugAssert(!m_sdl_stream);
const SDL_AudioSpec spec = {
.format = SDL_AUDIO_S16LE, .channels = NUM_CHANNELS, .freq = static_cast<int>(m_sample_rate)};
m_sdl_stream =
SDL_OpenAudioDeviceStream(SDL_AUDIO_DEVICE_DEFAULT_PLAYBACK, &spec, AudioCallback, static_cast<void*>(this));
if (!m_sdl_stream)
{
Error::SetStringFmt(error, "SDL_OpenAudioDeviceStream() failed: {}", SDL_GetError());
return false;
}
BaseInitialize();
SDL_ResumeAudioDevice(SDL_GetAudioStreamDevice(m_sdl_stream));
return true;
}
void SDLAudioStream::SetPaused(bool paused)
{
if (m_paused == paused)
return;
paused ? SDL_PauseAudioStreamDevice(m_sdl_stream) : SDL_ResumeAudioStreamDevice(m_sdl_stream);
m_paused = paused;
}
void SDLAudioStream::CloseDevice()
{
if (m_sdl_stream)
{
@@ -112,6 +66,47 @@ void SDLAudioStream::CloseDevice()
}
}
bool SDLAudioStream::Initialize(u32 sample_rate, u32 channels, u32 output_latency_frames, bool output_latency_minimal,
bool auto_start, Error* error)
{
const SDL_AudioSpec spec = {
.format = SDL_AUDIO_S16LE, .channels = static_cast<int>(channels), .freq = static_cast<int>(sample_rate)};
m_sdl_stream = SDL_OpenAudioDeviceStream(SDL_AUDIO_DEVICE_DEFAULT_PLAYBACK, &spec, AudioCallback, this);
if (!m_sdl_stream)
{
Error::SetStringFmt(error, "SDL_OpenAudioDeviceStream() failed: {}", SDL_GetError());
return false;
}
if (auto_start)
SDL_ResumeAudioDevice(SDL_GetAudioStreamDevice(m_sdl_stream));
return true;
}
bool SDLAudioStream::Start(Error* error)
{
if (!SDL_ResumeAudioStreamDevice(m_sdl_stream))
{
Error::SetStringFmt(error, "SDL_ResumeAudioStreamDevice() failed: {}", SDL_GetError());
return false;
}
return true;
}
bool SDLAudioStream::Stop(Error* error)
{
if (!SDL_PauseAudioStreamDevice(m_sdl_stream))
{
Error::SetStringFmt(error, "SDL_PauseAudioStreamDevice() failed: {}", SDL_GetError());
return false;
}
return true;
}
// SDL pull callback: fills `additional_amount` bytes of audio from the stream source
// and hands the buffer to SDL, freeing the stack allocation afterwards.
// NOTE(review): this function is garbled in this view. The line below beginning with `@@`
// is diff-hunk residue from the rendered commit page, and the statements that allocate
// `data` (presumably via SDL_stack_alloc — TODO confirm against the real file) are missing.
void SDLAudioStream::AudioCallback(void* userdata, SDL_AudioStream* stream, int additional_amount, int total_amount)
{
if (additional_amount == 0)
@@ -121,9 +116,23 @@ void SDLAudioStream::AudioCallback(void* userdata, SDL_AudioStream* stream, int
if (data)
{
SDLAudioStream* const this_ptr = static_cast<SDLAudioStream*>(userdata);
// NOTE(review): the next two lines and the two that follow are the pre-change and
// post-change variants of the same statements from the diff; only one pair belongs in
// the real file (the post-change pair, which reads m_channels and forwards to m_source).
const u32 num_frames = static_cast<u32>(additional_amount) / (sizeof(SampleType) * NUM_CHANNELS);
this_ptr->ReadFrames(reinterpret_cast<SampleType*>(data), num_frames);
const u32 num_frames = static_cast<u32>(additional_amount) / (sizeof(SampleType) * this_ptr->m_channels);
this_ptr->m_source->ReadFrames(reinterpret_cast<SampleType*>(data), num_frames);
SDL_PutAudioStreamData(stream, data, additional_amount);
SDL_stack_free(data);
}
}
// Factory for the SDL audio backend: brings up the SDL audio subsystem, constructs the
// stream, and initializes it with the requested parameters.
// Returns nullptr (with `error` populated) if subsystem init or stream init fails.
std::unique_ptr<AudioStream> AudioStream::CreateSDLAudioStream(u32 sample_rate, u32 channels, u32 output_latency_frames,
bool output_latency_minimal, AudioStreamSource* source,
bool auto_start, Error* error)
{
  // The SDL audio subsystem must be running before a device stream can be created.
  if (!InitializeSDLAudio(error))
    return {};

  auto stream = std::make_unique<SDLAudioStream>(source, channels);
  const bool initialized =
    stream->Initialize(sample_rate, channels, output_latency_frames, output_latency_minimal, auto_start, error);
  if (!initialized)
    return nullptr;

  return stream;
}

View File

@@ -4,6 +4,7 @@
<ItemGroup>
<ClInclude Include="animated_image.h" />
<ClInclude Include="compress_helpers.h" />
<ClInclude Include="core_audio_stream.h" />
<ClInclude Include="dyn_shaderc.h" />
<ClInclude Include="dyn_spirv_cross.h" />
<ClInclude Include="elf_file.h" />
@@ -127,6 +128,7 @@
<ClCompile Include="cd_image_memory.cpp" />
<ClCompile Include="cd_image_pbp.cpp" />
<ClCompile Include="compress_helpers.cpp" />
<ClCompile Include="core_audio_stream.cpp" />
<ClCompile Include="cubeb_audio_stream.cpp" />
<ClCompile Include="cue_parser.cpp" />
<ClCompile Include="cd_image_ppf.cpp" />

View File

@@ -80,6 +80,7 @@
<ClInclude Include="postprocessing_shader_slang.h" />
<ClInclude Include="imgui_gsvector.h" />
<ClInclude Include="translation.h" />
<ClInclude Include="core_audio_stream.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="state_wrapper.cpp" />
@@ -165,6 +166,7 @@
<ClCompile Include="spirv_module.cpp" />
<ClCompile Include="postprocessing_shader_slang.cpp" />
<ClCompile Include="translation.cpp" />
<ClCompile Include="core_audio_stream.cpp" />
</ItemGroup>
<ItemGroup>
<None Include="metal_shaders.metal" />