AudioStream: Split into source and backend streams

This commit is contained in:
Stenzek
2025-12-22 14:09:19 +10:00
parent 471fba1445
commit abcaceeb80
16 changed files with 1312 additions and 1203 deletions

View File

@@ -2527,7 +2527,7 @@ void FullscreenUI::DrawCoverDownloaderWindow()
// TODO: Remove release once using move_only_function
std::unique_ptr<ProgressCallback> progress = OpenModalProgressDialog(FSUI_STR("Cover Downloader"), 1000.0f);
Host::QueueAsyncTask([progress = progress.release(), urls = StringUtil::SplitNewString(template_urls, '\n'),
use_serial_names = use_serial_names]() {
use_serial_names = use_serial_names]() {
Error error;
if (!GameList::DownloadCovers(
urls, use_serial_names, progress, &error, [](const GameList::Entry* entry, std::string save_path) {
@@ -4573,8 +4573,9 @@ void FullscreenUI::DrawAudioSettingsPage()
&AudioStream::GetBackendDisplayName, AudioBackend::Count);
DrawEnumSetting(bsi, FSUI_ICONVSTR(ICON_PF_SFX_SOUND_EFFECT_NOISE, "Stretch Mode"),
FSUI_CSTR("Determines quality of audio when not running at 100% speed."), "Audio", "StretchMode",
AudioStreamParameters::DEFAULT_STRETCH_MODE, &AudioStream::ParseStretchMode,
&AudioStream::GetStretchModeName, &AudioStream::GetStretchModeDisplayName, AudioStretchMode::Count);
AudioStreamParameters::DEFAULT_STRETCH_MODE, &CoreAudioStream::ParseStretchMode,
&CoreAudioStream::GetStretchModeName, &CoreAudioStream::GetStretchModeDisplayName,
AudioStretchMode::Count);
DrawIntRangeSetting(bsi, FSUI_ICONVSTR(ICON_FA_BUCKET, "Buffer Size"),
FSUI_VSTR("Determines the amount of audio buffered before being pulled by the host API."),
"Audio", "BufferMS", AudioStreamParameters::DEFAULT_BUFFER_MS, 10, 500, FSUI_CSTR("%d ms"));

View File

@@ -540,7 +540,7 @@ DEFINE_HOTKEY("AudioMute", TRANSLATE_NOOP("Hotkeys", "Audio"), TRANSLATE_NOOP("H
{
g_settings.audio_output_muted = !g_settings.audio_output_muted;
const s32 volume = System::GetAudioOutputVolume();
SPU::GetOutputStream()->SetOutputVolume(volume);
SPU::GetOutputStream().SetOutputVolume(volume);
if (g_settings.audio_output_muted)
{
Host::AddIconOSDMessage(OSDMessageType::Quick, "AudioControlHotkey", ICON_EMOJI_MUTED_SPEAKER,
@@ -576,7 +576,7 @@ DEFINE_HOTKEY("AudioVolumeUp", TRANSLATE_NOOP("Hotkeys", "Audio"), TRANSLATE_NOO
Truncate8(std::min<s32>(static_cast<s32>(System::GetAudioOutputVolume()) + 10, 200));
g_settings.audio_output_volume = volume;
g_settings.audio_fast_forward_volume = volume;
SPU::GetOutputStream()->SetOutputVolume(volume);
SPU::GetOutputStream().SetOutputVolume(volume);
Host::AddIconOSDMessage(OSDMessageType::Quick, "AudioControlHotkey", ICON_EMOJI_HIGH_VOLUME_SPEAKER,
fmt::format(TRANSLATE_FS("OSDMessage", "Volume: {}%"), volume));
}
@@ -590,7 +590,7 @@ DEFINE_HOTKEY("AudioVolumeDown", TRANSLATE_NOOP("Hotkeys", "Audio"), TRANSLATE_N
const u8 volume = Truncate8(std::max<s32>(static_cast<s32>(System::GetAudioOutputVolume()) - 10, 0));
g_settings.audio_output_volume = volume;
g_settings.audio_fast_forward_volume = volume;
SPU::GetOutputStream()->SetOutputVolume(volume);
SPU::GetOutputStream().SetOutputVolume(volume);
Host::AddIconOSDMessage(OSDMessageType::Quick, "AudioControlHotkey", ICON_EMOJI_MEDIUM_VOLUME_SPEAKER,
fmt::format(TRANSLATE_FS("OSDMessage", "Volume: {}%"), volume));
}

View File

@@ -5,7 +5,7 @@
#include "types.h"
#include "util/audio_stream.h"
#include "util/core_audio_stream.h"
#include "common/small_string.h"

View File

@@ -365,10 +365,8 @@ static void ManualTransferWrite(u16 value);
static void UpdateTransferEvent();
static void UpdateDMARequest();
static void CreateOutputStream();
namespace {
struct SPUState
struct ALIGN_TO_CACHE_LINE SPUState
{
TimingEvent transfer_event{"SPU Transfer", TRANSFER_TICKS_PER_HALFWORD, TRANSFER_TICKS_PER_HALFWORD,
&SPU::ExecuteTransfer, nullptr};
@@ -416,15 +414,15 @@ struct SPUState
std::array<std::array<s16, 64>, 2> reverb_upsample_buffer;
s32 reverb_resample_buffer_position = 0;
s16 last_reverb_input[2];
s32 last_reverb_output[2];
bool audio_output_muted = false;
ALIGN_TO_CACHE_LINE std::array<Voice, NUM_VOICES> voices{};
InlineFIFOQueue<u16, FIFO_SIZE_IN_HALFWORDS> transfer_fifo;
std::unique_ptr<AudioStream> audio_stream;
s16 last_reverb_input[2];
s32 last_reverb_output[2];
bool audio_output_muted = false;
CoreAudioStream audio_stream;
#ifdef SPU_DUMP_ALL_VOICES
// +1 for reverb output
@@ -440,7 +438,7 @@ struct SPUState
};
} // namespace
ALIGN_TO_CACHE_LINE static SPUState s_state;
static SPUState s_state;
ALIGN_TO_CACHE_LINE static std::array<u8, RAM_SIZE> s_ram{};
ALIGN_TO_CACHE_LINE static std::array<s16, (44100 / 60) * 2> s_muted_output_buffer{};
@@ -503,13 +501,11 @@ void SPU::CreateOutputStream()
AudioStream::GetBackendName(g_settings.audio_backend), static_cast<u32>(SAMPLE_RATE),
g_settings.audio_stream_parameters.buffer_ms, g_settings.audio_stream_parameters.output_latency_ms,
g_settings.audio_stream_parameters.output_latency_minimal ? " (or minimal)" : "",
AudioStream::GetStretchModeName(g_settings.audio_stream_parameters.stretch_mode));
CoreAudioStream::GetStretchModeName(g_settings.audio_stream_parameters.stretch_mode));
Error error;
s_state.audio_stream =
AudioStream::CreateStream(g_settings.audio_backend, SAMPLE_RATE, g_settings.audio_stream_parameters,
g_settings.audio_driver.c_str(), g_settings.audio_output_device.c_str(), &error);
if (!s_state.audio_stream)
if (!s_state.audio_stream.Initialize(g_settings.audio_backend, SAMPLE_RATE, g_settings.audio_stream_parameters,
g_settings.audio_driver.c_str(), g_settings.audio_output_device.c_str(), &error))
{
Host::AddIconOSDMessage(
OSDMessageType::Error, "SPUAudioStream", ICON_EMOJI_WARNING,
@@ -517,19 +513,13 @@ void SPU::CreateOutputStream()
TRANSLATE_FS("SPU",
"Failed to create or configure audio stream, falling back to null output. The error was:\n{}"),
error.GetDescription()));
s_state.audio_stream.reset();
s_state.audio_stream = AudioStream::CreateNullStream(SAMPLE_RATE, g_settings.audio_stream_parameters.buffer_ms);
s_state.audio_stream.Initialize(AudioBackend::Null, SAMPLE_RATE, g_settings.audio_stream_parameters, nullptr,
nullptr, nullptr);
}
s_state.audio_stream->SetOutputVolume(System::GetAudioOutputVolume());
s_state.audio_stream->SetNominalRate(System::GetAudioNominalRate());
s_state.audio_stream->SetPaused(System::IsPaused());
}
void SPU::RecreateOutputStream()
{
s_state.audio_stream.reset();
CreateOutputStream();
s_state.audio_stream.SetOutputVolume(System::GetAudioOutputVolume());
s_state.audio_stream.SetNominalRate(System::GetAudioNominalRate());
s_state.audio_stream.SetPaused(System::IsPaused());
}
void SPU::CPUClockChanged()
@@ -550,7 +540,7 @@ void SPU::Shutdown()
s_state.tick_event.Deactivate();
s_state.transfer_event.Deactivate();
s_state.audio_stream.reset();
s_state.audio_stream.Destroy();
}
void SPU::Reset()
@@ -1674,9 +1664,9 @@ void SPU::SetAudioOutputMuted(bool muted)
s_state.audio_output_muted = muted;
}
AudioStream* SPU::GetOutputStream()
CoreAudioStream& SPU::GetOutputStream()
{
return s_state.audio_stream.get();
return s_state.audio_stream;
}
void SPU::Voice::KeyOn()
@@ -2410,7 +2400,7 @@ void SPU::Execute(void* param, TickCount ticks, TickCount ticks_late)
if (!s_state.audio_output_muted) [[likely]]
{
output_frame_space = remaining_frames;
s_state.audio_stream->BeginWrite(&output_frame_start, &output_frame_space);
s_state.audio_stream.BeginWrite(&output_frame_start, &output_frame_space);
}
else
{
@@ -2536,7 +2526,7 @@ void SPU::Execute(void* param, TickCount ticks, TickCount ticks_late)
}
#ifndef __ANDROID__
if (MediaCapture* cap = System::GetMediaCapture(); cap && !s_state.audio_output_muted) [[unlikely]]
if (MediaCapture* cap = System::GetMediaCapture()) [[unlikely]]
{
if (!cap->DeliverAudioFrames(output_frame_start, frames_in_this_batch))
System::StopMediaCapture();
@@ -2544,7 +2534,7 @@ void SPU::Execute(void* param, TickCount ticks, TickCount ticks_late)
#endif
if (!s_state.audio_output_muted) [[likely]]
s_state.audio_stream->EndWrite(frames_in_this_batch);
s_state.audio_stream.EndWrite(frames_in_this_batch);
remaining_frames -= frames_in_this_batch;
}
}
@@ -2554,7 +2544,7 @@ void SPU::UpdateEventInterval()
// Don't generate more than the audio buffer since in a single slice, otherwise we'll both overflow the buffers when
// we do write it, and the audio thread will underflow since it won't have enough data it the game isn't messing with
// the SPU state.
const u32 max_slice_frames = s_state.audio_stream->GetBufferSize();
const u32 max_slice_frames = s_state.audio_stream.GetBufferSize();
// TODO: Make this predict how long until the interrupt will be hit instead...
const u32 interval = (s_state.SPUCNT.enable && s_state.SPUCNT.irq9_enable) ? 1 : max_slice_frames;

View File

@@ -9,7 +9,7 @@
class StateWrapper;
class AudioStream;
class CoreAudioStream;
namespace SPU {
@@ -47,7 +47,7 @@ std::array<u8, RAM_SIZE>& GetWritableRAM();
bool IsAudioOutputMuted();
void SetAudioOutputMuted(bool muted);
AudioStream* GetOutputStream();
void RecreateOutputStream();
CoreAudioStream& GetOutputStream();
void CreateOutputStream();
}; // namespace SPU

View File

@@ -1639,7 +1639,7 @@ void System::PauseSystem(bool paused)
return;
s_state.state = (paused ? State::Paused : State::Running);
SPU::GetOutputStream()->SetPaused(paused);
SPU::GetOutputStream().SetPaused(paused);
GPUThread::RunOnThread([paused]() { GPUThread::SetRunIdleReason(GPUThread::RunIdleReason::SystemPaused, paused); });
if (paused)
@@ -1905,7 +1905,7 @@ bool System::BootSystem(SystemBootParameters parameters, Error* error)
// Good to go.
s_state.state = State::Running;
std::atomic_thread_fence(std::memory_order_release);
SPU::GetOutputStream()->SetPaused(false);
SPU::GetOutputStream().SetPaused(false);
// Immediately pausing?
const bool start_paused = (ShouldStartPaused() || parameters.override_start_paused.value_or(false));
@@ -3666,9 +3666,9 @@ void System::AccumulatePreFrameSleepTime(Timer::Value current_time)
void System::FormatLatencyStats(SmallStringBase& str)
{
AudioStream* audio_stream = SPU::GetOutputStream();
CoreAudioStream& audio_stream = SPU::GetOutputStream();
const u32 audio_latency =
AudioStream::GetMSForBufferSize(audio_stream->GetSampleRate(), audio_stream->GetBufferedFramesRelaxed());
CoreAudioStream::GetMSForBufferSize(audio_stream.GetSampleRate(), audio_stream.GetBufferedFramesRelaxed());
const u32 queued_frame_count = GPUBackend::GetQueuedFrameCount();
const double active_frame_time = std::ceil(Timer::ConvertValueToMilliseconds(s_state.last_active_frame_time));
@@ -3725,13 +3725,13 @@ void System::UpdateSpeedLimiterState()
VERBOSE_LOG("Preset timing: {}", s_state.optimal_frame_pacing ? "consistent" : "immediate");
// Update audio output.
AudioStream* stream = SPU::GetOutputStream();
stream->SetOutputVolume(GetAudioOutputVolume());
stream->SetNominalRate(GetAudioNominalRate());
CoreAudioStream& stream = SPU::GetOutputStream();
stream.SetOutputVolume(GetAudioOutputVolume());
stream.SetNominalRate(GetAudioNominalRate());
// Only empty stretch buffers when we're decreasing speed.
if (s_state.target_speed != prev_speed && (prev_speed > s_state.target_speed || prev_speed == 0.0f))
stream->EmptyStretchBuffers();
stream.EmptyStretchBuffers();
UpdateThrottlePeriod();
ResetThrottler();
@@ -4494,7 +4494,8 @@ void System::CheckForSettingsChanges(const Settings& old_settings)
if (g_settings.audio_backend != old_settings.audio_backend ||
g_settings.audio_driver != old_settings.audio_driver ||
g_settings.audio_output_device != old_settings.audio_output_device)
g_settings.audio_output_device != old_settings.audio_output_device ||
g_settings.audio_stream_parameters != old_settings.audio_stream_parameters)
{
if (g_settings.audio_backend != old_settings.audio_backend)
{
@@ -4503,14 +4504,7 @@ void System::CheckForSettingsChanges(const Settings& old_settings)
AudioStream::GetBackendDisplayName(g_settings.audio_backend)));
}
SPU::RecreateOutputStream();
}
if (g_settings.audio_stream_parameters.stretch_mode != old_settings.audio_stream_parameters.stretch_mode)
SPU::GetOutputStream()->SetStretchMode(g_settings.audio_stream_parameters.stretch_mode);
if (g_settings.audio_stream_parameters != old_settings.audio_stream_parameters)
{
SPU::RecreateOutputStream();
UpdateSpeedLimiterState();
SPU::CreateOutputStream();
}
if (g_settings.emulation_speed != old_settings.emulation_speed)
@@ -4553,7 +4547,7 @@ void System::CheckForSettingsChanges(const Settings& old_settings)
InterruptExecution();
}
SPU::GetOutputStream()->SetOutputVolume(GetAudioOutputVolume());
SPU::GetOutputStream().SetOutputVolume(GetAudioOutputVolume());
// CPU side GPU settings
if (g_settings.display_deinterlacing_mode != old_settings.display_deinterlacing_mode ||
@@ -5432,7 +5426,7 @@ void System::UpdateVolume()
if (!IsValid())
return;
SPU::GetOutputStream()->SetOutputVolume(GetAudioOutputVolume());
SPU::GetOutputStream().SetOutputVolume(GetAudioOutputVolume());
}
std::string System::GetScreenshotPath(const char* extension)

View File

@@ -27,8 +27,9 @@ AudioSettingsWidget::AudioSettingsWidget(SettingsWindow* dialog, QWidget* parent
sif, m_ui.audioBackend, "Audio", "Backend", &AudioStream::ParseBackendName, &AudioStream::GetBackendName,
&AudioStream::GetBackendDisplayName, AudioStream::DEFAULT_BACKEND, AudioBackend::Count);
SettingWidgetBinder::BindWidgetToEnumSetting(
sif, m_ui.stretchMode, "Audio", "StretchMode", &AudioStream::ParseStretchMode, &AudioStream::GetStretchModeName,
&AudioStream::GetStretchModeDisplayName, AudioStreamParameters::DEFAULT_STRETCH_MODE, AudioStretchMode::Count);
sif, m_ui.stretchMode, "Audio", "StretchMode", &CoreAudioStream::ParseStretchMode,
&CoreAudioStream::GetStretchModeName, &CoreAudioStream::GetStretchModeDisplayName,
AudioStreamParameters::DEFAULT_STRETCH_MODE, AudioStretchMode::Count);
SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.bufferMS, "Audio", "BufferMS",
AudioStreamParameters::DEFAULT_BUFFER_MS);
SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.outputLatencyMS, "Audio", "OutputLatencyMS",
@@ -116,10 +117,10 @@ AudioSettingsWidget::~AudioSettingsWidget() = default;
void AudioSettingsWidget::onStretchModeChanged()
{
const AudioStretchMode stretch_mode =
AudioStream::ParseStretchMode(
CoreAudioStream::ParseStretchMode(
m_dialog
->getEffectiveStringValue("Audio", "StretchMode",
AudioStream::GetStretchModeName(AudioStreamParameters::DEFAULT_STRETCH_MODE))
CoreAudioStream::GetStretchModeName(AudioStreamParameters::DEFAULT_STRETCH_MODE))
.c_str())
.value_or(AudioStreamParameters::DEFAULT_STRETCH_MODE);
m_ui.stretchSettings->setEnabled(stretch_mode != AudioStretchMode::Off);
@@ -219,7 +220,7 @@ void AudioSettingsWidget::updateLatencyLabel()
m_ui.bufferMSLabel->setText(tr("%1 ms").arg(config_buffer_ms));
const u32 output_latency_ms = minimal_output ?
AudioStream::GetMSForBufferSize(SPU::SAMPLE_RATE, m_output_device_latency) :
CoreAudioStream::GetMSForBufferSize(SPU::SAMPLE_RATE, m_output_device_latency) :
config_output_latency_ms;
if (output_latency_ms > 0)
{

View File

@@ -17,6 +17,8 @@ add_library(util
cd_image_ppf.cpp
compress_helpers.cpp
compress_helpers.h
core_audio_stream.cpp
core_audio_stream.h
cue_parser.cpp
cue_parser.h
dyn_shaderc.h

View File

@@ -4,25 +4,11 @@
#include "audio_stream.h"
#include "translation.h"
#include "common/align.h"
#include "common/assert.h"
#include "common/error.h"
#include "common/gsvector.h"
#include "common/log.h"
#include "common/settings_interface.h"
#include "common/timer.h"
#include "soundtouch/SoundTouch.h"
#include "soundtouch/SoundTouchDLL.h"
AudioStream::AudioStream() = default;
#include <algorithm>
#include <cmath>
#include <cstring>
#include <limits>
LOG_CHANNEL(AudioStream);
static constexpr bool LOG_TIMESTRETCH_STATS = false;
AudioStream::~AudioStream() = default;
AudioStream::DeviceInfo::DeviceInfo(std::string name_, std::string display_name_, u32 minimum_latency_)
: name(std::move(name_)), display_name(std::move(display_name_)), minimum_latency_frames(minimum_latency_)
@@ -31,174 +17,6 @@ AudioStream::DeviceInfo::DeviceInfo(std::string name_, std::string display_name_
AudioStream::DeviceInfo::~DeviceInfo() = default;
void AudioStreamParameters::Load(const SettingsInterface& si, const char* section)
{
stretch_mode =
AudioStream::ParseStretchMode(
si.GetStringValue(section, "StretchMode", AudioStream::GetStretchModeName(DEFAULT_STRETCH_MODE)).c_str())
.value_or(DEFAULT_STRETCH_MODE);
output_latency_ms = static_cast<u16>(std::min<u32>(
si.GetUIntValue(section, "OutputLatencyMS", DEFAULT_OUTPUT_LATENCY_MS), std::numeric_limits<u16>::max()));
output_latency_minimal = si.GetBoolValue(section, "OutputLatencyMinimal", DEFAULT_OUTPUT_LATENCY_MINIMAL);
buffer_ms = static_cast<u16>(
std::min<u32>(si.GetUIntValue(section, "BufferMS", DEFAULT_BUFFER_MS), std::numeric_limits<u16>::max()));
stretch_sequence_length_ms =
static_cast<u16>(std::min<u32>(si.GetUIntValue(section, "StretchSequenceLengthMS", DEFAULT_STRETCH_SEQUENCE_LENGTH),
std::numeric_limits<u16>::max()));
stretch_seekwindow_ms = static_cast<u16>(std::min<u32>(
si.GetUIntValue(section, "StretchSeekWindowMS", DEFAULT_STRETCH_SEEKWINDOW), std::numeric_limits<u16>::max()));
stretch_overlap_ms = static_cast<u16>(std::min<u32>(
si.GetUIntValue(section, "StretchOverlapMS", DEFAULT_STRETCH_OVERLAP), std::numeric_limits<u16>::max()));
stretch_use_quickseek = si.GetBoolValue(section, "StretchUseQuickSeek", DEFAULT_STRETCH_USE_QUICKSEEK);
stretch_use_aa_filter = si.GetBoolValue(section, "StretchUseAAFilter", DEFAULT_STRETCH_USE_AA_FILTER);
}
void AudioStreamParameters::Save(SettingsInterface& si, const char* section) const
{
  // Persists every tunable to the given config section. Writes the same keys
  // that Load() reads; the stretch mode is stored by name, not numeric value.
  si.SetStringValue(section, "StretchMode", AudioStream::GetStretchModeName(stretch_mode));
  si.SetUIntValue(section, "BufferMS", buffer_ms);
  si.SetUIntValue(section, "OutputLatencyMS", output_latency_ms);
  si.SetBoolValue(section, "OutputLatencyMinimal", output_latency_minimal);
  si.SetUIntValue(section, "StretchSequenceLengthMS", stretch_sequence_length_ms);
  si.SetUIntValue(section, "StretchSeekWindowMS", stretch_seekwindow_ms);
  si.SetUIntValue(section, "StretchOverlapMS", stretch_overlap_ms);
  si.SetBoolValue(section, "StretchUseQuickSeek", stretch_use_quickseek);
  si.SetBoolValue(section, "StretchUseAAFilter", stretch_use_aa_filter);
}
void AudioStreamParameters::Clear(SettingsInterface& si, const char* section)
{
  // Removes every key owned by this struct so the section reverts to defaults
  // on the next Load(). Note "ExpansionMode" is deleted but never written by
  // Save() — presumably an obsolete key being cleaned up; confirm.
  static constexpr const char* keys_to_remove[] = {
    "StretchMode",
    "ExpansionMode",
    "BufferMS",
    "OutputLatencyMS",
    "OutputLatencyMinimal",
    "StretchSequenceLengthMS",
    "StretchSeekWindowMS",
    "StretchOverlapMS",
    "StretchUseQuickSeek",
    "StretchUseAAFilter",
  };
  for (const char* key : keys_to_remove)
    si.DeleteValue(section, key);
}
// Equality/inequality via raw memcmp over the whole struct.
// NOTE(review): this assumes AudioStreamParameters contains no padding bytes
// (or that padding is always zero-initialized) — confirm against the struct
// definition, since uninitialized padding would make equal parameter sets
// compare unequal.
bool AudioStreamParameters::operator!=(const AudioStreamParameters& rhs) const
{
  return (std::memcmp(this, &rhs, sizeof(*this)) != 0);
}

bool AudioStreamParameters::operator==(const AudioStreamParameters& rhs) const
{
  return (std::memcmp(this, &rhs, sizeof(*this)) == 0);
}
// Records the output sample rate and a copy of the stream parameters.
// Buffers and the stretcher are not created until BaseInitialize() runs.
AudioStream::AudioStream(u32 sample_rate, const AudioStreamParameters& parameters)
  : m_sample_rate(sample_rate), m_parameters(parameters)
{
}

AudioStream::~AudioStream()
{
  // Tear down the SoundTouch instance before the sample buffers it feeds.
  StretchDestroy();
  DestroyBuffer();
}
std::unique_ptr<AudioStream> AudioStream::CreateNullStream(u32 sample_rate, u32 buffer_ms)
{
  // Stretching is pointless when nothing is audible, so force it off; only
  // the buffer size is honoured from the caller.
  AudioStreamParameters params;
  params.buffer_ms = static_cast<u16>(buffer_ms);
  params.stretch_mode = AudioStretchMode::Off;

  std::unique_ptr<AudioStream> ret(new AudioStream(sample_rate, params));
  ret->BaseInitialize();
  return ret;
}
std::vector<std::pair<std::string, std::string>> AudioStream::GetDriverNames(AudioBackend backend)
{
  // Only the Cubeb backend exposes selectable drivers; every other backend
  // (and all backends on Android) reports an empty list.
#ifndef __ANDROID__
  if (backend == AudioBackend::Cubeb)
    return GetCubebDriverNames();
#endif

  return {};
}
std::vector<AudioStream::DeviceInfo> AudioStream::GetOutputDevices(AudioBackend backend, const char* driver,
                                                                   u32 sample_rate)
{
  // Device enumeration is only implemented for Cubeb; other backends (and
  // all backends on Android) have no enumerable outputs.
#ifndef __ANDROID__
  if (backend == AudioBackend::Cubeb)
    return GetCubebOutputDevices(driver, sample_rate);
#endif

  return {};
}
// Factory for the platform audio backends. Cubeb/SDL are compiled in on
// desktop builds only, AAudio/OpenSLES on Android only; Null is always
// available. Returns nullptr and sets error for an unrecognized backend.
std::unique_ptr<AudioStream> AudioStream::CreateStream(AudioBackend backend, u32 sample_rate,
                                                       const AudioStreamParameters& parameters, const char* driver_name,
                                                       const char* device_name, Error* error /* = nullptr */)
{
  switch (backend)
  {
#ifndef __ANDROID__
    case AudioBackend::Cubeb:
      return CreateCubebAudioStream(sample_rate, parameters, driver_name, device_name, error);

    case AudioBackend::SDL:
      return CreateSDLAudioStream(sample_rate, parameters, error);
#else
    case AudioBackend::AAudio:
      return CreateAAudioAudioStream(sample_rate, parameters, error);

    case AudioBackend::OpenSLES:
      return CreateOpenSLESAudioStream(sample_rate, parameters, error);
#endif

    case AudioBackend::Null:
      // Null stream only uses the buffer size from the parameters.
      return CreateNullStream(sample_rate, parameters.buffer_ms);

    default:
      Error::SetStringView(error, "Unknown audio backend.");
      return nullptr;
  }
}
// Rounds a frame count up to a whole number of chunks, so ring-buffer reads
// and writes never need to split a chunk.
u32 AudioStream::GetAlignedBufferSize(u32 size)
{
  static_assert(Common::IsPow2(CHUNK_SIZE));
  return Common::AlignUpPow2(size, CHUNK_SIZE);
}

// Converts a duration in milliseconds to a chunk-aligned frame count.
u32 AudioStream::GetBufferSizeForMS(u32 sample_rate, u32 ms)
{
  return GetAlignedBufferSize((ms * sample_rate) / 1000u);
}

// Converts a frame count to milliseconds. Aligns first, so the result is the
// duration of the buffer that would actually be allocated for this size.
u32 AudioStream::GetMSForBufferSize(u32 sample_rate, u32 buffer_size)
{
  buffer_size = GetAlignedBufferSize(buffer_size);
  return (buffer_size * 1000u) / sample_rate;
}
static constexpr const std::array s_backend_names = {
"Null",
#ifndef __ANDROID__
@@ -244,669 +62,75 @@ const char* AudioStream::GetBackendDisplayName(AudioBackend backend)
return Host::TranslateToCString("AudioStream", s_backend_display_names[static_cast<int>(backend)]);
}
static constexpr const std::array s_stretch_mode_names = {
"None",
"Resample",
"TimeStretch",
};
static constexpr const std::array s_stretch_mode_display_names = {
TRANSLATE_DISAMBIG_NOOP("Settings", "Off (Noisy)", "AudioStretchMode"),
TRANSLATE_DISAMBIG_NOOP("Settings", "Resampling (Pitch Shift)", "AudioStretchMode"),
TRANSLATE_DISAMBIG_NOOP("Settings", "Time Stretch (Tempo Change, Best Sound)", "AudioStretchMode"),
};
const char* AudioStream::GetStretchModeName(AudioStretchMode mode)
u32 AudioStream::FramesToMS(u32 sample_rate, u32 frames)
{
return (static_cast<size_t>(mode) < s_stretch_mode_names.size()) ? s_stretch_mode_names[static_cast<size_t>(mode)] :
"";
return (frames * 1000) / sample_rate;
}
const char* AudioStream::GetStretchModeDisplayName(AudioStretchMode mode)
std::vector<std::pair<std::string, std::string>> AudioStream::GetDriverNames(AudioBackend backend)
{
return (static_cast<size_t>(mode) < s_stretch_mode_display_names.size()) ?
Host::TranslateToCString("Settings", s_stretch_mode_display_names[static_cast<size_t>(mode)],
"AudioStretchMode") :
"";
}
std::optional<AudioStretchMode> AudioStream::ParseStretchMode(const char* name)
{
for (size_t i = 0; i < static_cast<u8>(AudioStretchMode::Count); i++)
std::vector<std::pair<std::string, std::string>> ret;
switch (backend)
{
if (std::strcmp(name, s_stretch_mode_names[i]) == 0)
return static_cast<AudioStretchMode>(i);
}
return std::nullopt;
}
// Approximate number of frames currently queued in the ring buffer.
// Relaxed loads are deliberate: callers only need a snapshot for sizing
// decisions, not synchronization with the other thread's data.
u32 AudioStream::GetBufferedFramesRelaxed() const
{
  const u32 rpos = m_rpos.load(std::memory_order_relaxed);
  const u32 wpos = m_wpos.load(std::memory_order_relaxed);
  return (wpos + m_buffer_size - rpos) % m_buffer_size;
}
// Consumer side of the ring buffer: copies up to num_frames interleaved
// stereo frames into samples. Shortfalls are covered by a crude linear
// spread of whatever was read (or silence when nothing is buffered), and the
// output is scaled by the current volume. Intended to be called from the
// audio device thread — assumes a single consumer; confirm against backends.
void AudioStream::ReadFrames(SampleType* samples, u32 num_frames)
{
  const u32 available_frames = GetBufferedFramesRelaxed();
  u32 frames_to_read = num_frames;
  u32 silence_frames = 0;

  if (m_filling)
  {
    // Recovering from an underrun: keep emitting silence until the buffer has
    // refilled to a threshold. The refill target is a larger fraction of the
    // buffer when not time-stretching.
    u32 toFill = m_buffer_size / ((m_parameters.stretch_mode != AudioStretchMode::TimeStretch) ? 32 : 400);
    toFill = GetAlignedBufferSize(toFill);

    if (available_frames < toFill)
    {
      silence_frames = num_frames;
      frames_to_read = 0;
    }
    else
    {
      m_filling = false;
      VERBOSE_LOG("Underrun compensation done ({} frames buffered)", toFill);
    }
  }

  if (available_frames < frames_to_read)
  {
    // Not enough data: consume what exists, pad the rest, and enter fill mode.
    silence_frames = frames_to_read - available_frames;
    frames_to_read = available_frames;
    m_filling = true;

    // Let the time stretcher know so it can compensate.
    if (m_parameters.stretch_mode == AudioStretchMode::TimeStretch)
      StretchUnderrun();
  }

  if (frames_to_read > 0)
  {
    u32 rpos = m_rpos.load(std::memory_order_acquire);

    u32 end = m_buffer_size - rpos;
    if (end > frames_to_read)
      end = frames_to_read;

    // towards the end of the buffer
    if (end > 0)
    {
      std::memcpy(samples, &m_buffer[rpos * NUM_CHANNELS], end * NUM_CHANNELS * sizeof(SampleType));
      rpos += end;
      rpos = (rpos == m_buffer_size) ? 0 : rpos;
    }

    // after wrapping around
    const u32 start = frames_to_read - end;
    if (start > 0)
    {
      std::memcpy(&samples[end * NUM_CHANNELS], &m_buffer[0], start * NUM_CHANNELS * sizeof(SampleType));
      rpos = start;
    }

    m_rpos.store(rpos, std::memory_order_release);
  }

  if (silence_frames > 0)
  {
    if (frames_to_read > 0)
    {
      // super basic resampler - spread the input samples evenly across the output samples. will sound like ass and have
      // aliasing, but better than popping by inserting silence.
      const u32 increment =
        static_cast<u32>(65536.0f * (static_cast<float>(frames_to_read) / static_cast<float>(num_frames)));

      // Stack copy of the frames just read; they get overwritten as we expand
      // them in place across the full output range.
      SampleType* resample_ptr = static_cast<SampleType*>(alloca(frames_to_read * NUM_CHANNELS * sizeof(SampleType)));
      std::memcpy(resample_ptr, samples, frames_to_read * NUM_CHANNELS * sizeof(SampleType));

      SampleType* out_ptr = samples;
      const u32 copy_stride = sizeof(SampleType) * NUM_CHANNELS;
      u32 resample_subpos = 0; // 16.16 fixed-point position within the source
      for (u32 i = 0; i < num_frames; i++)
      {
        std::memcpy(out_ptr, resample_ptr, copy_stride);
        out_ptr += NUM_CHANNELS;
        resample_subpos += increment;
        resample_ptr += (resample_subpos >> 16) * NUM_CHANNELS;
        resample_subpos %= 65536u;
      }

      VERBOSE_LOG("Audio buffer underflow, resampled {} frames to {}", frames_to_read, num_frames);
    }
    else
    {
      // no data, fall back to silence
      std::memset(samples + (frames_to_read * NUM_CHANNELS), 0, silence_frames * NUM_CHANNELS * sizeof(s16));
    }
  }

  if (m_volume != 100)
  {
    // Volume scaling: the vector loop handles 8 samples per iteration via
    // s16 -> s32 -> float widening; the scalar loop finishes the tail.
    u32 num_samples = num_frames * NUM_CHANNELS;
    const u32 aligned_samples = Common::AlignDownPow2(num_samples, 8);
    num_samples -= aligned_samples;

    const float volume_mult = static_cast<float>(m_volume) / 100.0f;
    const GSVector4 volume_multv = GSVector4(volume_mult);
    const SampleType* const aligned_samples_end = samples + aligned_samples;
    for (; samples != aligned_samples_end; samples += 8)
    {
      GSVector4i iv = GSVector4i::load<false>(samples); // [0, 1, 2, 3, 4, 5, 6, 7]
      GSVector4i iv1 = iv.upl16(iv);                    // [0, 0, 1, 1, 2, 2, 3, 3]
      GSVector4i iv2 = iv.uph16(iv);                    // [4, 4, 5, 5, 6, 6, 7, 7]
      iv1 = iv1.sra32<16>();                            // [0, 1, 2, 3]
      iv2 = iv2.sra32<16>();                            // [4, 5, 6, 7]
      GSVector4 fv1 = GSVector4(iv1);                   // [f0, f1, f2, f3]
      GSVector4 fv2 = GSVector4(iv2);                   // [f4, f5, f6, f7]
      fv1 = fv1 * volume_multv;                         // [f0, f1, f2, f3]
      fv2 = fv2 * volume_multv;                         // [f4, f5, f6, f7]
      iv1 = GSVector4i(fv1);                            // [0, 1, 2, 3]
      iv2 = GSVector4i(fv2);                            // [4, 5, 6, 7]
      iv = iv1.ps32(iv2);                               // [0, 1, 2, 3, 4, 5, 6, 7]
      GSVector4i::store<false>(samples, iv);
    }

    while (num_samples > 0)
    {
      *samples = static_cast<s16>(std::clamp(static_cast<float>(*samples) * volume_mult, -32768.0f, 32767.0f));
      samples++;
      num_samples--;
    }
  }
}
// Producer side of the ring buffer: commits num_frames interleaved stereo
// frames. On overrun, time-stretch mode notifies the stretcher via
// StretchOverrun() (and still writes); otherwise the chunk is dropped.
void AudioStream::InternalWriteFrames(s16* data, u32 num_frames)
{
  const u32 free = m_buffer_size - GetBufferedFramesRelaxed();
  if (free <= num_frames)
  {
    if (m_parameters.stretch_mode == AudioStretchMode::TimeStretch)
    {
      StretchOverrun();
    }
    else
    {
      DEBUG_LOG("Buffer overrun, chunk dropped");
      return;
    }
  }

  u32 wpos = m_wpos.load(std::memory_order_acquire);

  // wrapping around the end of the buffer?
  if ((m_buffer_size - wpos) <= num_frames)
  {
    // needs to be written in two parts
    const u32 end = m_buffer_size - wpos;
    const u32 start = num_frames - end;

    // start is zero when this chunk reaches exactly the end
    std::memcpy(&m_buffer[wpos * NUM_CHANNELS], data, end * NUM_CHANNELS * sizeof(SampleType));
    if (start > 0)
      std::memcpy(&m_buffer[0], data + end * NUM_CHANNELS, start * NUM_CHANNELS * sizeof(SampleType));

    wpos = start;
  }
  else
  {
    // no split
    std::memcpy(&m_buffer[wpos * NUM_CHANNELS], data, num_frames * NUM_CHANNELS * sizeof(SampleType));
    wpos += num_frames;
  }

  // Release store publishes the copied samples to the consumer thread.
  m_wpos.store(wpos, std::memory_order_release);
}
// Common second-stage setup shared by all backends: sizes/allocates the ring
// buffer and creates the SoundTouch stretcher (a no-op when stretching is off).
void AudioStream::BaseInitialize()
{
  AllocateBuffer();
  StretchAllocate();
}
void AudioStream::AllocateBuffer()
{
  // Stretcher can produce a large amount of samples from few samples when running slow, so allocate a larger buffer.
  // In most cases it's not going to be used, but better to have a larger buffer and not need it than overrun.
  const u32 multiplier = (m_parameters.stretch_mode == AudioStretchMode::TimeStretch) ?
                           16 :
                           ((m_parameters.stretch_mode == AudioStretchMode::Off) ? 1 : 2);
  m_buffer_size = GetAlignedBufferSize(((m_parameters.buffer_ms * multiplier) * m_sample_rate) / 1000);
  // Target size is the unscaled configured latency, used as the fill goal.
  m_target_buffer_size = GetAlignedBufferSize((m_sample_rate * m_parameters.buffer_ms) / 1000u);

  // All three buffers are vector-aligned for the SIMD conversion/volume paths.
  m_buffer = Common::make_unique_aligned_for_overwrite<s16[]>(VECTOR_ALIGNMENT, m_buffer_size * NUM_CHANNELS);
  m_staging_buffer = Common::make_unique_aligned_for_overwrite<s16[]>(VECTOR_ALIGNMENT, CHUNK_SIZE * NUM_CHANNELS);
  m_float_buffer = Common::make_unique_aligned_for_overwrite<float[]>(VECTOR_ALIGNMENT, CHUNK_SIZE * NUM_CHANNELS);

  DEV_LOG("Allocated buffer of {} frames for buffer of {} ms [stretch {}, target size {}].", m_buffer_size,
          m_parameters.buffer_ms, GetStretchModeName(m_parameters.stretch_mode), m_target_buffer_size);
}
// Frees all sample storage and resets the ring buffer cursors to empty.
// Callers are expected to have stopped the consumer first (see SetStretchMode,
// which pauses around buffer resizing).
void AudioStream::DestroyBuffer()
{
  m_staging_buffer.reset();
  m_float_buffer.reset();
  m_buffer.reset();

  m_buffer_size = 0;
  m_wpos.store(0, std::memory_order_release);
  m_rpos.store(0, std::memory_order_release);
}
// Discards all buffered audio, including anything queued inside SoundTouch,
// and restores the nominal tempo when time-stretching.
void AudioStream::EmptyBuffer()
{
  if (IsStretchEnabled())
  {
    soundtouch_clear(m_soundtouch);
    if (m_parameters.stretch_mode == AudioStretchMode::TimeStretch)
      soundtouch_setTempo(m_soundtouch, m_nominal_rate);
  }

  // Snap the write cursor onto the read cursor, leaving zero queued frames.
  m_wpos.store(m_rpos.load(std::memory_order_acquire), std::memory_order_release);
}
void AudioStream::SetNominalRate(float tempo)
{
  // Records the target playback rate (1.0 = realtime) and forwards it to
  // SoundTouch: as a resample rate in Resample mode, or as a tempo in
  // TimeStretch mode unless the stretcher is currently inactive.
  m_nominal_rate = tempo;

  switch (m_parameters.stretch_mode)
  {
    case AudioStretchMode::Resample:
      soundtouch_setRate(m_soundtouch, tempo);
      break;

    case AudioStretchMode::TimeStretch:
      if (!m_stretch_inactive)
        soundtouch_setTempo(m_soundtouch, tempo);
      break;

    default:
      break;
  }
}
// Switches the stretch mode at runtime. Buffer sizing depends on the mode
// (see AllocateBuffer), so the buffers and stretcher are torn down and
// rebuilt, with output paused around the swap.
void AudioStream::SetStretchMode(AudioStretchMode mode)
{
  if (m_parameters.stretch_mode == mode)
    return;

  // can't resize the buffers while paused
  bool paused = m_paused;
  if (!paused)
    SetPaused(true);

  DestroyBuffer();
  StretchDestroy();
  m_parameters.stretch_mode = mode;

  AllocateBuffer();
  if (m_parameters.stretch_mode != AudioStretchMode::Off)
    StretchAllocate();

  if (!paused)
    SetPaused(false);
}
// Pauses/resumes output. The base implementation only records the flag;
// presumably backend subclasses extend this to start/stop the device — confirm.
void AudioStream::SetPaused(bool paused)
{
  m_paused = paused;
}

// Sets output volume in percent (100 = unity). Applied during ReadFrames();
// note EndWrite() drops committed frames entirely while volume is 0.
void AudioStream::SetOutputVolume(u32 volume)
{
  m_volume = volume;
}
// Hands the producer a pointer into the staging chunk plus the number of
// frames remaining in it. The write is committed by EndWrite().
void AudioStream::BeginWrite(SampleType** buffer_ptr, u32* num_frames)
{
  // TODO: Write directly to buffer when not using stretching.
  *buffer_ptr = &m_staging_buffer[m_staging_buffer_pos];
  *num_frames = CHUNK_SIZE - (m_staging_buffer_pos / NUM_CHANNELS);
}
// Converts interleaved s16 samples to float (scaled by 1/32767), eight
// samples per iteration. num_samples is rounded up to a multiple of 8, so
// both buffers must be padded accordingly and vector-aligned.
static void S16ChunkToFloat(const s16* src, float* dst, u32 num_samples)
{
  constexpr GSVector4 S16_TO_FLOAT_V = GSVector4::cxpr(1.0f / 32767.0f);

  const u32 iterations = (num_samples + 7) / 8;
  for (u32 i = 0; i < iterations; i++)
  {
    const GSVector4i sv = GSVector4i::load<true>(src);
    src += 8;

    // Widen s16 -> s32 with sign extension: self-interleave then arithmetic
    // shift right by 16.
    GSVector4i iv1 = sv.upl16(sv);  // [0, 0, 1, 1, 2, 2, 3, 3]
    GSVector4i iv2 = sv.uph16(sv);  // [4, 4, 5, 5, 6, 6, 7, 7]
    iv1 = iv1.sra32<16>();          // [0, 1, 2, 3]
    iv2 = iv2.sra32<16>();          // [4, 5, 6, 7]
    GSVector4 fv1 = GSVector4(iv1); // [f0, f1, f2, f3]
    GSVector4 fv2 = GSVector4(iv2); // [f4, f5, f6, f7]
    fv1 = fv1 * S16_TO_FLOAT_V;
    fv2 = fv2 * S16_TO_FLOAT_V;

    GSVector4::store<true>(dst + 0, fv1);
    GSVector4::store<true>(dst + 4, fv2);
    dst += 8;
  }
}
// Converts float samples back to interleaved s16 (scaled by 32767), eight
// samples per iteration. Same rounding-up/padding/alignment requirements as
// S16ChunkToFloat.
static void FloatChunkToS16(s16* dst, const float* src, u32 num_samples)
{
  const GSVector4 FLOAT_TO_S16_V = GSVector4::cxpr(32767.0f);

  const u32 iterations = (num_samples + 7) / 8;
  for (u32 i = 0; i < iterations; i++)
  {
    GSVector4 fv1 = GSVector4::load<true>(src + 0);
    GSVector4 fv2 = GSVector4::load<true>(src + 4);
    src += 8;

    fv1 = fv1 * FLOAT_TO_S16_V;
    fv2 = fv2 * FLOAT_TO_S16_V;
    GSVector4i iv1 = GSVector4i(fv1);
    GSVector4i iv2 = GSVector4i(fv2);

    // s32 -> s16 pack; presumably saturating (packss semantics) so
    // out-of-range floats clamp rather than wrap — confirm in GSVector.
    const GSVector4i iv = iv1.ps32(iv2);
    GSVector4i::store<true>(dst, iv);
    dst += 8;
  }
}
// Commits frames previously written via BeginWrite(). Frames accumulate in
// the staging buffer until a full CHUNK_SIZE-frame chunk is ready, which is
// then pushed either straight into the ring buffer or through the stretcher.
void AudioStream::EndWrite(u32 num_frames)
{
  // don't bother committing anything when muted
  if (m_volume == 0)
    return;

  // Position is tracked in samples, not frames.
  m_staging_buffer_pos += num_frames * NUM_CHANNELS;
  DebugAssert(m_staging_buffer_pos <= (CHUNK_SIZE * NUM_CHANNELS));
  if ((m_staging_buffer_pos / NUM_CHANNELS) < CHUNK_SIZE)
    return;

  m_staging_buffer_pos = 0;

  if (!IsStretchEnabled())
  {
    InternalWriteFrames(m_staging_buffer.get(), CHUNK_SIZE);
    return;
  }

  // SoundTouch only accepts float input, so convert the chunk first.
  S16ChunkToFloat(m_staging_buffer.get(), m_float_buffer.get(), CHUNK_SIZE * NUM_CHANNELS);
  StretchWriteBlock(m_float_buffer.get());
}
// Time stretching algorithm based on PCSX2 implementation.

// Inclusive range check: true when min <= val <= max. Written with <= so that
// a NaN input always yields false (both comparisons fail on NaN).
template<class T>
ALWAYS_INLINE static bool IsInRange(const T& val, const T& min, const T& max)
{
  return (min <= val && val <= max);
}
// Creates and configures the SoundTouch instance for the current parameters,
// and resets all stretch-tracking state. No-op when stretching is off.
void AudioStream::StretchAllocate()
{
  if (m_parameters.stretch_mode == AudioStretchMode::Off)
    return;

  m_soundtouch = soundtouch_createInstance();
  soundtouch_setSampleRate(m_soundtouch, m_sample_rate);
  soundtouch_setChannels(m_soundtouch, NUM_CHANNELS);

  soundtouch_setSetting(m_soundtouch, SETTING_USE_QUICKSEEK, m_parameters.stretch_use_quickseek);
  soundtouch_setSetting(m_soundtouch, SETTING_USE_AA_FILTER, m_parameters.stretch_use_aa_filter);
  soundtouch_setSetting(m_soundtouch, SETTING_SEQUENCE_MS, m_parameters.stretch_sequence_length_ms);
  soundtouch_setSetting(m_soundtouch, SETTING_SEEKWINDOW_MS, m_parameters.stretch_seekwindow_ms);
  soundtouch_setSetting(m_soundtouch, SETTING_OVERLAP_MS, m_parameters.stretch_overlap_ms);

  // Resample changes pitch with speed; time-stretch keeps pitch constant.
  if (m_parameters.stretch_mode == AudioStretchMode::Resample)
    soundtouch_setRate(m_soundtouch, m_nominal_rate);
  else
    soundtouch_setTempo(m_soundtouch, m_nominal_rate);

  // Start at the reset threshold so the first tempo update begins from a
  // clean baseline.
  m_stretch_reset = STRETCH_RESET_THRESHOLD;
  m_stretch_inactive = false;
  m_stretch_ok_count = 0;
  m_dynamic_target_usage = 0.0f;

  m_average_position = 0;
  m_average_available = 0;

  m_staging_buffer_pos = 0;
}
// Releases the SoundTouch instance, if any. Safe to call repeatedly.
void AudioStream::StretchDestroy()
{
  if (!m_soundtouch)
    return;

  soundtouch_destroyInstance(m_soundtouch);
  m_soundtouch = nullptr;
}
// Feeds one float chunk through SoundTouch (or converts it straight back to
// s16 when stretching is disabled) and drains all produced output into the
// ring buffer.
void AudioStream::StretchWriteBlock(const float* block)
{
  if (IsStretchEnabled())
  {
    soundtouch_putSamples(m_soundtouch, block, CHUNK_SIZE);

    // Drain everything SoundTouch has ready; each batch is converted back to
    // s16 in the staging buffer, then committed. Note the comma operator:
    // the loop runs until receiveSamples() returns zero.
    u32 tempProgress;
    while (tempProgress = soundtouch_receiveSamples(m_soundtouch, m_float_buffer.get(), CHUNK_SIZE), tempProgress != 0)
    {
      FloatChunkToS16(m_staging_buffer.get(), m_float_buffer.get(), tempProgress * NUM_CHANNELS);
      InternalWriteFrames(m_staging_buffer.get(), tempProgress);
    }

    if (m_parameters.stretch_mode == AudioStretchMode::TimeStretch)
      UpdateStretchTempo();
  }
  else
  {
    FloatChunkToS16(m_staging_buffer.get(), block, CHUNK_SIZE * NUM_CHANNELS);
    InternalWriteFrames(m_staging_buffer.get(), CHUNK_SIZE);
  }
}
float AudioStream::AddAndGetAverageTempo(float val)
{
static constexpr u32 AVERAGING_WINDOW = 50;
// Build up a circular buffer for tempo averaging to prevent rapid tempo oscillations.
if (m_average_available < AVERAGING_BUFFER_SIZE)
m_average_available++;
m_average_fullness[m_average_position] = val;
m_average_position = (m_average_position + 1U) % AVERAGING_BUFFER_SIZE;
// The + AVERAGING_BUFFER_SIZE ensures we don't go negative when using modulo arithmetic.
const u32 actual_window = std::min<u32>(m_average_available, AVERAGING_WINDOW);
u32 index = (m_average_position - actual_window + AVERAGING_BUFFER_SIZE) % AVERAGING_BUFFER_SIZE;
float sum = 0.0f;
u32 count = 0;
#ifdef CPU_ARCH_SIMD
GSVector4 vsum = GSVector4::zero();
const u32 vcount = Common::AlignDownPow2(actual_window, 4);
for (; count < vcount; count += 4)
{
if ((index + 4) > AVERAGING_BUFFER_SIZE)
{
// wraparound
for (u32 i = 0; i < 4; i++)
{
sum += m_average_fullness[index];
index = (index + 1) % AVERAGING_BUFFER_SIZE;
}
}
else
{
vsum += GSVector4::load<false>(&m_average_fullness[index]);
index = (index + 4) % AVERAGING_BUFFER_SIZE;
}
}
sum += vsum.addv();
#ifndef __ANDROID__
case AudioBackend::Cubeb:
ret = GetCubebDriverNames();
break;
#endif
for (; count < actual_window; count++)
{
sum += m_average_fullness[index];
index = (index + 1) % AVERAGING_BUFFER_SIZE;
}
sum /= static_cast<float>(actual_window);
return (sum != 0.0f) ? sum : 1.0f;
default:
break;
}
return ret;
}
void AudioStream::UpdateStretchTempo()
std::vector<AudioStream::DeviceInfo> AudioStream::GetOutputDevices(AudioBackend backend, const char* driver,
u32 sample_rate)
{
static constexpr float MIN_TEMPO = 0.05f;
static constexpr float MAX_TEMPO = 500.0f;
// Hysteresis thresholds to prevent stretcher from constantly toggling on/off.
// i.e. this is the range we will run in 1:1 mode for.
static constexpr float INACTIVE_GOOD_FACTOR = 1.04f;
static constexpr float INACTIVE_BAD_FACTOR = 1.2f;
// Require sustained good performance before deactivating.
static constexpr u32 INACTIVE_MIN_OK_COUNT = 50;
static constexpr u32 COMPENSATION_DIVIDER = 100;
// Controls how aggressively we adjust the dynamic target. We want to keep the same target size regardless
// of the target speed, but need additional buffering when intentionally running below 100%.
float base_target_usage = static_cast<float>(m_target_buffer_size) / std::min(m_nominal_rate, 1.0f);
// tempo = current_buffer / target_buffer.
const u32 ibuffer_usage = GetBufferedFramesRelaxed();
float buffer_usage = static_cast<float>(ibuffer_usage);
float tempo = buffer_usage / m_dynamic_target_usage;
// Prevents the system from getting stuck in a bad state due to accumulated errors.
if (m_stretch_reset >= STRETCH_RESET_THRESHOLD)
std::vector<AudioStream::DeviceInfo> ret;
switch (backend)
{
VERBOSE_LOG("___ Stretcher is being reset.");
m_stretch_inactive = false;
m_stretch_ok_count = 0;
m_dynamic_target_usage = base_target_usage;
m_average_available = 0;
m_average_position = 0;
m_stretch_reset = 0;
tempo = m_nominal_rate;
}
else if (m_stretch_reset > 0)
{
// Back off resets if enough time has passed. That way a very occasional lag/overflow
// doesn't cascade into unnecessary tempo adjustment.
const u64 now = Timer::GetCurrentValue();
if (Timer::ConvertValueToSeconds(now - m_stretch_reset_time) >= 2.0f)
{
m_stretch_reset--;
m_stretch_reset_time = now;
}
#ifndef __ANDROID__
case AudioBackend::Cubeb:
ret = GetCubebOutputDevices(driver, sample_rate);
break;
#endif
default:
break;
}
// Apply temporal smoothing to prevent rapid tempo changes that cause artifacts.
tempo = AddAndGetAverageTempo(tempo);
// Apply non-linear dampening when close to target to reduce oscillation.
if (tempo < 2.0f)
tempo = std::sqrt(tempo);
tempo = std::clamp(tempo, MIN_TEMPO, MAX_TEMPO);
if (tempo < 1.0f)
base_target_usage /= std::sqrt(tempo);
// Gradually adjust our dynamic target toward what would give us the desired tempo.
m_dynamic_target_usage +=
static_cast<float>(base_target_usage / tempo - m_dynamic_target_usage) / static_cast<float>(COMPENSATION_DIVIDER);
// Snap back to baseline if we're very close.
if (IsInRange(tempo, 0.9f, 1.1f) &&
IsInRange(m_dynamic_target_usage, base_target_usage * 0.9f, base_target_usage * 1.1f))
{
m_dynamic_target_usage = base_target_usage;
}
// Are we changing the active state?
if (!m_stretch_inactive)
{
if (IsInRange(tempo, 1.0f / INACTIVE_GOOD_FACTOR, INACTIVE_GOOD_FACTOR))
m_stretch_ok_count++;
else
m_stretch_ok_count = 0;
if (m_stretch_ok_count >= INACTIVE_MIN_OK_COUNT)
{
VERBOSE_LOG("=== Stretcher is now inactive.");
m_stretch_inactive = true;
}
}
else if (!IsInRange(tempo, 1.0f / INACTIVE_BAD_FACTOR, INACTIVE_BAD_FACTOR))
{
VERBOSE_LOG("~~~ Stretcher is now active @ tempo {}.", tempo);
m_stretch_inactive = false;
m_stretch_ok_count = 0;
}
// If we're inactive, we don't want to change the tempo.
if (m_stretch_inactive)
tempo = m_nominal_rate;
if constexpr (LOG_TIMESTRETCH_STATS)
{
static float min_tempo = 0.0f;
static float max_tempo = 0.0f;
static float acc_tempo = 0.0f;
static u32 acc_cnt = 0;
acc_tempo += tempo;
acc_cnt++;
min_tempo = std::min(min_tempo, tempo);
max_tempo = std::max(max_tempo, tempo);
static int iterations = 0;
static u64 last_log_time = 0;
const u64 now = Timer::GetCurrentValue();
if (Timer::ConvertValueToSeconds(now - last_log_time) > 1.0f)
{
const float avg_tempo = (acc_cnt > 0) ? (acc_tempo / static_cast<float>(acc_cnt)) : 0.0f;
VERBOSE_LOG("{:3d} ms ({:3.0f}%), tempo: avg={:.2f} min={:.2f} max={:.2f}, comp: {:2.3f}, iters: {}, reset:{}",
(ibuffer_usage * 1000u) / m_sample_rate, 100.0f * buffer_usage / base_target_usage, avg_tempo,
min_tempo, max_tempo, m_dynamic_target_usage / base_target_usage, iterations, m_stretch_reset);
last_log_time = now;
iterations = 0;
min_tempo = std::numeric_limits<float>::max();
max_tempo = std::numeric_limits<float>::min();
acc_tempo = 0.0f;
acc_cnt = 0;
}
iterations++;
}
soundtouch_setTempo(m_soundtouch, tempo);
return ret;
}
void AudioStream::StretchUnderrun()
std::unique_ptr<AudioStream> AudioStream::CreateStream(AudioBackend backend, u32 sample_rate, u32 channels,
u32 output_latency_frames, bool output_latency_minimal,
const char* driver_name, const char* device_name,
AudioStreamSource* source, bool auto_start, Error* error)
{
// Didn't produce enough frames in time.
m_stretch_reset++;
if (m_stretch_reset < STRETCH_RESET_THRESHOLD)
m_stretch_reset_time = Timer::GetCurrentValue();
}
void AudioStream::StretchOverrun()
{
// Produced more frames than can fit in the buffer.
m_stretch_reset++;
if (m_stretch_reset < STRETCH_RESET_THRESHOLD)
m_stretch_reset_time = Timer::GetCurrentValue();
// Drop two packets to give the time stretcher a bit more time to slow things down.
// This prevents a cascading overrun situation where each overrun makes the next one more likely.
const u32 discard = CHUNK_SIZE * 2;
m_rpos.store((m_rpos.load(std::memory_order_acquire) + discard) % m_buffer_size, std::memory_order_release);
}
void AudioStream::EmptyStretchBuffers()
{
if (!IsStretchEnabled())
return;
m_stretch_reset = STRETCH_RESET_THRESHOLD;
// Wipe soundtouch samples. If we don't do this and we're switching from a high tempo to low,
// we'll still have quite a large buffer of samples that will be played back at a low tempo,
// resulting in a long delay before the audio starts playing at the new tempo.
soundtouch_clear(m_soundtouch);
switch (backend)
{
#ifndef __ANDROID__
case AudioBackend::Cubeb:
return CreateCubebAudioStream(sample_rate, channels, output_latency_frames, output_latency_minimal, driver_name,
device_name, source, auto_start, error);
case AudioBackend::SDL:
return CreateSDLAudioStream(sample_rate, channels, output_latency_frames, output_latency_minimal, source,
auto_start, error);
#else
case AudioBackend::AAudio:
return CreateAAudioAudioStream(sample_rate, channels, output_latency_frames, output_latency_minimal, source,
auto_start, error);
case AudioBackend::OpenSLES:
return CreateOpenSLESAudioStream(sample_rate, channels, output_latency_frames, output_latency_minimal, source,
auto_start, error);
#endif
default:
Error::SetStringView(error, "Unknown audio backend.");
return nullptr;
}
}

View File

@@ -1,24 +1,13 @@
// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
// SPDX-FileCopyrightText: 2019-2025 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: CC-BY-NC-ND-4.0
#pragma once
#include "common/align.h"
#include "common/types.h"
#include <array>
#include <atomic>
#include <memory>
#include <optional>
#include <string>
#include <vector>
class Error;
class SettingsInterface;
namespace soundtouch {
class SoundTouch;
}
enum class AudioBackend : u8
{
@@ -33,59 +22,18 @@ enum class AudioBackend : u8
Count
};
enum class AudioStretchMode : u8
class AudioStreamSource
{
Off,
Resample,
TimeStretch,
Count
};
public:
using SampleType = s16;
struct AudioStreamParameters
{
AudioStretchMode stretch_mode = DEFAULT_STRETCH_MODE;
bool output_latency_minimal = DEFAULT_OUTPUT_LATENCY_MINIMAL;
u16 output_latency_ms = DEFAULT_OUTPUT_LATENCY_MS;
u16 buffer_ms = DEFAULT_BUFFER_MS;
u16 stretch_sequence_length_ms = DEFAULT_STRETCH_SEQUENCE_LENGTH;
u16 stretch_seekwindow_ms = DEFAULT_STRETCH_SEEKWINDOW;
u16 stretch_overlap_ms = DEFAULT_STRETCH_OVERLAP;
bool stretch_use_quickseek = DEFAULT_STRETCH_USE_QUICKSEEK;
bool stretch_use_aa_filter = DEFAULT_STRETCH_USE_AA_FILTER;
static constexpr AudioStretchMode DEFAULT_STRETCH_MODE = AudioStretchMode::TimeStretch;
#ifndef __ANDROID__
static constexpr u16 DEFAULT_BUFFER_MS = 50;
static constexpr u16 DEFAULT_OUTPUT_LATENCY_MS = 20;
#else
static constexpr u16 DEFAULT_BUFFER_MS = 100;
static constexpr u16 DEFAULT_OUTPUT_LATENCY_MS = 20;
#endif
static constexpr bool DEFAULT_OUTPUT_LATENCY_MINIMAL = false;
static constexpr u16 DEFAULT_STRETCH_SEQUENCE_LENGTH = 30;
static constexpr u16 DEFAULT_STRETCH_SEEKWINDOW = 20;
static constexpr u16 DEFAULT_STRETCH_OVERLAP = 10;
static constexpr bool DEFAULT_STRETCH_USE_QUICKSEEK = false;
static constexpr bool DEFAULT_STRETCH_USE_AA_FILTER = false;
void Load(const SettingsInterface& si, const char* section);
void Save(SettingsInterface& si, const char* section) const;
void Clear(SettingsInterface& si, const char* section);
bool operator==(const AudioStreamParameters& rhs) const;
bool operator!=(const AudioStreamParameters& rhs) const;
virtual void ReadFrames(SampleType* samples, u32 num_frames) = 0;
};
class AudioStream
{
public:
using SampleType = s16;
static constexpr u32 NUM_CHANNELS = 2;
static constexpr u32 CHUNK_SIZE = 64;
using SampleType = AudioStreamSource::SampleType;
#ifndef __ANDROID__
static constexpr AudioBackend DEFAULT_BACKEND = AudioBackend::Cubeb;
@@ -103,129 +51,52 @@ public:
~DeviceInfo();
};
public:
virtual ~AudioStream();
static u32 GetAlignedBufferSize(u32 size);
static u32 GetBufferSizeForMS(u32 sample_rate, u32 ms);
static u32 GetMSForBufferSize(u32 sample_rate, u32 buffer_size);
static std::optional<AudioBackend> ParseBackendName(const char* str);
static const char* GetBackendName(AudioBackend backend);
static const char* GetBackendDisplayName(AudioBackend backend);
static const char* GetStretchModeName(AudioStretchMode mode);
static const char* GetStretchModeDisplayName(AudioStretchMode mode);
static std::optional<AudioStretchMode> ParseStretchMode(const char* name);
static u32 FramesToMS(u32 sample_rate, u32 frames);
ALWAYS_INLINE u32 GetSampleRate() const { return m_sample_rate; }
ALWAYS_INLINE u32 GetBufferSize() const { return m_buffer_size; }
ALWAYS_INLINE u32 GetTargetBufferSize() const { return m_target_buffer_size; }
ALWAYS_INLINE u32 GetOutputVolume() const { return m_volume; }
ALWAYS_INLINE float GetNominalTempo() const { return m_nominal_rate; }
ALWAYS_INLINE bool IsPaused() const { return m_paused; }
/// Returns a list of available driver names for the specified backend.
static std::vector<std::pair<std::string, std::string>> GetDriverNames(AudioBackend backend);
u32 GetBufferedFramesRelaxed() const;
/// Returns a list of available output devices for the specified backend and driver.
static std::vector<DeviceInfo> GetOutputDevices(AudioBackend backend, const char* driver, u32 sample_rate);
/// Creates an audio stream with the specified parameters.
static std::unique_ptr<AudioStream> CreateStream(AudioBackend backend, u32 sample_rate, u32 channels,
u32 output_latency_frames, bool output_latency_minimal,
const char* driver_name, const char* device_name,
AudioStreamSource* source, bool auto_start, Error* error);
/// Starts the stream, allowing it to request data.
virtual bool Start(Error* error) = 0;
/// Temporarily pauses the stream, preventing it from requesting data.
virtual void SetPaused(bool paused);
void SetOutputVolume(u32 volume);
void BeginWrite(SampleType** buffer_ptr, u32* num_frames);
void EndWrite(u32 num_frames);
void EmptyBuffer();
/// Nominal rate is used for both resampling and timestretching, input samples are assumed to be this amount faster
/// than the sample rate.
void SetNominalRate(float tempo);
void SetStretchMode(AudioStretchMode mode);
/// Wipes out the time stretching buffer, call when reducing target speed.
void EmptyStretchBuffers();
static std::vector<std::pair<std::string, std::string>> GetDriverNames(AudioBackend backend);
static std::vector<DeviceInfo> GetOutputDevices(AudioBackend backend, const char* driver, u32 sample_rate);
static std::unique_ptr<AudioStream> CreateStream(AudioBackend backend, u32 sample_rate,
const AudioStreamParameters& parameters, const char* driver_name,
const char* device_name, Error* error = nullptr);
static std::unique_ptr<AudioStream> CreateNullStream(u32 sample_rate, u32 buffer_ms);
virtual bool Stop(Error* error) = 0;
protected:
AudioStream(u32 sample_rate, const AudioStreamParameters& parameters);
void BaseInitialize();
void ReadFrames(SampleType* samples, u32 num_frames);
u32 m_sample_rate = 0;
u32 m_volume = 100;
AudioStreamParameters m_parameters;
bool m_stretch_inactive = false;
bool m_filling = false;
bool m_paused = false;
AudioStream();
private:
static constexpr u32 AVERAGING_BUFFER_SIZE = 256;
static constexpr u32 STRETCH_RESET_THRESHOLD = 5;
#ifndef __ANDROID__
static std::vector<std::pair<std::string, std::string>> GetCubebDriverNames();
static std::vector<DeviceInfo> GetCubebOutputDevices(const char* driver, u32 sample_rate);
static std::unique_ptr<AudioStream> CreateCubebAudioStream(u32 sample_rate, const AudioStreamParameters& parameters,
const char* driver_name, const char* device_name,
Error* error);
static std::unique_ptr<AudioStream> CreateSDLAudioStream(u32 sample_rate, const AudioStreamParameters& parameters,
Error* error);
static std::unique_ptr<AudioStream> CreateCubebAudioStream(u32 sample_rate, u32 channels, u32 output_latency_frames,
bool output_latency_minimal, const char* driver_name,
const char* device_name, AudioStreamSource* source,
bool auto_start, Error* error);
static std::unique_ptr<AudioStream> CreateSDLAudioStream(u32 sample_rate, u32 channels, u32 output_latency_frames,
bool output_latency_minimal, AudioStreamSource* source,
bool auto_start, Error* error);
#else
static std::unique_ptr<AudioStream> CreateAAudioAudioStream(u32 sample_rate, const AudioStreamParameters& parameters,
Error* error);
static std::unique_ptr<AudioStream> CreateOpenSLESAudioStream(u32 sample_rate,
const AudioStreamParameters& parameters, Error* error);
static std::unique_ptr<AudioStream> CreateAAudioAudioStream(u32 sample_rate, u32 output_latency_frames,
bool output_latency_minimal, AudioStreamSource* source,
bool auto_start, Error* error);
static std::unique_ptr<AudioStream> CreateOpenSLESAudioStream(u32 sample_rate, u32 output_latency_frames,
bool output_latency_minimal, AudioStreamSource* source,
bool auto_start, Error* error);
#endif
ALWAYS_INLINE bool IsStretchEnabled() const { return m_parameters.stretch_mode != AudioStretchMode::Off; }
void AllocateBuffer();
void DestroyBuffer();
void InternalWriteFrames(SampleType* samples, u32 num_frames);
void StretchAllocate();
void StretchDestroy();
void StretchWriteBlock(const float* block);
void StretchUnderrun();
void StretchOverrun();
float AddAndGetAverageTempo(float val);
void UpdateStretchTempo();
u32 m_buffer_size = 0;
Common::unique_aligned_ptr<s16[]> m_buffer;
std::atomic<u32> m_rpos{0};
std::atomic<u32> m_wpos{0};
void* m_soundtouch = nullptr;
u32 m_target_buffer_size = 0;
u32 m_stretch_reset = STRETCH_RESET_THRESHOLD;
u64 m_stretch_reset_time = 0;
u32 m_stretch_ok_count = 0;
float m_nominal_rate = 1.0f;
float m_dynamic_target_usage = 0.0f;
u32 m_average_position = 0;
u32 m_average_available = 0;
u32 m_staging_buffer_pos = 0;
std::array<float, AVERAGING_BUFFER_SIZE> m_average_fullness = {};
// temporary staging buffer, used for timestretching
Common::unique_aligned_ptr<s16[]> m_staging_buffer;
// float buffer, soundtouch only accepts float samples as input
Common::unique_aligned_ptr<float[]> m_float_buffer;
};

View File

@@ -0,0 +1,832 @@
// SPDX-FileCopyrightText: 2019-2025 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: CC-BY-NC-ND-4.0
#include "core_audio_stream.h"
#include "translation.h"
#include "common/align.h"
#include "common/assert.h"
#include "common/error.h"
#include "common/gsvector.h"
#include "common/log.h"
#include "common/settings_interface.h"
#include "common/timer.h"
#include "soundtouch/SoundTouch.h"
#include "soundtouch/SoundTouchDLL.h"
#include <algorithm>
#include <cmath>
#include <cstring>
#include <limits>
// Log channel used by all messages in this file.
LOG_CHANNEL(AudioStream);

// Compile-time switch for the time-stretch statistics logging path.
static constexpr bool LOG_TIMESTRETCH_STATS = false;
void AudioStreamParameters::Load(const SettingsInterface& si, const char* section)
{
stretch_mode =
CoreAudioStream::ParseStretchMode(
si.GetStringValue(section, "StretchMode", CoreAudioStream::GetStretchModeName(DEFAULT_STRETCH_MODE)).c_str())
.value_or(DEFAULT_STRETCH_MODE);
output_latency_ms = static_cast<u16>(std::min<u32>(
si.GetUIntValue(section, "OutputLatencyMS", DEFAULT_OUTPUT_LATENCY_MS), std::numeric_limits<u16>::max()));
output_latency_minimal = si.GetBoolValue(section, "OutputLatencyMinimal", DEFAULT_OUTPUT_LATENCY_MINIMAL);
buffer_ms = static_cast<u16>(
std::min<u32>(si.GetUIntValue(section, "BufferMS", DEFAULT_BUFFER_MS), std::numeric_limits<u16>::max()));
stretch_sequence_length_ms =
static_cast<u16>(std::min<u32>(si.GetUIntValue(section, "StretchSequenceLengthMS", DEFAULT_STRETCH_SEQUENCE_LENGTH),
std::numeric_limits<u16>::max()));
stretch_seekwindow_ms = static_cast<u16>(std::min<u32>(
si.GetUIntValue(section, "StretchSeekWindowMS", DEFAULT_STRETCH_SEEKWINDOW), std::numeric_limits<u16>::max()));
stretch_overlap_ms = static_cast<u16>(std::min<u32>(
si.GetUIntValue(section, "StretchOverlapMS", DEFAULT_STRETCH_OVERLAP), std::numeric_limits<u16>::max()));
stretch_use_quickseek = si.GetBoolValue(section, "StretchUseQuickSeek", DEFAULT_STRETCH_USE_QUICKSEEK);
stretch_use_aa_filter = si.GetBoolValue(section, "StretchUseAAFilter", DEFAULT_STRETCH_USE_AA_FILTER);
}
// Writes all audio parameters to the given settings section, mirroring the
// keys read by Load().
void AudioStreamParameters::Save(SettingsInterface& si, const char* section) const
{
  si.SetStringValue(section, "StretchMode", CoreAudioStream::GetStretchModeName(stretch_mode));
  si.SetUIntValue(section, "BufferMS", buffer_ms);
  si.SetUIntValue(section, "OutputLatencyMS", output_latency_ms);
  si.SetBoolValue(section, "OutputLatencyMinimal", output_latency_minimal);
  si.SetUIntValue(section, "StretchSequenceLengthMS", stretch_sequence_length_ms);
  si.SetUIntValue(section, "StretchSeekWindowMS", stretch_seekwindow_ms);
  si.SetUIntValue(section, "StretchOverlapMS", stretch_overlap_ms);
  si.SetBoolValue(section, "StretchUseQuickSeek", stretch_use_quickseek);
  si.SetBoolValue(section, "StretchUseAAFilter", stretch_use_aa_filter);
}
// Removes every key written by Save() from the given section, resetting the
// user's audio configuration to defaults.
void AudioStreamParameters::Clear(SettingsInterface& si, const char* section)
{
  si.DeleteValue(section, "StretchMode");
  // NOTE(review): "ExpansionMode" is not written by Save() — presumably a
  // legacy key being cleaned up; confirm before removing this line.
  si.DeleteValue(section, "ExpansionMode");
  si.DeleteValue(section, "BufferMS");
  si.DeleteValue(section, "OutputLatencyMS");
  si.DeleteValue(section, "OutputLatencyMinimal");
  si.DeleteValue(section, "StretchSequenceLengthMS");
  si.DeleteValue(section, "StretchSeekWindowMS");
  si.DeleteValue(section, "StretchOverlapMS");
  si.DeleteValue(section, "StretchUseQuickSeek");
  si.DeleteValue(section, "StretchUseAAFilter");
}
// Inequality is simply the negation of equality.
bool AudioStreamParameters::operator!=(const AudioStreamParameters& rhs) const
{
  return !(*this == rhs);
}
// Field-by-field comparison. The previous memcmp() also compared padding
// bytes, whose values are unspecified and can differ even when all fields are
// equal, causing spurious "changed" results.
bool AudioStreamParameters::operator==(const AudioStreamParameters& rhs) const
{
  return (stretch_mode == rhs.stretch_mode && output_latency_minimal == rhs.output_latency_minimal &&
          output_latency_ms == rhs.output_latency_ms && buffer_ms == rhs.buffer_ms &&
          stretch_sequence_length_ms == rhs.stretch_sequence_length_ms &&
          stretch_seekwindow_ms == rhs.stretch_seekwindow_ms && stretch_overlap_ms == rhs.stretch_overlap_ms &&
          stretch_use_quickseek == rhs.stretch_use_quickseek && stretch_use_aa_filter == rhs.stretch_use_aa_filter);
}
CoreAudioStream::CoreAudioStream() = default;

// Ensures the backend stream, stretcher, and buffers are released.
CoreAudioStream::~CoreAudioStream()
{
  Destroy();
}
// Sets up the sample buffers, the stretcher, and (for non-null backends) the
// platform output stream. Returns false, with everything torn down again, if
// backend stream creation fails.
bool CoreAudioStream::Initialize(AudioBackend backend, u32 sample_rate, const AudioStreamParameters& params,
                                 const char* driver_name, const char* device_name, Error* error /* = nullptr */)
{
  Destroy();

  m_sample_rate = sample_rate;
  m_volume = 100;
  m_parameters = params;
  m_filling = false;
  m_paused = false;
  AllocateBuffer();
  StretchAllocate();

  // When no explicit output latency is configured, fall back to the buffer size.
  const u32 output_latency_frames =
    GetBufferSizeForMS(sample_rate, (params.output_latency_ms != 0) ? params.output_latency_ms : params.buffer_ms);
  if (backend != AudioBackend::Null)
  {
    if (!(m_stream =
            AudioStream::CreateStream(backend, sample_rate, NUM_CHANNELS, output_latency_frames,
                                      params.output_latency_minimal, driver_name, device_name, this, true, error)))
    {
      Destroy();
      return false;
    }
  }
  else
  {
    // no point stretching with no output
    // NOTE(review): AllocateBuffer()/StretchAllocate() above already ran with
    // the caller's stretch mode before it is forced to Off here — verify the
    // leftover stretcher/buffer sizing is intentional for the null backend.
    m_parameters = AudioStreamParameters();
    m_parameters.stretch_mode = AudioStretchMode::Off;
    m_parameters.buffer_ms = params.buffer_ms;

    // always paused to avoid output
    m_paused = true;
  }

  return true;
}
// Tears down the stretcher, buffers, and backend stream, returning the object
// to its default (inactive) state. Safe to call repeatedly.
void CoreAudioStream::Destroy()
{
  // Stretcher first (it feeds the buffer), then the buffer, then the backend.
  StretchDestroy();
  DestroyBuffer();
  m_stream.reset();
  m_sample_rate = 0;
  m_parameters = AudioStreamParameters();
  m_volume = 0;
  m_filling = false;
  m_paused = true;
}
// Rounds a frame count up to a whole number of chunks, so the read/write
// paths can always operate on CHUNK_SIZE-frame blocks.
u32 CoreAudioStream::GetAlignedBufferSize(u32 size)
{
  static_assert(Common::IsPow2(CHUNK_SIZE));
  return Common::AlignUpPow2(size, CHUNK_SIZE);
}
// Converts a duration in milliseconds to a chunk-aligned frame count.
u32 CoreAudioStream::GetBufferSizeForMS(u32 sample_rate, u32 ms)
{
  // Widen the intermediate product to 64 bits: ms * sample_rate can exceed
  // u32 for large durations at high sample rates.
  return GetAlignedBufferSize(static_cast<u32>((static_cast<u64>(ms) * sample_rate) / 1000u));
}
// Converts a frame count (chunk-aligned first) back to milliseconds.
u32 CoreAudioStream::GetMSForBufferSize(u32 sample_rate, u32 buffer_size)
{
  buffer_size = GetAlignedBufferSize(buffer_size);
  // Widen to 64 bits so buffer_size * 1000 cannot overflow u32.
  return static_cast<u32>((static_cast<u64>(buffer_size) * 1000u) / sample_rate);
}
// Short names as stored in the settings file; index order must match the
// AudioStretchMode enum.
static constexpr const std::array s_stretch_mode_names = {
  "None",
  "Resample",
  "TimeStretch",
};

// Translatable UI names; index order must match the AudioStretchMode enum.
static constexpr const std::array s_stretch_mode_display_names = {
  TRANSLATE_DISAMBIG_NOOP("Settings", "Off (Noisy)", "AudioStretchMode"),
  TRANSLATE_DISAMBIG_NOOP("Settings", "Resampling (Pitch Shift)", "AudioStretchMode"),
  TRANSLATE_DISAMBIG_NOOP("Settings", "Time Stretch (Tempo Change, Best Sound)", "AudioStretchMode"),
};
// Returns the config-file name for a stretch mode, or "" when the value is
// out of range (e.g. a corrupted setting).
const char* CoreAudioStream::GetStretchModeName(AudioStretchMode mode)
{
  const size_t index = static_cast<size_t>(mode);
  if (index >= s_stretch_mode_names.size())
    return "";

  return s_stretch_mode_names[index];
}
// Returns the translated, human-readable name for the UI, or "" when the mode
// is out of range.
const char* CoreAudioStream::GetStretchModeDisplayName(AudioStretchMode mode)
{
  return (static_cast<size_t>(mode) < s_stretch_mode_display_names.size()) ?
           Host::TranslateToCString("Settings", s_stretch_mode_display_names[static_cast<size_t>(mode)],
                                    "AudioStretchMode") :
           "";
}
// Parses a config-file stretch mode name; returns nullopt for unknown strings.
std::optional<AudioStretchMode> CoreAudioStream::ParseStretchMode(const char* name)
{
  // Bound by the name table itself rather than the enum, so an out-of-sync
  // table can never be indexed out of range.
  for (size_t i = 0; i < s_stretch_mode_names.size(); i++)
  {
    if (std::strcmp(name, s_stretch_mode_names[i]) == 0)
      return static_cast<AudioStretchMode>(i);
  }

  return std::nullopt;
}
// Returns the approximate number of frames queued in the ring buffer.
// Relaxed ordering: callers tolerate racing with the producer/consumer and
// only need a best-effort value.
u32 CoreAudioStream::GetBufferedFramesRelaxed() const
{
  const u32 rpos = m_rpos.load(std::memory_order_relaxed);
  const u32 wpos = m_wpos.load(std::memory_order_relaxed);
  return (wpos + m_buffer_size - rpos) % m_buffer_size;
}
// Consumer side: fills `samples` with num_frames of interleaved stereo audio
// from the ring buffer. Handles underruns (refill phase plus a crude stretch
// of whatever data is available), and applies the output volume.
void CoreAudioStream::ReadFrames(SampleType* samples, u32 num_frames)
{
  const u32 available_frames = GetBufferedFramesRelaxed();
  u32 frames_to_read = num_frames;
  u32 silence_frames = 0;

  if (m_filling)
  {
    // After an underrun, output silence until the buffer has refilled to a
    // fraction of its size (larger fraction when not time-stretching).
    u32 toFill = m_buffer_size / ((m_parameters.stretch_mode != AudioStretchMode::TimeStretch) ? 32 : 400);
    toFill = GetAlignedBufferSize(toFill);

    if (available_frames < toFill)
    {
      silence_frames = num_frames;
      frames_to_read = 0;
    }
    else
    {
      m_filling = false;
      VERBOSE_LOG("Underrun compensation done ({} frames buffered)", toFill);
    }
  }

  if (available_frames < frames_to_read)
  {
    // Underrun: take what we have, enter the refill phase, and tell the
    // time stretcher so it can slow down.
    silence_frames = frames_to_read - available_frames;
    frames_to_read = available_frames;
    m_filling = true;

    if (m_parameters.stretch_mode == AudioStretchMode::TimeStretch)
      StretchUnderrun();
  }

  if (frames_to_read > 0)
  {
    u32 rpos = m_rpos.load(std::memory_order_acquire);

    u32 end = m_buffer_size - rpos;
    if (end > frames_to_read)
      end = frames_to_read;

    // towards the end of the buffer
    if (end > 0)
    {
      std::memcpy(samples, &m_buffer[rpos * NUM_CHANNELS], end * NUM_CHANNELS * sizeof(SampleType));
      rpos += end;
      rpos = (rpos == m_buffer_size) ? 0 : rpos;
    }

    // after wrapping around
    const u32 start = frames_to_read - end;
    if (start > 0)
    {
      std::memcpy(&samples[end * NUM_CHANNELS], &m_buffer[0], start * NUM_CHANNELS * sizeof(SampleType));
      rpos = start;
    }

    m_rpos.store(rpos, std::memory_order_release);
  }

  if (silence_frames > 0)
  {
    if (frames_to_read > 0)
    {
      // super basic resampler - spread the input samples evenly across the output samples. will sound like ass and have
      // aliasing, but better than popping by inserting silence.
      const u32 increment =
        static_cast<u32>(65536.0f * (static_cast<float>(frames_to_read) / static_cast<float>(num_frames)));

      SampleType* resample_ptr = static_cast<SampleType*>(alloca(frames_to_read * NUM_CHANNELS * sizeof(SampleType)));
      std::memcpy(resample_ptr, samples, frames_to_read * NUM_CHANNELS * sizeof(SampleType));

      SampleType* out_ptr = samples;
      const u32 copy_stride = sizeof(SampleType) * NUM_CHANNELS;
      u32 resample_subpos = 0;  // 16.16 fixed-point position into the source
      for (u32 i = 0; i < num_frames; i++)
      {
        std::memcpy(out_ptr, resample_ptr, copy_stride);
        out_ptr += NUM_CHANNELS;

        resample_subpos += increment;
        resample_ptr += (resample_subpos >> 16) * NUM_CHANNELS;
        resample_subpos %= 65536u;
      }

      VERBOSE_LOG("Audio buffer underflow, resampled {} frames to {}", frames_to_read, num_frames);
    }
    else
    {
      // no data, fall back to silence
      std::memset(samples + (frames_to_read * NUM_CHANNELS), 0, silence_frames * NUM_CHANNELS * sizeof(s16));
    }
  }

  if (m_volume != 100)
  {
    // Scale all output samples by volume/100: SIMD over groups of 8 samples,
    // scalar (with clamping) for the tail.
    u32 num_samples = num_frames * NUM_CHANNELS;

    const u32 aligned_samples = Common::AlignDownPow2(num_samples, 8);
    num_samples -= aligned_samples;

    const float volume_mult = static_cast<float>(m_volume) / 100.0f;
    const GSVector4 volume_multv = GSVector4(volume_mult);
    const SampleType* const aligned_samples_end = samples + aligned_samples;
    for (; samples != aligned_samples_end; samples += 8)
    {
      GSVector4i iv = GSVector4i::load<false>(samples); // [0, 1, 2, 3, 4, 5, 6, 7]
      GSVector4i iv1 = iv.upl16(iv);                    // [0, 0, 1, 1, 2, 2, 3, 3]
      GSVector4i iv2 = iv.uph16(iv);                    // [4, 4, 5, 5, 6, 6, 7, 7]
      iv1 = iv1.sra32<16>();                            // [0, 1, 2, 3]
      iv2 = iv2.sra32<16>();                            // [4, 5, 6, 7]
      GSVector4 fv1 = GSVector4(iv1);                   // [f0, f1, f2, f3]
      GSVector4 fv2 = GSVector4(iv2);                   // [f4, f5, f6, f7]
      fv1 = fv1 * volume_multv;                         // [f0, f1, f2, f3]
      fv2 = fv2 * volume_multv;                         // [f4, f5, f6, f7]
      iv1 = GSVector4i(fv1);                            // [0, 1, 2, 3]
      iv2 = GSVector4i(fv2);                            // [4, 5, 6, 7]
      iv = iv1.ps32(iv2);                               // [0, 1, 2, 3, 4, 5, 6, 7]
      GSVector4i::store<false>(samples, iv);
    }

    while (num_samples > 0)
    {
      *samples = static_cast<s16>(std::clamp(static_cast<float>(*samples) * volume_mult, -32768.0f, 32767.0f));
      samples++;
      num_samples--;
    }
  }
}
// Producer side: copies num_frames of interleaved stereo samples into the
// ring buffer, splitting the copy at the wrap point. On overrun, time-stretch
// mode drops old data via StretchOverrun(); other modes drop the new chunk.
void CoreAudioStream::InternalWriteFrames(s16* data, u32 num_frames)
{
  const u32 free = m_buffer_size - GetBufferedFramesRelaxed();
  if (free <= num_frames)
  {
    if (m_parameters.stretch_mode == AudioStretchMode::TimeStretch)
    {
      // Advances the read pointer to make room, and nudges the stretcher.
      StretchOverrun();
    }
    else
    {
      DEBUG_LOG("Buffer overrun, chunk dropped");
      return;
    }
  }

  u32 wpos = m_wpos.load(std::memory_order_acquire);

  // wrapping around the end of the buffer?
  if ((m_buffer_size - wpos) <= num_frames)
  {
    // needs to be written in two parts
    const u32 end = m_buffer_size - wpos;
    const u32 start = num_frames - end;

    // start is zero when this chunk reaches exactly the end
    std::memcpy(&m_buffer[wpos * NUM_CHANNELS], data, end * NUM_CHANNELS * sizeof(SampleType));
    if (start > 0)
      std::memcpy(&m_buffer[0], data + end * NUM_CHANNELS, start * NUM_CHANNELS * sizeof(SampleType));

    wpos = start;
  }
  else
  {
    // no split
    std::memcpy(&m_buffer[wpos * NUM_CHANNELS], data, num_frames * NUM_CHANNELS * sizeof(SampleType));
    wpos += num_frames;
  }

  m_wpos.store(wpos, std::memory_order_release);
}
// Allocates the ring buffer plus the staging/float chunk buffers, sized from
// the configured buffer duration and the current stretch mode.
void CoreAudioStream::AllocateBuffer()
{
  // Stretcher can produce a large amount of samples from few samples when running slow, so allocate a larger buffer.
  // In most cases it's not going to be used, but better to have a larger buffer and not need it than overrun.
  const u32 multiplier = (m_parameters.stretch_mode == AudioStretchMode::TimeStretch) ?
                           16 :
                           ((m_parameters.stretch_mode == AudioStretchMode::Off) ? 1 : 2);
  m_buffer_size = GetAlignedBufferSize(((m_parameters.buffer_ms * multiplier) * m_sample_rate) / 1000);
  m_target_buffer_size = GetAlignedBufferSize((m_sample_rate * m_parameters.buffer_ms) / 1000u);

  // Buffers are vector-aligned; staging/float buffers hold exactly one chunk.
  m_buffer = Common::make_unique_aligned_for_overwrite<s16[]>(VECTOR_ALIGNMENT, m_buffer_size * NUM_CHANNELS);
  m_staging_buffer = Common::make_unique_aligned_for_overwrite<s16[]>(VECTOR_ALIGNMENT, CHUNK_SIZE * NUM_CHANNELS);
  m_float_buffer = Common::make_unique_aligned_for_overwrite<float[]>(VECTOR_ALIGNMENT, CHUNK_SIZE * NUM_CHANNELS);

  DEV_LOG("Allocated buffer of {} frames for buffer of {} ms [stretch {}, target size {}].", m_buffer_size,
          m_parameters.buffer_ms, GetStretchModeName(m_parameters.stretch_mode), m_target_buffer_size);
}
void CoreAudioStream::DestroyBuffer()
{
  // Release all sample storage and reset the ring buffer cursors back to empty.
  m_buffer.reset();
  m_float_buffer.reset();
  m_staging_buffer.reset();

  m_buffer_size = 0;
  m_rpos.store(0, std::memory_order_release);
  m_wpos.store(0, std::memory_order_release);
}
void CoreAudioStream::EmptyBuffer()
{
if (IsStretchEnabled())
{
soundtouch_clear(m_soundtouch);
if (m_parameters.stretch_mode == AudioStretchMode::TimeStretch)
soundtouch_setTempo(m_soundtouch, m_nominal_rate);
}
m_wpos.store(m_rpos.load(std::memory_order_acquire), std::memory_order_release);
}
void CoreAudioStream::SetNominalRate(float tempo)
{
  // Record how much faster (or slower) than the sample rate frames are expected to arrive.
  m_nominal_rate = tempo;

  switch (m_parameters.stretch_mode)
  {
    case AudioStretchMode::Resample:
      soundtouch_setRate(m_soundtouch, tempo);
      break;

    case AudioStretchMode::TimeStretch:
      // While the stretcher is inactive, UpdateStretchTempo() owns the tempo; don't override it.
      if (!m_stretch_inactive)
        soundtouch_setTempo(m_soundtouch, tempo);
      break;

    default:
      break;
  }
}
void CoreAudioStream::SetStretchMode(AudioStretchMode mode)
{
  if (m_parameters.stretch_mode == mode)
    return;

  // Buffers can't be resized while the stream is pulling data, so pause around the swap.
  const bool was_paused = m_paused;
  if (!was_paused)
    SetPaused(true);

  // Tear down the old buffers and stretcher, then rebuild both for the new mode.
  DestroyBuffer();
  StretchDestroy();
  m_parameters.stretch_mode = mode;
  AllocateBuffer();
  if (m_parameters.stretch_mode != AudioStretchMode::Off)
    StretchAllocate();

  if (!was_paused)
    SetPaused(false);
}
void CoreAudioStream::SetPaused(bool paused)
{
// force state to always be paused if we're a null output
if (m_paused == paused || !m_stream)
return;
Error error;
if (!(paused ? m_stream->Stop(&error) : m_stream->Start(&error)))
ERROR_LOG("Failed to {} stream: {}", paused ? "pause" : "restart", error.GetDescription());
else
m_paused = paused;
}
void CoreAudioStream::SetOutputVolume(u32 volume)
{
  // Volume is applied downstream; a value of 0 also makes EndWrite() drop incoming frames.
  m_volume = volume;
}
void CoreAudioStream::BeginWrite(SampleType** buffer_ptr, u32* num_frames)
{
  // Hands the producer a pointer into the staging buffer, plus the number of frames left in the
  // current chunk. The producer fills it and commits via EndWrite().
  // TODO: Write directly to buffer when not using stretching.
  *buffer_ptr = &m_staging_buffer[m_staging_buffer_pos];
  *num_frames = CHUNK_SIZE - (m_staging_buffer_pos / NUM_CHANNELS);
}
// Converts num_samples interleaved s16 samples to floats scaled by 1/32767.
// Processes 8 samples per iteration (num_samples is rounded up), so both buffers must be sized
// to a multiple of 8 samples - CHUNK_SIZE * NUM_CHANNELS satisfies this.
static void S16ChunkToFloat(const s16* src, float* dst, u32 num_samples)
{
  constexpr GSVector4 S16_TO_FLOAT_V = GSVector4::cxpr(1.0f / 32767.0f);

  const u32 iterations = (num_samples + 7) / 8;
  for (u32 i = 0; i < iterations; i++)
  {
    const GSVector4i sv = GSVector4i::load<true>(src);
    src += 8;

    // Widen 8 s16s to two vectors of 4 s32s; the duplicate-then-arithmetic-shift sequence
    // performs the sign extension.
    GSVector4i iv1 = sv.upl16(sv); // [0, 0, 1, 1, 2, 2, 3, 3]
    GSVector4i iv2 = sv.uph16(sv); // [4, 4, 5, 5, 6, 6, 7, 7]
    iv1 = iv1.sra32<16>();         // [0, 1, 2, 3]
    iv2 = iv2.sra32<16>();         // [4, 5, 6, 7]
    GSVector4 fv1 = GSVector4(iv1); // [f0, f1, f2, f3]
    GSVector4 fv2 = GSVector4(iv2); // [f4, f5, f6, f7]
    fv1 = fv1 * S16_TO_FLOAT_V;
    fv2 = fv2 * S16_TO_FLOAT_V;

    GSVector4::store<true>(dst + 0, fv1);
    GSVector4::store<true>(dst + 4, fv2);
    dst += 8;
  }
}
// Converts num_samples floats (nominally in [-1, 1]) back to interleaved s16 samples by scaling
// with 32767. Processes 8 samples per iteration (num_samples is rounded up), so buffers must be
// sized to a multiple of 8 samples.
static void FloatChunkToS16(s16* dst, const float* src, u32 num_samples)
{
  const GSVector4 FLOAT_TO_S16_V = GSVector4::cxpr(32767.0f);

  const u32 iterations = (num_samples + 7) / 8;
  for (u32 i = 0; i < iterations; i++)
  {
    GSVector4 fv1 = GSVector4::load<true>(src + 0);
    GSVector4 fv2 = GSVector4::load<true>(src + 4);
    src += 8;

    fv1 = fv1 * FLOAT_TO_S16_V;
    fv2 = fv2 * FLOAT_TO_S16_V;
    GSVector4i iv1 = GSVector4i(fv1);
    GSVector4i iv2 = GSVector4i(fv2);

    // Pack the two s32 vectors down to 8 s16s. NOTE(review): assumes ps32() follows SSE packs
    // semantics (signed saturation), so out-of-range inputs clamp rather than wrap - confirm.
    const GSVector4i iv = iv1.ps32(iv2);
    GSVector4i::store<true>(dst, iv);
    dst += 8;
  }
}
void CoreAudioStream::EndWrite(u32 num_frames)
{
  // Commits num_frames written via BeginWrite(). Frames accumulate in the staging buffer and
  // are only pushed downstream once a full chunk is available.
  // don't bother committing anything when muted
  if (m_volume == 0 || m_paused)
    return;

  m_staging_buffer_pos += num_frames * NUM_CHANNELS;
  DebugAssert(m_staging_buffer_pos <= (CHUNK_SIZE * NUM_CHANNELS));
  if ((m_staging_buffer_pos / NUM_CHANNELS) < CHUNK_SIZE)
    return;

  // Full chunk accumulated - start the next one from scratch.
  m_staging_buffer_pos = 0;

  if (!IsStretchEnabled())
  {
    // No stretching: the chunk goes straight into the ring buffer.
    InternalWriteFrames(m_staging_buffer.get(), CHUNK_SIZE);
    return;
  }

  // Stretching: convert to float (the stretcher's input format) and run it through.
  S16ChunkToFloat(m_staging_buffer.get(), m_float_buffer.get(), CHUNK_SIZE * NUM_CHANNELS);
  StretchWriteBlock(m_float_buffer.get());
}
// Time stretching algorithm based on PCSX2 implementation.
/// Returns true when val lies within [min, max], inclusive on both ends.
template<class T>
ALWAYS_INLINE static bool IsInRange(const T& val, const T& min, const T& max)
{
  const bool at_least_min = (min <= val);
  const bool at_most_max = (val <= max);
  return at_least_min && at_most_max;
}
void CoreAudioStream::StretchAllocate()
{
  // Creates and configures the soundtouch instance, and resets all stretcher bookkeeping.
  if (m_parameters.stretch_mode == AudioStretchMode::Off)
    return;

  m_soundtouch = soundtouch_createInstance();
  soundtouch_setSampleRate(m_soundtouch, m_sample_rate);
  soundtouch_setChannels(m_soundtouch, NUM_CHANNELS);

  // Apply the user-tunable soundtouch parameters.
  soundtouch_setSetting(m_soundtouch, SETTING_USE_QUICKSEEK, m_parameters.stretch_use_quickseek);
  soundtouch_setSetting(m_soundtouch, SETTING_USE_AA_FILTER, m_parameters.stretch_use_aa_filter);
  soundtouch_setSetting(m_soundtouch, SETTING_SEQUENCE_MS, m_parameters.stretch_sequence_length_ms);
  soundtouch_setSetting(m_soundtouch, SETTING_SEEKWINDOW_MS, m_parameters.stretch_seekwindow_ms);
  soundtouch_setSetting(m_soundtouch, SETTING_OVERLAP_MS, m_parameters.stretch_overlap_ms);

  // Resample mode pins a fixed rate; timestretch starts at the nominal tempo and is adjusted
  // dynamically by UpdateStretchTempo().
  if (m_parameters.stretch_mode == AudioStretchMode::Resample)
    soundtouch_setRate(m_soundtouch, m_nominal_rate);
  else
    soundtouch_setTempo(m_soundtouch, m_nominal_rate);

  // Start at the reset threshold so the first tempo update performs a full reset.
  m_stretch_reset = STRETCH_RESET_THRESHOLD;
  m_stretch_inactive = false;
  m_stretch_ok_count = 0;
  m_dynamic_target_usage = 0.0f;
  m_average_position = 0;
  m_average_available = 0;
  m_staging_buffer_pos = 0;
}
void CoreAudioStream::StretchDestroy()
{
  // Safe to call when no stretcher has been allocated.
  if (!m_soundtouch)
    return;

  soundtouch_destroyInstance(m_soundtouch);
  m_soundtouch = nullptr;
}
void CoreAudioStream::StretchWriteBlock(const float* block)
{
  // Pushes one CHUNK_SIZE block of float frames through the stretcher, or straight to the ring
  // buffer when stretching is disabled.
  if (IsStretchEnabled())
  {
    soundtouch_putSamples(m_soundtouch, block, CHUNK_SIZE);

    // Drain everything the stretcher has produced so far, one chunk at a time, converting each
    // chunk back to s16 via the staging buffer.
    u32 tempProgress;
    while (tempProgress = soundtouch_receiveSamples(m_soundtouch, m_float_buffer.get(), CHUNK_SIZE), tempProgress != 0)
    {
      FloatChunkToS16(m_staging_buffer.get(), m_float_buffer.get(), tempProgress * NUM_CHANNELS);
      InternalWriteFrames(m_staging_buffer.get(), tempProgress);
    }

    if (m_parameters.stretch_mode == AudioStretchMode::TimeStretch)
      UpdateStretchTempo();
  }
  else
  {
    FloatChunkToS16(m_staging_buffer.get(), block, CHUNK_SIZE * NUM_CHANNELS);
    InternalWriteFrames(m_staging_buffer.get(), CHUNK_SIZE);
  }
}
float CoreAudioStream::AddAndGetAverageTempo(float val)
{
  // Records val and returns the mean of the most recent (up to) AVERAGING_WINDOW samples.
  static constexpr u32 AVERAGING_WINDOW = 50;

  // Build up a circular buffer for tempo averaging to prevent rapid tempo oscillations.
  if (m_average_available < AVERAGING_BUFFER_SIZE)
    m_average_available++;
  m_average_fullness[m_average_position] = val;
  m_average_position = (m_average_position + 1U) % AVERAGING_BUFFER_SIZE;

  // Walk backwards from the write position over the last actual_window entries.
  // The + AVERAGING_BUFFER_SIZE ensures we don't go negative when using modulo arithmetic.
  const u32 actual_window = std::min<u32>(m_average_available, AVERAGING_WINDOW);
  u32 index = (m_average_position - actual_window + AVERAGING_BUFFER_SIZE) % AVERAGING_BUFFER_SIZE;

  float sum = 0.0f;
  u32 count = 0;

#ifdef CPU_ARCH_SIMD
  // Sum four entries per iteration, falling back to scalar adds when the window straddles the
  // end of the circular buffer (the vector load cannot wrap).
  GSVector4 vsum = GSVector4::zero();
  const u32 vcount = Common::AlignDownPow2(actual_window, 4);
  for (; count < vcount; count += 4)
  {
    if ((index + 4) > AVERAGING_BUFFER_SIZE)
    {
      // wraparound
      for (u32 i = 0; i < 4; i++)
      {
        sum += m_average_fullness[index];
        index = (index + 1) % AVERAGING_BUFFER_SIZE;
      }
    }
    else
    {
      vsum += GSVector4::load<false>(&m_average_fullness[index]);
      index = (index + 4) % AVERAGING_BUFFER_SIZE;
    }
  }
  sum += vsum.addv();
#endif

  // Scalar tail, and the entire loop when SIMD is unavailable.
  for (; count < actual_window; count++)
  {
    sum += m_average_fullness[index];
    index = (index + 1) % AVERAGING_BUFFER_SIZE;
  }

  sum /= static_cast<float>(actual_window);

  // Treat an all-zero average as neutral tempo so callers can safely divide by the result.
  return (sum != 0.0f) ? sum : 1.0f;
}
void CoreAudioStream::UpdateStretchTempo()
{
  // Computes a new soundtouch tempo from current buffer occupancy, with smoothing, dampening,
  // hysteresis-based deactivation around 1:1 speed, and periodic reset on repeated errors.
  static constexpr float MIN_TEMPO = 0.05f;
  static constexpr float MAX_TEMPO = 500.0f;

  // Hysteresis thresholds to prevent stretcher from constantly toggling on/off.
  // i.e. this is the range we will run in 1:1 mode for.
  static constexpr float INACTIVE_GOOD_FACTOR = 1.04f;
  static constexpr float INACTIVE_BAD_FACTOR = 1.2f;

  // Require sustained good performance before deactivating.
  static constexpr u32 INACTIVE_MIN_OK_COUNT = 50;
  static constexpr u32 COMPENSATION_DIVIDER = 100;

  // Controls how aggressively we adjust the dynamic target. We want to keep the same target size regardless
  // of the target speed, but need additional buffering when intentionally running below 100%.
  float base_target_usage = static_cast<float>(m_target_buffer_size) / std::min(m_nominal_rate, 1.0f);

  // tempo = current_buffer / target_buffer.
  const u32 ibuffer_usage = GetBufferedFramesRelaxed();
  float buffer_usage = static_cast<float>(ibuffer_usage);
  float tempo = buffer_usage / m_dynamic_target_usage;

  // Prevents the system from getting stuck in a bad state due to accumulated errors.
  if (m_stretch_reset >= STRETCH_RESET_THRESHOLD)
  {
    VERBOSE_LOG("___ Stretcher is being reset.");
    m_stretch_inactive = false;
    m_stretch_ok_count = 0;
    m_dynamic_target_usage = base_target_usage;
    m_average_available = 0;
    m_average_position = 0;
    m_stretch_reset = 0;
    tempo = m_nominal_rate;
  }
  else if (m_stretch_reset > 0)
  {
    // Back off resets if enough time has passed. That way a very occasional lag/overflow
    // doesn't cascade into unnecessary tempo adjustment.
    const u64 now = Timer::GetCurrentValue();
    if (Timer::ConvertValueToSeconds(now - m_stretch_reset_time) >= 2.0f)
    {
      m_stretch_reset--;
      m_stretch_reset_time = now;
    }
  }

  // Apply temporal smoothing to prevent rapid tempo changes that cause artifacts.
  tempo = AddAndGetAverageTempo(tempo);

  // Apply non-linear dampening when close to target to reduce oscillation.
  if (tempo < 2.0f)
    tempo = std::sqrt(tempo);

  tempo = std::clamp(tempo, MIN_TEMPO, MAX_TEMPO);

  // Running below 100% needs extra headroom in the target.
  if (tempo < 1.0f)
    base_target_usage /= std::sqrt(tempo);

  // Gradually adjust our dynamic target toward what would give us the desired tempo.
  m_dynamic_target_usage +=
    static_cast<float>(base_target_usage / tempo - m_dynamic_target_usage) / static_cast<float>(COMPENSATION_DIVIDER);

  // Snap back to baseline if we're very close.
  if (IsInRange(tempo, 0.9f, 1.1f) &&
      IsInRange(m_dynamic_target_usage, base_target_usage * 0.9f, base_target_usage * 1.1f))
  {
    m_dynamic_target_usage = base_target_usage;
  }

  // Are we changing the active state?
  if (!m_stretch_inactive)
  {
    if (IsInRange(tempo, 1.0f / INACTIVE_GOOD_FACTOR, INACTIVE_GOOD_FACTOR))
      m_stretch_ok_count++;
    else
      m_stretch_ok_count = 0;

    if (m_stretch_ok_count >= INACTIVE_MIN_OK_COUNT)
    {
      VERBOSE_LOG("=== Stretcher is now inactive.");
      m_stretch_inactive = true;
    }
  }
  else if (!IsInRange(tempo, 1.0f / INACTIVE_BAD_FACTOR, INACTIVE_BAD_FACTOR))
  {
    VERBOSE_LOG("~~~ Stretcher is now active @ tempo {}.", tempo);
    m_stretch_inactive = false;
    m_stretch_ok_count = 0;
  }

  // If we're inactive, we don't want to change the tempo.
  if (m_stretch_inactive)
    tempo = m_nominal_rate;

  if constexpr (LOG_TIMESTRETCH_STATS)
  {
    // Start min/max at the extremes so the first logged window reports real values instead of a
    // min of 0.0. Note: lowest() (not min(), which is the smallest positive value) is the
    // correct "no maximum yet" sentinel for floats.
    static float min_tempo = std::numeric_limits<float>::max();
    static float max_tempo = std::numeric_limits<float>::lowest();
    static float acc_tempo = 0.0f;
    static u32 acc_cnt = 0;
    acc_tempo += tempo;
    acc_cnt++;
    min_tempo = std::min(min_tempo, tempo);
    max_tempo = std::max(max_tempo, tempo);

    static int iterations = 0;
    static u64 last_log_time = 0;
    const u64 now = Timer::GetCurrentValue();
    if (Timer::ConvertValueToSeconds(now - last_log_time) > 1.0f)
    {
      const float avg_tempo = (acc_cnt > 0) ? (acc_tempo / static_cast<float>(acc_cnt)) : 0.0f;
      VERBOSE_LOG("{:3d} ms ({:3.0f}%), tempo: avg={:.2f} min={:.2f} max={:.2f}, comp: {:2.3f}, iters: {}, reset:{}",
                  (ibuffer_usage * 1000u) / m_sample_rate, 100.0f * buffer_usage / base_target_usage, avg_tempo,
                  min_tempo, max_tempo, m_dynamic_target_usage / base_target_usage, iterations, m_stretch_reset);
      last_log_time = now;
      iterations = 0;
      min_tempo = std::numeric_limits<float>::max();
      max_tempo = std::numeric_limits<float>::lowest();
      acc_tempo = 0.0f;
      acc_cnt = 0;
    }

    iterations++;
  }

  soundtouch_setTempo(m_soundtouch, tempo);
}
void CoreAudioStream::StretchUnderrun()
{
// Didn't produce enough frames in time.
m_stretch_reset++;
if (m_stretch_reset < STRETCH_RESET_THRESHOLD)
m_stretch_reset_time = Timer::GetCurrentValue();
}
void CoreAudioStream::StretchOverrun()
{
  // Produced more frames than can fit in the buffer. Count it towards a stretcher reset.
  m_stretch_reset++;
  if (m_stretch_reset < STRETCH_RESET_THRESHOLD)
    m_stretch_reset_time = Timer::GetCurrentValue();

  // Drop two packets to give the time stretcher a bit more time to slow things down.
  // This prevents a cascading overrun situation where each overrun makes the next one more likely.
  // Advancing the read cursor (with wraparound) discards the oldest buffered frames.
  const u32 discard = CHUNK_SIZE * 2;
  m_rpos.store((m_rpos.load(std::memory_order_acquire) + discard) % m_buffer_size, std::memory_order_release);
}
void CoreAudioStream::EmptyStretchBuffers()
{
  // Drops any samples buffered inside soundtouch and forces a full reset on the next tempo update.
  if (!IsStretchEnabled())
    return;

  m_stretch_reset = STRETCH_RESET_THRESHOLD;

  // Wipe soundtouch samples. If we don't do this and we're switching from a high tempo to low,
  // we'll still have quite a large buffer of samples that will be played back at a low tempo,
  // resulting in a long delay before the audio starts playing at the new tempo.
  soundtouch_clear(m_soundtouch);
}

View File

@@ -0,0 +1,173 @@
// SPDX-FileCopyrightText: 2019-2025 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: CC-BY-NC-ND-4.0
#pragma once
#include "audio_stream.h"
#include "common/align.h"
#include <array>
#include <atomic>
#include <memory>
#include <optional>
class Error;
class SettingsInterface;
// How audio is adjusted when the system runs at a speed other than 100%.
enum class AudioStretchMode : u8
{
  Off,         // No adjustment; buffer overruns drop chunks.
  Resample,    // Fixed-rate resampling at the nominal rate.
  TimeStretch, // Dynamic tempo adjustment driven by buffer occupancy.
  Count
};
// User-configurable audio stream parameters, persisted via SettingsInterface.
struct AudioStreamParameters
{
  AudioStretchMode stretch_mode = DEFAULT_STRETCH_MODE;
  // Request the backend's minimum output latency instead of output_latency_ms.
  bool output_latency_minimal = DEFAULT_OUTPUT_LATENCY_MINIMAL;
  u16 output_latency_ms = DEFAULT_OUTPUT_LATENCY_MS;
  // Amount of audio buffered before being pulled by the host API.
  u16 buffer_ms = DEFAULT_BUFFER_MS;

  // SoundTouch tuning parameters (sequence/seekwindow/overlap, quickseek, AA filter).
  u16 stretch_sequence_length_ms = DEFAULT_STRETCH_SEQUENCE_LENGTH;
  u16 stretch_seekwindow_ms = DEFAULT_STRETCH_SEEKWINDOW;
  u16 stretch_overlap_ms = DEFAULT_STRETCH_OVERLAP;
  bool stretch_use_quickseek = DEFAULT_STRETCH_USE_QUICKSEEK;
  bool stretch_use_aa_filter = DEFAULT_STRETCH_USE_AA_FILTER;

  static constexpr AudioStretchMode DEFAULT_STRETCH_MODE = AudioStretchMode::TimeStretch;

// Android defaults to a larger buffer.
#ifndef __ANDROID__
  static constexpr u16 DEFAULT_BUFFER_MS = 50;
  static constexpr u16 DEFAULT_OUTPUT_LATENCY_MS = 20;
#else
  static constexpr u16 DEFAULT_BUFFER_MS = 100;
  static constexpr u16 DEFAULT_OUTPUT_LATENCY_MS = 20;
#endif
  static constexpr bool DEFAULT_OUTPUT_LATENCY_MINIMAL = false;

  static constexpr u16 DEFAULT_STRETCH_SEQUENCE_LENGTH = 30;
  static constexpr u16 DEFAULT_STRETCH_SEEKWINDOW = 20;
  static constexpr u16 DEFAULT_STRETCH_OVERLAP = 10;
  static constexpr bool DEFAULT_STRETCH_USE_QUICKSEEK = false;
  static constexpr bool DEFAULT_STRETCH_USE_AA_FILTER = false;

  // Settings persistence.
  void Load(const SettingsInterface& si, const char* section);
  void Save(SettingsInterface& si, const char* section) const;
  void Clear(SettingsInterface& si, const char* section);

  bool operator==(const AudioStreamParameters& rhs) const;
  bool operator!=(const AudioStreamParameters& rhs) const;
};
// Core-side audio stream. Buffers frames produced by the emulated system in a lock-free ring
// buffer, optionally time-stretches/resamples them via soundtouch, and feeds the backend
// AudioStream through the AudioStreamSource::ReadFrames() callback.
class CoreAudioStream final : private AudioStreamSource
{
public:
  using SampleType = AudioStreamSource::SampleType;

  static constexpr u32 NUM_CHANNELS = 2; // Interleaved stereo.
  static constexpr u32 CHUNK_SIZE = 64;  // Frames per staging chunk.

  CoreAudioStream();
  ~CoreAudioStream();

  // Buffer size helpers; sizes are in frames, durations in milliseconds.
  static u32 GetAlignedBufferSize(u32 size);
  static u32 GetBufferSizeForMS(u32 sample_rate, u32 ms);
  static u32 GetMSForBufferSize(u32 sample_rate, u32 buffer_size);

  // Stretch mode <-> string conversion, used by settings and UI.
  static const char* GetStretchModeName(AudioStretchMode mode);
  static const char* GetStretchModeDisplayName(AudioStretchMode mode);
  static std::optional<AudioStretchMode> ParseStretchMode(const char* name);

  ALWAYS_INLINE u32 GetSampleRate() const { return m_sample_rate; }
  ALWAYS_INLINE u32 GetBufferSize() const { return m_buffer_size; }
  ALWAYS_INLINE u32 GetTargetBufferSize() const { return m_target_buffer_size; }
  ALWAYS_INLINE u32 GetOutputVolume() const { return m_volume; }
  ALWAYS_INLINE float GetNominalTempo() const { return m_nominal_rate; }
  ALWAYS_INLINE bool IsPaused() const { return m_paused; }

  u32 GetBufferedFramesRelaxed() const;

  /// Creation/destruction.
  bool Initialize(AudioBackend backend, u32 sample_rate, const AudioStreamParameters& params, const char* driver_name,
                  const char* device_name, Error* error);
  void Destroy();

  /// Temporarily pauses the stream, preventing it from requesting data.
  void SetPaused(bool paused);
  void SetOutputVolume(u32 volume);

  // Producer interface: BeginWrite() hands out staging space, EndWrite() commits frames.
  void BeginWrite(SampleType** buffer_ptr, u32* num_frames);
  void EndWrite(u32 num_frames);

  // Discards all buffered frames.
  void EmptyBuffer();

  /// Nominal rate is used for both resampling and timestretching, input samples are assumed to be this amount faster
  /// than the sample rate.
  void SetNominalRate(float tempo);
  void SetStretchMode(AudioStretchMode mode);

  /// Wipes out the time stretching buffer, call when reducing target speed.
  void EmptyStretchBuffers();

private:
  static constexpr u32 AVERAGING_BUFFER_SIZE = 256;
  static constexpr u32 STRETCH_RESET_THRESHOLD = 5;

  ALWAYS_INLINE bool IsStretchEnabled() const { return m_parameters.stretch_mode != AudioStretchMode::Off; }

  // Ring buffer management.
  void AllocateBuffer();
  void DestroyBuffer();
  void InternalWriteFrames(SampleType* samples, u32 num_frames);

  // Stretcher lifecycle and tempo control.
  void StretchAllocate();
  void StretchDestroy();
  void StretchWriteBlock(const float* block);
  void StretchUnderrun();
  void StretchOverrun();
  float AddAndGetAverageTempo(float val);
  void UpdateStretchTempo();

  // AudioStreamSource: called by the backend stream to pull frames.
  void ReadFrames(SampleType* samples, u32 num_frames) override;

  // Backend output stream (null when using a null output).
  std::unique_ptr<AudioStream> m_stream;

  u32 m_sample_rate = 0;
  u32 m_volume = 0;
  AudioStreamParameters m_parameters;
  bool m_stretch_inactive = false;
  bool m_filling = false;
  bool m_paused = false;

  // Main ring buffer of interleaved s16 frames, plus read/write cursors (in frames).
  u32 m_buffer_size = 0;
  Common::unique_aligned_ptr<s16[]> m_buffer;
  // temporary staging buffer, used for timestretching
  Common::unique_aligned_ptr<s16[]> m_staging_buffer;
  // float buffer, soundtouch only accepts float samples as input
  Common::unique_aligned_ptr<float[]> m_float_buffer;
  std::atomic<u32> m_rpos{0};
  std::atomic<u32> m_wpos{0};

  // Opaque soundtouch instance handle.
  void* m_soundtouch = nullptr;

  // Tempo controller state (see UpdateStretchTempo()).
  u32 m_target_buffer_size = 0;
  u32 m_stretch_reset = STRETCH_RESET_THRESHOLD;
  u64 m_stretch_reset_time = 0;
  u32 m_stretch_ok_count = 0;
  float m_nominal_rate = 1.0f;
  float m_dynamic_target_usage = 0.0f;

  // Circular buffer used for tempo averaging.
  u32 m_average_position = 0;
  u32 m_average_available = 0;
  u32 m_staging_buffer_pos = 0;
  std::array<float, AVERAGING_BUFFER_SIZE> m_average_fullness = {};
};

View File

@@ -19,15 +19,18 @@ LOG_CHANNEL(CubebAudioStream);
namespace {
class CubebAudioStream : public AudioStream
class CubebAudioStream final : public AudioStream
{
public:
CubebAudioStream(u32 sample_rate, const AudioStreamParameters& parameters);
~CubebAudioStream();
CubebAudioStream();
~CubebAudioStream() override;
void SetPaused(bool paused) override;
bool Initialize(u32 sample_rate, u32 channels, u32 output_latency_frames, bool output_latency_minimal,
const char* driver_name, const char* device_name, AudioStreamSource* source, bool auto_start,
Error* error);
bool Initialize(const char* driver_name, const char* device_name, Error* error);
bool Start(Error* error) override;
bool Stop(Error* error) override;
private:
static void LogCallback(const char* fmt, ...);
@@ -35,20 +38,18 @@ private:
long nframes);
static void StateCallback(cubeb_stream* stream, void* user_ptr, cubeb_state state);
void DestroyContextAndStream();
cubeb* m_context = nullptr;
cubeb_stream* stream = nullptr;
};
} // namespace
static TinyString GetCubebErrorString(int rv)
static void FormatCubebError(Error* error, const char* prefix, int rv)
{
TinyString ret;
const char* str;
switch (rv)
{
// clang-format off
#define C(e) case e: ret.assign(#e); break
#define C(e) case e: str = #e; break
// clang-format on
C(CUBEB_OK);
@@ -59,37 +60,18 @@ static TinyString GetCubebErrorString(int rv)
C(CUBEB_ERROR_DEVICE_UNAVAILABLE);
default:
ret = "CUBEB_ERROR_UNKNOWN";
str = "CUBEB_ERROR_UNKNOWN";
break;
#undef C
}
ret.append_format(" ({})", rv);
return ret;
Error::SetStringFmt(error, "{}: {} ({})", prefix, str, rv);
}
CubebAudioStream::CubebAudioStream(u32 sample_rate, const AudioStreamParameters& parameters)
: AudioStream(sample_rate, parameters)
{
}
CubebAudioStream::CubebAudioStream() = default;
CubebAudioStream::~CubebAudioStream()
{
DestroyContextAndStream();
}
void CubebAudioStream::LogCallback(const char* fmt, ...)
{
LargeString str;
std::va_list ap;
va_start(ap, fmt);
str.vsprintf(fmt, ap);
va_end(ap);
DEV_LOG(str);
}
void CubebAudioStream::DestroyContextAndStream()
{
if (stream)
{
@@ -105,63 +87,71 @@ void CubebAudioStream::DestroyContextAndStream()
}
}
bool CubebAudioStream::Initialize(const char* driver_name, const char* device_name, Error* error)
void CubebAudioStream::LogCallback(const char* fmt, ...)
{
LargeString str;
std::va_list ap;
va_start(ap, fmt);
str.vsprintf(fmt, ap);
va_end(ap);
DEV_LOG(str);
}
bool CubebAudioStream::Initialize(u32 sample_rate, u32 channels, u32 output_latency_frames, bool output_latency_minimal,
const char* driver_name, const char* device_name, AudioStreamSource* source,
bool auto_start, Error* error)
{
cubeb_set_log_callback(CUBEB_LOG_NORMAL, LogCallback);
int rv =
cubeb_init(&m_context, "DuckStation", g_settings.audio_driver.empty() ? nullptr : g_settings.audio_driver.c_str());
int rv = cubeb_init(&m_context, "DuckStation", (driver_name && *driver_name != '\0') ? driver_name : nullptr);
if (rv != CUBEB_OK)
{
Error::SetStringFmt(error, "Could not initialize cubeb context: {}", GetCubebErrorString(rv));
FormatCubebError(error, "Could not initialize cubeb context: ", rv);
return false;
}
cubeb_stream_params params = {};
params.format = CUBEB_SAMPLE_S16LE;
params.rate = m_sample_rate;
params.channels = NUM_CHANNELS;
params.rate = sample_rate;
params.channels = channels;
params.layout = CUBEB_LAYOUT_STEREO;
params.prefs = CUBEB_STREAM_PREF_NONE;
u32 latency_frames = GetBufferSizeForMS(
m_sample_rate, (m_parameters.output_latency_ms == 0) ? m_parameters.buffer_ms : m_parameters.output_latency_ms);
u32 min_latency_frames = 0;
rv = cubeb_get_min_latency(m_context, &params, &min_latency_frames);
if (rv == CUBEB_ERROR_NOT_SUPPORTED)
{
DEV_LOG("Cubeb backend does not support latency queries, using latency of {} ms ({} frames).",
m_parameters.buffer_ms, latency_frames);
FramesToMS(sample_rate, output_latency_frames), output_latency_frames);
}
else
{
if (rv != CUBEB_OK)
{
Error::SetStringFmt(error, "cubeb_get_min_latency() failed: {}", GetCubebErrorString(rv));
DestroyContextAndStream();
FormatCubebError(error, "cubeb_get_min_latency() failed: {}", rv);
return false;
}
const u32 minimum_latency_ms = GetMSForBufferSize(m_sample_rate, min_latency_frames);
DEV_LOG("Minimum latency: {} ms ({} audio frames)", minimum_latency_ms, min_latency_frames);
if (m_parameters.output_latency_minimal)
if (output_latency_minimal)
{
// use minimum
latency_frames = min_latency_frames;
output_latency_frames = min_latency_frames;
}
else if (minimum_latency_ms > m_parameters.output_latency_ms)
else if (min_latency_frames > output_latency_frames)
{
WARNING_LOG("Minimum latency is above requested latency: {} vs {}, adjusting to compensate.", min_latency_frames,
latency_frames);
latency_frames = min_latency_frames;
output_latency_frames);
output_latency_frames = min_latency_frames;
}
}
DEV_LOG("Output latency: {} ms ({} audio frames)", FramesToMS(sample_rate, output_latency_frames),
min_latency_frames);
cubeb_devid selected_device = nullptr;
const std::string& selected_device_name = g_settings.audio_output_device;
cubeb_device_collection devices;
bool devices_valid = false;
if (!selected_device_name.empty())
if (device_name && *device_name != '\0')
{
rv = cubeb_enumerate_devices(m_context, CUBEB_DEVICE_TYPE_OUTPUT, &devices);
devices_valid = (rv == CUBEB_OK);
@@ -170,7 +160,7 @@ bool CubebAudioStream::Initialize(const char* driver_name, const char* device_na
for (size_t i = 0; i < devices.count; i++)
{
const cubeb_device_info& di = devices.device[i];
if (di.device_id && selected_device_name == di.device_id)
if (di.device_id && std::strcmp(device_name, di.device_id) == 0)
{
INFO_LOG("Using output device '{}' ({}).", di.device_id, di.friendly_name ? di.friendly_name : di.device_id);
selected_device = di.devid;
@@ -180,41 +170,41 @@ bool CubebAudioStream::Initialize(const char* driver_name, const char* device_na
if (!selected_device)
{
Host::AddOSDMessage(
OSDMessageType::Error,
fmt::format("Requested audio output device '{}' not found, using default.", selected_device_name));
Host::AddOSDMessage(OSDMessageType::Error,
fmt::format("Requested audio output device '{}' not found, using default.", device_name));
}
}
else
{
WARNING_LOG("cubeb_enumerate_devices() returned {}, using default device.", GetCubebErrorString(rv));
Error enumerate_error;
FormatCubebError(&enumerate_error, "cubeb_enumerate_devices() failed: ", rv);
WARNING_LOG("{}, using default device.", enumerate_error.GetDescription());
}
}
BaseInitialize();
char stream_name[32];
std::snprintf(stream_name, sizeof(stream_name), "%p", this);
rv = cubeb_stream_init(m_context, &stream, stream_name, nullptr, nullptr, selected_device, &params, latency_frames,
&CubebAudioStream::DataCallback, StateCallback, this);
rv = cubeb_stream_init(m_context, &stream, stream_name, nullptr, nullptr, selected_device, &params,
output_latency_frames, &CubebAudioStream::DataCallback, StateCallback, source);
if (devices_valid)
cubeb_device_collection_destroy(m_context, &devices);
if (rv != CUBEB_OK)
{
Error::SetStringFmt(error, "cubeb_stream_init() failed: {}", GetCubebErrorString(rv));
DestroyContextAndStream();
FormatCubebError(error, "cubeb_stream_init() failed: ", rv);
return false;
}
rv = cubeb_stream_start(stream);
if (rv != CUBEB_OK)
if (auto_start)
{
Error::SetStringFmt(error, "cubeb_stream_start() failed: {}", GetCubebErrorString(rv));
DestroyContextAndStream();
return false;
rv = cubeb_stream_start(stream);
if (rv != CUBEB_OK)
{
FormatCubebError(error, "cubeb_stream_start() failed: ", rv);
return false;
}
}
return true;
@@ -228,33 +218,47 @@ void CubebAudioStream::StateCallback(cubeb_stream* stream, void* user_ptr, cubeb
long CubebAudioStream::DataCallback(cubeb_stream* stm, void* user_ptr, const void* input_buffer, void* output_buffer,
long nframes)
{
static_cast<CubebAudioStream*>(user_ptr)->ReadFrames(static_cast<s16*>(output_buffer), static_cast<u32>(nframes));
static_cast<AudioStreamSource*>(user_ptr)->ReadFrames(static_cast<s16*>(output_buffer), static_cast<u32>(nframes));
return nframes;
}
void CubebAudioStream::SetPaused(bool paused)
bool CubebAudioStream::Start(Error* error)
{
if (paused == m_paused || !stream)
return;
const int rv = paused ? cubeb_stream_stop(stream) : cubeb_stream_start(stream);
const int rv = cubeb_stream_start(stream);
if (rv != CUBEB_OK)
{
ERROR_LOG("Could not {} stream: {}", paused ? "pause" : "resume", rv);
return;
FormatCubebError(error, "cubeb_stream_start() failed: ", rv);
return false;
}
m_paused = paused;
return true;
}
std::unique_ptr<AudioStream> AudioStream::CreateCubebAudioStream(u32 sample_rate,
const AudioStreamParameters& parameters,
bool CubebAudioStream::Stop(Error* error)
{
const int rv = cubeb_stream_stop(stream);
if (rv != CUBEB_OK)
{
FormatCubebError(error, "cubeb_stream_stop() failed: ", rv);
return false;
}
return true;
}
std::unique_ptr<AudioStream> AudioStream::CreateCubebAudioStream(u32 sample_rate, u32 channels,
u32 output_latency_frames, bool output_latency_minimal,
const char* driver_name, const char* device_name,
AudioStreamSource* source, bool auto_start,
Error* error)
{
std::unique_ptr<CubebAudioStream> stream = std::make_unique<CubebAudioStream>(sample_rate, parameters);
if (!stream->Initialize(driver_name, device_name, error))
std::unique_ptr<CubebAudioStream> stream = std::make_unique<CubebAudioStream>();
if (!stream->Initialize(sample_rate, channels, output_latency_frames, output_latency_minimal, driver_name,
device_name, source, auto_start, error))
{
stream.reset();
}
return stream;
}
@@ -271,6 +275,8 @@ std::vector<std::pair<std::string, std::string>> AudioStream::GetCubebDriverName
std::vector<AudioStream::DeviceInfo> AudioStream::GetCubebOutputDevices(const char* driver, u32 sample_rate)
{
Error error;
std::vector<AudioStream::DeviceInfo> ret;
ret.emplace_back(std::string(), TRANSLATE_STR("AudioStream", "Default"), 0);
@@ -278,7 +284,8 @@ std::vector<AudioStream::DeviceInfo> AudioStream::GetCubebOutputDevices(const ch
int rv = cubeb_init(&context, "DuckStation", (driver && *driver) ? driver : nullptr);
if (rv != CUBEB_OK)
{
ERROR_LOG("cubeb_init() failed: {}", GetCubebErrorString(rv));
FormatCubebError(&error, "cubeb_init() failed: ", rv);
ERROR_LOG(error.GetDescription());
return ret;
}
@@ -288,7 +295,8 @@ std::vector<AudioStream::DeviceInfo> AudioStream::GetCubebOutputDevices(const ch
rv = cubeb_enumerate_devices(context, CUBEB_DEVICE_TYPE_OUTPUT, &devices);
if (rv != CUBEB_OK)
{
ERROR_LOG("cubeb_enumerate_devices() failed: {}", GetCubebErrorString(rv));
FormatCubebError(&error, "cubeb_enumerate_devices() failed: ", rv);
ERROR_LOG(error.GetDescription());
return ret;
}

View File

@@ -16,18 +16,21 @@ namespace {
class SDLAudioStream final : public AudioStream
{
public:
SDLAudioStream(u32 sample_rate, const AudioStreamParameters& parameters);
~SDLAudioStream();
SDLAudioStream(AudioStreamSource* source, u32 channels);
~SDLAudioStream() override;
void SetPaused(bool paused) override;
bool Initialize(u32 sample_rate, u32 channels, u32 output_latency_frames, bool output_latency_minimal,
bool auto_start, Error* error);
bool OpenDevice(Error* error);
void CloseDevice();
bool Start(Error* error) override;
bool Stop(Error* error) override;
protected:
static void AudioCallback(void* userdata, SDL_AudioStream* stream, int additional_amount, int total_amount);
AudioStreamSource* m_source;
SDL_AudioStream* m_sdl_stream = nullptr;
u32 m_channels;
};
} // namespace
@@ -50,60 +53,11 @@ static bool InitializeSDLAudio(Error* error)
return true;
}
SDLAudioStream::SDLAudioStream(u32 sample_rate, const AudioStreamParameters& parameters)
: AudioStream(sample_rate, parameters)
SDLAudioStream::SDLAudioStream(AudioStreamSource* source, u32 channels) : m_source(source), m_channels(channels)
{
}
SDLAudioStream::~SDLAudioStream()
{
SDLAudioStream::CloseDevice();
}
std::unique_ptr<AudioStream> AudioStream::CreateSDLAudioStream(u32 sample_rate, const AudioStreamParameters& parameters,
Error* error)
{
if (!InitializeSDLAudio(error))
return {};
std::unique_ptr<SDLAudioStream> stream = std::make_unique<SDLAudioStream>(sample_rate, parameters);
if (!stream->OpenDevice(error))
stream.reset();
return stream;
}
bool SDLAudioStream::OpenDevice(Error* error)
{
DebugAssert(!m_sdl_stream);
const SDL_AudioSpec spec = {
.format = SDL_AUDIO_S16LE, .channels = NUM_CHANNELS, .freq = static_cast<int>(m_sample_rate)};
m_sdl_stream =
SDL_OpenAudioDeviceStream(SDL_AUDIO_DEVICE_DEFAULT_PLAYBACK, &spec, AudioCallback, static_cast<void*>(this));
if (!m_sdl_stream)
{
Error::SetStringFmt(error, "SDL_OpenAudioDeviceStream() failed: {}", SDL_GetError());
return false;
}
BaseInitialize();
SDL_ResumeAudioDevice(SDL_GetAudioStreamDevice(m_sdl_stream));
return true;
}
void SDLAudioStream::SetPaused(bool paused)
{
if (m_paused == paused)
return;
paused ? SDL_PauseAudioStreamDevice(m_sdl_stream) : SDL_ResumeAudioStreamDevice(m_sdl_stream);
m_paused = paused;
}
void SDLAudioStream::CloseDevice()
{
if (m_sdl_stream)
{
@@ -112,6 +66,47 @@ void SDLAudioStream::CloseDevice()
}
}
bool SDLAudioStream::Initialize(u32 sample_rate, u32 channels, u32 output_latency_frames, bool output_latency_minimal,
bool auto_start, Error* error)
{
const SDL_AudioSpec spec = {
.format = SDL_AUDIO_S16LE, .channels = static_cast<int>(channels), .freq = static_cast<int>(sample_rate)};
m_sdl_stream = SDL_OpenAudioDeviceStream(SDL_AUDIO_DEVICE_DEFAULT_PLAYBACK, &spec, AudioCallback, this);
if (!m_sdl_stream)
{
Error::SetStringFmt(error, "SDL_OpenAudioDeviceStream() failed: {}", SDL_GetError());
return false;
}
if (auto_start)
SDL_ResumeAudioDevice(SDL_GetAudioStreamDevice(m_sdl_stream));
return true;
}
bool SDLAudioStream::Start(Error* error)
{
if (!SDL_ResumeAudioStreamDevice(m_sdl_stream))
{
Error::SetStringFmt(error, "SDL_ResumeAudioStreamDevice() failed: {}", SDL_GetError());
return false;
}
return true;
}
bool SDLAudioStream::Stop(Error* error)
{
if (!SDL_PauseAudioStreamDevice(m_sdl_stream))
{
Error::SetStringFmt(error, "SDL_PauseAudioStreamDevice() failed: {}", SDL_GetError());
return false;
}
return true;
}
// SDL pull callback: fills `additional_amount` bytes of audio from the stream source
// and hands the buffer to SDL, freeing the stack allocation afterwards.
// NOTE(review): this function is garbled in this view. The line below beginning with `@@`
// is diff-hunk residue from the rendered commit page, and the statements that allocate
// `data` (presumably via SDL_stack_alloc — TODO confirm against the real file) are missing.
void SDLAudioStream::AudioCallback(void* userdata, SDL_AudioStream* stream, int additional_amount, int total_amount)
{
if (additional_amount == 0)
@@ -121,9 +116,23 @@ void SDLAudioStream::AudioCallback(void* userdata, SDL_AudioStream* stream, int
if (data)
{
SDLAudioStream* const this_ptr = static_cast<SDLAudioStream*>(userdata);
// NOTE(review): the next two lines and the two that follow are the pre-change and
// post-change variants of the same statements from the diff; only one pair belongs in
// the real file (the post-change pair, which reads m_channels and forwards to m_source).
const u32 num_frames = static_cast<u32>(additional_amount) / (sizeof(SampleType) * NUM_CHANNELS);
this_ptr->ReadFrames(reinterpret_cast<SampleType*>(data), num_frames);
const u32 num_frames = static_cast<u32>(additional_amount) / (sizeof(SampleType) * this_ptr->m_channels);
this_ptr->m_source->ReadFrames(reinterpret_cast<SampleType*>(data), num_frames);
SDL_PutAudioStreamData(stream, data, additional_amount);
SDL_stack_free(data);
}
}
// Factory for the SDL audio backend: brings up the SDL audio subsystem, constructs the
// stream, and initializes it with the requested parameters.
// Returns nullptr (with `error` populated) if subsystem init or stream init fails.
std::unique_ptr<AudioStream> AudioStream::CreateSDLAudioStream(u32 sample_rate, u32 channels, u32 output_latency_frames,
bool output_latency_minimal, AudioStreamSource* source,
bool auto_start, Error* error)
{
  // The SDL audio subsystem must be running before a device stream can be created.
  if (!InitializeSDLAudio(error))
    return {};

  auto stream = std::make_unique<SDLAudioStream>(source, channels);
  const bool initialized =
    stream->Initialize(sample_rate, channels, output_latency_frames, output_latency_minimal, auto_start, error);
  if (!initialized)
    return nullptr;

  return stream;
}

View File

@@ -4,6 +4,7 @@
<ItemGroup>
<ClInclude Include="animated_image.h" />
<ClInclude Include="compress_helpers.h" />
<ClInclude Include="core_audio_stream.h" />
<ClInclude Include="dyn_shaderc.h" />
<ClInclude Include="dyn_spirv_cross.h" />
<ClInclude Include="elf_file.h" />
@@ -127,6 +128,7 @@
<ClCompile Include="cd_image_memory.cpp" />
<ClCompile Include="cd_image_pbp.cpp" />
<ClCompile Include="compress_helpers.cpp" />
<ClCompile Include="core_audio_stream.cpp" />
<ClCompile Include="cubeb_audio_stream.cpp" />
<ClCompile Include="cue_parser.cpp" />
<ClCompile Include="cd_image_ppf.cpp" />

View File

@@ -80,6 +80,7 @@
<ClInclude Include="postprocessing_shader_slang.h" />
<ClInclude Include="imgui_gsvector.h" />
<ClInclude Include="translation.h" />
<ClInclude Include="core_audio_stream.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="state_wrapper.cpp" />
@@ -165,6 +166,7 @@
<ClCompile Include="spirv_module.cpp" />
<ClCompile Include="postprocessing_shader_slang.cpp" />
<ClCompile Include="translation.cpp" />
<ClCompile Include="core_audio_stream.cpp" />
</ItemGroup>
<ItemGroup>
<None Include="metal_shaders.metal" />