SoundEffectManager: Upmix and resample sound effects when needed

Allows users to provide sound effect files whose sample rate or channel count differs from the output format.
2026-02-14 02:14:35 +00:00 · 2025-12-29 20:59:37 +10:00
parent 5cd5853435
commit 3f1ab1cb8a
4 changed files with 381 additions and 92 deletions
--- a/dep/cubeb/CMakeLists.txt
+++ b/dep/cubeb/CMakeLists.txt
@@ -46,6 +46,8 @@ target_include_directories(cubeb
    EXPORT=
    RANDOM_PREFIX=speex
  )
+  add_library(speex_resampler_headers INTERFACE)
+  target_include_directories(speex_resampler_headers INTERFACE subprojects)

 # $<BUILD_INTERFACE:> required because of https://gitlab.kitware.com/cmake/cmake/-/issues/15415
 target_link_libraries(cubeb PRIVATE $<BUILD_INTERFACE:speex>)
@@ -178,4 +180,4 @@ if(HAVE_SYS_SOUNDCARD_H)
    endif()
  endif()
 endif()
-endif()
+endif()
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -149,10 +149,9 @@ set(RECOMPILER_SRCS
 )

 target_precompile_headers(core PRIVATE "pch.h")
-target_include_directories(core PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/..")
 target_include_directories(core PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/..")
 target_link_libraries(core PUBLIC Threads::Threads common util)
-target_link_libraries(core PRIVATE xxhash imgui rapidyaml rcheevos cpuinfo::cpuinfo ZLIB::ZLIB zstd::libzstd_shared libzip::zip)
+target_link_libraries(core PRIVATE xxhash imgui rapidyaml rcheevos cpuinfo::cpuinfo ZLIB::ZLIB zstd::libzstd_shared libzip::zip speex_resampler_headers)

 if(CPU_ARCH_X64)
  target_compile_definitions(core PUBLIC "ENABLE_RECOMPILER=1" "ENABLE_MMAP_FASTMEM=1")
--- a/src/core/core.props
+++ b/src/core/core.props
@@ -7,7 +7,7 @@
      <PreprocessorDefinitions Condition="('$(Platform)'=='x64' Or '$(Platform)'=='ARM' Or '$(Platform)'=='ARM64')">ENABLE_RECOMPILER=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
      <PreprocessorDefinitions Condition="('$(Platform)'=='x64' Or '$(Platform)'=='ARM64')">ENABLE_MMAP_FASTMEM=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>

-      <AdditionalIncludeDirectories>%(AdditionalIncludeDirectories);$(SolutionDir)dep\zlib\include;$(SolutionDir)dep\rcheevos\include</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>%(AdditionalIncludeDirectories);$(SolutionDir)dep\zlib\include;$(SolutionDir)dep\rcheevos\include;$(SolutionDir)dep\cubeb\subprojects</AdditionalIncludeDirectories>

      <PreprocessorDefinitions>%(PreprocessorDefinitions);C4_NO_DEBUG_BREAK=1</PreprocessorDefinitions>
      <AdditionalIncludeDirectories>%(AdditionalIncludeDirectories);$(SolutionDir)dep\rapidyaml\include</AdditionalIncludeDirectories>
--- a/src/core/sound_effect_manager.cpp
+++ b/src/core/sound_effect_manager.cpp
@@ -12,6 +12,7 @@
 #include "common/assert.h"
 #include "common/error.h"
 #include "common/gsvector.h"
+#include "common/heap_array.h"
 #include "common/log.h"
 #include "common/lru_cache.h"
 #include "common/path.h"
@@ -26,7 +27,7 @@

 LOG_CHANNEL(SoundEffectManager);

-// TODO: Resampling and channels/format conversion support
+typedef struct SpeexResamplerState_ SpeexResamplerState;

 namespace SoundEffectManager {

@@ -46,10 +47,11 @@ public:
  void ReadFrames(SampleType* samples, u32 num_frames) override;
 };

+using FrameArray = DynamicHeapArray<AudioStream::SampleType>;
+
 struct CachedEffect
 {
-  std::vector<AudioStream::SampleType> samples;
-  u32 num_frames = 0;
+  FrameArray frames;
 };
 using CachedEffectPtr = std::shared_ptr<CachedEffect>;

@@ -59,14 +61,38 @@ struct PlayingCachedEffect
  u32 current_frame = 0;
 };

-using ActiveSoundEntry = std::variant<WAVReader, PlayingCachedEffect>;
+struct SpeexResamplerStateDeleter // Deleter type plugged into SpeexResamplerStatePtr.
+{
+  void operator()(SpeexResamplerState* state) const; // Declared here; defined further down the file.
+};
+using SpeexResamplerStatePtr = std::unique_ptr<SpeexResamplerState, SpeexResamplerStateDeleter>;
+
+/// State for a streamed effect that needs channel conversion and/or resampling before it can be mixed.
+/// The object is sized to exactly OBJECT_SIZE bytes, with the staging buffer taking whatever room the
+/// other members leave.
+struct ResampledStreamedEffect
+{
+  /// Total size of this object in bytes; enforced by the static_assert following the struct.
+  static constexpr u32 OBJECT_SIZE = 4096;
+
+  /// Capacity of input_buffer in frames, derived from the space left after the other members.
+  /// Uses the buffer's real element type so the capacity tracks AudioStream::SampleType.
+  static constexpr u32 INPUT_BUFFER_SIZE =
+    (OBJECT_SIZE - sizeof(WAVReader) - sizeof(SpeexResamplerStatePtr) - sizeof(u32) - sizeof(u32)) / NUM_CHANNELS /
+    sizeof(AudioStream::SampleType);
+
+  ResampledStreamedEffect(WAVReader&& reader_, SpeexResamplerStatePtr&& resampler_state_);
+
+  /// Source file the frames are read from.
+  WAVReader reader;
+
+  /// Resampler instance; left null when only channel conversion is required.
+  SpeexResamplerStatePtr resampler_state;
+
+  /// Number of valid frames currently held in input_buffer.
+  u32 input_buffer_size = 0;
+
+  /// Number of frames of input_buffer that have already been consumed.
+  u32 input_buffer_pos = 0;
+
+  /// Staging buffer of interleaved NUM_CHANNELS-channel frames waiting to be resampled/mixed.
+  std::array<AudioStream::SampleType, INPUT_BUFFER_SIZE * NUM_CHANNELS> input_buffer;
+};
+static_assert(sizeof(ResampledStreamedEffect) == ResampledStreamedEffect::OBJECT_SIZE);
+using PlayingResampledEffect = std::unique_ptr<ResampledStreamedEffect>;
+
+using PlayingStreamedEffect = WAVReader;
+using ActiveSoundEntry = std::variant<PlayingStreamedEffect, PlayingCachedEffect, PlayingResampledEffect>;

 struct Locals
 {
  std::mutex state_mutex;
  std::deque<ActiveSoundEntry> active_sounds;
  std::unique_ptr<AudioStream> audio_stream;
-  std::vector<AudioStream::SampleType> temp_buffer;
+  DynamicHeapArray<AudioStream::SampleType> temp_buffer;
  LRUCache<std::string, CachedEffectPtr> effect_cache{MAX_CACHE_SIZE};
  EffectAudioStream audio_stream_source;
  u32 silence_frames = 0;
@@ -93,6 +119,9 @@ static bool EnsureStreamStarted();

 /// Reads frames from an active sound entry.
 static u32 ReadEntryFrames(ActiveSoundEntry& entry, AudioStream::SampleType* samples, u32 num_frames, bool mix);
+static u32 ReadEntryFrames(PlayingCachedEffect& effect, AudioStream::SampleType* samples, u32 num_frames, bool mix);
+static u32 ReadEntryFrames(PlayingStreamedEffect& effect, AudioStream::SampleType* samples, u32 num_frames, bool mix);
+static u32 ReadEntryFrames(PlayingResampledEffect& effect, AudioStream::SampleType* samples, u32 num_frames, bool mix);

 /// Mixes multiple active sounds into the destination buffer.
 static void MixFrames(AudioStream::SampleType* dest, const AudioStream::SampleType* src, u32 num_frames);
@@ -100,6 +129,13 @@ static void MixFrames(AudioStream::SampleType* dest, const AudioStream::SampleTy
 /// Stops the stream if there are no active sounds.
 static void StopStreamIfInactive();

+static void ConvertToStereo(std::span<const AudioStream::SampleType> src, std::span<AudioStream::SampleType> dst,
+                            u32 in_channels);
+
+static bool ConvertFrames(CachedEffect& effect, u32 in_sample_rate, u32 in_channels, u32 in_frames, Error* error);
+
+static PlayingResampledEffect CreateResampledStreamedEffect(WAVReader&& reader, Error* error);
+
 ALIGN_TO_CACHE_LINE static Locals s_locals;

 } // namespace SoundEffectManager
@@ -168,7 +204,7 @@ void SoundEffectManager::EnqueueSoundEffect(std::string_view name)
      return;

    const CachedEffectPtr* effect = LookupOrLoadCachedEffect(std::move(name), lock);
-    if (effect && (*effect)->num_frames > 0 && EnsureStreamStarted())
+    if (effect && !(*effect)->frames.empty() && EnsureStreamStarted())
    {
      s_locals.active_sounds.emplace_back(PlayingCachedEffect{*effect, 0u});
      DEBUG_LOG("{} effects active", s_locals.active_sounds.size());
@@ -187,24 +223,19 @@ bool SoundEffectManager::LoadCachedEffect(const std::string& resource_name, cons
  if (!parsed.has_value())
    return false;

-  if (parsed->sample_rate != SAMPLE_RATE)
-  {
-    Error::SetStringFmt(error, "WAV file sample rate {} does not match expected {}", parsed->sample_rate, SAMPLE_RATE);
-    return false;
-  }
-
-  if (parsed->num_channels != NUM_CHANNELS)
-  {
-    Error::SetStringFmt(error, "WAV file has {} channels, expected {}", parsed->num_channels, NUM_CHANNELS);
-    return false;
-  }
-
-  effect->num_frames = parsed->num_frames;
-  effect->samples.resize(parsed->num_frames * parsed->num_channels);
+  effect->frames.resize(parsed->num_frames * parsed->num_channels);
  if (parsed->num_frames > 0)
  {
-    std::memcpy(effect->samples.data(), parsed->sample_data, parsed->num_frames * parsed->bytes_per_frame);
-    DEV_LOG("Loaded effect '{}' with {} frames.", resource_name, effect->num_frames);
+    std::memcpy(effect->frames.data(), parsed->sample_data, parsed->num_frames * parsed->bytes_per_frame);
+
+    SpeexResamplerStatePtr resampler_state;
+    if (parsed->sample_rate != SAMPLE_RATE || parsed->num_channels != NUM_CHANNELS)
+    {
+      if (!ConvertFrames(*effect, parsed->sample_rate, parsed->num_channels, parsed->num_frames, error))
+        return false;
+    }
+
+    DEV_LOG("Loaded effect '{}' with {} frames.", resource_name, effect->frames.size() / NUM_CHANNELS);
  }
  else
  {
@@ -253,13 +284,28 @@ void SoundEffectManager::StreamSoundEffect(std::string path)
      return;
    }

+    PlayingResampledEffect resampled;
+    if (reader.GetSampleRate() != SAMPLE_RATE || reader.GetNumChannels() != NUM_CHANNELS)
+    {
+      resampled = CreateResampledStreamedEffect(std::move(reader), &error);
+      if (!resampled)
+      {
+        ERROR_LOG("Failed to open sound effect '{}': {}", Path::GetFileName(path), error.GetDescription());
+        return;
+      }
+    }
+
    std::lock_guard lock(s_locals.state_mutex);
    if (!LockedIsInitialized())
      return;

    if (EnsureStreamStarted())
    {
-      s_locals.active_sounds.emplace_back(std::move(reader));
+      if (resampled)
+        s_locals.active_sounds.emplace_back(std::move(resampled));
+      else
+        s_locals.active_sounds.emplace_back(std::move(reader));
+
      DEBUG_LOG("{} effects active", s_locals.active_sounds.size());
    }
  });
@@ -270,28 +316,8 @@ bool SoundEffectManager::OpenFileForStreaming(const char* path, WAVReader* reade
  if (!reader->Open(path, error))
    return false;

-  if (reader->GetSampleRate() != SAMPLE_RATE)
-  {
-    Error::SetStringFmt(error, "WAV file sample rate {} does not match expected {}", reader->GetSampleRate(),
-                        SAMPLE_RATE);
-    return false;
-  }
-
-  if (reader->GetNumChannels() != NUM_CHANNELS)
-  {
-    Error::SetStringFmt(error, "WAV file has {} channels, expected {}", reader->GetNumChannels(), NUM_CHANNELS);
-    return false;
-  }
-
-  std::lock_guard lock(s_locals.state_mutex);
-
-  if (!s_locals.audio_stream)
-  {
-    Error::SetStringView(error, "Audio stream not initialized.");
-    return false;
-  }
-
-  DEV_LOG("Streaming WAV file '{}': {} frames", Path::GetFileName(path), reader->GetNumFrames());
+  DEV_LOG("Streaming WAV file '{}': {} frames @ {}hz, {} channels", Path::GetFileName(path), reader->GetNumFrames(),
+          reader->GetSampleRate(), reader->GetNumChannels());
  return true;
 }

@@ -415,48 +441,11 @@ u32 SoundEffectManager::ReadEntryFrames(ActiveSoundEntry& entry, AudioStream::Sa
 {
  u32 frames_read;
  if (std::holds_alternative<PlayingCachedEffect>(entry))
-  {
-    PlayingCachedEffect& reader = std::get<PlayingCachedEffect>(entry);
-    const u32 frames_available = reader.effect->num_frames - reader.current_frame;
-    if (frames_available == 0)
-      return 0;
-
-    frames_read = std::min(frames_available, num_frames);
-    const AudioStream::SampleType* src_ptr = reader.effect->samples.data() + (reader.current_frame * NUM_CHANNELS);
-    reader.current_frame += frames_read;
-    if (mix)
-      MixFrames(samples, src_ptr, frames_read);
-    else
-      std::memcpy(samples, src_ptr, frames_read * BYTES_PER_FRAME);
-  }
+    frames_read = ReadEntryFrames(std::get<PlayingCachedEffect>(entry), samples, num_frames, mix);
+  else if (std::holds_alternative<PlayingResampledEffect>(entry))
+    frames_read = ReadEntryFrames(std::get<PlayingResampledEffect>(entry), samples, num_frames, mix);
  else
-  {
-    DebugAssert(std::holds_alternative<WAVReader>(entry));
-    WAVReader& reader = std::get<WAVReader>(entry);
-
-    const u32 num_samples = num_frames * NUM_CHANNELS;
-    if (!mix && num_samples > s_locals.temp_buffer.size())
-      s_locals.temp_buffer.resize(num_samples);
-
-    Error error;
-    const std::optional<u32> frames =
-      reader.ReadFrames(mix ? s_locals.temp_buffer.data() : samples, num_frames, &error);
-    if (!frames.has_value())
-    {
-      ERROR_LOG("Error reading wave file: {}", error.GetDescription());
-      return 0;
-    }
-
-    frames_read = frames.value();
-    if (frames_read == 0)
-    {
-      // reached end of file
-      return 0;
-    }
-
-    if (mix)
-      MixFrames(samples, s_locals.temp_buffer.data(), frames_read);
-  }
+    frames_read = ReadEntryFrames(std::get<PlayingStreamedEffect>(entry), samples, num_frames, mix);

  if (!mix)
  {
@@ -469,12 +458,59 @@ u32 SoundEffectManager::ReadEntryFrames(ActiveSoundEntry& entry, AudioStream::Sa
  return frames_read;
 }

+/// Copies (or mixes, when mix is true) up to num_frames frames of a cached effect into samples,
+/// advancing the effect's playback position. Returns the number of frames delivered, which is 0
+/// once the effect has been played to the end.
+u32 SoundEffectManager::ReadEntryFrames(PlayingCachedEffect& effect, AudioStream::SampleType* samples, u32 num_frames,
+                                        bool mix)
+{
+  CachedEffect& cached = *effect.effect;
+  const u32 total_frames = static_cast<u32>(cached.frames.size() / NUM_CHANNELS);
+  const u32 remaining = total_frames - effect.current_frame;
+  if (remaining == 0)
+    return 0;
+
+  // Never hand out more than the caller asked for or than is left in the effect.
+  const u32 count = (num_frames < remaining) ? num_frames : remaining;
+  const AudioStream::SampleType* const src = cached.frames.data() + (effect.current_frame * NUM_CHANNELS);
+  if (mix)
+    MixFrames(samples, src, count);
+  else
+    std::memcpy(samples, src, count * BYTES_PER_FRAME);
+
+  effect.current_frame += count;
+  return count;
+}
+
+/// Reads up to num_frames frames from a streamed WAV file into samples, or mixes them into samples
+/// when mix is true. Returns the number of frames read; 0 on end of file or on a read error (which
+/// is logged).
+u32 SoundEffectManager::ReadEntryFrames(PlayingStreamedEffect& effect, AudioStream::SampleType* samples, u32 num_frames,
+                                        bool mix)
+{
+  // When mixing, decode into the shared scratch buffer first so the destination can be accumulated into.
+  AudioStream::SampleType* read_dest = samples;
+  if (mix)
+  {
+    const u32 samples_needed = num_frames * NUM_CHANNELS;
+    if (samples_needed > s_locals.temp_buffer.size())
+      s_locals.temp_buffer.resize(samples_needed);
+
+    read_dest = s_locals.temp_buffer.data();
+  }
+
+  Error error;
+  const std::optional<u32> result = effect.ReadFrames(read_dest, num_frames, &error);
+  if (!result.has_value())
+  {
+    ERROR_LOG("Error reading wave file: {}", error.GetDescription());
+    return 0;
+  }
+
+  // A count of zero means the end of the file was reached; there is nothing to mix in that case.
+  const u32 frames_read = result.value();
+  if (frames_read > 0 && mix)
+    MixFrames(samples, read_dest, frames_read);
+
+  return frames_read;
+}
+
 void SoundEffectManager::MixFrames(AudioStream::SampleType* dest, const AudioStream::SampleType* src, u32 num_frames)
 {
-  static constexpr u32 SAMPLES_PER_VEC = 8;
  const u32 num_samples = num_frames * NUM_CHANNELS;
-  const u32 num_samples_aligned = Common::AlignDown(num_samples, SAMPLES_PER_VEC);
  u32 i = 0;
+
+#ifdef CPU_ARCH_SIMD
+  static constexpr u32 SAMPLES_PER_VEC = 8;
+  const u32 num_samples_aligned = Common::AlignDown(num_samples, SAMPLES_PER_VEC);
  for (; i < num_samples_aligned; i += SAMPLES_PER_VEC)
  {
    GSVector4i vsrc = GSVector4i::load<false>(src);
@@ -484,6 +520,7 @@ void SoundEffectManager::MixFrames(AudioStream::SampleType* dest, const AudioStr
    src += SAMPLES_PER_VEC;
    dest += SAMPLES_PER_VEC;
  }
+#endif

  for (; i < num_samples; i++)
  {
@@ -493,3 +530,254 @@ void SoundEffectManager::MixFrames(AudioStream::SampleType* dest, const AudioStr
                 static_cast<s32>(std::numeric_limits<AudioStream::SampleType>::max())));
  }
 }
+
+/// Converts interleaved audio with in_channels channels per frame into interleaved two-channel audio.
+/// Mono input is duplicated into both output channels; input with more channels keeps only the first
+/// two channels of every frame. dst must hold exactly as many frames as src.
+void SoundEffectManager::ConvertToStereo(std::span<const AudioStream::SampleType> src,
+                                         std::span<AudioStream::SampleType> dst, u32 in_channels)
+{
+  // Validate the channel count before it is used as a divisor below.
+  DebugAssert(in_channels > 0);
+
+  const u32 num_frames = static_cast<u32>(src.size() / in_channels);
+  DebugAssert(num_frames == static_cast<u32>(dst.size() / NUM_CHANNELS));
+
+  if (in_channels == 1)
+  {
+    // Upmix mono -> Stereo, optimized
+    const AudioStream::SampleType* src_ptr = src.data();
+    AudioStream::SampleType* dst_ptr = dst.data();
+    u32 i = 0;
+
+#ifdef CPU_ARCH_SIMD
+    // Vector path: each iteration consumes 8 mono samples and stores 16 output samples.
+    const u32 aligned_frames = Common::AlignDownPow2(num_frames, 8);
+    for (; i < aligned_frames; i += 8)
+    {
+      const GSVector4i vsrc = GSVector4i::load<false>(src_ptr);
+      const GSVector4i low = vsrc.upl16(vsrc);
+      const GSVector4i high = vsrc.uph16(vsrc);
+      GSVector4i::store<false>(dst_ptr, low);
+      GSVector4i::store<false>(dst_ptr + 8, high);
+      src_ptr += 8;
+      dst_ptr += 16;
+    }
+#endif
+
+    // Scalar path: handles everything when SIMD is unavailable, otherwise the leftover frames.
+    for (; i < num_frames; i++)
+    {
+      const AudioStream::SampleType sample = *(src_ptr++);
+      *(dst_ptr++) = sample;
+      *(dst_ptr++) = sample;
+    }
+  }
+  else
+  {
+    // Downmix case, drop the other channels. Not ideal, but who's using surround wavs...
+    const AudioStream::SampleType* src_ptr = src.data();
+    AudioStream::SampleType* dst_ptr = dst.data();
+    const u32 skip = in_channels - 2;
+    for (u32 frame = 0; frame < num_frames; frame++)
+    {
+      *(dst_ptr++) = *(src_ptr++);
+      *(dst_ptr++) = *(src_ptr++);
+      src_ptr += skip;
+    }
+  }
+}
+
+// Defined in here to avoid polluting the command line.
+#define OUTSIDE_SPEEX
+#define FLOATING_POINT
+#define EXPORT
+#define RANDOM_PREFIX speex
+#include "speex/speex_resampler.h"
+
+void SoundEffectManager::SpeexResamplerStateDeleter::operator()(SpeexResamplerState* state) const
+{
+  speex_resampler_destroy(state); // hands the state to the speex resampler library for destruction
+}
+
+/// Brings a freshly loaded effect into the mixer's format, in place.
+/// On entry effect.frames holds in_frames interleaved frames with in_channels channels at
+/// in_sample_rate; on success it holds NUM_CHANNELS-channel frames at SAMPLE_RATE.
+/// Returns false and fills error if the speex resampler reports a failure.
+bool SoundEffectManager::ConvertFrames(CachedEffect& effect, u32 in_sample_rate, u32 in_channels, u32 in_frames,
+                                       Error* error)
+{
+  DynamicHeapArray<AudioStream::SampleType> scratch;
+
+  // Step 1: channel conversion, so the resampler below only ever sees NUM_CHANNELS channels.
+  if (in_channels != NUM_CHANNELS)
+  {
+    scratch.resize(in_frames * NUM_CHANNELS);
+    ConvertToStereo(effect.frames, scratch, in_channels);
+    effect.frames.swap(scratch);
+  }
+
+  // Step 2: sample rate conversion, skipped entirely when the rate already matches.
+  if (in_sample_rate == SAMPLE_RATE)
+    return true;
+
+  // Output frame count for a given input frame count, rounded up.
+  const auto output_frames_for = [in_sample_rate](u32 input_frames) {
+    const u64 scaled = static_cast<u64>(input_frames) * SAMPLE_RATE;
+    return static_cast<u32>((scaled + (in_sample_rate - 1)) / in_sample_rate);
+  };
+
+  // since this is on a worker thread, use max quality
+  int errcode;
+  const SpeexResamplerStatePtr resampler(
+    speex_resampler_init(NUM_CHANNELS, in_sample_rate, SAMPLE_RATE, SPEEX_RESAMPLER_QUALITY_MAX, &errcode));
+  if (!resampler)
+  {
+    Error::SetStringFmt(error, "speex_resampler_init() failed: {} ({})", speex_resampler_strerror(errcode), errcode);
+    return false;
+  }
+
+  // Start with room for the whole converted effect plus some slack for the end of the resample.
+  const u32 initial_samples = output_frames_for(in_frames + 2048u) * NUM_CHANNELS;
+  if (scratch.size() < initial_samples)
+    scratch.resize(initial_samples);
+
+  u32 frames_consumed = 0;
+  u32 frames_produced = 0;
+  unsigned int out_count;
+  do
+  {
+    // Make sure there is output space for what is left (never less than 1024 input frames' worth).
+    const u32 expected = output_frames_for(std::max(in_frames - frames_consumed, 1024u));
+    const u32 needed_samples = (frames_produced + expected) * NUM_CHANNELS;
+    if (scratch.size() < needed_samples)
+      scratch.resize(needed_samples);
+
+    // Both counts are in/out: they go in as "available" and come back as "actually used/written".
+    unsigned int in_count = in_frames - frames_consumed;
+    out_count = static_cast<u32>(scratch.size() / NUM_CHANNELS) - frames_produced;
+    const AudioStream::SampleType* const in_ptr =
+      (in_count > 0) ? &effect.frames[frames_consumed * NUM_CHANNELS] : nullptr;
+
+    const int ret = speex_resampler_process_interleaved_int(resampler.get(), in_ptr, &in_count,
+                                                            &scratch[frames_produced * NUM_CHANNELS], &out_count);
+    if (ret != RESAMPLER_ERR_SUCCESS)
+    {
+      Error::SetStringFmt(error, "speex_resampler_process_interleaved_int() failed: {} ({})",
+                          speex_resampler_strerror(ret), ret);
+      return false;
+    }
+
+    frames_consumed += in_count;
+    frames_produced += out_count;
+  } while (out_count != 0); // a pass that produces nothing ends the conversion
+
+  // Trim the scratch buffer to what was actually produced and make it the effect's data.
+  scratch.resize(frames_produced * NUM_CHANNELS);
+  effect.frames.swap(scratch);
+  return true;
+}
+
+SoundEffectManager::ResampledStreamedEffect::ResampledStreamedEffect(WAVReader&& reader_,
+                                                                     SpeexResamplerStatePtr&& resampler_state_)
+  : reader(std::move(reader_)), resampler_state(std::move(resampler_state_)) // moves both arguments into members
+{
+}
+
+/// Wraps reader in a ResampledStreamedEffect, taking ownership of it.
+/// A speex resampler is only created when the file's sample rate differs from SAMPLE_RATE; otherwise
+/// the effect is created with a null resampler. Returns null and fills error if the resampler cannot
+/// be created.
+SoundEffectManager::PlayingResampledEffect SoundEffectManager::CreateResampledStreamedEffect(WAVReader&& reader,
+                                                                                             Error* error)
+{
+  SpeexResamplerStatePtr resampler_state;
+
+  if (const auto source_rate = reader.GetSampleRate(); source_rate != SAMPLE_RATE)
+  {
+    int errcode;
+    resampler_state.reset(
+      speex_resampler_init(NUM_CHANNELS, source_rate, SAMPLE_RATE, SPEEX_RESAMPLER_QUALITY_DESKTOP, &errcode));
+    if (!resampler_state)
+    {
+      Error::SetStringFmt(error, "speex_resampler_init() failed: {} ({})", speex_resampler_strerror(errcode), errcode);
+      return nullptr;
+    }
+  }
+
+  return std::make_unique<ResampledStreamedEffect>(std::move(reader), std::move(resampler_state));
+}
+
+/// Reads up to num_frames output frames from a streamed effect that needs channel conversion and/or
+/// resampling, writing them to samples (or mixing them into samples when mix is true).
+/// Returns the number of frames produced. A short count means no more input could be obtained
+/// (end of file or a logged read error); 0 is returned if the resampler itself fails.
+u32 SoundEffectManager::ReadEntryFrames(PlayingResampledEffect& effect, AudioStream::SampleType* samples,
+                                        u32 num_frames, bool mix)
+{
+  u32 frames_read = 0;
+  do
+  {
+    // fill input buffer if needed
+    if (effect->input_buffer_pos == effect->input_buffer_size && effect->reader.GetRemainingFrames() > 0)
+    {
+      // Files that are not already NUM_CHANNELS wide are read into the shared scratch buffer first and
+      // then converted into the effect's own input buffer.
+      const bool needs_upmix = (effect->reader.GetNumChannels() != NUM_CHANNELS);
+      if (needs_upmix)
+      {
+        const u32 required_buffer_space = ResampledStreamedEffect::INPUT_BUFFER_SIZE * effect->reader.GetNumChannels();
+        if (required_buffer_space > s_locals.temp_buffer.size())
+          s_locals.temp_buffer.resize(required_buffer_space);
+      }
+
+      Error error;
+      const std::optional<u32> frames =
+        effect->reader.ReadFrames(needs_upmix ? s_locals.temp_buffer.data() : effect->input_buffer.data(),
+                                  ResampledStreamedEffect::INPUT_BUFFER_SIZE, &error);
+      if (!frames.has_value())
+      {
+        ERROR_LOG("Error reading wave file: {}", error.GetDescription());
+        break;
+      }
+
+      if (needs_upmix && frames.value() > 0)
+      {
+        ConvertToStereo(s_locals.temp_buffer.cspan(0, frames.value() * effect->reader.GetNumChannels()),
+                        std::span(effect->input_buffer).subspan(0, frames.value() * NUM_CHANNELS),
+                        effect->reader.GetNumChannels());
+      }
+
+      effect->input_buffer_pos = 0;
+      effect->input_buffer_size = frames.value();
+    }
+
+    const u32 input_frames_available = effect->input_buffer_size - effect->input_buffer_pos;
+    const u32 output_frames_requested = num_frames - frames_read;
+
+    // Use pointer arithmetic rather than operator[]: once a completely full buffer has been consumed
+    // the position equals the array size, and indexing one past the end of a std::array is undefined.
+    const AudioStream::SampleType* const src_ptr =
+      effect->input_buffer.data() + (effect->input_buffer_pos * NUM_CHANNELS);
+
+    // not resampling? just mix
+    if (!effect->resampler_state)
+    {
+      const u32 frames_to_copy = std::min(input_frames_available, output_frames_requested);
+      if (frames_to_copy == 0)
+        break;
+
+      if (mix)
+        MixFrames(samples, src_ptr, frames_to_copy);
+      else
+        std::memcpy(samples, src_ptr, frames_to_copy * BYTES_PER_FRAME);
+
+      DebugAssert((effect->input_buffer_pos + frames_to_copy) <= effect->input_buffer_size);
+      effect->input_buffer_pos += frames_to_copy;
+      samples += frames_to_copy * NUM_CHANNELS;
+      frames_read += frames_to_copy;
+      continue;
+    }
+
+    // Both counts are in/out: they go in as "available" and come back as "actually used/written".
+    unsigned int frames_processed = input_frames_available;
+    unsigned int frames_generated = output_frames_requested;
+    if (mix && (frames_generated * NUM_CHANNELS) > s_locals.temp_buffer.size())
+      s_locals.temp_buffer.resize(frames_generated * NUM_CHANNELS);
+
+    const int ret = speex_resampler_process_interleaved_int(
+      effect->resampler_state.get(), (frames_processed > 0) ? src_ptr : nullptr, &frames_processed,
+      mix ? s_locals.temp_buffer.data() : samples, &frames_generated);
+    if (ret != RESAMPLER_ERR_SUCCESS)
+    {
+      // Report nothing read; frames already written during this call are not counted.
+      ERROR_LOG("speex_resampler_process_interleaved_int() failed: {} ({})", speex_resampler_strerror(ret), ret);
+      return 0;
+    }
+
+    DebugAssert((effect->input_buffer_pos + frames_processed) <= effect->input_buffer_size);
+    effect->input_buffer_pos += frames_processed;
+
+    // End of stream only when the resampler made no progress at all. It may consume a few leftover
+    // input frames without emitting output yet, in which case we go around again and refill.
+    if (frames_generated == 0 && frames_processed == 0)
+      break;
+
+    if (mix)
+      MixFrames(samples, s_locals.temp_buffer.data(), frames_generated);
+
+    frames_read += frames_generated;
+    samples += frames_generated * NUM_CHANNELS;
+  } while (frames_read < num_frames);
+
+  return frames_read;
+}