SoundEffectManager: Support converting bit depth

2026-02-04 05:04:33 +00:00 · 2026-01-06 00:51:04 +10:00
parent 2e659d8cf8
commit 4522449156
3 changed files with 183 additions and 31 deletions
--- a/src/core/sound_effect_manager.cpp
+++ b/src/core/sound_effect_manager.cpp
@@ -129,10 +129,12 @@ static void MixFrames(AudioStream::SampleType* dest, const AudioStream::SampleTy
 /// Stops the stream if there are no active sounds.
 static void StopStreamIfInactive();

+static void ConvertTo16Bit(const void* src, std::span<AudioStream::SampleType> dst, u32 in_bits_per_sample);
 static void ConvertToStereo(std::span<const AudioStream::SampleType> src, std::span<AudioStream::SampleType> dst,
                            u32 in_channels);

-static bool ConvertFrames(CachedEffect& effect, u32 in_sample_rate, u32 in_channels, u32 in_frames, Error* error);
+static bool ConvertFrames(CachedEffect& effect, const void* frames_in, u32 in_bits_per_sample, u32 in_sample_rate,
+                          u32 in_channels, u32 in_frames, Error* error);

 static PlayingResampledEffect CreateResampledStreamedEffect(WAVReader&& reader, Error* error);

@@ -226,14 +228,19 @@ bool SoundEffectManager::LoadCachedEffect(const std::string& resource_name, cons
  effect->frames.resize(parsed->num_frames * parsed->num_channels);
  if (parsed->num_frames > 0)
  {
-    std::memcpy(effect->frames.data(), parsed->sample_data, parsed->num_frames * parsed->bytes_per_frame);
-
    SpeexResamplerStatePtr resampler_state;
-    if (parsed->sample_rate != SAMPLE_RATE || parsed->num_channels != NUM_CHANNELS)
+    if (parsed->bits_per_sample != 16 || parsed->sample_rate != SAMPLE_RATE || parsed->num_channels != NUM_CHANNELS)
+    {
+      if (!ConvertFrames(*effect, parsed->sample_data, parsed->bits_per_sample, parsed->sample_rate,
+                         parsed->num_channels, parsed->num_frames, error))
      {
-      if (!ConvertFrames(*effect, parsed->sample_rate, parsed->num_channels, parsed->num_frames, error))
        return false;
      }
+    }
+    else
+    {
+      std::memcpy(effect->frames.data(), parsed->sample_data, parsed->num_frames * parsed->bytes_per_frame);
+    }

    DEV_LOG("Loaded effect '{}' with {} frames.", resource_name, effect->frames.size() / NUM_CHANNELS);
  }
@@ -285,7 +292,8 @@ void SoundEffectManager::StreamSoundEffect(std::string path)
    }

    PlayingResampledEffect resampled;
-    if (reader.GetSampleRate() != SAMPLE_RATE || reader.GetNumChannels() != NUM_CHANNELS)
+    if (reader.GetSampleRate() != SAMPLE_RATE || reader.GetNumChannels() != NUM_CHANNELS ||
+        reader.GetBitsPerSample() != 16)
    {
      resampled = CreateResampledStreamedEffect(std::move(reader), &error);
      if (!resampled)
@@ -531,6 +539,95 @@ void SoundEffectManager::MixFrames(AudioStream::SampleType* dest, const AudioStr
  }
 }

+void SoundEffectManager::ConvertTo16Bit(const void* src, std::span<AudioStream::SampleType> dst, u32 in_bits_per_sample)
+{
+  const u32 num_samples = static_cast<u32>(dst.size());
+  DebugAssert(in_bits_per_sample != 16);
+  if (in_bits_per_sample == 8)
+  {
+    // Convert 8-bit unsigned to 16-bit signed
+    const u8* src_ptr = static_cast<const u8*>(src);
+    AudioStream::SampleType* dst_ptr = dst.data();
+
+    u32 i = 0;
+#ifdef CPU_ARCH_SIMD
+    static constexpr u32 SAMPLES_PER_VEC = 16;
+    const u32 num_samples_aligned = Common::AlignDownPow2(num_samples, SAMPLES_PER_VEC);
+    for (; i < num_samples_aligned; i += SAMPLES_PER_VEC)
+    {
+      const GSVector4i vsrc =
+        GSVector4i::load<false>(src_ptr) ^ GSVector4i::cxpr8(-128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
+                                                             -128, -128, -128, -128, -128, -128);
+      const GSVector4i vlow = vsrc.upl8(vsrc).sll16<8>();
+      const GSVector4i vhi = vsrc.uph8(vsrc).sll16<8>();
+      GSVector4i::store<false>(dst_ptr, vlow);
+      GSVector4i::store<false>(dst_ptr + 8, vhi);
+      src_ptr += SAMPLES_PER_VEC;
+      dst_ptr += SAMPLES_PER_VEC;
+    }
+#endif
+
+    for (; i < num_samples; i++)
+    {
+      const s16 sample = (static_cast<s16>(*(src_ptr++)) ^ 0x80) << 8;
+      *(dst_ptr++) = sample;
+    }
+  }
+  else if (in_bits_per_sample == 24)
+  {
+    // Convert 24-bit signed to 16-bit signed
+    const u8* src_ptr = static_cast<const u8*>(src);
+    AudioStream::SampleType* dst_ptr = dst.data();
+    for (u32 i = 0; i < num_samples; i++)
+    {
+      const s32 sample =
+        (static_cast<s32>(src_ptr[0]) | (static_cast<s32>(src_ptr[1]) << 8) | (static_cast<s32>(src_ptr[2]) << 16));
+      // sign extend
+      const s32 signed_sample = (sample << 8) >> 8;
+      *(dst_ptr++) = static_cast<s16>(signed_sample >> 8);
+      src_ptr += 3;
+    }
+  }
+  else if (in_bits_per_sample == 32)
+  {
+    // Convert 32-bit signed to 16-bit signed
+    const s32* src_ptr = static_cast<const s32*>(src);
+    AudioStream::SampleType* dst_ptr = dst.data();
+
+    u32 i = 0;
+#ifdef CPU_ARCH_SIMD
+    static constexpr u32 SAMPLES_PER_VEC = 8;
+    const u32 num_samples_aligned = Common::AlignDownPow2(num_samples, SAMPLES_PER_VEC);
+    for (; i < num_samples_aligned; i += SAMPLES_PER_VEC)
+    {
+      const GSVector4i vsrclow = GSVector4i::load<false>(src_ptr).sra32<16>();
+      src_ptr += 4;
+      const GSVector4i vsrchigh = GSVector4i::load<false>(src_ptr).sra32<16>();
+      src_ptr += 4;
+
+      const GSVector4i vdest = vsrclow.ps32(vsrchigh);
+      GSVector4i::store<false>(dst_ptr, vdest);
+      dst_ptr += SAMPLES_PER_VEC;
+    }
+#endif
+
+    i = 0;
+    src_ptr = static_cast<const s32*>(src);
+    dst_ptr = dst.data();
+
+    for (; i < num_samples; i++)
+    {
+      const s32 sample = *(src_ptr++);
+      *(dst_ptr++) = static_cast<s16>(sample >> 16);
+    }
+  }
+  else
+  {
+    ERROR_LOG("Unsupported bits per sample: {}", in_bits_per_sample);
+    std::memset(dst.data(), 0, num_samples * sizeof(AudioStream::SampleType));
+  }
+}
+
 void SoundEffectManager::ConvertToStereo(std::span<const AudioStream::SampleType> src,
                                         std::span<AudioStream::SampleType> dst, u32 in_channels)
 {
@@ -593,16 +690,31 @@ void SoundEffectManager::SpeexResamplerStateDeleter::operator()(SpeexResamplerSt
  speex_resampler_destroy(state);
 }

-bool SoundEffectManager::ConvertFrames(CachedEffect& effect, u32 in_sample_rate, u32 in_channels, u32 in_frames,
-                                       Error* error)
+bool SoundEffectManager::ConvertFrames(CachedEffect& effect, const void* frames_in, u32 in_bits_per_sample,
+                                       u32 in_sample_rate, u32 in_channels, u32 in_frames, Error* error)
 {
  DynamicHeapArray<AudioStream::SampleType> temp_frames;

+  std::span<const s16> frames_in_span;
+
+  if (in_bits_per_sample != 16)
+  {
+    temp_frames.resize(in_frames * in_channels);
+    ConvertTo16Bit(frames_in, temp_frames, in_bits_per_sample);
+    effect.frames.swap(temp_frames);
+    frames_in_span = effect.frames;
+  }
+  else
+  {
+    frames_in_span = std::span<const s16>(static_cast<const s16*>(frames_in), in_frames * in_channels);
+  }
+
  if (in_channels != NUM_CHANNELS)
  {
    temp_frames.resize(in_frames * NUM_CHANNELS);
-    ConvertToStereo(effect.frames, temp_frames, in_channels);
+    ConvertToStereo(frames_in_span, temp_frames, in_channels);
    effect.frames.swap(temp_frames);
+    frames_in_span = effect.frames;
  }

  if (in_sample_rate != SAMPLE_RATE)
@@ -643,7 +755,7 @@ bool SoundEffectManager::ConvertFrames(CachedEffect& effect, u32 in_sample_rate,
      unsigned int frames_processed = in_frames - input_frames_count;
      unsigned int frames_generated = (static_cast<u32>(temp_frames.size() / NUM_CHANNELS) - output_frame_count);
      const int ret = speex_resampler_process_interleaved_int(
-        resampler_state.get(), (frames_processed > 0) ? &effect.frames[input_frames_count * NUM_CHANNELS] : nullptr,
+        resampler_state.get(), (frames_processed > 0) ? &frames_in_span[input_frames_count * NUM_CHANNELS] : nullptr,
        &frames_processed, &temp_frames[output_frame_count * NUM_CHANNELS], &frames_generated);
      if (ret != RESAMPLER_ERR_SUCCESS)
      {
@@ -700,16 +812,25 @@ u32 SoundEffectManager::ReadEntryFrames(PlayingResampledEffect& effect, AudioStr
    if (effect->input_buffer_pos == effect->input_buffer_size && effect->reader.GetRemainingFrames() > 0)
    {
      const bool needs_upmix = (effect->reader.GetNumChannels() != NUM_CHANNELS);
-      if (needs_upmix)
+      const bool needs_shift = (effect->reader.GetBitsPerSample() != 16);
+      if (needs_upmix || needs_shift)
      {
-        const u32 required_buffer_space = ResampledStreamedEffect::INPUT_BUFFER_SIZE * effect->reader.GetNumChannels();
+        u32 required_buffer_space = ((ResampledStreamedEffect::INPUT_BUFFER_SIZE * effect->reader.GetNumChannels() *
+                                      effect->reader.GetBytesPerFrame()) +
+                                     (sizeof(s16) - 1)) /
+                                    sizeof(s16);
+
+        // use second half of buffer as scratch when doing both
+        if (needs_upmix && needs_shift)
+          required_buffer_space += ResampledStreamedEffect::INPUT_BUFFER_SIZE * effect->reader.GetNumChannels();
+
        if (required_buffer_space > s_locals.temp_buffer.size())
          s_locals.temp_buffer.resize(required_buffer_space);
      }

      Error error;
-      const std::optional<u32> frames =
-        effect->reader.ReadFrames(needs_upmix ? s_locals.temp_buffer.data() : effect->input_buffer.data(),
+      const std::optional<u32> frames = effect->reader.ReadFrames(
+        (needs_upmix || needs_shift) ? s_locals.temp_buffer.data() : effect->input_buffer.data(),
        ResampledStreamedEffect::INPUT_BUFFER_SIZE, &error);
      if (!frames.has_value())
      {
@@ -717,11 +838,26 @@ u32 SoundEffectManager::ReadEntryFrames(PlayingResampledEffect& effect, AudioStr
        break;
      }

-      if (needs_upmix && frames.value() > 0)
+      if (frames.value() > 0)
      {
-        ConvertToStereo(s_locals.temp_buffer.cspan(0, frames.value() * effect->reader.GetNumChannels()),
-                        std::span(effect->input_buffer).subspan(0, frames.value() * NUM_CHANNELS),
-                        effect->reader.GetNumChannels());
+        const u32 total_samples = frames.value() * effect->reader.GetNumChannels();
+        const std::span<AudioStream::SampleType> final_frames_out =
+          std::span(effect->input_buffer).subspan(0, frames.value() * NUM_CHANNELS);
+        std::span<AudioStream::SampleType> frames_in;
+        if (needs_shift)
+        {
+          std::span<AudioStream::SampleType> frames_out =
+            needs_upmix ? s_locals.temp_buffer.span(s_locals.temp_buffer.size() - total_samples) : final_frames_out;
+          ConvertTo16Bit(s_locals.temp_buffer.data(), frames_out, effect->reader.GetBitsPerSample());
+          frames_in = frames_out;
+        }
+        else
+        {
+          frames_in = s_locals.temp_buffer.span(0, total_samples);
+        }
+
+        if (needs_upmix)
+          ConvertToStereo(frames_in, final_frames_out, effect->reader.GetNumChannels());
      }

      effect->input_buffer_pos = 0;
--- a/src/util/wav_reader_writer.cpp
+++ b/src/util/wav_reader_writer.cpp
@@ -64,9 +64,10 @@ WAVReader::WAVReader(WAVReader&& move)
 {
  m_file = std::exchange(move.m_file, nullptr);
  m_frames_start = std::exchange(move.m_frames_start, 0);
+  m_bits_per_sample = std::exchange(move.m_bits_per_sample, static_cast<u8>(0));
+  m_num_channels = std::exchange(move.m_num_channels, static_cast<u8>(0));
  m_bytes_per_frame = std::exchange(move.m_bytes_per_frame, static_cast<u16>(0));
  m_sample_rate = std::exchange(move.m_sample_rate, 0);
-  m_num_channels = std::exchange(move.m_num_channels, static_cast<u16>(0));
  m_num_frames = std::exchange(move.m_num_frames, 0);
  m_current_frame = std::exchange(move.m_current_frame, 0);
 }
@@ -81,8 +82,9 @@ WAVReader& WAVReader::operator=(WAVReader&& move)
 {
  m_file = std::exchange(move.m_file, nullptr);
  m_frames_start = std::exchange(move.m_frames_start, 0);
+  m_bits_per_sample = std::exchange(move.m_bits_per_sample, static_cast<u8>(0));
+  m_num_channels = std::exchange(move.m_num_channels, static_cast<u8>(0));
  m_bytes_per_frame = std::exchange(move.m_bytes_per_frame, static_cast<u16>(0));
-  m_num_channels = std::exchange(move.m_num_channels, static_cast<u16>(0));
  m_sample_rate = std::exchange(move.m_sample_rate, 0);
  m_num_frames = std::exchange(move.m_num_frames, 0);
  m_current_frame = std::exchange(move.m_current_frame, 0);
@@ -162,7 +164,9 @@ bool WAVReader::Open(const char* path, Error* error /*= nullptr*/)
    return false;
  }

-  if (format.sample_rate == 0 || format.num_channels == 0 || format.bits_per_sample != 16)
+  if (format.sample_rate == 0 || format.num_channels == 0 ||
+      (format.bits_per_sample != 8 && format.bits_per_sample != 16 && format.bits_per_sample != 24 &&
+       format.bits_per_sample != 32))
  {
    Error::SetStringFmt(error, "Unsupported file format samplerate={} channels={} bits={}", format.sample_rate,
                        format.num_channels, format.bits_per_sample);
@@ -176,7 +180,8 @@ bool WAVReader::Open(const char* path, Error* error /*= nullptr*/)
    return false;
  }

-  const u32 num_frames = data.chunk_size / (sizeof(s16) * format.num_channels);
+  const u32 bytes_per_frame = (format.bits_per_sample / 8) * format.num_channels;
+  const u32 num_frames = (bytes_per_frame > 0) ? (data.chunk_size / bytes_per_frame) : 0;
  if (num_frames == 0)
  {
    Error::SetStringFmt(error, "File has no frames");
@@ -185,9 +190,10 @@ bool WAVReader::Open(const char* path, Error* error /*= nullptr*/)

  m_file = fp.release();
  m_frames_start = FileSystem::FTell64(m_file);
+  m_bits_per_sample = static_cast<u8>(format.bits_per_sample);
+  m_num_channels = static_cast<u8>(format.num_channels);
+  m_bytes_per_frame = static_cast<s16>(bytes_per_frame);
  m_sample_rate = format.sample_rate;
-  m_bytes_per_frame = sizeof(s16) * format.num_channels;
-  m_num_channels = format.num_channels;
  m_num_frames = num_frames;
  m_current_frame = 0;
  return true;
@@ -201,8 +207,9 @@ void WAVReader::Close()
  std::fclose(m_file);
  m_file = nullptr;
  m_frames_start = 0;
-  m_bytes_per_frame = 0;
+  m_bits_per_sample = 0;
  m_num_channels = 0;
+  m_bytes_per_frame = 0;
  m_sample_rate = 0;
  m_num_frames = 0;
  m_current_frame = 0;
@@ -213,7 +220,9 @@ std::FILE* WAVReader::TakeFile()
  std::FILE* ret = std::exchange(m_file, nullptr);
  m_frames_start = 0;
  m_bytes_per_frame = 0;
+  m_bits_per_sample = 0;
  m_num_channels = 0;
+  m_bytes_per_frame = 0;
  m_sample_rate = 0;
  m_num_frames = 0;
  m_current_frame = 0;
@@ -349,7 +358,9 @@ std::optional<WAVReader::MemoryParseResult> WAVReader::ParseMemory(const void* d
    return result;
  }

-  if (format.sample_rate == 0 || format.num_channels == 0 || format.bits_per_sample != 16)
+  if (format.sample_rate == 0 || format.num_channels == 0 ||
+      (format.bits_per_sample != 8 && format.bits_per_sample != 16 && format.bits_per_sample != 24 &&
+       format.bits_per_sample != 32))
  {
    Error::SetStringFmt(error, "Unsupported file format samplerate={} channels={} bits={}", format.sample_rate,
                        format.num_channels, format.bits_per_sample);
@@ -364,7 +375,8 @@ std::optional<WAVReader::MemoryParseResult> WAVReader::ParseMemory(const void* d
    return result;
  }

-  const u32 num_frames = static_cast<u32>(sample_data->size() / (sizeof(s16) * format.num_channels));
+  const u32 bytes_per_frame = (format.bits_per_sample / 8) * format.num_channels;
+  const u32 num_frames = (bytes_per_frame > 0) ? (static_cast<u32>(sample_data->size() / bytes_per_frame)) : 0;
  if (num_frames == 0)
  {
    Error::SetStringFmt(error, "File has no frames");
@@ -372,8 +384,9 @@ std::optional<WAVReader::MemoryParseResult> WAVReader::ParseMemory(const void* d
  }

  result.emplace();
-  result->bytes_per_frame = sizeof(s16) * format.num_channels;
+  result->bits_per_sample = format.bits_per_sample;
  result->sample_rate = format.sample_rate;
+  result->bytes_per_frame = bytes_per_frame;
  result->num_channels = format.num_channels;
  result->num_frames = num_frames;
  result->sample_data = sample_data->data();
--- a/src/util/wav_reader_writer.h
+++ b/src/util/wav_reader_writer.h
@@ -25,6 +25,7 @@ public:
  ALWAYS_INLINE u32 GetSampleRate() const { return m_sample_rate; }
  ALWAYS_INLINE u32 GetNumChannels() const { return m_num_channels; }
  ALWAYS_INLINE u32 GetNumFrames() const { return m_num_frames; }
+  ALWAYS_INLINE u32 GetBitsPerSample() const { return m_bits_per_sample; }
  ALWAYS_INLINE u32 GetBytesPerFrame() const { return m_bytes_per_frame; }
  ALWAYS_INLINE u64 GetFramesStartOffset() const { return m_frames_start; }
  ALWAYS_INLINE bool IsOpen() const { return (m_file != nullptr); }
@@ -43,8 +44,9 @@ public:

  struct MemoryParseResult
  {
-    u32 bytes_per_frame;
+    u32 bits_per_sample;
    u32 sample_rate;
+    u32 bytes_per_frame;
    u32 num_channels;
    u32 num_frames;
    const void* sample_data;
@@ -57,8 +59,9 @@ private:

  std::FILE* m_file = nullptr;
  s64 m_frames_start = 0;
+  u8 m_bits_per_sample = 0;
+  u8 m_num_channels = 0;
  u16 m_bytes_per_frame = 0;
-  u16 m_num_channels = 0;
  u32 m_sample_rate = 0;
  u32 m_num_frames = 0;
  u32 m_current_frame = 0;