SoundEffectManager: Support converting float wave files too

2026-02-04 05:04:33 +00:00 · 2026-01-06 01:15:22 +10:00
parent 4522449156
commit cd9e3ffb85
3 changed files with 177 additions and 121 deletions
--- a/src/core/sound_effect_manager.cpp
+++ b/src/core/sound_effect_manager.cpp
@@ -129,12 +129,13 @@ static void MixFrames(AudioStream::SampleType* dest, const AudioStream::SampleTy
 /// Stops the stream if there are no active sounds.
 static void StopStreamIfInactive();

-static void ConvertTo16Bit(const void* src, std::span<AudioStream::SampleType> dst, u32 in_bits_per_sample);
+static void ConvertTo16Bit(const void* src, std::span<AudioStream::SampleType> dst, WAVReader::Format in_format,
+                           u32 in_bits_per_sample);
 static void ConvertToStereo(std::span<const AudioStream::SampleType> src, std::span<AudioStream::SampleType> dst,
                            u32 in_channels);

-static bool ConvertFrames(CachedEffect& effect, const void* frames_in, u32 in_bits_per_sample, u32 in_sample_rate,
-                          u32 in_channels, u32 in_frames, Error* error);
+static bool ConvertFrames(CachedEffect& effect, const void* frames_in, WAVReader::Format in_format,
+                          u32 in_bits_per_sample, u32 in_sample_rate, u32 in_channels, u32 in_frames, Error* error);

 static PlayingResampledEffect CreateResampledStreamedEffect(WAVReader&& reader, Error* error);

@@ -229,9 +230,10 @@ bool SoundEffectManager::LoadCachedEffect(const std::string& resource_name, cons
  if (parsed->num_frames > 0)
  {
    SpeexResamplerStatePtr resampler_state;
-    if (parsed->bits_per_sample != 16 || parsed->sample_rate != SAMPLE_RATE || parsed->num_channels != NUM_CHANNELS)
+    if (parsed->format != WAVReader::PCMFormat || parsed->bits_per_sample != 16 || parsed->sample_rate != SAMPLE_RATE ||
+        parsed->num_channels != NUM_CHANNELS)
    {
-      if (!ConvertFrames(*effect, parsed->sample_data, parsed->bits_per_sample, parsed->sample_rate,
+      if (!ConvertFrames(*effect, parsed->sample_data, parsed->format, parsed->bits_per_sample, parsed->sample_rate,
                         parsed->num_channels, parsed->num_frames, error))
      {
        return false;
@@ -292,8 +294,8 @@ void SoundEffectManager::StreamSoundEffect(std::string path)
    }

    PlayingResampledEffect resampled;
-    if (reader.GetSampleRate() != SAMPLE_RATE || reader.GetNumChannels() != NUM_CHANNELS ||
-        reader.GetBitsPerSample() != 16)
+    if (reader.GetFormat() != WAVReader::PCMFormat || reader.GetSampleRate() != SAMPLE_RATE ||
+        reader.GetNumChannels() != NUM_CHANNELS || reader.GetBitsPerSample() != 16)
    {
      resampled = CreateResampledStreamedEffect(std::move(reader), &error);
      if (!resampled)
@@ -539,93 +541,132 @@ void SoundEffectManager::MixFrames(AudioStream::SampleType* dest, const AudioStr
  }
 }

-void SoundEffectManager::ConvertTo16Bit(const void* src, std::span<AudioStream::SampleType> dst, u32 in_bits_per_sample)
+void SoundEffectManager::ConvertTo16Bit(const void* src, std::span<AudioStream::SampleType> dst,
+                                        WAVReader::Format in_format, u32 in_bits_per_sample)
 {
  const u32 num_samples = static_cast<u32>(dst.size());
-  DebugAssert(in_bits_per_sample != 16);
-  if (in_bits_per_sample == 8)
+  if (in_format == WAVReader::PCMFormat)
  {
-    // Convert 8-bit unsigned to 16-bit signed
-    const u8* src_ptr = static_cast<const u8*>(src);
-    AudioStream::SampleType* dst_ptr = dst.data();
-
-    u32 i = 0;
-#ifdef CPU_ARCH_SIMD
-    static constexpr u32 SAMPLES_PER_VEC = 16;
-    const u32 num_samples_aligned = Common::AlignDownPow2(num_samples, SAMPLES_PER_VEC);
-    for (; i < num_samples_aligned; i += SAMPLES_PER_VEC)
+    DebugAssert(in_bits_per_sample != 16);
+    if (in_bits_per_sample == 8)
    {
-      const GSVector4i vsrc =
-        GSVector4i::load<false>(src_ptr) ^ GSVector4i::cxpr8(-128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
-                                                             -128, -128, -128, -128, -128, -128);
-      const GSVector4i vlow = vsrc.upl8(vsrc).sll16<8>();
-      const GSVector4i vhi = vsrc.uph8(vsrc).sll16<8>();
-      GSVector4i::store<false>(dst_ptr, vlow);
-      GSVector4i::store<false>(dst_ptr + 8, vhi);
-      src_ptr += SAMPLES_PER_VEC;
-      dst_ptr += SAMPLES_PER_VEC;
-    }
+      // Convert 8-bit unsigned to 16-bit signed
+      const u8* src_ptr = static_cast<const u8*>(src);
+      AudioStream::SampleType* dst_ptr = dst.data();
+
+      u32 i = 0;
+#ifdef CPU_ARCH_SIMD
+      static constexpr u32 SAMPLES_PER_VEC = 16;
+      const u32 num_samples_aligned = Common::AlignDownPow2(num_samples, SAMPLES_PER_VEC);
+      for (; i < num_samples_aligned; i += SAMPLES_PER_VEC)
+      {
+        const GSVector4i vsrc =
+          GSVector4i::load<false>(src_ptr) ^ GSVector4i::cxpr8(-128, -128, -128, -128, -128, -128, -128, -128, -128,
+                                                               -128, -128, -128, -128, -128, -128, -128);
+        const GSVector4i vlow = vsrc.upl8(vsrc).sll16<8>();
+        const GSVector4i vhi = vsrc.uph8(vsrc).sll16<8>();
+        GSVector4i::store<false>(dst_ptr, vlow);
+        GSVector4i::store<false>(dst_ptr + 8, vhi);
+        src_ptr += SAMPLES_PER_VEC;
+        dst_ptr += SAMPLES_PER_VEC;
+      }
 #endif

-    for (; i < num_samples; i++)
-    {
-      const s16 sample = (static_cast<s16>(*(src_ptr++)) ^ 0x80) << 8;
-      *(dst_ptr++) = sample;
-    }
-  }
-  else if (in_bits_per_sample == 24)
-  {
-    // Convert 24-bit signed to 16-bit signed
-    const u8* src_ptr = static_cast<const u8*>(src);
-    AudioStream::SampleType* dst_ptr = dst.data();
-    for (u32 i = 0; i < num_samples; i++)
-    {
-      const s32 sample =
-        (static_cast<s32>(src_ptr[0]) | (static_cast<s32>(src_ptr[1]) << 8) | (static_cast<s32>(src_ptr[2]) << 16));
-      // sign extend
-      const s32 signed_sample = (sample << 8) >> 8;
-      *(dst_ptr++) = static_cast<s16>(signed_sample >> 8);
-      src_ptr += 3;
-    }
-  }
-  else if (in_bits_per_sample == 32)
-  {
-    // Convert 32-bit signed to 16-bit signed
-    const s32* src_ptr = static_cast<const s32*>(src);
-    AudioStream::SampleType* dst_ptr = dst.data();
+      for (; i < num_samples; i++)
+      {
+        const s16 sample = (static_cast<s16>(*(src_ptr++)) ^ 0x80) << 8;
+        *(dst_ptr++) = sample;
+      }

-    u32 i = 0;
+      return;
+    }
+    else if (in_bits_per_sample == 24)
+    {
+      // Convert 24-bit signed to 16-bit signed
+      const u8* src_ptr = static_cast<const u8*>(src);
+      AudioStream::SampleType* dst_ptr = dst.data();
+      for (u32 i = 0; i < num_samples; i++)
+      {
+        const s32 sample =
+          (static_cast<s32>(src_ptr[0]) | (static_cast<s32>(src_ptr[1]) << 8) | (static_cast<s32>(src_ptr[2]) << 16));
+        // sign extend
+        const s32 signed_sample = (sample << 8) >> 8;
+        *(dst_ptr++) = static_cast<s16>(signed_sample >> 8);
+        src_ptr += 3;
+      }
+
+      return;
+    }
+    else if (in_bits_per_sample == 32)
+    {
+      // Convert 32-bit signed to 16-bit signed
+      const s32* src_ptr = static_cast<const s32*>(src);
+      AudioStream::SampleType* dst_ptr = dst.data();
+
+      u32 i = 0;
 #ifdef CPU_ARCH_SIMD
-    static constexpr u32 SAMPLES_PER_VEC = 8;
-    const u32 num_samples_aligned = Common::AlignDownPow2(num_samples, SAMPLES_PER_VEC);
-    for (; i < num_samples_aligned; i += SAMPLES_PER_VEC)
-    {
-      const GSVector4i vsrclow = GSVector4i::load<false>(src_ptr).sra32<16>();
-      src_ptr += 4;
-      const GSVector4i vsrchigh = GSVector4i::load<false>(src_ptr).sra32<16>();
-      src_ptr += 4;
+      static constexpr u32 SAMPLES_PER_VEC = 8;
+      const u32 num_samples_aligned = Common::AlignDownPow2(num_samples, SAMPLES_PER_VEC);
+      for (; i < num_samples_aligned; i += SAMPLES_PER_VEC)
+      {
+        const GSVector4i vsrclow = GSVector4i::load<false>(src_ptr).sra32<16>();
+        src_ptr += 4;
+        const GSVector4i vsrchigh = GSVector4i::load<false>(src_ptr).sra32<16>();
+        src_ptr += 4;

-      const GSVector4i vdest = vsrclow.ps32(vsrchigh);
-      GSVector4i::store<false>(dst_ptr, vdest);
-      dst_ptr += SAMPLES_PER_VEC;
-    }
+        const GSVector4i vdest = vsrclow.ps32(vsrchigh);
+        GSVector4i::store<false>(dst_ptr, vdest);
+        dst_ptr += SAMPLES_PER_VEC;
+      }
 #endif

-    i = 0;
-    src_ptr = static_cast<const s32*>(src);
-    dst_ptr = dst.data();
+      for (; i < num_samples; i++)
+      {
+        const s32 sample = *(src_ptr++);
+        *(dst_ptr++) = static_cast<s16>(sample >> 16);
+      }

-    for (; i < num_samples; i++)
-    {
-      const s32 sample = *(src_ptr++);
-      *(dst_ptr++) = static_cast<s16>(sample >> 16);
+      return;
    }
  }
-  else
+  else if (in_format == WAVReader::FloatFormat)
  {
-    ERROR_LOG("Unsupported bits per sample: {}", in_bits_per_sample);
-    std::memset(dst.data(), 0, num_samples * sizeof(AudioStream::SampleType));
+    if (in_bits_per_sample == 32)
+    {
+      // Convert 32-bit float to 16-bit signed. Samples are scaled by 32767; the scalar tail saturates to s16.
+      const float* src_ptr = static_cast<const float*>(src);
+      AudioStream::SampleType* dst_ptr = dst.data();

+      u32 i = 0;
+#ifdef CPU_ARCH_SIMD
+      static constexpr u32 SAMPLES_PER_VEC = 8;
+      static constexpr GSVector4 MULT = GSVector4::cxpr(32767.0f);
+      const u32 num_samples_aligned = Common::AlignDownPow2(num_samples, SAMPLES_PER_VEC);
+      for (; i < num_samples_aligned; i += SAMPLES_PER_VEC)
+      {
+        const GSVector4i vsrclow = GSVector4i(GSVector4::load<false>(src_ptr) * MULT);
+        src_ptr += 4;
+        const GSVector4i vsrchigh = GSVector4i(GSVector4::load<false>(src_ptr) * MULT);
+        src_ptr += 4;

+        const GSVector4i vdest = vsrclow.ps32(vsrchigh); // presumably a saturating 32->16 pack - TODO confirm
+        GSVector4i::store<false>(dst_ptr, vdest);
+        dst_ptr += SAMPLES_PER_VEC;
+      }
+#endif

+      for (; i < num_samples; i++) // clamp below: an out-of-range or NaN float->s16 cast is undefined behaviour
+      {
+        const float scaled = *(src_ptr++) * 32767.0f; // exceeds the s16 range whenever |sample| > 1
+        *(dst_ptr++) = static_cast<s16>((scaled >= 32767.0f) ? 32767.0f : ((scaled > -32768.0f) ? scaled : -32768.0f));
+      }

+      return;
+    }
  }
+
+  ERROR_LOG("Unsupported format/bits per sample: {}/{}", static_cast<u32>(in_format), in_bits_per_sample);
+  std::memset(dst.data(), 0, num_samples * sizeof(AudioStream::SampleType));
 }

 void SoundEffectManager::ConvertToStereo(std::span<const AudioStream::SampleType> src,
@@ -690,17 +731,18 @@ void SoundEffectManager::SpeexResamplerStateDeleter::operator()(SpeexResamplerSt
  speex_resampler_destroy(state);
 }

-bool SoundEffectManager::ConvertFrames(CachedEffect& effect, const void* frames_in, u32 in_bits_per_sample,
-                                       u32 in_sample_rate, u32 in_channels, u32 in_frames, Error* error)
+bool SoundEffectManager::ConvertFrames(CachedEffect& effect, const void* frames_in, WAVReader::Format in_format,
+                                       u32 in_bits_per_sample, u32 in_sample_rate, u32 in_channels, u32 in_frames,
+                                       Error* error)
 {
  DynamicHeapArray<AudioStream::SampleType> temp_frames;

  std::span<const s16> frames_in_span;

-  if (in_bits_per_sample != 16)
+  if (in_format != WAVReader::PCMFormat || in_bits_per_sample != 16)
  {
    temp_frames.resize(in_frames * in_channels);
-    ConvertTo16Bit(frames_in, temp_frames, in_bits_per_sample);
+    ConvertTo16Bit(frames_in, temp_frames, in_format, in_bits_per_sample);
    effect.frames.swap(temp_frames);
    frames_in_span = effect.frames;
  }
@@ -848,7 +890,8 @@ u32 SoundEffectManager::ReadEntryFrames(PlayingResampledEffect& effect, AudioStr
        {
          std::span<AudioStream::SampleType> frames_out =
            needs_upmix ? s_locals.temp_buffer.span(s_locals.temp_buffer.size() - total_samples) : final_frames_out;
-          ConvertTo16Bit(s_locals.temp_buffer.data(), frames_out, effect->reader.GetBitsPerSample());
+          ConvertTo16Bit(s_locals.temp_buffer.data(), frames_out, effect->reader.GetFormat(),
+                         effect->reader.GetBitsPerSample());
          frames_in = frames_out;
        }
        else
--- a/src/util/wav_reader_writer.cpp
+++ b/src/util/wav_reader_writer.cpp
@@ -64,9 +64,10 @@ WAVReader::WAVReader(WAVReader&& move)
 {
  m_file = std::exchange(move.m_file, nullptr);
  m_frames_start = std::exchange(move.m_frames_start, 0);
+  m_format = std::exchange(move.m_format, InvalidFormat);
  m_bits_per_sample = std::exchange(move.m_bits_per_sample, static_cast<u8>(0));
  m_num_channels = std::exchange(move.m_num_channels, static_cast<u8>(0));
-  m_bytes_per_frame = std::exchange(move.m_bytes_per_frame, static_cast<u16>(0));
+  m_bytes_per_frame = std::exchange(move.m_bytes_per_frame, static_cast<u8>(0));
  m_sample_rate = std::exchange(move.m_sample_rate, 0);
  m_num_frames = std::exchange(move.m_num_frames, 0);
  m_current_frame = std::exchange(move.m_current_frame, 0);
@@ -82,9 +83,10 @@ WAVReader& WAVReader::operator=(WAVReader&& move)
 {
  m_file = std::exchange(move.m_file, nullptr);
  m_frames_start = std::exchange(move.m_frames_start, 0);
+  m_format = std::exchange(move.m_format, InvalidFormat);
  m_bits_per_sample = std::exchange(move.m_bits_per_sample, static_cast<u8>(0));
  m_num_channels = std::exchange(move.m_num_channels, static_cast<u8>(0));
-  m_bytes_per_frame = std::exchange(move.m_bytes_per_frame, static_cast<u16>(0));
+  m_bytes_per_frame = std::exchange(move.m_bytes_per_frame, static_cast<u8>(0));
  m_sample_rate = std::exchange(move.m_sample_rate, 0);
  m_num_frames = std::exchange(move.m_num_frames, 0);
  m_current_frame = std::exchange(move.m_current_frame, 0);
@@ -137,6 +139,27 @@ static bool FindChunk(std::FILE* fp, T* chunk, u32 tag, Error* error, bool skip_
  }
 }

+/// Accepts only PCM (8/16/24/32-bit) or float (32-bit) data with 1-8 channels and a non-zero rate; else sets error.
+static bool ValidateFormat(const WAV_FULL_HEADER::FormatChunk& format, Error* error)
+{
+  const bool is_pcm = (format.audio_format == WAVReader::PCMFormat);
+  const bool is_float = (format.audio_format == WAVReader::FloatFormat);
+  if (!is_pcm && !is_float)
+  {
+    Error::SetStringFmt(error, "Unsupported audio format {}", format.audio_format);
+    return false;
+  }
+
+  const auto bits = format.bits_per_sample;
+  const bool bits_ok = is_pcm ? (bits == 8 || bits == 16 || bits == 24 || bits == 32) : (bits == 32);
+  if (format.sample_rate != 0 && format.num_channels != 0 && format.num_channels <= 8 && bits_ok)
+    return true;
+
+  Error::SetStringFmt(error, "Unsupported file format format={} samplerate={} channels={} bits={}",
+                      format.audio_format, format.sample_rate, format.num_channels, format.bits_per_sample);
+  return false;
+}
+
 bool WAVReader::Open(const char* path, Error* error /*= nullptr*/)
 {
  auto fp = FileSystem::OpenManagedCFile(path, "rb", error);
@@ -158,20 +181,8 @@ bool WAVReader::Open(const char* path, Error* error /*= nullptr*/)
    return false;
  }

-  if (format.audio_format != 1) // PCM
-  {
-    Error::SetStringFmt(error, "Unsupported audio format {}", format.audio_format);
+  if (!ValidateFormat(format, error))
    return false;
-  }
-
-  if (format.sample_rate == 0 || format.num_channels == 0 ||
-      (format.bits_per_sample != 8 && format.bits_per_sample != 16 && format.bits_per_sample != 24 &&
-       format.bits_per_sample != 32))
-  {
-    Error::SetStringFmt(error, "Unsupported file format samplerate={} channels={} bits={}", format.sample_rate,
-                        format.num_channels, format.bits_per_sample);
-    return false;
-  }

  WAV_CHUNK_HEADER data;
  if (!FindChunk(fp.get(), &data, DATA_VALUE, error, false))
@@ -180,7 +191,7 @@ bool WAVReader::Open(const char* path, Error* error /*= nullptr*/)
    return false;
  }

-  const u32 bytes_per_frame = (format.bits_per_sample / 8) * format.num_channels;
+  const u8 bytes_per_frame = static_cast<u8>((format.bits_per_sample / 8) * format.num_channels);
  const u32 num_frames = (bytes_per_frame > 0) ? (data.chunk_size / bytes_per_frame) : 0;
  if (num_frames == 0)
  {
@@ -190,9 +201,10 @@ bool WAVReader::Open(const char* path, Error* error /*= nullptr*/)

  m_file = fp.release();
  m_frames_start = FileSystem::FTell64(m_file);
+  m_format = static_cast<Format>(format.audio_format);
  m_bits_per_sample = static_cast<u8>(format.bits_per_sample);
  m_num_channels = static_cast<u8>(format.num_channels);
-  m_bytes_per_frame = static_cast<s16>(bytes_per_frame);
+  m_bytes_per_frame = bytes_per_frame;
  m_sample_rate = format.sample_rate;
  m_num_frames = num_frames;
  m_current_frame = 0;
@@ -207,6 +219,7 @@ void WAVReader::Close()
  std::fclose(m_file);
  m_file = nullptr;
  m_frames_start = 0;
+  m_format = InvalidFormat;
  m_bits_per_sample = 0;
  m_num_channels = 0;
  m_bytes_per_frame = 0;
@@ -219,6 +232,7 @@ std::FILE* WAVReader::TakeFile()
 {
  std::FILE* ret = std::exchange(m_file, nullptr);
  m_frames_start = 0;
+  m_format = InvalidFormat;
  m_bytes_per_frame = 0;
  m_bits_per_sample = 0;
  m_num_channels = 0;
@@ -352,20 +366,8 @@ std::optional<WAVReader::MemoryParseResult> WAVReader::ParseMemory(const void* d
    return result;
  }

-  if (format.audio_format != 1) // PCM
-  {
-    Error::SetStringFmt(error, "Unsupported audio format {}", format.audio_format);
+  if (!ValidateFormat(format, error))
    return result;
-  }
-
-  if (format.sample_rate == 0 || format.num_channels == 0 ||
-      (format.bits_per_sample != 8 && format.bits_per_sample != 16 && format.bits_per_sample != 24 &&
-       format.bits_per_sample != 32))
-  {
-    Error::SetStringFmt(error, "Unsupported file format samplerate={} channels={} bits={}", format.sample_rate,
-                        format.num_channels, format.bits_per_sample);
-    return result;
-  }

  WAV_CHUNK_HEADER data_chunk;
  std::optional<std::span<const u8>> sample_data = FindChunk(whole_file, &data_chunk, DATA_VALUE, error);
@@ -375,7 +377,7 @@ std::optional<WAVReader::MemoryParseResult> WAVReader::ParseMemory(const void* d
    return result;
  }

-  const u32 bytes_per_frame = (format.bits_per_sample / 8) * format.num_channels;
+  const u8 bytes_per_frame = static_cast<u8>((format.bits_per_sample / 8) * format.num_channels);
  const u32 num_frames = (bytes_per_frame > 0) ? (static_cast<u32>(sample_data->size() / bytes_per_frame)) : 0;
  if (num_frames == 0)
  {
@@ -384,10 +386,11 @@ std::optional<WAVReader::MemoryParseResult> WAVReader::ParseMemory(const void* d
  }

  result.emplace();
-  result->bits_per_sample = format.bits_per_sample;
-  result->sample_rate = format.sample_rate;
+  result->format = static_cast<Format>(format.audio_format);
+  result->bits_per_sample = static_cast<u8>(format.bits_per_sample);
+  result->num_channels = static_cast<u8>(format.num_channels);
  result->bytes_per_frame = bytes_per_frame;
-  result->num_channels = format.num_channels;
+  result->sample_rate = format.sample_rate;
  result->num_frames = num_frames;
  result->sample_data = sample_data->data();
  return result;
--- a/src/util/wav_reader_writer.h
+++ b/src/util/wav_reader_writer.h
@@ -14,6 +14,13 @@ class Error;
 class WAVReader
 {
 public:
+  enum Format : u8 // Sample encoding; values are cast directly from the fmt chunk's audio_format field.
+  {
+    InvalidFormat = 0, // State of a reader that is closed, moved-from, or had its file taken.
+    PCMFormat = 1,     // Integer PCM; 8, 16, 24 or 32 bits per sample are accepted.
+    FloatFormat = 3,   // Floating-point samples; only 32 bits per sample are accepted.
+  };
+
  WAVReader();
  WAVReader(WAVReader&& move);
  WAVReader(const WAVReader&) = delete;
@@ -22,6 +29,7 @@ public:
  WAVReader& operator=(WAVReader&& move);
  WAVReader& operator=(const WAVReader&) = delete;

+  ALWAYS_INLINE Format GetFormat() const { return m_format; }
  ALWAYS_INLINE u32 GetSampleRate() const { return m_sample_rate; }
  ALWAYS_INLINE u32 GetNumChannels() const { return m_num_channels; }
  ALWAYS_INLINE u32 GetNumFrames() const { return m_num_frames; }
@@ -44,10 +52,11 @@ public:

  struct MemoryParseResult
  {
-    u32 bits_per_sample;
+    Format format; // Sample encoding read from the fmt chunk's audio_format field.
+    u8 bits_per_sample; // Narrowed from the fmt chunk value; validation limits it to 8/16/24/32.
+    u8 num_channels; // Validation limits this to 1-8.
+    u8 bytes_per_frame; // (bits_per_sample / 8) * num_channels.
    u32 sample_rate;
-    u32 bytes_per_frame;
-    u32 num_channels;
    u32 num_frames;
    const void* sample_data;
  };
@@ -59,9 +68,10 @@ private:

  std::FILE* m_file = nullptr;
  s64 m_frames_start = 0;
+  Format m_format = InvalidFormat; // a never-opened reader matches what Close()/TakeFile()/moves leave behind
  u8 m_bits_per_sample = 0;
  u8 m_num_channels = 0;
-  u16 m_bytes_per_frame = 0;
+  u8 m_bytes_per_frame = 0;
  u32 m_sample_rate = 0;
  u32 m_num_frames = 0;
  u32 m_current_frame = 0;