GPU: Align the memory allocation rather than the class

2026-02-04 05:04:33 +00:00 · 2025-11-21 00:29:42 +10:00
parent defbe7061b
commit aecdfd5a1d
6 changed files with 48 additions and 25 deletions
--- a/src/common/align.h
+++ b/src/common/align.h
@@ -1,4 +1,4 @@
-// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
+// SPDX-FileCopyrightText: 2019-2025 Connor McLaughlin <stenzek@gmail.com>
 // SPDX-License-Identifier: CC-BY-NC-ND-4.0

 #pragma once
@@ -118,25 +118,46 @@ namespace detail {
 template<class T>
 struct unique_aligned_ptr_deleter
 {
-  ALWAYS_INLINE void operator()(T* ptr) { Common::AlignedFree(ptr); }
+  // Destroys *ptr when T is a non-array type with a non-trivial destructor, then hands the storage to
+  // Common::AlignedFree. unique_aligned_ptr strips one array extent before naming this deleter, and the
+  // array overloads of make_unique_aligned require trivially destructible elements, so those skip ~T().
+  ALWAYS_INLINE void operator()(T* ptr) const
+  {
+    constexpr bool needs_destroy = !(std::is_array_v<T> || std::is_trivially_destructible_v<T>);
+    if constexpr (needs_destroy)
+    {
+      if (ptr == nullptr)
+        return;
+      ptr->~T();
+    }
+    Common::AlignedFree(const_cast<std::remove_cv_t<T>*>(ptr));
+  }
+
+  // Converting constructor for derived-to-base pointer conversions. Destroying a U through a T* relies on
+  // T having a virtual destructor, and the T* itself is what operator() passes to Common::AlignedFree.
+  template<class U>
+    requires std::is_convertible_v<U*, T*>
+  constexpr unique_aligned_ptr_deleter(const unique_aligned_ptr_deleter<U>&) noexcept {}
+
+  constexpr unique_aligned_ptr_deleter() noexcept = default;
 };
-
-template<class>
-constexpr bool is_unbounded_array_v = false;
-template<class T>
-constexpr bool is_unbounded_array_v<T[]> = true;
-
-template<class>
-constexpr bool is_bounded_array_v = false;
-template<class T, std::size_t N>
-constexpr bool is_bounded_array_v<T[N]> = true;
 } // namespace detail

 template<class T>
 using unique_aligned_ptr = std::unique_ptr<T, detail::unique_aligned_ptr_deleter<std::remove_extent_t<T>>>;

 template<class T, class... Args>
-  requires(std::is_unbounded_array_v<T>, std::is_trivially_default_constructible_v<std::remove_extent_t<T>>,
+  requires(!std::is_array_v<T>)
+unique_aligned_ptr<T> make_unique_aligned(size_t alignment, Args&&... args)
+{
+  // Null on allocation failure. Storage stays raw bytes until T is constructed, so a throwing ctor never runs ~T().
+  unique_aligned_ptr<unsigned char> raw(static_cast<unsigned char*>(AlignedMalloc(sizeof(T), alignment)));
+  T* const obj = raw ? new (raw.get()) T(std::forward<Args>(args)...) : nullptr;
+  return unique_aligned_ptr<T>((static_cast<void>(raw.release()), obj));
+}
+
+template<class T, class... Args>
+  requires(std::is_unbounded_array_v<T> && std::is_trivially_default_constructible_v<std::remove_extent_t<T>> &&
           std::is_trivially_destructible_v<std::remove_extent_t<T>>)
 unique_aligned_ptr<T> make_unique_aligned(size_t alignment, size_t n)
 {
@@ -148,7 +169,7 @@ unique_aligned_ptr<T> make_unique_aligned(size_t alignment, size_t n)
 }

 template<class T, class... Args>
-  requires(std::is_unbounded_array_v<T>, std::is_trivially_default_constructible_v<std::remove_extent_t<T>>,
+  requires(std::is_unbounded_array_v<T> && std::is_trivially_default_constructible_v<std::remove_extent_t<T>> &&
           std::is_trivially_destructible_v<std::remove_extent_t<T>>)
 unique_aligned_ptr<T> make_unique_aligned_for_overwrite(size_t alignment, size_t n)
 {
--- a/src/core/gpu_backend.cpp
+++ b/src/core/gpu_backend.cpp
@@ -962,7 +962,7 @@ void GPUNullBackend::DoMemoryState(StateWrapper& sw, System::MemorySaveState& ms
 {
 }

-std::unique_ptr<GPUBackend> GPUBackend::CreateNullBackend(GPUPresenter& presenter)
+Common::unique_aligned_ptr<GPUBackend> GPUBackend::CreateNullBackend(GPUPresenter& presenter)
 {
-  return std::make_unique<GPUNullBackend>(presenter);
+  return Common::make_unique_aligned<GPUNullBackend>(HOST_CACHE_LINE_SIZE, presenter);
 }
--- a/src/core/gpu_backend.h
+++ b/src/core/gpu_backend.h
@@ -5,6 +5,8 @@

 #include "util/gpu_device.h"

+#include "common/align.h"
+
 #include "gpu_thread_commands.h"

 #include <memory>
@@ -27,7 +29,7 @@ struct MemorySaveState;
 // DESIGN NOTE: Only static methods should be called on the CPU thread.
 // You specifically don't have a global pointer available for this reason.

-class ALIGN_TO_CACHE_LINE GPUBackend
+class GPUBackend
 {
 public:
  static GPUThreadCommand* NewClearVRAMCommand();
@@ -54,9 +56,9 @@ public:

  static bool IsUsingHardwareBackend();

-  static std::unique_ptr<GPUBackend> CreateHardwareBackend(GPUPresenter& presenter);
-  static std::unique_ptr<GPUBackend> CreateSoftwareBackend(GPUPresenter& presenter);
-  static std::unique_ptr<GPUBackend> CreateNullBackend(GPUPresenter& presenter);
+  static Common::unique_aligned_ptr<GPUBackend> CreateHardwareBackend(GPUPresenter& presenter);
+  static Common::unique_aligned_ptr<GPUBackend> CreateSoftwareBackend(GPUPresenter& presenter);
+  static Common::unique_aligned_ptr<GPUBackend> CreateNullBackend(GPUPresenter& presenter);

  static bool RenderScreenshotToBuffer(u32 width, u32 height, bool postfx, bool apply_aspect_ratio, Image* out_image,
                                       Error* error);
--- a/src/core/gpu_hw.cpp
+++ b/src/core/gpu_hw.cpp
@@ -4345,7 +4345,7 @@ void GPU_HW::UpdatePostProcessingSettings(bool force_reload)
  }
 }

-std::unique_ptr<GPUBackend> GPUBackend::CreateHardwareBackend(GPUPresenter& presenter)
+Common::unique_aligned_ptr<GPUBackend> GPUBackend::CreateHardwareBackend(GPUPresenter& presenter)
 {
-  return std::make_unique<GPU_HW>(presenter);
+  return Common::make_unique_aligned<GPU_HW>(HOST_CACHE_LINE_SIZE, presenter);
 }
--- a/src/core/gpu_sw.cpp
+++ b/src/core/gpu_sw.cpp
@@ -440,7 +440,7 @@ void GPU_SW::UpdateDisplay(const GPUBackendUpdateDisplayCommand* cmd)
  }
 }

-std::unique_ptr<GPUBackend> GPUBackend::CreateSoftwareBackend(GPUPresenter& presenter)
+Common::unique_aligned_ptr<GPUBackend> GPUBackend::CreateSoftwareBackend(GPUPresenter& presenter)
 {
-  return std::make_unique<GPU_SW>(presenter);
+  return Common::make_unique_aligned<GPU_SW>(HOST_CACHE_LINE_SIZE, presenter);
 }
--- a/src/core/gpu_thread.cpp
+++ b/src/core/gpu_thread.cpp
@@ -109,7 +109,7 @@ struct ALIGN_TO_CACHE_LINE State
  Threading::KernelSemaphore thread_is_done_semaphore;

  // Owned by GPU thread.
-  ALIGN_TO_CACHE_LINE std::unique_ptr<GPUBackend> gpu_backend;
+  ALIGN_TO_CACHE_LINE Common::unique_aligned_ptr<GPUBackend> gpu_backend;
  std::unique_ptr<GPUPresenter> gpu_presenter;
  std::atomic<u32> command_fifo_read_ptr{0};
  u8 run_idle_reasons = 0;