GPU: Align the memory allocation rather than the class

This commit is contained in:
Stenzek
2025-11-21 00:29:42 +10:00
parent defbe7061b
commit aecdfd5a1d
6 changed files with 48 additions and 25 deletions

View File

@@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
// SPDX-FileCopyrightText: 2019-2025 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: CC-BY-NC-ND-4.0
#pragma once
@@ -118,25 +118,46 @@ namespace detail {
// Deleter for use with std::unique_ptr: releases storage through Common::AlignedFree().
// NOTE(review): this diff view lists two operator() definitions; the single-line one directly below
// looks like the pre-change version that the const overload after it replaces - confirm against the file.
template<class T>
struct unique_aligned_ptr_deleter
{
ALWAYS_INLINE void operator()(T* ptr) { Common::AlignedFree(ptr); }
// Runs T's destructor when T is a non-array type that is not trivially destructible, then frees the storage.
ALWAYS_INLINE void operator()(T* ptr) const
{
// Array types - do nothing, elements must be trivially destructible
if constexpr (!std::is_array_v<T> && !std::is_trivially_destructible_v<T>)
{
// A null pointer returns here without reaching AlignedFree(); this check only exists in the
// non-trivially-destructible branch.
if (!ptr)
return;
ptr->~T();
}
// The const_cast strips cv-qualifiers from the pointee type before the pointer is handed to AlignedFree().
Common::AlignedFree(const_cast<std::remove_cv_t<T>*>(ptr));
}
// Allow conversion between compatible deleters for derived-to-base conversions
template<class U>
requires std::is_convertible_v<U*, T*>
constexpr unique_aligned_ptr_deleter(const unique_aligned_ptr_deleter<U>&) noexcept
{
}
// Explicitly defaulted: declaring the converting constructor above suppresses the implicit default constructor.
constexpr unique_aligned_ptr_deleter() noexcept = default;
};
// Trait: matches only array types of unknown bound (`Elem[]`).
template<class Candidate>
struct unbounded_array_check
{
  static constexpr bool value = false;
};
template<class Elem>
struct unbounded_array_check<Elem[]>
{
  static constexpr bool value = true;
};
// True only when the argument is an array type of unknown bound.
template<class Candidate>
constexpr bool is_unbounded_array_v = unbounded_array_check<Candidate>::value;

// Trait: matches only array types with a known bound (`Elem[Count]`).
template<class Candidate>
struct bounded_array_check
{
  static constexpr bool value = false;
};
template<class Elem, std::size_t Count>
struct bounded_array_check<Elem[Count]>
{
  static constexpr bool value = true;
};
// True only when the argument is an array type with a known bound.
template<class Candidate>
constexpr bool is_bounded_array_v = bounded_array_check<Candidate>::value;
} // namespace detail
// Owning pointer alias: a std::unique_ptr whose deleter is detail::unique_aligned_ptr_deleter.
// std::remove_extent_t<T> means an array form such as T[] selects the deleter for the element type.
template<class T>
using unique_aligned_ptr = std::unique_ptr<T, detail::unique_aligned_ptr_deleter<std::remove_extent_t<T>>>;
// Allocates sizeof(T) bytes through AlignedMalloc() with the requested alignment and constructs a T in
// that storage from the forwarded arguments. Constrained to non-array T.
// Returns a null pointer, without running any constructor, when AlignedMalloc() yields null.
// NOTE(review): the storage is handed to the unique_aligned_ptr before T is constructed; if T's
// constructor could throw, the deleter would run on an unconstructed object - confirm exceptions
// are not in play for callers of this function.
template<class T, class... Args>
// NOTE(review): the unterminated requires( line directly below looks like a line this diff view
// lists as removed; the requires(!std::is_array_v<T>) clause after it is the active constraint - confirm.
requires(std::is_unbounded_array_v<T>, std::is_trivially_default_constructible_v<std::remove_extent_t<T>>,
requires(!std::is_array_v<T>)
unique_aligned_ptr<T> make_unique_aligned(size_t alignment, Args&&... args)
{
unique_aligned_ptr<T> ptr(static_cast<T*>(AlignedMalloc(sizeof(T), alignment)));
// Placement-new only when the allocation succeeded.
if (ptr)
new (ptr.get()) T(std::forward<Args>(args)...);
return ptr;
}
template<class T, class... Args>
requires(std::is_unbounded_array_v<T> && std::is_trivially_default_constructible_v<std::remove_extent_t<T>> &&
std::is_trivially_destructible_v<std::remove_extent_t<T>>)
unique_aligned_ptr<T> make_unique_aligned(size_t alignment, size_t n)
{
@@ -148,7 +169,7 @@ unique_aligned_ptr<T> make_unique_aligned(size_t alignment, size_t n)
}
template<class T, class... Args>
requires(std::is_unbounded_array_v<T>, std::is_trivially_default_constructible_v<std::remove_extent_t<T>>,
requires(std::is_unbounded_array_v<T> && std::is_trivially_default_constructible_v<std::remove_extent_t<T>> &&
std::is_trivially_destructible_v<std::remove_extent_t<T>>)
unique_aligned_ptr<T> make_unique_aligned_for_overwrite(size_t alignment, size_t n)
{

View File

@@ -962,7 +962,7 @@ void GPUNullBackend::DoMemoryState(StateWrapper& sw, System::MemorySaveState& ms
{
}
// Constructs a GPUNullBackend from `presenter` and returns it as an owning GPUBackend pointer.
// NOTE(review): this diff view lists both the std::unique_ptr/std::make_unique lines and the
// Common::unique_aligned_ptr lines; the latter request HOST_CACHE_LINE_SIZE alignment for the allocation.
std::unique_ptr<GPUBackend> GPUBackend::CreateNullBackend(GPUPresenter& presenter)
Common::unique_aligned_ptr<GPUBackend> GPUBackend::CreateNullBackend(GPUPresenter& presenter)
{
return std::make_unique<GPUNullBackend>(presenter);
return Common::make_unique_aligned<GPUNullBackend>(HOST_CACHE_LINE_SIZE, presenter);
}

View File

@@ -5,6 +5,8 @@
#include "util/gpu_device.h"
#include "common/align.h"
#include "gpu_thread_commands.h"
#include <memory>
@@ -27,7 +29,7 @@ struct MemorySaveState;
// DESIGN NOTE: Only static methods should be called on the CPU thread.
// You specifically don't have a global pointer available for this reason.
class ALIGN_TO_CACHE_LINE GPUBackend
class GPUBackend
{
public:
static GPUThreadCommand* NewClearVRAMCommand();
@@ -54,9 +56,9 @@ public:
static bool IsUsingHardwareBackend();
static std::unique_ptr<GPUBackend> CreateHardwareBackend(GPUPresenter& presenter);
static std::unique_ptr<GPUBackend> CreateSoftwareBackend(GPUPresenter& presenter);
static std::unique_ptr<GPUBackend> CreateNullBackend(GPUPresenter& presenter);
static Common::unique_aligned_ptr<GPUBackend> CreateHardwareBackend(GPUPresenter& presenter);
static Common::unique_aligned_ptr<GPUBackend> CreateSoftwareBackend(GPUPresenter& presenter);
static Common::unique_aligned_ptr<GPUBackend> CreateNullBackend(GPUPresenter& presenter);
static bool RenderScreenshotToBuffer(u32 width, u32 height, bool postfx, bool apply_aspect_ratio, Image* out_image,
Error* error);

View File

@@ -4345,7 +4345,7 @@ void GPU_HW::UpdatePostProcessingSettings(bool force_reload)
}
}
// Constructs a GPU_HW from `presenter` and returns it as an owning GPUBackend pointer.
// NOTE(review): this diff view lists both the std::unique_ptr/std::make_unique lines and the
// Common::unique_aligned_ptr lines; the latter request HOST_CACHE_LINE_SIZE alignment for the allocation.
std::unique_ptr<GPUBackend> GPUBackend::CreateHardwareBackend(GPUPresenter& presenter)
Common::unique_aligned_ptr<GPUBackend> GPUBackend::CreateHardwareBackend(GPUPresenter& presenter)
{
return std::make_unique<GPU_HW>(presenter);
return Common::make_unique_aligned<GPU_HW>(HOST_CACHE_LINE_SIZE, presenter);
}

View File

@@ -440,7 +440,7 @@ void GPU_SW::UpdateDisplay(const GPUBackendUpdateDisplayCommand* cmd)
}
}
// Constructs a GPU_SW from `presenter` and returns it as an owning GPUBackend pointer.
// NOTE(review): this diff view lists both the std::unique_ptr/std::make_unique lines and the
// Common::unique_aligned_ptr lines; the latter request HOST_CACHE_LINE_SIZE alignment for the allocation.
std::unique_ptr<GPUBackend> GPUBackend::CreateSoftwareBackend(GPUPresenter& presenter)
Common::unique_aligned_ptr<GPUBackend> GPUBackend::CreateSoftwareBackend(GPUPresenter& presenter)
{
return std::make_unique<GPU_SW>(presenter);
return Common::make_unique_aligned<GPU_SW>(HOST_CACHE_LINE_SIZE, presenter);
}

View File

@@ -109,7 +109,7 @@ struct ALIGN_TO_CACHE_LINE State
Threading::KernelSemaphore thread_is_done_semaphore;
// Owned by GPU thread.
ALIGN_TO_CACHE_LINE std::unique_ptr<GPUBackend> gpu_backend;
ALIGN_TO_CACHE_LINE Common::unique_aligned_ptr<GPUBackend> gpu_backend;
std::unique_ptr<GPUPresenter> gpu_presenter;
std::atomic<u32> command_fifo_read_ptr{0};
u8 run_idle_reasons = 0;