Fix cross-compilation for ARM FPU variants and add RISC-V support

This commit is contained in:
Kevin Bortis
2026-03-19 12:18:41 +01:00
parent a1cf319999
commit bd01e7d8a5
6 changed files with 62 additions and 44 deletions

19
3rdparty/blake3.cmake vendored
View File

@@ -79,9 +79,22 @@ else()
message(STATUS "BLAKE3: Enabling NEON for AArch64")
target_sources(blake3 PRIVATE ${BLAKE3_C_DIRECTORY}/blake3_neon.c)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "arm")
message(STATUS "BLAKE3: Attempting NEON on 32-bit ARM")
target_sources(blake3 PRIVATE ${BLAKE3_C_DIRECTORY}/blake3_neon.c)
target_compile_options(blake3 PRIVATE -mfpu=neon)
# Test whether the target already supports NEON (via CMAKE_C_FLAGS or
# compiler defaults). We must NOT add -mfpu=neon ourselves: when several
# -mfpu options are given, GCC honors only the last one, so appending it
# would silently override a user's -mfpu=vfpv3-d16 and produce a binary
# that crashes on non-NEON hardware.
include(CheckCSourceCompiles)
check_c_source_compiles("
#include <arm_neon.h>
int main(void) { uint32x4_t v = vdupq_n_u32(0); return vgetq_lane_u32(v, 0); }
" BLAKE3_NEON_COMPILES)
if(BLAKE3_NEON_COMPILES)
message(STATUS "BLAKE3: Enabling NEON on 32-bit ARM")
target_sources(blake3 PRIVATE ${BLAKE3_C_DIRECTORY}/blake3_neon.c)
target_compile_definitions(blake3 PRIVATE BLAKE3_USE_NEON=1)
else()
message(STATUS "BLAKE3: 32-bit ARM without NEON -> portable only")
endif()
else()
message(STATUS "BLAKE3: Unknown arch -> portable only")
endif()

17
3rdparty/xxhash.cmake vendored
View File

@@ -35,13 +35,16 @@ if(CMAKE_BUILD_TYPE STREQUAL "Release")
else()
target_compile_options(xxhash PRIVATE -O3 -ffast-math)
# Enable specific optimizations for x86/x64
if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "x86_64" OR
${CMAKE_SYSTEM_PROCESSOR} MATCHES "AMD64" OR
${CMAKE_SYSTEM_PROCESSOR} MATCHES "i386" OR
${CMAKE_SYSTEM_PROCESSOR} MATCHES "i686")
if(NOT "${CMAKE_C_COMPILER_ID}" MATCHES "AppleClang")
target_compile_options(xxhash PRIVATE -march=core2 -mtune=westmere)
# Enable specific optimizations for native x86/x64 builds only.
# When cross-compiling, the toolchain file is expected to provide its own
# -march/-mtune, so none are added here.
if(NOT CMAKE_CROSSCOMPILING)
if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "x86_64" OR
${CMAKE_SYSTEM_PROCESSOR} MATCHES "AMD64" OR
${CMAKE_SYSTEM_PROCESSOR} MATCHES "i386" OR
${CMAKE_SYSTEM_PROCESSOR} MATCHES "i686")
if(NOT "${CMAKE_C_COMPILER_ID}" MATCHES "AppleClang")
target_compile_options(xxhash PRIVATE -march=core2 -mtune=westmere)
endif()
endif()
endif()
endif()

View File

@@ -103,7 +103,8 @@ if("${CMAKE_SIZEOF_VOID_P}" MATCHES "8"
OR "${CMAKE_C_COMPILER_ARCHITECTURE_ID}" MATCHES "x64"
OR "${CMAKE_SYSTEM_PROCESSOR}" MATCHES "x86_64"
OR "${CMAKE_SYSTEM_PROCESSOR}" MATCHES "AMD64"
OR "${CMAKE_SYSTEM_PROCESSOR}" MATCHES "aarch64")
OR "${CMAKE_SYSTEM_PROCESSOR}" MATCHES "aarch64"
OR "${CMAKE_SYSTEM_PROCESSOR}" MATCHES "riscv64")
set(ARCHITECTURE_IS_64BIT TRUE)
endif()
@@ -118,33 +119,37 @@ if("${CMAKE_BUILD_TYPE}" MATCHES "Release")
else()
add_compile_options(-ffast-math -O3)
if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "x86_64"
OR ${CMAKE_SYSTEM_PROCESSOR} MATCHES "i686"
OR ${CMAKE_SYSTEM_PROCESSOR} MATCHES "AMD64")
if(NOT "${CMAKE_C_COMPILER_ID}" MATCHES "AppleClang")
add_compile_options(-march=core2 -mtune=westmere -mfpmath=sse)
endif()
# Architecture-specific flags are only set for native builds.
# When cross-compiling, the toolchain file is expected to provide its own
# -march/-mtune/-mfpu, so none are added here.
if(NOT CMAKE_CROSSCOMPILING)
if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "x86_64"
OR ${CMAKE_SYSTEM_PROCESSOR} MATCHES "i686"
OR ${CMAKE_SYSTEM_PROCESSOR} MATCHES "AMD64")
if(NOT "${CMAKE_C_COMPILER_ID}" MATCHES "AppleClang")
add_compile_options(-march=core2 -mtune=westmere -mfpmath=sse)
endif()
add_compile_options(-msse3)
add_compile_options(-msse3)
if(NOT "${CMAKE_C_PLATFORM_ID}" MATCHES "MinGW")
add_compile_options(-flto)
endif()
elseif(${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64")
if(NOT "${CMAKE_C_COMPILER_ID}" MATCHES "AppleClang")
add_compile_options(-march=armv8-a)
endif()
if(NOT "${CMAKE_C_PLATFORM_ID}" MATCHES "MinGW")
add_compile_options(-flto)
endif()
elseif(${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64")
if(NOT "${CMAKE_C_COMPILER_ID}" MATCHES "AppleClang")
add_compile_options(-march=armv8-a)
endif()
if(NOT "${CMAKE_C_PLATFORM_ID}" MATCHES "MinGW")
add_compile_options(-flto)
endif()
elseif(${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv7l" OR ${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm")
if(NOT "${CMAKE_C_COMPILER_ID}" MATCHES "AppleClang")
add_compile_options(-march=armv7+fp -mfpu=vfpv3-d16)
endif()
elseif(${CMAKE_SYSTEM_PROCESSOR} MATCHES "mips")
if(NOT "${CMAKE_C_PLATFORM_ID}" MATCHES "MinGW")
add_compile_options(-flto)
if(NOT "${CMAKE_C_PLATFORM_ID}" MATCHES "MinGW")
add_compile_options(-flto)
endif()
elseif(${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv7l" OR ${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm")
if(NOT "${CMAKE_C_COMPILER_ID}" MATCHES "AppleClang")
add_compile_options(-march=armv7+fp -mfpu=vfpv3-d16)
endif()
elseif(${CMAKE_SYSTEM_PROCESSOR} MATCHES "mips")
if(NOT "${CMAKE_C_PLATFORM_ID}" MATCHES "MinGW")
add_compile_options(-flto)
endif()
endif()
endif()
endif()

View File

@@ -86,18 +86,14 @@
#ifdef __clang__
#define TARGET_ARMV8_WITH_CRC __attribute__((target("armv8-a,crc")))
#define TARGET_WITH_CRYPTO __attribute__((target("armv8-a,crypto")))
#else
#define TARGET_ARMV8_WITH_CRC __attribute__((target("arch=armv8-a+crc")))
#define TARGET_WITH_CRYPTO __attribute__((target("fpu=crypto-neon-fp-armv8")))
#endif
#endif // __ARM_ARCH >= 8
#ifdef __clang__
#define TARGET_WITH_CRYPTO __attribute__((target("armv8-a,crypto")))
#else
#define TARGET_WITH_CRYPTO __attribute__((target("fpu=crypto-neon-fp-armv8")))
#endif
#ifdef __clang__
#define TARGET_WITH_SIMD __attribute__((target("neon")))
#else

View File

@@ -32,7 +32,7 @@
#include "arm_vmull.h"
#if !defined(__MINGW32__) && (!defined(__ANDROID__) || !defined(__arm__))
#ifdef TARGET_WITH_CRYPTO
TARGET_WITH_CRYPTO static uint64x2_t sse2neon_vmull_p64_crypto(uint64x1_t _a, uint64x1_t _b)
{
poly64_t a = vget_lane_p64(vreinterpret_p64_u64(_a), 0);
@@ -43,8 +43,7 @@ TARGET_WITH_CRYPTO static uint64x2_t sse2neon_vmull_p64_crypto(uint64x1_t _a, ui
TARGET_WITH_SIMD uint64x2_t sse2neon_vmull_p64(uint64x1_t _a, uint64x1_t _b)
{
#if !defined(__MINGW32__) && (!defined(__ANDROID__) || !defined(__arm__))
// Wraps vmull_p64
#ifdef TARGET_WITH_CRYPTO
if(have_arm_crypto()) return sse2neon_vmull_p64_crypto(_a, _b);
#endif

View File

@@ -21,7 +21,9 @@
#if defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM)
#ifdef TARGET_WITH_CRYPTO
TARGET_WITH_CRYPTO static uint64x2_t sse2neon_vmull_p64_crypto(uint64x1_t _a, uint64x1_t _b);
#endif
TARGET_WITH_SIMD uint64x2_t sse2neon_vmull_p64(uint64x1_t _a, uint64x1_t _b);
TARGET_WITH_SIMD uint64x2_t mm_shuffle_epi8(uint64x2_t a, uint64x2_t b);
TARGET_WITH_SIMD uint64x2_t mm_srli_si128(uint64x2_t a, int imm);