From fe8e157f89203d54ded7b4881f5875a123676849 Mon Sep 17 00:00:00 2001 From: Natalia Portillo Date: Tue, 5 Oct 2021 04:18:11 +0100 Subject: [PATCH] Fix compilation for 32-bit ARM. --- CMakeLists.txt | 10 ++++++---- adler32_neon.c | 2 +- crc32.c | 2 ++ crc32.h | 4 +++- crc32_arm_simd.c | 2 +- crc32_vmull.c | 2 +- simd.h | 41 +++++++++++++++++++++++++++++++++++++++-- tests/crc32.cpp | 2 ++ 8 files changed, 55 insertions(+), 10 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 853df53..297abc3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.20) +cmake_minimum_required(VERSION 3.15) project("Aaru.Checksums.Native" C) set(CMAKE_C_STANDARD 90) @@ -14,12 +14,14 @@ if("${CMAKE_BUILD_TYPE}" MATCHES "Release") add_compile_options("/arch:SSE2") endif() else() - add_compile_options(-flto -ffast-math -O3) + add_compile_options(-ffast-math -O3) if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "x86_64" OR ${CMAKE_SYSTEM_PROCESSOR} MATCHES "i686" OR ${CMAKE_SYSTEM_PROCESSOR} MATCHES "AMD64") - add_compile_options(-march=core2 -mfpmath=sse -msse3 -mtune=westmere) + add_compile_options(-march=core2 -mfpmath=sse -msse3 -mtune=westmere -flto) elseif(${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64") - add_compile_options(-march=armv8-a) + add_compile_options(-march=armv8-a -flto) + elseif(${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv7l") + add_compile_options(-march=armv7) endif() endif() endif() diff --git a/adler32_neon.c b/adler32_neon.c index 648de6a..62159cd 100644 --- a/adler32_neon.c +++ b/adler32_neon.c @@ -10,7 +10,7 @@ #include "adler32.h" #include "simd.h" -void adler32_neon(uint16_t* sum1, uint16_t* sum2, const unsigned char* buf, uint32_t len) +TARGET_WITH_SIMD void adler32_neon(uint16_t* sum1, uint16_t* sum2, const unsigned char* buf, uint32_t len) { /* * Split Adler-32 into component sums. diff --git a/crc32.c b/crc32.c index 3bd0339..4566015 100644 --- a/crc32.c +++ b/crc32.c @@ -48,12 +48,14 @@ AARU_EXPORT int AARU_CALL crc32_update(crc32_ctx* ctx, const uint8_t* data, uint #endif #if defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM) +#if __ARM_ARCH >= 8 if(have_arm_crc32()) { ctx->crc = armv8_crc32_little(ctx->crc, data, len); return 0; } +#endif if(have_neon()) { ctx->crc = ~crc32_vmull(data, len, ~ctx->crc); diff --git a/crc32.h b/crc32.h index 56d5448..1d7761f 100644 --- a/crc32.h +++ b/crc32.h @@ -271,8 +271,10 @@ AARU_EXPORT CLMUL uint32_t AARU_CALL crc32_clmul(const uint8_t* src, long len, u #endif #if defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM) +#if __ARM_ARCH >= 8 AARU_EXPORT TARGET_ARMV8_WITH_CRC uint32_t AARU_CALL armv8_crc32_little(uint32_t crc, const unsigned char* buf, uint32_t len); -AARU_EXPORT TARGET_WITH_SIMD uint32_t AARU_CALL crc32_vmull(const uint8_t* src, long len, uint32_t initial_crc); +#endif +AARU_EXPORT TARGET_WITH_SIMD uint32_t AARU_CALL crc32_vmull(const uint8_t* src, long len, uint32_t initial_crc); #endif \ No newline at end of file diff --git a/crc32_arm_simd.c b/crc32_arm_simd.c index 8ebd1fa..5e3db0b 100644 --- a/crc32_arm_simd.c +++ b/crc32_arm_simd.c @@ -2,7 +2,7 @@ // Created by claunia on 29/9/21. // -#if defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM) +#if(defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM)) && __ARM_ARCH >= 8 #include diff --git a/crc32_vmull.c b/crc32_vmull.c index 2b58aea..c4f8a80 100644 --- a/crc32_vmull.c +++ b/crc32_vmull.c @@ -112,7 +112,7 @@ FORCE_INLINE TARGET_WITH_SIMD uint64x2_t sse2neon_vmull_p64(uint64x1_t _a, uint6 return vreinterpretq_u64_u8(r); } -FORCE_INLINE uint64x2_t mm_shuffle_epi8(uint64x2_t a, uint64x2_t b) +FORCE_INLINE TARGET_WITH_SIMD uint64x2_t mm_shuffle_epi8(uint64x2_t a, uint64x2_t b) { uint8x16_t tbl = vreinterpretq_u8_u64(a); // input a uint8x16_t idx = vreinterpretq_u8_u64(b); // input b diff --git a/simd.h b/simd.h index 639dd02..786b87b 100644 --- a/simd.h +++ b/simd.h @@ -41,20 +41,57 @@ AARU_EXPORT int have_arm_crypto(void); #if defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM) +#ifndef __ARM_FEATURE_CRC32 +#define __ARM_FEATURE_CRC32 1 +#endif + #ifdef _MSC_VER #define TARGET_ARMV8_WITH_CRC +#define TARGET_WITH_CRYPTO +#define TARGET_WITH_SIMD #else // _MSC_VER #if defined(__aarch64__) || defined(_M_ARM64) + +#ifdef __clang__ +#define TARGET_ARMV8_WITH_CRC __attribute__((target("crc"))) +#else #define TARGET_ARMV8_WITH_CRC __attribute__((target("+crc"))) +#endif + +#ifdef __clang__ +#define TARGET_WITH_CRYPTO __attribute__((target("crypto"))) +#else #define TARGET_WITH_CRYPTO __attribute__((target("+crypto"))) +#endif + #define TARGET_WITH_SIMD #else + +#if __ARM_ARCH >= 8 + +#ifdef __clang__ #define TARGET_ARMV8_WITH_CRC __attribute__((target("armv8-a,crc"))) -#define TARGET_WITH_CRYPTO __attribute__((target("+crc"))) -#define TARGET_WITH_SIMD __attribute__((target("+neon"))) +#else +#define TARGET_ARMV8_WITH_CRC __attribute__((target("arch=armv8-a+crc"))) +#endif + +#endif // __ARM_ARCH >= 8 + +#ifdef __clang__ +#define TARGET_WITH_CRYPTO __attribute__((target("armv8-a,crypto"))) +#else +#define TARGET_WITH_CRYPTO __attribute__((target("fpu=crypto-neon-fp-armv8"))) +#endif + +#ifdef __clang__ +#define TARGET_WITH_SIMD __attribute__((target("neon"))) +#else +#define TARGET_WITH_SIMD __attribute__((target("fpu=neon"))) +#endif + #endif // __aarch64__ || _M_ARM64 #endif // _MSC_VER diff --git a/tests/crc32.cpp b/tests/crc32.cpp index dbb3076..ccb0c9b 100644 --- a/tests/crc32.cpp +++ b/tests/crc32.cpp @@ -81,6 +81,7 @@ TEST_F(crc32Fixture, crc32_clmul) #endif #if defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM) +#if __ARM_ARCH >= 8 TEST_F(crc32Fixture, crc32_arm_crc32) { if(!have_arm_crc32()) return; @@ -93,6 +94,7 @@ TEST_F(crc32Fixture, crc32_arm_crc32) EXPECT_EQ(crc, EXPECTED_CRC32); } +#endif TEST_F(crc32Fixture, crc32_vmull) {