From 6b45dd6e5b7fb48af1a15517263b1e31531bf3ad Mon Sep 17 00:00:00 2001 From: Natalia Portillo Date: Sun, 26 Sep 2021 17:37:50 +0100 Subject: [PATCH] Condition compilation of CLMUL to IA32/AMD64 and check if it's available before executing. --- CMakeLists.txt | 2 +- crc32.c | 20 ++++++++++++------- crc32.h | 14 ++++++++----- crc32_clmul.c | 54 ++++++-------------------------------------------- crc64.c | 14 +++++++++---- crc64.h | 8 ++++++-- crc64_clmul.c | 7 ++++++- simd.c | 46 ++++++++++++++++++++++++++++++++++++++++++ simd.h | 1 + 9 files changed, 98 insertions(+), 68 deletions(-) create mode 100644 simd.c create mode 100644 simd.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 4070973..9fd1653 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,4 +5,4 @@ set(CMAKE_C_STANDARD 90) add_compile_options(-flto -ffast-math -march=x86-64 -mfpmath=sse -msse3) -add_library("Aaru.Checksums.Native" SHARED adler32.h adler32.c crc16.h crc16.c crc16_ccitt.h crc16_ccitt.c crc32.c crc32.h crc64.c crc64.h fletcher16.h fletcher16.c fletcher32.h fletcher32.c library.h spamsum.c spamsum.h crc32_clmul.c crc64_clmul.c) +add_library("Aaru.Checksums.Native" SHARED adler32.h adler32.c crc16.h crc16.c crc16_ccitt.h crc16_ccitt.c crc32.c crc32.h crc64.c crc64.h fletcher16.h fletcher16.c fletcher32.h fletcher32.c library.h spamsum.c spamsum.h crc32_clmul.c crc64_clmul.c simd.c simd.h) diff --git a/crc32.c b/crc32.c index da8444f..6978dc3 100644 --- a/crc32.c +++ b/crc32.c @@ -21,6 +21,7 @@ #include "library.h" #include "crc32.h" +#include "simd.h" AARU_EXPORT crc32_ctx* AARU_CALL crc32_init(void) { @@ -35,15 +36,21 @@ AARU_EXPORT crc32_ctx* AARU_CALL crc32_init(void) AARU_EXPORT int AARU_CALL crc32_update(crc32_ctx* ctx, const uint8_t* data, uint32_t len) { - ctx->crc = ~crc32_clmul(data, (long)len, ~ctx->crc); - return 0; - /* + if(!ctx || !data) return -1; + +#if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \ + defined(__i386__) || defined(__THW_INTEL) || defined(_M_IX86) + if(have_clmul()) + { + ctx->crc = ~crc32_clmul(data, (long)len, ~ctx->crc); + + return 0; + } +#endif + // Unroll according to Intel slicing by uint8_t // http://www.intel.com/technology/comms/perfnet/download/CRC_generators.pdf // http://sourceforge.net/projects/slicing-by-8/ - - if(!ctx || !data) return -1; - uint32_t crc; const uint32_t* current; const uint8_t* current_char = (const uint8_t*)data; @@ -84,7 +91,6 @@ AARU_EXPORT int AARU_CALL crc32_update(crc32_ctx* ctx, const uint8_t* data, uint ctx->crc = crc; return 0; - */ } AARU_EXPORT int AARU_CALL crc32_final(crc32_ctx* ctx, uint32_t* crc) diff --git a/crc32.h b/crc32.h index a8af725..e75254d 100644 --- a/crc32.h +++ b/crc32.h @@ -259,11 +259,15 @@ static const uint32_t crc32_table[8][256] = { #define CRC32_ISO_POLY 0xEDB88320 #define CRC32_ISO_SEED 0xFFFFFFFF -#define CLMUL __attribute__((target("pclmul,sse4.1"))) -#define ALIGNED_(n) __attribute__((aligned(n))) - -CLMUL uint32_t crc32_clmul(const uint8_t* src, long len, uint32_t initial_crc); AARU_EXPORT crc32_ctx* AARU_CALL crc32_init(); AARU_EXPORT int AARU_CALL crc32_update(crc32_ctx* ctx, const uint8_t* data, uint32_t len); AARU_EXPORT int AARU_CALL crc32_final(crc32_ctx* ctx, uint32_t* crc); -AARU_EXPORT void AARU_CALL crc32_free(crc32_ctx* ctx); \ No newline at end of file +AARU_EXPORT void AARU_CALL crc32_free(crc32_ctx* ctx); + +#if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \ + defined(__i386__) || defined(__THW_INTEL) || defined(_M_IX86) +#define CLMUL __attribute__((target("pclmul,sse4.1"))) +#define ALIGNED_(n) __attribute__((aligned(n))) + +CLMUL uint32_t crc32_clmul(const uint8_t* src, long len, uint32_t initial_crc); +#endif \ No newline at end of file diff --git a/crc32_clmul.c b/crc32_clmul.c index 9222cfb..2c8785d 100644 --- a/crc32_clmul.c +++ b/crc32_clmul.c @@ -32,60 +32,16 @@ * 3. This notice may not be removed or altered from any source distribution. */ +#if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \ + defined(__i386__) || defined(__THW_INTEL) || defined(_M_IX86) + #include #include #include -#ifdef _MSC_VER -#include -#else -/* - * Newer versions of GCC and clang come with cpuid.h - * (ftr GCC 4.7 in Debian Wheezy has this) - */ -#include - -#endif - #include "library.h" #include "crc32.h" -static void cpuid(int info, unsigned* eax, unsigned* ebx, unsigned* ecx, unsigned* edx) -{ -#ifdef _MSC_VER - unsigned int registers[4]; - __cpuid(registers, info); - *eax = registers[0]; - *ebx = registers[1]; - *ecx = registers[2]; - *edx = registers[3]; -#else - /* GCC, clang */ - unsigned int _eax; - unsigned int _ebx; - unsigned int _ecx; - unsigned int _edx; - __cpuid(info, _eax, _ebx, _ecx, _edx); - *eax = _eax; - *ebx = _ebx; - *ecx = _ecx; - *edx = _edx; -#endif -} - -static int have_clmul(void) -{ - unsigned eax, ebx, ecx, edx; - int has_pclmulqdq; - int has_sse41; - cpuid(1 /* feature bits */, &eax, &ebx, &ecx, &edx); - - has_pclmulqdq = ecx & 0x2; /* bit 1 */ - has_sse41 = ecx & 0x80000; /* bit 19 */ - - return has_pclmulqdq && has_sse41; -} - CLMUL static void fold_1(__m128i* xmm_crc0, __m128i* xmm_crc1, __m128i* xmm_crc2, __m128i* xmm_crc3) { @@ -531,4 +487,6 @@ done: */ crc = _mm_extract_epi32(xmm_crc3, 2); return ~crc; -} \ No newline at end of file +} + +#endif \ No newline at end of file diff --git a/crc64.c b/crc64.c index 1830321..92a9a94 100644 --- a/crc64.c +++ b/crc64.c @@ -20,6 +20,7 @@ #include "library.h" #include "crc64.h" +#include "simd.h" AARU_EXPORT crc64_ctx* AARU_CALL crc64_init(void) { @@ -35,9 +36,15 @@ AARU_EXPORT crc64_ctx* AARU_CALL crc64_init(void) AARU_EXPORT int AARU_CALL crc64_update(crc64_ctx* ctx, const uint8_t* data, uint32_t len) { - ctx->crc = ~crc64_clmul(~ctx->crc, data, len); - return 0; - /* +#if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \ + defined(__i386__) || defined(__THW_INTEL) || defined(_M_IX86) + if(have_clmul()) + { + ctx->crc = ~crc64_clmul(~ctx->crc, data, len); + return 0; + } +#endif + // Unroll according to Intel slicing by uint8_t // http://www.intel.com/technology/comms/perfnet/download/CRC_generators.pdf // http://sourceforge.net/projects/slicing-by-8/ @@ -73,7 +80,6 @@ AARU_EXPORT int AARU_CALL crc64_update(crc64_ctx* ctx, const uint8_t* data, uint ctx->crc = crc; return 0; - */ } AARU_EXPORT int AARU_CALL crc64_final(crc64_ctx* ctx, uint64_t* crc) diff --git a/crc64.h b/crc64.h index 1353aca..cdd1130 100644 --- a/crc64.h +++ b/crc64.h @@ -234,8 +234,12 @@ const static uint64_t crc64_table[4][256] = { #define CRC64_ECMA_POLY 0xC96C5795D7870F42 #define CRC64_ECMA_SEED 0xFFFFFFFFFFFFFFFF -uint64_t crc64_clmul(uint64_t crc, const uint8_t* data, size_t length); AARU_EXPORT crc64_ctx* AARU_CALL crc64_init(); AARU_EXPORT int AARU_CALL crc64_update(crc64_ctx* ctx, const uint8_t* data, uint32_t len); AARU_EXPORT int AARU_CALL crc64_final(crc64_ctx* ctx, uint64_t* crc); -AARU_EXPORT void AARU_CALL crc64_free(crc64_ctx* ctx); \ No newline at end of file +AARU_EXPORT void AARU_CALL crc64_free(crc64_ctx* ctx); + +#if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \ + defined(__i386__) || defined(__THW_INTEL) || defined(_M_IX86) +uint64_t crc64_clmul(uint64_t crc, const uint8_t* data, size_t length); +#endif \ No newline at end of file diff --git a/crc64_clmul.c b/crc64_clmul.c index 7bf457d..d732512 100644 --- a/crc64_clmul.c +++ b/crc64_clmul.c @@ -1,3 +1,6 @@ +#if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \ + defined(__i386__) || defined(__THW_INTEL) || defined(_M_IX86) + #include #include #include @@ -183,4 +186,6 @@ CLMUL uint64_t crc64_clmul(uint64_t crc, const uint8_t* data, size_t length) #else return ~(((uint64_t)(uint32_t)_mm_extract_epi32(T2, 3) << 32) | (uint64_t)(uint32_t)_mm_extract_epi32(T2, 2)); #endif -} \ No newline at end of file +} + +#endif \ No newline at end of file diff --git a/simd.c b/simd.c new file mode 100644 index 0000000..0787609 --- /dev/null +++ b/simd.c @@ -0,0 +1,46 @@ +#ifdef _MSC_VER +#include +#else +/* + * Newer versions of GCC and clang come with cpuid.h + * (ftr GCC 4.7 in Debian Wheezy has this) + */ +#include + +#endif + +static void cpuid(int info, unsigned* eax, unsigned* ebx, unsigned* ecx, unsigned* edx) +{ +#ifdef _MSC_VER + unsigned int registers[4]; + __cpuid(registers, info); + *eax = registers[0]; + *ebx = registers[1]; + *ecx = registers[2]; + *edx = registers[3]; +#else + /* GCC, clang */ + unsigned int _eax; + unsigned int _ebx; + unsigned int _ecx; + unsigned int _edx; + __cpuid(info, _eax, _ebx, _ecx, _edx); + *eax = _eax; + *ebx = _ebx; + *ecx = _ecx; + *edx = _edx; +#endif +} + +int have_clmul(void) +{ + unsigned eax, ebx, ecx, edx; + int has_pclmulqdq; + int has_sse41; + cpuid(1 /* feature bits */, &eax, &ebx, &ecx, &edx); + + has_pclmulqdq = ecx & 0x2; /* bit 1 */ + has_sse41 = ecx & 0x80000; /* bit 19 */ + + return has_pclmulqdq && has_sse41; +} diff --git a/simd.h b/simd.h new file mode 100644 index 0000000..6d5fbf7 --- /dev/null +++ b/simd.h @@ -0,0 +1 @@ +int have_clmul(void); \ No newline at end of file