Compile the CLMUL code only for IA32/AMD64 targets, and check that CLMUL is available at runtime before executing it.

This commit is contained in:
2021-09-26 17:37:50 +01:00
parent bf1b026a7e
commit 6b45dd6e5b
9 changed files with 98 additions and 68 deletions

View File

@@ -5,4 +5,4 @@ set(CMAKE_C_STANDARD 90)
add_compile_options(-flto -ffast-math -march=x86-64 -mfpmath=sse -msse3)
add_library("Aaru.Checksums.Native" SHARED adler32.h adler32.c crc16.h crc16.c crc16_ccitt.h crc16_ccitt.c crc32.c crc32.h crc64.c crc64.h fletcher16.h fletcher16.c fletcher32.h fletcher32.c library.h spamsum.c spamsum.h crc32_clmul.c crc64_clmul.c)
add_library("Aaru.Checksums.Native" SHARED adler32.h adler32.c crc16.h crc16.c crc16_ccitt.h crc16_ccitt.c crc32.c crc32.h crc64.c crc64.h fletcher16.h fletcher16.c fletcher32.h fletcher32.c library.h spamsum.c spamsum.h crc32_clmul.c crc64_clmul.c simd.c simd.h)

20
crc32.c
View File

@@ -21,6 +21,7 @@
#include "library.h"
#include "crc32.h"
#include "simd.h"
AARU_EXPORT crc32_ctx* AARU_CALL crc32_init(void)
{
@@ -35,15 +36,21 @@ AARU_EXPORT crc32_ctx* AARU_CALL crc32_init(void)
AARU_EXPORT int AARU_CALL crc32_update(crc32_ctx* ctx, const uint8_t* data, uint32_t len)
{
ctx->crc = ~crc32_clmul(data, (long)len, ~ctx->crc);
return 0;
/*
if(!ctx || !data) return -1;
#if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \
defined(__i386__) || defined(__THW_INTEL) || defined(_M_IX86)
if(have_clmul())
{
ctx->crc = ~crc32_clmul(data, (long)len, ~ctx->crc);
return 0;
}
#endif
// Unroll according to Intel slicing by uint8_t
// http://www.intel.com/technology/comms/perfnet/download/CRC_generators.pdf
// http://sourceforge.net/projects/slicing-by-8/
if(!ctx || !data) return -1;
uint32_t crc;
const uint32_t* current;
const uint8_t* current_char = (const uint8_t*)data;
@@ -84,7 +91,6 @@ AARU_EXPORT int AARU_CALL crc32_update(crc32_ctx* ctx, const uint8_t* data, uint
ctx->crc = crc;
return 0;
*/
}
AARU_EXPORT int AARU_CALL crc32_final(crc32_ctx* ctx, uint32_t* crc)

12
crc32.h
View File

@@ -259,11 +259,15 @@ static const uint32_t crc32_table[8][256] = {
#define CRC32_ISO_POLY 0xEDB88320
#define CRC32_ISO_SEED 0xFFFFFFFF
#define CLMUL __attribute__((target("pclmul,sse4.1")))
#define ALIGNED_(n) __attribute__((aligned(n)))
CLMUL uint32_t crc32_clmul(const uint8_t* src, long len, uint32_t initial_crc);
AARU_EXPORT crc32_ctx* AARU_CALL crc32_init();
AARU_EXPORT int AARU_CALL crc32_update(crc32_ctx* ctx, const uint8_t* data, uint32_t len);
AARU_EXPORT int AARU_CALL crc32_final(crc32_ctx* ctx, uint32_t* crc);
AARU_EXPORT void AARU_CALL crc32_free(crc32_ctx* ctx);
/* The CLMUL-accelerated CRC32 is only declared when targeting 32- or 64-bit x86. */
#if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \
defined(__i386__) || defined(__THW_INTEL) || defined(_M_IX86)
/*
 * Function attribute that enables the PCLMULQDQ and SSE4.1 instruction sets for the annotated function.
 * NOTE(review): __attribute__ is GCC/clang syntax, yet the guard above also admits MSVC (_M_IX86, _M_X64)
 * -- confirm how MSVC builds are expected to handle this.
 */
#define CLMUL __attribute__((target("pclmul,sse4.1")))
/* Requests n-byte alignment for the annotated declaration. */
#define ALIGNED_(n) __attribute__((aligned(n)))
/*
 * CLMUL-based CRC32 over len bytes starting at src, seeded with initial_crc.
 * crc32_update() calls it as ~crc32_clmul(data, (long)len, ~ctx->crc), and only after have_clmul()
 * has reported that the CPU supports the required instructions.
 */
CLMUL uint32_t crc32_clmul(const uint8_t* src, long len, uint32_t initial_crc);
#endif

View File

@@ -32,60 +32,16 @@
* 3. This notice may not be removed or altered from any source distribution.
*/
#if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \
defined(__i386__) || defined(__THW_INTEL) || defined(_M_IX86)
#include <inttypes.h>
#include <smmintrin.h>
#include <wmmintrin.h>
#ifdef _MSC_VER
#include <intrin.h>
#else
/*
* Newer versions of GCC and clang come with cpuid.h
* (ftr GCC 4.7 in Debian Wheezy has this)
*/
#include <cpuid.h>
#endif
#include "library.h"
#include "crc32.h"
/*
 * Executes the CPUID instruction for leaf `info` and stores the resulting
 * EAX, EBX, ECX and EDX register values through the four output pointers.
 */
static void cpuid(int info, unsigned* eax, unsigned* ebx, unsigned* ecx, unsigned* edx)
{
    /* regs[0..3] receive EAX, EBX, ECX, EDX in that order on every compiler. */
    unsigned int regs[4];

#ifdef _MSC_VER
    /* MSVC intrinsic: fills the whole array in one call. */
    __cpuid(regs, info);
#else
    /* GCC, clang: <cpuid.h> macro that writes each register into an lvalue. */
    __cpuid(info, regs[0], regs[1], regs[2], regs[3]);
#endif

    *eax = regs[0];
    *ebx = regs[1];
    *ecx = regs[2];
    *edx = regs[3];
}
/*
 * Reports whether the CPU advertises both PCLMULQDQ and SSE4.1.
 * Returns 1 when both feature bits are set in CPUID leaf 1 ECX, 0 otherwise.
 */
static int have_clmul(void)
{
    enum
    {
        ECX_PCLMULQDQ = 0x2,    /* bit 1 */
        ECX_SSE41     = 0x80000 /* bit 19 */
    };
    unsigned reg_a, reg_b, reg_c, reg_d;

    cpuid(1 /* feature bits */, &reg_a, &reg_b, &reg_c, &reg_d);

    if((reg_c & ECX_PCLMULQDQ) == 0) return 0;
    if((reg_c & ECX_SSE41) == 0) return 0;

    return 1;
}
CLMUL
static void fold_1(__m128i* xmm_crc0, __m128i* xmm_crc1, __m128i* xmm_crc2, __m128i* xmm_crc3)
{
@@ -532,3 +488,5 @@ done:
crc = _mm_extract_epi32(xmm_crc3, 2);
return ~crc;
}
#endif

14
crc64.c
View File

@@ -20,6 +20,7 @@
#include "library.h"
#include "crc64.h"
#include "simd.h"
AARU_EXPORT crc64_ctx* AARU_CALL crc64_init(void)
{
@@ -35,9 +36,15 @@ AARU_EXPORT crc64_ctx* AARU_CALL crc64_init(void)
AARU_EXPORT int AARU_CALL crc64_update(crc64_ctx* ctx, const uint8_t* data, uint32_t len)
{
ctx->crc = ~crc64_clmul(~ctx->crc, data, len);
return 0;
/*
#if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \
defined(__i386__) || defined(__THW_INTEL) || defined(_M_IX86)
if(have_clmul())
{
ctx->crc = ~crc64_clmul(~ctx->crc, data, len);
return 0;
}
#endif
// Unroll according to Intel slicing by uint8_t
// http://www.intel.com/technology/comms/perfnet/download/CRC_generators.pdf
// http://sourceforge.net/projects/slicing-by-8/
@@ -73,7 +80,6 @@ AARU_EXPORT int AARU_CALL crc64_update(crc64_ctx* ctx, const uint8_t* data, uint
ctx->crc = crc;
return 0;
*/
}
AARU_EXPORT int AARU_CALL crc64_final(crc64_ctx* ctx, uint64_t* crc)

View File

@@ -234,8 +234,12 @@ const static uint64_t crc64_table[4][256] = {
#define CRC64_ECMA_POLY 0xC96C5795D7870F42
#define CRC64_ECMA_SEED 0xFFFFFFFFFFFFFFFF
uint64_t crc64_clmul(uint64_t crc, const uint8_t* data, size_t length);
AARU_EXPORT crc64_ctx* AARU_CALL crc64_init();
AARU_EXPORT int AARU_CALL crc64_update(crc64_ctx* ctx, const uint8_t* data, uint32_t len);
AARU_EXPORT int AARU_CALL crc64_final(crc64_ctx* ctx, uint64_t* crc);
AARU_EXPORT void AARU_CALL crc64_free(crc64_ctx* ctx);
/* The CLMUL-accelerated CRC64 is only declared when targeting 32- or 64-bit x86. */
#if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \
defined(__i386__) || defined(__THW_INTEL) || defined(_M_IX86)
/*
 * CLMUL-based CRC64 over `length` bytes starting at `data`, seeded with `crc`.
 * crc64_update() calls it as ~crc64_clmul(~ctx->crc, data, len), and only after have_clmul()
 * has reported that the CPU supports the required instructions.
 */
uint64_t crc64_clmul(uint64_t crc, const uint8_t* data, size_t length);
#endif

View File

@@ -1,3 +1,6 @@
#if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \
defined(__i386__) || defined(__THW_INTEL) || defined(_M_IX86)
#include <inttypes.h>
#include <smmintrin.h>
#include <wmmintrin.h>
@@ -184,3 +187,5 @@ CLMUL uint64_t crc64_clmul(uint64_t crc, const uint8_t* data, size_t length)
return ~(((uint64_t)(uint32_t)_mm_extract_epi32(T2, 3) << 32) | (uint64_t)(uint32_t)_mm_extract_epi32(T2, 2));
#endif
}
#endif

46
simd.c Normal file
View File

@@ -0,0 +1,46 @@
/*
 * Runtime detection of the x86 SIMD extensions used by the CLMUL checksum code.
 *
 * This file is always part of the library's source list, so everything that
 * touches CPUID is restricted to x86 targets; other architectures get a
 * have_clmul() that simply reports "not available".
 */
#if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \
    defined(__i386__) || defined(__THW_INTEL) || defined(_M_IX86)

#ifdef _MSC_VER
#include <intrin.h>
#else
/*
 * Newer versions of GCC and clang come with cpuid.h
 * (ftr GCC 4.7 in Debian Wheezy has this)
 */
#include <cpuid.h>
#endif

/*
 * Executes the CPUID instruction for leaf `info` and stores the resulting
 * EAX, EBX, ECX and EDX register values through the four output pointers.
 */
static void cpuid(int info, unsigned* eax, unsigned* ebx, unsigned* ecx, unsigned* edx)
{
#ifdef _MSC_VER
    /* The MSVC intrinsic is declared as taking int[4], not unsigned[4]. */
    int registers[4];

    __cpuid(registers, info);

    *eax = (unsigned)registers[0];
    *ebx = (unsigned)registers[1];
    *ecx = (unsigned)registers[2];
    *edx = (unsigned)registers[3];
#else
    /* GCC, clang */
    unsigned int _eax;
    unsigned int _ebx;
    unsigned int _ecx;
    unsigned int _edx;

    __cpuid(info, _eax, _ebx, _ecx, _edx);

    *eax = _eax;
    *ebx = _ebx;
    *ecx = _ecx;
    *edx = _edx;
#endif
}

/*
 * Reports whether the CPU advertises both PCLMULQDQ and SSE4.1.
 * Returns 1 when both feature bits are set in CPUID leaf 1 ECX, 0 otherwise.
 */
int have_clmul(void)
{
    unsigned eax, ebx, ecx, edx;
    int      has_pclmulqdq;
    int      has_sse41;

    cpuid(1 /* feature bits */, &eax, &ebx, &ecx, &edx);

    has_pclmulqdq = (ecx & 0x2) != 0;     /* bit 1 */
    has_sse41     = (ecx & 0x80000) != 0; /* bit 19 */

    return has_pclmulqdq && has_sse41;
}

#else /* not an x86 target */

/*
 * CLMUL is an x86 extension; on every other architecture it is never
 * available. Always returns 0.
 */
int have_clmul(void)
{
    return 0;
}

#endif

1
simd.h Normal file
View File

@@ -0,0 +1 @@
int have_clmul(void);