mirror of
https://github.com/aaru-dps/Aaru.Checksums.Native.git
synced 2025-12-16 19:24:29 +00:00
Conditionally compile the CLMUL code only for IA32/AMD64, and check that it is available before executing it.
This commit is contained in:
@@ -5,4 +5,4 @@ set(CMAKE_C_STANDARD 90)
|
||||
|
||||
add_compile_options(-flto -ffast-math -march=x86-64 -mfpmath=sse -msse3)
|
||||
|
||||
add_library("Aaru.Checksums.Native" SHARED adler32.h adler32.c crc16.h crc16.c crc16_ccitt.h crc16_ccitt.c crc32.c crc32.h crc64.c crc64.h fletcher16.h fletcher16.c fletcher32.h fletcher32.c library.h spamsum.c spamsum.h crc32_clmul.c crc64_clmul.c)
|
||||
add_library("Aaru.Checksums.Native" SHARED adler32.h adler32.c crc16.h crc16.c crc16_ccitt.h crc16_ccitt.c crc32.c crc32.h crc64.c crc64.h fletcher16.h fletcher16.c fletcher32.h fletcher32.c library.h spamsum.c spamsum.h crc32_clmul.c crc64_clmul.c simd.c simd.h)
|
||||
|
||||
20
crc32.c
20
crc32.c
@@ -21,6 +21,7 @@
|
||||
|
||||
#include "library.h"
|
||||
#include "crc32.h"
|
||||
#include "simd.h"
|
||||
|
||||
AARU_EXPORT crc32_ctx* AARU_CALL crc32_init(void)
|
||||
{
|
||||
@@ -35,15 +36,21 @@ AARU_EXPORT crc32_ctx* AARU_CALL crc32_init(void)
|
||||
|
||||
AARU_EXPORT int AARU_CALL crc32_update(crc32_ctx* ctx, const uint8_t* data, uint32_t len)
|
||||
{
|
||||
ctx->crc = ~crc32_clmul(data, (long)len, ~ctx->crc);
|
||||
return 0;
|
||||
/*
|
||||
if(!ctx || !data) return -1;
|
||||
|
||||
#if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \
|
||||
defined(__i386__) || defined(__THW_INTEL) || defined(_M_IX86)
|
||||
if(have_clmul())
|
||||
{
|
||||
ctx->crc = ~crc32_clmul(data, (long)len, ~ctx->crc);
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Unroll according to Intel slicing by uint8_t
|
||||
// http://www.intel.com/technology/comms/perfnet/download/CRC_generators.pdf
|
||||
// http://sourceforge.net/projects/slicing-by-8/
|
||||
|
||||
if(!ctx || !data) return -1;
|
||||
|
||||
uint32_t crc;
|
||||
const uint32_t* current;
|
||||
const uint8_t* current_char = (const uint8_t*)data;
|
||||
@@ -84,7 +91,6 @@ AARU_EXPORT int AARU_CALL crc32_update(crc32_ctx* ctx, const uint8_t* data, uint
|
||||
|
||||
ctx->crc = crc;
|
||||
return 0;
|
||||
*/
|
||||
}
|
||||
|
||||
AARU_EXPORT int AARU_CALL crc32_final(crc32_ctx* ctx, uint32_t* crc)
|
||||
|
||||
12
crc32.h
12
crc32.h
@@ -259,11 +259,15 @@ static const uint32_t crc32_table[8][256] = {
|
||||
#define CRC32_ISO_POLY 0xEDB88320
|
||||
#define CRC32_ISO_SEED 0xFFFFFFFF
|
||||
|
||||
#define CLMUL __attribute__((target("pclmul,sse4.1")))
|
||||
#define ALIGNED_(n) __attribute__((aligned(n)))
|
||||
|
||||
CLMUL uint32_t crc32_clmul(const uint8_t* src, long len, uint32_t initial_crc);
|
||||
AARU_EXPORT crc32_ctx* AARU_CALL crc32_init();
|
||||
AARU_EXPORT int AARU_CALL crc32_update(crc32_ctx* ctx, const uint8_t* data, uint32_t len);
|
||||
AARU_EXPORT int AARU_CALL crc32_final(crc32_ctx* ctx, uint32_t* crc);
|
||||
AARU_EXPORT void AARU_CALL crc32_free(crc32_ctx* ctx);
|
||||
|
||||
#if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \
|
||||
defined(__i386__) || defined(__THW_INTEL) || defined(_M_IX86)
|
||||
#define CLMUL __attribute__((target("pclmul,sse4.1")))
|
||||
#define ALIGNED_(n) __attribute__((aligned(n)))
|
||||
|
||||
CLMUL uint32_t crc32_clmul(const uint8_t* src, long len, uint32_t initial_crc);
|
||||
#endif
|
||||
@@ -32,60 +32,16 @@
|
||||
* 3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
#if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \
|
||||
defined(__i386__) || defined(__THW_INTEL) || defined(_M_IX86)
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <smmintrin.h>
|
||||
#include <wmmintrin.h>
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#include <intrin.h>
|
||||
#else
|
||||
/*
|
||||
* Newer versions of GCC and clang come with cpuid.h
|
||||
* (ftr GCC 4.7 in Debian Wheezy has this)
|
||||
*/
|
||||
#include <cpuid.h>
|
||||
|
||||
#endif
|
||||
|
||||
#include "library.h"
|
||||
#include "crc32.h"
|
||||
|
||||
/*
 * Runs the CPUID instruction for one leaf and hands back the four result
 * registers.
 *
 * info: CPUID leaf to query (the value placed in EAX).
 * eax, ebx, ecx, edx: receive the corresponding register values. All four
 *                     are written unconditionally, so none may be NULL.
 */
static void cpuid(int info, unsigned* eax, unsigned* ebx, unsigned* ecx, unsigned* edx)
{
#ifdef _MSC_VER
    /*
     * The MSVC intrinsic takes an int[4]; using a signed array avoids
     * passing an incompatible pointer type, and the values are converted
     * back to unsigned explicitly below.
     */
    int registers[4];

    __cpuid(registers, info);

    *eax = (unsigned)registers[0];
    *ebx = (unsigned)registers[1];
    *ecx = (unsigned)registers[2];
    *edx = (unsigned)registers[3];
#else
    /* GCC, clang: __cpuid takes lvalues rather than pointers, hence the temporaries. */
    unsigned int reg_a;
    unsigned int reg_b;
    unsigned int reg_c;
    unsigned int reg_d;

    __cpuid(info, reg_a, reg_b, reg_c, reg_d);

    *eax = reg_a;
    *ebx = reg_b;
    *ecx = reg_c;
    *edx = reg_d;
#endif
}
|
||||
|
||||
/*
 * Tells whether the running CPU reports both PCLMULQDQ and SSE4.1.
 *
 * Queries CPUID leaf 1 (feature bits) and inspects ECX: bit 1 is
 * PCLMULQDQ and bit 19 is SSE4.1.
 *
 * Returns 1 when both bits are set, 0 otherwise.
 */
static int have_clmul(void)
{
    unsigned reg_a, reg_b, reg_c, reg_d;
    unsigned wanted = 0x2u /* bit 1 */ | 0x80000u /* bit 19 */;

    cpuid(1 /* feature bits */, &reg_a, &reg_b, &reg_c, &reg_d);

    /* Both features are required, so every bit of the mask must be present. */
    return (reg_c & wanted) == wanted;
}
|
||||
|
||||
CLMUL
|
||||
static void fold_1(__m128i* xmm_crc0, __m128i* xmm_crc1, __m128i* xmm_crc2, __m128i* xmm_crc3)
|
||||
{
|
||||
@@ -532,3 +488,5 @@ done:
|
||||
crc = _mm_extract_epi32(xmm_crc3, 2);
|
||||
return ~crc;
|
||||
}
|
||||
|
||||
#endif
|
||||
14
crc64.c
14
crc64.c
@@ -20,6 +20,7 @@
|
||||
|
||||
#include "library.h"
|
||||
#include "crc64.h"
|
||||
#include "simd.h"
|
||||
|
||||
AARU_EXPORT crc64_ctx* AARU_CALL crc64_init(void)
|
||||
{
|
||||
@@ -35,9 +36,15 @@ AARU_EXPORT crc64_ctx* AARU_CALL crc64_init(void)
|
||||
|
||||
AARU_EXPORT int AARU_CALL crc64_update(crc64_ctx* ctx, const uint8_t* data, uint32_t len)
|
||||
{
|
||||
ctx->crc = ~crc64_clmul(~ctx->crc, data, len);
|
||||
return 0;
|
||||
/*
|
||||
#if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \
|
||||
defined(__i386__) || defined(__THW_INTEL) || defined(_M_IX86)
|
||||
if(have_clmul())
|
||||
{
|
||||
ctx->crc = ~crc64_clmul(~ctx->crc, data, len);
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Unroll according to Intel slicing by uint8_t
|
||||
// http://www.intel.com/technology/comms/perfnet/download/CRC_generators.pdf
|
||||
// http://sourceforge.net/projects/slicing-by-8/
|
||||
@@ -73,7 +80,6 @@ AARU_EXPORT int AARU_CALL crc64_update(crc64_ctx* ctx, const uint8_t* data, uint
|
||||
|
||||
ctx->crc = crc;
|
||||
return 0;
|
||||
*/
|
||||
}
|
||||
|
||||
AARU_EXPORT int AARU_CALL crc64_final(crc64_ctx* ctx, uint64_t* crc)
|
||||
|
||||
6
crc64.h
6
crc64.h
@@ -234,8 +234,12 @@ const static uint64_t crc64_table[4][256] = {
|
||||
#define CRC64_ECMA_POLY 0xC96C5795D7870F42
|
||||
#define CRC64_ECMA_SEED 0xFFFFFFFFFFFFFFFF
|
||||
|
||||
uint64_t crc64_clmul(uint64_t crc, const uint8_t* data, size_t length);
|
||||
AARU_EXPORT crc64_ctx* AARU_CALL crc64_init();
|
||||
AARU_EXPORT int AARU_CALL crc64_update(crc64_ctx* ctx, const uint8_t* data, uint32_t len);
|
||||
AARU_EXPORT int AARU_CALL crc64_final(crc64_ctx* ctx, uint64_t* crc);
|
||||
AARU_EXPORT void AARU_CALL crc64_free(crc64_ctx* ctx);
|
||||
|
||||
#if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \
|
||||
defined(__i386__) || defined(__THW_INTEL) || defined(_M_IX86)
|
||||
uint64_t crc64_clmul(uint64_t crc, const uint8_t* data, size_t length);
|
||||
#endif
|
||||
@@ -1,3 +1,6 @@
|
||||
#if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \
|
||||
defined(__i386__) || defined(__THW_INTEL) || defined(_M_IX86)
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <smmintrin.h>
|
||||
#include <wmmintrin.h>
|
||||
@@ -184,3 +187,5 @@ CLMUL uint64_t crc64_clmul(uint64_t crc, const uint8_t* data, size_t length)
|
||||
return ~(((uint64_t)(uint32_t)_mm_extract_epi32(T2, 3) << 32) | (uint64_t)(uint32_t)_mm_extract_epi32(T2, 2));
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif
|
||||
46
simd.c
Normal file
46
simd.c
Normal file
@@ -0,0 +1,46 @@
|
||||
/*
 * Runtime detection of the x86 instruction-set extensions needed by the
 * CLMUL-accelerated checksum routines.
 *
 * CPUID only exists on IA32/AMD64, so the probing code is compiled under the
 * same architecture guard used by the CLMUL implementations. On any other
 * target have_clmul() is still defined and simply reports "not available",
 * which keeps this translation unit buildable everywhere.
 */
#if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \
    defined(__i386__) || defined(__THW_INTEL) || defined(_M_IX86)

#ifdef _MSC_VER
#include <intrin.h>
#else
/*
 * Newer versions of GCC and clang come with cpuid.h
 * (ftr GCC 4.7 in Debian Wheezy has this)
 */
#include <cpuid.h>
#endif

/*
 * Runs the CPUID instruction for one leaf and hands back the four result
 * registers.
 *
 * info: CPUID leaf to query (the value placed in EAX).
 * eax, ebx, ecx, edx: receive the corresponding register values. All four
 *                     are written unconditionally, so none may be NULL.
 */
static void cpuid(int info, unsigned* eax, unsigned* ebx, unsigned* ecx, unsigned* edx)
{
#ifdef _MSC_VER
    /*
     * The MSVC intrinsic takes an int[4]; using a signed array avoids
     * passing an incompatible pointer type, and the values are converted
     * back to unsigned explicitly below.
     */
    int registers[4];

    __cpuid(registers, info);

    *eax = (unsigned)registers[0];
    *ebx = (unsigned)registers[1];
    *ecx = (unsigned)registers[2];
    *edx = (unsigned)registers[3];
#else
    /* GCC, clang: __cpuid takes lvalues rather than pointers, hence the temporaries. */
    unsigned int reg_a;
    unsigned int reg_b;
    unsigned int reg_c;
    unsigned int reg_d;

    __cpuid(info, reg_a, reg_b, reg_c, reg_d);

    *eax = reg_a;
    *ebx = reg_b;
    *ecx = reg_c;
    *edx = reg_d;
#endif
}

/*
 * Tells whether the running CPU reports both PCLMULQDQ and SSE4.1.
 *
 * Returns 1 when both feature bits are set in CPUID leaf 1, 0 otherwise.
 */
int have_clmul(void)
{
    unsigned eax, ebx, ecx, edx;
    int      has_pclmulqdq;
    int      has_sse41;

    cpuid(1 /* feature bits */, &eax, &ebx, &ecx, &edx);

    has_pclmulqdq = (ecx & 0x2) != 0;     /* bit 1 */
    has_sse41     = (ecx & 0x80000) != 0; /* bit 19 */

    return has_pclmulqdq && has_sse41;
}

#else

/*
 * Non-x86 build: there is no CPUID to query and no CLMUL code path, so the
 * feature is always reported as unavailable.
 */
int have_clmul(void)
{
    return 0;
}

#endif
|
||||
Reference in New Issue
Block a user