mirror of
https://github.com/aaru-dps/Aaru.Checksums.Native.git
synced 2025-12-16 19:24:29 +00:00
Condition compilation of CLMUL to IA32/AMD64 and check if it's available before executing.
This commit is contained in:
@@ -5,4 +5,4 @@ set(CMAKE_C_STANDARD 90)
|
|||||||
|
|
||||||
add_compile_options(-flto -ffast-math -march=x86-64 -mfpmath=sse -msse3)
|
add_compile_options(-flto -ffast-math -march=x86-64 -mfpmath=sse -msse3)
|
||||||
|
|
||||||
add_library("Aaru.Checksums.Native" SHARED adler32.h adler32.c crc16.h crc16.c crc16_ccitt.h crc16_ccitt.c crc32.c crc32.h crc64.c crc64.h fletcher16.h fletcher16.c fletcher32.h fletcher32.c library.h spamsum.c spamsum.h crc32_clmul.c crc64_clmul.c)
|
add_library("Aaru.Checksums.Native" SHARED adler32.h adler32.c crc16.h crc16.c crc16_ccitt.h crc16_ccitt.c crc32.c crc32.h crc64.c crc64.h fletcher16.h fletcher16.c fletcher32.h fletcher32.c library.h spamsum.c spamsum.h crc32_clmul.c crc64_clmul.c simd.c simd.h)
|
||||||
|
|||||||
16
crc32.c
16
crc32.c
@@ -21,6 +21,7 @@
|
|||||||
|
|
||||||
#include "library.h"
|
#include "library.h"
|
||||||
#include "crc32.h"
|
#include "crc32.h"
|
||||||
|
#include "simd.h"
|
||||||
|
|
||||||
AARU_EXPORT crc32_ctx* AARU_CALL crc32_init(void)
|
AARU_EXPORT crc32_ctx* AARU_CALL crc32_init(void)
|
||||||
{
|
{
|
||||||
@@ -35,15 +36,21 @@ AARU_EXPORT crc32_ctx* AARU_CALL crc32_init(void)
|
|||||||
|
|
||||||
AARU_EXPORT int AARU_CALL crc32_update(crc32_ctx* ctx, const uint8_t* data, uint32_t len)
|
AARU_EXPORT int AARU_CALL crc32_update(crc32_ctx* ctx, const uint8_t* data, uint32_t len)
|
||||||
{
|
{
|
||||||
|
if(!ctx || !data) return -1;
|
||||||
|
|
||||||
|
#if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \
|
||||||
|
defined(__i386__) || defined(__THW_INTEL) || defined(_M_IX86)
|
||||||
|
if(have_clmul())
|
||||||
|
{
|
||||||
ctx->crc = ~crc32_clmul(data, (long)len, ~ctx->crc);
|
ctx->crc = ~crc32_clmul(data, (long)len, ~ctx->crc);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
/*
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
// Unroll according to Intel slicing by uint8_t
|
// Unroll according to Intel slicing by uint8_t
|
||||||
// http://www.intel.com/technology/comms/perfnet/download/CRC_generators.pdf
|
// http://www.intel.com/technology/comms/perfnet/download/CRC_generators.pdf
|
||||||
// http://sourceforge.net/projects/slicing-by-8/
|
// http://sourceforge.net/projects/slicing-by-8/
|
||||||
|
|
||||||
if(!ctx || !data) return -1;
|
|
||||||
|
|
||||||
uint32_t crc;
|
uint32_t crc;
|
||||||
const uint32_t* current;
|
const uint32_t* current;
|
||||||
const uint8_t* current_char = (const uint8_t*)data;
|
const uint8_t* current_char = (const uint8_t*)data;
|
||||||
@@ -84,7 +91,6 @@ AARU_EXPORT int AARU_CALL crc32_update(crc32_ctx* ctx, const uint8_t* data, uint
|
|||||||
|
|
||||||
ctx->crc = crc;
|
ctx->crc = crc;
|
||||||
return 0;
|
return 0;
|
||||||
*/
|
|
||||||
}
|
}
|
||||||
|
|
||||||
AARU_EXPORT int AARU_CALL crc32_final(crc32_ctx* ctx, uint32_t* crc)
|
AARU_EXPORT int AARU_CALL crc32_final(crc32_ctx* ctx, uint32_t* crc)
|
||||||
|
|||||||
12
crc32.h
12
crc32.h
@@ -259,11 +259,15 @@ static const uint32_t crc32_table[8][256] = {
|
|||||||
#define CRC32_ISO_POLY 0xEDB88320
|
#define CRC32_ISO_POLY 0xEDB88320
|
||||||
#define CRC32_ISO_SEED 0xFFFFFFFF
|
#define CRC32_ISO_SEED 0xFFFFFFFF
|
||||||
|
|
||||||
#define CLMUL __attribute__((target("pclmul,sse4.1")))
|
|
||||||
#define ALIGNED_(n) __attribute__((aligned(n)))
|
|
||||||
|
|
||||||
CLMUL uint32_t crc32_clmul(const uint8_t* src, long len, uint32_t initial_crc);
|
|
||||||
AARU_EXPORT crc32_ctx* AARU_CALL crc32_init();
|
AARU_EXPORT crc32_ctx* AARU_CALL crc32_init();
|
||||||
AARU_EXPORT int AARU_CALL crc32_update(crc32_ctx* ctx, const uint8_t* data, uint32_t len);
|
AARU_EXPORT int AARU_CALL crc32_update(crc32_ctx* ctx, const uint8_t* data, uint32_t len);
|
||||||
AARU_EXPORT int AARU_CALL crc32_final(crc32_ctx* ctx, uint32_t* crc);
|
AARU_EXPORT int AARU_CALL crc32_final(crc32_ctx* ctx, uint32_t* crc);
|
||||||
AARU_EXPORT void AARU_CALL crc32_free(crc32_ctx* ctx);
|
AARU_EXPORT void AARU_CALL crc32_free(crc32_ctx* ctx);
|
||||||
|
|
||||||
|
#if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \
|
||||||
|
defined(__i386__) || defined(__THW_INTEL) || defined(_M_IX86)
|
||||||
|
#define CLMUL __attribute__((target("pclmul,sse4.1")))
|
||||||
|
#define ALIGNED_(n) __attribute__((aligned(n)))
|
||||||
|
|
||||||
|
CLMUL uint32_t crc32_clmul(const uint8_t* src, long len, uint32_t initial_crc);
|
||||||
|
#endif
|
||||||
@@ -32,60 +32,16 @@
|
|||||||
* 3. This notice may not be removed or altered from any source distribution.
|
* 3. This notice may not be removed or altered from any source distribution.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \
|
||||||
|
defined(__i386__) || defined(__THW_INTEL) || defined(_M_IX86)
|
||||||
|
|
||||||
#include <inttypes.h>
|
#include <inttypes.h>
|
||||||
#include <smmintrin.h>
|
#include <smmintrin.h>
|
||||||
#include <wmmintrin.h>
|
#include <wmmintrin.h>
|
||||||
|
|
||||||
#ifdef _MSC_VER
|
|
||||||
#include <intrin.h>
|
|
||||||
#else
|
|
||||||
/*
|
|
||||||
* Newer versions of GCC and clang come with cpuid.h
|
|
||||||
* (ftr GCC 4.7 in Debian Wheezy has this)
|
|
||||||
*/
|
|
||||||
#include <cpuid.h>
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include "library.h"
|
#include "library.h"
|
||||||
#include "crc32.h"
|
#include "crc32.h"
|
||||||
|
|
||||||
static void cpuid(int info, unsigned* eax, unsigned* ebx, unsigned* ecx, unsigned* edx)
|
|
||||||
{
|
|
||||||
#ifdef _MSC_VER
|
|
||||||
unsigned int registers[4];
|
|
||||||
__cpuid(registers, info);
|
|
||||||
*eax = registers[0];
|
|
||||||
*ebx = registers[1];
|
|
||||||
*ecx = registers[2];
|
|
||||||
*edx = registers[3];
|
|
||||||
#else
|
|
||||||
/* GCC, clang */
|
|
||||||
unsigned int _eax;
|
|
||||||
unsigned int _ebx;
|
|
||||||
unsigned int _ecx;
|
|
||||||
unsigned int _edx;
|
|
||||||
__cpuid(info, _eax, _ebx, _ecx, _edx);
|
|
||||||
*eax = _eax;
|
|
||||||
*ebx = _ebx;
|
|
||||||
*ecx = _ecx;
|
|
||||||
*edx = _edx;
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
static int have_clmul(void)
|
|
||||||
{
|
|
||||||
unsigned eax, ebx, ecx, edx;
|
|
||||||
int has_pclmulqdq;
|
|
||||||
int has_sse41;
|
|
||||||
cpuid(1 /* feature bits */, &eax, &ebx, &ecx, &edx);
|
|
||||||
|
|
||||||
has_pclmulqdq = ecx & 0x2; /* bit 1 */
|
|
||||||
has_sse41 = ecx & 0x80000; /* bit 19 */
|
|
||||||
|
|
||||||
return has_pclmulqdq && has_sse41;
|
|
||||||
}
|
|
||||||
|
|
||||||
CLMUL
|
CLMUL
|
||||||
static void fold_1(__m128i* xmm_crc0, __m128i* xmm_crc1, __m128i* xmm_crc2, __m128i* xmm_crc3)
|
static void fold_1(__m128i* xmm_crc0, __m128i* xmm_crc1, __m128i* xmm_crc2, __m128i* xmm_crc3)
|
||||||
{
|
{
|
||||||
@@ -532,3 +488,5 @@ done:
|
|||||||
crc = _mm_extract_epi32(xmm_crc3, 2);
|
crc = _mm_extract_epi32(xmm_crc3, 2);
|
||||||
return ~crc;
|
return ~crc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
10
crc64.c
10
crc64.c
@@ -20,6 +20,7 @@
|
|||||||
|
|
||||||
#include "library.h"
|
#include "library.h"
|
||||||
#include "crc64.h"
|
#include "crc64.h"
|
||||||
|
#include "simd.h"
|
||||||
|
|
||||||
AARU_EXPORT crc64_ctx* AARU_CALL crc64_init(void)
|
AARU_EXPORT crc64_ctx* AARU_CALL crc64_init(void)
|
||||||
{
|
{
|
||||||
@@ -35,9 +36,15 @@ AARU_EXPORT crc64_ctx* AARU_CALL crc64_init(void)
|
|||||||
|
|
||||||
AARU_EXPORT int AARU_CALL crc64_update(crc64_ctx* ctx, const uint8_t* data, uint32_t len)
|
AARU_EXPORT int AARU_CALL crc64_update(crc64_ctx* ctx, const uint8_t* data, uint32_t len)
|
||||||
{
|
{
|
||||||
|
#if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \
|
||||||
|
defined(__i386__) || defined(__THW_INTEL) || defined(_M_IX86)
|
||||||
|
if(have_clmul())
|
||||||
|
{
|
||||||
ctx->crc = ~crc64_clmul(~ctx->crc, data, len);
|
ctx->crc = ~crc64_clmul(~ctx->crc, data, len);
|
||||||
return 0;
|
return 0;
|
||||||
/*
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
// Unroll according to Intel slicing by uint8_t
|
// Unroll according to Intel slicing by uint8_t
|
||||||
// http://www.intel.com/technology/comms/perfnet/download/CRC_generators.pdf
|
// http://www.intel.com/technology/comms/perfnet/download/CRC_generators.pdf
|
||||||
// http://sourceforge.net/projects/slicing-by-8/
|
// http://sourceforge.net/projects/slicing-by-8/
|
||||||
@@ -73,7 +80,6 @@ AARU_EXPORT int AARU_CALL crc64_update(crc64_ctx* ctx, const uint8_t* data, uint
|
|||||||
|
|
||||||
ctx->crc = crc;
|
ctx->crc = crc;
|
||||||
return 0;
|
return 0;
|
||||||
*/
|
|
||||||
}
|
}
|
||||||
|
|
||||||
AARU_EXPORT int AARU_CALL crc64_final(crc64_ctx* ctx, uint64_t* crc)
|
AARU_EXPORT int AARU_CALL crc64_final(crc64_ctx* ctx, uint64_t* crc)
|
||||||
|
|||||||
6
crc64.h
6
crc64.h
@@ -234,8 +234,12 @@ const static uint64_t crc64_table[4][256] = {
|
|||||||
#define CRC64_ECMA_POLY 0xC96C5795D7870F42
|
#define CRC64_ECMA_POLY 0xC96C5795D7870F42
|
||||||
#define CRC64_ECMA_SEED 0xFFFFFFFFFFFFFFFF
|
#define CRC64_ECMA_SEED 0xFFFFFFFFFFFFFFFF
|
||||||
|
|
||||||
uint64_t crc64_clmul(uint64_t crc, const uint8_t* data, size_t length);
|
|
||||||
AARU_EXPORT crc64_ctx* AARU_CALL crc64_init();
|
AARU_EXPORT crc64_ctx* AARU_CALL crc64_init();
|
||||||
AARU_EXPORT int AARU_CALL crc64_update(crc64_ctx* ctx, const uint8_t* data, uint32_t len);
|
AARU_EXPORT int AARU_CALL crc64_update(crc64_ctx* ctx, const uint8_t* data, uint32_t len);
|
||||||
AARU_EXPORT int AARU_CALL crc64_final(crc64_ctx* ctx, uint64_t* crc);
|
AARU_EXPORT int AARU_CALL crc64_final(crc64_ctx* ctx, uint64_t* crc);
|
||||||
AARU_EXPORT void AARU_CALL crc64_free(crc64_ctx* ctx);
|
AARU_EXPORT void AARU_CALL crc64_free(crc64_ctx* ctx);
|
||||||
|
|
||||||
|
#if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \
|
||||||
|
defined(__i386__) || defined(__THW_INTEL) || defined(_M_IX86)
|
||||||
|
uint64_t crc64_clmul(uint64_t crc, const uint8_t* data, size_t length);
|
||||||
|
#endif
|
||||||
@@ -1,3 +1,6 @@
|
|||||||
|
#if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \
|
||||||
|
defined(__i386__) || defined(__THW_INTEL) || defined(_M_IX86)
|
||||||
|
|
||||||
#include <inttypes.h>
|
#include <inttypes.h>
|
||||||
#include <smmintrin.h>
|
#include <smmintrin.h>
|
||||||
#include <wmmintrin.h>
|
#include <wmmintrin.h>
|
||||||
@@ -184,3 +187,5 @@ CLMUL uint64_t crc64_clmul(uint64_t crc, const uint8_t* data, size_t length)
|
|||||||
return ~(((uint64_t)(uint32_t)_mm_extract_epi32(T2, 3) << 32) | (uint64_t)(uint32_t)_mm_extract_epi32(T2, 2));
|
return ~(((uint64_t)(uint32_t)_mm_extract_epi32(T2, 3) << 32) | (uint64_t)(uint32_t)_mm_extract_epi32(T2, 2));
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
46
simd.c
Normal file
46
simd.c
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
/*
 * Runtime CPU feature detection used to decide whether the carry-less
 * multiplication (CLMUL) checksum implementations may be executed.
 *
 * The CPUID instruction only exists on IA32/AMD64, so everything that touches
 * it is compiled for those targets alone.  On any other architecture
 * have_clmul() simply reports that the feature is unavailable, which keeps
 * this translation unit buildable everywhere.
 */
#if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \
    defined(__i386__) || defined(__THW_INTEL) || defined(_M_IX86)

#ifdef _MSC_VER
#include <intrin.h>
#else
/*
 * Newer versions of GCC and clang come with cpuid.h
 * (ftr GCC 4.7 in Debian Wheezy has this)
 */
#include <cpuid.h>
#endif

/* Feature flags reported in ECX by CPUID leaf 1. */
#define SIMD_CPUID_ECX_PCLMULQDQ 0x00000002u /* bit 1 */
#define SIMD_CPUID_ECX_SSE41     0x00080000u /* bit 19 */

/*
 * Executes CPUID for the requested leaf and stores the four result registers.
 *
 * info: CPUID leaf (value loaded into EAX before the instruction).
 * eax, ebx, ecx, edx: receive the corresponding output registers; all four
 *                     must be valid pointers.
 *
 * On GCC/clang, if the processor does not support the requested leaf all four
 * outputs are set to zero, so callers see "no feature bits set".
 */
static void cpuid(int info, unsigned* eax, unsigned* ebx, unsigned* ecx, unsigned* edx)
{
#ifdef _MSC_VER
    /* The MSVC intrinsic is declared as __cpuid(int cpuInfo[4], int function_id). */
    int registers[4];

    __cpuid(registers, info);

    *eax = (unsigned)registers[0];
    *ebx = (unsigned)registers[1];
    *ecx = (unsigned)registers[2];
    *edx = (unsigned)registers[3];
#else
    /* GCC, clang */
    unsigned int reg_a = 0;
    unsigned int reg_b = 0;
    unsigned int reg_c = 0;
    unsigned int reg_d = 0;

    /* __get_cpuid() returns 0 when the leaf is unsupported, leaving the outputs untouched. */
    if(!__get_cpuid((unsigned int)info, &reg_a, &reg_b, &reg_c, &reg_d))
    {
        reg_a = 0;
        reg_b = 0;
        reg_c = 0;
        reg_d = 0;
    }

    *eax = reg_a;
    *ebx = reg_b;
    *ecx = reg_c;
    *edx = reg_d;
#endif
}

/*
 * Reports whether the running processor can execute the CLMUL code paths.
 *
 * Returns 1 when both PCLMULQDQ and SSE4.1 are advertised by CPUID leaf 1,
 * 0 otherwise.
 */
int have_clmul(void)
{
    unsigned eax, ebx, ecx, edx;

    cpuid(1 /* feature bits */, &eax, &ebx, &ecx, &edx);

    return (ecx & SIMD_CPUID_ECX_PCLMULQDQ) != 0 && (ecx & SIMD_CPUID_ECX_SSE41) != 0;
}

#else /* not IA32/AMD64 */

/*
 * Reports whether the running processor can execute the CLMUL code paths.
 *
 * CLMUL is an x86 extension, so on every other architecture the answer is
 * always 0.
 */
int have_clmul(void)
{
    return 0;
}

#endif
|
||||||
Reference in New Issue
Block a user