mirror of
https://github.com/aaru-dps/Aaru.Checksums.Native.git
synced 2025-12-16 19:24:29 +00:00
Refactor and reformat.
This commit is contained in:
@@ -6,7 +6,7 @@
|
||||
* https://github.com/rawrunprotected/crc
|
||||
*/
|
||||
|
||||
#if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \
|
||||
#if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \
|
||||
defined(__i386__) || defined(__THW_INTEL) || defined(_M_IX86)
|
||||
|
||||
#include <inttypes.h>
|
||||
@@ -14,7 +14,9 @@
|
||||
#include <wmmintrin.h>
|
||||
|
||||
#ifdef _MSC_VER
|
||||
|
||||
#include <intrin.h>
|
||||
|
||||
#endif
|
||||
|
||||
#include "library.h"
|
||||
@@ -54,26 +56,26 @@ static uint64_t div129by65(uint64_t poly)
|
||||
}
|
||||
|
||||
static const uint8_t shuffleMasks[] = {
|
||||
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
|
||||
0x8f, 0x8e, 0x8d, 0x8c, 0x8b, 0x8a, 0x89, 0x88, 0x87, 0x86, 0x85, 0x84, 0x83, 0x82, 0x81, 0x80,
|
||||
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
|
||||
0x8f, 0x8e, 0x8d, 0x8c, 0x8b, 0x8a, 0x89, 0x88, 0x87, 0x86, 0x85, 0x84, 0x83, 0x82, 0x81, 0x80,
|
||||
};
|
||||
|
||||
CLMUL static void shiftRight128(__m128i in, size_t n, __m128i* outLeft, __m128i* outRight)
|
||||
TARGET_WITH_CLMUL static void shiftRight128(__m128i in, size_t n, __m128i *outLeft, __m128i *outRight)
|
||||
{
|
||||
const __m128i maskA = _mm_loadu_si128((const __m128i*)(shuffleMasks + (16 - n)));
|
||||
const __m128i maskA = _mm_loadu_si128((const __m128i *)(shuffleMasks + (16 - n)));
|
||||
const __m128i maskB = _mm_xor_si128(maskA, _mm_cmpeq_epi8(_mm_setzero_si128(), _mm_setzero_si128()));
|
||||
|
||||
*outLeft = _mm_shuffle_epi8(in, maskB);
|
||||
*outRight = _mm_shuffle_epi8(in, maskA);
|
||||
}
|
||||
|
||||
CLMUL static __m128i fold(__m128i in, __m128i foldConstants)
|
||||
TARGET_WITH_CLMUL static __m128i fold(__m128i in, __m128i foldConstants)
|
||||
{
|
||||
return _mm_xor_si128(_mm_clmulepi64_si128(in, foldConstants, 0x00), _mm_clmulepi64_si128(in, foldConstants, 0x11));
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Calculate the CRC-64 checksum using CLMUL instruction extension.
|
||||
* @brief Calculate the CRC-64 checksum using TARGET_WITH_CLMUL instruction extension.
|
||||
*
|
||||
* @param previous_crc The previously calculated CRC-64 checksum.
|
||||
* @param data Pointer to the input data buffer.
|
||||
@@ -81,7 +83,7 @@ CLMUL static __m128i fold(__m128i in, __m128i foldConstants)
|
||||
*
|
||||
* @return The calculated CRC-64 checksum.
|
||||
*/
|
||||
AARU_EXPORT CLMUL uint64_t AARU_CALL crc64_clmul(uint64_t crc, const uint8_t* data, long length)
|
||||
AARU_EXPORT TARGET_WITH_CLMUL uint64_t AARU_CALL crc64_clmul(uint64_t crc, const uint8_t *data, long length)
|
||||
{
|
||||
const uint64_t k1 = 0xe05dd497ca393ae4; // bitReflect(expMod65(128 + 64, poly, 1)) << 1;
|
||||
const uint64_t k2 = 0xdabe95afc7875f40; // bitReflect(expMod65(128, poly, 1)) << 1;
|
||||
@@ -91,18 +93,18 @@ AARU_EXPORT CLMUL uint64_t AARU_CALL crc64_clmul(uint64_t crc, const uint8_t* da
|
||||
const __m128i foldConstants1 = _mm_set_epi64x(k2, k1);
|
||||
const __m128i foldConstants2 = _mm_set_epi64x(p, mu);
|
||||
|
||||
const uint8_t* end = data + length;
|
||||
const uint8_t *end = data + length;
|
||||
|
||||
// Align pointers
|
||||
const __m128i* alignedData = (const __m128i*)((uintptr_t)data & ~(uintptr_t)15);
|
||||
const __m128i* alignedEnd = (const __m128i*)(((uintptr_t)end + 15) & ~(uintptr_t)15);
|
||||
const __m128i *alignedData = (const __m128i *)((uintptr_t)data & ~(uintptr_t)15);
|
||||
const __m128i *alignedEnd = (const __m128i *)(((uintptr_t)end + 15) & ~(uintptr_t)15);
|
||||
|
||||
const size_t leadInSize = data - (const uint8_t*)alignedData;
|
||||
const size_t leadOutSize = (const uint8_t*)alignedEnd - end;
|
||||
const size_t leadInSize = data - (const uint8_t *)alignedData;
|
||||
const size_t leadOutSize = (const uint8_t *)alignedEnd - end;
|
||||
|
||||
const size_t alignedLength = alignedEnd - alignedData;
|
||||
|
||||
const __m128i leadInMask = _mm_loadu_si128((const __m128i*)(shuffleMasks + (16 - leadInSize)));
|
||||
const __m128i leadInMask = _mm_loadu_si128((const __m128i *)(shuffleMasks + (16 - leadInSize)));
|
||||
const __m128i data0 = _mm_blendv_epi8(_mm_setzero_si128(), _mm_load_si128(alignedData), leadInMask);
|
||||
|
||||
#if defined(_WIN64)
|
||||
@@ -122,7 +124,7 @@ AARU_EXPORT CLMUL uint64_t AARU_CALL crc64_clmul(uint64_t crc, const uint8_t* da
|
||||
shiftRight128(data0, leadOutSize, &A, &B);
|
||||
|
||||
const __m128i P = _mm_xor_si128(A, crc0);
|
||||
R = _mm_xor_si128(_mm_clmulepi64_si128(P, foldConstants1, 0x10),
|
||||
R = _mm_xor_si128(_mm_clmulepi64_si128(P, foldConstants1, 0x10),
|
||||
_mm_xor_si128(_mm_srli_si128(P, 8), _mm_slli_si128(crc1, 8)));
|
||||
}
|
||||
else if(alignedLength == 2)
|
||||
@@ -140,7 +142,7 @@ AARU_EXPORT CLMUL uint64_t AARU_CALL crc64_clmul(uint64_t crc, const uint8_t* da
|
||||
shiftRight128(data1, leadOutSize, &C, &D);
|
||||
|
||||
const __m128i P = _mm_xor_si128(_mm_xor_si128(B, C), crc0);
|
||||
R = _mm_xor_si128(_mm_clmulepi64_si128(P, foldConstants1, 0x10),
|
||||
R = _mm_xor_si128(_mm_clmulepi64_si128(P, foldConstants1, 0x10),
|
||||
_mm_xor_si128(_mm_srli_si128(P, 8), _mm_slli_si128(crc1, 8)));
|
||||
}
|
||||
else
|
||||
@@ -154,7 +156,7 @@ AARU_EXPORT CLMUL uint64_t AARU_CALL crc64_clmul(uint64_t crc, const uint8_t* da
|
||||
shiftRight128(_mm_xor_si128(data1, crc1), leadOutSize, &C, &D);
|
||||
|
||||
const __m128i P = _mm_xor_si128(fold(A, foldConstants1), _mm_xor_si128(B, C));
|
||||
R = _mm_xor_si128(_mm_clmulepi64_si128(P, foldConstants1, 0x10), _mm_srli_si128(P, 8));
|
||||
R = _mm_xor_si128(_mm_clmulepi64_si128(P, foldConstants1, 0x10), _mm_srli_si128(P, 8));
|
||||
}
|
||||
}
|
||||
else
|
||||
@@ -177,7 +179,8 @@ AARU_EXPORT CLMUL uint64_t AARU_CALL crc64_clmul(uint64_t crc, const uint8_t* da
|
||||
}
|
||||
|
||||
__m128i P;
|
||||
if(length == 16) { P = _mm_xor_si128(accumulator, _mm_load_si128(alignedData)); }
|
||||
if(length == 16)
|
||||
{ P = _mm_xor_si128(accumulator, _mm_load_si128(alignedData)); }
|
||||
else
|
||||
{
|
||||
const __m128i end0 = _mm_xor_si128(accumulator, _mm_load_si128(alignedData));
|
||||
@@ -196,7 +199,9 @@ AARU_EXPORT CLMUL uint64_t AARU_CALL crc64_clmul(uint64_t crc, const uint8_t* da
|
||||
// Final Barrett reduction
|
||||
const __m128i T1 = _mm_clmulepi64_si128(R, foldConstants2, 0x00);
|
||||
const __m128i T2 =
|
||||
_mm_xor_si128(_mm_xor_si128(_mm_clmulepi64_si128(T1, foldConstants2, 0x10), _mm_slli_si128(T1, 8)), R);
|
||||
_mm_xor_si128(
|
||||
_mm_xor_si128(_mm_clmulepi64_si128(T1, foldConstants2, 0x10), _mm_slli_si128(T1, 8)),
|
||||
R);
|
||||
|
||||
#if defined(_WIN64)
|
||||
return ~_mm_extract_epi64(T2, 1);
|
||||
|
||||
Reference in New Issue
Block a user