Consistency of method signatures.

2021-10-13 03:07:04 +01:00
parent 3797b44289
commit e63125ac04
14 changed files with 167 additions and 167 deletions
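In practice the new convention puts the running checksum first, followed by the data pointer and its length, with every buffer typed as const uint8_t*. A minimal sketch of a caller under the new crc32_clmul signature (the wrapper function, the bare prototype without the export/target macros, and the seed value are illustrative assumptions, not part of the commit):

#include <stdint.h>

/* Prototype after this commit (AARU_EXPORT/CLMUL/AARU_CALL macros omitted here);
 * before the change it was crc32_clmul(const uint8_t* src, long len, uint32_t initial_crc). */
uint32_t crc32_clmul(uint32_t previous_crc, const uint8_t* data, long len);

/* Hypothetical caller: seed, invert around the call, and pass the previous CRC
 * first, mirroring the updated test fixtures at the bottom of this diff. */
static uint32_t crc32_iso(const uint8_t* data, long len)
{
    uint32_t crc = 0xFFFFFFFFu;          /* assumed value of CRC32_ISO_SEED   */
    crc = ~crc32_clmul(~crc, data, len); /* previous CRC is now the first arg */
    return crc ^ 0xFFFFFFFFu;            /* final XOR, as the tests do        */
}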

View File

@@ -78,7 +78,7 @@ AARU_EXPORT int AARU_CALL adler32_update(adler32_ctx* ctx, const uint8_t* data,
     return 0;
 }
-AARU_EXPORT void AARU_CALL adler32_slicing(uint16_t* sum1, uint16_t* sum2, const unsigned char* data, long len)
+AARU_EXPORT void AARU_CALL adler32_slicing(uint16_t* sum1, uint16_t* sum2, const uint8_t* data, long len)
 {
     uint32_t s1 = *sum1;
     uint32_t s2 = *sum2;

View File

@@ -33,19 +33,19 @@ AARU_EXPORT adler32_ctx* AARU_CALL adler32_init();
 AARU_EXPORT int AARU_CALL adler32_update(adler32_ctx* ctx, const uint8_t* data, uint32_t len);
 AARU_EXPORT int AARU_CALL adler32_final(adler32_ctx* ctx, uint32_t* checksum);
 AARU_EXPORT void AARU_CALL adler32_free(adler32_ctx* ctx);
-AARU_EXPORT void AARU_CALL adler32_slicing(uint16_t* sum1, uint16_t* sum2, const unsigned char* data, long len);
+AARU_EXPORT void AARU_CALL adler32_slicing(uint16_t* sum1, uint16_t* sum2, const uint8_t* data, long len);
 #if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \
     defined(__i386__) || defined(__THW_INTEL) || defined(_M_IX86)
-AARU_EXPORT SSSE3 void AARU_CALL adler32_ssse3(uint16_t* sum1, uint16_t* sum2, const unsigned char* buf, long len);
-AARU_EXPORT AVX2 void AARU_CALL adler32_avx2(uint16_t* sum1, uint16_t* sum2, const unsigned char* buf, long len);
+AARU_EXPORT SSSE3 void AARU_CALL adler32_ssse3(uint16_t* sum1, uint16_t* sum2, const uint8_t* data, long len);
+AARU_EXPORT AVX2 void AARU_CALL adler32_avx2(uint16_t* sum1, uint16_t* sum2, const uint8_t* data, long len);
 #endif
 #if defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM)
-AARU_EXPORT void AARU_CALL adler32_neon(uint16_t* sum1, uint16_t* sum2, const unsigned char* buf, uint32_t len);
+AARU_EXPORT void AARU_CALL adler32_neon(uint16_t* sum1, uint16_t* sum2, const uint8_t* data, uint32_t len);
 #endif
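For the Adler-32 entry points the only change is the buffer type; the checksum halves still travel through sum1/sum2. A hypothetical caller of the renamed adler32_slicing (assuming, as the pointer parameters suggest, that the routine writes the updated halves back; the bare prototype and wrapper below are illustrative only):

#include <stdint.h>

/* Prototype after this commit, with the export/calling-convention macros omitted. */
void adler32_slicing(uint16_t* sum1, uint16_t* sum2, const uint8_t* data, long len);

/* Hypothetical caller: the standard Adler-32 seed split into its 16-bit halves. */
static uint32_t adler32_of(const uint8_t* data, long len)
{
    uint16_t sum1 = 1, sum2 = 0;              /* Adler-32 starts at s1=1, s2=0 */
    adler32_slicing(&sum1, &sum2, data, len); /* assumed to update both halves */
    return ((uint32_t)sum2 << 16) | sum1;     /* recombine into the checksum   */
}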

View File

@@ -12,7 +12,7 @@
 #include "adler32.h"
 #include "simd.h"
-AARU_EXPORT AVX2 void AARU_CALL adler32_avx2(uint16_t* sum1, uint16_t* sum2, const unsigned char* buf, long len)
+AARU_EXPORT AVX2 void AARU_CALL adler32_avx2(uint16_t* sum1, uint16_t* sum2, const uint8_t* data, long len)
 {
     uint32_t s1 = *sum1;
     uint32_t s2 = *sum2;
@@ -77,7 +77,7 @@ AARU_EXPORT AVX2 void AARU_CALL adler32_avx2(uint16_t* sum1, uint16_t* sum2, con
         /*
          * Load 32 input bytes.
          */
-        const __m256i bytes = _mm256_lddqu_si256((__m256i*)(buf));
+        const __m256i bytes = _mm256_lddqu_si256((__m256i*)(data));
         /*
          * Add previous block byte sum to v_ps.
@@ -91,7 +91,7 @@ AARU_EXPORT AVX2 void AARU_CALL adler32_avx2(uint16_t* sum1, uint16_t* sum2, con
         const __m256i mad = _mm256_maddubs_epi16(bytes, tap);
         v_s2 = _mm256_add_epi32(v_s2, _mm256_madd_epi16(mad, ones));
-        buf += BLOCK_SIZE;
+        data += BLOCK_SIZE;
     } while(--n);
     __m128i sum = _mm_add_epi32(_mm256_castsi256_si128(v_s1), _mm256_extracti128_si256(v_s1, 1));
@@ -123,25 +123,25 @@ AARU_EXPORT AVX2 void AARU_CALL adler32_avx2(uint16_t* sum1, uint16_t* sum2, con
     {
         if(len >= 16)
         {
-            s2 += (s1 += *buf++);
-            s2 += (s1 += *buf++);
-            s2 += (s1 += *buf++);
-            s2 += (s1 += *buf++);
-            s2 += (s1 += *buf++);
-            s2 += (s1 += *buf++);
-            s2 += (s1 += *buf++);
-            s2 += (s1 += *buf++);
-            s2 += (s1 += *buf++);
-            s2 += (s1 += *buf++);
-            s2 += (s1 += *buf++);
-            s2 += (s1 += *buf++);
-            s2 += (s1 += *buf++);
-            s2 += (s1 += *buf++);
-            s2 += (s1 += *buf++);
-            s2 += (s1 += *buf++);
+            s2 += (s1 += *data++);
+            s2 += (s1 += *data++);
+            s2 += (s1 += *data++);
+            s2 += (s1 += *data++);
+            s2 += (s1 += *data++);
+            s2 += (s1 += *data++);
+            s2 += (s1 += *data++);
+            s2 += (s1 += *data++);
+            s2 += (s1 += *data++);
+            s2 += (s1 += *data++);
+            s2 += (s1 += *data++);
+            s2 += (s1 += *data++);
+            s2 += (s1 += *data++);
+            s2 += (s1 += *data++);
+            s2 += (s1 += *data++);
+            s2 += (s1 += *data++);
             len -= 16;
         }
-        while(len--) { s2 += (s1 += *buf++); }
+        while(len--) { s2 += (s1 += *data++); }
         if(s1 >= ADLER_MODULE) s1 -= ADLER_MODULE;
         s2 %= ADLER_MODULE;
     }

View File

@@ -10,7 +10,7 @@
 #include "adler32.h"
 #include "simd.h"
-TARGET_WITH_SIMD void adler32_neon(uint16_t* sum1, uint16_t* sum2, const unsigned char* buf, uint32_t len)
+TARGET_WITH_SIMD void adler32_neon(uint16_t* sum1, uint16_t* sum2, const uint8_t* data, uint32_t len)
 {
     /*
      * Split Adler-32 into component sums.
@@ -20,11 +20,11 @@ TARGET_WITH_SIMD void adler32_neon(uint16_t* sum1, uint16_t* sum2, const unsigne
     /*
      * Serially compute s1 & s2, until the data is 16-byte aligned.
      */
-    if((uintptr_t)buf & 15)
+    if((uintptr_t)data & 15)
     {
-        while((uintptr_t)buf & 15)
+        while((uintptr_t)data & 15)
         {
-            s2 += (s1 += *buf++);
+            s2 += (s1 += *data++);
             --len;
         }
         if(s1 >= ADLER_MODULE) s1 -= ADLER_MODULE;
@@ -60,8 +60,8 @@ TARGET_WITH_SIMD void adler32_neon(uint16_t* sum1, uint16_t* sum2, const unsigne
         /*
          * Load 32 input bytes.
          */
-        const uint8x16_t bytes1 = vld1q_u8((uint8_t*)(buf));
-        const uint8x16_t bytes2 = vld1q_u8((uint8_t*)(buf + 16));
+        const uint8x16_t bytes1 = vld1q_u8((uint8_t*)(data));
+        const uint8x16_t bytes2 = vld1q_u8((uint8_t*)(data + 16));
         /*
          * Add previous block byte sum to v_s2.
          */
@@ -77,7 +77,7 @@ TARGET_WITH_SIMD void adler32_neon(uint16_t* sum1, uint16_t* sum2, const unsigne
         v_column_sum_2 = vaddw_u8(v_column_sum_2, vget_high_u8(bytes1));
         v_column_sum_3 = vaddw_u8(v_column_sum_3, vget_low_u8(bytes2));
         v_column_sum_4 = vaddw_u8(v_column_sum_4, vget_high_u8(bytes2));
-        buf += BLOCK_SIZE;
+        data += BLOCK_SIZE;
     } while(--n);
     v_s2 = vshlq_n_u32(v_s2, 5);
     /*
@@ -123,25 +123,25 @@ TARGET_WITH_SIMD void adler32_neon(uint16_t* sum1, uint16_t* sum2, const unsigne
     {
         if(len >= 16)
         {
-            s2 += (s1 += *buf++);
-            s2 += (s1 += *buf++);
-            s2 += (s1 += *buf++);
-            s2 += (s1 += *buf++);
-            s2 += (s1 += *buf++);
-            s2 += (s1 += *buf++);
-            s2 += (s1 += *buf++);
-            s2 += (s1 += *buf++);
-            s2 += (s1 += *buf++);
-            s2 += (s1 += *buf++);
-            s2 += (s1 += *buf++);
-            s2 += (s1 += *buf++);
-            s2 += (s1 += *buf++);
-            s2 += (s1 += *buf++);
-            s2 += (s1 += *buf++);
-            s2 += (s1 += *buf++);
+            s2 += (s1 += *data++);
+            s2 += (s1 += *data++);
+            s2 += (s1 += *data++);
+            s2 += (s1 += *data++);
+            s2 += (s1 += *data++);
+            s2 += (s1 += *data++);
+            s2 += (s1 += *data++);
+            s2 += (s1 += *data++);
+            s2 += (s1 += *data++);
+            s2 += (s1 += *data++);
+            s2 += (s1 += *data++);
+            s2 += (s1 += *data++);
+            s2 += (s1 += *data++);
+            s2 += (s1 += *data++);
+            s2 += (s1 += *data++);
+            s2 += (s1 += *data++);
             len -= 16;
         }
-        while(len--) { s2 += (s1 += *buf++); }
+        while(len--) { s2 += (s1 += *data++); }
         if(s1 >= ADLER_MODULE) s1 -= ADLER_MODULE;
         s2 %= ADLER_MODULE;
     }

View File

@@ -51,7 +51,7 @@
 #include "library.h"
 #include "adler32.h"
-AARU_EXPORT SSSE3 void AARU_CALL adler32_ssse3(uint16_t* sum1, uint16_t* sum2, const unsigned char* buf, long len)
+AARU_EXPORT SSSE3 void AARU_CALL adler32_ssse3(uint16_t* sum1, uint16_t* sum2, const uint8_t* data, long len)
 {
     uint32_t s1 = *sum1;
     uint32_t s2 = *sum2;
@@ -82,8 +82,8 @@ AARU_EXPORT SSSE3 void AARU_CALL adler32_ssse3(uint16_t* sum1, uint16_t* sum2, c
         /*
          * Load 32 input bytes.
         */
-        const __m128i bytes1 = _mm_loadu_si128((__m128i*)(buf));
-        const __m128i bytes2 = _mm_loadu_si128((__m128i*)(buf + 16));
+        const __m128i bytes1 = _mm_loadu_si128((__m128i*)(data));
+        const __m128i bytes2 = _mm_loadu_si128((__m128i*)(data + 16));
         /*
          * Add previous block byte sum to v_ps.
          */
@@ -98,7 +98,7 @@ AARU_EXPORT SSSE3 void AARU_CALL adler32_ssse3(uint16_t* sum1, uint16_t* sum2, c
         v_s1 = _mm_add_epi32(v_s1, _mm_sad_epu8(bytes2, zero));
         const __m128i mad2 = _mm_maddubs_epi16(bytes2, tap2);
         v_s2 = _mm_add_epi32(v_s2, _mm_madd_epi16(mad2, ones));
-        buf += BLOCK_SIZE;
+        data += BLOCK_SIZE;
     } while(--n);
     v_s2 = _mm_add_epi32(v_s2, _mm_slli_epi32(v_ps, 5));
     /*
@@ -127,25 +127,25 @@ AARU_EXPORT SSSE3 void AARU_CALL adler32_ssse3(uint16_t* sum1, uint16_t* sum2, c
     {
         if(len >= 16)
         {
-            s2 += (s1 += *buf++);
-            s2 += (s1 += *buf++);
-            s2 += (s1 += *buf++);
-            s2 += (s1 += *buf++);
-            s2 += (s1 += *buf++);
-            s2 += (s1 += *buf++);
-            s2 += (s1 += *buf++);
-            s2 += (s1 += *buf++);
-            s2 += (s1 += *buf++);
-            s2 += (s1 += *buf++);
-            s2 += (s1 += *buf++);
-            s2 += (s1 += *buf++);
-            s2 += (s1 += *buf++);
-            s2 += (s1 += *buf++);
-            s2 += (s1 += *buf++);
-            s2 += (s1 += *buf++);
+            s2 += (s1 += *data++);
+            s2 += (s1 += *data++);
+            s2 += (s1 += *data++);
+            s2 += (s1 += *data++);
+            s2 += (s1 += *data++);
+            s2 += (s1 += *data++);
+            s2 += (s1 += *data++);
+            s2 += (s1 += *data++);
+            s2 += (s1 += *data++);
+            s2 += (s1 += *data++);
+            s2 += (s1 += *data++);
+            s2 += (s1 += *data++);
+            s2 += (s1 += *data++);
+            s2 += (s1 += *data++);
+            s2 += (s1 += *data++);
+            s2 += (s1 += *data++);
             len -= 16;
         }
-        while(len--) { s2 += (s1 += *buf++); }
+        while(len--) { s2 += (s1 += *data++); }
         if(s1 >= ADLER_MODULE) s1 -= ADLER_MODULE;
         s2 %= ADLER_MODULE;
     }

crc32.c
View File

@@ -41,7 +41,7 @@ AARU_EXPORT int AARU_CALL crc32_update(crc32_ctx* ctx, const uint8_t* data, uint
     defined(__i386__) || defined(__THW_INTEL) || defined(_M_IX86)
     if(have_clmul())
     {
-        ctx->crc = ~crc32_clmul(data, (long)len, ~ctx->crc);
+        ctx->crc = ~crc32_clmul(~ctx->crc, data, (long)len);
         return 0;
     }
@@ -58,7 +58,7 @@ AARU_EXPORT int AARU_CALL crc32_update(crc32_ctx* ctx, const uint8_t* data, uint
 #endif
     if(have_neon())
     {
-        ctx->crc = ~crc32_vmull(data, len, ~ctx->crc);
+        ctx->crc = ~crc32_vmull(~ctx->crc, data, len);
         return 0;
     }
 #endif
@@ -67,7 +67,7 @@ AARU_EXPORT int AARU_CALL crc32_update(crc32_ctx* ctx, const uint8_t* data, uint
     return 0;
 }
-AARU_EXPORT void AARU_CALL crc32_slicing(uint32_t* crc, const unsigned char* data, long len)
+AARU_EXPORT void AARU_CALL crc32_slicing(uint32_t* previous_crc, const uint8_t* data, long len)
 {
     // Unroll according to Intel slicing by uint8_t
     // http://www.intel.com/technology/comms/perfnet/download/CRC_generators.pdf
@@ -79,7 +79,7 @@ AARU_EXPORT void AARU_CALL crc32_slicing(uint32_t* crc, const unsigned char* dat
     const size_t bytes_at_once = 8 * unroll;
     uintptr_t unaligned_length = (4 - (((uintptr_t)current_char) & 3)) & 3;
-    c = *crc;
+    c = *previous_crc;
     while((len != 0) && (unaligned_length != 0))
     {
@@ -110,7 +110,7 @@ AARU_EXPORT void AARU_CALL crc32_slicing(uint32_t* crc, const unsigned char* dat
     while(len-- != 0) c = (c >> 8) ^ crc32_table[0][(c & 0xFF) ^ *current_char++];
-    *crc = c;
+    *previous_crc = c;
 }
 AARU_EXPORT int AARU_CALL crc32_final(crc32_ctx* ctx, uint32_t* crc)

crc32.h
View File

@@ -263,18 +263,18 @@ AARU_EXPORT crc32_ctx* AARU_CALL crc32_init();
 AARU_EXPORT int AARU_CALL crc32_update(crc32_ctx* ctx, const uint8_t* data, uint32_t len);
 AARU_EXPORT int AARU_CALL crc32_final(crc32_ctx* ctx, uint32_t* crc);
 AARU_EXPORT void AARU_CALL crc32_free(crc32_ctx* ctx);
-AARU_EXPORT void AARU_CALL crc32_slicing(uint32_t* crc, const unsigned char* data, long len);
+AARU_EXPORT void AARU_CALL crc32_slicing(uint32_t* previous_crc, const uint8_t* data, long len);
 #if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \
     defined(__i386__) || defined(__THW_INTEL) || defined(_M_IX86)
-AARU_EXPORT CLMUL uint32_t AARU_CALL crc32_clmul(const uint8_t* src, long len, uint32_t initial_crc);
+AARU_EXPORT CLMUL uint32_t AARU_CALL crc32_clmul(uint32_t previous_crc, const uint8_t* data, long len);
 #endif
 #if defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM)
 #if __ARM_ARCH >= 8
-AARU_EXPORT TARGET_ARMV8_WITH_CRC uint32_t AARU_CALL armv8_crc32_little(uint32_t crc,
-                                                                        const unsigned char* buf,
+AARU_EXPORT TARGET_ARMV8_WITH_CRC uint32_t AARU_CALL armv8_crc32_little(uint32_t previous_crc,
+                                                                        const uint8_t* data,
                                                                         uint32_t len);
 #endif
-AARU_EXPORT TARGET_WITH_SIMD uint32_t AARU_CALL crc32_vmull(const uint8_t* src, long len, uint32_t initial_crc);
+AARU_EXPORT TARGET_WITH_SIMD uint32_t AARU_CALL crc32_vmull(uint32_t previous_crc, const uint8_t* data, long len);
 #endif

View File

@@ -9,17 +9,17 @@
 #include "library.h"
 #include "crc32.h"
-TARGET_ARMV8_WITH_CRC uint32_t armv8_crc32_little(uint32_t crc, const unsigned char* buf, uint32_t len)
+TARGET_ARMV8_WITH_CRC uint32_t armv8_crc32_little(uint32_t previous_crc, const uint8_t* data, uint32_t len)
 {
-    uint32_t c = (uint32_t)crc;
+    uint32_t c = (uint32_t)previous_crc;
 #if defined(__aarch64__) || defined(_M_ARM64)
-    while(len && ((uintptr_t)buf & 7))
+    while(len && ((uintptr_t)data & 7))
     {
-        c = __crc32b(c, *buf++);
+        c = __crc32b(c, *data++);
         --len;
     }
-    const uint64_t* buf8 = (const uint64_t*)buf;
+    const uint64_t* buf8 = (const uint64_t*)data;
     while(len >= 64)
     {
         c = __crc32d(c, *buf8++);
@@ -38,7 +38,7 @@ TARGET_ARMV8_WITH_CRC uint32_t armv8_crc32_little(uint32_t crc, const unsigned c
         len -= 8;
     }
-    buf = (const unsigned char*)buf8;
+    data = (const uint8_t*)buf8;
 #else // AARCH64
     while(len && ((uintptr_t)buf & 3))
     {
@@ -64,10 +64,10 @@ TARGET_ARMV8_WITH_CRC uint32_t armv8_crc32_little(uint32_t crc, const unsigned c
         len -= 4;
     }
-    buf = (const unsigned char*)buf4;
+    buf = (const uint8_t*)buf4;
 #endif
-    while(len--) { c = __crc32b(c, *buf++); }
+    while(len--) { c = __crc32b(c, *data++); }
     return c;
 }
 #endif

View File

@@ -237,11 +237,11 @@ static void partial_fold(const size_t len,
 */
 #define XOR_INITIAL(where) ONCE(where = _mm_xor_si128(where, xmm_initial))
-AARU_EXPORT CLMUL uint32_t AARU_CALL crc32_clmul(const uint8_t* src, long len, uint32_t initial_crc)
+AARU_EXPORT CLMUL uint32_t AARU_CALL crc32_clmul(uint32_t previous_crc, const uint8_t* data, long len)
 {
     unsigned long algn_diff;
     __m128i xmm_t0, xmm_t1, xmm_t2, xmm_t3;
-    __m128i xmm_initial = _mm_cvtsi32_si128(initial_crc);
+    __m128i xmm_initial = _mm_cvtsi32_si128(previous_crc);
     __m128i xmm_crc0 = _mm_cvtsi32_si128(0x9db42487);
     __m128i xmm_crc1 = _mm_setzero_si128();
     __m128i xmm_crc2 = _mm_setzero_si128();
@@ -259,34 +259,34 @@ AARU_EXPORT CLMUL uint32_t AARU_CALL crc32_clmul(const uint8_t* src, long len, u
     if(len < 16)
     {
-        if(len == 0) return initial_crc;
+        if(len == 0) return previous_crc;
         if(len < 4)
         {
            /*
             * no idea how to do this for <4 bytes, delegate to classic impl.
             */
-            uint32_t crc = ~initial_crc;
+            uint32_t crc = ~previous_crc;
            switch(len)
            {
-                case 3: crc = (crc >> 8) ^ crc32_table[0][(crc & 0xFF) ^ *src++];
-                case 2: crc = (crc >> 8) ^ crc32_table[0][(crc & 0xFF) ^ *src++];
-                case 1: crc = (crc >> 8) ^ crc32_table[0][(crc & 0xFF) ^ *src++];
+                case 3: crc = (crc >> 8) ^ crc32_table[0][(crc & 0xFF) ^ *data++];
+                case 2: crc = (crc >> 8) ^ crc32_table[0][(crc & 0xFF) ^ *data++];
+                case 1: crc = (crc >> 8) ^ crc32_table[0][(crc & 0xFF) ^ *data++];
            }
            return ~crc;
        }
-        xmm_crc_part = _mm_loadu_si128((__m128i*)src);
+        xmm_crc_part = _mm_loadu_si128((__m128i*)data);
         XOR_INITIAL(xmm_crc_part);
         goto partial;
     }
     /* this alignment computation would be wrong for len<16 handled above */
-    algn_diff = (0 - (uintptr_t)src) & 0xF;
+    algn_diff = (0 - (uintptr_t)data) & 0xF;
     if(algn_diff)
     {
-        xmm_crc_part = _mm_loadu_si128((__m128i*)src);
+        xmm_crc_part = _mm_loadu_si128((__m128i*)data);
         XOR_INITIAL(xmm_crc_part);
-        src += algn_diff;
+        data += algn_diff;
         len -= algn_diff;
         partial_fold(algn_diff, &xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3, &xmm_crc_part);
@@ -294,10 +294,10 @@ AARU_EXPORT CLMUL uint32_t AARU_CALL crc32_clmul(const uint8_t* src, long len, u
     while((len -= 64) >= 0)
     {
-        xmm_t0 = _mm_load_si128((__m128i*)src);
-        xmm_t1 = _mm_load_si128((__m128i*)src + 1);
-        xmm_t2 = _mm_load_si128((__m128i*)src + 2);
-        xmm_t3 = _mm_load_si128((__m128i*)src + 3);
+        xmm_t0 = _mm_load_si128((__m128i*)data);
+        xmm_t1 = _mm_load_si128((__m128i*)data + 1);
+        xmm_t2 = _mm_load_si128((__m128i*)data + 2);
+        xmm_t3 = _mm_load_si128((__m128i*)data + 3);
         XOR_INITIAL(xmm_t0);
@@ -308,7 +308,7 @@ AARU_EXPORT CLMUL uint32_t AARU_CALL crc32_clmul(const uint8_t* src, long len, u
         xmm_crc2 = _mm_xor_si128(xmm_crc2, xmm_t2);
         xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_t3);
-        src += 64;
+        data += 64;
     }
     /*
@@ -318,9 +318,9 @@ AARU_EXPORT CLMUL uint32_t AARU_CALL crc32_clmul(const uint8_t* src, long len, u
     {
         len += 16;
-        xmm_t0 = _mm_load_si128((__m128i*)src);
-        xmm_t1 = _mm_load_si128((__m128i*)src + 1);
-        xmm_t2 = _mm_load_si128((__m128i*)src + 2);
+        xmm_t0 = _mm_load_si128((__m128i*)data);
+        xmm_t1 = _mm_load_si128((__m128i*)data + 1);
+        xmm_t2 = _mm_load_si128((__m128i*)data + 2);
         XOR_INITIAL(xmm_t0);
@@ -332,14 +332,14 @@ AARU_EXPORT CLMUL uint32_t AARU_CALL crc32_clmul(const uint8_t* src, long len, u
         if(len == 0) goto done;
-        xmm_crc_part = _mm_load_si128((__m128i*)src + 3);
+        xmm_crc_part = _mm_load_si128((__m128i*)data + 3);
     }
     else if(len + 32 >= 0)
     {
         len += 32;
-        xmm_t0 = _mm_load_si128((__m128i*)src);
-        xmm_t1 = _mm_load_si128((__m128i*)src + 1);
+        xmm_t0 = _mm_load_si128((__m128i*)data);
+        xmm_t1 = _mm_load_si128((__m128i*)data + 1);
         XOR_INITIAL(xmm_t0);
@@ -350,13 +350,13 @@ AARU_EXPORT CLMUL uint32_t AARU_CALL crc32_clmul(const uint8_t* src, long len, u
         if(len == 0) goto done;
-        xmm_crc_part = _mm_load_si128((__m128i*)src + 2);
+        xmm_crc_part = _mm_load_si128((__m128i*)data + 2);
     }
     else if(len + 48 >= 0)
     {
         len += 48;
-        xmm_t0 = _mm_load_si128((__m128i*)src);
+        xmm_t0 = _mm_load_si128((__m128i*)data);
         XOR_INITIAL(xmm_t0);
@@ -366,13 +366,13 @@ AARU_EXPORT CLMUL uint32_t AARU_CALL crc32_clmul(const uint8_t* src, long len, u
         if(len == 0) goto done;
-        xmm_crc_part = _mm_load_si128((__m128i*)src + 1);
+        xmm_crc_part = _mm_load_si128((__m128i*)data + 1);
     }
     else
     {
         len += 64;
         if(len == 0) goto done;
-        xmm_crc_part = _mm_load_si128((__m128i*)src);
+        xmm_crc_part = _mm_load_si128((__m128i*)data);
         XOR_INITIAL(xmm_crc_part);
     }

View File

@@ -212,14 +212,14 @@ TARGET_WITH_SIMD FORCE_INLINE void partial_fold(const size_t len,
     *q_crc3 = vreinterpretq_u64_u32(ps_res);
 }
-TARGET_WITH_SIMD uint32_t crc32_vmull(const uint8_t* src, long len, uint32_t initial_crc)
+TARGET_WITH_SIMD uint32_t crc32_vmull(uint32_t previous_crc, const uint8_t* data, long len)
 {
     unsigned long algn_diff;
     uint64x2_t q_t0;
     uint64x2_t q_t1;
     uint64x2_t q_t2;
     uint64x2_t q_t3;
-    uint64x2_t q_initial = vreinterpretq_u64_u32(vsetq_lane_u32(initial_crc, vdupq_n_u32(0), 0));
+    uint64x2_t q_initial = vreinterpretq_u64_u32(vsetq_lane_u32(previous_crc, vdupq_n_u32(0), 0));
     uint64x2_t q_crc0 = vreinterpretq_u64_u32(vsetq_lane_u32(0x9db42487, vdupq_n_u32(0), 0));
     uint64x2_t q_crc1 = vreinterpretq_u64_u32(vdupq_n_u32(0));
     uint64x2_t q_crc2 = vreinterpretq_u64_u32(vdupq_n_u32(0));
@@ -240,34 +240,34 @@ TARGET_WITH_SIMD uint32_t crc32_vmull(const uint8_t* src, long len, uint32_t ini
     if(len < 16)
     {
-        if(len == 0) return initial_crc;
+        if(len == 0) return previous_crc;
         if(len < 4)
         {
            /*
             * no idea how to do this for <4 bytes, delegate to classic impl.
             */
-            uint32_t crc = ~initial_crc;
+            uint32_t crc = ~previous_crc;
            switch(len)
            {
-                case 3: crc = (crc >> 8) ^ crc32_table[0][(crc & 0xFF) ^ *src++];
-                case 2: crc = (crc >> 8) ^ crc32_table[0][(crc & 0xFF) ^ *src++];
-                case 1: crc = (crc >> 8) ^ crc32_table[0][(crc & 0xFF) ^ *src++];
+                case 3: crc = (crc >> 8) ^ crc32_table[0][(crc & 0xFF) ^ *data++];
+                case 2: crc = (crc >> 8) ^ crc32_table[0][(crc & 0xFF) ^ *data++];
+                case 1: crc = (crc >> 8) ^ crc32_table[0][(crc & 0xFF) ^ *data++];
            }
            return ~crc;
        }
-        q_crc_part = vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)src));
+        q_crc_part = vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)data));
         XOR_INITIAL(q_crc_part);
         goto partial;
     }
     /* this alignment computation would be wrong for len<16 handled above */
-    algn_diff = (0 - (uintptr_t)src) & 0xF;
+    algn_diff = (0 - (uintptr_t)data) & 0xF;
     if(algn_diff)
     {
-        q_crc_part = vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)src));
+        q_crc_part = vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)data));
         XOR_INITIAL(q_crc_part);
-        src += algn_diff;
+        data += algn_diff;
         len -= algn_diff;
         partial_fold(algn_diff, &q_crc0, &q_crc1, &q_crc2, &q_crc3, &q_crc_part);
@@ -275,10 +275,10 @@ TARGET_WITH_SIMD uint32_t crc32_vmull(const uint8_t* src, long len, uint32_t ini
     while((len -= 64) >= 0)
     {
-        q_t0 = vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)src));
-        q_t1 = vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)src + 4));
-        q_t2 = vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)src + 8));
-        q_t3 = vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)src + 12));
+        q_t0 = vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)data));
+        q_t1 = vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)data + 4));
+        q_t2 = vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)data + 8));
+        q_t3 = vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)data + 12));
         XOR_INITIAL(q_t0);
@@ -289,7 +289,7 @@ TARGET_WITH_SIMD uint32_t crc32_vmull(const uint8_t* src, long len, uint32_t ini
         q_crc2 = vreinterpretq_u64_u32(veorq_u32(vreinterpretq_u32_u64(q_crc2), vreinterpretq_u32_u64(q_t2)));
         q_crc3 = vreinterpretq_u64_u32(veorq_u32(vreinterpretq_u32_u64(q_crc3), vreinterpretq_u32_u64(q_t3)));
-        src += 64;
+        data += 64;
     }
     /*
@@ -299,9 +299,9 @@ TARGET_WITH_SIMD uint32_t crc32_vmull(const uint8_t* src, long len, uint32_t ini
     {
         len += 16;
-        q_t0 = vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)src));
-        q_t1 = vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)src + 4));
-        q_t2 = vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)src + 8));
+        q_t0 = vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)data));
+        q_t1 = vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)data + 4));
+        q_t2 = vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)data + 8));
         XOR_INITIAL(q_t0);
@@ -313,14 +313,14 @@ TARGET_WITH_SIMD uint32_t crc32_vmull(const uint8_t* src, long len, uint32_t ini
         if(len == 0) goto done;
-        q_crc_part = vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)src + 12));
+        q_crc_part = vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)data + 12));
     }
     else if(len + 32 >= 0)
     {
         len += 32;
-        q_t0 = vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)src));
-        q_t1 = vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)src + 4));
+        q_t0 = vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)data));
+        q_t1 = vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)data + 4));
         XOR_INITIAL(q_t0);
@@ -331,13 +331,13 @@ TARGET_WITH_SIMD uint32_t crc32_vmull(const uint8_t* src, long len, uint32_t ini
         if(len == 0) goto done;
-        q_crc_part = vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)src + 8));
+        q_crc_part = vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)data + 8));
     }
     else if(len + 48 >= 0)
     {
         len += 48;
-        q_t0 = vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)src));
+        q_t0 = vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)data));
         XOR_INITIAL(q_t0);
@@ -347,13 +347,13 @@ TARGET_WITH_SIMD uint32_t crc32_vmull(const uint8_t* src, long len, uint32_t ini
         if(len == 0) goto done;
-        q_crc_part = vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)src + 4));
+        q_crc_part = vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)data + 4));
     }
     else
     {
         len += 64;
         if(len == 0) goto done;
-        q_crc_part = vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)src));
+        q_crc_part = vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)data));
         XOR_INITIAL(q_crc_part);
     }

View File

@@ -64,9 +64,9 @@ AARU_EXPORT int AARU_CALL crc64_update(crc64_ctx* ctx, const uint8_t* data, uint
     return 0;
 }
-AARU_EXPORT void AARU_CALL crc64_slicing(uint64_t* crc, const uint8_t* data, uint32_t len)
+AARU_EXPORT void AARU_CALL crc64_slicing(uint64_t* previous_crc, const uint8_t* data, uint32_t len)
 {
-    uint64_t c = *crc;
+    uint64_t c = *previous_crc;
     if(len > 4)
     {
@@ -93,7 +93,7 @@ AARU_EXPORT void AARU_CALL crc64_slicing(uint64_t* crc, const uint8_t* data, uin
     while(len-- != 0) c = crc64_table[0][*data++ ^ ((c)&0xFF)] ^ ((c) >> 8);
-    *crc = c;
+    *previous_crc = c;
 }
 AARU_EXPORT int AARU_CALL crc64_final(crc64_ctx* ctx, uint64_t* crc)

View File

@@ -238,7 +238,7 @@ AARU_EXPORT crc64_ctx* AARU_CALL crc64_init();
 AARU_EXPORT int AARU_CALL crc64_update(crc64_ctx* ctx, const uint8_t* data, uint32_t len);
 AARU_EXPORT int AARU_CALL crc64_final(crc64_ctx* ctx, uint64_t* crc);
 AARU_EXPORT void AARU_CALL crc64_free(crc64_ctx* ctx);
-AARU_EXPORT void AARU_CALL crc64_slicing(uint64_t* crc, const uint8_t* data, uint32_t len);
+AARU_EXPORT void AARU_CALL crc64_slicing(uint64_t* previous_crc, const uint8_t* data, uint32_t len);
 #if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \
     defined(__i386__) || defined(__THW_INTEL) || defined(_M_IX86)
@@ -246,5 +246,5 @@ AARU_EXPORT CLMUL uint64_t AARU_CALL crc64_clmul(uint64_t crc, const uint8_t* da
 #endif
 #if defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM)
-AARU_EXPORT TARGET_WITH_SIMD uint64_t AARU_CALL crc64_vmull(uint64_t crc, const uint8_t* data, long length);
+AARU_EXPORT TARGET_WITH_SIMD uint64_t AARU_CALL crc64_vmull(uint64_t previous_crc, const uint8_t* data, long len);
 #endif

View File

@@ -35,7 +35,7 @@ TARGET_WITH_SIMD FORCE_INLINE uint64x2_t fold(uint64x2_t in, uint64x2_t foldCons
                        sse2neon_vmull_p64(vget_high_u64(in), vget_high_u64(foldConstants)));
 }
-AARU_EXPORT TARGET_WITH_SIMD uint64_t AARU_CALL crc64_vmull(uint64_t crc, const uint8_t* data, long length)
+AARU_EXPORT TARGET_WITH_SIMD uint64_t AARU_CALL crc64_vmull(uint64_t previous_crc, const uint8_t* data, long len)
 {
     const uint64_t k1 = 0xe05dd497ca393ae4; // bitReflect(expMod65(128 + 64, poly, 1)) << 1;
     const uint64_t k2 = 0xdabe95afc7875f40; // bitReflect(expMod65(128, poly, 1)) << 1;
@@ -45,7 +45,7 @@ AARU_EXPORT TARGET_WITH_SIMD uint64_t AARU_CALL crc64_vmull(uint64_t crc, const
     const uint64x2_t foldConstants1 = vcombine_u64(vcreate_u64(k1), vcreate_u64(k2));
     const uint64x2_t foldConstants2 = vcombine_u64(vcreate_u64(mu), vcreate_u64(p));
-    const uint8_t* end = data + length;
+    const uint8_t* end = data + len;
     // Align pointers
     const uint64x2_t* alignedData = (const uint64x2_t*)((uintptr_t)data & ~(uintptr_t)15);
@@ -66,14 +66,14 @@ AARU_EXPORT TARGET_WITH_SIMD uint64_t AARU_CALL crc64_vmull(uint64_t crc, const
                                               vreinterpretq_u8_u64(b),
                                               vreinterpretq_u8_u64(a)));
-    const uint64x2_t initialCrc = vsetq_lane_u64(~crc, vdupq_n_u64(0), 0);
+    const uint64x2_t initialCrc = vsetq_lane_u64(~previous_crc, vdupq_n_u64(0), 0);
     uint64x2_t R;
     if(alignedLength == 1)
     {
         // Single data block, initial CRC possibly bleeds into zero padding
         uint64x2_t crc0, crc1;
-        shiftRight128(initialCrc, 16 - length, &crc0, &crc1);
+        shiftRight128(initialCrc, 16 - len, &crc0, &crc1);
         uint64x2_t A, B;
         shiftRight128(data0, leadOutSize, &A, &B);
@@ -86,11 +86,11 @@ AARU_EXPORT TARGET_WITH_SIMD uint64_t AARU_CALL crc64_vmull(uint64_t crc, const
     {
         const uint64x2_t data1 = vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)(alignedData + 1)));
-        if(length < 8)
+        if(len < 8)
         {
             // Initial CRC bleeds into the zero padding
             uint64x2_t crc0, crc1;
-            shiftRight128(initialCrc, 16 - length, &crc0, &crc1);
+            shiftRight128(initialCrc, 16 - len, &crc0, &crc1);
             uint64x2_t A, B, C, D;
             shiftRight128(data0, leadOutSize, &A, &B);
@@ -117,7 +117,7 @@ AARU_EXPORT TARGET_WITH_SIMD uint64_t AARU_CALL crc64_vmull(uint64_t crc, const
         else
         {
             alignedData++;
-            length -= 16 - leadInSize;
+            len -= 16 - leadInSize;
             // Initial CRC can simply be added to data
             uint64x2_t crc0, crc1;
@@ -125,17 +125,17 @@ AARU_EXPORT TARGET_WITH_SIMD uint64_t AARU_CALL crc64_vmull(uint64_t crc, const
             uint64x2_t accumulator = veorq_u64(fold(veorq_u64(crc0, data0), foldConstants1), crc1);
-            while(length >= 32)
+            while(len >= 32)
             {
                 accumulator = fold(veorq_u64(vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)alignedData)), accumulator),
                                    foldConstants1);
-                length -= 16;
+                len -= 16;
                 alignedData++;
             }
             uint64x2_t P;
-            if(length == 16) P = veorq_u64(accumulator, vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)alignedData)));
+            if(len == 16) P = veorq_u64(accumulator, vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)alignedData)));
             else
             {
                 const uint64x2_t end0 =

View File

@@ -211,7 +211,7 @@ TEST_F(crc32Fixture, crc32_clmul)
     uint32_t crc = CRC32_ISO_SEED;
-    crc = ~crc32_clmul(buffer, 1048576, ~crc);
+    crc = ~crc32_clmul(~crc, buffer, 1048576);
     crc ^= CRC32_ISO_SEED;
@@ -224,7 +224,7 @@ TEST_F(crc32Fixture, crc32_clmul_misaligned)
     uint32_t crc = CRC32_ISO_SEED;
-    crc = ~crc32_clmul(buffer_misaligned+1, 1048576, ~crc);
+    crc = ~crc32_clmul(~crc, buffer_misaligned + 1, 1048576);
     crc ^= CRC32_ISO_SEED;
@@ -237,7 +237,7 @@ TEST_F(crc32Fixture, crc32_clmul_15bytes)
     uint32_t crc = CRC32_ISO_SEED;
-    crc = ~crc32_clmul(buffer, 15, ~crc);
+    crc = ~crc32_clmul(~crc, buffer, 15);
     crc ^= CRC32_ISO_SEED;
@@ -250,7 +250,7 @@ TEST_F(crc32Fixture, crc32_clmul_31bytes)
     uint32_t crc = CRC32_ISO_SEED;
-    crc = ~crc32_clmul(buffer, 31, ~crc);
+    crc = ~crc32_clmul(~crc, buffer, 31);
     crc ^= CRC32_ISO_SEED;
@@ -263,7 +263,7 @@ TEST_F(crc32Fixture, crc32_clmul_63bytes)
     uint32_t crc = CRC32_ISO_SEED;
-    crc = ~crc32_clmul(buffer, 63, ~crc);
+    crc = ~crc32_clmul(~crc, buffer, 63);
     crc ^= CRC32_ISO_SEED;
@@ -276,7 +276,7 @@ TEST_F(crc32Fixture, crc32_clmul_2352bytes)
     uint32_t crc = CRC32_ISO_SEED;
-    crc = ~crc32_clmul(buffer, 2352, ~crc);
+    crc = ~crc32_clmul(~crc, buffer, 2352);
     crc ^= CRC32_ISO_SEED;
@@ -371,7 +371,7 @@ TEST_F(crc32Fixture, crc32_vmull)
     uint32_t crc = CRC32_ISO_SEED;
-    crc = ~crc32_vmull(buffer, 1048576, ~crc);
+    crc = ~crc32_vmull(~crc, buffer, 1048576);
     crc ^= CRC32_ISO_SEED;
@@ -384,7 +384,7 @@ TEST_F(crc32Fixture, crc32_vmull_misaligned)
     uint32_t crc = CRC32_ISO_SEED;
-    crc = ~crc32_vmull(buffer_misaligned+1, 1048576, ~crc);
+    crc = ~crc32_vmull(~crc, buffer_misaligned + 1, 1048576);
     crc ^= CRC32_ISO_SEED;
@@ -397,7 +397,7 @@ TEST_F(crc32Fixture, crc32_vmull_15bytes)
    uint32_t crc = CRC32_ISO_SEED;
-    crc = ~crc32_vmull(buffer, 15, ~crc);
+    crc = ~crc32_vmull(~crc, buffer, 15);
     crc ^= CRC32_ISO_SEED;
@@ -410,7 +410,7 @@ TEST_F(crc32Fixture, crc32_vmull_31bytes)
     uint32_t crc = CRC32_ISO_SEED;
-    crc = ~crc32_vmull(buffer, 31, ~crc);
+    crc = ~crc32_vmull(~crc, buffer, 31);
     crc ^= CRC32_ISO_SEED;
@@ -423,7 +423,7 @@ TEST_F(crc32Fixture, crc32_vmull_63bytes)
     uint32_t crc = CRC32_ISO_SEED;
-    crc = ~crc32_vmull(buffer, 63, ~crc);
+    crc = ~crc32_vmull(~crc, buffer, 63);
     crc ^= CRC32_ISO_SEED;
@@ -436,7 +436,7 @@ TEST_F(crc32Fixture, crc32_vmull_2352bytes)
     uint32_t crc = CRC32_ISO_SEED;
-    crc = ~crc32_vmull(buffer, 2352, ~crc);
+    crc = ~crc32_vmull(~crc, buffer, 2352);
    crc ^= CRC32_ISO_SEED;