Mirror of https://github.com/aaru-dps/Aaru.Checksums.Native.git (synced 2025-12-16 11:14:29 +00:00)
Refactor and reformat.
adler32.c (99 lines changed)
@@ -42,9 +42,9 @@ AARU_EXPORT adler32_ctx *AARU_CALL adler32_init()
{
    adler32_ctx *ctx;

-    ctx = (adler32_ctx *) malloc(sizeof(adler32_ctx));
+    ctx = (adler32_ctx *)malloc(sizeof(adler32_ctx));

-    if (!ctx) return NULL;
+    if(!ctx) return NULL;

    ctx->sum1 = 1;
    ctx->sum2 = 0;
@@ -65,10 +65,10 @@ AARU_EXPORT adler32_ctx *AARU_CALL adler32_init()
 */
AARU_EXPORT int AARU_CALL adler32_update(adler32_ctx *ctx, const uint8_t *data, uint32_t len)
{
-    if (!ctx || !data) return -1;
+    if(!ctx || !data) return -1;

#if defined(__aarch64__) || defined(_M_ARM64) || ((defined(__arm__) || defined(_M_ARM)) && !defined(__MINGW32__))
-    if (have_neon())
+    if(have_neon())
    {
        adler32_neon(&ctx->sum1, &ctx->sum2, data, len);
@@ -110,16 +110,15 @@ AARU_EXPORT void AARU_CALL adler32_slicing(uint16_t *sum1, uint16_t *sum2, const
{
    uint32_t s1 = *sum1;
    uint32_t s2 = *sum2;

    unsigned n;

    /* in case user likes doing a byte at a time, keep it fast */
-    if (len == 1)
+    if(len == 1)
    {
        s1 += data[0];
-        if (s1 >= ADLER_MODULE) s1 -= ADLER_MODULE;
+        if(s1 >= ADLER_MODULE) s1 -= ADLER_MODULE;
        s2 += s1;
-        if (s2 >= ADLER_MODULE) s2 -= ADLER_MODULE;
+        if(s2 >= ADLER_MODULE) s2 -= ADLER_MODULE;

        *sum1 = s1 & 0xFFFF;
        *sum2 = s2 & 0xFFFF;
@@ -128,14 +127,14 @@ AARU_EXPORT void AARU_CALL adler32_slicing(uint16_t *sum1, uint16_t *sum2, const
    }

    /* in case short lengths are provided, keep it somewhat fast */
-    if (len < 16)
+    if(len < 16)
    {
-        while (len--)
+        while(len--)
        {
            s1 += *data++;
            s2 += s1;
        }
-        if (s1 >= ADLER_MODULE) s1 -= ADLER_MODULE;
+        if(s1 >= ADLER_MODULE) s1 -= ADLER_MODULE;
        s2 %= ADLER_MODULE; /* only added so many ADLER_MODULE's */
        *sum1 = s1 & 0xFFFF;
        *sum2 = s2 & 0xFFFF;
@@ -144,95 +143,95 @@ AARU_EXPORT void AARU_CALL adler32_slicing(uint16_t *sum1, uint16_t *sum2, const
    }

    /* do length NMAX blocks -- requires just one modulo operation */
-    while (len >= NMAX)
+    while(len >= NMAX)
    {
        len -= NMAX;
        n = NMAX / 16; /* NMAX is divisible by 16 */
        do
        {
-            s1 += (data)[0];
+            s1 += data[0];
            s2 += s1;
-            s1 += (data)[0 + 1];
+            s1 += data[0 + 1];
            s2 += s1;
-            s1 += (data)[0 + 2];
+            s1 += data[0 + 2];
            s2 += s1;
-            s1 += (data)[0 + 2 + 1];
+            s1 += data[0 + 2 + 1];
            s2 += s1;
-            s1 += (data)[0 + 4];
+            s1 += data[0 + 4];
            s2 += s1;
-            s1 += (data)[0 + 4 + 1];
+            s1 += data[0 + 4 + 1];
            s2 += s1;
-            s1 += (data)[0 + 4 + 2];
+            s1 += data[0 + 4 + 2];
            s2 += s1;
-            s1 += (data)[0 + 4 + 2 + 1];
+            s1 += data[0 + 4 + 2 + 1];
            s2 += s1;
-            s1 += (data)[8];
+            s1 += data[8];
            s2 += s1;
-            s1 += (data)[8 + 1];
+            s1 += data[8 + 1];
            s2 += s1;
-            s1 += (data)[8 + 2];
+            s1 += data[8 + 2];
            s2 += s1;
-            s1 += (data)[8 + 2 + 1];
+            s1 += data[8 + 2 + 1];
            s2 += s1;
-            s1 += (data)[8 + 4];
+            s1 += data[8 + 4];
            s2 += s1;
-            s1 += (data)[8 + 4 + 1];
+            s1 += data[8 + 4 + 1];
            s2 += s1;
-            s1 += (data)[8 + 4 + 2];
+            s1 += data[8 + 4 + 2];
            s2 += s1;
-            s1 += (data)[8 + 4 + 2 + 1];
+            s1 += data[8 + 4 + 2 + 1];
            s2 += s1;

            /* 16 sums unrolled */
            data += 16;
        }
-        while (--n);
+        while(--n);
        s1 %= ADLER_MODULE;
        s2 %= ADLER_MODULE;
    }

    /* do remaining bytes (less than NMAX, still just one modulo) */
-    if (len)
+    if(len)
    { /* avoid modulos if none remaining */
-        while (len >= 16)
+        while(len >= 16)
        {
            len -= 16;
-            s1 += (data)[0];
+            s1 += data[0];
            s2 += s1;
-            s1 += (data)[0 + 1];
+            s1 += data[0 + 1];
            s2 += s1;
-            s1 += (data)[0 + 2];
+            s1 += data[0 + 2];
            s2 += s1;
-            s1 += (data)[0 + 2 + 1];
+            s1 += data[0 + 2 + 1];
            s2 += s1;
-            s1 += (data)[0 + 4];
+            s1 += data[0 + 4];
            s2 += s1;
-            s1 += (data)[0 + 4 + 1];
+            s1 += data[0 + 4 + 1];
            s2 += s1;
-            s1 += (data)[0 + 4 + 2];
+            s1 += data[0 + 4 + 2];
            s2 += s1;
-            s1 += (data)[0 + 4 + 2 + 1];
+            s1 += data[0 + 4 + 2 + 1];
            s2 += s1;
-            s1 += (data)[8];
+            s1 += data[8];
            s2 += s1;
-            s1 += (data)[8 + 1];
+            s1 += data[8 + 1];
            s2 += s1;
-            s1 += (data)[8 + 2];
+            s1 += data[8 + 2];
            s2 += s1;
-            s1 += (data)[8 + 2 + 1];
+            s1 += data[8 + 2 + 1];
            s2 += s1;
-            s1 += (data)[8 + 4];
+            s1 += data[8 + 4];
            s2 += s1;
-            s1 += (data)[8 + 4 + 1];
+            s1 += data[8 + 4 + 1];
            s2 += s1;
-            s1 += (data)[8 + 4 + 2];
+            s1 += data[8 + 4 + 2];
            s2 += s1;
-            s1 += (data)[8 + 4 + 2 + 1];
+            s1 += data[8 + 4 + 2 + 1];
            s2 += s1;

            data += 16;
        }
-        while (len--)
+        while(len--)
        {
            s1 += *data++;
            s2 += s1;
@@ -258,7 +257,7 @@ AARU_EXPORT void AARU_CALL adler32_slicing(uint16_t *sum1, uint16_t *sum2, const
 */
AARU_EXPORT int AARU_CALL adler32_final(adler32_ctx *ctx, uint32_t *checksum)
{
-    if (!ctx) return -1;
+    if(!ctx) return -1;

    *checksum = (ctx->sum2 << 16) | ctx->sum1;
    return 0;
@@ -274,7 +273,7 @@ AARU_EXPORT int AARU_CALL adler32_final(adler32_ctx *ctx, uint32_t *checksum)
 */
AARU_EXPORT void AARU_CALL adler32_free(adler32_ctx *ctx)
{
-    if (!ctx) return;
+    if(!ctx) return;

    free(ctx);
}
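For reference, the scalar path being reformatted above is the textbook Adler-32 recurrence: sum1 accumulates input bytes modulo 65521 (the ADLER_MODULE constant, the largest prime below 2^16) and sum2 accumulates the running sum1. A minimal byte-at-a-time sketch, independent of this library's context API and SIMD dispatch, equivalent in result to adler32_slicing() but without the unrolling tricks:

#include <stddef.h>
#include <stdint.h>

#define ADLER_MODULE 65521u /* largest prime below 2^16 */

/* Illustrative reference only: plain byte-at-a-time Adler-32. */
static uint32_t adler32_simple(uint32_t adler, const uint8_t *data, size_t len)
{
    uint32_t s1 = adler & 0xFFFF;
    uint32_t s2 = (adler >> 16) & 0xFFFF;

    while(len--)
    {
        s1 = (s1 + *data++) % ADLER_MODULE;
        s2 = (s2 + s1) % ADLER_MODULE;
    }

    return (s2 << 16) | s1; /* same packing used by adler32_final() above */
}

Starting from adler = 1 reproduces the ctx->sum1 = 1, ctx->sum2 = 0 initialisation done in adler32_init().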
@@ -38,8 +38,9 @@ AARU_EXPORT void AARU_CALL adler32_slicing(uint16_t *sum1, uint16_t *sum2, const
#if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \
    defined(__i386__) || defined(__THW_INTEL) || defined(_M_IX86)

-AARU_EXPORT SSSE3 void AARU_CALL adler32_ssse3(uint16_t* sum1, uint16_t* sum2, const uint8_t* data, long len);
-AARU_EXPORT AVX2 void AARU_CALL adler32_avx2(uint16_t* sum1, uint16_t* sum2, const uint8_t* data, long len);
+AARU_EXPORT TARGET_WITH_SSSE3 void AARU_CALL
+    adler32_ssse3(uint16_t *sum1, uint16_t *sum2, const uint8_t *data, long len);
+AARU_EXPORT TARGET_WITH_AVX2 void AARU_CALL adler32_avx2(uint16_t *sum1, uint16_t *sum2, const uint8_t *data, long len);

#endif
@@ -33,16 +33,16 @@
|
||||
#include "simd.h"
|
||||
|
||||
/**
|
||||
* @brief Calculate Adler-32 checksum for a given data using AVX2 instructions.
|
||||
* @brief Calculate Adler-32 checksum for a given data using TARGET_WITH_AVX2 instructions.
|
||||
*
|
||||
* This function calculates the Adler-32 checksum for a block of data using AVX2 vector instructions.
|
||||
* This function calculates the Adler-32 checksum for a block of data using TARGET_WITH_AVX2 vector instructions.
|
||||
*
|
||||
* @param sum1 Pointer to the variable where the first 16-bit checksum value is stored.
|
||||
* @param sum2 Pointer to the variable where the second 16-bit checksum value is stored.
|
||||
* @param data Pointer to the data buffer.
|
||||
* @param len Length of the data buffer in bytes.
|
||||
*/
|
||||
AARU_EXPORT AVX2 void AARU_CALL adler32_avx2(uint16_t* sum1, uint16_t* sum2, const uint8_t* data, long len)
|
||||
AARU_EXPORT TARGET_WITH_AVX2 void AARU_CALL adler32_avx2(uint16_t *sum1, uint16_t *sum2, const uint8_t *data, long len)
|
||||
{
|
||||
uint32_t s1 = *sum1;
|
||||
uint32_t s2 = *sum2;
|
||||
@@ -103,11 +103,12 @@ AARU_EXPORT AVX2 void AARU_CALL adler32_avx2(uint16_t* sum1, uint16_t* sum2, con
|
||||
__m256i v_ps = _mm256_set_epi32(0, 0, 0, 0, 0, 0, 0, (s1 * n));
|
||||
__m256i v_s2 = _mm256_set_epi32(0, 0, 0, 0, 0, 0, 0, s2);
|
||||
__m256i v_s1 = _mm256_setzero_si256();
|
||||
do {
|
||||
do
|
||||
{
|
||||
/*
|
||||
* Load 32 input bytes.
|
||||
*/
|
||||
const __m256i bytes = _mm256_lddqu_si256((__m256i*)(data));
|
||||
const __m256i bytes = _mm256_lddqu_si256((__m256i *)(data));
|
||||
|
||||
/*
|
||||
* Add previous block byte sum to v_ps.
|
||||
@@ -122,7 +123,8 @@ AARU_EXPORT AVX2 void AARU_CALL adler32_avx2(uint16_t* sum1, uint16_t* sum2, con
|
||||
v_s2 = _mm256_add_epi32(v_s2, _mm256_madd_epi16(mad, ones));
|
||||
|
||||
data += BLOCK_SIZE;
|
||||
} while(--n);
|
||||
}
|
||||
while(--n);
|
||||
|
||||
__m128i sum = _mm_add_epi32(_mm256_castsi256_si128(v_s1), _mm256_extracti128_si256(v_s1, 1));
|
||||
__m128i hi = _mm_unpackhi_epi64(sum, sum);
|
||||
@@ -171,7 +173,8 @@ AARU_EXPORT AVX2 void AARU_CALL adler32_avx2(uint16_t* sum1, uint16_t* sum2, con
|
||||
s2 += (s1 += *data++);
|
||||
len -= 16;
|
||||
}
|
||||
while(len--) { s2 += (s1 += *data++); }
|
||||
while(len--)
|
||||
{ s2 += (s1 += *data++); }
|
||||
if(s1 >= ADLER_MODULE) s1 -= ADLER_MODULE;
|
||||
s2 %= ADLER_MODULE;
|
||||
}
|
||||
|
||||
@@ -48,7 +48,7 @@
|
||||
* @param data Pointer to the data buffer.
|
||||
* @param len Length of the data buffer in bytes.
|
||||
*/
|
||||
TARGET_WITH_SIMD void adler32_neon(uint16_t *sum1, uint16_t *sum2, const uint8_t *data, uint32_t len)
|
||||
TARGET_WITH_NEON void adler32_neon(uint16_t *sum1, uint16_t *sum2, const uint8_t *data, uint32_t len)
|
||||
{
|
||||
/*
|
||||
* Split Adler-32 into component sums.
|
||||
@@ -58,14 +58,14 @@ TARGET_WITH_SIMD void adler32_neon(uint16_t *sum1, uint16_t *sum2, const uint8_t
|
||||
/*
|
||||
* Serially compute s1 & s2, until the data is 16-byte aligned.
|
||||
*/
|
||||
if ((uintptr_t) data & 15)
|
||||
if((uintptr_t)data & 15)
|
||||
{
|
||||
while ((uintptr_t) data & 15)
|
||||
while((uintptr_t)data & 15)
|
||||
{
|
||||
s2 += (s1 += *data++);
|
||||
--len;
|
||||
}
|
||||
if (s1 >= ADLER_MODULE) s1 -= ADLER_MODULE;
|
||||
if(s1 >= ADLER_MODULE) s1 -= ADLER_MODULE;
|
||||
s2 %= ADLER_MODULE;
|
||||
}
|
||||
/*
|
||||
@@ -74,10 +74,10 @@ TARGET_WITH_SIMD void adler32_neon(uint16_t *sum1, uint16_t *sum2, const uint8_t
|
||||
const unsigned BLOCK_SIZE = 1 << 5;
|
||||
uint32_t blocks = len / BLOCK_SIZE;
|
||||
len -= blocks * BLOCK_SIZE;
|
||||
while (blocks)
|
||||
while(blocks)
|
||||
{
|
||||
unsigned n = NMAX / BLOCK_SIZE; /* The NMAX constraint. */
|
||||
if (n > blocks) n = (unsigned) blocks;
|
||||
if(n > blocks) n = (unsigned)blocks;
|
||||
blocks -= n;
|
||||
/*
|
||||
* Process n blocks of data. At most NMAX data bytes can be
|
||||
@@ -87,8 +87,8 @@ TARGET_WITH_SIMD void adler32_neon(uint16_t *sum1, uint16_t *sum2, const uint8_t
|
||||
uint32x4_t v_s2 = {.n128_u32 = {0, 0, 0, s1 * n}};
|
||||
uint32x4_t v_s1 = {.n128_u32 = {0, 0, 0, 0}};
|
||||
#else
|
||||
uint32x4_t v_s2 = (uint32x4_t) {0, 0, 0, s1 * n};
|
||||
uint32x4_t v_s1 = (uint32x4_t) {0, 0, 0, 0};
|
||||
uint32x4_t v_s2 = (uint32x4_t){0, 0, 0, s1 * n};
|
||||
uint32x4_t v_s1 = (uint32x4_t){0, 0, 0, 0};
|
||||
#endif
|
||||
uint16x8_t v_column_sum_1 = vdupq_n_u16(0);
|
||||
uint16x8_t v_column_sum_2 = vdupq_n_u16(0);
|
||||
@@ -99,8 +99,8 @@ TARGET_WITH_SIMD void adler32_neon(uint16_t *sum1, uint16_t *sum2, const uint8_t
|
||||
/*
|
||||
* Load 32 input bytes.
|
||||
*/
|
||||
const uint8x16_t bytes1 = vld1q_u8((uint8_t *) (data));
|
||||
const uint8x16_t bytes2 = vld1q_u8((uint8_t *) (data + 16));
|
||||
const uint8x16_t bytes1 = vld1q_u8((uint8_t *)(data));
|
||||
const uint8x16_t bytes2 = vld1q_u8((uint8_t *)(data + 16));
|
||||
/*
|
||||
* Add previous block byte sum to v_s2.
|
||||
*/
|
||||
@@ -117,47 +117,48 @@ TARGET_WITH_SIMD void adler32_neon(uint16_t *sum1, uint16_t *sum2, const uint8_t
|
||||
v_column_sum_3 = vaddw_u8(v_column_sum_3, vget_low_u8(bytes2));
|
||||
v_column_sum_4 = vaddw_u8(v_column_sum_4, vget_high_u8(bytes2));
|
||||
data += BLOCK_SIZE;
|
||||
} while (--n);
|
||||
}
|
||||
while(--n);
|
||||
v_s2 = vshlq_n_u32(v_s2, 5);
|
||||
/*
|
||||
* Multiply-add bytes by [ 32, 31, 30, ... ] for s2.
|
||||
*/
|
||||
#ifdef _MSC_VER
|
||||
#ifdef _M_ARM64
|
||||
v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_1), neon_ld1m_16((uint16_t[]) {32, 31, 30, 29}));
|
||||
v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_1), neon_ld1m_16((uint16_t[]) {28, 27, 26, 25}));
|
||||
v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_2), neon_ld1m_16((uint16_t[]) {24, 23, 22, 21}));
|
||||
v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_2), neon_ld1m_16((uint16_t[]) {20, 19, 18, 17}));
|
||||
v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_3), neon_ld1m_16((uint16_t[]) {16, 15, 14, 13}));
|
||||
v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_3), neon_ld1m_16((uint16_t[]) {12, 11, 10, 9}));
|
||||
v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_4), neon_ld1m_16((uint16_t[]) {8, 7, 6, 5}));
|
||||
v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_4), neon_ld1m_16((uint16_t[]) {4, 3, 2, 1}));
|
||||
v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_1), neon_ld1m_16((uint16_t[]){32, 31, 30, 29}));
|
||||
v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_1), neon_ld1m_16((uint16_t[]){28, 27, 26, 25}));
|
||||
v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_2), neon_ld1m_16((uint16_t[]){24, 23, 22, 21}));
|
||||
v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_2), neon_ld1m_16((uint16_t[]){20, 19, 18, 17}));
|
||||
v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_3), neon_ld1m_16((uint16_t[]){16, 15, 14, 13}));
|
||||
v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_3), neon_ld1m_16((uint16_t[]){12, 11, 10, 9}));
|
||||
v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_4), neon_ld1m_16((uint16_t[]){8, 7, 6, 5}));
|
||||
v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_4), neon_ld1m_16((uint16_t[]){4, 3, 2, 1}));
|
||||
#else
|
||||
v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_1), vld1_u16(((uint16_t[]) {32, 31, 30, 29})));
|
||||
v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_1), vld1_u16(((uint16_t[]) {28, 27, 26, 25})));
|
||||
v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_2), vld1_u16(((uint16_t[]) {24, 23, 22, 21})));
|
||||
v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_2), vld1_u16(((uint16_t[]) {20, 19, 18, 17})));
|
||||
v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_3), vld1_u16(((uint16_t[]) {16, 15, 14, 13})));
|
||||
v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_3), vld1_u16(((uint16_t[]) {12, 11, 10, 9})));
|
||||
v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_4), vld1_u16(((uint16_t[]) {8, 7, 6, 5})));
|
||||
v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_4), vld1_u16(((uint16_t[]) {4, 3, 2, 1})));
|
||||
v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_1), vld1_u16(((uint16_t[]){32, 31, 30, 29})));
|
||||
v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_1), vld1_u16(((uint16_t[]){28, 27, 26, 25})));
|
||||
v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_2), vld1_u16(((uint16_t[]){24, 23, 22, 21})));
|
||||
v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_2), vld1_u16(((uint16_t[]){20, 19, 18, 17})));
|
||||
v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_3), vld1_u16(((uint16_t[]){16, 15, 14, 13})));
|
||||
v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_3), vld1_u16(((uint16_t[]){12, 11, 10, 9})));
|
||||
v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_4), vld1_u16(((uint16_t[]){8, 7, 6, 5})));
|
||||
v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_4), vld1_u16(((uint16_t[]){4, 3, 2, 1})));
|
||||
#endif
|
||||
#else
|
||||
v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_1), (uint16x4_t) {32, 31, 30, 29});
|
||||
v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_1), (uint16x4_t) {28, 27, 26, 25});
|
||||
v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_2), (uint16x4_t) {24, 23, 22, 21});
|
||||
v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_2), (uint16x4_t) {20, 19, 18, 17});
|
||||
v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_3), (uint16x4_t) {16, 15, 14, 13});
|
||||
v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_3), (uint16x4_t) {12, 11, 10, 9});
|
||||
v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_4), (uint16x4_t) {8, 7, 6, 5});
|
||||
v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_4), (uint16x4_t) {4, 3, 2, 1});
|
||||
v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_1), (uint16x4_t){32, 31, 30, 29});
|
||||
v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_1), (uint16x4_t){28, 27, 26, 25});
|
||||
v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_2), (uint16x4_t){24, 23, 22, 21});
|
||||
v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_2), (uint16x4_t){20, 19, 18, 17});
|
||||
v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_3), (uint16x4_t){16, 15, 14, 13});
|
||||
v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_3), (uint16x4_t){12, 11, 10, 9});
|
||||
v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_4), (uint16x4_t){8, 7, 6, 5});
|
||||
v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_4), (uint16x4_t){4, 3, 2, 1});
|
||||
#endif
|
||||
/*
|
||||
* Sum epi32 ints v_s1(s2) and accumulate in s1(s2).
|
||||
*/
|
||||
uint32x2_t sum1 = vpadd_u32(vget_low_u32(v_s1), vget_high_u32(v_s1));
|
||||
uint32x2_t sum2 = vpadd_u32(vget_low_u32(v_s2), vget_high_u32(v_s2));
|
||||
uint32x2_t s1s2 = vpadd_u32(sum1, sum2);
|
||||
uint32x2_t t_s1 = vpadd_u32(vget_low_u32(v_s1), vget_high_u32(v_s1));
|
||||
uint32x2_t t_s2 = vpadd_u32(vget_low_u32(v_s2), vget_high_u32(v_s2));
|
||||
uint32x2_t s1s2 = vpadd_u32(t_s1, t_s2);
|
||||
s1 += vget_lane_u32(s1s2, 0);
|
||||
s2 += vget_lane_u32(s1s2, 1);
|
||||
/*
|
||||
@@ -169,9 +170,9 @@ TARGET_WITH_SIMD void adler32_neon(uint16_t *sum1, uint16_t *sum2, const uint8_t
|
||||
/*
|
||||
* Handle leftover data.
|
||||
*/
|
||||
if (len)
|
||||
if(len)
|
||||
{
|
||||
if (len >= 16)
|
||||
if(len >= 16)
|
||||
{
|
||||
s2 += (s1 += *data++);
|
||||
s2 += (s1 += *data++);
|
||||
@@ -191,9 +192,9 @@ TARGET_WITH_SIMD void adler32_neon(uint16_t *sum1, uint16_t *sum2, const uint8_t
|
||||
s2 += (s1 += *data++);
|
||||
len -= 16;
|
||||
}
|
||||
while (len--)
|
||||
while(len--)
|
||||
{ s2 += (s1 += *data++); }
|
||||
if (s1 >= ADLER_MODULE) s1 -= ADLER_MODULE;
|
||||
if(s1 >= ADLER_MODULE) s1 -= ADLER_MODULE;
|
||||
s2 %= ADLER_MODULE;
|
||||
}
|
||||
/*
|
||||
|
||||
@@ -41,16 +41,17 @@
|
||||
|
||||
|
||||
/**
|
||||
* @brief Calculate Adler-32 checksum for a given data using SSSE3 instructions.
|
||||
* @brief Calculate Adler-32 checksum for a given data using TARGET_WITH_SSSE3 instructions.
|
||||
*
|
||||
* This function calculates the Adler-32 checksum for a block of data using SSSE3 vector instructions.
|
||||
* This function calculates the Adler-32 checksum for a block of data using TARGET_WITH_SSSE3 vector instructions.
|
||||
*
|
||||
* @param sum1 Pointer to the variable where the first 16-bit checksum value is stored.
|
||||
* @param sum2 Pointer to the variable where the second 16-bit checksum value is stored.
|
||||
* @param data Pointer to the data buffer.
|
||||
* @param len Length of the data buffer in bytes.
|
||||
*/
|
||||
AARU_EXPORT SSSE3 void AARU_CALL adler32_ssse3(uint16_t* sum1, uint16_t* sum2, const uint8_t* data, long len)
|
||||
AARU_EXPORT TARGET_WITH_SSSE3 void AARU_CALL
|
||||
adler32_ssse3(uint16_t *sum1, uint16_t *sum2, const uint8_t *data, long len)
|
||||
{
|
||||
uint32_t s1 = *sum1;
|
||||
uint32_t s2 = *sum2;
|
||||
@@ -77,12 +78,13 @@ AARU_EXPORT SSSE3 void AARU_CALL adler32_ssse3(uint16_t* sum1, uint16_t* sum2, c
|
||||
__m128i v_ps = _mm_set_epi32(0, 0, 0, s1 * n);
|
||||
__m128i v_s2 = _mm_set_epi32(0, 0, 0, s2);
|
||||
__m128i v_s1 = _mm_set_epi32(0, 0, 0, 0);
|
||||
do {
|
||||
do
|
||||
{
|
||||
/*
|
||||
* Load 32 input bytes.
|
||||
*/
|
||||
const __m128i bytes1 = _mm_loadu_si128((__m128i*)(data));
|
||||
const __m128i bytes2 = _mm_loadu_si128((__m128i*)(data + 16));
|
||||
const __m128i bytes1 = _mm_loadu_si128((__m128i *)(data));
|
||||
const __m128i bytes2 = _mm_loadu_si128((__m128i *)(data + 16));
|
||||
/*
|
||||
* Add previous block byte sum to v_ps.
|
||||
*/
|
||||
@@ -98,7 +100,8 @@ AARU_EXPORT SSSE3 void AARU_CALL adler32_ssse3(uint16_t* sum1, uint16_t* sum2, c
|
||||
const __m128i mad2 = _mm_maddubs_epi16(bytes2, tap2);
|
||||
v_s2 = _mm_add_epi32(v_s2, _mm_madd_epi16(mad2, ones));
|
||||
data += BLOCK_SIZE;
|
||||
} while(--n);
|
||||
}
|
||||
while(--n);
|
||||
v_s2 = _mm_add_epi32(v_s2, _mm_slli_epi32(v_ps, 5));
|
||||
/*
|
||||
* Sum epi32 ints v_s1(s2) and accumulate in s1(s2).
|
||||
@@ -144,7 +147,8 @@ AARU_EXPORT SSSE3 void AARU_CALL adler32_ssse3(uint16_t* sum1, uint16_t* sum2, c
|
||||
s2 += (s1 += *data++);
|
||||
len -= 16;
|
||||
}
|
||||
while(len--) { s2 += (s1 += *data++); }
|
||||
while(len--)
|
||||
{ s2 += (s1 += *data++); }
|
||||
if(s1 >= ADLER_MODULE) s1 -= ADLER_MODULE;
|
||||
s2 %= ADLER_MODULE;
|
||||
}
|
||||
|
||||
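The adler32_update() hunk earlier shows the run-time dispatch pattern used throughout this file: probe the CPU (have_neon() on ARM) and fall back to the portable adler32_slicing() path otherwise. A condensed sketch of that pattern, with the preprocessor guard simplified and the x86 probes omitted (they are not shown in this diff):

/* Illustrative dispatch skeleton, not the repository's exact adler32_update(). */
int adler32_update_sketch(adler32_ctx *ctx, const uint8_t *data, uint32_t len)
{
    if(!ctx || !data) return -1;

#if defined(__aarch64__) || defined(_M_ARM64)
    if(have_neon()) /* run-time CPU feature probe */
    {
        adler32_neon(&ctx->sum1, &ctx->sum2, data, len);
        return 0;
    }
#endif

    /* portable slicing fallback */
    adler32_slicing(&ctx->sum1, &ctx->sum2, data, len);
    return 0;
}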
arm_vmull.c (14 lines changed)
@@ -33,15 +33,17 @@
#include "simd.h"

#if !defined(__MINGW32__) && !defined(_MSC_FULL_VER) && (!defined(__ANDROID__) || !defined(__arm__))

TARGET_WITH_CRYPTO static uint64x2_t sse2neon_vmull_p64_crypto(uint64x1_t _a, uint64x1_t _b)
{
    poly64_t a = vget_lane_p64(vreinterpret_p64_u64(_a), 0);
    poly64_t b = vget_lane_p64(vreinterpret_p64_u64(_b), 0);
    return vreinterpretq_u64_p128(vmull_p64(a, b));
}

#endif

-TARGET_WITH_SIMD uint64x2_t sse2neon_vmull_p64(uint64x1_t _a, uint64x1_t _b)
+TARGET_WITH_NEON uint64x2_t sse2neon_vmull_p64(uint64x1_t _a, uint64x1_t _b)
{
#if !defined(__MINGW32__) && !defined(_MSC_FULL_VER) && (!defined(__ANDROID__) || !defined(__arm__))
    // Wraps vmull_p64
@@ -136,7 +138,7 @@ TARGET_WITH_SIMD uint64x2_t sse2neon_vmull_p64(uint64x1_t _a, uint64x1_t _b)
    return vreinterpretq_u64_u8(r);
}

-TARGET_WITH_SIMD uint64x2_t mm_shuffle_epi8(uint64x2_t a, uint64x2_t b)
+TARGET_WITH_NEON uint64x2_t mm_shuffle_epi8(uint64x2_t a, uint64x2_t b)
{
    uint8x16_t tbl = vreinterpretq_u8_u64(a); // input a
    uint8x16_t idx = vreinterpretq_u8_u64(b); // input b
@@ -151,16 +153,16 @@ TARGET_WITH_SIMD uint64x2_t mm_shuffle_epi8(uint64x2_t a, uint64x2_t b)
#endif
}

-TARGET_WITH_SIMD uint64x2_t mm_srli_si128(uint64x2_t a, int imm)
+TARGET_WITH_NEON uint64x2_t mm_srli_si128(uint64x2_t a, int imm)
{
    uint8x16_t tmp[2] = {vreinterpretq_u8_u64(a), vdupq_n_u8(0)};
-    return vreinterpretq_u64_u8(vld1q_u8(((uint8_t const*)tmp) + imm));
+    return vreinterpretq_u64_u8(vld1q_u8(((uint8_t const *)tmp) + imm));
}

-TARGET_WITH_SIMD uint64x2_t mm_slli_si128(uint64x2_t a, int imm)
+TARGET_WITH_NEON uint64x2_t mm_slli_si128(uint64x2_t a, int imm)
{
    uint8x16_t tmp[2] = {vdupq_n_u8(0), vreinterpretq_u8_u64(a)};
-    return vreinterpretq_u64_u8(vld1q_u8(((uint8_t const*)tmp) + (16 - imm)));
+    return vreinterpretq_u64_u8(vld1q_u8(((uint8_t const *)tmp) + (16 - imm)));
}

#endif

@@ -22,10 +22,10 @@
#if defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM)

TARGET_WITH_CRYPTO static uint64x2_t sse2neon_vmull_p64_crypto(uint64x1_t _a, uint64x1_t _b);
-TARGET_WITH_SIMD uint64x2_t sse2neon_vmull_p64(uint64x1_t _a, uint64x1_t _b);
-TARGET_WITH_SIMD uint64x2_t mm_shuffle_epi8(uint64x2_t a, uint64x2_t b);
-TARGET_WITH_SIMD uint64x2_t mm_srli_si128(uint64x2_t a, int imm);
-TARGET_WITH_SIMD uint64x2_t mm_slli_si128(uint64x2_t a, int imm);
+TARGET_WITH_NEON uint64x2_t sse2neon_vmull_p64(uint64x1_t _a, uint64x1_t _b);
+TARGET_WITH_NEON uint64x2_t mm_shuffle_epi8(uint64x2_t a, uint64x2_t b);
+TARGET_WITH_NEON uint64x2_t mm_srli_si128(uint64x2_t a, int imm);
+TARGET_WITH_NEON uint64x2_t mm_slli_si128(uint64x2_t a, int imm);

#endif
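sse2neon_vmull_p64() emulates the x86 PCLMULQDQ carry-less multiply on NEON, using the crypto-extension vmull_p64 when available and a table/shuffle fallback otherwise. A small usage sketch against the declaration above (operand values are arbitrary examples):

#include <arm_neon.h>
#include <stdint.h>

/* Carry-less (polynomial) 64x64 -> 128-bit multiply, mirroring what
 * _mm_clmulepi64_si128 provides on x86. */
static uint64x2_t clmul_example(void)
{
    uint64x1_t a = vcreate_u64(0x87ULL); /* x^7 + x^2 + x + 1 */
    uint64x1_t b = vcreate_u64(0x02ULL); /* x */

    return sse2neon_vmull_p64(a, b);     /* product over GF(2), no carries */
}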
crc16.c (19 lines changed)
@@ -31,9 +31,9 @@
 *
 * @return Pointer to a structure containing the checksum state.
 */
-AARU_EXPORT crc16_ctx* AARU_CALL crc16_init(void)
+AARU_EXPORT crc16_ctx *AARU_CALL crc16_init(void)
{
-    crc16_ctx* ctx = (crc16_ctx*)malloc(sizeof(crc16_ctx));
+    crc16_ctx *ctx = (crc16_ctx *)malloc(sizeof(crc16_ctx));

    if(!ctx) return NULL;

@@ -56,7 +56,7 @@ AARU_EXPORT crc16_ctx* AARU_CALL crc16_init(void)
 *
 * @returns 0 on success, -1 on error.
 */
-AARU_EXPORT int AARU_CALL crc16_update(crc16_ctx* ctx, const uint8_t* data, uint32_t len)
+AARU_EXPORT int AARU_CALL crc16_update(crc16_ctx *ctx, const uint8_t *data, uint32_t len)
{
    // Unroll according to Intel slicing by uint8_t
    // http://www.intel.com/technology/comms/perfnet/download/CRC_generators.pdf
@@ -65,8 +65,8 @@ AARU_EXPORT int AARU_CALL crc16_update(crc16_ctx* ctx, const uint8_t* data, uint
    if(!ctx || !data) return -1;

    uint16_t crc;
-    const uint32_t* current;
-    const uint8_t* current_char = (const uint8_t*)data;
+    const uint32_t *current;
+    const uint8_t *current_char = data;
    const size_t unroll = 4;
    const size_t bytes_at_once = 8 * unroll;
    uintptr_t unaligned_length = (4 - (((uintptr_t)current_char) & 3)) & 3;
@@ -80,7 +80,7 @@ AARU_EXPORT int AARU_CALL crc16_update(crc16_ctx* ctx, const uint8_t* data, uint
        unaligned_length--;
    }

-    current = (const uint32_t*)current_char;
+    current = (const uint32_t *)current_char;

    while(len >= bytes_at_once)
    {
@@ -89,6 +89,7 @@ AARU_EXPORT int AARU_CALL crc16_update(crc16_ctx* ctx, const uint8_t* data, uint
        {
            uint32_t one = *current++ ^ crc;
            uint32_t two = *current++;

+            // TODO: Big endian!
            crc = crc16_table[0][(two >> 24) & 0xFF] ^ crc16_table[1][(two >> 16) & 0xFF] ^
                  crc16_table[2][(two >> 8) & 0xFF] ^ crc16_table[3][two & 0xFF] ^ crc16_table[4][(one >> 24) & 0xFF] ^
@@ -98,7 +99,7 @@ AARU_EXPORT int AARU_CALL crc16_update(crc16_ctx* ctx, const uint8_t* data, uint
        len -= bytes_at_once;
    }

-    current_char = (const uint8_t*)current;
+    current_char = (const uint8_t *)current;

    while(len-- != 0) crc = (crc >> 8) ^ crc16_table[0][(crc & 0xFF) ^ *current_char++];

@@ -117,7 +118,7 @@ AARU_EXPORT int AARU_CALL crc16_update(crc16_ctx* ctx, const uint8_t* data, uint
 *
 * @returns 0 on success, -1 on error.
 */
-AARU_EXPORT int AARU_CALL crc16_final(crc16_ctx* ctx, uint16_t* crc)
+AARU_EXPORT int AARU_CALL crc16_final(crc16_ctx *ctx, uint16_t *crc)
{
    if(!ctx) return -1;

@@ -134,7 +135,7 @@ AARU_EXPORT int AARU_CALL crc16_final(crc16_ctx* ctx, uint16_t* crc)
 *
 * @param ctx The CRC-16 checksum context structure, to be freed.
 */
-AARU_EXPORT void AARU_CALL crc16_free(crc16_ctx* ctx)
+AARU_EXPORT void AARU_CALL crc16_free(crc16_ctx *ctx)
{
    if(ctx) free(ctx);
}
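Stripped of the slicing-by-8 unrolling and the alignment prologue, the update reduces to the classic table-driven loop that the tail of crc16_update() already uses. A minimal sketch over the same crc16_table (initial and final value handling stays in crc16_init()/crc16_final() and is not reproduced here):

#include <stddef.h>
#include <stdint.h>

/* Byte-at-a-time core of the table-driven CRC-16, equivalent to the tail
 * loop of crc16_update() above; crc16_table[0] is the single-byte table. */
static uint16_t crc16_bytewise(uint16_t crc, const uint8_t *data, size_t len)
{
    while(len--) crc = (uint16_t)((crc >> 8) ^ crc16_table[0][(crc & 0xFF) ^ *data++]);

    return crc;
}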
crc16.h (8 lines changed)
@@ -178,9 +178,9 @@ const uint16_t crc16_table[8][256] = {
    0xC48F, 0x084E, 0x440F, 0x88CE, 0x9D8E, 0x514F, 0xB70E, 0x7BCF, 0x6E8F, 0xA24E, 0xE20E, 0x2ECF, 0x3B8F, 0xF74E,
    0x110F, 0xDDCE, 0xC88E, 0x044F}};

-AARU_EXPORT crc16_ctx* AARU_CALL crc16_init();
-AARU_EXPORT int AARU_CALL crc16_update(crc16_ctx* ctx, const uint8_t* data, uint32_t len);
-AARU_EXPORT int AARU_CALL crc16_final(crc16_ctx* ctx, uint16_t* crc);
-AARU_EXPORT void AARU_CALL crc16_free(crc16_ctx* ctx);
+AARU_EXPORT crc16_ctx *AARU_CALL crc16_init();
+AARU_EXPORT int AARU_CALL crc16_update(crc16_ctx *ctx, const uint8_t *data, uint32_t len);
+AARU_EXPORT int AARU_CALL crc16_final(crc16_ctx *ctx, uint16_t *crc);
+AARU_EXPORT void AARU_CALL crc16_free(crc16_ctx *ctx);

#endif // AARU_CHECKSUMS_NATIVE_CRC16_H
@@ -31,9 +31,9 @@
|
||||
*
|
||||
* @return Pointer to a structure containing the checksum state.
|
||||
*/
|
||||
AARU_EXPORT crc16_ccitt_ctx* AARU_CALL crc16_ccitt_init(void)
|
||||
AARU_EXPORT crc16_ccitt_ctx *AARU_CALL crc16_ccitt_init(void)
|
||||
{
|
||||
crc16_ccitt_ctx* ctx = (crc16_ccitt_ctx*)malloc(sizeof(crc16_ccitt_ctx));
|
||||
crc16_ccitt_ctx *ctx = (crc16_ccitt_ctx *)malloc(sizeof(crc16_ccitt_ctx));
|
||||
|
||||
if(!ctx) return NULL;
|
||||
|
||||
@@ -56,7 +56,7 @@ AARU_EXPORT crc16_ccitt_ctx* AARU_CALL crc16_ccitt_init(void)
|
||||
*
|
||||
* @returns 0 on success, -1 on error.
|
||||
*/
|
||||
AARU_EXPORT int AARU_CALL crc16_ccitt_update(crc16_ccitt_ctx* ctx, const uint8_t* data, uint32_t len)
|
||||
AARU_EXPORT int AARU_CALL crc16_ccitt_update(crc16_ccitt_ctx *ctx, const uint8_t *data, uint32_t len)
|
||||
{
|
||||
// Unroll according to Intel slicing by uint8_t
|
||||
// http://www.intel.com/technology/comms/perfnet/download/CRC_generators.pdf
|
||||
@@ -65,7 +65,7 @@ AARU_EXPORT int AARU_CALL crc16_ccitt_update(crc16_ccitt_ctx* ctx, const uint8_t
|
||||
if(!ctx || !data) return -1;
|
||||
|
||||
uint16_t crc;
|
||||
const uint8_t* current_char = (const uint8_t*)data;
|
||||
const uint8_t *current_char = data;
|
||||
const size_t unroll = 4;
|
||||
const size_t bytes_at_once = 8 * unroll;
|
||||
uintptr_t unaligned_length = (4 - (((uintptr_t)current_char) & 3)) & 3;
|
||||
@@ -113,7 +113,7 @@ AARU_EXPORT int AARU_CALL crc16_ccitt_update(crc16_ccitt_ctx* ctx, const uint8_t
|
||||
*
|
||||
* @returns 0 on success, -1 on error.
|
||||
*/
|
||||
AARU_EXPORT int AARU_CALL crc16_ccitt_final(crc16_ccitt_ctx* ctx, uint16_t* crc)
|
||||
AARU_EXPORT int AARU_CALL crc16_ccitt_final(crc16_ccitt_ctx *ctx, uint16_t *crc)
|
||||
{
|
||||
if(!ctx) return -1;
|
||||
|
||||
@@ -130,7 +130,7 @@ AARU_EXPORT int AARU_CALL crc16_ccitt_final(crc16_ccitt_ctx* ctx, uint16_t* crc)
|
||||
*
|
||||
* @param ctx The CRC-16 checksum context structure, to be freed.
|
||||
*/
|
||||
AARU_EXPORT void AARU_CALL crc16_ccitt_free(crc16_ccitt_ctx* ctx)
|
||||
AARU_EXPORT void AARU_CALL crc16_ccitt_free(crc16_ccitt_ctx *ctx)
|
||||
{
|
||||
if(ctx) free(ctx);
|
||||
}
|
||||
@@ -177,9 +177,10 @@ const uint16_t crc16_ccitt_table[8][256] = {
|
||||
0x7039, 0x37EA, 0xFF9F, 0xB84C, 0x7F54, 0x3887, 0xF0F2, 0xB721, 0x6EE3, 0x2930, 0xE145, 0xA696, 0x618E, 0x265D,
|
||||
0xEE28, 0xA9FB, 0x4D8D, 0x0A5E, 0xC22B, 0x85F8, 0x42E0, 0x0533, 0xCD46, 0x8A95, 0x5357, 0x1484, 0xDCF1, 0x9B22,
|
||||
0x5C3A, 0x1BE9, 0xD39C, 0x944F}};
|
||||
AARU_EXPORT crc16_ccitt_ctx* AARU_CALL crc16_ccitt_init();
|
||||
AARU_EXPORT int AARU_CALL crc16_ccitt_update(crc16_ccitt_ctx* ctx, const uint8_t* data, uint32_t len);
|
||||
AARU_EXPORT int AARU_CALL crc16_ccitt_final(crc16_ccitt_ctx* ctx, uint16_t* crc);
|
||||
AARU_EXPORT void AARU_CALL crc16_ccitt_free(crc16_ccitt_ctx* ctx);
|
||||
|
||||
AARU_EXPORT crc16_ccitt_ctx *AARU_CALL crc16_ccitt_init();
|
||||
AARU_EXPORT int AARU_CALL crc16_ccitt_update(crc16_ccitt_ctx *ctx, const uint8_t *data, uint32_t len);
|
||||
AARU_EXPORT int AARU_CALL crc16_ccitt_final(crc16_ccitt_ctx *ctx, uint16_t *crc);
|
||||
AARU_EXPORT void AARU_CALL crc16_ccitt_free(crc16_ccitt_ctx *ctx);
|
||||
|
||||
#endif // AARU_CHECKSUMS_NATIVE_CRC16_H
|
||||
|
||||
crc32.c (20 lines changed)
@@ -31,9 +31,9 @@
 *
 * @return Pointer to a structure containing the checksum state.
 */
-AARU_EXPORT crc32_ctx* AARU_CALL crc32_init(void)
+AARU_EXPORT crc32_ctx *AARU_CALL crc32_init(void)
{
-    crc32_ctx* ctx = (crc32_ctx*)malloc(sizeof(crc32_ctx));
+    crc32_ctx *ctx = (crc32_ctx *)malloc(sizeof(crc32_ctx));

    if(!ctx) return NULL;

@@ -56,7 +56,7 @@ AARU_EXPORT crc32_ctx* AARU_CALL crc32_init(void)
 *
 * @returns 0 on success, -1 on error.
 */
-AARU_EXPORT int AARU_CALL crc32_update(crc32_ctx* ctx, const uint8_t* data, uint32_t len)
+AARU_EXPORT int AARU_CALL crc32_update(crc32_ctx *ctx, const uint8_t *data, uint32_t len)
{
    if(!ctx || !data) return -1;

@@ -101,14 +101,14 @@ AARU_EXPORT int AARU_CALL crc32_update(crc32_ctx* ctx, const uint8_t* data, uint
 *
 * @note This function assumes little-endian byte order.
 */
-AARU_EXPORT void AARU_CALL crc32_slicing(uint32_t* previous_crc, const uint8_t* data, long len)
+AARU_EXPORT void AARU_CALL crc32_slicing(uint32_t *previous_crc, const uint8_t *data, long len)
{
    // Unroll according to Intel slicing by uint8_t
    // http://www.intel.com/technology/comms/perfnet/download/CRC_generators.pdf
    // http://sourceforge.net/projects/slicing-by-8/
    uint32_t c;
-    const uint32_t* current;
-    const uint8_t* current_char = (const uint8_t*)data;
+    const uint32_t *current;
+    const uint8_t *current_char = data;
    const size_t unroll = 4;
    const size_t bytes_at_once = 8 * unroll;
    uintptr_t unaligned_length = (4 - (((uintptr_t)current_char) & 3)) & 3;
@@ -122,7 +122,7 @@ AARU_EXPORT void AARU_CALL crc32_slicing(uint32_t* previous_crc, const uint8_t*
        unaligned_length--;
    }

-    current = (const uint32_t*)current_char;
+    current = (const uint32_t *)current_char;

    while(len >= bytes_at_once)
    {
@@ -140,7 +140,7 @@ AARU_EXPORT void AARU_CALL crc32_slicing(uint32_t* previous_crc, const uint8_t*
        len -= bytes_at_once;
    }

-    current_char = (const uint8_t*)current;
+    current_char = (const uint8_t *)current;

    while(len-- != 0) c = (c >> 8) ^ crc32_table[0][(c & 0xFF) ^ *current_char++];

@@ -158,7 +158,7 @@ AARU_EXPORT void AARU_CALL crc32_slicing(uint32_t* previous_crc, const uint8_t*
 *
 * @returns 0 on success, -1 on error.
 */
-AARU_EXPORT int AARU_CALL crc32_final(crc32_ctx* ctx, uint32_t* crc)
+AARU_EXPORT int AARU_CALL crc32_final(crc32_ctx *ctx, uint32_t *crc)
{
    if(!ctx) return -1;

@@ -175,7 +175,7 @@ AARU_EXPORT int AARU_CALL crc32_final(crc32_ctx* ctx, uint32_t* crc)
 *
 * @param ctx The CRC-32 checksum context structure, to be freed.
 */
-AARU_EXPORT void AARU_CALL crc32_free(crc32_ctx* ctx)
+AARU_EXPORT void AARU_CALL crc32_free(crc32_ctx *ctx)
{
    if(ctx) free(ctx);
}
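Taken together, the reformatted API keeps the usual init/update/final/free life cycle. A small usage sketch against the declarations shown in crc32.h (error handling abbreviated, buffer contents supplied by the caller):

#include <stdint.h>
#include "crc32.h" /* the header whose diff follows */

/* Feed one buffer through the context API and read back the checksum. */
static int checksum_buffer(const uint8_t *buf, uint32_t len, uint32_t *out)
{
    crc32_ctx *ctx = crc32_init();

    if(!ctx) return -1;

    if(crc32_update(ctx, buf, len) != 0 || crc32_final(ctx, out) != 0)
    {
        crc32_free(ctx);
        return -1;
    }

    crc32_free(ctx);
    return 0;
}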
crc32.h (4 lines changed)
@@ -270,7 +270,7 @@ AARU_EXPORT void AARU_CALL crc32_slicing(uint32_t* previous_crc, const uin

#if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \
    defined(__i386__) || defined(__THW_INTEL) || defined(_M_IX86)
-AARU_EXPORT CLMUL uint32_t AARU_CALL crc32_clmul(uint32_t previous_crc, const uint8_t* data, long len);
+AARU_EXPORT TARGET_WITH_CLMUL uint32_t AARU_CALL crc32_clmul(uint32_t previous_crc, const uint8_t* data, long len);
#endif

#if defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM)
@@ -279,7 +279,7 @@ AARU_EXPORT TARGET_ARMV8_WITH_CRC uint32_t AARU_CALL armv8_crc32_little(uint32_t
                                                                        const uint8_t* data,
                                                                        uint32_t len);
#endif
-AARU_EXPORT TARGET_WITH_SIMD uint32_t AARU_CALL crc32_vmull(uint32_t previous_crc, const uint8_t* data, long len);
+AARU_EXPORT TARGET_WITH_NEON uint32_t AARU_CALL crc32_vmull(uint32_t previous_crc, const uint8_t* data, long len);
#endif

#endif // AARU_CHECKSUMS_NATIVE_CRC32_H
@@ -52,9 +52,9 @@
|
||||
* @param len The length of the input data.
|
||||
* @return The new CRC-32 value.
|
||||
*/
|
||||
TARGET_ARMV8_WITH_CRC uint32_t armv8_crc32_little(uint32_t previous_crc, const uint8_t* data, uint32_t len)
|
||||
TARGET_ARMV8_WITH_CRC uint32_t armv8_crc32_little(uint32_t previous_crc, const uint8_t *data, uint32_t len)
|
||||
{
|
||||
uint32_t c = (uint32_t)previous_crc;
|
||||
uint32_t c = previous_crc;
|
||||
|
||||
#if defined(__aarch64__) || defined(_M_ARM64)
|
||||
while(len && ((uintptr_t)data & 7))
|
||||
@@ -62,7 +62,7 @@ TARGET_ARMV8_WITH_CRC uint32_t armv8_crc32_little(uint32_t previous_crc, const u
|
||||
c = __crc32b(c, *data++);
|
||||
--len;
|
||||
}
|
||||
const uint64_t* buf8 = (const uint64_t*)data;
|
||||
const uint64_t *buf8 = (const uint64_t *)data;
|
||||
while(len >= 64)
|
||||
{
|
||||
c = __crc32d(c, *buf8++);
|
||||
@@ -81,14 +81,14 @@ TARGET_ARMV8_WITH_CRC uint32_t armv8_crc32_little(uint32_t previous_crc, const u
|
||||
len -= 8;
|
||||
}
|
||||
|
||||
data = (const uint8_t*)buf8;
|
||||
data = (const uint8_t *)buf8;
|
||||
#else // AARCH64
|
||||
while(len && ((uintptr_t)data & 3))
|
||||
{
|
||||
c = __crc32b(c, *data++);
|
||||
--len;
|
||||
}
|
||||
const uint32_t* buf4 = (const uint32_t*)data;
|
||||
const uint32_t *buf4 = (const uint32_t *)data;
|
||||
while(len >= 32)
|
||||
{
|
||||
c = __crc32w(c, *buf4++);
|
||||
@@ -107,10 +107,12 @@ TARGET_ARMV8_WITH_CRC uint32_t armv8_crc32_little(uint32_t previous_crc, const u
|
||||
len -= 4;
|
||||
}
|
||||
|
||||
data = (const uint8_t*)buf4;
|
||||
data = (const uint8_t *)buf4;
|
||||
#endif
|
||||
|
||||
while(len--) { c = __crc32b(c, *data++); }
|
||||
while(len--)
|
||||
{ c = __crc32b(c, *data++); }
|
||||
return c;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -34,7 +34,7 @@
|
||||
#include "crc32.h"
|
||||
#include "crc32_simd.h"
|
||||
|
||||
CLMUL static void fold_1(__m128i* xmm_crc0, __m128i* xmm_crc1, __m128i* xmm_crc2, __m128i* xmm_crc3)
|
||||
TARGET_WITH_CLMUL static void fold_1(__m128i *xmm_crc0, __m128i *xmm_crc1, __m128i *xmm_crc2, __m128i *xmm_crc3)
|
||||
{
|
||||
const __m128i xmm_fold4 = _mm_set_epi32(0x00000001, 0x54442bd4, 0x00000001, 0xc6e41596);
|
||||
|
||||
@@ -56,7 +56,7 @@ CLMUL static void fold_1(__m128i* xmm_crc0, __m128i* xmm_crc1, __m128i* xmm_crc2
|
||||
*xmm_crc3 = _mm_castps_si128(ps_res);
|
||||
}
|
||||
|
||||
CLMUL static void fold_2(__m128i* xmm_crc0, __m128i* xmm_crc1, __m128i* xmm_crc2, __m128i* xmm_crc3)
|
||||
TARGET_WITH_CLMUL static void fold_2(__m128i *xmm_crc0, __m128i *xmm_crc1, __m128i *xmm_crc2, __m128i *xmm_crc3)
|
||||
{
|
||||
const __m128i xmm_fold4 = _mm_set_epi32(0x00000001, 0x54442bd4, 0x00000001, 0xc6e41596);
|
||||
|
||||
@@ -86,7 +86,7 @@ CLMUL static void fold_2(__m128i* xmm_crc0, __m128i* xmm_crc1, __m128i* xmm_crc2
|
||||
*xmm_crc3 = _mm_castps_si128(ps_res31);
|
||||
}
|
||||
|
||||
CLMUL static void fold_3(__m128i* xmm_crc0, __m128i* xmm_crc1, __m128i* xmm_crc2, __m128i* xmm_crc3)
|
||||
TARGET_WITH_CLMUL static void fold_3(__m128i *xmm_crc0, __m128i *xmm_crc1, __m128i *xmm_crc2, __m128i *xmm_crc3)
|
||||
{
|
||||
const __m128i xmm_fold4 = _mm_set_epi32(0x00000001, 0x54442bd4, 0x00000001, 0xc6e41596);
|
||||
|
||||
@@ -122,7 +122,7 @@ CLMUL static void fold_3(__m128i* xmm_crc0, __m128i* xmm_crc1, __m128i* xmm_crc2
|
||||
*xmm_crc3 = _mm_castps_si128(ps_res32);
|
||||
}
|
||||
|
||||
CLMUL static void fold_4(__m128i* xmm_crc0, __m128i* xmm_crc1, __m128i* xmm_crc2, __m128i* xmm_crc3)
|
||||
TARGET_WITH_CLMUL static void fold_4(__m128i *xmm_crc0, __m128i *xmm_crc1, __m128i *xmm_crc2, __m128i *xmm_crc3)
|
||||
{
|
||||
const __m128i xmm_fold4 = _mm_set_epi32(0x00000001, 0x54442bd4, 0x00000001, 0xc6e41596);
|
||||
|
||||
@@ -166,12 +166,12 @@ CLMUL static void fold_4(__m128i* xmm_crc0, __m128i* xmm_crc1, __m128i* xmm_crc2
|
||||
*xmm_crc3 = _mm_castps_si128(ps_res3);
|
||||
}
|
||||
|
||||
CLMUL static void partial_fold(const size_t len,
|
||||
__m128i* xmm_crc0,
|
||||
__m128i* xmm_crc1,
|
||||
__m128i* xmm_crc2,
|
||||
__m128i* xmm_crc3,
|
||||
__m128i* xmm_crc_part)
|
||||
TARGET_WITH_CLMUL static void partial_fold(const size_t len,
|
||||
__m128i *xmm_crc0,
|
||||
__m128i *xmm_crc1,
|
||||
__m128i *xmm_crc2,
|
||||
__m128i *xmm_crc3,
|
||||
__m128i *xmm_crc_part)
|
||||
{
|
||||
const __m128i xmm_fold4 = _mm_set_epi32(0x00000001, 0x54442bd4, 0x00000001, 0xc6e41596);
|
||||
const __m128i xmm_mask3 = _mm_set1_epi32(0x80808080);
|
||||
@@ -180,7 +180,7 @@ CLMUL static void partial_fold(const size_t len,
|
||||
__m128i xmm_a0_0, xmm_a0_1;
|
||||
__m128 ps_crc3, psa0_0, psa0_1, ps_res;
|
||||
|
||||
xmm_shl = _mm_load_si128((__m128i*)pshufb_shf_table + (len - 1));
|
||||
xmm_shl = _mm_load_si128((__m128i *)pshufb_shf_table + (len - 1));
|
||||
xmm_shr = xmm_shl;
|
||||
xmm_shr = _mm_xor_si128(xmm_shr, xmm_mask3);
|
||||
|
||||
@@ -224,7 +224,7 @@ CLMUL static void partial_fold(const size_t len,
|
||||
#define XOR_INITIAL(where) ONCE(where = _mm_xor_si128(where, xmm_initial))
|
||||
|
||||
/**
|
||||
* @brief Calculate the CRC32 checksum using CLMUL instruction extension.
|
||||
* @brief Calculate the CRC32 checksum using TARGET_WITH_CLMUL instruction extension.
|
||||
*
|
||||
* @param previous_crc The previously calculated CRC32 checksum.
|
||||
* @param data Pointer to the input data buffer.
|
||||
@@ -232,7 +232,7 @@ CLMUL static void partial_fold(const size_t len,
|
||||
*
|
||||
* @return The calculated CRC32 checksum.
|
||||
*/
|
||||
AARU_EXPORT CLMUL uint32_t AARU_CALL crc32_clmul(uint32_t previous_crc, const uint8_t* data, long len)
|
||||
AARU_EXPORT TARGET_WITH_CLMUL uint32_t AARU_CALL crc32_clmul(uint32_t previous_crc, const uint8_t *data, long len)
|
||||
{
|
||||
unsigned long algn_diff;
|
||||
__m128i xmm_t0, xmm_t1, xmm_t2, xmm_t3;
|
||||
@@ -246,8 +246,8 @@ AARU_EXPORT CLMUL uint32_t AARU_CALL crc32_clmul(uint32_t previous_crc, const ui
|
||||
int first = 1;
|
||||
|
||||
/* fold 512 to 32 step variable declarations for ISO-C90 compat. */
|
||||
const __m128i xmm_mask = _mm_load_si128((__m128i*)crc_mask);
|
||||
const __m128i xmm_mask2 = _mm_load_si128((__m128i*)crc_mask2);
|
||||
const __m128i xmm_mask = _mm_load_si128((__m128i *)crc_mask);
|
||||
const __m128i xmm_mask2 = _mm_load_si128((__m128i *)crc_mask2);
|
||||
|
||||
uint32_t crc;
|
||||
__m128i x_tmp0, x_tmp1, x_tmp2, crc_fold;
|
||||
@@ -263,13 +263,16 @@ AARU_EXPORT CLMUL uint32_t AARU_CALL crc32_clmul(uint32_t previous_crc, const ui
|
||||
uint32_t crc = ~previous_crc;
|
||||
switch(len)
|
||||
{
|
||||
case 3: crc = (crc >> 8) ^ crc32_table[0][(crc & 0xFF) ^ *data++];
|
||||
case 2: crc = (crc >> 8) ^ crc32_table[0][(crc & 0xFF) ^ *data++];
|
||||
case 1: crc = (crc >> 8) ^ crc32_table[0][(crc & 0xFF) ^ *data++];
|
||||
case 3:
|
||||
crc = (crc >> 8) ^ crc32_table[0][(crc & 0xFF) ^ *data++];
|
||||
case 2:
|
||||
crc = (crc >> 8) ^ crc32_table[0][(crc & 0xFF) ^ *data++];
|
||||
case 1:
|
||||
crc = (crc >> 8) ^ crc32_table[0][(crc & 0xFF) ^ *data++];
|
||||
}
|
||||
return ~crc;
|
||||
}
|
||||
xmm_crc_part = _mm_loadu_si128((__m128i*)data);
|
||||
xmm_crc_part = _mm_loadu_si128((__m128i *)data);
|
||||
XOR_INITIAL(xmm_crc_part);
|
||||
goto partial;
|
||||
}
|
||||
@@ -278,7 +281,7 @@ AARU_EXPORT CLMUL uint32_t AARU_CALL crc32_clmul(uint32_t previous_crc, const ui
|
||||
algn_diff = (0 - (uintptr_t)data) & 0xF;
|
||||
if(algn_diff)
|
||||
{
|
||||
xmm_crc_part = _mm_loadu_si128((__m128i*)data);
|
||||
xmm_crc_part = _mm_loadu_si128((__m128i *)data);
|
||||
XOR_INITIAL(xmm_crc_part);
|
||||
|
||||
data += algn_diff;
|
||||
@@ -289,10 +292,10 @@ AARU_EXPORT CLMUL uint32_t AARU_CALL crc32_clmul(uint32_t previous_crc, const ui
|
||||
|
||||
while((len -= 64) >= 0)
|
||||
{
|
||||
xmm_t0 = _mm_load_si128((__m128i*)data);
|
||||
xmm_t1 = _mm_load_si128((__m128i*)data + 1);
|
||||
xmm_t2 = _mm_load_si128((__m128i*)data + 2);
|
||||
xmm_t3 = _mm_load_si128((__m128i*)data + 3);
|
||||
xmm_t0 = _mm_load_si128((__m128i *)data);
|
||||
xmm_t1 = _mm_load_si128((__m128i *)data + 1);
|
||||
xmm_t2 = _mm_load_si128((__m128i *)data + 2);
|
||||
xmm_t3 = _mm_load_si128((__m128i *)data + 3);
|
||||
|
||||
XOR_INITIAL(xmm_t0);
|
||||
|
||||
@@ -313,9 +316,9 @@ AARU_EXPORT CLMUL uint32_t AARU_CALL crc32_clmul(uint32_t previous_crc, const ui
|
||||
{
|
||||
len += 16;
|
||||
|
||||
xmm_t0 = _mm_load_si128((__m128i*)data);
|
||||
xmm_t1 = _mm_load_si128((__m128i*)data + 1);
|
||||
xmm_t2 = _mm_load_si128((__m128i*)data + 2);
|
||||
xmm_t0 = _mm_load_si128((__m128i *)data);
|
||||
xmm_t1 = _mm_load_si128((__m128i *)data + 1);
|
||||
xmm_t2 = _mm_load_si128((__m128i *)data + 2);
|
||||
|
||||
XOR_INITIAL(xmm_t0);
|
||||
|
||||
@@ -327,14 +330,14 @@ AARU_EXPORT CLMUL uint32_t AARU_CALL crc32_clmul(uint32_t previous_crc, const ui
|
||||
|
||||
if(len == 0) goto done;
|
||||
|
||||
xmm_crc_part = _mm_load_si128((__m128i*)data + 3);
|
||||
xmm_crc_part = _mm_load_si128((__m128i *)data + 3);
|
||||
}
|
||||
else if(len + 32 >= 0)
|
||||
{
|
||||
len += 32;
|
||||
|
||||
xmm_t0 = _mm_load_si128((__m128i*)data);
|
||||
xmm_t1 = _mm_load_si128((__m128i*)data + 1);
|
||||
xmm_t0 = _mm_load_si128((__m128i *)data);
|
||||
xmm_t1 = _mm_load_si128((__m128i *)data + 1);
|
||||
|
||||
XOR_INITIAL(xmm_t0);
|
||||
|
||||
@@ -345,13 +348,13 @@ AARU_EXPORT CLMUL uint32_t AARU_CALL crc32_clmul(uint32_t previous_crc, const ui
|
||||
|
||||
if(len == 0) goto done;
|
||||
|
||||
xmm_crc_part = _mm_load_si128((__m128i*)data + 2);
|
||||
xmm_crc_part = _mm_load_si128((__m128i *)data + 2);
|
||||
}
|
||||
else if(len + 48 >= 0)
|
||||
{
|
||||
len += 48;
|
||||
|
||||
xmm_t0 = _mm_load_si128((__m128i*)data);
|
||||
xmm_t0 = _mm_load_si128((__m128i *)data);
|
||||
|
||||
XOR_INITIAL(xmm_t0);
|
||||
|
||||
@@ -361,13 +364,13 @@ AARU_EXPORT CLMUL uint32_t AARU_CALL crc32_clmul(uint32_t previous_crc, const ui
|
||||
|
||||
if(len == 0) goto done;
|
||||
|
||||
xmm_crc_part = _mm_load_si128((__m128i*)data + 1);
|
||||
xmm_crc_part = _mm_load_si128((__m128i *)data + 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
len += 64;
|
||||
if(len == 0) goto done;
|
||||
xmm_crc_part = _mm_load_si128((__m128i*)data);
|
||||
xmm_crc_part = _mm_load_si128((__m128i *)data);
|
||||
XOR_INITIAL(xmm_crc_part);
|
||||
}
|
||||
|
||||
@@ -382,7 +385,7 @@ done:
|
||||
/*
|
||||
* k1
|
||||
*/
|
||||
crc_fold = _mm_load_si128((__m128i*)crc_k);
|
||||
crc_fold = _mm_load_si128((__m128i *)crc_k);
|
||||
|
||||
x_tmp0 = _mm_clmulepi64_si128(xmm_crc0, crc_fold, 0x10);
|
||||
xmm_crc0 = _mm_clmulepi64_si128(xmm_crc0, crc_fold, 0x01);
|
||||
@@ -402,7 +405,7 @@ done:
|
||||
/*
|
||||
* k5
|
||||
*/
|
||||
crc_fold = _mm_load_si128((__m128i*)crc_k + 1);
|
||||
crc_fold = _mm_load_si128((__m128i *)crc_k + 1);
|
||||
|
||||
xmm_crc0 = xmm_crc3;
|
||||
xmm_crc3 = _mm_clmulepi64_si128(xmm_crc3, crc_fold, 0);
|
||||
@@ -420,7 +423,7 @@ done:
|
||||
*/
|
||||
xmm_crc1 = xmm_crc3;
|
||||
xmm_crc2 = xmm_crc3;
|
||||
crc_fold = _mm_load_si128((__m128i*)crc_k + 2);
|
||||
crc_fold = _mm_load_si128((__m128i *)crc_k + 2);
|
||||
|
||||
xmm_crc3 = _mm_clmulepi64_si128(xmm_crc3, crc_fold, 0);
|
||||
xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc2);
|
||||
@@ -434,7 +437,7 @@ done:
|
||||
/*
|
||||
* could just as well write xmm_crc3[2], doing a movaps and truncating, but
|
||||
* no real advantage - it's a tiny bit slower per call, while no additional CPUs
|
||||
* would be supported by only requiring SSSE3 and CLMUL instead of SSE4.1 + CLMUL
|
||||
* would be supported by only requiring TARGET_WITH_SSSE3 and TARGET_WITH_CLMUL instead of SSE4.1 + TARGET_WITH_CLMUL
|
||||
*/
|
||||
crc = _mm_extract_epi32(xmm_crc3, 2);
|
||||
return ~crc;
|
||||
|
||||
@@ -43,8 +43,8 @@
|
||||
#define XOR_INITIAL(where) \
|
||||
ONCE(where = vreinterpretq_u64_u32(veorq_u32(vreinterpretq_u32_u64(where), vreinterpretq_u32_u64(q_initial))))
|
||||
|
||||
TARGET_WITH_SIMD FORCE_INLINE void
|
||||
fold_1(uint64x2_t* q_crc0, uint64x2_t* q_crc1, uint64x2_t* q_crc2, uint64x2_t* q_crc3)
|
||||
TARGET_WITH_NEON FORCE_INLINE void
|
||||
fold_1(uint64x2_t *q_crc0, uint64x2_t *q_crc1, uint64x2_t *q_crc2, uint64x2_t *q_crc3)
|
||||
{
|
||||
uint32_t ALIGNED_(16) data[4] = {0xc6e41596, 0x00000001, 0x54442bd4, 0x00000001};
|
||||
const uint64x2_t q_fold4 = vreinterpretq_u64_u32(vld1q_u32(data));
|
||||
@@ -67,8 +67,8 @@ TARGET_WITH_SIMD FORCE_INLINE void
|
||||
*q_crc3 = vreinterpretq_u64_u32(ps_res);
|
||||
}
|
||||
|
||||
TARGET_WITH_SIMD FORCE_INLINE void
|
||||
fold_2(uint64x2_t* q_crc0, uint64x2_t* q_crc1, uint64x2_t* q_crc2, uint64x2_t* q_crc3)
|
||||
TARGET_WITH_NEON FORCE_INLINE void
|
||||
fold_2(uint64x2_t *q_crc0, uint64x2_t *q_crc1, uint64x2_t *q_crc2, uint64x2_t *q_crc3)
|
||||
{
|
||||
uint32_t ALIGNED_(16) data[4] = {0xc6e41596, 0x00000001, 0x54442bd4, 0x00000001};
|
||||
const uint64x2_t q_fold4 = vreinterpretq_u64_u32(vld1q_u32(data));
|
||||
@@ -99,8 +99,8 @@ TARGET_WITH_SIMD FORCE_INLINE void
|
||||
*q_crc3 = vreinterpretq_u64_u32(ps_res31);
|
||||
}
|
||||
|
||||
TARGET_WITH_SIMD FORCE_INLINE void
|
||||
fold_3(uint64x2_t* q_crc0, uint64x2_t* q_crc1, uint64x2_t* q_crc2, uint64x2_t* q_crc3)
|
||||
TARGET_WITH_NEON FORCE_INLINE void
|
||||
fold_3(uint64x2_t *q_crc0, uint64x2_t *q_crc1, uint64x2_t *q_crc2, uint64x2_t *q_crc3)
|
||||
{
|
||||
uint32_t ALIGNED_(16) data[4] = {0xc6e41596, 0x00000001, 0x54442bd4, 0x00000001};
|
||||
const uint64x2_t q_fold4 = vreinterpretq_u64_u32(vld1q_u32(data));
|
||||
@@ -137,8 +137,8 @@ TARGET_WITH_SIMD FORCE_INLINE void
|
||||
*q_crc3 = vreinterpretq_u64_u32(ps_res32);
|
||||
}
|
||||
|
||||
TARGET_WITH_SIMD FORCE_INLINE void
|
||||
fold_4(uint64x2_t* q_crc0, uint64x2_t* q_crc1, uint64x2_t* q_crc2, uint64x2_t* q_crc3)
|
||||
TARGET_WITH_NEON FORCE_INLINE void
|
||||
fold_4(uint64x2_t *q_crc0, uint64x2_t *q_crc1, uint64x2_t *q_crc2, uint64x2_t *q_crc3)
|
||||
{
|
||||
uint32_t ALIGNED_(16) data[4] = {0xc6e41596, 0x00000001, 0x54442bd4, 0x00000001};
|
||||
const uint64x2_t q_fold4 = vreinterpretq_u64_u32(vld1q_u32(data));
|
||||
@@ -184,12 +184,12 @@ TARGET_WITH_SIMD FORCE_INLINE void
|
||||
*q_crc3 = vreinterpretq_u64_u32(ps_res3);
|
||||
}
|
||||
|
||||
TARGET_WITH_SIMD FORCE_INLINE void partial_fold(const size_t len,
|
||||
uint64x2_t* q_crc0,
|
||||
uint64x2_t* q_crc1,
|
||||
uint64x2_t* q_crc2,
|
||||
uint64x2_t* q_crc3,
|
||||
uint64x2_t* q_crc_part)
|
||||
TARGET_WITH_NEON FORCE_INLINE void partial_fold(const size_t len,
|
||||
uint64x2_t *q_crc0,
|
||||
uint64x2_t *q_crc1,
|
||||
uint64x2_t *q_crc2,
|
||||
uint64x2_t *q_crc3,
|
||||
uint64x2_t *q_crc_part)
|
||||
{
|
||||
uint32_t ALIGNED_(16) data[4] = {0xc6e41596, 0x00000001, 0x54442bd4, 0x00000001};
|
||||
const uint64x2_t q_fold4 = vreinterpretq_u64_u32(vld1q_u32(data));
|
||||
@@ -198,7 +198,7 @@ TARGET_WITH_SIMD FORCE_INLINE void partial_fold(const size_t len,
|
||||
uint64x2_t q_shl, q_shr, q_tmp1, q_tmp2, q_tmp3, q_a0_0, q_a0_1;
|
||||
uint32x4_t ps_crc3, psa0_0, psa0_1, ps_res;
|
||||
|
||||
q_shl = vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)pshufb_shf_table + (len - 1) * 4));
|
||||
q_shl = vreinterpretq_u64_u32(vld1q_u32((const uint32_t *)pshufb_shf_table + (len - 1) * 4));
|
||||
q_shr = q_shl;
|
||||
q_shr = vreinterpretq_u64_u32(veorq_u32(vreinterpretq_u32_u64(q_shr), vreinterpretq_u32_u64(q_mask3)));
|
||||
|
||||
@@ -247,7 +247,7 @@ TARGET_WITH_SIMD FORCE_INLINE void partial_fold(const size_t len,
|
||||
*
|
||||
* @return The CRC-32 checksum of the given data.
|
||||
*/
|
||||
TARGET_WITH_SIMD uint32_t crc32_vmull(uint32_t previous_crc, const uint8_t* data, long len)
|
||||
TARGET_WITH_NEON uint32_t crc32_vmull(uint32_t previous_crc, const uint8_t *data, long len)
|
||||
{
|
||||
unsigned long algn_diff;
|
||||
uint64x2_t q_t0;
|
||||
@@ -264,8 +264,8 @@ TARGET_WITH_SIMD uint32_t crc32_vmull(uint32_t previous_crc, const uint8_t* data
|
||||
int first = 1;
|
||||
|
||||
/* fold 512 to 32 step variable declarations for ISO-C90 compat. */
|
||||
const uint64x2_t q_mask = vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)crc_mask));
|
||||
const uint64x2_t q_mask2 = vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)crc_mask2));
|
||||
const uint64x2_t q_mask = vreinterpretq_u64_u32(vld1q_u32((const uint32_t *)crc_mask));
|
||||
const uint64x2_t q_mask2 = vreinterpretq_u64_u32(vld1q_u32((const uint32_t *)crc_mask2));
|
||||
|
||||
uint32_t crc;
|
||||
uint64x2_t x_tmp0;
|
||||
@@ -284,13 +284,16 @@ TARGET_WITH_SIMD uint32_t crc32_vmull(uint32_t previous_crc, const uint8_t* data
|
||||
uint32_t crc = ~previous_crc;
|
||||
switch(len)
|
||||
{
|
||||
case 3: crc = (crc >> 8) ^ crc32_table[0][(crc & 0xFF) ^ *data++];
|
||||
case 2: crc = (crc >> 8) ^ crc32_table[0][(crc & 0xFF) ^ *data++];
|
||||
case 1: crc = (crc >> 8) ^ crc32_table[0][(crc & 0xFF) ^ *data++];
|
||||
case 3:
|
||||
crc = (crc >> 8) ^ crc32_table[0][(crc & 0xFF) ^ *data++];
|
||||
case 2:
|
||||
crc = (crc >> 8) ^ crc32_table[0][(crc & 0xFF) ^ *data++];
|
||||
case 1:
|
||||
crc = (crc >> 8) ^ crc32_table[0][(crc & 0xFF) ^ *data++];
|
||||
}
|
||||
return ~crc;
|
||||
}
|
||||
q_crc_part = vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)data));
|
||||
q_crc_part = vreinterpretq_u64_u32(vld1q_u32((const uint32_t *)data));
|
||||
XOR_INITIAL(q_crc_part);
|
||||
goto partial;
|
||||
}
|
||||
@@ -299,7 +302,7 @@ TARGET_WITH_SIMD uint32_t crc32_vmull(uint32_t previous_crc, const uint8_t* data
|
||||
algn_diff = (0 - (uintptr_t)data) & 0xF;
if(algn_diff)
{
q_crc_part = vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)data));
q_crc_part = vreinterpretq_u64_u32(vld1q_u32((const uint32_t *)data));
XOR_INITIAL(q_crc_part);

data += algn_diff;
@@ -310,10 +313,10 @@ TARGET_WITH_SIMD uint32_t crc32_vmull(uint32_t previous_crc, const uint8_t* data

while((len -= 64) >= 0)
{
q_t0 = vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)data));
q_t1 = vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)data + 4));
q_t2 = vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)data + 8));
q_t3 = vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)data + 12));
q_t0 = vreinterpretq_u64_u32(vld1q_u32((const uint32_t *)data));
q_t1 = vreinterpretq_u64_u32(vld1q_u32((const uint32_t *)data + 4));
q_t2 = vreinterpretq_u64_u32(vld1q_u32((const uint32_t *)data + 8));
q_t3 = vreinterpretq_u64_u32(vld1q_u32((const uint32_t *)data + 12));

XOR_INITIAL(q_t0);

@@ -334,9 +337,9 @@ TARGET_WITH_SIMD uint32_t crc32_vmull(uint32_t previous_crc, const uint8_t* data
{
len += 16;

q_t0 = vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)data));
q_t1 = vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)data + 4));
q_t2 = vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)data + 8));
q_t0 = vreinterpretq_u64_u32(vld1q_u32((const uint32_t *)data));
q_t1 = vreinterpretq_u64_u32(vld1q_u32((const uint32_t *)data + 4));
q_t2 = vreinterpretq_u64_u32(vld1q_u32((const uint32_t *)data + 8));

XOR_INITIAL(q_t0);

@@ -348,14 +351,14 @@ TARGET_WITH_SIMD uint32_t crc32_vmull(uint32_t previous_crc, const uint8_t* data

if(len == 0) goto done;

q_crc_part = vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)data + 12));
q_crc_part = vreinterpretq_u64_u32(vld1q_u32((const uint32_t *)data + 12));
}
else if(len + 32 >= 0)
{
len += 32;

q_t0 = vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)data));
q_t1 = vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)data + 4));
q_t0 = vreinterpretq_u64_u32(vld1q_u32((const uint32_t *)data));
q_t1 = vreinterpretq_u64_u32(vld1q_u32((const uint32_t *)data + 4));

XOR_INITIAL(q_t0);

@@ -366,13 +369,13 @@ TARGET_WITH_SIMD uint32_t crc32_vmull(uint32_t previous_crc, const uint8_t* data

if(len == 0) goto done;

q_crc_part = vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)data + 8));
q_crc_part = vreinterpretq_u64_u32(vld1q_u32((const uint32_t *)data + 8));
}
else if(len + 48 >= 0)
{
len += 48;

q_t0 = vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)data));
q_t0 = vreinterpretq_u64_u32(vld1q_u32((const uint32_t *)data));

XOR_INITIAL(q_t0);

@@ -382,13 +385,13 @@ TARGET_WITH_SIMD uint32_t crc32_vmull(uint32_t previous_crc, const uint8_t* data

if(len == 0) goto done;

q_crc_part = vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)data + 4));
q_crc_part = vreinterpretq_u64_u32(vld1q_u32((const uint32_t *)data + 4));
}
else
{
len += 64;
if(len == 0) goto done;
q_crc_part = vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)data));
q_crc_part = vreinterpretq_u64_u32(vld1q_u32((const uint32_t *)data));
XOR_INITIAL(q_crc_part);
}

@@ -428,12 +431,12 @@ done:
q_crc0 = q_crc3;
q_crc3 = (sse2neon_vmull_p64(vget_low_u64((q_crc3)), vget_low_u64((crc_fold))));
uint8x16_t tmp[2] = {vreinterpretq_u8_u64(q_crc0), vdupq_n_u8(0)};
q_crc0 = vreinterpretq_u64_u8(vld1q_u8(((uint8_t const*)tmp) + 8));
q_crc0 = vreinterpretq_u64_u8(vld1q_u8(((uint8_t const *)tmp) + 8));
q_crc3 = vreinterpretq_u64_u32(veorq_u32(vreinterpretq_u32_u64(q_crc3), vreinterpretq_u32_u64(q_crc0)));

q_crc0 = q_crc3;
uint8x16_t tmp_1[2] = {vdupq_n_u8(0), vreinterpretq_u8_u64(q_crc3)};
q_crc3 = vreinterpretq_u64_u8(vld1q_u8(((uint8_t const*)tmp_1) + (16 - 4)));
q_crc3 = vreinterpretq_u64_u8(vld1q_u8(((uint8_t const *)tmp_1) + (16 - 4)));
q_crc3 = (sse2neon_vmull_p64(vget_low_u64((q_crc3)), vget_high_u64((crc_fold))));
q_crc3 = vreinterpretq_u64_u32(veorq_u32(vreinterpretq_u32_u64(q_crc3), vreinterpretq_u32_u64(q_crc0)));
q_crc3 = vreinterpretq_u64_u32(vandq_u32(vreinterpretq_u32_u64(q_crc3), vreinterpretq_u32_u64(q_mask2)));
@@ -457,7 +460,7 @@ done:
/*
* could just as well write q_crc3[2], doing a movaps and truncating, but
* no real advantage - it's a tiny bit slower per call, while no additional CPUs
* would be supported by only requiring SSSE3 and CLMUL instead of SSE4.1 + CLMUL
* would be supported by only requiring TARGET_WITH_SSSE3 and TARGET_WITH_CLMUL instead of SSE4.1 + TARGET_WITH_CLMUL
*/
crc = vgetq_lane_u32(vreinterpretq_u32_u64(q_crc3), (2));
return ~crc;

27
crc64.c
27
crc64.c
@@ -32,10 +32,9 @@
*
* @return Pointer to a structure containing the checksum state.
*/
AARU_EXPORT crc64_ctx* AARU_CALL crc64_init(void)
AARU_EXPORT crc64_ctx *AARU_CALL crc64_init(void)
{
int i, slice;
crc64_ctx* ctx = (crc64_ctx*)malloc(sizeof(crc64_ctx));
crc64_ctx *ctx = (crc64_ctx *)malloc(sizeof(crc64_ctx));

if(!ctx) return NULL;

@@ -58,7 +57,7 @@ AARU_EXPORT crc64_ctx* AARU_CALL crc64_init(void)
*
* @returns 0 on success, -1 on error.
*/
AARU_EXPORT int AARU_CALL crc64_update(crc64_ctx* ctx, const uint8_t* data, uint32_t len)
AARU_EXPORT int AARU_CALL crc64_update(crc64_ctx *ctx, const uint8_t *data, uint32_t len)
{
if(!ctx || !data) return -1;

@@ -88,17 +87,17 @@ AARU_EXPORT int AARU_CALL crc64_update(crc64_ctx* ctx, const uint8_t* data, uint
return 0;
}

AARU_EXPORT void AARU_CALL crc64_slicing(uint64_t* previous_crc, const uint8_t* data, uint32_t len)
AARU_EXPORT void AARU_CALL crc64_slicing(uint64_t *previous_crc, const uint8_t *data, uint32_t len)
{
uint64_t c = *previous_crc;

if(len > 4)
{
const uint8_t* limit;
const uint8_t *limit;

while((uintptr_t)(data)&3)
while((uintptr_t)(data) & 3)
{
c = crc64_table[0][*data++ ^ ((c)&0xFF)] ^ ((c) >> 8);
c = crc64_table[0][*data++ ^ (c & 0xFF)] ^ (c >> 8);
--len;
}

@@ -107,15 +106,15 @@ AARU_EXPORT void AARU_CALL crc64_slicing(uint64_t* previous_crc, const uint8_t*

while(data < limit)
{
const uint32_t tmp = c ^ *(const uint32_t*)(data);
const uint32_t tmp = c ^ *(const uint32_t *)(data);
data += 4;

c = crc64_table[3][((tmp)&0xFF)] ^ crc64_table[2][(((tmp) >> 8) & 0xFF)] ^ ((c) >> 32) ^
crc64_table[1][(((tmp) >> 16) & 0xFF)] ^ crc64_table[0][((tmp) >> 24)];
c = crc64_table[3][tmp & 0xFF] ^ crc64_table[2][(tmp >> 8) & 0xFF] ^ (c >> 32) ^
crc64_table[1][tmp >> 16 & 0xFF] ^ crc64_table[0][tmp >> 24];
}
}

while(len-- != 0) c = crc64_table[0][*data++ ^ ((c)&0xFF)] ^ ((c) >> 8);
while(len-- != 0) c = crc64_table[0][*data++ ^ (c & 0xFF)] ^ (c >> 8);

*previous_crc = c;
}
@@ -131,7 +130,7 @@ AARU_EXPORT void AARU_CALL crc64_slicing(uint64_t* previous_crc, const uint8_t*
*
* @returns 0 on success, -1 on error.
*/
AARU_EXPORT int AARU_CALL crc64_final(crc64_ctx* ctx, uint64_t* crc)
AARU_EXPORT int AARU_CALL crc64_final(crc64_ctx *ctx, uint64_t *crc)
{
if(!ctx) return -1;

@@ -148,7 +147,7 @@ AARU_EXPORT int AARU_CALL crc64_final(crc64_ctx* ctx, uint64_t* crc)
*
* @param ctx The CRC-64 checksum context structure, to be freed.
*/
AARU_EXPORT void AARU_CALL crc64_free(crc64_ctx* ctx)
AARU_EXPORT void AARU_CALL crc64_free(crc64_ctx *ctx)
{
if(ctx) free(ctx);
}
14
crc64.h
14
crc64.h
@@ -237,19 +237,19 @@ const static uint64_t crc64_table[4][256] = {
#define CRC64_ECMA_POLY 0xC96C5795D7870F42
#define CRC64_ECMA_SEED 0xFFFFFFFFFFFFFFFF

AARU_EXPORT crc64_ctx* AARU_CALL crc64_init();
AARU_EXPORT int AARU_CALL crc64_update(crc64_ctx* ctx, const uint8_t* data, uint32_t len);
AARU_EXPORT int AARU_CALL crc64_final(crc64_ctx* ctx, uint64_t* crc);
AARU_EXPORT void AARU_CALL crc64_free(crc64_ctx* ctx);
AARU_EXPORT void AARU_CALL crc64_slicing(uint64_t* previous_crc, const uint8_t* data, uint32_t len);
AARU_EXPORT crc64_ctx *AARU_CALL crc64_init();
AARU_EXPORT int AARU_CALL crc64_update(crc64_ctx *ctx, const uint8_t *data, uint32_t len);
AARU_EXPORT int AARU_CALL crc64_final(crc64_ctx *ctx, uint64_t *crc);
AARU_EXPORT void AARU_CALL crc64_free(crc64_ctx *ctx);
AARU_EXPORT void AARU_CALL crc64_slicing(uint64_t *previous_crc, const uint8_t *data, uint32_t len);

#if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \
defined(__i386__) || defined(__THW_INTEL) || defined(_M_IX86)
AARU_EXPORT CLMUL uint64_t AARU_CALL crc64_clmul(uint64_t crc, const uint8_t* data, long length);
AARU_EXPORT TARGET_WITH_CLMUL uint64_t AARU_CALL crc64_clmul(uint64_t crc, const uint8_t *data, long length);
#endif

#if defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM)
AARU_EXPORT TARGET_WITH_SIMD uint64_t AARU_CALL crc64_vmull(uint64_t previous_crc, const uint8_t* data, long len);
AARU_EXPORT TARGET_WITH_NEON uint64_t AARU_CALL crc64_vmull(uint64_t previous_crc, const uint8_t *data, long len);
#endif

#endif // AARU_CHECKSUMS_NATIVE_CRD64_H

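For reference, the exported CRC-64 entry points declared above follow an init/update/final/free lifecycle. The following is a minimal caller-side sketch, not part of the commit itself; the buffer and length are placeholders and error handling is abbreviated:

static uint64_t crc64_of_buffer(const uint8_t *buf, uint32_t len)
{
    uint64_t crc = 0;
    crc64_ctx *ctx = crc64_init();   /* allocates and seeds the context */
    if(!ctx) return 0;               /* allocation failed */
    crc64_update(ctx, buf, len);     /* may be called repeatedly for streamed data */
    crc64_final(ctx, &crc);          /* writes the finished checksum */
    crc64_free(ctx);                 /* releases the context */
    return crc;
}
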
@@ -14,7 +14,9 @@
#include <wmmintrin.h>

#ifdef _MSC_VER

#include <intrin.h>

#endif

#include "library.h"
@@ -58,22 +60,22 @@ static const uint8_t shuffleMasks[] = {
0x8f, 0x8e, 0x8d, 0x8c, 0x8b, 0x8a, 0x89, 0x88, 0x87, 0x86, 0x85, 0x84, 0x83, 0x82, 0x81, 0x80,
};

CLMUL static void shiftRight128(__m128i in, size_t n, __m128i* outLeft, __m128i* outRight)
TARGET_WITH_CLMUL static void shiftRight128(__m128i in, size_t n, __m128i *outLeft, __m128i *outRight)
{
const __m128i maskA = _mm_loadu_si128((const __m128i*)(shuffleMasks + (16 - n)));
const __m128i maskA = _mm_loadu_si128((const __m128i *)(shuffleMasks + (16 - n)));
const __m128i maskB = _mm_xor_si128(maskA, _mm_cmpeq_epi8(_mm_setzero_si128(), _mm_setzero_si128()));

*outLeft = _mm_shuffle_epi8(in, maskB);
*outRight = _mm_shuffle_epi8(in, maskA);
}

CLMUL static __m128i fold(__m128i in, __m128i foldConstants)
TARGET_WITH_CLMUL static __m128i fold(__m128i in, __m128i foldConstants)
{
return _mm_xor_si128(_mm_clmulepi64_si128(in, foldConstants, 0x00), _mm_clmulepi64_si128(in, foldConstants, 0x11));
}

/**
* @brief Calculate the CRC-64 checksum using CLMUL instruction extension.
* @brief Calculate the CRC-64 checksum using TARGET_WITH_CLMUL instruction extension.
*
* @param previous_crc The previously calculated CRC-64 checksum.
* @param data Pointer to the input data buffer.
@@ -81,7 +83,7 @@ CLMUL static __m128i fold(__m128i in, __m128i foldConstants)
*
* @return The calculated CRC-64 checksum.
*/
AARU_EXPORT CLMUL uint64_t AARU_CALL crc64_clmul(uint64_t crc, const uint8_t* data, long length)
AARU_EXPORT TARGET_WITH_CLMUL uint64_t AARU_CALL crc64_clmul(uint64_t crc, const uint8_t *data, long length)
{
const uint64_t k1 = 0xe05dd497ca393ae4; // bitReflect(expMod65(128 + 64, poly, 1)) << 1;
const uint64_t k2 = 0xdabe95afc7875f40; // bitReflect(expMod65(128, poly, 1)) << 1;
@@ -91,18 +93,18 @@ AARU_EXPORT CLMUL uint64_t AARU_CALL crc64_clmul(uint64_t crc, const uint8_t* da
const __m128i foldConstants1 = _mm_set_epi64x(k2, k1);
const __m128i foldConstants2 = _mm_set_epi64x(p, mu);

const uint8_t* end = data + length;
const uint8_t *end = data + length;

// Align pointers
const __m128i* alignedData = (const __m128i*)((uintptr_t)data & ~(uintptr_t)15);
const __m128i* alignedEnd = (const __m128i*)(((uintptr_t)end + 15) & ~(uintptr_t)15);
const __m128i *alignedData = (const __m128i *)((uintptr_t)data & ~(uintptr_t)15);
const __m128i *alignedEnd = (const __m128i *)(((uintptr_t)end + 15) & ~(uintptr_t)15);

const size_t leadInSize = data - (const uint8_t*)alignedData;
const size_t leadOutSize = (const uint8_t*)alignedEnd - end;
const size_t leadInSize = data - (const uint8_t *)alignedData;
const size_t leadOutSize = (const uint8_t *)alignedEnd - end;

const size_t alignedLength = alignedEnd - alignedData;

const __m128i leadInMask = _mm_loadu_si128((const __m128i*)(shuffleMasks + (16 - leadInSize)));
const __m128i leadInMask = _mm_loadu_si128((const __m128i *)(shuffleMasks + (16 - leadInSize)));
const __m128i data0 = _mm_blendv_epi8(_mm_setzero_si128(), _mm_load_si128(alignedData), leadInMask);

#if defined(_WIN64)
@@ -177,7 +179,8 @@ AARU_EXPORT CLMUL uint64_t AARU_CALL crc64_clmul(uint64_t crc, const uint8_t* da
}

__m128i P;
if(length == 16) { P = _mm_xor_si128(accumulator, _mm_load_si128(alignedData)); }
if(length == 16)
{ P = _mm_xor_si128(accumulator, _mm_load_si128(alignedData)); }
else
{
const __m128i end0 = _mm_xor_si128(accumulator, _mm_load_si128(alignedData));
@@ -196,7 +199,9 @@ AARU_EXPORT CLMUL uint64_t AARU_CALL crc64_clmul(uint64_t crc, const uint8_t* da
// Final Barrett reduction
const __m128i T1 = _mm_clmulepi64_si128(R, foldConstants2, 0x00);
const __m128i T2 =
_mm_xor_si128(_mm_xor_si128(_mm_clmulepi64_si128(T1, foldConstants2, 0x10), _mm_slli_si128(T1, 8)), R);
_mm_xor_si128(
_mm_xor_si128(_mm_clmulepi64_si128(T1, foldConstants2, 0x10), _mm_slli_si128(T1, 8)),
R);

#if defined(_WIN64)
return ~_mm_extract_epi64(T2, 1);

@@ -21,22 +21,32 @@ static const uint8_t shuffleMasks[] = {
0x8f, 0x8e, 0x8d, 0x8c, 0x8b, 0x8a, 0x89, 0x88, 0x87, 0x86, 0x85, 0x84, 0x83, 0x82, 0x81, 0x80,
};

TARGET_WITH_SIMD FORCE_INLINE void shiftRight128(uint64x2_t in, size_t n, uint64x2_t* outLeft, uint64x2_t* outRight)
TARGET_WITH_NEON FORCE_INLINE void shiftRight128(uint64x2_t in, size_t n, uint64x2_t *outLeft, uint64x2_t *outRight)
{
const uint64x2_t maskA =
vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)(const uint64x2_t*)(shuffleMasks + (16 - n))));
vreinterpretq_u64_u32(
vld1q_u32((const uint32_t *)(const uint64x2_t *)(shuffleMasks + (16 - n))));
uint64x2_t b = vreinterpretq_u64_u8(vceqq_u8(vreinterpretq_u8_u64(vreinterpretq_u64_u32(vdupq_n_u32(0))),
vreinterpretq_u8_u64(vreinterpretq_u64_u32(vdupq_n_u32(0)))));
vreinterpretq_u8_u64(
vreinterpretq_u64_u32(vdupq_n_u32(0)))));
const uint64x2_t maskB = vreinterpretq_u64_u32(veorq_u32(vreinterpretq_u32_u64(maskA), vreinterpretq_u32_u64(b)));

*outLeft = mm_shuffle_epi8(in, maskB);
*outRight = mm_shuffle_epi8(in, maskA);
}

TARGET_WITH_SIMD FORCE_INLINE uint64x2_t fold(uint64x2_t in, uint64x2_t foldConstants)
TARGET_WITH_NEON FORCE_INLINE uint64x2_t
fold (uint64x2_t
in,
uint64x2_t foldConstants
)
{
return veorq_u64(sse2neon_vmull_p64(vget_low_u64(in), vget_low_u64(foldConstants)),
sse2neon_vmull_p64(vget_high_u64(in), vget_high_u64(foldConstants)));
return

veorq_u64(sse2neon_vmull_p64(vget_low_u64(in), vget_low_u64(foldConstants)),
sse2neon_vmull_p64(vget_high_u64(in), vget_high_u64(foldConstants))

);
}

/**
@@ -53,7 +63,7 @@ TARGET_WITH_SIMD FORCE_INLINE uint64x2_t fold(uint64x2_t in, uint64x2_t foldCons
*
* @return The CRC-64 checksum of the given data.
*/
AARU_EXPORT TARGET_WITH_SIMD uint64_t AARU_CALL crc64_vmull(uint64_t previous_crc, const uint8_t* data, long len)
AARU_EXPORT TARGET_WITH_NEON uint64_t AARU_CALL crc64_vmull(uint64_t previous_crc, const uint8_t *data, long len)
{
const uint64_t k1 = 0xe05dd497ca393ae4; // bitReflect(expMod65(128 + 64, poly, 1)) << 1;
const uint64_t k2 = 0xdabe95afc7875f40; // bitReflect(expMod65(128, poly, 1)) << 1;
@@ -63,24 +73,26 @@ AARU_EXPORT TARGET_WITH_SIMD uint64_t AARU_CALL crc64_vmull(uint64_t previous_cr
const uint64x2_t foldConstants1 = vcombine_u64(vcreate_u64(k1), vcreate_u64(k2));
const uint64x2_t foldConstants2 = vcombine_u64(vcreate_u64(mu), vcreate_u64(p));

const uint8_t* end = data + len;
const uint8_t *end = data + len;

// Align pointers
const uint64x2_t* alignedData = (const uint64x2_t*)((uintptr_t)data & ~(uintptr_t)15);
const uint64x2_t* alignedEnd = (const uint64x2_t*)(((uintptr_t)end + 15) & ~(uintptr_t)15);
const uint64x2_t *alignedData = (const uint64x2_t *)((uintptr_t)data & ~(uintptr_t)15);
const uint64x2_t *alignedEnd = (const uint64x2_t *)(((uintptr_t)end + 15) & ~(uintptr_t)15);

const size_t leadInSize = data - (const uint8_t*)alignedData;
const size_t leadOutSize = (const uint8_t*)alignedEnd - end;
const size_t leadInSize = data - (const uint8_t *)alignedData;
const size_t leadOutSize = (const uint8_t *)alignedEnd - end;

const size_t alignedLength = alignedEnd - alignedData;

const uint64x2_t leadInMask =
vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)(const uint64x2_t*)(shuffleMasks + (16 - leadInSize))));
vreinterpretq_u64_u32(vld1q_u32(
(const uint32_t *)(const uint64x2_t *)(shuffleMasks + (16 - leadInSize))));
uint64x2_t a = vreinterpretq_u64_u32(vdupq_n_u32(0));
uint64x2_t b = vreinterpretq_u64_u32(
vld1q_u32((const uint32_t*)alignedData)); // Use a signed shift right to create a mask with the sign bit
vld1q_u32((const uint32_t *)alignedData)); // Use a signed shift right to create a mask with the sign bit
const uint64x2_t data0 =
vreinterpretq_u64_u8(vbslq_u8(vreinterpretq_u8_s8(vshrq_n_s8(vreinterpretq_s8_u64(leadInMask), 7)),
vreinterpretq_u64_u8(
vbslq_u8(vreinterpretq_u8_s8(vshrq_n_s8(vreinterpretq_s8_u64(leadInMask), 7)),
vreinterpretq_u8_u64(b),
vreinterpretq_u8_u64(a)));

@@ -102,7 +114,7 @@ AARU_EXPORT TARGET_WITH_SIMD uint64_t AARU_CALL crc64_vmull(uint64_t previous_cr
}
else if(alignedLength == 2)
{
const uint64x2_t data1 = vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)(alignedData + 1)));
const uint64x2_t data1 = vreinterpretq_u64_u32(vld1q_u32((const uint32_t *)(alignedData + 1)));

if(len < 8)
{
@@ -145,7 +157,7 @@ AARU_EXPORT TARGET_WITH_SIMD uint64_t AARU_CALL crc64_vmull(uint64_t previous_cr

while(len >= 32)
{
accumulator = fold(veorq_u64(vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)alignedData)), accumulator),
accumulator = fold(veorq_u64(vreinterpretq_u64_u32(vld1q_u32((const uint32_t *)alignedData)), accumulator),
foldConstants1);

len -= 16;
@@ -153,12 +165,13 @@ AARU_EXPORT TARGET_WITH_SIMD uint64_t AARU_CALL crc64_vmull(uint64_t previous_cr
}

uint64x2_t P;
if(len == 16) P = veorq_u64(accumulator, vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)alignedData)));
if(len == 16) P = veorq_u64(accumulator, vreinterpretq_u64_u32(vld1q_u32((const uint32_t *)alignedData)));
else
{
const uint64x2_t end0 =
veorq_u64(accumulator, vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)alignedData)));
const uint64x2_t end1 = vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)(alignedData + 1)));
veorq_u64(accumulator,
vreinterpretq_u64_u32(vld1q_u32((const uint32_t *)alignedData)));
const uint64x2_t end1 = vreinterpretq_u64_u32(vld1q_u32((const uint32_t *)(alignedData + 1)));

uint64x2_t A, B, C, D;
shiftRight128(end0, leadOutSize, &A, &B);

62
fletcher16.c
62
fletcher16.c
@@ -37,11 +37,11 @@
*
* @return Pointer to a structure containing the checksum state.
*/
AARU_EXPORT fletcher16_ctx* AARU_CALL fletcher16_init()
AARU_EXPORT fletcher16_ctx *AARU_CALL fletcher16_init()
{
fletcher16_ctx* ctx;
fletcher16_ctx *ctx;

ctx = (fletcher16_ctx*)malloc(sizeof(fletcher16_ctx));
ctx = (fletcher16_ctx *)malloc(sizeof(fletcher16_ctx));

if(!ctx) return NULL;

@@ -62,7 +62,7 @@ AARU_EXPORT fletcher16_ctx* AARU_CALL fletcher16_init()
* @param data Pointer to the input data buffer.
* @param len The length of the input data buffer.
*/
AARU_EXPORT int AARU_CALL fletcher16_update(fletcher16_ctx* ctx, const uint8_t* data, uint32_t len)
AARU_EXPORT int AARU_CALL fletcher16_update(fletcher16_ctx *ctx, const uint8_t *data, uint32_t len)
{
if(!ctx || !data) return -1;

@@ -103,33 +103,35 @@ AARU_EXPORT int AARU_CALL fletcher16_update(fletcher16_ctx* ctx, const uint8_t*
{
len -= NMAX;
n = NMAX / 11; /* NMAX is divisible by 11 */
do {
sum1 += (data)[0];
do
{
sum1 += data[0];
sum2 += sum1;
sum1 += (data)[0 + 1];
sum1 += data[0 + 1];
sum2 += sum1;
sum1 += (data)[0 + 2];
sum1 += data[0 + 2];
sum2 += sum1;
sum1 += (data)[0 + 2 + 1];
sum1 += data[0 + 2 + 1];
sum2 += sum1;
sum1 += (data)[0 + 4];
sum1 += data[0 + 4];
sum2 += sum1;
sum1 += (data)[0 + 4 + 1];
sum1 += data[0 + 4 + 1];
sum2 += sum1;
sum1 += (data)[0 + 4 + 2];
sum1 += data[0 + 4 + 2];
sum2 += sum1;
sum1 += (data)[0 + 4 + 2 + 1];
sum1 += data[0 + 4 + 2 + 1];
sum2 += sum1;
sum1 += (data)[8];
sum1 += data[8];
sum2 += sum1;
sum1 += (data)[8 + 1];
sum1 += data[8 + 1];
sum2 += sum1;
sum1 += (data)[8 + 2];
sum1 += data[8 + 2];
sum2 += sum1;

/* 11 sums unrolled */
data += 11;
} while(--n);
}
while(--n);
sum1 %= FLETCHER16_MODULE;
sum2 %= FLETCHER16_MODULE;
}
@@ -140,27 +142,27 @@ AARU_EXPORT int AARU_CALL fletcher16_update(fletcher16_ctx* ctx, const uint8_t*
while(len >= 11)
{
len -= 11;
sum1 += (data)[0];
sum1 += data[0];
sum2 += sum1;
sum1 += (data)[0 + 1];
sum1 += data[0 + 1];
sum2 += sum1;
sum1 += (data)[0 + 2];
sum1 += data[0 + 2];
sum2 += sum1;
sum1 += (data)[0 + 2 + 1];
sum1 += data[0 + 2 + 1];
sum2 += sum1;
sum1 += (data)[0 + 4];
sum1 += data[0 + 4];
sum2 += sum1;
sum1 += (data)[0 + 4 + 1];
sum1 += data[0 + 4 + 1];
sum2 += sum1;
sum1 += (data)[0 + 4 + 2];
sum1 += data[0 + 4 + 2];
sum2 += sum1;
sum1 += (data)[0 + 4 + 2 + 1];
sum1 += data[0 + 4 + 2 + 1];
sum2 += sum1;
sum1 += (data)[8];
sum1 += data[8];
sum2 += sum1;
sum1 += (data)[8 + 1];
sum1 += data[8 + 1];
sum2 += sum1;
sum1 += (data)[8 + 2];
sum1 += data[8 + 2];
sum2 += sum1;

data += 11;
@@ -190,7 +192,7 @@ AARU_EXPORT int AARU_CALL fletcher16_update(fletcher16_ctx* ctx, const uint8_t*
*
* @returns 0 on success, -1 on error.
*/
AARU_EXPORT int AARU_CALL fletcher16_final(fletcher16_ctx* ctx, uint16_t* checksum)
AARU_EXPORT int AARU_CALL fletcher16_final(fletcher16_ctx *ctx, uint16_t *checksum)
{
if(!ctx) return -1;

@@ -206,7 +208,7 @@ AARU_EXPORT int AARU_CALL fletcher16_final(fletcher16_ctx* ctx, uint16_t* checks
*
* @param ctx The Fletcher-16 checksum context structure, to be freed.
*/
AARU_EXPORT void AARU_CALL fletcher16_free(fletcher16_ctx* ctx)
AARU_EXPORT void AARU_CALL fletcher16_free(fletcher16_ctx *ctx)
{
if(!ctx) return;


@@ -29,9 +29,9 @@ typedef struct
uint8_t sum2;
} fletcher16_ctx;

AARU_EXPORT fletcher16_ctx* AARU_CALL fletcher16_init();
AARU_EXPORT int AARU_CALL fletcher16_update(fletcher16_ctx* ctx, const uint8_t* data, uint32_t len);
AARU_EXPORT int AARU_CALL fletcher16_final(fletcher16_ctx* ctx, uint16_t* checksum);
AARU_EXPORT void AARU_CALL fletcher16_free(fletcher16_ctx* ctx);
AARU_EXPORT fletcher16_ctx *AARU_CALL fletcher16_init();
AARU_EXPORT int AARU_CALL fletcher16_update(fletcher16_ctx *ctx, const uint8_t *data, uint32_t len);
AARU_EXPORT int AARU_CALL fletcher16_final(fletcher16_ctx *ctx, uint16_t *checksum);
AARU_EXPORT void AARU_CALL fletcher16_free(fletcher16_ctx *ctx);

#endif // AARU_CHECKSUMS_NATIVE_FLETCHER16_H

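The Fletcher-16 header above exposes the same context lifecycle as the other checksums. A minimal caller-side sketch, not part of the commit; names are placeholders and error handling is abbreviated:

static uint16_t fletcher16_of_buffer(const uint8_t *buf, uint32_t len)
{
    uint16_t checksum = 0;
    fletcher16_ctx *ctx = fletcher16_init();   /* allocate and reset sum1/sum2 */
    if(!ctx) return 0;
    fletcher16_update(ctx, buf, len);          /* accumulate over the buffer */
    fletcher16_final(ctx, &checksum);          /* combine the running sums into the result */
    fletcher16_free(ctx);
    return checksum;
}
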
82
fletcher32.c
82
fletcher32.c
@@ -37,11 +37,11 @@
*
* @return Pointer to a structure containing the checksum state.
*/
AARU_EXPORT fletcher32_ctx* AARU_CALL fletcher32_init()
AARU_EXPORT fletcher32_ctx *AARU_CALL fletcher32_init()
{
fletcher32_ctx* ctx;
fletcher32_ctx *ctx;

ctx = (fletcher32_ctx*)malloc(sizeof(fletcher32_ctx));
ctx = (fletcher32_ctx *)malloc(sizeof(fletcher32_ctx));

if(!ctx) return NULL;

@@ -62,7 +62,7 @@ AARU_EXPORT fletcher32_ctx* AARU_CALL fletcher32_init()
* @param data Pointer to the input data buffer.
* @param len The length of the input data buffer.
*/
AARU_EXPORT int AARU_CALL fletcher32_update(fletcher32_ctx* ctx, const uint8_t* data, uint32_t len)
AARU_EXPORT int AARU_CALL fletcher32_update(fletcher32_ctx *ctx, const uint8_t *data, uint32_t len)
{
if(!ctx || !data) return -1;

@@ -130,43 +130,45 @@ AARU_EXPORT int AARU_CALL fletcher32_update(fletcher32_ctx* ctx, const uint8_t*
{
len -= NMAX;
n = NMAX / 16; /* NMAX is divisible by 16 */
do {
sum1 += (data)[0];
do
{
sum1 += data[0];
sum2 += sum1;
sum1 += (data)[0 + 1];
sum1 += data[0 + 1];
sum2 += sum1;
sum1 += (data)[0 + 2];
sum1 += data[0 + 2];
sum2 += sum1;
sum1 += (data)[0 + 2 + 1];
sum1 += data[0 + 2 + 1];
sum2 += sum1;
sum1 += (data)[0 + 4];
sum1 += data[0 + 4];
sum2 += sum1;
sum1 += (data)[0 + 4 + 1];
sum1 += data[0 + 4 + 1];
sum2 += sum1;
sum1 += (data)[0 + 4 + 2];
sum1 += data[0 + 4 + 2];
sum2 += sum1;
sum1 += (data)[0 + 4 + 2 + 1];
sum1 += data[0 + 4 + 2 + 1];
sum2 += sum1;
sum1 += (data)[8];
sum1 += data[8];
sum2 += sum1;
sum1 += (data)[8 + 1];
sum1 += data[8 + 1];
sum2 += sum1;
sum1 += (data)[8 + 2];
sum1 += data[8 + 2];
sum2 += sum1;
sum1 += (data)[8 + 2 + 1];
sum1 += data[8 + 2 + 1];
sum2 += sum1;
sum1 += (data)[8 + 4];
sum1 += data[8 + 4];
sum2 += sum1;
sum1 += (data)[8 + 4 + 1];
sum1 += data[8 + 4 + 1];
sum2 += sum1;
sum1 += (data)[8 + 4 + 2];
sum1 += data[8 + 4 + 2];
sum2 += sum1;
sum1 += (data)[8 + 4 + 2 + 1];
sum1 += data[8 + 4 + 2 + 1];
sum2 += sum1;

/* 16 sums unrolled */
data += 16;
} while(--n);
}
while(--n);
sum1 %= FLETCHER32_MODULE;
sum2 %= FLETCHER32_MODULE;
}
@@ -177,37 +179,37 @@ AARU_EXPORT int AARU_CALL fletcher32_update(fletcher32_ctx* ctx, const uint8_t*
while(len >= 16)
{
len -= 16;
sum1 += (data)[0];
sum1 += data[0];
sum2 += sum1;
sum1 += (data)[0 + 1];
sum1 += data[0 + 1];
sum2 += sum1;
sum1 += (data)[0 + 2];
sum1 += data[0 + 2];
sum2 += sum1;
sum1 += (data)[0 + 2 + 1];
sum1 += data[0 + 2 + 1];
sum2 += sum1;
sum1 += (data)[0 + 4];
sum1 += data[0 + 4];
sum2 += sum1;
sum1 += (data)[0 + 4 + 1];
sum1 += data[0 + 4 + 1];
sum2 += sum1;
sum1 += (data)[0 + 4 + 2];
sum1 += data[0 + 4 + 2];
sum2 += sum1;
sum1 += (data)[0 + 4 + 2 + 1];
sum1 += data[0 + 4 + 2 + 1];
sum2 += sum1;
sum1 += (data)[8];
sum1 += data[8];
sum2 += sum1;
sum1 += (data)[8 + 1];
sum1 += data[8 + 1];
sum2 += sum1;
sum1 += (data)[8 + 2];
sum1 += data[8 + 2];
sum2 += sum1;
sum1 += (data)[8 + 2 + 1];
sum1 += data[8 + 2 + 1];
sum2 += sum1;
sum1 += (data)[8 + 4];
sum1 += data[8 + 4];
sum2 += sum1;
sum1 += (data)[8 + 4 + 1];
sum1 += data[8 + 4 + 1];
sum2 += sum1;
sum1 += (data)[8 + 4 + 2];
sum1 += data[8 + 4 + 2];
sum2 += sum1;
sum1 += (data)[8 + 4 + 2 + 1];
sum1 += data[8 + 4 + 2 + 1];
sum2 += sum1;

data += 16;
@@ -237,7 +239,7 @@ AARU_EXPORT int AARU_CALL fletcher32_update(fletcher32_ctx* ctx, const uint8_t*
*
* @returns 0 on success, -1 on error.
*/
AARU_EXPORT int AARU_CALL fletcher32_final(fletcher32_ctx* ctx, uint32_t* checksum)
AARU_EXPORT int AARU_CALL fletcher32_final(fletcher32_ctx *ctx, uint32_t *checksum)
{
if(!ctx) return -1;

@@ -253,7 +255,7 @@ AARU_EXPORT int AARU_CALL fletcher32_final(fletcher32_ctx* ctx, uint32_t* checks
*
* @param ctx The Fletcher-32 checksum context structure, to be freed.
*/
AARU_EXPORT void AARU_CALL fletcher32_free(fletcher32_ctx* ctx)
AARU_EXPORT void AARU_CALL fletcher32_free(fletcher32_ctx *ctx)
{
if(!ctx) return;


@@ -37,8 +37,8 @@ AARU_EXPORT void AARU_CALL fletcher32_free(fletcher32_ctx* ctx);
#if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \
defined(__i386__) || defined(__THW_INTEL) || defined(_M_IX86)

AARU_EXPORT AVX2 void AARU_CALL fletcher32_avx2(uint16_t* sum1, uint16_t* sum2, const uint8_t* data, long len);
AARU_EXPORT SSSE3 void AARU_CALL fletcher32_ssse3(uint16_t* sum1, uint16_t* sum2, const uint8_t* data, long len);
AARU_EXPORT TARGET_WITH_AVX2 void AARU_CALL fletcher32_avx2(uint16_t* sum1, uint16_t* sum2, const uint8_t* data, long len);
AARU_EXPORT TARGET_WITH_SSSE3 void AARU_CALL fletcher32_ssse3(uint16_t* sum1, uint16_t* sum2, const uint8_t* data, long len);

#endif


@@ -42,7 +42,9 @@
* @param data Pointer to the data buffer.
* @param len Length of the data buffer in bytes.
*/
AARU_EXPORT AVX2 void AARU_CALL fletcher32_avx2(uint16_t *sum1, uint16_t *sum2, const uint8_t *data, long len) {
AARU_EXPORT TARGET_WITH_AVX2 void AARU_CALL
fletcher32_avx2(uint16_t *sum1, uint16_t *sum2, const uint8_t *data, long len)
{
uint32_t s1 = *sum1;
uint32_t s2 = *sum2;

@@ -53,10 +55,11 @@ AARU_EXPORT AVX2 void AARU_CALL fletcher32_avx2(uint16_t *sum1, uint16_t *sum2,
long blocks = len / BLOCK_SIZE;
len -= blocks * BLOCK_SIZE;

while (blocks) {
while(blocks)
{
unsigned n = NMAX / BLOCK_SIZE; /* The NMAX constraint. */

if (n > blocks) n = (unsigned) blocks;
if(n > blocks) n = (unsigned)blocks;
blocks -= n;

const __m256i tap = _mm256_set_epi8(1,
@@ -101,11 +104,12 @@ AARU_EXPORT AVX2 void AARU_CALL fletcher32_avx2(uint16_t *sum1, uint16_t *sum2,
__m256i v_ps = _mm256_set_epi32(0, 0, 0, 0, 0, 0, 0, (s1 * n));
__m256i v_s2 = _mm256_set_epi32(0, 0, 0, 0, 0, 0, 0, s2);
__m256i v_s1 = _mm256_setzero_si256();
do {
do
{
/*
* Load 32 input bytes.
*/
const __m256i bytes = _mm256_lddqu_si256((__m256i *) (data));
const __m256i bytes = _mm256_lddqu_si256((__m256i *)(data));

/*
* Add previous block byte sum to v_ps.
@@ -120,7 +124,8 @@ AARU_EXPORT AVX2 void AARU_CALL fletcher32_avx2(uint16_t *sum1, uint16_t *sum2,
v_s2 = _mm256_add_epi32(v_s2, _mm256_madd_epi16(mad, ones));

data += BLOCK_SIZE;
} while (--n);
}
while(--n);

__m128i sum = _mm_add_epi32(_mm256_castsi256_si128(v_s1), _mm256_extracti128_si256(v_s1, 1));
__m128i hi = _mm_unpackhi_epi64(sum, sum);
@@ -147,8 +152,10 @@ AARU_EXPORT AVX2 void AARU_CALL fletcher32_avx2(uint16_t *sum1, uint16_t *sum2,
/*
* Handle leftover data.
*/
if (len) {
if (len >= 16) {
if(len)
{
if(len >= 16)
{
s2 += (s1 += *data++);
s2 += (s1 += *data++);
s2 += (s1 += *data++);
@@ -167,8 +174,9 @@ AARU_EXPORT AVX2 void AARU_CALL fletcher32_avx2(uint16_t *sum1, uint16_t *sum2,
s2 += (s1 += *data++);
len -= 16;
}
while (len--) { s2 += (s1 += *data++); }
if (s1 >= FLETCHER32_MODULE) s1 -= FLETCHER32_MODULE;
while(len--)
{ s2 += (s1 += *data++); }
if(s1 >= FLETCHER32_MODULE) s1 -= FLETCHER32_MODULE;
s2 %= FLETCHER32_MODULE;
}
/*

@@ -38,7 +38,7 @@
#include "fletcher32.h"
#include "simd.h"

TARGET_WITH_SIMD /***/
TARGET_WITH_NEON /***/

/**
* @brief Calculate Fletcher-32 checksum for a given data using NEON instructions.
@@ -50,7 +50,8 @@ TARGET_WITH_SIMD /***/
* @param data Pointer to the data buffer.
* @param len Length of the data buffer in bytes.
*/
void fletcher32_neon(uint16_t *sum1, uint16_t *sum2, const uint8_t *data, uint32_t len) {
void fletcher32_neon(uint16_t *sum1, uint16_t *sum2, const uint8_t *data, uint32_t len)
{
/*
* Split Fletcher-32 into component sums.
*/
@@ -59,12 +60,14 @@ void fletcher32_neon(uint16_t *sum1, uint16_t *sum2, const uint8_t *data, uint32
/*
* Serially compute s1 & s2, until the data is 16-byte aligned.
*/
if ((uintptr_t) data & 15) {
while ((uintptr_t) data & 15) {
if((uintptr_t)data & 15)
{
while((uintptr_t)data & 15)
{
s2 += (s1 += *data++);
--len;
}
if (s1 >= FLETCHER32_MODULE) s1 -= FLETCHER32_MODULE;
if(s1 >= FLETCHER32_MODULE) s1 -= FLETCHER32_MODULE;
s2 %= FLETCHER32_MODULE;
}
/*
@@ -73,9 +76,10 @@ void fletcher32_neon(uint16_t *sum1, uint16_t *sum2, const uint8_t *data, uint32
const unsigned BLOCK_SIZE = 1 << 5;
uint32_t blocks = len / BLOCK_SIZE;
len -= blocks * BLOCK_SIZE;
while (blocks) {
while(blocks)
{
unsigned n = NMAX / BLOCK_SIZE; /* The NMAX constraint. */
if (n > blocks) n = (unsigned) blocks;
if(n > blocks) n = (unsigned)blocks;
blocks -= n;
/*
* Process n blocks of data. At most NMAX data bytes can be
@@ -85,19 +89,20 @@ void fletcher32_neon(uint16_t *sum1, uint16_t *sum2, const uint8_t *data, uint32
uint32x4_t v_s2 = {.n128_u32 = {0, 0, 0, s1 * n}};
uint32x4_t v_s1 = {.n128_u32 = {0, 0, 0, 0}};
#else
uint32x4_t v_s2 = (uint32x4_t) {0, 0, 0, s1 * n};
uint32x4_t v_s1 = (uint32x4_t) {0, 0, 0, 0};
uint32x4_t v_s2 = (uint32x4_t){0, 0, 0, s1 * n};
uint32x4_t v_s1 = (uint32x4_t){0, 0, 0, 0};
#endif
uint16x8_t v_column_sum_1 = vdupq_n_u16(0);
uint16x8_t v_column_sum_2 = vdupq_n_u16(0);
uint16x8_t v_column_sum_3 = vdupq_n_u16(0);
uint16x8_t v_column_sum_4 = vdupq_n_u16(0);
do {
do
{
/*
* Load 32 input bytes.
*/
const uint8x16_t bytes1 = vld1q_u8((uint8_t *) (data));
const uint8x16_t bytes2 = vld1q_u8((uint8_t *) (data + 16));
const uint8x16_t bytes1 = vld1q_u8((uint8_t *)(data));
const uint8x16_t bytes2 = vld1q_u8((uint8_t *)(data + 16));
/*
* Add previous block byte sum to v_s2.
*/
@@ -114,40 +119,41 @@ void fletcher32_neon(uint16_t *sum1, uint16_t *sum2, const uint8_t *data, uint32
v_column_sum_3 = vaddw_u8(v_column_sum_3, vget_low_u8(bytes2));
v_column_sum_4 = vaddw_u8(v_column_sum_4, vget_high_u8(bytes2));
data += BLOCK_SIZE;
} while (--n);
}
while(--n);
v_s2 = vshlq_n_u32(v_s2, 5);
/*
* Multiply-add bytes by [ 32, 31, 30, ... ] for s2.
*/
#ifdef _MSC_VER
#ifdef _M_ARM64
v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_1), neon_ld1m_16((uint16_t[]) {32, 31, 30, 29}));
v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_1), neon_ld1m_16((uint16_t[]) {28, 27, 26, 25}));
v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_2), neon_ld1m_16((uint16_t[]) {24, 23, 22, 21}));
v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_2), neon_ld1m_16((uint16_t[]) {20, 19, 18, 17}));
v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_3), neon_ld1m_16((uint16_t[]) {16, 15, 14, 13}));
v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_3), neon_ld1m_16((uint16_t[]) {12, 11, 10, 9}));
v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_4), neon_ld1m_16((uint16_t[]) {8, 7, 6, 5}));
v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_4), neon_ld1m_16((uint16_t[]) {4, 3, 2, 1}));
v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_1), neon_ld1m_16((uint16_t[]){32, 31, 30, 29}));
v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_1), neon_ld1m_16((uint16_t[]){28, 27, 26, 25}));
v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_2), neon_ld1m_16((uint16_t[]){24, 23, 22, 21}));
v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_2), neon_ld1m_16((uint16_t[]){20, 19, 18, 17}));
v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_3), neon_ld1m_16((uint16_t[]){16, 15, 14, 13}));
v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_3), neon_ld1m_16((uint16_t[]){12, 11, 10, 9}));
v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_4), neon_ld1m_16((uint16_t[]){8, 7, 6, 5}));
v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_4), neon_ld1m_16((uint16_t[]){4, 3, 2, 1}));
#else
v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_1), vld1_u16(((uint16_t[]) {32, 31, 30, 29})));
v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_1), vld1_u16(((uint16_t[]) {28, 27, 26, 25})));
v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_2), vld1_u16(((uint16_t[]) {24, 23, 22, 21})));
v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_2), vld1_u16(((uint16_t[]) {20, 19, 18, 17})));
v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_3), vld1_u16(((uint16_t[]) {16, 15, 14, 13})));
v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_3), vld1_u16(((uint16_t[]) {12, 11, 10, 9})));
v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_4), vld1_u16(((uint16_t[]) {8, 7, 6, 5})));
v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_4), vld1_u16(((uint16_t[]) {4, 3, 2, 1})));
v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_1), vld1_u16(((uint16_t[]){32, 31, 30, 29})));
v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_1), vld1_u16(((uint16_t[]){28, 27, 26, 25})));
v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_2), vld1_u16(((uint16_t[]){24, 23, 22, 21})));
v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_2), vld1_u16(((uint16_t[]){20, 19, 18, 17})));
v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_3), vld1_u16(((uint16_t[]){16, 15, 14, 13})));
v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_3), vld1_u16(((uint16_t[]){12, 11, 10, 9})));
v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_4), vld1_u16(((uint16_t[]){8, 7, 6, 5})));
v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_4), vld1_u16(((uint16_t[]){4, 3, 2, 1})));
#endif
#else
v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_1), (uint16x4_t) {32, 31, 30, 29});
v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_1), (uint16x4_t) {28, 27, 26, 25});
v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_2), (uint16x4_t) {24, 23, 22, 21});
v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_2), (uint16x4_t) {20, 19, 18, 17});
v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_3), (uint16x4_t) {16, 15, 14, 13});
v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_3), (uint16x4_t) {12, 11, 10, 9});
v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_4), (uint16x4_t) {8, 7, 6, 5});
v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_4), (uint16x4_t) {4, 3, 2, 1});
v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_1), (uint16x4_t){32, 31, 30, 29});
v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_1), (uint16x4_t){28, 27, 26, 25});
v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_2), (uint16x4_t){24, 23, 22, 21});
v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_2), (uint16x4_t){20, 19, 18, 17});
v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_3), (uint16x4_t){16, 15, 14, 13});
v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_3), (uint16x4_t){12, 11, 10, 9});
v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_4), (uint16x4_t){8, 7, 6, 5});
v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_4), (uint16x4_t){4, 3, 2, 1});
#endif
/*
* Sum epi32 ints v_s1(s2) and accumulate in s1(s2).
@@ -166,8 +172,10 @@ void fletcher32_neon(uint16_t *sum1, uint16_t *sum2, const uint8_t *data, uint32
/*
* Handle leftover data.
*/
if (len) {
if (len >= 16) {
if(len)
{
if(len >= 16)
{
s2 += (s1 += *data++);
s2 += (s1 += *data++);
s2 += (s1 += *data++);
@@ -186,8 +194,9 @@ void fletcher32_neon(uint16_t *sum1, uint16_t *sum2, const uint8_t *data, uint32
s2 += (s1 += *data++);
len -= 16;
}
while (len--) { s2 += (s1 += *data++); }
if (s1 >= FLETCHER32_MODULE) s1 -= FLETCHER32_MODULE;
while(len--)
{ s2 += (s1 += *data++); }
if(s1 >= FLETCHER32_MODULE) s1 -= FLETCHER32_MODULE;
s2 %= FLETCHER32_MODULE;
}
/*

@@ -40,16 +40,17 @@
#include "fletcher32.h"

/**
* @brief Calculate Fletcher-32 checksum for a given data using SSSE3 instructions.
* @brief Calculate Fletcher-32 checksum for a given data using TARGET_WITH_SSSE3 instructions.
*
* This function calculates the Fletcher-32 checksum for a block of data using SSSE3 vector instructions.
* This function calculates the Fletcher-32 checksum for a block of data using TARGET_WITH_SSSE3 vector instructions.
*
* @param sum1 Pointer to the variable where the first 16-bit checksum value is stored.
* @param sum2 Pointer to the variable where the second 16-bit checksum value is stored.
* @param data Pointer to the data buffer.
* @param len Length of the data buffer in bytes.
*/
AARU_EXPORT SSSE3 void AARU_CALL fletcher32_ssse3(uint16_t* sum1, uint16_t* sum2, const uint8_t* data, long len)
AARU_EXPORT TARGET_WITH_SSSE3 void AARU_CALL
fletcher32_ssse3(uint16_t *sum1, uint16_t *sum2, const uint8_t *data, long len)
{
uint32_t s1 = *sum1;
uint32_t s2 = *sum2;
@@ -76,12 +77,13 @@ AARU_EXPORT SSSE3 void AARU_CALL fletcher32_ssse3(uint16_t* sum1, uint16_t* sum2
__m128i v_ps = _mm_set_epi32(0, 0, 0, s1 * n);
__m128i v_s2 = _mm_set_epi32(0, 0, 0, s2);
__m128i v_s1 = _mm_set_epi32(0, 0, 0, 0);
do {
do
{
/*
* Load 32 input bytes.
*/
const __m128i bytes1 = _mm_loadu_si128((__m128i*)(data));
const __m128i bytes2 = _mm_loadu_si128((__m128i*)(data + 16));
const __m128i bytes1 = _mm_loadu_si128((__m128i *)(data));
const __m128i bytes2 = _mm_loadu_si128((__m128i *)(data + 16));
/*
* Add previous block byte sum to v_ps.
*/
@@ -97,7 +99,8 @@ AARU_EXPORT SSSE3 void AARU_CALL fletcher32_ssse3(uint16_t* sum1, uint16_t* sum2
const __m128i mad2 = _mm_maddubs_epi16(bytes2, tap2);
v_s2 = _mm_add_epi32(v_s2, _mm_madd_epi16(mad2, ones));
data += BLOCK_SIZE;
} while(--n);
}
while(--n);
v_s2 = _mm_add_epi32(v_s2, _mm_slli_epi32(v_ps, 5));
/*
* Sum epi32 ints v_s1(s2) and accumulate in s1(s2).
@@ -143,7 +146,8 @@ AARU_EXPORT SSSE3 void AARU_CALL fletcher32_ssse3(uint16_t* sum1, uint16_t* sum2
s2 += (s1 += *data++);
len -= 16;
}
while(len--) { s2 += (s1 += *data++); }
while(len--)
{ s2 += (s1 += *data++); }
if(s1 >= FLETCHER32_MODULE) s1 -= FLETCHER32_MODULE;
s2 %= FLETCHER32_MODULE;
}

@@ -20,4 +20,5 @@

#include "library.h"

AARU_EXPORT uint64_t AARU_CALL get_acn_version() { return AARU_CHECKUMS_NATIVE_VERSION; }
AARU_EXPORT uint64_t AARU_CALL get_acn_version()
{ return AARU_CHECKUMS_NATIVE_VERSION; }
47
simd.c
47
simd.c
@@ -123,15 +123,15 @@ static void cpuidex(int info, int count, unsigned* eax, unsigned* ebx, unsigned*
}

/**
* @brief Checks if the hardware supports the CLMUL instruction set.
* @brief Checks if the hardware supports the TARGET_WITH_CLMUL instruction set.
*
* The function checks if the system's CPU supports the CLMUL (Carry-Less Multiplication) instruction set.
* CLMUL is an extension to the x86 instruction set architecture and provides hardware acceleration for
* The function checks if the system's CPU supports the TARGET_WITH_CLMUL (Carry-Less Multiplication) instruction set.
* TARGET_WITH_CLMUL is an extension to the x86 instruction set architecture and provides hardware acceleration for
* carry-less multiplication operations.
*
* @return True if CLMUL instruction set is supported, False otherwise.
* @return True if TARGET_WITH_CLMUL instruction set is supported, False otherwise.
*
* @see https://software.intel.com/sites/landingpage/IntrinsicsGuide/#techs=CLMUL
* @see https://software.intel.com/sites/landingpage/IntrinsicsGuide/#techs=TARGET_WITH_CLMUL
* @see https://en.wikipedia.org/wiki/Carry-less_multiplication
*/
int have_clmul(void)
@@ -148,17 +148,17 @@ int have_clmul(void)
}

/**
* @brief Checks if the current processor supports SSSE3 instructions.
* @brief Checks if the current processor supports TARGET_WITH_SSSE3 instructions.
*
* The function detects whether the current processor supports SSSE3 instructions by
* checking the CPU feature flags. SSSE3 (Supplemental Streaming SIMD Extensions 3)
* The function detects whether the current processor supports TARGET_WITH_SSSE3 instructions by
* checking the CPU feature flags. TARGET_WITH_SSSE3 (Supplemental Streaming SIMD Extensions 3)
* is an extension to the x86 instruction set architecture that introduces
* additional SIMD instructions useful for multimedia and signal processing tasks.
*
* @return true if the current processor supports SSSE3 instructions, false otherwise.
* @return true if the current processor supports TARGET_WITH_SSSE3 instructions, false otherwise.
*
* @see https://software.intel.com/sites/landingpage/IntrinsicsGuide/#techs=SSSE3
* @see https://en.wikipedia.org/wiki/SSSE3
* @see https://software.intel.com/sites/landingpage/IntrinsicsGuide/#techs=TARGET_WITH_SSSE3
* @see https://en.wikipedia.org/wiki/TARGET_WITH_SSSE3
*/
int have_ssse3(void)
{
@@ -169,16 +169,16 @@ int have_ssse3(void)
}

/**
* @brief Checks if the current processor supports AVX2 instructions.
* @brief Checks if the current processor supports TARGET_WITH_AVX2 instructions.
*
* The function detects whether the current processor supports AVX2 instructions by
* checking the CPU feature flags. AVX2 (Advanced Vector Extensions 2) is an extension
* The function detects whether the current processor supports TARGET_WITH_AVX2 instructions by
* checking the CPU feature flags. TARGET_WITH_AVX2 (Advanced Vector Extensions 2) is an extension
* to the x86 instruction set architecture that introduces additional SIMD instructions
* useful for multimedia and signal processing tasks.
*
* @return true if the current processor supports AVX2 instructions, false otherwise.
* @return true if the current processor supports TARGET_WITH_AVX2 instructions, false otherwise.
*
* @see https://software.intel.com/sites/landingpage/IntrinsicsGuide/#techs=AVX2
* @see https://software.intel.com/sites/landingpage/IntrinsicsGuide/#techs=TARGET_WITH_AVX2
* @see https://en.wikipedia.org/wiki/Advanced_Vector_Extensions
*/

@@ -193,17 +193,24 @@ int have_avx2(void)

#if defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM)
#if defined(_WIN32)

#include <windows.h>

#include <processthreadsapi.h>

#elif defined(__APPLE__)

#include <sys/sysctl.h>

#else

#include <sys/auxv.h>

#endif
#endif

#if(defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM)) && defined(__APPLE__)

/**
* @brief Checks if the current processor supports NEON instructions.
*
@@ -257,10 +264,13 @@ int have_crc32_apple()
*
* @return true if the current processor supports cryptographic instructions, false otherwise.
*/
int have_crypto_apple() { return 0; }
int have_crypto_apple()
{ return 0; }

#endif

#if defined(__aarch64__) || defined(_M_ARM64)

int have_neon(void)
{
return 1; // ARMv8-A made it mandatory
@@ -305,9 +315,11 @@ int have_arm_crypto(void)
return getauxval(AT_HWCAP) & HWCAP_AES;
#endif
}

#endif

#if defined(__arm__) || defined(_M_ARM)

/**
* @brief Checks if the current processor supports NEON instructions.
*
@@ -377,4 +389,5 @@ int have_arm_crypto(void)
return getauxval(AT_HWCAP2) & HWCAP2_AES;
#endif
}

#endif

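The have_*() probes in simd.c exist so that the generic update entry points can pick a vector kernel at run time. The following is an illustrative sketch of that dispatch pattern for CRC-64 on x86, under the assumption that crc64.h and simd.h are included; it mirrors the intent of crc64_update rather than copying its body:

static uint64_t crc64_dispatch(uint64_t previous_crc, const uint8_t *data, uint32_t len)
{
    uint64_t crc = previous_crc;
#if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64)
    if(have_clmul())
        return crc64_clmul(crc, data, (long)len);   /* PCLMULQDQ folding path */
#endif
    crc64_slicing(&crc, data, len);                 /* portable slicing-by-4 fallback */
    return crc;
}
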
20
simd.h
20
simd.h
@@ -29,13 +29,13 @@
defined(__i386__) || defined(__THW_INTEL) || defined(_M_IX86)

#ifdef _MSC_VER
#define AVX2
#define SSSE3
#define CLMUL
#define TARGET_WITH_AVX2
#define TARGET_WITH_SSSE3
#define TARGET_WITH_CLMUL
#else
#define AVX2 __attribute__((target("avx2")))
#define SSSE3 __attribute__((target("ssse3")))
#define CLMUL __attribute__((target("pclmul,sse4.1")))
#define TARGET_WITH_AVX2 __attribute__((target("avx2")))
#define TARGET_WITH_SSSE3 __attribute__((target("ssse3")))
#define TARGET_WITH_CLMUL __attribute__((target("pclmul,sse4.1")))
#endif

AARU_EXPORT int have_clmul(void);
@@ -71,7 +71,7 @@ AARU_EXPORT int have_arm_crypto(void);

#define TARGET_ARMV8_WITH_CRC
#define TARGET_WITH_CRYPTO
#define TARGET_WITH_SIMD
#define TARGET_WITH_NEON

#else // _MSC_VER

@@ -89,7 +89,7 @@ AARU_EXPORT int have_arm_crypto(void);
#define TARGET_WITH_CRYPTO __attribute__((target("+crypto")))
#endif

#define TARGET_WITH_SIMD
#define TARGET_WITH_NEON
#else

#if (__ARM_ARCH >= 7 || defined (__ARM_ARCH_8A))
@@ -109,9 +109,9 @@ AARU_EXPORT int have_arm_crypto(void);
#endif

#ifdef __clang__
#define TARGET_WITH_SIMD __attribute__((target("neon")))
#define TARGET_WITH_NEON __attribute__((target("neon")))
#else
#define TARGET_WITH_SIMD __attribute__((target("fpu=neon")))
#define TARGET_WITH_NEON __attribute__((target("fpu=neon")))
#endif

#endif // __aarch64__ || _M_ARM64

23
spamsum.c
23
spamsum.c
@@ -42,9 +42,9 @@ static uint8_t b64[] = {0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x
*
* @return Pointer to a structure containing the checksum state.
*/
AARU_EXPORT spamsum_ctx* AARU_CALL spamsum_init(void)
AARU_EXPORT spamsum_ctx *AARU_CALL spamsum_init(void)
{
spamsum_ctx* ctx = (spamsum_ctx*)malloc(sizeof(spamsum_ctx));
spamsum_ctx *ctx = (spamsum_ctx *)malloc(sizeof(spamsum_ctx));
if(!ctx) return NULL;

memset(ctx, 0, sizeof(spamsum_ctx));
@@ -67,7 +67,7 @@ AARU_EXPORT spamsum_ctx* AARU_CALL spamsum_init(void)
*
* @returns 0 on success, -1 on error.
*/
AARU_EXPORT int AARU_CALL spamsum_update(spamsum_ctx* ctx, const uint8_t* data, uint32_t len)
AARU_EXPORT int AARU_CALL spamsum_update(spamsum_ctx *ctx, const uint8_t *data, uint32_t len)
{
int i;
if(!ctx || !data) return -1;
@@ -87,7 +87,7 @@ AARU_EXPORT int AARU_CALL spamsum_update(spamsum_ctx* ctx, const uint8_t* data,
*
* @param ctx The SpamSum checksum context structure, to be freed.
*/
AARU_EXPORT void AARU_CALL spamsum_free(spamsum_ctx* ctx)
AARU_EXPORT void AARU_CALL spamsum_free(spamsum_ctx *ctx)
{
if(ctx) free(ctx);
}
@@ -96,7 +96,7 @@ AARU_EXPORT void AARU_CALL spamsum_free(spamsum_ctx* ctx)
#define SUM_HASH(c, h) (((h)*HASH_PRIME) ^ (c));
#define SSDEEP_BS(index) (MIN_BLOCKSIZE << (index))

AARU_LOCAL inline void fuzzy_engine_step(spamsum_ctx* ctx, uint8_t c)
FORCE_INLINE void fuzzy_engine_step(spamsum_ctx *ctx, uint8_t c)
{
uint32_t i;
/* At each character we update the rolling hash and the normal hashes.
@@ -149,7 +149,7 @@ AARU_LOCAL inline void fuzzy_engine_step(spamsum_ctx* ctx, uint8_t c)
}
}

AARU_LOCAL inline void roll_hash(spamsum_ctx* ctx, uint8_t c)
FORCE_INLINE void roll_hash(spamsum_ctx *ctx, uint8_t c)
{
ctx->roll.h2 -= ctx->roll.h1;
ctx->roll.h2 += ROLLING_WINDOW * c;
@@ -167,7 +167,7 @@ AARU_LOCAL inline void roll_hash(spamsum_ctx* ctx, uint8_t c)
ctx->roll.h3 ^= c;
}

AARU_LOCAL inline void fuzzy_try_reduce_blockhash(spamsum_ctx* ctx)
FORCE_INLINE void fuzzy_try_reduce_blockhash(spamsum_ctx *ctx)
{
// assert(ctx->bh_start < ctx->bh_end);

@@ -187,7 +187,7 @@ AARU_LOCAL inline void fuzzy_try_reduce_blockhash(spamsum_ctx* ctx)
++ctx->bh_start;
}

AARU_LOCAL inline void fuzzy_try_fork_blockhash(spamsum_ctx* ctx)
FORCE_INLINE void fuzzy_try_fork_blockhash(spamsum_ctx *ctx)
{
if(ctx->bh_end >= NUM_BLOCKHASHES) return;

@@ -214,7 +214,7 @@ AARU_LOCAL inline void fuzzy_try_fork_blockhash(spamsum_ctx* ctx)
*
* @returns 0 on success, -1 on error.
*/
AARU_EXPORT int AARU_CALL spamsum_final(spamsum_ctx* ctx, uint8_t* result)
AARU_EXPORT int AARU_CALL spamsum_final(spamsum_ctx *ctx, uint8_t *result)
{
uint32_t bi = ctx->bh_start;
uint32_t h = ROLL_SUM(ctx);
@@ -244,7 +244,7 @@ AARU_EXPORT int AARU_CALL spamsum_final(spamsum_ctx* ctx, uint8_t* result)

// assert(!(bi > 0 && ctx->bh[bi].d_len < SPAMSUM_LENGTH / 2));

int i = snprintf((char*)result, (size_t)remain, "%lu:", (unsigned long)SSDEEP_BS(bi));
int i = snprintf((char *)result, (size_t)remain, "%lu:", (unsigned long)SSDEEP_BS(bi));

if(i <= 0) /* Maybe snprintf has set errno here? */
return -1;
@@ -297,8 +297,7 @@ AARU_EXPORT int AARU_CALL spamsum_final(spamsum_ctx* ctx, uint8_t* result)
++bi;
i = (int)ctx->bh[bi].d_len;

if(i <= remain)
;
if(i <= remain);

memcpy(result, ctx->bh[bi].digest, (size_t)i);
result += i;

19
spamsum.h
19
spamsum.h
@@ -54,14 +54,17 @@ typedef struct
roll_state roll;
} spamsum_ctx;

AARU_EXPORT spamsum_ctx* AARU_CALL spamsum_init(void);
AARU_EXPORT int AARU_CALL spamsum_update(spamsum_ctx* ctx, const uint8_t* data, uint32_t len);
AARU_EXPORT int AARU_CALL spamsum_final(spamsum_ctx* ctx, uint8_t* result);
AARU_EXPORT void AARU_CALL spamsum_free(spamsum_ctx* ctx);
AARU_EXPORT spamsum_ctx *AARU_CALL spamsum_init(void);
AARU_EXPORT int AARU_CALL spamsum_update(spamsum_ctx *ctx, const uint8_t *data, uint32_t len);
AARU_EXPORT int AARU_CALL spamsum_final(spamsum_ctx *ctx, uint8_t *result);
AARU_EXPORT void AARU_CALL spamsum_free(spamsum_ctx *ctx);

AARU_LOCAL void fuzzy_engine_step(spamsum_ctx* ctx, uint8_t c);
AARU_LOCAL void roll_hash(spamsum_ctx* ctx, uint8_t c);
AARU_LOCAL void fuzzy_try_reduce_blockhash(spamsum_ctx* ctx);
AARU_LOCAL void fuzzy_try_fork_blockhash(spamsum_ctx* ctx);
FORCE_INLINE void fuzzy_engine_step(spamsum_ctx *ctx, uint8_t c);

FORCE_INLINE void roll_hash(spamsum_ctx *ctx, uint8_t c);

FORCE_INLINE void fuzzy_try_reduce_blockhash(spamsum_ctx *ctx);

FORCE_INLINE void fuzzy_try_fork_blockhash(spamsum_ctx *ctx);

#endif // AARU_CHECKSUMS_NATIVE_SPAMSUM_H

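The SpamSum context follows the same lifecycle, with the digest written to a caller-supplied byte buffer. A minimal caller-side sketch, not part of the commit; the result buffer size here is an assumption chosen generously rather than a constant taken from spamsum.h:

static int spamsum_of_buffer(const uint8_t *buf, uint32_t len, uint8_t result[128])
{
    spamsum_ctx *ctx = spamsum_init();   /* zero-initialized fuzzy-hash state */
    if(!ctx) return -1;
    spamsum_update(ctx, buf, len);       /* feed data; may be called repeatedly */
    spamsum_final(ctx, result);          /* writes the ssdeep-style digest (block size prefix plus encoded hashes) */
    spamsum_free(ctx);
    return 0;
}
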
@@ -16,19 +16,19 @@
#define EXPECTED_ADLER32_63BYTES 0xD8AC2081
#define EXPECTED_ADLER32_2352BYTES 0xECD1738B

static const uint8_t* buffer;
static const uint8_t* buffer_misaligned;
static const uint8_t *buffer;
static const uint8_t *buffer_misaligned;

class adler32Fixture : public ::testing::Test
{
public:
public:
adler32Fixture()
{
// initialization;
// can also be done in SetUp()
}

protected:
protected:
void SetUp()
{
char path[PATH_MAX];

@@ -37,18 +37,19 @@ class adler32Fixture : public ::testing::Test
getcwd(path, PATH_MAX);
snprintf(filename, PATH_MAX, "%s/data/random", path);

FILE* file = fopen(filename, "rb");
buffer = (const uint8_t*)malloc(1048576);
fread((void*)buffer, 1, 1048576, file);
FILE *file = fopen(filename, "rb");
buffer = (const uint8_t *)malloc(1048576);
fread((void *)buffer, 1, 1048576, file);
fclose(file);

buffer_misaligned = (const uint8_t*)malloc(1048577);
memcpy((void*)(buffer_misaligned + 1), buffer, 1048576);
buffer_misaligned = (const uint8_t *)malloc(1048577);
memcpy((void *)(buffer_misaligned + 1), buffer, 1048576);
}

void TearDown() {
free((void*)buffer);
free((void*)buffer_misaligned);
void TearDown()
{
free((void *)buffer);
free((void *)buffer_misaligned);
}

~adler32Fixture()

@@ -61,7 +62,7 @@ class adler32Fixture : public ::testing::Test

TEST_F(adler32Fixture, adler32_auto)
{
adler32_ctx* ctx = adler32_init();
adler32_ctx *ctx = adler32_init();
uint32_t adler32;

EXPECT_NE(ctx, nullptr);

@@ -90,12 +91,12 @@ TEST_F(adler32Fixture, adler32_slicing)

TEST_F(adler32Fixture, adler32_auto_misaligned)
{
adler32_ctx* ctx = adler32_init();
adler32_ctx *ctx = adler32_init();
uint32_t adler32;

EXPECT_NE(ctx, nullptr);

adler32_update(ctx, buffer_misaligned+1, 1048576);
adler32_update(ctx, buffer_misaligned + 1, 1048576);
adler32_final(ctx, &adler32);

EXPECT_EQ(adler32, EXPECTED_ADLER32);

@@ -110,7 +111,7 @@ TEST_F(adler32Fixture, adler32_slicing_misaligned)
sum1 = 1;
sum2 = 0;

adler32_slicing(&sum1, &sum2, buffer_misaligned+1, 1048576);
adler32_slicing(&sum1, &sum2, buffer_misaligned + 1, 1048576);

adler32 = (sum2 << 16) | sum1;

@@ -119,7 +120,7 @@ TEST_F(adler32Fixture, adler32_slicing_misaligned)

TEST_F(adler32Fixture, adler32_auto_15bytes)
{
adler32_ctx* ctx = adler32_init();
adler32_ctx *ctx = adler32_init();
uint32_t adler32;

EXPECT_NE(ctx, nullptr);

@@ -148,7 +149,7 @@ TEST_F(adler32Fixture, adler32_slicing_15bytes)

TEST_F(adler32Fixture, adler32_auto_31bytes)
{
adler32_ctx* ctx = adler32_init();
adler32_ctx *ctx = adler32_init();
uint32_t adler32;

EXPECT_NE(ctx, nullptr);

@@ -177,7 +178,7 @@ TEST_F(adler32Fixture, adler32_slicing_31bytes)

TEST_F(adler32Fixture, adler32_auto_63bytes)
{
adler32_ctx* ctx = adler32_init();
adler32_ctx *ctx = adler32_init();
uint32_t adler32;

EXPECT_NE(ctx, nullptr);

@@ -206,7 +207,7 @@ TEST_F(adler32Fixture, adler32_slicing_63bytes)

TEST_F(adler32Fixture, adler32_auto_2352bytes)
{
adler32_ctx* ctx = adler32_init();
adler32_ctx *ctx = adler32_init();
uint32_t adler32;

EXPECT_NE(ctx, nullptr);

@@ -263,7 +264,7 @@ TEST_F(adler32Fixture, adler32_neon_misaligned)
sum1 = 1;
sum2 = 0;

adler32_neon(&sum1, &sum2, buffer_misaligned+1, 1048576);
adler32_neon(&sum1, &sum2, buffer_misaligned + 1, 1048576);

adler32 = (sum2 << 16) | sum1;

@@ -341,6 +342,7 @@ TEST_F(adler32Fixture, adler32_neon_2352bytes)

EXPECT_EQ(adler32, EXPECTED_ADLER32_2352BYTES);
}

#endif

#if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \

@@ -393,7 +395,7 @@ TEST_F(adler32Fixture, adler32_avx2_misaligned)
sum1 = 1;
sum2 = 0;

adler32_avx2(&sum1, &sum2, buffer_misaligned+1, 1048576);
adler32_avx2(&sum1, &sum2, buffer_misaligned + 1, 1048576);

adler32 = (sum2 << 16) | sum1;

@@ -411,7 +413,7 @@ TEST_F(adler32Fixture, adler32_ssse3_misaligned)
sum1 = 1;
sum2 = 0;

adler32_ssse3(&sum1, &sum2, buffer_misaligned+1, 1048576);
adler32_ssse3(&sum1, &sum2, buffer_misaligned + 1, 1048576);

adler32 = (sum2 << 16) | sum1;

@@ -561,4 +563,5 @@ TEST_F(adler32Fixture, adler32_ssse3_2352bytes)

EXPECT_EQ(adler32, EXPECTED_ADLER32_2352BYTES);
}

#endif
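The Adler-32 tests above always drive the back-ends the same way: the running sums are seeded with sum1 = 1 and sum2 = 0, one update runs over the buffer, and the two halves are packed into a single word. A small sketch of that pattern for the slicing back-end; it assumes the sums are carried as 16-bit values, as the calls in the tests suggest, and that an adler32.h header declares adler32_slicing:

#include <stdint.h>
#include "adler32.h"

/* One-shot Adler-32 the way the *_slicing tests compute it: seed the running
 * sums with 1 and 0, run the slicing kernel, then pack sum2:sum1. */
uint32_t adler32_buffer(const uint8_t *data, uint32_t len)
{
    uint16_t sum1 = 1;
    uint16_t sum2 = 0;

    adler32_slicing(&sum1, &sum2, data, len);

    return ((uint32_t)sum2 << 16) | sum1;
}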
|
||||
@@ -16,19 +16,19 @@
|
||||
#define EXPECTED_CRC16_63BYTES 0xFBD9
|
||||
#define EXPECTED_CRC16_2352BYTES 0x23F4
|
||||
|
||||
static const uint8_t* buffer;
|
||||
static const uint8_t* buffer_misaligned;
|
||||
static const uint8_t *buffer;
|
||||
static const uint8_t *buffer_misaligned;
|
||||
|
||||
class crc16Fixture : public ::testing::Test
|
||||
{
|
||||
public:
|
||||
public:
|
||||
crc16Fixture()
|
||||
{
|
||||
// initialization;
|
||||
// can also be done in SetUp()
|
||||
}
|
||||
|
||||
protected:
|
||||
protected:
|
||||
void SetUp()
|
||||
{
|
||||
char path[PATH_MAX];
|
||||
@@ -37,18 +37,19 @@ class crc16Fixture : public ::testing::Test
|
||||
getcwd(path, PATH_MAX);
|
||||
snprintf(filename, PATH_MAX, "%s/data/random", path);
|
||||
|
||||
FILE* file = fopen(filename, "rb");
|
||||
buffer = (const uint8_t*)malloc(1048576);
|
||||
fread((void*)buffer, 1, 1048576, file);
|
||||
FILE *file = fopen(filename, "rb");
|
||||
buffer = (const uint8_t *)malloc(1048576);
|
||||
fread((void *)buffer, 1, 1048576, file);
|
||||
fclose(file);
|
||||
|
||||
buffer_misaligned = (const uint8_t*)malloc(1048577);
|
||||
memcpy((void*)(buffer_misaligned + 1), buffer, 1048576);
|
||||
buffer_misaligned = (const uint8_t *)malloc(1048577);
|
||||
memcpy((void *)(buffer_misaligned + 1), buffer, 1048576);
|
||||
}
|
||||
|
||||
void TearDown() {
|
||||
free((void*)buffer);
|
||||
free((void*)buffer_misaligned);
|
||||
void TearDown()
|
||||
{
|
||||
free((void *)buffer);
|
||||
free((void *)buffer_misaligned);
|
||||
}
|
||||
|
||||
~crc16Fixture()
|
||||
@@ -61,7 +62,7 @@ class crc16Fixture : public ::testing::Test
|
||||
|
||||
TEST_F(crc16Fixture, crc16_auto)
|
||||
{
|
||||
crc16_ctx* ctx = crc16_init();
|
||||
crc16_ctx *ctx = crc16_init();
|
||||
uint16_t crc;
|
||||
|
||||
EXPECT_NE(ctx, nullptr);
|
||||
@@ -74,12 +75,12 @@ TEST_F(crc16Fixture, crc16_auto)
|
||||
|
||||
TEST_F(crc16Fixture, crc16_auto_misaligned)
|
||||
{
|
||||
crc16_ctx* ctx = crc16_init();
|
||||
crc16_ctx *ctx = crc16_init();
|
||||
uint16_t crc;
|
||||
|
||||
EXPECT_NE(ctx, nullptr);
|
||||
|
||||
crc16_update(ctx, buffer_misaligned+1, 1048576);
|
||||
crc16_update(ctx, buffer_misaligned + 1, 1048576);
|
||||
crc16_final(ctx, &crc);
|
||||
|
||||
EXPECT_EQ(crc, EXPECTED_CRC16);
|
||||
@@ -87,7 +88,7 @@ TEST_F(crc16Fixture, crc16_auto_misaligned)
|
||||
|
||||
TEST_F(crc16Fixture, crc16_auto_15bytes)
|
||||
{
|
||||
crc16_ctx* ctx = crc16_init();
|
||||
crc16_ctx *ctx = crc16_init();
|
||||
uint16_t crc;
|
||||
|
||||
EXPECT_NE(ctx, nullptr);
|
||||
@@ -100,7 +101,7 @@ TEST_F(crc16Fixture, crc16_auto_15bytes)
|
||||
|
||||
TEST_F(crc16Fixture, crc16_auto_31bytes)
|
||||
{
|
||||
crc16_ctx* ctx = crc16_init();
|
||||
crc16_ctx *ctx = crc16_init();
|
||||
uint16_t crc;
|
||||
|
||||
EXPECT_NE(ctx, nullptr);
|
||||
@@ -113,7 +114,7 @@ TEST_F(crc16Fixture, crc16_auto_31bytes)
|
||||
|
||||
TEST_F(crc16Fixture, crc16_auto_63bytes)
|
||||
{
|
||||
crc16_ctx* ctx = crc16_init();
|
||||
crc16_ctx *ctx = crc16_init();
|
||||
uint16_t crc;
|
||||
|
||||
EXPECT_NE(ctx, nullptr);
|
||||
@@ -126,7 +127,7 @@ TEST_F(crc16Fixture, crc16_auto_63bytes)
|
||||
|
||||
TEST_F(crc16Fixture, crc16_auto_2352bytes)
|
||||
{
|
||||
crc16_ctx* ctx = crc16_init();
|
||||
crc16_ctx *ctx = crc16_init();
|
||||
uint16_t crc;
|
||||
|
||||
EXPECT_NE(ctx, nullptr);
|
||||
|
||||
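The CRC-16 tests above all reduce to the same three calls. A one-shot wrapper mirroring what crc16_auto does; this is a sketch that assumes a crc16.h header declaring the context API used in the tests, and it omits any context release step because the excerpt does not show one:

#include <stdint.h>
#include "crc16.h"

/* Mirror of the crc16_auto test flow: init, one update, final.
 * Returns 0 if the context cannot be allocated; a real caller would
 * report that error separately. */
uint16_t crc16_buffer(const uint8_t *data, uint32_t len)
{
    crc16_ctx *ctx = crc16_init();
    uint16_t   crc = 0;

    if(!ctx) return 0;

    crc16_update(ctx, data, len);
    crc16_final(ctx, &crc);

    return crc;
}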
@@ -16,19 +16,19 @@
|
||||
#define EXPECTED_CRC16_CCITT_63BYTES 0x73c4
|
||||
#define EXPECTED_CRC16_CCITT_2352BYTES 0x1946
|
||||
|
||||
static const uint8_t* buffer;
|
||||
static const uint8_t* buffer_misaligned;
|
||||
static const uint8_t *buffer;
|
||||
static const uint8_t *buffer_misaligned;
|
||||
|
||||
class crc16_ccittFixture : public ::testing::Test
|
||||
{
|
||||
public:
|
||||
public:
|
||||
crc16_ccittFixture()
|
||||
{
|
||||
// initialization;
|
||||
// can also be done in SetUp()
|
||||
}
|
||||
|
||||
protected:
|
||||
protected:
|
||||
void SetUp()
|
||||
{
|
||||
char path[PATH_MAX];
|
||||
@@ -37,18 +37,19 @@ class crc16_ccittFixture : public ::testing::Test
|
||||
getcwd(path, PATH_MAX);
|
||||
snprintf(filename, PATH_MAX, "%s/data/random", path);
|
||||
|
||||
FILE* file = fopen(filename, "rb");
|
||||
buffer = (const uint8_t*)malloc(1048576);
|
||||
fread((void*)buffer, 1, 1048576, file);
|
||||
FILE *file = fopen(filename, "rb");
|
||||
buffer = (const uint8_t *)malloc(1048576);
|
||||
fread((void *)buffer, 1, 1048576, file);
|
||||
fclose(file);
|
||||
|
||||
buffer_misaligned = (const uint8_t*)malloc(1048577);
|
||||
memcpy((void*)(buffer_misaligned + 1), buffer, 1048576);
|
||||
buffer_misaligned = (const uint8_t *)malloc(1048577);
|
||||
memcpy((void *)(buffer_misaligned + 1), buffer, 1048576);
|
||||
}
|
||||
|
||||
void TearDown() {
|
||||
free((void*)buffer);
|
||||
free((void*)buffer_misaligned);
|
||||
void TearDown()
|
||||
{
|
||||
free((void *)buffer);
|
||||
free((void *)buffer_misaligned);
|
||||
}
|
||||
|
||||
~crc16_ccittFixture()
|
||||
@@ -61,7 +62,7 @@ class crc16_ccittFixture : public ::testing::Test
|
||||
|
||||
TEST_F(crc16_ccittFixture, crc16_ccitt_auto)
|
||||
{
|
||||
crc16_ccitt_ctx* ctx = crc16_ccitt_init();
|
||||
crc16_ccitt_ctx *ctx = crc16_ccitt_init();
|
||||
uint16_t crc;
|
||||
|
||||
EXPECT_NE(ctx, nullptr);
|
||||
@@ -74,12 +75,12 @@ TEST_F(crc16_ccittFixture, crc16_ccitt_auto)
|
||||
|
||||
TEST_F(crc16_ccittFixture, crc16_ccitt_auto_misaligned)
|
||||
{
|
||||
crc16_ccitt_ctx* ctx = crc16_ccitt_init();
|
||||
crc16_ccitt_ctx *ctx = crc16_ccitt_init();
|
||||
uint16_t crc;
|
||||
|
||||
EXPECT_NE(ctx, nullptr);
|
||||
|
||||
crc16_ccitt_update(ctx, buffer_misaligned+1, 1048576);
|
||||
crc16_ccitt_update(ctx, buffer_misaligned + 1, 1048576);
|
||||
crc16_ccitt_final(ctx, &crc);
|
||||
|
||||
EXPECT_EQ(crc, EXPECTED_CRC16_CCITT);
|
||||
@@ -87,7 +88,7 @@ TEST_F(crc16_ccittFixture, crc16_ccitt_auto_misaligned)
|
||||
|
||||
TEST_F(crc16_ccittFixture, crc16_ccitt_auto_15bytes)
|
||||
{
|
||||
crc16_ccitt_ctx* ctx = crc16_ccitt_init();
|
||||
crc16_ccitt_ctx *ctx = crc16_ccitt_init();
|
||||
uint16_t crc;
|
||||
|
||||
EXPECT_NE(ctx, nullptr);
|
||||
@@ -100,7 +101,7 @@ TEST_F(crc16_ccittFixture, crc16_ccitt_auto_15bytes)
|
||||
|
||||
TEST_F(crc16_ccittFixture, crc16_ccitt_auto_31bytes)
|
||||
{
|
||||
crc16_ccitt_ctx* ctx = crc16_ccitt_init();
|
||||
crc16_ccitt_ctx *ctx = crc16_ccitt_init();
|
||||
uint16_t crc;
|
||||
|
||||
EXPECT_NE(ctx, nullptr);
|
||||
@@ -113,7 +114,7 @@ TEST_F(crc16_ccittFixture, crc16_ccitt_auto_31bytes)
|
||||
|
||||
TEST_F(crc16_ccittFixture, crc16_ccitt_auto_63bytes)
|
||||
{
|
||||
crc16_ccitt_ctx* ctx = crc16_ccitt_init();
|
||||
crc16_ccitt_ctx *ctx = crc16_ccitt_init();
|
||||
uint16_t crc;
|
||||
|
||||
EXPECT_NE(ctx, nullptr);
|
||||
@@ -126,7 +127,7 @@ TEST_F(crc16_ccittFixture, crc16_ccitt_auto_63bytes)
|
||||
|
||||
TEST_F(crc16_ccittFixture, crc16_ccitt_auto_2352bytes)
|
||||
{
|
||||
crc16_ccitt_ctx* ctx = crc16_ccitt_init();
|
||||
crc16_ccitt_ctx *ctx = crc16_ccitt_init();
|
||||
uint16_t crc;
|
||||
|
||||
EXPECT_NE(ctx, nullptr);
|
||||
|
||||
@@ -16,19 +16,19 @@
|
||||
#define EXPECTED_CRC32_63BYTES 0xbff6a341
|
||||
#define EXPECTED_CRC32_2352BYTES 0x08ba93ea
|
||||
|
||||
static const uint8_t* buffer;
|
||||
static const uint8_t* buffer_misaligned;
|
||||
static const uint8_t *buffer;
|
||||
static const uint8_t *buffer_misaligned;
|
||||
|
||||
class crc32Fixture : public ::testing::Test
|
||||
{
|
||||
public:
|
||||
public:
|
||||
crc32Fixture()
|
||||
{
|
||||
// initialization;
|
||||
// can also be done in SetUp()
|
||||
}
|
||||
|
||||
protected:
|
||||
protected:
|
||||
void SetUp()
|
||||
{
|
||||
char path[PATH_MAX];
|
||||
@@ -37,18 +37,19 @@ class crc32Fixture : public ::testing::Test
|
||||
getcwd(path, PATH_MAX);
|
||||
snprintf(filename, PATH_MAX, "%s/data/random", path);
|
||||
|
||||
FILE* file = fopen(filename, "rb");
|
||||
buffer = (const uint8_t*)malloc(1048576);
|
||||
fread((void*)buffer, 1, 1048576, file);
|
||||
FILE *file = fopen(filename, "rb");
|
||||
buffer = (const uint8_t *)malloc(1048576);
|
||||
fread((void *)buffer, 1, 1048576, file);
|
||||
fclose(file);
|
||||
|
||||
buffer_misaligned = (const uint8_t*)malloc(1048577);
|
||||
memcpy((void*)(buffer_misaligned + 1), buffer, 1048576);
|
||||
buffer_misaligned = (const uint8_t *)malloc(1048577);
|
||||
memcpy((void *)(buffer_misaligned + 1), buffer, 1048576);
|
||||
}
|
||||
|
||||
void TearDown() {
|
||||
free((void*)buffer);
|
||||
free((void*)buffer_misaligned);
|
||||
void TearDown()
|
||||
{
|
||||
free((void *)buffer);
|
||||
free((void *)buffer_misaligned);
|
||||
}
|
||||
|
||||
~crc32Fixture()
|
||||
@@ -61,7 +62,7 @@ class crc32Fixture : public ::testing::Test
|
||||
|
||||
TEST_F(crc32Fixture, crc32_auto)
|
||||
{
|
||||
crc32_ctx* ctx = crc32_init();
|
||||
crc32_ctx *ctx = crc32_init();
|
||||
uint32_t crc;
|
||||
|
||||
EXPECT_NE(ctx, nullptr);
|
||||
@@ -85,12 +86,12 @@ TEST_F(crc32Fixture, crc32_slicing)
|
||||
|
||||
TEST_F(crc32Fixture, crc32_auto_misaligned)
|
||||
{
|
||||
crc32_ctx* ctx = crc32_init();
|
||||
crc32_ctx *ctx = crc32_init();
|
||||
uint32_t crc;
|
||||
|
||||
EXPECT_NE(ctx, nullptr);
|
||||
|
||||
crc32_update(ctx, buffer_misaligned+1, 1048576);
|
||||
crc32_update(ctx, buffer_misaligned + 1, 1048576);
|
||||
crc32_final(ctx, &crc);
|
||||
|
||||
EXPECT_EQ(crc, EXPECTED_CRC32);
|
||||
@@ -100,7 +101,7 @@ TEST_F(crc32Fixture, crc32_slicing_misaligned)
|
||||
{
|
||||
uint32_t crc = CRC32_ISO_SEED;
|
||||
|
||||
crc32_slicing(&crc, buffer_misaligned+1, 1048576);
|
||||
crc32_slicing(&crc, buffer_misaligned + 1, 1048576);
|
||||
|
||||
crc ^= CRC32_ISO_SEED;
|
||||
|
||||
@@ -109,7 +110,7 @@ TEST_F(crc32Fixture, crc32_slicing_misaligned)
|
||||
|
||||
TEST_F(crc32Fixture, crc32_auto_15bytes)
|
||||
{
|
||||
crc32_ctx* ctx = crc32_init();
|
||||
crc32_ctx *ctx = crc32_init();
|
||||
uint32_t crc;
|
||||
|
||||
EXPECT_NE(ctx, nullptr);
|
||||
@@ -133,7 +134,7 @@ TEST_F(crc32Fixture, crc32_slicing_15bytes)
|
||||
|
||||
TEST_F(crc32Fixture, crc32_auto_31bytes)
|
||||
{
|
||||
crc32_ctx* ctx = crc32_init();
|
||||
crc32_ctx *ctx = crc32_init();
|
||||
uint32_t crc;
|
||||
|
||||
EXPECT_NE(ctx, nullptr);
|
||||
@@ -157,7 +158,7 @@ TEST_F(crc32Fixture, crc32_slicing_31bytes)
|
||||
|
||||
TEST_F(crc32Fixture, crc32_auto_63bytes)
|
||||
{
|
||||
crc32_ctx* ctx = crc32_init();
|
||||
crc32_ctx *ctx = crc32_init();
|
||||
uint32_t crc;
|
||||
|
||||
EXPECT_NE(ctx, nullptr);
|
||||
@@ -181,7 +182,7 @@ TEST_F(crc32Fixture, crc32_slicing_63bytes)
|
||||
|
||||
TEST_F(crc32Fixture, crc32_auto_2352bytes)
|
||||
{
|
||||
crc32_ctx* ctx = crc32_init();
|
||||
crc32_ctx *ctx = crc32_init();
|
||||
uint32_t crc;
|
||||
|
||||
EXPECT_NE(ctx, nullptr);
|
||||
@@ -282,6 +283,7 @@ TEST_F(crc32Fixture, crc32_clmul_2352bytes)
|
||||
|
||||
EXPECT_EQ(crc, EXPECTED_CRC32_2352BYTES);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM)
|
||||
@@ -305,7 +307,7 @@ TEST_F(crc32Fixture, crc32_arm_crc32_misaligned)
|
||||
|
||||
uint32_t crc = CRC32_ISO_SEED;
|
||||
|
||||
crc = armv8_crc32_little(crc, buffer_misaligned+1, 1048576);
|
||||
crc = armv8_crc32_little(crc, buffer_misaligned + 1, 1048576);
|
||||
|
||||
crc ^= CRC32_ISO_SEED;
|
||||
|
||||
@@ -363,6 +365,7 @@ TEST_F(crc32Fixture, crc32_arm_crc32_2352bytes)
|
||||
|
||||
EXPECT_EQ(crc, EXPECTED_CRC32_2352BYTES);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
TEST_F(crc32Fixture, crc32_vmull)
|
||||
@@ -442,4 +445,5 @@ TEST_F(crc32Fixture, crc32_vmull_2352bytes)
|
||||
|
||||
EXPECT_EQ(crc, EXPECTED_CRC32_2352BYTES);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
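A note on the convention visible in the CRC-32 hunks above: the table-driven back-end updates the register in place, so the tests start from CRC32_ISO_SEED and xor the seed back out at the end. As a sketch, assuming a crc32.h header provides crc32_slicing and CRC32_ISO_SEED as the tests use them:

#include <stdint.h>
#include "crc32.h"

/* One-shot CRC-32 via the slicing kernel, using the same seed / xor-out
 * convention as the crc32_slicing* tests above. */
uint32_t crc32_buffer(const uint8_t *data, uint32_t len)
{
    uint32_t crc = CRC32_ISO_SEED;

    crc32_slicing(&crc, data, len);

    return crc ^ CRC32_ISO_SEED;
}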
@@ -16,19 +16,19 @@
|
||||
#define EXPECTED_CRC64_63BYTES 0x29F331FC90702BF4
|
||||
#define EXPECTED_CRC64_2352BYTES 0x126435DB43477623
|
||||
|
||||
static const uint8_t* buffer;
|
||||
static const uint8_t* buffer_misaligned;
|
||||
static const uint8_t *buffer;
|
||||
static const uint8_t *buffer_misaligned;
|
||||
|
||||
class crc64Fixture : public ::testing::Test
|
||||
{
|
||||
public:
|
||||
public:
|
||||
crc64Fixture()
|
||||
{
|
||||
// initialization;
|
||||
// can also be done in SetUp()
|
||||
}
|
||||
|
||||
protected:
|
||||
protected:
|
||||
void SetUp()
|
||||
{
|
||||
char path[PATH_MAX];
|
||||
@@ -37,18 +37,19 @@ class crc64Fixture : public ::testing::Test
|
||||
getcwd(path, PATH_MAX);
|
||||
snprintf(filename, PATH_MAX, "%s/data/random", path);
|
||||
|
||||
FILE* file = fopen(filename, "rb");
|
||||
buffer = (const uint8_t*)malloc(1048576);
|
||||
fread((void*)buffer, 1, 1048576, file);
|
||||
FILE *file = fopen(filename, "rb");
|
||||
buffer = (const uint8_t *)malloc(1048576);
|
||||
fread((void *)buffer, 1, 1048576, file);
|
||||
fclose(file);
|
||||
|
||||
buffer_misaligned = (const uint8_t*)malloc(1048577);
|
||||
memcpy((void*)(buffer_misaligned + 1), buffer, 1048576);
|
||||
buffer_misaligned = (const uint8_t *)malloc(1048577);
|
||||
memcpy((void *)(buffer_misaligned + 1), buffer, 1048576);
|
||||
}
|
||||
|
||||
void TearDown() {
|
||||
free((void*)buffer);
|
||||
free((void*)buffer_misaligned);
|
||||
void TearDown()
|
||||
{
|
||||
free((void *)buffer);
|
||||
free((void *)buffer_misaligned);
|
||||
}
|
||||
|
||||
~crc64Fixture()
|
||||
@@ -61,7 +62,7 @@ class crc64Fixture : public ::testing::Test
|
||||
|
||||
TEST_F(crc64Fixture, crc64_auto)
|
||||
{
|
||||
crc64_ctx* ctx = crc64_init();
|
||||
crc64_ctx *ctx = crc64_init();
|
||||
uint64_t crc;
|
||||
|
||||
EXPECT_NE(ctx, nullptr);
|
||||
@@ -85,12 +86,12 @@ TEST_F(crc64Fixture, crc64_slicing)
|
||||
|
||||
TEST_F(crc64Fixture, crc64_auto_misaligned)
|
||||
{
|
||||
crc64_ctx* ctx = crc64_init();
|
||||
crc64_ctx *ctx = crc64_init();
|
||||
uint64_t crc;
|
||||
|
||||
EXPECT_NE(ctx, nullptr);
|
||||
|
||||
crc64_update(ctx, buffer_misaligned+1, 1048576);
|
||||
crc64_update(ctx, buffer_misaligned + 1, 1048576);
|
||||
crc64_final(ctx, &crc);
|
||||
|
||||
EXPECT_EQ(crc, EXPECTED_CRC64);
|
||||
@@ -100,7 +101,7 @@ TEST_F(crc64Fixture, crc64_slicing_misaligned)
|
||||
{
|
||||
uint64_t crc = CRC64_ECMA_SEED;
|
||||
|
||||
crc64_slicing(&crc, buffer_misaligned+1, 1048576);
|
||||
crc64_slicing(&crc, buffer_misaligned + 1, 1048576);
|
||||
|
||||
crc ^= CRC64_ECMA_SEED;
|
||||
|
||||
@@ -109,7 +110,7 @@ TEST_F(crc64Fixture, crc64_slicing_misaligned)
|
||||
|
||||
TEST_F(crc64Fixture, crc64_auto_15bytes)
|
||||
{
|
||||
crc64_ctx* ctx = crc64_init();
|
||||
crc64_ctx *ctx = crc64_init();
|
||||
uint64_t crc;
|
||||
|
||||
EXPECT_NE(ctx, nullptr);
|
||||
@@ -133,7 +134,7 @@ TEST_F(crc64Fixture, crc64_slicing_15bytes)
|
||||
|
||||
TEST_F(crc64Fixture, crc64_auto_31bytes)
|
||||
{
|
||||
crc64_ctx* ctx = crc64_init();
|
||||
crc64_ctx *ctx = crc64_init();
|
||||
uint64_t crc;
|
||||
|
||||
EXPECT_NE(ctx, nullptr);
|
||||
@@ -157,7 +158,7 @@ TEST_F(crc64Fixture, crc64_slicing_31bytes)
|
||||
|
||||
TEST_F(crc64Fixture, crc64_auto_63bytes)
|
||||
{
|
||||
crc64_ctx* ctx = crc64_init();
|
||||
crc64_ctx *ctx = crc64_init();
|
||||
uint64_t crc;
|
||||
|
||||
EXPECT_NE(ctx, nullptr);
|
||||
@@ -181,7 +182,7 @@ TEST_F(crc64Fixture, crc64_slicing_63bytes)
|
||||
|
||||
TEST_F(crc64Fixture, crc64_auto_2352bytes)
|
||||
{
|
||||
crc64_ctx* ctx = crc64_init();
|
||||
crc64_ctx *ctx = crc64_init();
|
||||
uint64_t crc;
|
||||
|
||||
EXPECT_NE(ctx, nullptr);
|
||||
@@ -224,7 +225,7 @@ TEST_F(crc64Fixture, crc64_clmul_misaligned)
|
||||
|
||||
uint64_t crc = CRC64_ECMA_SEED;
|
||||
|
||||
crc = ~crc64_clmul(~crc, buffer_misaligned+1, 1048576);
|
||||
crc = ~crc64_clmul(~crc, buffer_misaligned + 1, 1048576);
|
||||
|
||||
crc ^= CRC64_ECMA_SEED;
|
||||
|
||||
@@ -282,6 +283,7 @@ TEST_F(crc64Fixture, crc64_clmul_2352bytes)
|
||||
|
||||
EXPECT_EQ(crc, EXPECTED_CRC64_2352BYTES);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM)
|
||||
@@ -304,7 +306,7 @@ TEST_F(crc64Fixture, crc64_vmull_misaligned)
|
||||
|
||||
uint64_t crc = CRC64_ECMA_SEED;
|
||||
|
||||
crc = ~crc64_vmull(~crc, buffer_misaligned+1, 1048576);
|
||||
crc = ~crc64_vmull(~crc, buffer_misaligned + 1, 1048576);
|
||||
|
||||
crc ^= CRC64_ECMA_SEED;
|
||||
|
||||
@@ -362,4 +364,5 @@ TEST_F(crc64Fixture, crc64_vmull_2352bytes)
|
||||
|
||||
EXPECT_EQ(crc, EXPECTED_CRC64_2352BYTES);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
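The CLMUL and VMULL paths in the CRC-64 hunks above differ from the table-driven one in that they take and return the CRC by value and work on the bit-inverted register, hence the ~crc64_clmul(~crc, ...) pattern in the tests. A sketch of a one-shot wrapper around the CLMUL kernel, assuming a crc64.h header declares crc64_clmul and CRC64_ECMA_SEED, and that the caller has already checked the CPU supports carry-less multiplication:

#include <stdint.h>
#include "crc64.h"

/* One-shot CRC-64/ECMA through the carry-less-multiply back-end, following
 * the invert-in / invert-out and xor-out steps the crc64_clmul* tests use. */
uint64_t crc64_clmul_buffer(const uint8_t *data, uint32_t len)
{
    uint64_t crc = CRC64_ECMA_SEED;

    crc = ~crc64_clmul(~crc, data, len);

    return crc ^ CRC64_ECMA_SEED;
}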
@@ -16,19 +16,19 @@
|
||||
#define EXPECTED_FLETCHER16_63BYTES 0x1CA0
|
||||
#define EXPECTED_FLETCHER16_2352BYTES 0x0AC5
|
||||
|
||||
static const uint8_t* buffer;
|
||||
static const uint8_t* buffer_misaligned;
|
||||
static const uint8_t *buffer;
|
||||
static const uint8_t *buffer_misaligned;
|
||||
|
||||
class fletcher16Fixture : public ::testing::Test
|
||||
{
|
||||
public:
|
||||
public:
|
||||
fletcher16Fixture()
|
||||
{
|
||||
// initialization;
|
||||
// can also be done in SetUp()
|
||||
}
|
||||
|
||||
protected:
|
||||
protected:
|
||||
void SetUp()
|
||||
{
|
||||
char path[PATH_MAX];
|
||||
@@ -37,18 +37,19 @@ class fletcher16Fixture : public ::testing::Test
|
||||
getcwd(path, PATH_MAX);
|
||||
snprintf(filename, PATH_MAX, "%s/data/random", path);
|
||||
|
||||
FILE* file = fopen(filename, "rb");
|
||||
buffer = (const uint8_t*)malloc(1048576);
|
||||
fread((void*)buffer, 1, 1048576, file);
|
||||
FILE *file = fopen(filename, "rb");
|
||||
buffer = (const uint8_t *)malloc(1048576);
|
||||
fread((void *)buffer, 1, 1048576, file);
|
||||
fclose(file);
|
||||
|
||||
buffer_misaligned = (const uint8_t*)malloc(1048577);
|
||||
memcpy((void*)(buffer_misaligned + 1), buffer, 1048576);
|
||||
buffer_misaligned = (const uint8_t *)malloc(1048577);
|
||||
memcpy((void *)(buffer_misaligned + 1), buffer, 1048576);
|
||||
}
|
||||
|
||||
void TearDown() {
|
||||
free((void*)buffer);
|
||||
free((void*)buffer_misaligned);
|
||||
void TearDown()
|
||||
{
|
||||
free((void *)buffer);
|
||||
free((void *)buffer_misaligned);
|
||||
}
|
||||
|
||||
~fletcher16Fixture()
|
||||
@@ -61,7 +62,7 @@ class fletcher16Fixture : public ::testing::Test
|
||||
|
||||
TEST_F(fletcher16Fixture, fletcher16_auto)
|
||||
{
|
||||
fletcher16_ctx* ctx = fletcher16_init();
|
||||
fletcher16_ctx *ctx = fletcher16_init();
|
||||
uint16_t fletcher;
|
||||
|
||||
EXPECT_NE(ctx, nullptr);
|
||||
@@ -74,12 +75,12 @@ TEST_F(fletcher16Fixture, fletcher16_auto)
|
||||
|
||||
TEST_F(fletcher16Fixture, fletcher16_auto_misaligned)
|
||||
{
|
||||
fletcher16_ctx* ctx = fletcher16_init();
|
||||
fletcher16_ctx *ctx = fletcher16_init();
|
||||
uint16_t fletcher;
|
||||
|
||||
EXPECT_NE(ctx, nullptr);
|
||||
|
||||
fletcher16_update(ctx, buffer_misaligned+1, 1048576);
|
||||
fletcher16_update(ctx, buffer_misaligned + 1, 1048576);
|
||||
fletcher16_final(ctx, &fletcher);
|
||||
|
||||
EXPECT_EQ(fletcher, EXPECTED_FLETCHER16);
|
||||
@@ -87,7 +88,7 @@ TEST_F(fletcher16Fixture, fletcher16_auto_misaligned)
|
||||
|
||||
TEST_F(fletcher16Fixture, fletcher16_auto_15bytes)
|
||||
{
|
||||
fletcher16_ctx* ctx = fletcher16_init();
|
||||
fletcher16_ctx *ctx = fletcher16_init();
|
||||
uint16_t fletcher;
|
||||
|
||||
EXPECT_NE(ctx, nullptr);
|
||||
@@ -100,7 +101,7 @@ TEST_F(fletcher16Fixture, fletcher16_auto_15bytes)
|
||||
|
||||
TEST_F(fletcher16Fixture, fletcher16_auto_31bytes)
|
||||
{
|
||||
fletcher16_ctx* ctx = fletcher16_init();
|
||||
fletcher16_ctx *ctx = fletcher16_init();
|
||||
uint16_t fletcher;
|
||||
|
||||
EXPECT_NE(ctx, nullptr);
|
||||
@@ -113,7 +114,7 @@ TEST_F(fletcher16Fixture, fletcher16_auto_31bytes)
|
||||
|
||||
TEST_F(fletcher16Fixture, fletcher16_auto_63bytes)
|
||||
{
|
||||
fletcher16_ctx* ctx = fletcher16_init();
|
||||
fletcher16_ctx *ctx = fletcher16_init();
|
||||
uint16_t fletcher;
|
||||
|
||||
EXPECT_NE(ctx, nullptr);
|
||||
@@ -126,7 +127,7 @@ TEST_F(fletcher16Fixture, fletcher16_auto_63bytes)
|
||||
|
||||
TEST_F(fletcher16Fixture, fletcher16_auto_2352bytes)
|
||||
{
|
||||
fletcher16_ctx* ctx = fletcher16_init();
|
||||
fletcher16_ctx *ctx = fletcher16_init();
|
||||
uint16_t fletcher;
|
||||
|
||||
EXPECT_NE(ctx, nullptr);
|
||||
|
||||
@@ -16,19 +16,19 @@
|
||||
#define EXPECTED_FLETCHER32_63BYTES 0xD8432080
|
||||
#define EXPECTED_FLETCHER32_2352BYTES 0xCB3E7352
|
||||
|
||||
static const uint8_t* buffer;
|
||||
static const uint8_t* buffer_misaligned;
|
||||
static const uint8_t *buffer;
|
||||
static const uint8_t *buffer_misaligned;
|
||||
|
||||
class fletcher32Fixture : public ::testing::Test
|
||||
{
|
||||
public:
|
||||
public:
|
||||
fletcher32Fixture()
|
||||
{
|
||||
// initialization;
|
||||
// can also be done in SetUp()
|
||||
}
|
||||
|
||||
protected:
|
||||
protected:
|
||||
void SetUp()
|
||||
{
|
||||
char path[PATH_MAX];
|
||||
@@ -37,18 +37,19 @@ class fletcher32Fixture : public ::testing::Test
|
||||
getcwd(path, PATH_MAX);
|
||||
snprintf(filename, PATH_MAX, "%s/data/random", path);
|
||||
|
||||
FILE* file = fopen(filename, "rb");
|
||||
buffer = (const uint8_t*)malloc(1048576);
|
||||
fread((void*)buffer, 1, 1048576, file);
|
||||
FILE *file = fopen(filename, "rb");
|
||||
buffer = (const uint8_t *)malloc(1048576);
|
||||
fread((void *)buffer, 1, 1048576, file);
|
||||
fclose(file);
|
||||
|
||||
buffer_misaligned = (const uint8_t*)malloc(1048577);
|
||||
memcpy((void*)(buffer_misaligned + 1), buffer, 1048576);
|
||||
buffer_misaligned = (const uint8_t *)malloc(1048577);
|
||||
memcpy((void *)(buffer_misaligned + 1), buffer, 1048576);
|
||||
}
|
||||
|
||||
void TearDown() {
|
||||
free((void*)buffer);
|
||||
free((void*)buffer_misaligned);
|
||||
void TearDown()
|
||||
{
|
||||
free((void *)buffer);
|
||||
free((void *)buffer_misaligned);
|
||||
}
|
||||
|
||||
~fletcher32Fixture()
|
||||
@@ -61,7 +62,7 @@ class fletcher32Fixture : public ::testing::Test
|
||||
|
||||
TEST_F(fletcher32Fixture, fletcher32_auto)
|
||||
{
|
||||
fletcher32_ctx* ctx = fletcher32_init();
|
||||
fletcher32_ctx *ctx = fletcher32_init();
|
||||
uint32_t fletcher;
|
||||
|
||||
EXPECT_NE(ctx, nullptr);
|
||||
@@ -74,12 +75,12 @@ TEST_F(fletcher32Fixture, fletcher32_auto)
|
||||
|
||||
TEST_F(fletcher32Fixture, fletcher32_auto_misaligned)
|
||||
{
|
||||
fletcher32_ctx* ctx = fletcher32_init();
|
||||
fletcher32_ctx *ctx = fletcher32_init();
|
||||
uint32_t fletcher;
|
||||
|
||||
EXPECT_NE(ctx, nullptr);
|
||||
|
||||
fletcher32_update(ctx, buffer_misaligned+1, 1048576);
|
||||
fletcher32_update(ctx, buffer_misaligned + 1, 1048576);
|
||||
fletcher32_final(ctx, &fletcher);
|
||||
|
||||
EXPECT_EQ(fletcher, EXPECTED_FLETCHER32);
|
||||
@@ -87,7 +88,7 @@ TEST_F(fletcher32Fixture, fletcher32_auto_misaligned)
|
||||
|
||||
TEST_F(fletcher32Fixture, fletcher32_auto_15bytes)
|
||||
{
|
||||
fletcher32_ctx* ctx = fletcher32_init();
|
||||
fletcher32_ctx *ctx = fletcher32_init();
|
||||
uint32_t fletcher;
|
||||
|
||||
EXPECT_NE(ctx, nullptr);
|
||||
@@ -100,7 +101,7 @@ TEST_F(fletcher32Fixture, fletcher32_auto_15bytes)
|
||||
|
||||
TEST_F(fletcher32Fixture, fletcher32_auto_31bytes)
|
||||
{
|
||||
fletcher32_ctx* ctx = fletcher32_init();
|
||||
fletcher32_ctx *ctx = fletcher32_init();
|
||||
uint32_t fletcher;
|
||||
|
||||
EXPECT_NE(ctx, nullptr);
|
||||
@@ -113,7 +114,7 @@ TEST_F(fletcher32Fixture, fletcher32_auto_31bytes)
|
||||
|
||||
TEST_F(fletcher32Fixture, fletcher32_auto_63bytes)
|
||||
{
|
||||
fletcher32_ctx* ctx = fletcher32_init();
|
||||
fletcher32_ctx *ctx = fletcher32_init();
|
||||
uint32_t fletcher;
|
||||
|
||||
EXPECT_NE(ctx, nullptr);
|
||||
@@ -126,7 +127,7 @@ TEST_F(fletcher32Fixture, fletcher32_auto_63bytes)
|
||||
|
||||
TEST_F(fletcher32Fixture, fletcher32_auto_2352bytes)
|
||||
{
|
||||
fletcher32_ctx* ctx = fletcher32_init();
|
||||
fletcher32_ctx *ctx = fletcher32_init();
|
||||
uint32_t fletcher;
|
||||
|
||||
EXPECT_NE(ctx, nullptr);
|
||||
@@ -167,7 +168,7 @@ TEST_F(fletcher32Fixture, fletcher32_neon_misaligned)
|
||||
sum1 = 0xFFFF;
|
||||
sum2 = 0xFFFF;
|
||||
|
||||
fletcher32_neon(&sum1, &sum2, buffer_misaligned+1, 1048576);
|
||||
fletcher32_neon(&sum1, &sum2, buffer_misaligned + 1, 1048576);
|
||||
|
||||
fletcher32 = (sum2 << 16) | sum1;
|
||||
|
||||
@@ -245,6 +246,7 @@ TEST_F(fletcher32Fixture, fletcher32_neon_2352bytes)
|
||||
|
||||
EXPECT_EQ(fletcher32, EXPECTED_FLETCHER32_2352BYTES);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \
|
||||
@@ -297,7 +299,7 @@ TEST_F(fletcher32Fixture, fletcher32_avx2_misaligned)
|
||||
sum1 = 0xFFFF;
|
||||
sum2 = 0xFFFF;
|
||||
|
||||
fletcher32_avx2(&sum1, &sum2, buffer_misaligned+1, 1048576);
|
||||
fletcher32_avx2(&sum1, &sum2, buffer_misaligned + 1, 1048576);
|
||||
|
||||
fletcher32 = (sum2 << 16) | sum1;
|
||||
|
||||
@@ -315,7 +317,7 @@ TEST_F(fletcher32Fixture, fletcher32_ssse3_misaligned)
|
||||
sum1 = 0xFFFF;
|
||||
sum2 = 0xFFFF;
|
||||
|
||||
fletcher32_ssse3(&sum1, &sum2, buffer_misaligned+1, 1048576);
|
||||
fletcher32_ssse3(&sum1, &sum2, buffer_misaligned + 1, 1048576);
|
||||
|
||||
fletcher32 = (sum2 << 16) | sum1;
|
||||
|
||||
@@ -465,4 +467,5 @@ TEST_F(fletcher32Fixture, fletcher32_ssse3_2352bytes)
|
||||
|
||||
EXPECT_EQ(fletcher32, EXPECTED_FLETCHER32_2352BYTES);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
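Unlike the Adler-32 tests, the Fletcher-32 kernels above are seeded with both halves at 0xFFFF; the final word is packed the same way, high half sum2 and low half sum1. A tiny sketch of that last step, with the parameter types assumed to match how the tests declare the sums:

#include <stdint.h>

/* Pack the two Fletcher-32 running sums exactly as the NEON/AVX2/SSSE3
 * tests above do once the kernel has finished. */
static uint32_t fletcher32_pack(uint16_t sum1, uint16_t sum2)
{
    return ((uint32_t)sum2 << 16) | sum1;
}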
@@ -16,19 +16,19 @@
|
||||
#define EXPECTED_SPAMSUM_63BYTES "3:Ac4E9E5+S09q2kABV9:Ac4E9EgSs7kW9"
|
||||
#define EXPECTED_SPAMSUM_2352BYTES "48:pasCLoANDXmjCz1p2OpPm+Gek3xmZfJJ5DD4BacmmlodQMQa/58Z:csK1Nxz7XFGeJS/flHMQu2Z"
|
||||
|
||||
static const uint8_t* buffer;
|
||||
static const uint8_t* buffer_misaligned;
|
||||
static const uint8_t *buffer;
|
||||
static const uint8_t *buffer_misaligned;
|
||||
|
||||
class spamsumFixture : public ::testing::Test
|
||||
{
|
||||
public:
|
||||
public:
|
||||
spamsumFixture()
|
||||
{
|
||||
// initialization;
|
||||
// can also be done in SetUp()
|
||||
}
|
||||
|
||||
protected:
|
||||
protected:
|
||||
void SetUp()
|
||||
{
|
||||
char path[PATH_MAX];
|
||||
@@ -37,18 +37,19 @@ class spamsumFixture : public ::testing::Test
|
||||
getcwd(path, PATH_MAX);
|
||||
snprintf(filename, PATH_MAX, "%s/data/random", path);
|
||||
|
||||
FILE* file = fopen(filename, "rb");
|
||||
buffer = (const uint8_t*)malloc(1048576);
|
||||
fread((void*)buffer, 1, 1048576, file);
|
||||
FILE *file = fopen(filename, "rb");
|
||||
buffer = (const uint8_t *)malloc(1048576);
|
||||
fread((void *)buffer, 1, 1048576, file);
|
||||
fclose(file);
|
||||
|
||||
buffer_misaligned = (const uint8_t*)malloc(1048577);
|
||||
memcpy((void*)(buffer_misaligned + 1), buffer, 1048576);
|
||||
buffer_misaligned = (const uint8_t *)malloc(1048577);
|
||||
memcpy((void *)(buffer_misaligned + 1), buffer, 1048576);
|
||||
}
|
||||
|
||||
void TearDown() {
|
||||
free((void*)buffer);
|
||||
free((void*)buffer_misaligned);
|
||||
void TearDown()
|
||||
{
|
||||
free((void *)buffer);
|
||||
free((void *)buffer_misaligned);
|
||||
}
|
||||
|
||||
~spamsumFixture()
|
||||
@@ -61,96 +62,96 @@ class spamsumFixture : public ::testing::Test
|
||||
|
||||
TEST_F(spamsumFixture, spamsum_auto)
|
||||
{
|
||||
spamsum_ctx* ctx = spamsum_init();
|
||||
const char* spamsum = (const char*)malloc(FUZZY_MAX_RESULT);
|
||||
spamsum_ctx *ctx = spamsum_init();
|
||||
const char *spamsum = (const char *)malloc(FUZZY_MAX_RESULT);
|
||||
|
||||
EXPECT_NE(ctx, nullptr);
|
||||
EXPECT_NE(spamsum, nullptr);
|
||||
|
||||
spamsum_update(ctx, buffer, 1048576);
|
||||
spamsum_final(ctx, (uint8_t*)spamsum);
|
||||
spamsum_final(ctx, (uint8_t *)spamsum);
|
||||
|
||||
EXPECT_STREQ(spamsum, EXPECTED_SPAMSUM);
|
||||
|
||||
free((void*)spamsum);
|
||||
free((void *)spamsum);
|
||||
}
|
||||
|
||||
TEST_F(spamsumFixture, spamsum_auto_misaligned)
|
||||
{
|
||||
spamsum_ctx* ctx = spamsum_init();
|
||||
const char* spamsum = (const char*)malloc(FUZZY_MAX_RESULT);
|
||||
spamsum_ctx *ctx = spamsum_init();
|
||||
const char *spamsum = (const char *)malloc(FUZZY_MAX_RESULT);
|
||||
|
||||
EXPECT_NE(ctx, nullptr);
|
||||
EXPECT_NE(spamsum, nullptr);
|
||||
|
||||
spamsum_update(ctx, buffer_misaligned+1, 1048576);
|
||||
spamsum_final(ctx, (uint8_t*)spamsum);
|
||||
spamsum_update(ctx, buffer_misaligned + 1, 1048576);
|
||||
spamsum_final(ctx, (uint8_t *)spamsum);
|
||||
|
||||
EXPECT_STREQ(spamsum, EXPECTED_SPAMSUM);
|
||||
|
||||
free((void*)spamsum);
|
||||
free((void *)spamsum);
|
||||
}
|
||||
|
||||
TEST_F(spamsumFixture, spamsum_auto_15bytes)
|
||||
{
|
||||
spamsum_ctx* ctx = spamsum_init();
|
||||
const char* spamsum = (const char*)malloc(FUZZY_MAX_RESULT);
|
||||
spamsum_ctx *ctx = spamsum_init();
|
||||
const char *spamsum = (const char *)malloc(FUZZY_MAX_RESULT);
|
||||
|
||||
EXPECT_NE(ctx, nullptr);
|
||||
EXPECT_NE(spamsum, nullptr);
|
||||
|
||||
spamsum_update(ctx, buffer, 15);
|
||||
spamsum_final(ctx, (uint8_t*)spamsum);
|
||||
spamsum_final(ctx, (uint8_t *)spamsum);
|
||||
|
||||
EXPECT_STREQ(spamsum, EXPECTED_SPAMSUM_15BYTES);
|
||||
|
||||
free((void*)spamsum);
|
||||
free((void *)spamsum);
|
||||
}
|
||||
|
||||
TEST_F(spamsumFixture, spamsum_auto_31bytes)
|
||||
{
|
||||
spamsum_ctx* ctx = spamsum_init();
|
||||
const char* spamsum = (const char*)malloc(FUZZY_MAX_RESULT);
|
||||
spamsum_ctx *ctx = spamsum_init();
|
||||
const char *spamsum = (const char *)malloc(FUZZY_MAX_RESULT);
|
||||
|
||||
EXPECT_NE(ctx, nullptr);
|
||||
EXPECT_NE(spamsum, nullptr);
|
||||
|
||||
spamsum_update(ctx, buffer, 31);
|
||||
spamsum_final(ctx, (uint8_t*)spamsum);
|
||||
spamsum_final(ctx, (uint8_t *)spamsum);
|
||||
|
||||
EXPECT_STREQ(spamsum, EXPECTED_SPAMSUM_31BYTES);
|
||||
|
||||
free((void*)spamsum);
|
||||
free((void *)spamsum);
|
||||
}
|
||||
|
||||
TEST_F(spamsumFixture, spamsum_auto_63bytes)
|
||||
{
|
||||
spamsum_ctx* ctx = spamsum_init();
|
||||
const char* spamsum = (const char*)malloc(FUZZY_MAX_RESULT);
|
||||
spamsum_ctx *ctx = spamsum_init();
|
||||
const char *spamsum = (const char *)malloc(FUZZY_MAX_RESULT);
|
||||
|
||||
EXPECT_NE(ctx, nullptr);
|
||||
EXPECT_NE(spamsum, nullptr);
|
||||
|
||||
spamsum_update(ctx, buffer, 63);
|
||||
spamsum_final(ctx, (uint8_t*)spamsum);
|
||||
spamsum_final(ctx, (uint8_t *)spamsum);
|
||||
|
||||
EXPECT_STREQ(spamsum, EXPECTED_SPAMSUM_63BYTES);
|
||||
|
||||
free((void*)spamsum);
|
||||
free((void *)spamsum);
|
||||
}
|
||||
|
||||
TEST_F(spamsumFixture, spamsum_auto_2352bytes)
|
||||
{
|
||||
spamsum_ctx* ctx = spamsum_init();
|
||||
const char* spamsum = (const char*)malloc(FUZZY_MAX_RESULT);
|
||||
spamsum_ctx *ctx = spamsum_init();
|
||||
const char *spamsum = (const char *)malloc(FUZZY_MAX_RESULT);
|
||||
|
||||
EXPECT_NE(ctx, nullptr);
|
||||
EXPECT_NE(spamsum, nullptr);
|
||||
|
||||
spamsum_update(ctx, buffer, 2352);
|
||||
spamsum_final(ctx, (uint8_t*)spamsum);
|
||||
spamsum_final(ctx, (uint8_t *)spamsum);
|
||||
|
||||
EXPECT_STREQ(spamsum, EXPECTED_SPAMSUM_2352BYTES);
|
||||
|
||||
free((void*)spamsum);
|
||||
free((void *)spamsum);
|
||||
}
|
||||
|
||||