General refactor and cleanup.

2024-04-30 15:12:48 +01:00
parent 552aa9da02
commit bd5051ce18
48 changed files with 1157 additions and 1290 deletions

View File

@@ -4,7 +4,7 @@
"name": "x64-Debug",
"generator": "Ninja",
"configurationType": "Debug",
"inheritEnvironments": [ "msvc_x64_x64" ],
"inheritEnvironments": ["msvc_x64_x64"],
"buildRoot": "${projectDir}\\out\\build\\${name}",
"installRoot": "${projectDir}\\out\\install\\${name}",
"cmakeCommandArgs": "",
@@ -20,7 +20,7 @@
"cmakeCommandArgs": "",
"buildCommandArgs": "",
"ctestCommandArgs": "",
"inheritEnvironments": [ "msvc_arm_x64" ],
"inheritEnvironments": ["msvc_arm_x64"],
"variables": []
},
{
@@ -32,7 +32,7 @@
"cmakeCommandArgs": "",
"buildCommandArgs": "",
"ctestCommandArgs": "",
"inheritEnvironments": [ "msvc_arm64_x64" ],
"inheritEnvironments": ["msvc_arm64_x64"],
"variables": []
},
{
@@ -44,7 +44,7 @@
"cmakeCommandArgs": "",
"buildCommandArgs": "",
"ctestCommandArgs": "",
"inheritEnvironments": [ "msvc_x86_x64" ],
"inheritEnvironments": ["msvc_x86_x64"],
"variables": []
}
]

View File

@@ -6,9 +6,11 @@ The purpose of this library is to provide checksums and hashing algorithms for A
No archiver processing code should fall here, those go in [Aaru.Checksums](https://github.com/aaru-dps/Aaru.Checksums).
To build you just need Docker on Linux and run `build.sh`, that will generate a NuGet package for use with Aaru.Checksums.
To build you just need Docker on Linux and run `build.sh`, that will generate a NuGet package for use with
Aaru.Checksums.
Currently implemented algorithms are:
- Adler-32
- CRC-16 (CCITT and IBM polynomials)
- CRC-32 (ISO polynomial)

View File

@@ -147,8 +147,7 @@ AARU_EXPORT void AARU_CALL adler32_slicing(uint16_t *sum1, uint16_t *sum2, const
{
len -= NMAX;
n = NMAX / 16; /* NMAX is divisible by 16 */
do
{
do {
s1 += data[0];
s2 += s1;
s1 += data[0 + 1];
@@ -184,8 +183,7 @@ AARU_EXPORT void AARU_CALL adler32_slicing(uint16_t *sum1, uint16_t *sum2, const
/* 16 sums unrolled */
data += 16;
}
while(--n);
} while(--n);
s1 %= ADLER_MODULE;
s2 %= ADLER_MODULE;
}
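For reference, the 16-way unrolled loop above is just the classic Adler-32 recurrence with the modulo deferred until NMAX bytes have been summed; a minimal scalar sketch of what it computes (illustrative only; the name and the per-byte reduction are not the library's code):

    static void adler32_scalar_sketch(uint16_t *sum1, uint16_t *sum2, const uint8_t *data, long len)
    {
        uint32_t s1 = *sum1;
        uint32_t s2 = *sum2;

        while(len--)
        {
            s1 = (s1 + *data++) % 65521; /* ADLER_MODULE */
            s2 = (s2 + s1) % 65521;
        }

        *sum1 = (uint16_t)s1;
        *sum2 = (uint16_t)s2;
    }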

View File

@@ -38,8 +38,8 @@ AARU_EXPORT void AARU_CALL adler32_slicing(uint16_t *sum1, uint16_t *sum2, const
#if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \
defined(__i386__) || defined(__THW_INTEL) || defined(_M_IX86)
AARU_EXPORT TARGET_WITH_SSSE3 void AARU_CALL
adler32_ssse3(uint16_t *sum1, uint16_t *sum2, const uint8_t *data, long len);
AARU_EXPORT TARGET_WITH_SSSE3 void AARU_CALL adler32_ssse3(uint16_t *sum1, uint16_t *sum2, const uint8_t *data,
long len);
AARU_EXPORT TARGET_WITH_AVX2 void AARU_CALL adler32_avx2(uint16_t *sum1, uint16_t *sum2, const uint8_t *data, long len);
#endif

View File

@@ -63,38 +63,8 @@ AARU_EXPORT TARGET_WITH_AVX2 void AARU_CALL adler32_avx2(uint16_t *sum1, uint16_
if(n > blocks) n = (unsigned)blocks;
blocks -= n;
const __m256i tap = _mm256_set_epi8(1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31,
32);
const __m256i tap = _mm256_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
const __m256i zero = _mm256_setzero_si256();
const __m256i ones = _mm256_set1_epi16(1);
@@ -105,8 +75,7 @@ AARU_EXPORT TARGET_WITH_AVX2 void AARU_CALL adler32_avx2(uint16_t *sum1, uint16_
__m256i v_ps = _mm256_set_epi32(0, 0, 0, 0, 0, 0, 0, (s1 * n));
__m256i v_s2 = _mm256_set_epi32(0, 0, 0, 0, 0, 0, 0, s2);
__m256i v_s1 = _mm256_setzero_si256();
do
{
do {
/*
* Load 32 input bytes.
*/
@@ -125,8 +94,7 @@ AARU_EXPORT TARGET_WITH_AVX2 void AARU_CALL adler32_avx2(uint16_t *sum1, uint16_
v_s2 = _mm256_add_epi32(v_s2, _mm256_madd_epi16(mad, ones));
data += BLOCK_SIZE;
}
while(--n);
} while(--n);
__m128i sum = _mm_add_epi32(_mm256_castsi256_si128(v_s1), _mm256_extracti128_si256(v_s1, 1));
__m128i hi = _mm_unpackhi_epi64(sum, sum);
@@ -176,8 +144,7 @@ AARU_EXPORT TARGET_WITH_AVX2 void AARU_CALL adler32_avx2(uint16_t *sum1, uint16_
s2 += (s1 += *data++);
len -= 16;
}
while(len--)
{ s2 += (s1 += *data++); }
while(len--) { s2 += (s1 += *data++); }
if(s1 >= ADLER_MODULE) s1 -= ADLER_MODULE;
s2 %= ADLER_MODULE;
}
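The [32, 31, …, 1] tap vector and the surrounding bookkeeping implement the blocked form of the two running sums: for each 32-byte block b0 … b31,

    s1' = s1 + (b0 + b1 + … + b31)
    s2' = s2 + 32·s1 + 32·b0 + 31·b1 + … + 1·b31

_mm256_maddubs_epi16 multiplies the bytes by the tap weights, _mm256_madd_epi16 with the all-ones vector widens and sums the 16-bit products into v_s2, and v_ps (initialised to s1 · n and scaled by 32 with a left shift of 5 after the loop) supplies the 32·s1 term for each of the n blocks processed before the next modulo reduction.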

View File

@@ -88,8 +88,12 @@ TARGET_WITH_NEON void adler32_neon(uint16_t *sum1, uint16_t *sum2, const uint8_t
* processed before s2 must be reduced modulo ADLER_MODULE.
*/
#ifdef _MSC_VER
uint32x4_t v_s2 = {.n128_u32 = {0, 0, 0, s1 * n}};
uint32x4_t v_s1 = {.n128_u32 = {0, 0, 0, 0}};
uint32x4_t v_s2 = {
.n128_u32 = {0, 0, 0, s1 * n}
};
uint32x4_t v_s1 = {
.n128_u32 = {0, 0, 0, 0}
};
#else
uint32x4_t v_s2 = (uint32x4_t){0, 0, 0, s1 * n};
uint32x4_t v_s1 = (uint32x4_t){0, 0, 0, 0};
@@ -98,8 +102,7 @@ TARGET_WITH_NEON void adler32_neon(uint16_t *sum1, uint16_t *sum2, const uint8_t
uint16x8_t v_column_sum_2 = vdupq_n_u16(0);
uint16x8_t v_column_sum_3 = vdupq_n_u16(0);
uint16x8_t v_column_sum_4 = vdupq_n_u16(0);
do
{
do {
/*
* Load 32 input bytes.
*/
@@ -121,8 +124,7 @@ TARGET_WITH_NEON void adler32_neon(uint16_t *sum1, uint16_t *sum2, const uint8_t
v_column_sum_3 = vaddw_u8(v_column_sum_3, vget_low_u8(bytes2));
v_column_sum_4 = vaddw_u8(v_column_sum_4, vget_high_u8(bytes2));
data += BLOCK_SIZE;
}
while(--n);
} while(--n);
v_s2 = vshlq_n_u32(v_s2, 5);
/*
* Multiply-add bytes by [ 32, 31, 30, ... ] for s2.
@@ -198,8 +200,7 @@ TARGET_WITH_NEON void adler32_neon(uint16_t *sum1, uint16_t *sum2, const uint8_t
s2 += (s1 += *data++);
len -= 16;
}
while(len--)
{ s2 += (s1 += *data++); }
while(len--) { s2 += (s1 += *data++); }
if(s1 >= ADLER_MODULE) s1 -= ADLER_MODULE;
s2 %= ADLER_MODULE;
}

View File

@@ -39,7 +39,6 @@
#include "library.h"
#include "adler32.h"
/**
* @brief Calculate Adler-32 checksum for a given data using SSSE3 instructions.
*
@@ -50,8 +49,8 @@
* @param data Pointer to the data buffer.
* @param len Length of the data buffer in bytes.
*/
AARU_EXPORT TARGET_WITH_SSSE3 void AARU_CALL
adler32_ssse3(uint16_t *sum1, uint16_t *sum2, const uint8_t *data, long len)
AARU_EXPORT TARGET_WITH_SSSE3 void AARU_CALL adler32_ssse3(uint16_t *sum1, uint16_t *sum2, const uint8_t *data,
long len)
{
uint32_t s1 = *sum1;
uint32_t s2 = *sum2;
@@ -80,8 +79,7 @@ adler32_ssse3(uint16_t *sum1, uint16_t *sum2, const uint8_t *data, long len)
__m128i v_ps = _mm_set_epi32(0, 0, 0, s1 * n);
__m128i v_s2 = _mm_set_epi32(0, 0, 0, s2);
__m128i v_s1 = _mm_set_epi32(0, 0, 0, 0);
do
{
do {
/*
* Load 32 input bytes.
*/
@@ -102,8 +100,7 @@ adler32_ssse3(uint16_t *sum1, uint16_t *sum2, const uint8_t *data, long len)
const __m128i mad2 = _mm_maddubs_epi16(bytes2, tap2);
v_s2 = _mm_add_epi32(v_s2, _mm_madd_epi16(mad2, ones));
data += BLOCK_SIZE;
}
while(--n);
} while(--n);
v_s2 = _mm_add_epi32(v_s2, _mm_slli_epi32(v_ps, 5));
/*
* Sum epi32 ints v_s1(s2) and accumulate in s1(s2).
@@ -151,8 +148,7 @@ adler32_ssse3(uint16_t *sum1, uint16_t *sum2, const uint8_t *data, long len)
s2 += (s1 += *data++);
len -= 16;
}
while(len--)
{ s2 += (s1 += *data++); }
while(len--) { s2 += (s1 += *data++); }
if(s1 >= ADLER_MODULE) s1 -= ADLER_MODULE;
s2 %= ADLER_MODULE;
}

View File

@@ -176,7 +176,8 @@ const uint16_t crc16_table[8][256] = {
0x858C, 0x494D, 0xAF0C, 0x63CD, 0x768D, 0xBA4C, 0xFA0C, 0x36CD, 0x238D, 0xEF4C, 0x090D, 0xC5CC, 0xD08C, 0x1C4D,
0x480E, 0x84CF, 0x918F, 0x5D4E, 0xBB0F, 0x77CE, 0x628E, 0xAE4F, 0xEE0F, 0x22CE, 0x378E, 0xFB4F, 0x1D0E, 0xD1CF,
0xC48F, 0x084E, 0x440F, 0x88CE, 0x9D8E, 0x514F, 0xB70E, 0x7BCF, 0x6E8F, 0xA24E, 0xE20E, 0x2ECF, 0x3B8F, 0xF74E,
0x110F, 0xDDCE, 0xC88E, 0x044F}};
0x110F, 0xDDCE, 0xC88E, 0x044F}
};
AARU_EXPORT crc16_ctx *AARU_CALL crc16_init();
AARU_EXPORT int AARU_CALL crc16_update(crc16_ctx *ctx, const uint8_t *data, uint32_t len);
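The [8][256] table layout here (and in the CCITT variant that follows) is presumably the classic slicing-by-8 arrangement: eight 256-entry tables let the update loop consume eight input bytes per iteration, XOR-combining one lookup from each table, instead of walking a single table one byte at a time.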

View File

@@ -176,7 +176,8 @@ const uint16_t crc16_ccitt_table[8][256] = {
0xB943, 0xFE90, 0x3988, 0x7E5B, 0xB62E, 0xF1FD, 0x283F, 0x6FEC, 0xA799, 0xE04A, 0x2752, 0x6081, 0xA8F4, 0xEF27,
0x7039, 0x37EA, 0xFF9F, 0xB84C, 0x7F54, 0x3887, 0xF0F2, 0xB721, 0x6EE3, 0x2930, 0xE145, 0xA696, 0x618E, 0x265D,
0xEE28, 0xA9FB, 0x4D8D, 0x0A5E, 0xC22B, 0x85F8, 0x42E0, 0x0533, 0xCD46, 0x8A95, 0x5357, 0x1484, 0xDCF1, 0x9B22,
0x5C3A, 0x1BE9, 0xD39C, 0x944F}};
0x5C3A, 0x1BE9, 0xD39C, 0x944F}
};
AARU_EXPORT crc16_ccitt_ctx *AARU_CALL crc16_ccitt_init();
AARU_EXPORT int AARU_CALL crc16_ccitt_update(crc16_ccitt_ctx *ctx, const uint8_t *data, uint32_t len);

crc32.h
View File

@@ -262,24 +262,23 @@ static const uint32_t crc32_table[8][256] = {
#define CRC32_ISO_POLY 0xEDB88320
#define CRC32_ISO_SEED 0xFFFFFFFF
AARU_EXPORT crc32_ctx* AARU_CALL crc32_init();
AARU_EXPORT int AARU_CALL crc32_update(crc32_ctx* ctx, const uint8_t* data, uint32_t len);
AARU_EXPORT int AARU_CALL crc32_final(crc32_ctx* ctx, uint32_t* crc);
AARU_EXPORT void AARU_CALL crc32_free(crc32_ctx* ctx);
AARU_EXPORT void AARU_CALL crc32_slicing(uint32_t* previous_crc, const uint8_t* data, long len);
AARU_EXPORT crc32_ctx *AARU_CALL crc32_init();
AARU_EXPORT int AARU_CALL crc32_update(crc32_ctx *ctx, const uint8_t *data, uint32_t len);
AARU_EXPORT int AARU_CALL crc32_final(crc32_ctx *ctx, uint32_t *crc);
AARU_EXPORT void AARU_CALL crc32_free(crc32_ctx *ctx);
AARU_EXPORT void AARU_CALL crc32_slicing(uint32_t *previous_crc, const uint8_t *data, long len);
#if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \
defined(__i386__) || defined(__THW_INTEL) || defined(_M_IX86)
AARU_EXPORT TARGET_WITH_CLMUL uint32_t AARU_CALL crc32_clmul(uint32_t previous_crc, const uint8_t* data, long len);
AARU_EXPORT TARGET_WITH_CLMUL uint32_t AARU_CALL crc32_clmul(uint32_t previous_crc, const uint8_t *data, long len);
#endif
#if defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM)
#if __ARM_ARCH >= 7
AARU_EXPORT TARGET_ARMV8_WITH_CRC uint32_t AARU_CALL armv8_crc32_little(uint32_t previous_crc,
const uint8_t* data,
AARU_EXPORT TARGET_ARMV8_WITH_CRC uint32_t AARU_CALL armv8_crc32_little(uint32_t previous_crc, const uint8_t *data,
uint32_t len);
#endif
AARU_EXPORT TARGET_WITH_NEON uint32_t AARU_CALL crc32_vmull(uint32_t previous_crc, const uint8_t* data, long len);
AARU_EXPORT TARGET_WITH_NEON uint32_t AARU_CALL crc32_vmull(uint32_t previous_crc, const uint8_t *data, long len);
#endif
#endif // AARU_CHECKSUMS_NATIVE_CRC32_H
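Taken together, these declarations form a context-based streaming API; a minimal usage sketch built only on the functions declared above (return-value checks and the seeding comment are assumptions):

    #include <stdint.h>
    #include <stdio.h>
    #include "crc32.h"

    int main(void)
    {
        const uint8_t data[] = "123456789";
        uint32_t      crc;

        crc32_ctx *ctx = crc32_init();  /* presumably seeds the register with CRC32_ISO_SEED */
        if(!ctx) return 1;

        crc32_update(ctx, data, 9);     /* may be called repeatedly for streamed input */
        crc32_final(ctx, &crc);         /* writes out the finished CRC-32 (ISO polynomial) */
        crc32_free(ctx);

        printf("%08x\n", crc);
        return 0;
    }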

View File

@@ -110,8 +110,7 @@ TARGET_ARMV8_WITH_CRC uint32_t armv8_crc32_little(uint32_t previous_crc, const u
data = (const uint8_t *)buf4;
#endif
while(len--)
{ c = __crc32b(c, *data++); }
while(len--) { c = __crc32b(c, *data++); }
return c;
}

View File

@@ -166,12 +166,8 @@ TARGET_WITH_CLMUL static void fold_4(__m128i *xmm_crc0, __m128i *xmm_crc1, __m12
*xmm_crc3 = _mm_castps_si128(ps_res3);
}
TARGET_WITH_CLMUL static void partial_fold(const size_t len,
__m128i *xmm_crc0,
__m128i *xmm_crc1,
__m128i *xmm_crc2,
__m128i *xmm_crc3,
__m128i *xmm_crc_part)
TARGET_WITH_CLMUL static void partial_fold(const size_t len, __m128i *xmm_crc0, __m128i *xmm_crc1, __m128i *xmm_crc2,
__m128i *xmm_crc3, __m128i *xmm_crc_part)
{
const __m128i xmm_fold4 = _mm_set_epi32(0x00000001, 0x54442bd4, 0x00000001, 0xc6e41596);
const __m128i xmm_mask3 = _mm_set1_epi32(0x80808080);

View File

@@ -38,18 +38,12 @@ static const unsigned ALIGNED_(32) pshufb_shf_table[60] = {
};
static const uint32_t ALIGNED_(16) crc_k[] = {
0xccaa009e,
0x00000000, /* rk1 */
0x751997d0,
0x00000001, /* rk2 */
0xccaa009e,
0x00000000, /* rk5 */
0x63cd6124,
0x00000001, /* rk6 */
0xf7011640,
0x00000001, /* rk7 */
0xdb710640,
0x00000001 /* rk8 */
0xccaa009e, 0x00000000, /* rk1 */
0x751997d0, 0x00000001, /* rk2 */
0xccaa009e, 0x00000000, /* rk5 */
0x63cd6124, 0x00000001, /* rk6 */
0xf7011640, 0x00000001, /* rk7 */
0xdb710640, 0x00000001 /* rk8 */
};
static const unsigned ALIGNED_(16) crc_mask[4] = {0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000};

View File

@@ -43,8 +43,8 @@
#define XOR_INITIAL(where) \
ONCE(where = vreinterpretq_u64_u32(veorq_u32(vreinterpretq_u32_u64(where), vreinterpretq_u32_u64(q_initial))))
TARGET_WITH_NEON FORCE_INLINE void
fold_1(uint64x2_t *q_crc0, uint64x2_t *q_crc1, uint64x2_t *q_crc2, uint64x2_t *q_crc3)
TARGET_WITH_NEON FORCE_INLINE void fold_1(uint64x2_t *q_crc0, uint64x2_t *q_crc1, uint64x2_t *q_crc2,
uint64x2_t *q_crc3)
{
uint32_t ALIGNED_(16) data[4] = {0xc6e41596, 0x00000001, 0x54442bd4, 0x00000001};
const uint64x2_t q_fold4 = vreinterpretq_u64_u32(vld1q_u32(data));
@@ -67,8 +67,8 @@ fold_1(uint64x2_t *q_crc0, uint64x2_t *q_crc1, uint64x2_t *q_crc2, uint64x2_t *q
*q_crc3 = vreinterpretq_u64_u32(ps_res);
}
TARGET_WITH_NEON FORCE_INLINE void
fold_2(uint64x2_t *q_crc0, uint64x2_t *q_crc1, uint64x2_t *q_crc2, uint64x2_t *q_crc3)
TARGET_WITH_NEON FORCE_INLINE void fold_2(uint64x2_t *q_crc0, uint64x2_t *q_crc1, uint64x2_t *q_crc2,
uint64x2_t *q_crc3)
{
uint32_t ALIGNED_(16) data[4] = {0xc6e41596, 0x00000001, 0x54442bd4, 0x00000001};
const uint64x2_t q_fold4 = vreinterpretq_u64_u32(vld1q_u32(data));
@@ -99,8 +99,8 @@ fold_2(uint64x2_t *q_crc0, uint64x2_t *q_crc1, uint64x2_t *q_crc2, uint64x2_t *q
*q_crc3 = vreinterpretq_u64_u32(ps_res31);
}
TARGET_WITH_NEON FORCE_INLINE void
fold_3(uint64x2_t *q_crc0, uint64x2_t *q_crc1, uint64x2_t *q_crc2, uint64x2_t *q_crc3)
TARGET_WITH_NEON FORCE_INLINE void fold_3(uint64x2_t *q_crc0, uint64x2_t *q_crc1, uint64x2_t *q_crc2,
uint64x2_t *q_crc3)
{
uint32_t ALIGNED_(16) data[4] = {0xc6e41596, 0x00000001, 0x54442bd4, 0x00000001};
const uint64x2_t q_fold4 = vreinterpretq_u64_u32(vld1q_u32(data));
@@ -137,8 +137,8 @@ fold_3(uint64x2_t *q_crc0, uint64x2_t *q_crc1, uint64x2_t *q_crc2, uint64x2_t *q
*q_crc3 = vreinterpretq_u64_u32(ps_res32);
}
TARGET_WITH_NEON FORCE_INLINE void
fold_4(uint64x2_t *q_crc0, uint64x2_t *q_crc1, uint64x2_t *q_crc2, uint64x2_t *q_crc3)
TARGET_WITH_NEON FORCE_INLINE void fold_4(uint64x2_t *q_crc0, uint64x2_t *q_crc1, uint64x2_t *q_crc2,
uint64x2_t *q_crc3)
{
uint32_t ALIGNED_(16) data[4] = {0xc6e41596, 0x00000001, 0x54442bd4, 0x00000001};
const uint64x2_t q_fold4 = vreinterpretq_u64_u32(vld1q_u32(data));
@@ -184,12 +184,8 @@ fold_4(uint64x2_t *q_crc0, uint64x2_t *q_crc1, uint64x2_t *q_crc2, uint64x2_t *q
*q_crc3 = vreinterpretq_u64_u32(ps_res3);
}
TARGET_WITH_NEON FORCE_INLINE void partial_fold(const size_t len,
uint64x2_t *q_crc0,
uint64x2_t *q_crc1,
uint64x2_t *q_crc2,
uint64x2_t *q_crc3,
uint64x2_t *q_crc_part)
TARGET_WITH_NEON FORCE_INLINE void partial_fold(const size_t len, uint64x2_t *q_crc0, uint64x2_t *q_crc1,
uint64x2_t *q_crc2, uint64x2_t *q_crc3, uint64x2_t *q_crc_part)
{
uint32_t ALIGNED_(16) data[4] = {0xc6e41596, 0x00000001, 0x54442bd4, 0x00000001};
const uint64x2_t q_fold4 = vreinterpretq_u64_u32(vld1q_u32(data));

View File

@@ -232,7 +232,8 @@ const static uint64_t crc64_table[4][256] = {
0xA0A13C6791602FF9, 0xBD4FB639B34C8E25, 0x9B7C28DBD5396C41, 0x8692A285F715CD9D, 0xD71B151F19D2A889,
0xCAF59F413BFE0955, 0xECC601A35D8BEB31, 0xF1288BFD7FA74AED, 0x4FD56E9680052119, 0x523BE4C8A22980C5,
0x74087A2AC45C62A1, 0x69E6F074E670C37D, 0x386F47EE08B7A669, 0x2581CDB02A9B07B5, 0x03B253524CEEE5D1,
0x1E5CD90C6EC2440D}};
0x1E5CD90C6EC2440D}
};
#define CRC64_ECMA_POLY 0xC96C5795D7870F42
#define CRC64_ECMA_SEED 0xFFFFFFFFFFFFFFFF

View File

@@ -179,8 +179,7 @@ AARU_EXPORT TARGET_WITH_CLMUL uint64_t AARU_CALL crc64_clmul(uint64_t crc, const
}
__m128i P;
if(length == 16)
{ P = _mm_xor_si128(accumulator, _mm_load_si128(alignedData)); }
if(length == 16) { P = _mm_xor_si128(accumulator, _mm_load_si128(alignedData)); }
else
{
const __m128i end0 = _mm_xor_si128(accumulator, _mm_load_si128(alignedData));
@@ -199,9 +198,7 @@ AARU_EXPORT TARGET_WITH_CLMUL uint64_t AARU_CALL crc64_clmul(uint64_t crc, const
// Final Barrett reduction
const __m128i T1 = _mm_clmulepi64_si128(R, foldConstants2, 0x00);
const __m128i T2 =
_mm_xor_si128(
_mm_xor_si128(_mm_clmulepi64_si128(T1, foldConstants2, 0x10), _mm_slli_si128(T1, 8)),
R);
_mm_xor_si128(_mm_xor_si128(_mm_clmulepi64_si128(T1, foldConstants2, 0x10), _mm_slli_si128(T1, 8)), R);
#if defined(_WIN64)
return ~_mm_extract_epi64(T2, 1);
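The final bitwise NOT is the CRC-64 output XOR: with CRC64_ECMA_POLY 0xC96C5795D7870F42 and the all-ones CRC64_ECMA_SEED defined earlier, this follows the usual CRC-64/XZ convention of complementing the register both on entry and on exit; the NEON variant below does the same with its ~previous_crc pre-conditioning.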

View File

@@ -24,29 +24,23 @@ static const uint8_t shuffleMasks[] = {
TARGET_WITH_NEON FORCE_INLINE void shiftRight128(uint64x2_t in, size_t n, uint64x2_t *outLeft, uint64x2_t *outRight)
{
const uint64x2_t maskA =
vreinterpretq_u64_u32(
vld1q_u32((const uint32_t *)(const uint64x2_t *)(shuffleMasks + (16 - n))));
vreinterpretq_u64_u32(vld1q_u32((const uint32_t *)(const uint64x2_t *)(shuffleMasks + (16 - n))));
uint64x2_t b = vreinterpretq_u64_u8(vceqq_u8(vreinterpretq_u8_u64(vreinterpretq_u64_u32(vdupq_n_u32(0))),
vreinterpretq_u8_u64(
vreinterpretq_u64_u32(vdupq_n_u32(0)))));
vreinterpretq_u8_u64(vreinterpretq_u64_u32(vdupq_n_u32(0)))));
const uint64x2_t maskB = vreinterpretq_u64_u32(veorq_u32(vreinterpretq_u32_u64(maskA), vreinterpretq_u32_u64(b)));
*outLeft = mm_shuffle_epi8(in, maskB);
*outRight = mm_shuffle_epi8(in, maskA);
}
TARGET_WITH_NEON FORCE_INLINE uint64x2_t
fold (uint64x2_t
in,
uint64x2_t foldConstants
)
TARGET_WITH_NEON FORCE_INLINE uint64x2_t fold(uint64x2_t in, uint64x2_t foldConstants)
{
return
return
veorq_u64(sse2neon_vmull_p64(vget_low_u64(in), vget_low_u64(foldConstants)),
veorq_u64(sse2neon_vmull_p64(vget_low_u64(in), vget_low_u64(foldConstants)),
sse2neon_vmull_p64(vget_high_u64(in), vget_high_u64(foldConstants))
);
);
}
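In formula form, the fold helper above is a two-lane carry-less multiply and XOR:

    fold(A, K) = clmul64(A.lo, K.lo) XOR clmul64(A.hi, K.hi)

where clmul64 is the 64×64 → 128-bit polynomial multiplication, provided here by sse2neon_vmull_p64, the NEON PMULL counterpart of the PCLMULQDQ instruction used on the x86 code path.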
/**
@@ -85,16 +79,13 @@ AARU_EXPORT TARGET_WITH_NEON uint64_t AARU_CALL crc64_vmull(uint64_t previous_cr
const size_t alignedLength = alignedEnd - alignedData;
const uint64x2_t leadInMask =
vreinterpretq_u64_u32(vld1q_u32(
(const uint32_t *)(const uint64x2_t *)(shuffleMasks + (16 - leadInSize))));
vreinterpretq_u64_u32(vld1q_u32((const uint32_t *)(const uint64x2_t *)(shuffleMasks + (16 - leadInSize))));
uint64x2_t a = vreinterpretq_u64_u32(vdupq_n_u32(0));
uint64x2_t b = vreinterpretq_u64_u32(
vld1q_u32((const uint32_t *)alignedData)); // Use a signed shift right to create a mask with the sign bit
const uint64x2_t data0 =
vreinterpretq_u64_u8(
vbslq_u8(vreinterpretq_u8_s8(vshrq_n_s8(vreinterpretq_s8_u64(leadInMask), 7)),
vreinterpretq_u8_u64(b),
vreinterpretq_u8_u64(a)));
vreinterpretq_u64_u8(vbslq_u8(vreinterpretq_u8_s8(vshrq_n_s8(vreinterpretq_s8_u64(leadInMask), 7)),
vreinterpretq_u8_u64(b), vreinterpretq_u8_u64(a)));
const uint64x2_t initialCrc = vsetq_lane_u64(~previous_crc, vdupq_n_u64(0), 0);
@@ -165,12 +156,12 @@ AARU_EXPORT TARGET_WITH_NEON uint64_t AARU_CALL crc64_vmull(uint64_t previous_cr
}
uint64x2_t P;
if(len == 16) P = veorq_u64(accumulator, vreinterpretq_u64_u32(vld1q_u32((const uint32_t *)alignedData)));
if(len == 16)
P = veorq_u64(accumulator, vreinterpretq_u64_u32(vld1q_u32((const uint32_t *)alignedData)));
else
{
const uint64x2_t end0 =
veorq_u64(accumulator,
vreinterpretq_u64_u32(vld1q_u32((const uint32_t *)alignedData)));
veorq_u64(accumulator, vreinterpretq_u64_u32(vld1q_u32((const uint32_t *)alignedData)));
const uint64x2_t end1 = vreinterpretq_u64_u32(vld1q_u32((const uint32_t *)(alignedData + 1)));
uint64x2_t A, B, C, D;

View File

@@ -129,8 +129,7 @@ AARU_EXPORT int AARU_CALL fletcher16_update(fletcher16_ctx *ctx, const uint8_t *
{
len -= NMAX;
n = NMAX / 6; /* NMAX is divisible by 6 */
do
{
do {
sum1 += data[0];
sum2 += sum1;
sum1 += data[0 + 1];
@@ -146,8 +145,7 @@ AARU_EXPORT int AARU_CALL fletcher16_update(fletcher16_ctx *ctx, const uint8_t *
/* 6 sums unrolled */
data += 6;
}
while(--n);
} while(--n);
sum1 %= FLETCHER16_MODULE;
sum2 %= FLETCHER16_MODULE;
}
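The NMAX / 6 unroll count is the usual deferred-modulo bound, the same trick the Adler-32 routine above uses with NMAX / 16: starting from fully reduced sums, after k unreduced bytes the second sum is at most

    s2 ≤ (m − 1)·(k + 1) + 255·k·(k + 1)/2

with m the modulus, so NMAX is picked as the largest k (rounded down to a multiple of the unroll factor) for which this still fits in an unsigned 32-bit accumulator, and the modulo reduction only has to run once per NMAX-byte chunk.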

View File

@@ -37,14 +37,17 @@ AARU_EXPORT void AARU_CALL fletcher16_free(fletcher16_ctx *ctx);
#if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \
defined(__i386__) || defined(__THW_INTEL) || defined(_M_IX86)
AARU_EXPORT TARGET_WITH_AVX2 void AARU_CALL fletcher16_avx2(uint8_t* sum1, uint8_t* sum2, const uint8_t* data, long len);
AARU_EXPORT TARGET_WITH_SSSE3 void AARU_CALL fletcher16_ssse3(uint8_t* sum1, uint8_t* sum2, const uint8_t* data, long len);
AARU_EXPORT TARGET_WITH_AVX2 void AARU_CALL fletcher16_avx2(uint8_t *sum1, uint8_t *sum2, const uint8_t *data,
long len);
AARU_EXPORT TARGET_WITH_SSSE3 void AARU_CALL fletcher16_ssse3(uint8_t *sum1, uint8_t *sum2, const uint8_t *data,
long len);
#endif
#if defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM)
AARU_EXPORT TARGET_WITH_NEON void AARU_CALL fletcher16_neon(uint8_t* sum1, uint8_t* sum2, const uint8_t* data, uint32_t len);
AARU_EXPORT TARGET_WITH_NEON void AARU_CALL fletcher16_neon(uint8_t *sum1, uint8_t *sum2, const uint8_t *data,
uint32_t len);
#endif

View File

@@ -42,8 +42,7 @@
* @param data Pointer to the data buffer.
* @param len Length of the data buffer in bytes.
*/
AARU_EXPORT TARGET_WITH_AVX2 void AARU_CALL
fletcher16_avx2(uint8_t *sum1, uint8_t *sum2, const uint8_t *data, long len)
AARU_EXPORT TARGET_WITH_AVX2 void AARU_CALL fletcher16_avx2(uint8_t *sum1, uint8_t *sum2, const uint8_t *data, long len)
{
uint32_t s1 = *sum1;
uint32_t s2 = *sum2;
@@ -64,38 +63,8 @@ fletcher16_avx2(uint8_t *sum1, uint8_t *sum2, const uint8_t *data, long len)
if(n > blocks) n = (unsigned)blocks;
blocks -= n;
const __m256i tap = _mm256_set_epi8(1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31,
32);
const __m256i tap = _mm256_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
const __m256i zero = _mm256_setzero_si256();
const __m256i ones = _mm256_set1_epi16(1);
@@ -106,8 +75,7 @@ fletcher16_avx2(uint8_t *sum1, uint8_t *sum2, const uint8_t *data, long len)
__m256i v_ps = _mm256_set_epi32(0, 0, 0, 0, 0, 0, 0, (s1 * n));
__m256i v_s2 = _mm256_set_epi32(0, 0, 0, 0, 0, 0, 0, s2);
__m256i v_s1 = _mm256_setzero_si256();
do
{
do {
/*
* Load 32 input bytes.
*/
@@ -126,8 +94,7 @@ fletcher16_avx2(uint8_t *sum1, uint8_t *sum2, const uint8_t *data, long len)
v_s2 = _mm256_add_epi32(v_s2, _mm256_madd_epi16(mad, ones));
data += BLOCK_SIZE;
}
while(--n);
} while(--n);
__m128i sum = _mm_add_epi32(_mm256_castsi256_si128(v_s1), _mm256_extracti128_si256(v_s1, 1));
__m128i hi = _mm_unpackhi_epi64(sum, sum);
@@ -177,8 +144,7 @@ fletcher16_avx2(uint8_t *sum1, uint8_t *sum2, const uint8_t *data, long len)
s2 += (s1 += *data++);
len -= 16;
}
while(len--)
{ s2 += (s1 += *data++); }
while(len--) { s2 += (s1 += *data++); }
s1 %= FLETCHER16_MODULE;
s2 %= FLETCHER16_MODULE;
}

View File

@@ -88,8 +88,12 @@ TARGET_WITH_NEON void fletcher16_neon(uint8_t *sum1, uint8_t *sum2, const uint8_
* processed before s2 must be reduced modulo FLETCHER16_MODULE.
*/
#ifdef _MSC_VER
uint32x4_t v_s2 = {.n128_u32 = {0, 0, 0, s1 * n}};
uint32x4_t v_s1 = {.n128_u32 = {0, 0, 0, 0}};
uint32x4_t v_s2 = {
.n128_u32 = {0, 0, 0, s1 * n}
};
uint32x4_t v_s1 = {
.n128_u32 = {0, 0, 0, 0}
};
#else
uint32x4_t v_s2 = (uint32x4_t){0, 0, 0, s1 * n};
uint32x4_t v_s1 = (uint32x4_t){0, 0, 0, 0};
@@ -98,8 +102,7 @@ TARGET_WITH_NEON void fletcher16_neon(uint8_t *sum1, uint8_t *sum2, const uint8_
uint16x8_t v_column_sum_2 = vdupq_n_u16(0);
uint16x8_t v_column_sum_3 = vdupq_n_u16(0);
uint16x8_t v_column_sum_4 = vdupq_n_u16(0);
do
{
do {
/*
* Load 32 input bytes.
*/
@@ -121,8 +124,7 @@ TARGET_WITH_NEON void fletcher16_neon(uint8_t *sum1, uint8_t *sum2, const uint8_
v_column_sum_3 = vaddw_u8(v_column_sum_3, vget_low_u8(bytes2));
v_column_sum_4 = vaddw_u8(v_column_sum_4, vget_high_u8(bytes2));
data += BLOCK_SIZE;
}
while(--n);
} while(--n);
v_s2 = vshlq_n_u32(v_s2, 5);
/*
* Multiply-add bytes by [ 32, 31, 30, ... ] for s2.
@@ -198,8 +200,7 @@ TARGET_WITH_NEON void fletcher16_neon(uint8_t *sum1, uint8_t *sum2, const uint8_
s2 += (s1 += *data++);
len -= 16;
}
while(len--)
{ s2 += (s1 += *data++); }
while(len--) { s2 += (s1 += *data++); }
s1 %= FLETCHER16_MODULE;
s2 %= FLETCHER16_MODULE;
}

View File

@@ -49,8 +49,8 @@
* @param data Pointer to the data buffer.
* @param len Length of the data buffer in bytes.
*/
AARU_EXPORT TARGET_WITH_SSSE3 void AARU_CALL
fletcher16_ssse3(uint8_t *sum1, uint8_t *sum2, const uint8_t *data, long len)
AARU_EXPORT TARGET_WITH_SSSE3 void AARU_CALL fletcher16_ssse3(uint8_t *sum1, uint8_t *sum2, const uint8_t *data,
long len)
{
uint32_t s1 = *sum1;
uint32_t s2 = *sum2;
@@ -79,8 +79,7 @@ fletcher16_ssse3(uint8_t *sum1, uint8_t *sum2, const uint8_t *data, long len)
__m128i v_ps = _mm_set_epi32(0, 0, 0, s1 * n);
__m128i v_s2 = _mm_set_epi32(0, 0, 0, s2);
__m128i v_s1 = _mm_set_epi32(0, 0, 0, 0);
do
{
do {
/*
* Load 32 input bytes.
*/
@@ -101,8 +100,7 @@ fletcher16_ssse3(uint8_t *sum1, uint8_t *sum2, const uint8_t *data, long len)
const __m128i mad2 = _mm_maddubs_epi16(bytes2, tap2);
v_s2 = _mm_add_epi32(v_s2, _mm_madd_epi16(mad2, ones));
data += BLOCK_SIZE;
}
while(--n);
} while(--n);
v_s2 = _mm_add_epi32(v_s2, _mm_slli_epi32(v_ps, 5));
/*
* Sum epi32 ints v_s1(s2) and accumulate in s1(s2).
@@ -150,8 +148,7 @@ fletcher16_ssse3(uint8_t *sum1, uint8_t *sum2, const uint8_t *data, long len)
s2 += (s1 += *data++);
len -= 16;
}
while(len--)
{ s2 += (s1 += *data++); }
while(len--) { s2 += (s1 += *data++); }
s1 %= FLETCHER16_MODULE;
s2 %= FLETCHER16_MODULE;
}

View File

@@ -92,7 +92,6 @@ AARU_EXPORT int AARU_CALL fletcher32_update(fletcher32_ctx *ctx, const uint8_t *
}
#endif
uint32_t sum1 = ctx->sum1;
uint32_t sum2 = ctx->sum2;
unsigned n;
@@ -130,8 +129,7 @@ AARU_EXPORT int AARU_CALL fletcher32_update(fletcher32_ctx *ctx, const uint8_t *
{
len -= NMAX;
n = NMAX / 16; /* NMAX is divisible by 16 */
do
{
do {
sum1 += data[0];
sum2 += sum1;
sum1 += data[0 + 1];
@@ -167,8 +165,7 @@ AARU_EXPORT int AARU_CALL fletcher32_update(fletcher32_ctx *ctx, const uint8_t *
/* 16 sums unrolled */
data += 16;
}
while(--n);
} while(--n);
sum1 %= FLETCHER32_MODULE;
sum2 %= FLETCHER32_MODULE;
}

View File

@@ -29,22 +29,24 @@ typedef struct
uint16_t sum2;
} fletcher32_ctx;
AARU_EXPORT fletcher32_ctx* AARU_CALL fletcher32_init();
AARU_EXPORT int AARU_CALL fletcher32_update(fletcher32_ctx* ctx, const uint8_t* data, uint32_t len);
AARU_EXPORT int AARU_CALL fletcher32_final(fletcher32_ctx* ctx, uint32_t* checksum);
AARU_EXPORT void AARU_CALL fletcher32_free(fletcher32_ctx* ctx);
AARU_EXPORT fletcher32_ctx *AARU_CALL fletcher32_init();
AARU_EXPORT int AARU_CALL fletcher32_update(fletcher32_ctx *ctx, const uint8_t *data, uint32_t len);
AARU_EXPORT int AARU_CALL fletcher32_final(fletcher32_ctx *ctx, uint32_t *checksum);
AARU_EXPORT void AARU_CALL fletcher32_free(fletcher32_ctx *ctx);
#if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \
defined(__i386__) || defined(__THW_INTEL) || defined(_M_IX86)
AARU_EXPORT TARGET_WITH_AVX2 void AARU_CALL fletcher32_avx2(uint16_t* sum1, uint16_t* sum2, const uint8_t* data, long len);
AARU_EXPORT TARGET_WITH_SSSE3 void AARU_CALL fletcher32_ssse3(uint16_t* sum1, uint16_t* sum2, const uint8_t* data, long len);
AARU_EXPORT TARGET_WITH_AVX2 void AARU_CALL fletcher32_avx2(uint16_t *sum1, uint16_t *sum2, const uint8_t *data,
long len);
AARU_EXPORT TARGET_WITH_SSSE3 void AARU_CALL fletcher32_ssse3(uint16_t *sum1, uint16_t *sum2, const uint8_t *data,
long len);
#endif
#if defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM)
AARU_EXPORT void AARU_CALL fletcher32_neon(uint16_t* sum1, uint16_t* sum2, const uint8_t* data, uint32_t len);
AARU_EXPORT void AARU_CALL fletcher32_neon(uint16_t *sum1, uint16_t *sum2, const uint8_t *data, uint32_t len);
#endif

View File

@@ -42,8 +42,8 @@
* @param data Pointer to the data buffer.
* @param len Length of the data buffer in bytes.
*/
AARU_EXPORT TARGET_WITH_AVX2 void AARU_CALL
fletcher32_avx2(uint16_t *sum1, uint16_t *sum2, const uint8_t *data, long len)
AARU_EXPORT TARGET_WITH_AVX2 void AARU_CALL fletcher32_avx2(uint16_t *sum1, uint16_t *sum2, const uint8_t *data,
long len)
{
uint32_t s1 = *sum1;
uint32_t s2 = *sum2;
@@ -64,38 +64,8 @@ fletcher32_avx2(uint16_t *sum1, uint16_t *sum2, const uint8_t *data, long len)
if(n > blocks) n = (unsigned)blocks;
blocks -= n;
const __m256i tap = _mm256_set_epi8(1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31,
32);
const __m256i tap = _mm256_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
const __m256i zero = _mm256_setzero_si256();
const __m256i ones = _mm256_set1_epi16(1);
@@ -106,8 +76,7 @@ fletcher32_avx2(uint16_t *sum1, uint16_t *sum2, const uint8_t *data, long len)
__m256i v_ps = _mm256_set_epi32(0, 0, 0, 0, 0, 0, 0, (s1 * n));
__m256i v_s2 = _mm256_set_epi32(0, 0, 0, 0, 0, 0, 0, s2);
__m256i v_s1 = _mm256_setzero_si256();
do
{
do {
/*
* Load 32 input bytes.
*/
@@ -126,8 +95,7 @@ fletcher32_avx2(uint16_t *sum1, uint16_t *sum2, const uint8_t *data, long len)
v_s2 = _mm256_add_epi32(v_s2, _mm256_madd_epi16(mad, ones));
data += BLOCK_SIZE;
}
while(--n);
} while(--n);
__m128i sum = _mm_add_epi32(_mm256_castsi256_si128(v_s1), _mm256_extracti128_si256(v_s1, 1));
__m128i hi = _mm_unpackhi_epi64(sum, sum);
@@ -177,8 +145,7 @@ fletcher32_avx2(uint16_t *sum1, uint16_t *sum2, const uint8_t *data, long len)
s2 += (s1 += *data++);
len -= 16;
}
while(len--)
{ s2 += (s1 += *data++); }
while(len--) { s2 += (s1 += *data++); }
if(s1 >= FLETCHER32_MODULE) s1 -= FLETCHER32_MODULE;
s2 %= FLETCHER32_MODULE;
}

View File

@@ -88,8 +88,12 @@ TARGET_WITH_NEON void fletcher32_neon(uint16_t *sum1, uint16_t *sum2, const uint
* processed before s2 must be reduced modulo FLETCHER32_MODULE.
*/
#ifdef _MSC_VER
uint32x4_t v_s2 = {.n128_u32 = {0, 0, 0, s1 * n}};
uint32x4_t v_s1 = {.n128_u32 = {0, 0, 0, 0}};
uint32x4_t v_s2 = {
.n128_u32 = {0, 0, 0, s1 * n}
};
uint32x4_t v_s1 = {
.n128_u32 = {0, 0, 0, 0}
};
#else
uint32x4_t v_s2 = (uint32x4_t){0, 0, 0, s1 * n};
uint32x4_t v_s1 = (uint32x4_t){0, 0, 0, 0};
@@ -98,8 +102,7 @@ TARGET_WITH_NEON void fletcher32_neon(uint16_t *sum1, uint16_t *sum2, const uint
uint16x8_t v_column_sum_2 = vdupq_n_u16(0);
uint16x8_t v_column_sum_3 = vdupq_n_u16(0);
uint16x8_t v_column_sum_4 = vdupq_n_u16(0);
do
{
do {
/*
* Load 32 input bytes.
*/
@@ -121,8 +124,7 @@ TARGET_WITH_NEON void fletcher32_neon(uint16_t *sum1, uint16_t *sum2, const uint
v_column_sum_3 = vaddw_u8(v_column_sum_3, vget_low_u8(bytes2));
v_column_sum_4 = vaddw_u8(v_column_sum_4, vget_high_u8(bytes2));
data += BLOCK_SIZE;
}
while(--n);
} while(--n);
v_s2 = vshlq_n_u32(v_s2, 5);
/*
* Multiply-add bytes by [ 32, 31, 30, ... ] for s2.
@@ -198,8 +200,7 @@ TARGET_WITH_NEON void fletcher32_neon(uint16_t *sum1, uint16_t *sum2, const uint
s2 += (s1 += *data++);
len -= 16;
}
while(len--)
{ s2 += (s1 += *data++); }
while(len--) { s2 += (s1 += *data++); }
if(s1 >= FLETCHER32_MODULE) s1 -= FLETCHER32_MODULE;
s2 %= FLETCHER32_MODULE;
}

View File

@@ -49,8 +49,8 @@
* @param data Pointer to the data buffer.
* @param len Length of the data buffer in bytes.
*/
AARU_EXPORT TARGET_WITH_SSSE3 void AARU_CALL
fletcher32_ssse3(uint16_t *sum1, uint16_t *sum2, const uint8_t *data, long len)
AARU_EXPORT TARGET_WITH_SSSE3 void AARU_CALL fletcher32_ssse3(uint16_t *sum1, uint16_t *sum2, const uint8_t *data,
long len)
{
uint32_t s1 = *sum1;
uint32_t s2 = *sum2;
@@ -79,8 +79,7 @@ fletcher32_ssse3(uint16_t *sum1, uint16_t *sum2, const uint8_t *data, long len)
__m128i v_ps = _mm_set_epi32(0, 0, 0, s1 * n);
__m128i v_s2 = _mm_set_epi32(0, 0, 0, s2);
__m128i v_s1 = _mm_set_epi32(0, 0, 0, 0);
do
{
do {
/*
* Load 32 input bytes.
*/
@@ -101,8 +100,7 @@ fletcher32_ssse3(uint16_t *sum1, uint16_t *sum2, const uint8_t *data, long len)
const __m128i mad2 = _mm_maddubs_epi16(bytes2, tap2);
v_s2 = _mm_add_epi32(v_s2, _mm_madd_epi16(mad2, ones));
data += BLOCK_SIZE;
}
while(--n);
} while(--n);
v_s2 = _mm_add_epi32(v_s2, _mm_slli_epi32(v_ps, 5));
/*
* Sum epi32 ints v_s1(s2) and accumulate in s1(s2).
@@ -150,8 +148,7 @@ fletcher32_ssse3(uint16_t *sum1, uint16_t *sum2, const uint8_t *data, long len)
s2 += (s1 += *data++);
len -= 16;
}
while(len--)
{ s2 += (s1 += *data++); }
while(len--) { s2 += (s1 += *data++); }
if(s1 >= FLETCHER32_MODULE) s1 -= FLETCHER32_MODULE;
s2 %= FLETCHER32_MODULE;
}

View File

@@ -20,5 +20,4 @@
#include "library.h"
AARU_EXPORT uint64_t AARU_CALL get_acn_version()
{ return AARU_CHECKUMS_NATIVE_VERSION; }
AARU_EXPORT uint64_t AARU_CALL get_acn_version() { return AARU_CHECKUMS_NATIVE_VERSION; }

simd.c
View File

@@ -56,7 +56,7 @@
*
* @return None.
*/
static void cpuid(int info, unsigned* eax, unsigned* ebx, unsigned* ecx, unsigned* edx)
static void cpuid(int info, unsigned *eax, unsigned *ebx, unsigned *ecx, unsigned *edx)
{
#ifdef _MSC_VER
unsigned int registers[4];
@@ -99,7 +99,7 @@ static void cpuid(int info, unsigned* eax, unsigned* ebx, unsigned* ecx, unsigne
*
* @return None.
*/
static void cpuidex(int info, int count, unsigned* eax, unsigned* ebx, unsigned* ecx, unsigned* edx)
static void cpuidex(int info, int count, unsigned *eax, unsigned *ebx, unsigned *ecx, unsigned *edx)
{
#ifdef _MSC_VER
unsigned int registers[4];
@@ -264,8 +264,7 @@ int have_crc32_apple()
*
* @return true if the current processor supports cryptographic instructions, false otherwise.
*/
int have_crypto_apple()
{ return 0; }
int have_crypto_apple() { return 0; }
#endif
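As a concrete example of how these wrappers are typically consumed (the have_avx2_sketch helper is illustrative, not this file's code): AVX2 support is reported in CPUID leaf 7, sub-leaf 0, EBX bit 5, so a check built on cpuid/cpuidex looks roughly like the following; a production test would additionally verify OSXSAVE/XGETBV state, which is omitted here.

    static int have_avx2_sketch(void)
    {
        unsigned eax, ebx, ecx, edx;

        cpuid(0, &eax, &ebx, &ecx, &edx);      /* highest supported standard CPUID leaf */
        if(eax < 7) return 0;

        cpuidex(7, 0, &eax, &ebx, &ecx, &edx); /* structured extended feature flags */

        return (ebx >> 5) & 1;                 /* CPUID.(EAX=07H,ECX=0):EBX.AVX2[bit 5] */
    }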

simd.h
View File

@@ -92,7 +92,7 @@ AARU_EXPORT int have_arm_crypto(void);
#define TARGET_WITH_NEON
#else
#if (__ARM_ARCH >= 7 || defined (__ARM_ARCH_8A))
#if(__ARM_ARCH >= 7 || defined(__ARM_ARCH_8A))
#ifdef __clang__
#define TARGET_ARMV8_WITH_CRC __attribute__((target("armv8-a,crc")))

View File

@@ -93,7 +93,7 @@ AARU_EXPORT void AARU_CALL spamsum_free(spamsum_ctx *ctx)
}
#define ROLL_SUM(ctx) ((ctx)->roll.h1 + (ctx)->roll.h2 + (ctx)->roll.h3)
#define SUM_HASH(c, h) (((h)*HASH_PRIME) ^ (c));
#define SUM_HASH(c, h) (((h) * HASH_PRIME) ^ (c));
#define SSDEEP_BS(index) (MIN_BLOCKSIZE << (index))
FORCE_INLINE void fuzzy_engine_step(spamsum_ctx *ctx, uint8_t c)
@@ -297,7 +297,8 @@ AARU_EXPORT int AARU_CALL spamsum_final(spamsum_ctx *ctx, uint8_t *result)
++bi;
i = (int)ctx->bh[bi].d_len;
if(i <= remain);
if(i <= remain)
;
memcpy(result, ctx->bh[bi].digest, (size_t)i);
result += i;
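For context, SUM_HASH is the FNV-style accumulator used by spamsum/ssdeep fuzzy hashing, ROLL_SUM is the rolling window hash, and SSDEEP_BS gives each bucket's block size; per input byte c the engine step roughly does the following (a sketch; the h_i name, bucket iteration, and reset details are not taken from this file):

    /* for every active block-size bucket i, keeping a partial hash h_i */
    h_i = (h_i * HASH_PRIME) ^ c;                         /* i.e. SUM_HASH(c, h_i) */
    if(ROLL_SUM(ctx) % SSDEEP_BS(i) == SSDEEP_BS(i) - 1)
    {
        /* rolling hash hit a block boundary: emit one digest character for bucket i, reset h_i */
    }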

View File

@@ -1,6 +1,6 @@
if("${CMAKE_C_PLATFORM_ID}" MATCHES "MinGW" OR "${AARU_BUILD_PACKAGE}" MATCHES 1)
if ("${CMAKE_C_PLATFORM_ID}" MATCHES "MinGW" OR "${AARU_BUILD_PACKAGE}" MATCHES 1)
return()
endif()
endif ()
# 'Google_test' is the subproject name
project(tests)

View File

@@ -91,7 +91,7 @@ TEST_F(fletcher16Fixture, fletcher16_auto_misaligned)
TEST_F(fletcher16Fixture, fletcher16_auto_1byte)
{
fletcher16_ctx* ctx = fletcher16_init();
fletcher16_ctx *ctx = fletcher16_init();
uint16_t fletcher;
EXPECT_NE(ctx, nullptr);
@@ -104,7 +104,7 @@ TEST_F(fletcher16Fixture, fletcher16_auto_1byte)
TEST_F(fletcher16Fixture, fletcher16_auto_5bytes)
{
fletcher16_ctx* ctx = fletcher16_init();
fletcher16_ctx *ctx = fletcher16_init();
uint16_t fletcher;
EXPECT_NE(ctx, nullptr);
@@ -117,7 +117,7 @@ TEST_F(fletcher16Fixture, fletcher16_auto_5bytes)
TEST_F(fletcher16Fixture, fletcher16_auto_7bytes)
{
fletcher16_ctx* ctx = fletcher16_init();
fletcher16_ctx *ctx = fletcher16_init();
uint16_t fletcher;
EXPECT_NE(ctx, nullptr);