From 58f97a93f38d78cdcff989690a33f43130458494 Mon Sep 17 00:00:00 2001
From: Natalia Portillo
Date: Wed, 13 Oct 2021 03:46:47 +0100
Subject: [PATCH] Refactor code.

---
 adler32.h      |  2 +-
 adler32_neon.c | 34 ++++++++++++++++-----------------
 crc32.h        |  4 ++--
 crc32_vmull.c  |  4 ++--
 crc64_clmul.c  |  4 ++--
 crc64_vmull.c  | 20 ++++++++++----------
 library.h      | 32 +++++++++++++++----------------
 simd.c         | 51 ++++++++++++++++++++++++--------------------------
 simd.h         | 32 +++++++++++++++----------------
 spamsum.c      | 26 ++++++++++++-------------
 10 files changed, 103 insertions(+), 106 deletions(-)

diff --git a/adler32.h b/adler32.h
index 4d5bb1a..cc787c5 100644
--- a/adler32.h
+++ b/adler32.h
@@ -39,7 +39,7 @@ AARU_EXPORT void AARU_CALL adler32_slicing(uint16_t* sum1, uint16_t* sum
     defined(__i386__) || defined(__THW_INTEL) || defined(_M_IX86)
 
 AARU_EXPORT SSSE3 void AARU_CALL adler32_ssse3(uint16_t* sum1, uint16_t* sum2, const uint8_t* data, long len);
-AARU_EXPORT AVX2 void AARU_CALL adler32_avx2(uint16_t* sum1, uint16_t* sum2, const uint8_t* data, long len);
+AARU_EXPORT AVX2 void AARU_CALL adler32_avx2(uint16_t* sum1, uint16_t* sum2, const uint8_t* data, long len);
 
 #endif
 
diff --git a/adler32_neon.c b/adler32_neon.c
index 4c4d069..58722a2 100644
--- a/adler32_neon.c
+++ b/adler32_neon.c
@@ -77,8 +77,8 @@ TARGET_WITH_SIMD void adler32_neon(uint16_t* sum1, uint16_t* sum2, const uint8_t
     uint32x4_t v_s2 = {.n128_u32 = {0, 0, 0, s1 * n}};
     uint32x4_t v_s1 = {.n128_u32 = {0, 0, 0, 0}};
 #else
-    uint32x4_t v_s2 = (uint32x4_t){0, 0, 0, s1 * n};
-    uint32x4_t v_s1 = (uint32x4_t){0, 0, 0, 0};
+    uint32x4_t v_s2 = (uint32x4_t){0, 0, 0, s1 * n};
+    uint32x4_t v_s1 = (uint32x4_t){0, 0, 0, 0};
 #endif
     uint16x8_t v_column_sum_1 = vdupq_n_u16(0);
     uint16x8_t v_column_sum_2 = vdupq_n_u16(0);
@@ -112,23 +112,23 @@ TARGET_WITH_SIMD void adler32_neon(uint16_t* sum1, uint16_t* sum2, const uint8_t
      * Multiply-add bytes by [ 32, 31, 30, ... ] for s2.
      */
 #ifdef _WIN32
-    v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_1), neon_ld1m_16((uint16_t[]) {32, 31, 30, 29}));
+    v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_1), neon_ld1m_16((uint16_t[]){32, 31, 30, 29}));
     v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_1), neon_ld1m_16((uint16_t[]){28, 27, 26, 25}));
-    v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_2), neon_ld1m_16((uint16_t[]) {24, 23, 22, 21}));
-    v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_2), neon_ld1m_16((uint16_t[]) {20, 19, 18, 17}));
-    v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_3), neon_ld1m_16((uint16_t[]) {16, 15, 14, 13}));
-    v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_3), neon_ld1m_16((uint16_t[]) {12, 11, 10, 9}));
-    v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_4), neon_ld1m_16((uint16_t[]) {8, 7, 6, 5}));
-    v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_4), neon_ld1m_16((uint16_t[]) {4, 3, 2, 1}));
+    v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_2), neon_ld1m_16((uint16_t[]){24, 23, 22, 21}));
+    v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_2), neon_ld1m_16((uint16_t[]){20, 19, 18, 17}));
+    v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_3), neon_ld1m_16((uint16_t[]){16, 15, 14, 13}));
+    v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_3), neon_ld1m_16((uint16_t[]){12, 11, 10, 9}));
+    v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_4), neon_ld1m_16((uint16_t[]){8, 7, 6, 5}));
+    v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_4), neon_ld1m_16((uint16_t[]){4, 3, 2, 1}));
 #else
-    v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_1), (uint16x4_t){32, 31, 30, 29});
-    v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_1), (uint16x4_t){28, 27, 26, 25});
-    v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_2), (uint16x4_t){24, 23, 22, 21});
-    v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_2), (uint16x4_t){20, 19, 18, 17});
-    v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_3), (uint16x4_t){16, 15, 14, 13});
-    v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_3), (uint16x4_t){12, 11, 10, 9});
-    v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_4), (uint16x4_t){8, 7, 6, 5});
-    v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_4), (uint16x4_t){4, 3, 2, 1});
+    v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_1), (uint16x4_t){32, 31, 30, 29});
+    v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_1), (uint16x4_t){28, 27, 26, 25});
+    v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_2), (uint16x4_t){24, 23, 22, 21});
+    v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_2), (uint16x4_t){20, 19, 18, 17});
+    v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_3), (uint16x4_t){16, 15, 14, 13});
+    v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_3), (uint16x4_t){12, 11, 10, 9});
+    v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_4), (uint16x4_t){8, 7, 6, 5});
+    v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_4), (uint16x4_t){4, 3, 2, 1});
 #endif
     /*
      * Sum epi32 ints v_s1(s2) and accumulate in s1(s2).
diff --git a/crc32.h b/crc32.h
index 88770d4..e43986b 100644
--- a/crc32.h
+++ b/crc32.h
@@ -275,9 +275,9 @@ AARU_EXPORT CLMUL uint32_t AARU_CALL crc32_clmul(uint32_t previous_crc, const ui
 #if defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM)
 #if __ARM_ARCH >= 8
-AARU_EXPORT TARGET_ARMV8_WITH_CRC uint32_t AARU_CALL armv8_crc32_little(uint32_t previous_crc,
+AARU_EXPORT TARGET_ARMV8_WITH_CRC uint32_t AARU_CALL armv8_crc32_little(uint32_t previous_crc,
                                                                         const uint8_t* data,
-                                                                        uint32_t len);
+                                                                        uint32_t len);
 #endif
 
 AARU_EXPORT TARGET_WITH_SIMD uint32_t AARU_CALL crc32_vmull(uint32_t previous_crc, const uint8_t* data, long len);
 #endif
diff --git a/crc32_vmull.c b/crc32_vmull.c
index 7e8d974..835f32c 100644
--- a/crc32_vmull.c
+++ b/crc32_vmull.c
@@ -26,13 +26,13 @@
 #if defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM)
 
 #include
-#include
 #include
 
 #include "library.h"
+#include "arm_vmull.h"
 #include "crc32.h"
 #include "crc32_simd.h"
-#include "arm_vmull.h"
 
 /*
  * somewhat surprisingly the "naive" way of doing this, ie. with a flag and a cond. branch,
diff --git a/crc64_clmul.c b/crc64_clmul.c
index 5c26e79..329ffde 100644
--- a/crc64_clmul.c
+++ b/crc64_clmul.c
@@ -114,7 +114,7 @@ AARU_EXPORT CLMUL uint64_t AARU_CALL crc64_clmul(uint64_t crc, const uint8_t* da
         const __m128i P = _mm_xor_si128(A, crc0);
 
         R = _mm_xor_si128(_mm_clmulepi64_si128(P, foldConstants1, 0x10),
-                          _mm_xor_si128(_mm_srli_si128(P, 8), _mm_slli_si128(crc1, 8)));
+                          _mm_xor_si128(_mm_srli_si128(P, 8), _mm_slli_si128(crc1, 8)));
     }
     else if(alignedLength == 2)
     {
@@ -132,7 +132,7 @@ AARU_EXPORT CLMUL uint64_t AARU_CALL crc64_clmul(uint64_t crc, const uint8_t* da
         const __m128i P = _mm_xor_si128(_mm_xor_si128(B, C), crc0);
 
         R = _mm_xor_si128(_mm_clmulepi64_si128(P, foldConstants1, 0x10),
-                          _mm_xor_si128(_mm_srli_si128(P, 8), _mm_slli_si128(crc1, 8)));
+                          _mm_xor_si128(_mm_srli_si128(P, 8), _mm_slli_si128(crc1, 8)));
     }
     else
     {
diff --git a/crc64_vmull.c b/crc64_vmull.c
index d8c6f83..3bc7885 100644
--- a/crc64_vmull.c
+++ b/crc64_vmull.c
@@ -1,16 +1,16 @@
 /*
-* This file is part of the Aaru Data Preservation Suite.
-* Copyright (c) 2019-2021 Natalia Portillo.
-*
-* This file is under the public domain:
-* https://github.com/rawrunprotected/crc
-*/
+ * This file is part of the Aaru Data Preservation Suite.
+ * Copyright (c) 2019-2021 Natalia Portillo.
+ *
+ * This file is under the public domain:
+ * https://github.com/rawrunprotected/crc
+ */
 
 #if defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM)
 
 #include
-#include
 #include
+#include
 
 #include "library.h"
 #include "arm_vmull.h"
@@ -26,7 +26,7 @@ TARGET_WITH_SIMD FORCE_INLINE void shiftRight128(uint64x2_t in, size_t n, uint64
     const uint64x2_t maskA =
         vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)(const uint64x2_t*)(shuffleMasks + (16 - n))));
     uint64x2_t b = vreinterpretq_u64_u8(vceqq_u8(vreinterpretq_u8_u64(vreinterpretq_u64_u32(vdupq_n_u32(0))),
-                                                 vreinterpretq_u8_u64(vreinterpretq_u64_u32(vdupq_n_u32(0)))));
+                                                 vreinterpretq_u8_u64(vreinterpretq_u64_u32(vdupq_n_u32(0)))));
     const uint64x2_t maskB = vreinterpretq_u64_u32(veorq_u32(vreinterpretq_u32_u64(maskA), vreinterpretq_u32_u64(b)));
 
     *outLeft = mm_shuffle_epi8(in, maskB);
@@ -84,7 +84,7 @@ AARU_EXPORT TARGET_WITH_SIMD uint64_t AARU_CALL crc64_vmull(uint64_t previous_cr
         const uint64x2_t P = veorq_u64(A, crc0);
 
         R = veorq_u64(sse2neon_vmull_p64(vget_low_u64(P), vget_high_u64(foldConstants1)),
-                      veorq_u64(mm_srli_si128(P, 8), mm_slli_si128(crc1, 8)));
+                      veorq_u64(mm_srli_si128(P, 8), mm_slli_si128(crc1, 8)));
     }
     else if(alignedLength == 2)
     {
@@ -102,7 +102,7 @@ AARU_EXPORT TARGET_WITH_SIMD uint64_t AARU_CALL crc64_vmull(uint64_t previous_cr
         const uint64x2_t P = veorq_u64(veorq_u64(B, C), crc0);
 
         R = veorq_u64(sse2neon_vmull_p64(vget_low_u64(P), vget_high_u64(foldConstants1)),
-                      veorq_u64(mm_srli_si128(P, 8), mm_slli_si128(crc1, 8)));
+                      veorq_u64(mm_srli_si128(P, 8), mm_slli_si128(crc1, 8)));
     }
     else
     {
diff --git a/library.h b/library.h
index ee61c87..d77a770 100644
--- a/library.h
+++ b/library.h
@@ -1,20 +1,20 @@
 /*
-* This file is part of the Aaru Data Preservation Suite.
-* Copyright (c) 2019-2021 Natalia Portillo.
-*
-* This library is free software; you can redistribute it and/or modify
-* it under the terms of the GNU Lesser General Public License as
-* published by the Free Software Foundation; either version 2.1 of the
-* License, or (at your option) any later version.
-*
-* This library is distributed in the hope that it will be useful, but
-* WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-* Lesser General Public License for more details.
-*
-* You should have received a copy of the GNU Lesser General Public
-* License along with this library; if not, see .
-*/
+ * This file is part of the Aaru Data Preservation Suite.
+ * Copyright (c) 2019-2021 Natalia Portillo.
+ *
+ * This library is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of the
+ * License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see .
+ */
 
 #ifndef AARU_CHECKSUMS_NATIVE_LIBRARY_H
 #define AARU_CHECKSUMS_NATIVE_LIBRARY_H
diff --git a/simd.c b/simd.c
index 1db6390..e550369 100644
--- a/simd.c
+++ b/simd.c
@@ -1,20 +1,20 @@
 /*
-* This file is part of the Aaru Data Preservation Suite.
-* Copyright (c) 2019-2021 Natalia Portillo.
-*
-* This library is free software; you can redistribute it and/or modify
-* it under the terms of the GNU Lesser General Public License as
-* published by the Free Software Foundation; either version 2.1 of the
-* License, or (at your option) any later version.
-*
-* This library is distributed in the hope that it will be useful, but
-* WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-* Lesser General Public License for more details.
-*
-* You should have received a copy of the GNU Lesser General Public
-* License along with this library; if not, see .
-*/
+ * This file is part of the Aaru Data Preservation Suite.
+ * Copyright (c) 2019-2021 Natalia Portillo.
+ *
+ * This library is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of the
+ * License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see .
+ */
 
 #include "library.h"
 #include "simd.h"
@@ -112,6 +112,7 @@ int have_avx2(void)
 #if defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM)
 #if defined(_WIN32)
 #include
+
 #include
 #elif defined(__APPLE__)
 #include
@@ -120,29 +121,25 @@
 #endif
 #endif
 
-#if (defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM)) && defined(__APPLE__)
+#if(defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM)) && defined(__APPLE__)
 int have_neon_apple()
 {
-    int value;
+    int value;
     size_t len = sizeof(int);
-    int ret=
-        sysctlbyname("hw.optional.neon", &value, &len, NULL, 0);
+    int ret = sysctlbyname("hw.optional.neon", &value, &len, NULL, 0);
 
-    if(ret != 0)
-        return 0;
+    if(ret != 0) return 0;
 
     return value == 1;
 }
 
 int have_crc32_apple()
 {
-    int value;
+    int value;
     size_t len = sizeof(int);
-    int ret=
-        sysctlbyname("hw.optional.crc32", &value, &len, NULL, 0);
+    int ret = sysctlbyname("hw.optional.crc32", &value, &len, NULL, 0);
 
-    if(ret != 0)
-        return 0;
+    if(ret != 0) return 0;
 
     return value == 1;
 }
diff --git a/simd.h b/simd.h
index 5bb4cf3..1f20a33 100644
--- a/simd.h
+++ b/simd.h
@@ -1,20 +1,20 @@
 /*
-* This file is part of the Aaru Data Preservation Suite.
-* Copyright (c) 2019-2021 Natalia Portillo.
-*
-* This library is free software; you can redistribute it and/or modify
-* it under the terms of the GNU Lesser General Public License as
-* published by the Free Software Foundation; either version 2.1 of the
-* License, or (at your option) any later version.
-*
-* This library is distributed in the hope that it will be useful, but
-* WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-* Lesser General Public License for more details.
-*
-* You should have received a copy of the GNU Lesser General Public
-* License along with this library; if not, see .
-*/
+ * This file is part of the Aaru Data Preservation Suite.
+ * Copyright (c) 2019-2021 Natalia Portillo.
+ *
+ * This library is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of the
+ * License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see .
+ */
 
 #ifndef AARU_CHECKSUMS_NATIVE_SIMD_H
 #define AARU_CHECKSUMS_NATIVE_SIMD_H
diff --git a/spamsum.c b/spamsum.c
index 419a08b..f3cfa7b 100644
--- a/spamsum.c
+++ b/spamsum.c
@@ -141,7 +141,7 @@ AARU_LOCAL inline void roll_hash(spamsum_ctx* ctx, uint8_t c)
 
 AARU_LOCAL inline void fuzzy_try_reduce_blockhash(spamsum_ctx* ctx)
 {
-    //assert(ctx->bh_start < ctx->bh_end);
+    // assert(ctx->bh_start < ctx->bh_end);
 
     if(ctx->bh_end - ctx->bh_start < 2) /* Need at least two working hashes. */
         return;
@@ -163,7 +163,7 @@ AARU_LOCAL inline void fuzzy_try_fork_blockhash(spamsum_ctx* ctx)
 {
     if(ctx->bh_end >= NUM_BLOCKHASHES) return;
 
-    //assert(ctx->bh_end != 0);
+    // assert(ctx->bh_end != 0);
 
     uint32_t obh = ctx->bh_end - 1;
     uint32_t nbh = ctx->bh_end;
@@ -184,7 +184,7 @@ AARU_EXPORT int AARU_CALL spamsum_final(spamsum_ctx* ctx, uint8_t* result)
     if(!result) return -1;
 
     /* Verify that our elimination was not overeager. */
-    //assert(bi == 0 || (uint64_t)SSDEEP_BS(bi) / 2 * SPAMSUM_LENGTH < ctx->total_size);
+    // assert(bi == 0 || (uint64_t)SSDEEP_BS(bi) / 2 * SPAMSUM_LENGTH < ctx->total_size);
 
     /* Initial blocksize guess. */
     while((uint64_t)SSDEEP_BS(bi) * SPAMSUM_LENGTH < ctx->total_size)
@@ -203,21 +203,21 @@ AARU_EXPORT int AARU_CALL spamsum_final(spamsum_ctx* ctx, uint8_t* result)
     while(bi > ctx->bh_start && ctx->bh[bi].d_len < SPAMSUM_LENGTH / 2) --bi;
 
-    //assert(!(bi > 0 && ctx->bh[bi].d_len < SPAMSUM_LENGTH / 2));
+    // assert(!(bi > 0 && ctx->bh[bi].d_len < SPAMSUM_LENGTH / 2));
 
     int i = snprintf((char*)result, (size_t)remain, "%lu:", (unsigned long)SSDEEP_BS(bi));
 
     if(i <= 0) /* Maybe snprintf has set errno here?
                */
         return -1;
 
-    //assert(i < remain);
+    // assert(i < remain);
 
     remain -= i;
     result += i;
 
     i = (int)ctx->bh[bi].d_len;
 
-    //assert(i <= remain);
+    // assert(i <= remain);
 
     memcpy(result, ctx->bh[bi].digest, (size_t)i);
     result += i;
@@ -225,7 +225,7 @@ AARU_EXPORT int AARU_CALL spamsum_final(spamsum_ctx* ctx, uint8_t* result)
 
     if(h != 0)
     {
-        //assert(remain > 0);
+        // assert(remain > 0);
 
         *result = b64[ctx->bh[bi].h % 64];
 
@@ -237,7 +237,7 @@ AARU_EXPORT int AARU_CALL spamsum_final(spamsum_ctx* ctx, uint8_t* result)
     }
     else if(ctx->bh[bi].digest[i] != 0)
    {
-        //assert(remain > 0);
+        // assert(remain > 0);
 
         *result = ctx->bh[bi].digest[i];
 
@@ -248,7 +248,7 @@ AARU_EXPORT int AARU_CALL spamsum_final(spamsum_ctx* ctx, uint8_t* result)
         }
     }
 
-    //assert(remain > 0);
+    // assert(remain > 0);
 
     *result++ = ':';
     --remain;
@@ -267,7 +267,7 @@ AARU_EXPORT int AARU_CALL spamsum_final(spamsum_ctx* ctx, uint8_t* result)
 
     if(h != 0)
     {
-        //assert(remain > 0);
+        // assert(remain > 0);
 
         h = ctx->bh[bi].half_h;
         *result = b64[h % 64];
@@ -284,7 +284,7 @@ AARU_EXPORT int AARU_CALL spamsum_final(spamsum_ctx* ctx, uint8_t* result)
 
     if(i != 0)
    {
-        //assert(remain > 0);
+        // assert(remain > 0);
 
         *result = (uint8_t)i;
 
@@ -298,9 +298,9 @@ AARU_EXPORT int AARU_CALL spamsum_final(spamsum_ctx* ctx, uint8_t* result)
     }
     else if(h != 0)
     {
-        //assert(ctx->bh[bi].d_len == 0);
-        //assert(remain > 0);
+        // assert(ctx->bh[bi].d_len == 0);
+        // assert(remain > 0);
 
         *result++ = b64[ctx->bh[bi].h % 64];
 
         /* No need to bother with FUZZY_FLAG_ELIMSEQ, because this