Refactor code.

2021-10-13 03:46:47 +01:00
parent ca65d12c7e
commit 58f97a93f3
10 changed files with 103 additions and 106 deletions


@@ -39,7 +39,7 @@ AARU_EXPORT void AARU_CALL adler32_slicing(uint16_t* sum1, uint16_t* sum
defined(__i386__) || defined(__THW_INTEL) || defined(_M_IX86)
AARU_EXPORT SSSE3 void AARU_CALL adler32_ssse3(uint16_t* sum1, uint16_t* sum2, const uint8_t* data, long len);
AARU_EXPORT AVX2 void AARU_CALL adler32_avx2(uint16_t* sum1, uint16_t* sum2, const uint8_t* data, long len);
#endif


@@ -77,8 +77,8 @@ TARGET_WITH_SIMD void adler32_neon(uint16_t* sum1, uint16_t* sum2, const uint8_t
uint32x4_t v_s2 = {.n128_u32 = {0, 0, 0, s1 * n}};
uint32x4_t v_s1 = {.n128_u32 = {0, 0, 0, 0}};
#else
uint32x4_t v_s2 = (uint32x4_t){0, 0, 0, s1 * n};
uint32x4_t v_s1 = (uint32x4_t){0, 0, 0, 0};
#endif
uint16x8_t v_column_sum_1 = vdupq_n_u16(0);
uint16x8_t v_column_sum_2 = vdupq_n_u16(0);
@@ -112,23 +112,23 @@ TARGET_WITH_SIMD void adler32_neon(uint16_t* sum1, uint16_t* sum2, const uint8_t
* Multiply-add bytes by [ 32, 31, 30, ... ] for s2.
*/
#ifdef _WIN32
-v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_1), neon_ld1m_16((uint16_t[]) {32, 31, 30, 29}));
+v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_1), neon_ld1m_16((uint16_t[]){32, 31, 30, 29}));
v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_1), neon_ld1m_16((uint16_t[]){28, 27, 26, 25}));
-v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_2), neon_ld1m_16((uint16_t[]) {24, 23, 22, 21}));
-v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_2), neon_ld1m_16((uint16_t[]) {20, 19, 18, 17}));
-v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_3), neon_ld1m_16((uint16_t[]) {16, 15, 14, 13}));
-v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_3), neon_ld1m_16((uint16_t[]) {12, 11, 10, 9}));
-v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_4), neon_ld1m_16((uint16_t[]) {8, 7, 6, 5}));
-v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_4), neon_ld1m_16((uint16_t[]) {4, 3, 2, 1}));
+v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_2), neon_ld1m_16((uint16_t[]){24, 23, 22, 21}));
+v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_2), neon_ld1m_16((uint16_t[]){20, 19, 18, 17}));
+v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_3), neon_ld1m_16((uint16_t[]){16, 15, 14, 13}));
+v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_3), neon_ld1m_16((uint16_t[]){12, 11, 10, 9}));
+v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_4), neon_ld1m_16((uint16_t[]){8, 7, 6, 5}));
+v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_4), neon_ld1m_16((uint16_t[]){4, 3, 2, 1}));
#else
v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_1), (uint16x4_t){32, 31, 30, 29});
v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_1), (uint16x4_t){28, 27, 26, 25});
v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_2), (uint16x4_t){24, 23, 22, 21});
v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_2), (uint16x4_t){20, 19, 18, 17});
v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_3), (uint16x4_t){16, 15, 14, 13});
v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_3), (uint16x4_t){12, 11, 10, 9});
v_s2 = vmlal_u16(v_s2, vget_low_u16(v_column_sum_4), (uint16x4_t){8, 7, 6, 5});
v_s2 = vmlal_u16(v_s2, vget_high_u16(v_column_sum_4), (uint16x4_t){4, 3, 2, 1});
#endif
/*
* Sum epi32 ints v_s1(s2) and accumulate in s1(s2).
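
For orientation, the NEON path above must produce the same result as the plain scalar recurrence (s1 += byte; s2 += s1, both mod 65521); the [32, 31, ..., 1] multiply-add weights are what that recurrence unrolls to over a 32-byte chunk, and the s1 * n term seen in the v_s2 initialisation carries the existing s1 contribution. A minimal scalar sketch for reference; the modulus name and helper name are assumptions of this note, not code from the commit:

#include <stdint.h>

#define ADLER_MODULE 65521u /* largest prime below 2^16, the standard Adler-32 modulus */

/* Illustrative scalar reference only, not the library's adler32_slicing path. */
static void adler32_scalar_sketch(uint16_t* sum1, uint16_t* sum2, const uint8_t* data, long len)
{
    uint32_t s1 = *sum1, s2 = *sum2;
    while(len-- > 0)
    {
        s1 = (s1 + *data++) % ADLER_MODULE; /* running byte sum               */
        s2 = (s2 + s1) % ADLER_MODULE;      /* running sum of the running sum */
    }
    *sum1 = (uint16_t)s1;
    *sum2 = (uint16_t)s2;
}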


@@ -275,9 +275,9 @@ AARU_EXPORT CLMUL uint32_t AARU_CALL crc32_clmul(uint32_t previous_crc, const ui
#if defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM)
#if __ARM_ARCH >= 8
AARU_EXPORT TARGET_ARMV8_WITH_CRC uint32_t AARU_CALL armv8_crc32_little(uint32_t previous_crc,
const uint8_t* data,
uint32_t len);
#endif
AARU_EXPORT TARGET_WITH_SIMD uint32_t AARU_CALL crc32_vmull(uint32_t previous_crc, const uint8_t* data, long len);
#endif
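
For context, armv8_crc32_little maps directly onto the ARMv8 CRC32 extension. A minimal sketch assuming <arm_acle.h>, a byte-at-a-time loop and the usual zlib-style pre/post inversion; illustrative only, not this library's actual implementation:

#include <arm_acle.h>
#include <stdint.h>

/* Sketch only: real code would consume 8 bytes per step with __crc32d and
 * needs the CRC feature enabled (e.g. -march=armv8-a+crc or a target attribute). */
static uint32_t armv8_crc32_little_sketch(uint32_t previous_crc, const uint8_t* data, uint32_t len)
{
    uint32_t crc = ~previous_crc;            /* assumed zlib-style inversion */
    while(len--) crc = __crc32b(crc, *data++);
    return ~crc;
}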


@@ -26,13 +26,13 @@
#if defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM)
#include <arm_neon.h>
-#include <stdint.h>
#include <stddef.h>
+#include <stdint.h>
#include "library.h"
#include "arm_vmull.h"
#include "crc32.h"
#include "crc32_simd.h"
#include "arm_vmull.h"
/*
* somewhat surprisingly the "naive" way of doing this, ie. with a flag and a cond. branch,


@@ -114,7 +114,7 @@ AARU_EXPORT CLMUL uint64_t AARU_CALL crc64_clmul(uint64_t crc, const uint8_t* da
const __m128i P = _mm_xor_si128(A, crc0);
R = _mm_xor_si128(_mm_clmulepi64_si128(P, foldConstants1, 0x10),
_mm_xor_si128(_mm_srli_si128(P, 8), _mm_slli_si128(crc1, 8)));
}
else if(alignedLength == 2)
{
@@ -132,7 +132,7 @@ AARU_EXPORT CLMUL uint64_t AARU_CALL crc64_clmul(uint64_t crc, const uint8_t* da
const __m128i P = _mm_xor_si128(_mm_xor_si128(B, C), crc0);
R = _mm_xor_si128(_mm_clmulepi64_si128(P, foldConstants1, 0x10),
_mm_xor_si128(_mm_srli_si128(P, 8), _mm_slli_si128(crc1, 8)));
}
else
{


@@ -1,16 +1,16 @@
/*
* This file is part of the Aaru Data Preservation Suite.
* Copyright (c) 2019-2021 Natalia Portillo.
*
* This file is under the public domain:
* https://github.com/rawrunprotected/crc
*/
#if defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM)
#include <arm_neon.h>
-#include <stdint.h>
#include <stddef.h>
+#include <stdint.h>
#include "library.h"
#include "arm_vmull.h"
@@ -26,7 +26,7 @@ TARGET_WITH_SIMD FORCE_INLINE void shiftRight128(uint64x2_t in, size_t n, uint64
const uint64x2_t maskA =
vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)(const uint64x2_t*)(shuffleMasks + (16 - n))));
uint64x2_t b = vreinterpretq_u64_u8(vceqq_u8(vreinterpretq_u8_u64(vreinterpretq_u64_u32(vdupq_n_u32(0))),
vreinterpretq_u8_u64(vreinterpretq_u64_u32(vdupq_n_u32(0)))));
const uint64x2_t maskB = vreinterpretq_u64_u32(veorq_u32(vreinterpretq_u32_u64(maskA), vreinterpretq_u32_u64(b)));
*outLeft = mm_shuffle_epi8(in, maskB);
@@ -84,7 +84,7 @@ AARU_EXPORT TARGET_WITH_SIMD uint64_t AARU_CALL crc64_vmull(uint64_t previous_cr
const uint64x2_t P = veorq_u64(A, crc0);
R = veorq_u64(sse2neon_vmull_p64(vget_low_u64(P), vget_high_u64(foldConstants1)),
veorq_u64(mm_srli_si128(P, 8), mm_slli_si128(crc1, 8)));
}
else if(alignedLength == 2)
{
@@ -102,7 +102,7 @@ AARU_EXPORT TARGET_WITH_SIMD uint64_t AARU_CALL crc64_vmull(uint64_t previous_cr
const uint64x2_t P = veorq_u64(veorq_u64(B, C), crc0);
R = veorq_u64(sse2neon_vmull_p64(vget_low_u64(P), vget_high_u64(foldConstants1)),
veorq_u64(mm_srli_si128(P, 8), mm_slli_si128(crc1, 8)));
}
else
{


@@ -1,20 +1,20 @@
/*
* This file is part of the Aaru Data Preservation Suite.
* Copyright (c) 2019-2021 Natalia Portillo.
*
* This library is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of the
* License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef AARU_CHECKSUMS_NATIVE_LIBRARY_H
#define AARU_CHECKSUMS_NATIVE_LIBRARY_H

simd.c

@@ -1,20 +1,20 @@
/*
* This file is part of the Aaru Data Preservation Suite.
* Copyright (c) 2019-2021 Natalia Portillo.
*
* This library is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of the
* License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "library.h"
#include "simd.h"
@@ -112,6 +112,7 @@ int have_avx2(void)
#if defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM)
#if defined(_WIN32)
#include <windows.h>
#include <processthreadsapi.h>
#elif defined(__APPLE__)
#include <sys/sysctl.h>
@@ -120,29 +121,25 @@ int have_avx2(void)
#endif
#endif
-#if (defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM)) && defined(__APPLE__)
+#if(defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM)) && defined(__APPLE__)
int have_neon_apple()
{
int value;
size_t len = sizeof(int);
-int ret=
-sysctlbyname("hw.optional.neon", &value, &len, NULL, 0);
+int ret = sysctlbyname("hw.optional.neon", &value, &len, NULL, 0);
-if(ret != 0)
-return 0;
+if(ret != 0) return 0;
return value == 1;
}
int have_crc32_apple()
{
int value;
size_t len = sizeof(int);
-int ret=
-sysctlbyname("hw.optional.crc32", &value, &len, NULL, 0);
+int ret = sysctlbyname("hw.optional.crc32", &value, &len, NULL, 0);
-if(ret != 0)
-return 0;
+if(ret != 0) return 0;
return value == 1;
}
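
Both helpers follow the same pattern: query a boolean sysctl ("hw.optional.neon" / "hw.optional.crc32") and report 1 only when the call succeeds and the value is 1. A hedged sketch of how a caller might combine them with the declarations above to pick a code path; the fallback function name is hypothetical and the library's real dispatch may differ:

/* Hypothetical dispatch sketch: have_crc32_apple() and armv8_crc32_little()
 * are declared in this library, crc32_scalar_fallback() is a made-up stand-in. */
static uint32_t crc32_dispatch_sketch(uint32_t previous_crc, const uint8_t* data, uint32_t len)
{
    if(have_crc32_apple()) /* hw.optional.crc32 == 1 on this machine */
        return armv8_crc32_little(previous_crc, data, len);

    return crc32_scalar_fallback(previous_crc, data, len); /* hypothetical scalar path */
}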

simd.h

@@ -1,20 +1,20 @@
/*
* This file is part of the Aaru Data Preservation Suite.
* Copyright (c) 2019-2021 Natalia Portillo.
*
* This library is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of the
* License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef AARU_CHECKSUMS_NATIVE_SIMD_H
#define AARU_CHECKSUMS_NATIVE_SIMD_H


@@ -141,7 +141,7 @@ AARU_LOCAL inline void roll_hash(spamsum_ctx* ctx, uint8_t c)
AARU_LOCAL inline void fuzzy_try_reduce_blockhash(spamsum_ctx* ctx)
{
-//assert(ctx->bh_start < ctx->bh_end);
+// assert(ctx->bh_start < ctx->bh_end);
if(ctx->bh_end - ctx->bh_start < 2) /* Need at least two working hashes. */
return;
@@ -163,7 +163,7 @@ AARU_LOCAL inline void fuzzy_try_fork_blockhash(spamsum_ctx* ctx)
{
if(ctx->bh_end >= NUM_BLOCKHASHES) return;
-//assert(ctx->bh_end != 0);
+// assert(ctx->bh_end != 0);
uint32_t obh = ctx->bh_end - 1;
uint32_t nbh = ctx->bh_end;
@@ -184,7 +184,7 @@ AARU_EXPORT int AARU_CALL spamsum_final(spamsum_ctx* ctx, uint8_t* result)
if(!result) return -1;
/* Verify that our elimination was not overeager. */
-//assert(bi == 0 || (uint64_t)SSDEEP_BS(bi) / 2 * SPAMSUM_LENGTH < ctx->total_size);
+// assert(bi == 0 || (uint64_t)SSDEEP_BS(bi) / 2 * SPAMSUM_LENGTH < ctx->total_size);
/* Initial blocksize guess. */
while((uint64_t)SSDEEP_BS(bi) * SPAMSUM_LENGTH < ctx->total_size)
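
The loop above picks the smallest block size whose SPAMSUM_LENGTH-character digest can still cover the whole input. With the usual ssdeep definitions (SSDEEP_BS(i) = 3 << i and SPAMSUM_LENGTH = 64 — assumptions of this note, the constants are not visible in this diff) the guess works out as in this small sketch:

#include <stdint.h>

/* Assumed ssdeep-style constants, not taken from this diff. */
#define SPAMSUM_LENGTH 64
#define MIN_BLOCKSIZE  3u
#define SSDEEP_BS(i)   ((uint32_t)(MIN_BLOCKSIZE << (i)))

/* Smallest bi with SSDEEP_BS(bi) * SPAMSUM_LENGTH >= total_size,
 * e.g. total_size = 100000 gives bi = 10 (block size 3072). */
static uint32_t initial_blocksize_index(uint64_t total_size)
{
    uint32_t bi = 0;
    while((uint64_t)SSDEEP_BS(bi) * SPAMSUM_LENGTH < total_size) ++bi;
    return bi;
}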
@@ -203,21 +203,21 @@ AARU_EXPORT int AARU_CALL spamsum_final(spamsum_ctx* ctx, uint8_t* result)
while(bi > ctx->bh_start && ctx->bh[bi].d_len < SPAMSUM_LENGTH / 2) --bi;
-//assert(!(bi > 0 && ctx->bh[bi].d_len < SPAMSUM_LENGTH / 2));
+// assert(!(bi > 0 && ctx->bh[bi].d_len < SPAMSUM_LENGTH / 2));
int i = snprintf((char*)result, (size_t)remain, "%lu:", (unsigned long)SSDEEP_BS(bi));
if(i <= 0) /* Maybe snprintf has set errno here? */
return -1;
-//assert(i < remain);
+// assert(i < remain);
remain -= i;
result += i;
i = (int)ctx->bh[bi].d_len;
-//assert(i <= remain);
+// assert(i <= remain);
memcpy(result, ctx->bh[bi].digest, (size_t)i);
result += i;
@@ -225,7 +225,7 @@ AARU_EXPORT int AARU_CALL spamsum_final(spamsum_ctx* ctx, uint8_t* result)
if(h != 0)
{
-//assert(remain > 0);
+// assert(remain > 0);
*result = b64[ctx->bh[bi].h % 64];
@@ -237,7 +237,7 @@ AARU_EXPORT int AARU_CALL spamsum_final(spamsum_ctx* ctx, uint8_t* result)
}
else if(ctx->bh[bi].digest[i] != 0)
{
-//assert(remain > 0);
+// assert(remain > 0);
*result = ctx->bh[bi].digest[i];
@@ -248,7 +248,7 @@ AARU_EXPORT int AARU_CALL spamsum_final(spamsum_ctx* ctx, uint8_t* result)
}
}
-//assert(remain > 0);
+// assert(remain > 0);
*result++ = ':';
--remain;
@@ -267,7 +267,7 @@ AARU_EXPORT int AARU_CALL spamsum_final(spamsum_ctx* ctx, uint8_t* result)
if(h != 0)
{
-//assert(remain > 0);
+// assert(remain > 0);
h = ctx->bh[bi].half_h;
*result = b64[h % 64];
@@ -284,7 +284,7 @@ AARU_EXPORT int AARU_CALL spamsum_final(spamsum_ctx* ctx, uint8_t* result)
if(i != 0)
{
-//assert(remain > 0);
+// assert(remain > 0);
*result = (uint8_t)i;
@@ -298,9 +298,9 @@ AARU_EXPORT int AARU_CALL spamsum_final(spamsum_ctx* ctx, uint8_t* result)
}
else if(h != 0)
{
-//assert(ctx->bh[bi].d_len == 0);
+// assert(ctx->bh[bi].d_len == 0);
-//assert(remain > 0);
+// assert(remain > 0);
*result++ = b64[ctx->bh[bi].h % 64];
/* No need to bother with FUZZY_FLAG_ELIMSEQ, because this