Add documentation.

2025-12-16 11:14:29 +00:00 · 2023-09-23 18:10:44 +01:00
parent 3358d66f0a
commit 33f021fd54
22 changed files with 658 additions and 48 deletions
--- a/adler32.c
+++ b/adler32.c
@@ -29,13 +29,22 @@
 #include "adler32.h"
 #include "simd.h"

-AARU_EXPORT adler32_ctx* AARU_CALL adler32_init()
+/**
+ * @brief Initializes the Adler-32 checksum algorithm.
+ *
+ * This function initializes the state variables required for the Adler-32
+ * checksum algorithm. It prepares the algorithm to calculate the checksum
+ * for a new data set.
+ *
+ * @return Pointer to a structure containing the checksum state.
+ */
+AARU_EXPORT adler32_ctx *AARU_CALL adler32_init()
 {
-    adler32_ctx* ctx;
+    adler32_ctx *ctx;

-    ctx = (adler32_ctx*)malloc(sizeof(adler32_ctx));
+    ctx = (adler32_ctx *) malloc(sizeof(adler32_ctx));

-    if(!ctx) return NULL;
+    if (!ctx) return NULL;

    ctx->sum1 = 1;
    ctx->sum2 = 0;
@@ -43,18 +52,31 @@ AARU_EXPORT adler32_ctx* AARU_CALL adler32_init()
    return ctx;
 }

-AARU_EXPORT int AARU_CALL adler32_update(adler32_ctx* ctx, const uint8_t* data, uint32_t len)
+/**
+ * @brief Updates the Adler-32 checksum with new data.
+ *
+ * This function updates the Adler-32 checksum.
+ * The checksum is updated for the given data by iterating through each byte and
+ * applying the corresponding calculations to the rolling checksum values.
+ *
+ * @param ctx Pointer to the Adler-32 context structure.
+ * @param data Pointer to the input data buffer.
+ * @param len The length of the input data buffer.
+ */
+AARU_EXPORT int AARU_CALL adler32_update(adler32_ctx *ctx, const uint8_t *data, uint32_t len)
 {
-    if(!ctx || !data) return -1;
+    if (!ctx || !data) return -1;
+
 #if defined(__aarch64__) || defined(_M_ARM64) || ((defined(__arm__) || defined(_M_ARM)) && !defined(__MINGW32__))
-    if(have_neon())
+    if (have_neon())
    {
        adler32_neon(&ctx->sum1, &ctx->sum2, data, len);

        return 0;
    }
 #endif
-#if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) ||            \
+
+#if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \
    defined(__i386__) || defined(__THW_INTEL) || defined(_M_IX86)
    if(have_avx2())
    {
@@ -76,7 +98,15 @@ AARU_EXPORT int AARU_CALL adler32_update(adler32_ctx* ctx, const uint8_t* data,
    return 0;
 }

-AARU_EXPORT void AARU_CALL adler32_slicing(uint16_t* sum1, uint16_t* sum2, const uint8_t* data, long len)
+/**
+ * @brief Calculates Adler-32 checksum for a given data using slicing algorithm.
+ *
+ * @param sum1 Pointer to a 16-bit unsigned integer to store the first sum value.
+ * @param sum2 Pointer to a 16-bit unsigned integer to store the second sum value.
+ * @param data Pointer to the data for which the checksum is to be calculated.
+ * @param len The length of the data in bytes.
+ */
+AARU_EXPORT void AARU_CALL adler32_slicing(uint16_t *sum1, uint16_t *sum2, const uint8_t *data, long len)
 {
    uint32_t s1 = *sum1;
    uint32_t s2 = *sum2;
@@ -84,12 +114,12 @@ AARU_EXPORT void AARU_CALL adler32_slicing(uint16_t* sum1, uint16_t* sum2, const
    unsigned n;

    /* in case user likes doing a byte at a time, keep it fast */
-    if(len == 1)
+    if (len == 1)
    {
        s1 += data[0];
-        if(s1 >= ADLER_MODULE) s1 -= ADLER_MODULE;
+        if (s1 >= ADLER_MODULE) s1 -= ADLER_MODULE;
        s2 += s1;
-        if(s2 >= ADLER_MODULE) s2 -= ADLER_MODULE;
+        if (s2 >= ADLER_MODULE) s2 -= ADLER_MODULE;

        *sum1 = s1 & 0xFFFF;
        *sum2 = s2 & 0xFFFF;
@@ -98,14 +128,14 @@ AARU_EXPORT void AARU_CALL adler32_slicing(uint16_t* sum1, uint16_t* sum2, const
    }

    /* in case short lengths are provided, keep it somewhat fast */
-    if(len < 16)
+    if (len < 16)
    {
-        while(len--)
+        while (len--)
        {
            s1 += *data++;
            s2 += s1;
        }
-        if(s1 >= ADLER_MODULE) s1 -= ADLER_MODULE;
+        if (s1 >= ADLER_MODULE) s1 -= ADLER_MODULE;
        s2 %= ADLER_MODULE; /* only added so many ADLER_MODULE's */
        *sum1 = s1 & 0xFFFF;
        *sum2 = s2 & 0xFFFF;
@@ -114,11 +144,12 @@ AARU_EXPORT void AARU_CALL adler32_slicing(uint16_t* sum1, uint16_t* sum2, const
    }

    /* do length NMAX blocks -- requires just one modulo operation */
-    while(len >= NMAX)
+    while (len >= NMAX)
    {
        len -= NMAX;
        n = NMAX / 16; /* NMAX is divisible by 16 */
-        do {
+        do
+        {
            s1 += (data)[0];
            s2 += s1;
            s1 += (data)[0 + 1];
@@ -154,15 +185,16 @@ AARU_EXPORT void AARU_CALL adler32_slicing(uint16_t* sum1, uint16_t* sum2, const

            /* 16 sums unrolled */
            data += 16;
-        } while(--n);
+        }
+        while (--n);
        s1 %= ADLER_MODULE;
        s2 %= ADLER_MODULE;
    }

    /* do remaining bytes (less than NMAX, still just one modulo) */
-    if(len)
+    if (len)
    { /* avoid modulos if none remaining */
-        while(len >= 16)
+        while (len >= 16)
        {
            len -= 16;
            s1 += (data)[0];
@@ -200,7 +232,7 @@ AARU_EXPORT void AARU_CALL adler32_slicing(uint16_t* sum1, uint16_t* sum2, const

            data += 16;
        }
-        while(len--)
+        while (len--)
        {
            s1 += *data++;
            s2 += s1;
@@ -213,17 +245,36 @@ AARU_EXPORT void AARU_CALL adler32_slicing(uint16_t* sum1, uint16_t* sum2, const
    *sum2 = s2 & 0xFFFF;
 }

-AARU_EXPORT int AARU_CALL adler32_final(adler32_ctx* ctx, uint32_t* checksum)
+/**
+ * @brief Finalizes the calculation of the Adler-32 checksum.
+ *
+ * This function finalizes the calculation of the Adler-32 checksum and returns
+ * its value.
+ *
+ * @param[in] ctx Pointer to the Adler-32 context structure.
+ * @param[out] checksum Pointer to a 32-bit unsigned integer to store the checksum value.
+ *
+ * @returns 0 on success, -1 on error.
+ */
+AARU_EXPORT int AARU_CALL adler32_final(adler32_ctx *ctx, uint32_t *checksum)
 {
-    if(!ctx) return -1;
+    if (!ctx) return -1;

    *checksum = (ctx->sum2 << 16) | ctx->sum1;
    return 0;
 }

-AARU_EXPORT void AARU_CALL adler32_free(adler32_ctx* ctx)
+/**
+ * @brief Frees the resources allocated for the Adler-32 checksum context.
+ *
+ * This function should be called to release the memory used by the Adler-32 checksum
+ * context structure after it is no longer needed.
+ *
+ * @param ctx The Adler-32 checksum context structure, to be freed.
+ */
+AARU_EXPORT void AARU_CALL adler32_free(adler32_ctx *ctx)
 {
-    if(!ctx) return;
+    if (!ctx) return;

    free(ctx);
 }
--- a/adler32.h
+++ b/adler32.h
@@ -29,13 +29,13 @@ typedef struct
    uint16_t sum2;
 } adler32_ctx;

-AARU_EXPORT adler32_ctx* AARU_CALL adler32_init();
-AARU_EXPORT int AARU_CALL          adler32_update(adler32_ctx* ctx, const uint8_t* data, uint32_t len);
-AARU_EXPORT int AARU_CALL          adler32_final(adler32_ctx* ctx, uint32_t* checksum);
-AARU_EXPORT void AARU_CALL         adler32_free(adler32_ctx* ctx);
-AARU_EXPORT void AARU_CALL         adler32_slicing(uint16_t* sum1, uint16_t* sum2, const uint8_t* data, long len);
+AARU_EXPORT adler32_ctx *AARU_CALL adler32_init();
+AARU_EXPORT int AARU_CALL adler32_update(adler32_ctx *ctx, const uint8_t *data, uint32_t len);
+AARU_EXPORT int AARU_CALL adler32_final(adler32_ctx *ctx, uint32_t *checksum);
+AARU_EXPORT void AARU_CALL adler32_free(adler32_ctx *ctx);
+AARU_EXPORT void AARU_CALL adler32_slicing(uint16_t *sum1, uint16_t *sum2, const uint8_t *data, long len);

-#if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) ||            \
+#if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \
    defined(__i386__) || defined(__THW_INTEL) || defined(_M_IX86)

 AARU_EXPORT SSSE3 void AARU_CALL adler32_ssse3(uint16_t* sum1, uint16_t* sum2, const uint8_t* data, long len);
@@ -45,7 +45,7 @@ AARU_EXPORT AVX2 void AARU_CALL  adler32_avx2(uint16_t* sum1, uint16_t* sum2, co

 #if defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM)

-AARU_EXPORT void AARU_CALL adler32_neon(uint16_t* sum1, uint16_t* sum2, const uint8_t* data, uint32_t len);
+AARU_EXPORT void AARU_CALL adler32_neon(uint16_t *sum1, uint16_t *sum2, const uint8_t *data, uint32_t len);

 #endif

--- a/adler32_avx2.c
+++ b/adler32_avx2.c
@@ -32,6 +32,16 @@
 #include "adler32.h"
 #include "simd.h"

+/**
+ * @brief Calculate Adler-32 checksum for a given data using AVX2 instructions.
+ *
+ * This function calculates the Adler-32 checksum for a block of data using AVX2 vector instructions.
+ *
+ * @param sum1 Pointer to the variable where the first 16-bit checksum value is stored.
+ * @param sum2 Pointer to the variable where the second 16-bit checksum value is stored.
+ * @param data Pointer to the data buffer.
+ * @param len Length of the data buffer in bytes.
+ */
 AARU_EXPORT AVX2 void AARU_CALL adler32_avx2(uint16_t* sum1, uint16_t* sum2, const uint8_t* data, long len)
 {
    uint32_t s1 = *sum1;
--- a/adler32_neon.c
+++ b/adler32_neon.c
@@ -38,6 +38,16 @@
 #include "adler32.h"
 #include "simd.h"

+/**
+ * @brief Calculate Adler-32 checksum for a given data using NEON instructions.
+ *
+ * This function calculates the Adler-32 checksum for a block of data using NEON vector instructions.
+ *
+ * @param sum1 Pointer to the variable where the first 16-bit checksum value is stored.
+ * @param sum2 Pointer to the variable where the second 16-bit checksum value is stored.
+ * @param data Pointer to the data buffer.
+ * @param len Length of the data buffer in bytes.
+ */
 TARGET_WITH_SIMD void adler32_neon(uint16_t *sum1, uint16_t *sum2, const uint8_t *data, uint32_t len)
 {
    /*
--- a/adler32_ssse3.c
+++ b/adler32_ssse3.c
@@ -39,6 +39,17 @@
 #include "library.h"
 #include "adler32.h"

+
+/**
+ * @brief Calculate Adler-32 checksum for a given data using SSSE3 instructions.
+ *
+ * This function calculates the Adler-32 checksum for a block of data using SSSE3 vector instructions.
+ *
+ * @param sum1 Pointer to the variable where the first 16-bit checksum value is stored.
+ * @param sum2 Pointer to the variable where the second 16-bit checksum value is stored.
+ * @param data Pointer to the data buffer.
+ * @param len Length of the data buffer in bytes.
+ */
 AARU_EXPORT SSSE3 void AARU_CALL adler32_ssse3(uint16_t* sum1, uint16_t* sum2, const uint8_t* data, long len)
 {
    uint32_t s1 = *sum1;
--- a/build.sh
+++ b/build.sh
--- a/crc16.c
+++ b/crc16.c
@@ -22,6 +22,15 @@
 #include "library.h"
 #include "crc16.h"

+/**
+ * @brief Initializes the CRC-16 checksum algorithm with the IBM polynomial.
+ *
+ * This function initializes the state variables required for the CRC-16
+ * checksum algorithm using the IBM polynomial. It prepares the algorithm
+ * to calculate the checksum for a new data set.
+ *
+ * @return Pointer to a structure containing the checksum state.
+ */
 AARU_EXPORT crc16_ctx* AARU_CALL crc16_init(void)
 {
    crc16_ctx* ctx = (crc16_ctx*)malloc(sizeof(crc16_ctx));
@@ -33,6 +42,20 @@ AARU_EXPORT crc16_ctx* AARU_CALL crc16_init(void)
    return ctx;
 }

+/**
+ * @brief Updates the CRC-16 checksum with new data.
+ *
+ * This function updates the CRC-16 checksum.
+ * The checksum is updated for the given data by using the IBM polynomial.
+ * The algorithm continues the checksum calculation from the previous state,
+ * so it can be used to update the checksum with new data as it is read.
+ *
+ * @param ctx Pointer to the CRC-16 context structure.
+ * @param data Pointer to the input data buffer.
+ * @param len The length of the input data buffer.
+ *
+ * @returns 0 on success, -1 on error.
+ */
 AARU_EXPORT int AARU_CALL crc16_update(crc16_ctx* ctx, const uint8_t* data, uint32_t len)
 {
    // Unroll according to Intel slicing by uint8_t
@@ -83,6 +106,17 @@ AARU_EXPORT int AARU_CALL crc16_update(crc16_ctx* ctx, const uint8_t* data, uint
    return 0;
 }

+/**
+ * @brief Finalizes the calculation of the CRC-16 checksum.
+ *
+ * This function finalizes the calculation of the CRC-16 checksum and returns
+ * its value.
+ *
+ * @param[in] ctx Pointer to the CRC-16 context structure.
+ * @param[out] crc Pointer to a 16-bit unsigned integer to store the checksum value.
+ *
+ * @returns 0 on success, -1 on error.
+ */
 AARU_EXPORT int AARU_CALL crc16_final(crc16_ctx* ctx, uint16_t* crc)
 {
    if(!ctx) return -1;
@@ -92,6 +126,14 @@ AARU_EXPORT int AARU_CALL crc16_final(crc16_ctx* ctx, uint16_t* crc)
    return 0;
 }

+/**
+ * @brief Frees the resources allocated for the CRC-16 checksum context.
+ *
+ * This function should be called to release the memory used by the CRC-16 checksum
+ * context structure after it is no longer needed.
+ *
+ * @param ctx The CRC-16 checksum context structure, to be freed.
+ */
 AARU_EXPORT void AARU_CALL crc16_free(crc16_ctx* ctx)
 {
    if(ctx) free(ctx);
--- a/crc16_ccitt.c
+++ b/crc16_ccitt.c
@@ -22,6 +22,15 @@
 #include "library.h"
 #include "crc16_ccitt.h"

+/**
+ * @brief Initializes the CRC-16 checksum algorithm with the CCITT polynomial.
+ *
+ * This function initializes the state variables required for the CRC-16
+ * checksum algorithm using the CCITT polynomial. It prepares the algorithm
+ * to calculate the checksum for a new data set.
+ *
+ * @return Pointer to a structure containing the checksum state.
+ */
 AARU_EXPORT crc16_ccitt_ctx* AARU_CALL crc16_ccitt_init(void)
 {
    crc16_ccitt_ctx* ctx = (crc16_ccitt_ctx*)malloc(sizeof(crc16_ccitt_ctx));
@@ -33,6 +42,20 @@ AARU_EXPORT crc16_ccitt_ctx* AARU_CALL crc16_ccitt_init(void)
    return ctx;
 }

+/**
+ * @brief Updates the CRC-16 checksum with new data.
+ *
+ * This function updates the CRC-16 checksum.
+ * The checksum is updated for the given data by using the CCITT polynomial.
+ * The algorithm continues the checksum calculation from the previous state,
+ * so it can be used to update the checksum with new data as it is read.
+ *
+ * @param ctx Pointer to the CRC-16 context structure.
+ * @param data Pointer to the input data buffer.
+ * @param len The length of the input data buffer.
+ *
+ * @returns 0 on success, -1 on error.
+ */
 AARU_EXPORT int AARU_CALL crc16_ccitt_update(crc16_ccitt_ctx* ctx, const uint8_t* data, uint32_t len)
 {
    // Unroll according to Intel slicing by uint8_t
@@ -79,6 +102,17 @@ AARU_EXPORT int AARU_CALL crc16_ccitt_update(crc16_ccitt_ctx* ctx, const uint8_t
    return 0;
 }

+/**
+ * @brief Finalizes the calculation of the CRC-16 checksum.
+ *
+ * This function finalizes the calculation of the CRC-16 checksum and returns
+ * its value.
+ *
+ * @param[in] ctx Pointer to the CRC-16 context structure.
+ * @param[out] crc Pointer to a 16-bit unsigned integer to store the checksum value.
+ *
+ * @returns 0 on success, -1 on error.
+ */
 AARU_EXPORT int AARU_CALL crc16_ccitt_final(crc16_ccitt_ctx* ctx, uint16_t* crc)
 {
    if(!ctx) return -1;
@@ -88,6 +122,14 @@ AARU_EXPORT int AARU_CALL crc16_ccitt_final(crc16_ccitt_ctx* ctx, uint16_t* crc)
    return 0;
 }

+/**
+ * @brief Frees the resources allocated for the CRC-16 checksum context.
+ *
+ * This function should be called to release the memory used by the CRC-16 checksum
+ * context structure after it is no longer needed.
+ *
+ * @param ctx The CRC-16 checksum context structure, to be freed.
+ */
 AARU_EXPORT void AARU_CALL crc16_ccitt_free(crc16_ccitt_ctx* ctx)
 {
    if(ctx) free(ctx);
--- a/crc32.c
+++ b/crc32.c
@@ -22,6 +22,15 @@
 #include "library.h"
 #include "crc32.h"

+/**
+ * @brief Initializes the CRC-32 checksum algorithm with the ISO polynomial.
+ *
+ * This function initializes the state variables required for the CRC-32
+ * checksum algorithm using the ISO polynomial. It prepares the algorithm
+ * to calculate the checksum for a new data set.
+ *
+ * @return Pointer to a structure containing the checksum state.
+ */
 AARU_EXPORT crc32_ctx* AARU_CALL crc32_init(void)
 {
    crc32_ctx* ctx = (crc32_ctx*)malloc(sizeof(crc32_ctx));
@@ -33,6 +42,20 @@ AARU_EXPORT crc32_ctx* AARU_CALL crc32_init(void)
    return ctx;
 }

+/**
+ * @brief Updates the CRC-32 checksum with new data.
+ *
+ * This function updates the CRC-32 checksum.
+ * The checksum is updated for the given data by using the ISO polynomial.
+ * The algorithm continues the checksum calculation from the previous state,
+ * so it can be used to update the checksum with new data as it is read.
+ *
+ * @param ctx Pointer to the CRC-32 context structure.
+ * @param data Pointer to the input data buffer.
+ * @param len The length of the input data buffer.
+ *
+ * @returns 0 on success, -1 on error.
+ */
 AARU_EXPORT int AARU_CALL crc32_update(crc32_ctx* ctx, const uint8_t* data, uint32_t len)
 {
    if(!ctx || !data) return -1;
@@ -67,6 +90,17 @@ AARU_EXPORT int AARU_CALL crc32_update(crc32_ctx* ctx, const uint8_t* data, uint
    return 0;
 }

+/**
+ * @brief Computes the CRC-32 checksum using slicing-by-8 algorithm.
+ *
+ * This function calculates the CRC-32 value for the given data using slicing-by-8 algorithm.
+ *
+ * @param previous_crc A pointer to the previous CRC-32 value, and where the updated value gets stored.
+ * @param data The pointer to the data buffer.
+ * @param len The length of the data in bytes.
+ *
+ * @note This function assumes little-endian byte order.
+ */
 AARU_EXPORT void AARU_CALL crc32_slicing(uint32_t* previous_crc, const uint8_t* data, long len)
 {
    // Unroll according to Intel slicing by uint8_t
@@ -113,6 +147,17 @@ AARU_EXPORT void AARU_CALL crc32_slicing(uint32_t* previous_crc, const uint8_t*
    *previous_crc = c;
 }

+/**
+ * @brief Finalizes the calculation of the CRC-32 checksum.
+ *
+ * This function finalizes the calculation of the CRC-32 checksum and returns
+ * its value.
+ *
+ * @param[in] ctx Pointer to the CRC-32 context structure.
+ * @param[out] crc Pointer to a 32-bit unsigned integer to store the checksum value.
+ *
+ * @returns 0 on success, -1 on error.
+ */
 AARU_EXPORT int AARU_CALL crc32_final(crc32_ctx* ctx, uint32_t* crc)
 {
    if(!ctx) return -1;
@@ -122,6 +167,14 @@ AARU_EXPORT int AARU_CALL crc32_final(crc32_ctx* ctx, uint32_t* crc)
    return 0;
 }

+/**
+ * @brief Frees the resources allocated for the CRC-32 checksum context.
+ *
+ * This function should be called to release the memory used by the CRC-32 checksum
+ * context structure after it is no longer needed.
+ *
+ * @param ctx The CRC-32 checksum context structure, to be freed.
+ */
 AARU_EXPORT void AARU_CALL crc32_free(crc32_ctx* ctx)
 {
    if(ctx) free(ctx);
--- a/crc32_arm_simd.c
+++ b/crc32_arm_simd.c
@@ -41,6 +41,17 @@
 #include "library.h"
 #include "crc32.h"

+/**
+ * @brief Calculates the CRC-32 using the ARMv8 instruction set in little endian mode.
+ *
+ * This function takes the previous CRC value, data and length as inputs and calculates
+ * the new CRC-32 using the ARMv8 instruction set in little endian mode.
+ *
+ * @param previous_crc The previous CRC value.
+ * @param data The input data to calculate the CRC over.
+ * @param len The length of the input data.
+ * @return The new CRC-32 value.
+ */
 TARGET_ARMV8_WITH_CRC uint32_t armv8_crc32_little(uint32_t previous_crc, const uint8_t* data, uint32_t len)
 {
    uint32_t c = (uint32_t)previous_crc;
--- a/crc32_clmul.c
+++ b/crc32_clmul.c
@@ -34,8 +34,7 @@
 #include "crc32.h"
 #include "crc32_simd.h"

-CLMUL
-static void fold_1(__m128i* xmm_crc0, __m128i* xmm_crc1, __m128i* xmm_crc2, __m128i* xmm_crc3)
+CLMUL static void fold_1(__m128i* xmm_crc0, __m128i* xmm_crc1, __m128i* xmm_crc2, __m128i* xmm_crc3)
 {
    const __m128i xmm_fold4 = _mm_set_epi32(0x00000001, 0x54442bd4, 0x00000001, 0xc6e41596);

@@ -57,8 +56,7 @@ static void fold_1(__m128i* xmm_crc0, __m128i* xmm_crc1, __m128i* xmm_crc2, __m1
    *xmm_crc3 = _mm_castps_si128(ps_res);
 }

-CLMUL
-static void fold_2(__m128i* xmm_crc0, __m128i* xmm_crc1, __m128i* xmm_crc2, __m128i* xmm_crc3)
+CLMUL static void fold_2(__m128i* xmm_crc0, __m128i* xmm_crc1, __m128i* xmm_crc2, __m128i* xmm_crc3)
 {
    const __m128i xmm_fold4 = _mm_set_epi32(0x00000001, 0x54442bd4, 0x00000001, 0xc6e41596);

@@ -88,8 +86,7 @@ static void fold_2(__m128i* xmm_crc0, __m128i* xmm_crc1, __m128i* xmm_crc2, __m1
    *xmm_crc3 = _mm_castps_si128(ps_res31);
 }

-CLMUL
-static void fold_3(__m128i* xmm_crc0, __m128i* xmm_crc1, __m128i* xmm_crc2, __m128i* xmm_crc3)
+CLMUL static void fold_3(__m128i* xmm_crc0, __m128i* xmm_crc1, __m128i* xmm_crc2, __m128i* xmm_crc3)
 {
    const __m128i xmm_fold4 = _mm_set_epi32(0x00000001, 0x54442bd4, 0x00000001, 0xc6e41596);

@@ -125,8 +122,7 @@ static void fold_3(__m128i* xmm_crc0, __m128i* xmm_crc1, __m128i* xmm_crc2, __m1
    *xmm_crc3 = _mm_castps_si128(ps_res32);
 }

-CLMUL
-static void fold_4(__m128i* xmm_crc0, __m128i* xmm_crc1, __m128i* xmm_crc2, __m128i* xmm_crc3)
+CLMUL static void fold_4(__m128i* xmm_crc0, __m128i* xmm_crc1, __m128i* xmm_crc2, __m128i* xmm_crc3)
 {
    const __m128i xmm_fold4 = _mm_set_epi32(0x00000001, 0x54442bd4, 0x00000001, 0xc6e41596);

@@ -170,8 +166,7 @@ static void fold_4(__m128i* xmm_crc0, __m128i* xmm_crc1, __m128i* xmm_crc2, __m1
    *xmm_crc3 = _mm_castps_si128(ps_res3);
 }

-CLMUL
-static void partial_fold(const size_t len,
+CLMUL static void partial_fold(const size_t len,
                         __m128i*     xmm_crc0,
                         __m128i*     xmm_crc1,
                         __m128i*     xmm_crc2,
@@ -228,6 +223,15 @@ static void partial_fold(const size_t len,
 */
 #define XOR_INITIAL(where) ONCE(where = _mm_xor_si128(where, xmm_initial))

+/**
+ * @brief Calculate the CRC32 checksum using CLMUL instruction extension.
+ *
+ * @param previous_crc The previously calculated CRC32 checksum.
+ * @param data Pointer to the input data buffer.
+ * @param len Length of the input data in bytes.
+ *
+ * @return The calculated CRC32 checksum.
+ */
 AARU_EXPORT CLMUL uint32_t AARU_CALL crc32_clmul(uint32_t previous_crc, const uint8_t* data, long len)
 {
    unsigned long algn_diff;
--- a/crc32_vmull.c
+++ b/crc32_vmull.c
@@ -233,6 +233,20 @@ TARGET_WITH_SIMD FORCE_INLINE void partial_fold(const size_t len,
    *q_crc3 = vreinterpretq_u64_u32(ps_res);
 }

+/**
+ * @brief Calculates the CRC-32 checksum using the vmull instruction.
+ *
+ * This function calculates the CRC-32 checksum of the given data using the
+ * vmull instruction for optimized performance. It takes the previous CRC value,
+ * the data buffer, and the length of data as parameters. The function returns
+ * the resulting CRC-32 checksum.
+ *
+ * @param previous_crc The previous CRC value.
+ * @param data The data buffer.
+ * @param len The length of the data buffer.
+ *
+ * @return The CRC-32 checksum of the given data.
+ */
 TARGET_WITH_SIMD uint32_t crc32_vmull(uint32_t previous_crc, const uint8_t* data, long len)
 {
    unsigned long algn_diff;
--- a/crc64.c
+++ b/crc64.c
@@ -23,6 +23,15 @@
 #include "crc64.h"
 #include "simd.h"

+/**
+ * @brief Initializes the CRC-64 checksum algorithm with the ECMA polynomial.
+ *
+ * This function initializes the state variables required for the CRC-64
+ * checksum algorithm using the ECMA polynomial. It prepares the algorithm
+ * to calculate the checksum for a new data set.
+ *
+ * @return Pointer to a structure containing the checksum state.
+ */
 AARU_EXPORT crc64_ctx* AARU_CALL crc64_init(void)
 {
    int        i, slice;
@@ -35,6 +44,20 @@ AARU_EXPORT crc64_ctx* AARU_CALL crc64_init(void)
    return ctx;
 }

+/**
+ * @brief Updates the CRC-64 checksum with new data.
+ *
+ * This function updates the CRC-64 checksum.
+ * The checksum is updated for the given data by using the ECMA polynomial.
+ * The algorithm continues the checksum calculation from the previous state,
+ * so it can be used to update the checksum with new data as it is read.
+ *
+ * @param ctx Pointer to the CRC-64 context structure.
+ * @param data Pointer to the input data buffer.
+ * @param len The length of the input data buffer.
+ *
+ * @returns 0 on success, -1 on error.
+ */
 AARU_EXPORT int AARU_CALL crc64_update(crc64_ctx* ctx, const uint8_t* data, uint32_t len)
 {
    if(!ctx || !data) return -1;
@@ -97,6 +120,17 @@ AARU_EXPORT void AARU_CALL crc64_slicing(uint64_t* previous_crc, const uint8_t*
    *previous_crc = c;
 }

+/**
+ * @brief Finalizes the calculation of the CRC-64 checksum.
+ *
+ * This function finalizes the calculation of the CRC-64 checksum and returns
+ * its value.
+ *
+ * @param[in] ctx Pointer to the CRC-64 context structure.
+ * @param[out] crc Pointer to a 64-bit unsigned integer to store the checksum value.
+ *
+ * @returns 0 on success, -1 on error.
+ */
 AARU_EXPORT int AARU_CALL crc64_final(crc64_ctx* ctx, uint64_t* crc)
 {
    if(!ctx) return -1;
@@ -106,6 +140,14 @@ AARU_EXPORT int AARU_CALL crc64_final(crc64_ctx* ctx, uint64_t* crc)
    return 0;
 }

+/**
+ * @brief Frees the resources allocated for the CRC-64 checksum context.
+ *
+ * This function should be called to release the memory used by the CRC-64 checksum
+ * context structure after it is no longer needed.
+ *
+ * @param ctx The CRC-64 checksum context structure, to be freed.
+ */
 AARU_EXPORT void AARU_CALL crc64_free(crc64_ctx* ctx)
 {
    if(ctx) free(ctx);
--- a/crc64_clmul.c
+++ b/crc64_clmul.c
@@ -72,6 +72,15 @@ CLMUL static __m128i fold(__m128i in, __m128i foldConstants)
    return _mm_xor_si128(_mm_clmulepi64_si128(in, foldConstants, 0x00), _mm_clmulepi64_si128(in, foldConstants, 0x11));
 }

+/**
+ * @brief Calculate the CRC-64 checksum using CLMUL instruction extension.
+ *
+ * @param crc The previously calculated CRC-64 checksum.
+ * @param data Pointer to the input data buffer.
+ * @param length Length of the input data in bytes.
+ *
+ * @return The calculated CRC-64 checksum.
+ */
 AARU_EXPORT CLMUL uint64_t AARU_CALL crc64_clmul(uint64_t crc, const uint8_t* data, long length)
 {
    const uint64_t k1 = 0xe05dd497ca393ae4; // bitReflect(expMod65(128 + 64, poly, 1)) << 1;
--- a/crc64_vmull.c
+++ b/crc64_vmull.c
@@ -39,6 +39,20 @@ TARGET_WITH_SIMD FORCE_INLINE uint64x2_t fold(uint64x2_t in, uint64x2_t foldCons
                     sse2neon_vmull_p64(vget_high_u64(in), vget_high_u64(foldConstants)));
 }

+/**
+ * @brief Calculates the CRC-64 checksum using the vmull instruction.
+ *
+ * This function calculates the CRC-64 checksum of the given data using the
+ * vmull instruction for optimized performance. It takes the previous CRC value,
+ * the data buffer, and the length of data as parameters. The function returns
+ * the resulting CRC-64 checksum.
+ *
+ * @param previous_crc The previous CRC value.
+ * @param data The data buffer.
+ * @param len The length of the data buffer.
+ *
+ * @return The CRC-64 checksum of the given data.
+ */
 AARU_EXPORT TARGET_WITH_SIMD uint64_t AARU_CALL crc64_vmull(uint64_t previous_crc, const uint8_t* data, long len)
 {
    const uint64_t k1 = 0xe05dd497ca393ae4; // bitReflect(expMod65(128 + 64, poly, 1)) << 1;
--- a/fletcher16.c
+++ b/fletcher16.c
@@ -28,6 +28,15 @@
 #include "library.h"
 #include "fletcher16.h"

+/**
+ * @brief Initializes the Fletcher-16 checksum algorithm.
+ *
+ * This function initializes the state variables required for the Fletcher-16
+ * checksum algorithm. It prepares the algorithm to calculate the checksum
+ * for a new data set.
+ *
+ * @return Pointer to a structure containing the checksum state.
+ */
 AARU_EXPORT fletcher16_ctx* AARU_CALL fletcher16_init()
 {
    fletcher16_ctx* ctx;
@@ -42,6 +51,17 @@ AARU_EXPORT fletcher16_ctx* AARU_CALL fletcher16_init()
    return ctx;
 }

+/**
+ * @brief Updates the Fletcher-16 checksum with new data.
+ *
+ * This function updates the Fletcher-16 checksum.
+ * The checksum is updated for the given data by iterating through each byte and
+ * applying the corresponding calculations to the rolling checksum values.
+ *
+ * @param ctx Pointer to the Fletcher-16 context structure.
+ * @param data Pointer to the input data buffer.
+ * @param len The length of the input data buffer.
+ */
 AARU_EXPORT int AARU_CALL fletcher16_update(fletcher16_ctx* ctx, const uint8_t* data, uint32_t len)
 {
    if(!ctx || !data) return -1;
@@ -159,6 +179,17 @@ AARU_EXPORT int AARU_CALL fletcher16_update(fletcher16_ctx* ctx, const uint8_t*
    return 0;
 }

+/**
+ * @brief Finalizes the calculation of the Fletcher-16 checksum.
+ *
+ * This function finalizes the calculation of the Fletcher-16 checksum and returns
+ * its value.
+ *
+ * @param[in] ctx Pointer to the Fletcher-16 context structure.
+ * @param[out] checksum Pointer to a 16-bit unsigned integer to store the checksum value.
+ *
+ * @returns 0 on success, -1 on error.
+ */
 AARU_EXPORT int AARU_CALL fletcher16_final(fletcher16_ctx* ctx, uint16_t* checksum)
 {
    if(!ctx) return -1;
@@ -167,6 +198,14 @@ AARU_EXPORT int AARU_CALL fletcher16_final(fletcher16_ctx* ctx, uint16_t* checks
    return 0;
 }

+/**
+ * @brief Frees the resources allocated for the Fletcher-16 checksum context.
+ *
+ * This function should be called to release the memory used by the Fletcher-16 checksum
+ * context structure after it is no longer needed.
+ *
+ * @param ctx The Fletcher-16 checksum context structure, to be freed.
+ */
 AARU_EXPORT void AARU_CALL fletcher16_free(fletcher16_ctx* ctx)
 {
    if(!ctx) return;
--- a/fletcher32.c
+++ b/fletcher32.c
@@ -28,6 +28,15 @@
 #include "library.h"
 #include "fletcher32.h"

+/**
+ * @brief Initializes the Fletcher-32 checksum algorithm.
+ *
+ * This function initializes the state variables required for the Fletcher-32
+ * checksum algorithm. It prepares the algorithm to calculate the checksum
+ * for a new data set.
+ *
+ * @return Pointer to a structure containing the checksum state.
+ */
 AARU_EXPORT fletcher32_ctx* AARU_CALL fletcher32_init()
 {
    fletcher32_ctx* ctx;
@@ -42,6 +51,17 @@ AARU_EXPORT fletcher32_ctx* AARU_CALL fletcher32_init()
    return ctx;
 }

+/**
+ * @brief Updates the Fletcher-32 checksum with new data.
+ *
+ * This function updates the Fletcher-32 checksum.
+ * The checksum is updated for the given data by iterating through each byte and
+ * applying the corresponding calculations to the rolling checksum values.
+ *
+ * @param ctx Pointer to the Fletcher-32 context structure.
+ * @param data Pointer to the input data buffer.
+ * @param len The length of the input data buffer.
+ */
 AARU_EXPORT int AARU_CALL fletcher32_update(fletcher32_ctx* ctx, const uint8_t* data, uint32_t len)
 {
    if(!ctx || !data) return -1;
@@ -206,6 +226,17 @@ AARU_EXPORT int AARU_CALL fletcher32_update(fletcher32_ctx* ctx, const uint8_t*
    return 0;
 }

+/**
+ * @brief Finalizes the calculation of the Fletcher-32 checksum.
+ *
+ * This function finalizes the calculation of the Fletcher-32 checksum and returns
+ * its value.
+ *
+ * @param[in] ctx Pointer to the Fletcher-32 context structure.
+ * @param[out] checksum Pointer to a 32-bit unsigned integer to store the checksum value.
+ *
+ * @returns 0 on success, -1 on error.
+ */
 AARU_EXPORT int AARU_CALL fletcher32_final(fletcher32_ctx* ctx, uint32_t* checksum)
 {
    if(!ctx) return -1;
@@ -214,6 +245,14 @@ AARU_EXPORT int AARU_CALL fletcher32_final(fletcher32_ctx* ctx, uint32_t* checks
    return 0;
 }

+/**
+ * @brief Frees the resources allocated for the Fletcher-32 checksum context.
+ *
+ * This function should be called to release the memory used by the Fletcher-32 checksum
+ * context structure after it is no longer needed.
+ *
+ * @param ctx The Fletcher-32 checksum context structure, to be freed.
+ */
 AARU_EXPORT void AARU_CALL fletcher32_free(fletcher32_ctx* ctx)
 {
    if(!ctx) return;
--- a/fletcher32_avx2.c
+++ b/fletcher32_avx2.c
@@ -32,6 +32,16 @@
 #include "fletcher32.h"
 #include "simd.h"

+/**
+ * @brief Calculate Fletcher-32 checksum for a given data using AVX2 instructions.
+ *
+ * This function calculates the Fletcher-32 checksum for a block of data using AVX2 vector instructions.
+ *
+ * @param sum1 Pointer to the variable where the first 16-bit checksum value is stored.
+ * @param sum2 Pointer to the variable where the second 16-bit checksum value is stored.
+ * @param data Pointer to the data buffer.
+ * @param len Length of the data buffer in bytes.
+ */
 AARU_EXPORT AVX2 void AARU_CALL fletcher32_avx2(uint16_t *sum1, uint16_t *sum2, const uint8_t *data, long len) {
    uint32_t s1 = *sum1;
    uint32_t s2 = *sum2;
--- a/fletcher32_neon.c
+++ b/fletcher32_neon.c
@@ -41,14 +41,15 @@
 TARGET_WITH_SIMD /***/

 /**
- * @brief Compute the Fletcher-32 checksum using NEON instructions.
+ * @brief Calculate Fletcher-32 checksum for a given data using NEON instructions.
 *
- * @param[out] sum1 Pointer to the first sum value.
- * @param[out] sum2 Pointer to the second sum value.
- * @param[in] data Pointer to the input data.
- * @param[in] len The length of the input data.
+ * This function calculates the Fletcher-32 checksum for a block of data using NEON vector instructions.
+ *
+ * @param sum1 Pointer to the variable where the first 16-bit checksum value is stored.
+ * @param sum2 Pointer to the variable where the second 16-bit checksum value is stored.
+ * @param data Pointer to the data buffer.
+ * @param len Length of the data buffer in bytes.
 */
-
 void fletcher32_neon(uint16_t *sum1, uint16_t *sum2, const uint8_t *data, uint32_t len) {
    /*
     * Split Fletcher-32 into component sums.
--- a/fletcher32_ssse3.c
+++ b/fletcher32_ssse3.c
@@ -39,6 +39,16 @@
 #include "library.h"
 #include "fletcher32.h"

+/**
+ * @brief Calculate Fletcher-32 checksum for a given data using SSSE3 instructions.
+ *
+ * This function calculates the Fletcher-32 checksum for a block of data using SSSE3 vector instructions.
+ *
+ * @param sum1 Pointer to the variable where the first 16-bit checksum value is stored.
+ * @param sum2 Pointer to the variable where the second 16-bit checksum value is stored.
+ * @param data Pointer to the data buffer.
+ * @param len Length of the data buffer in bytes.
+ */
 AARU_EXPORT SSSE3 void AARU_CALL fletcher32_ssse3(uint16_t* sum1, uint16_t* sum2, const uint8_t* data, long len)
 {
    uint32_t s1 = *sum1;
--- a/simd.c
+++ b/simd.c
@@ -36,6 +36,26 @@

 #endif

+/**
+ * @brief Gets the CPUID information for the given info value.
+ *
+ * This function retrieves the CPUID information for the specified info argument
+ * and stores the results in the provided pointers: eax, ebx, ecx, and edx.
+ * Each register represents a 32-bit value returned by the CPUID instruction.
+ *
+ * @param info The CPUID info value specifying the desired information to retrieve.
+ * @param eax Pointer to store the value of the EAX register.
+ * @param ebx Pointer to store the value of the EBX register.
+ * @param ecx Pointer to store the value of the ECX register.
+ * @param edx Pointer to store the value of the EDX register.
+ *
+ * @note It is important to ensure that the provided pointers are valid and point
+ * to a memory location that can be modified by this function.
+ *
+ * @see https://en.wikipedia.org/wiki/CPUID
+ *
+ * This function does not return a value.
+ */
 static void cpuid(int info, unsigned* eax, unsigned* ebx, unsigned* ecx, unsigned* edx)
 {
 #ifdef _MSC_VER
@@ -59,6 +79,26 @@ static void cpuid(int info, unsigned* eax, unsigned* ebx, unsigned* ecx, unsigne
 #endif
 }

+/**
+ * @brief Get the CPU extended information using CPUID instruction.
+ *
+ * This function retrieves the extended information from the CPU by using the CPUID instruction.
+ * It reads the result into the output parameters eax, ebx, ecx, and edx based on the input parameters info and count.
+ *
+ * @param info The CPUID function number to be executed.
+ * @param count The sub-leaf index for certain CPUID functions.
+ * @param eax Pointer to store the value of the EAX register.
+ * @param ebx Pointer to store the value of the EBX register.
+ * @param ecx Pointer to store the value of the ECX register.
+ * @param edx Pointer to store the value of the EDX register.
+ *
+ * @note It is important to ensure that the provided pointers are valid and point
+ * to a memory location that can be modified by this function.
+ *
+ * @see https://en.wikipedia.org/wiki/CPUID
+ *
+ * This function does not return a value.
+ */
 static void cpuidex(int info, int count, unsigned* eax, unsigned* ebx, unsigned* ecx, unsigned* edx)
 {
 #ifdef _MSC_VER
@@ -82,6 +122,18 @@ static void cpuidex(int info, int count, unsigned* eax, unsigned* ebx, unsigned*
 #endif
 }

+/**
+ * @brief Checks if the hardware supports the CLMUL instruction set.
+ *
+ * The function checks if the system's CPU supports the CLMUL (Carry-Less Multiplication) instruction set.
+ * CLMUL is an extension to the x86 instruction set architecture and provides hardware acceleration for
+ * carry-less multiplication operations.
+ *
+ * @return Non-zero if both PCLMULQDQ and SSE4.1 are supported, zero otherwise.
+ *
+ * @see https://software.intel.com/sites/landingpage/IntrinsicsGuide/#techs=CLMUL
+ * @see https://en.wikipedia.org/wiki/Carry-less_multiplication
+ */
 int have_clmul(void)
 {
    unsigned eax, ebx, ecx, edx;
@@ -95,6 +147,19 @@ int have_clmul(void)
    return has_pclmulqdq && has_sse41;
 }

+/**
+ * @brief Checks if the current processor supports SSSE3 instructions.
+ *
+ * The function detects whether the current processor supports SSSE3 instructions by
+ * checking the CPU feature flags. SSSE3 (Supplemental Streaming SIMD Extensions 3)
+ * is an extension to the x86 instruction set architecture that introduces
+ * additional SIMD instructions useful for multimedia and signal processing tasks.
+ *
+ * @return true if the current processor supports SSSE3 instructions, false otherwise.
+ *
+ * @see https://software.intel.com/sites/landingpage/IntrinsicsGuide/#techs=SSSE3
+ * @see https://en.wikipedia.org/wiki/SSSE3
+ */
 int have_ssse3(void)
 {
    unsigned eax, ebx, ecx, edx;
@@ -103,6 +168,20 @@ int have_ssse3(void)
    return ecx & 0x200;
 }

+/**
+ * @brief Checks if the current processor supports AVX2 instructions.
+ *
+ * The function detects whether the current processor supports AVX2 instructions by
+ * checking the CPU feature flags. AVX2 (Advanced Vector Extensions 2) is an extension
+ * to the x86 instruction set architecture that introduces additional SIMD instructions
+ * useful for multimedia and signal processing tasks.
+ *
+ * @return true if the current processor supports AVX2 instructions, false otherwise.
+ *
+ * @see https://software.intel.com/sites/landingpage/IntrinsicsGuide/#techs=AVX2
+ * @see https://en.wikipedia.org/wiki/Advanced_Vector_Extensions
+ */
+
 int have_avx2(void)
 {
    unsigned eax, ebx, ecx, edx;
@@ -125,6 +204,19 @@ int have_avx2(void)
 #endif

 #if(defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM)) && defined(__APPLE__)
+/**
+ * @brief Checks if the current processor supports NEON instructions.
+ *
+ * The function detects whether the current processor supports NEON instructions by
+ * checking the CPU feature flags. NEON is an extension to the ARM instruction set
+ * architecture that introduces additional SIMD instructions useful for multimedia
+ * and signal processing tasks.
+ *
+ * @return true if the current processor supports NEON instructions, false otherwise.
+ *
+ * @see https://developer.arm.com/architectures/instruction-sets/simd-isas/neon
+ * @see https://en.wikipedia.org/wiki/ARM_architecture#Advanced_SIMD_(NEON)
+ */
 int have_neon_apple()
 {
    int    value;
@@ -136,6 +228,15 @@ int have_neon_apple()
    return value == 1;
 }

+/**
+ * @brief Checks if the current processor supports CRC32 instructions.
+ *
+ * The function detects whether the current processor supports CRC32 instructions by
+ * checking the CPU feature flags. CRC32 is an extension to the ARM instruction set
+ * architecture that introduces additional instructions for calculating CRC32 checksums.
+ *
+ * @return true if the current processor supports CRC32 instructions, false otherwise.
+ */
 int have_crc32_apple()
 {
    int    value;
@@ -147,6 +248,15 @@ int have_crc32_apple()
    return value == 1;
 }

+/**
+ * @brief Checks if the current processor supports cryptographic instructions.
+ *
+ * Cryptographic instructions are an extension to the ARM instruction set architecture that
+ * introduces additional instructions for cryptographic operations. On Apple platforms this
+ * implementation performs no detection and unconditionally reports them as unsupported.
+ *
+ * @return Always 0 (cryptographic instructions are reported as unsupported).
+ */
 int have_crypto_apple() { return 0; }
 #endif

@@ -156,6 +266,15 @@ int have_neon(void)
    return 1; // ARMv8-A made it mandatory
 }

+/**
+ * @brief Checks if the current processor supports CRC32 instructions.
+ *
+ * The function detects whether the current processor supports CRC32 instructions by
+ * checking the CPU feature flags. CRC32 is an extension to the ARM instruction set
+ * architecture that introduces additional instructions for calculating CRC32 checksums.
+ *
+ * @return true if the current processor supports CRC32 instructions, false otherwise.
+ */
 int have_arm_crc32(void)
 {
 #if defined(_WIN32)
@@ -167,6 +286,15 @@ int have_arm_crc32(void)
 #endif
 }

+/**
+ * @brief Checks if the current processor supports cryptographic instructions.
+ *
+ * The function detects whether the current processor supports cryptographic instructions by
+ * checking the CPU feature flags. Cryptographic instructions are an extension to the ARM instruction set
+ * architecture that introduces additional instructions for cryptographic operations.
+ *
+ * @return true if the current processor supports cryptographic instructions, false otherwise.
+ */
 int have_arm_crypto(void)
 {
 #if defined(_WIN32)
@@ -180,6 +308,19 @@ int have_arm_crypto(void)
 #endif

 #if defined(__arm__) || defined(_M_ARM)
+/**
+ * @brief Checks if the current processor supports NEON instructions.
+ *
+ * The function detects whether the current processor supports NEON instructions by
+ * checking the CPU feature flags. NEON is an extension to the ARM instruction set
+ * architecture that introduces additional SIMD instructions useful for multimedia
+ * and signal processing tasks.
+ *
+ * @return true if the current processor supports NEON instructions, false otherwise.
+ *
+ * @see https://developer.arm.com/architectures/instruction-sets/simd-isas/neon
+ * @see https://en.wikipedia.org/wiki/ARM_architecture#Advanced_SIMD_(NEON)
+ */
 int have_neon(void)
 {
 #if defined(_WIN32)
@@ -191,6 +332,15 @@ int have_neon(void)
 #endif
 }

+/**
+ * @brief Checks if the current processor supports CRC32 instructions.
+ *
+ * The function detects whether the current processor supports CRC32 instructions by
+ * checking the CPU feature flags. CRC32 is an extension to the ARM instruction set
+ * architecture that introduces additional instructions for calculating CRC32 checksums.
+ *
+ * @return true if the current processor supports CRC32 instructions, false otherwise.
+ */
 int have_arm_crc32(void)
 {
 #if defined(_WIN32)
@@ -208,6 +358,15 @@ int have_arm_crc32(void)
 #endif
 }

+/**
+ * @brief Checks if the current processor supports cryptographic instructions.
+ *
+ * The function detects whether the current processor supports cryptographic instructions by
+ * checking the CPU feature flags. Cryptographic instructions are an extension to the ARM instruction set
+ * architecture that introduces additional instructions for cryptographic operations.
+ *
+ * @return true if the current processor supports cryptographic instructions, false otherwise.
+ */
 int have_arm_crypto(void)
 {
 #if defined(_WIN32)
--- a/spamsum.c
+++ b/spamsum.c
@@ -33,6 +33,15 @@ static uint8_t b64[] = {0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x
                        0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76,
                        0x77, 0x78, 0x79, 0x7A, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x2B, 0x2F};

+/**
+ * @brief Initializes the SpamSum checksum algorithm.
+ *
+ * This function initializes the state variables required for the SpamSum
+ * checksum algorithm. It prepares the algorithm to calculate the checksum
+ * for a new data set.
+ *
+ * @return Pointer to a structure containing the checksum state.
+ */
 AARU_EXPORT spamsum_ctx* AARU_CALL spamsum_init(void)
 {
    spamsum_ctx* ctx = (spamsum_ctx*)malloc(sizeof(spamsum_ctx));
@@ -47,6 +56,17 @@ AARU_EXPORT spamsum_ctx* AARU_CALL spamsum_init(void)
    return ctx;
 }

+/**
+ * @brief Updates the SpamSum checksum with new data.
+ *
+ * This function updates the SpamSum checksum.
+ *
+ * @param ctx Pointer to the SpamSum context structure.
+ * @param data Pointer to the input data buffer.
+ * @param len The length of the input data buffer.
+ *
+ * @returns 0 on success, -1 on error.
+ */
 AARU_EXPORT int AARU_CALL spamsum_update(spamsum_ctx* ctx, const uint8_t* data, uint32_t len)
 {
    int i;
@@ -59,6 +79,14 @@ AARU_EXPORT int AARU_CALL spamsum_update(spamsum_ctx* ctx, const uint8_t* data,
    return 0;
 }

+/**
+ * @brief Frees the resources allocated for the SpamSum checksum context.
+ *
+ * This function should be called to release the memory used by the SpamSum checksum
+ * context structure after it is no longer needed.
+ *
+ * @param ctx The SpamSum checksum context structure, to be freed.
+ */
 AARU_EXPORT void AARU_CALL spamsum_free(spamsum_ctx* ctx)
 {
    if(ctx) free(ctx);
@@ -175,6 +203,17 @@ AARU_LOCAL inline void fuzzy_try_fork_blockhash(spamsum_ctx* ctx)
    ++ctx->bh_end;
 }

+/**
+ * @brief Finalizes the calculation of the SpamSum checksum.
+ *
+ * This function finalizes the calculation of the SpamSum checksum and returns
+ * its value.
+ *
+ * @param[in] ctx Pointer to the SpamSum context structure.
+ * @param[out] result Pointer to a buffer to store the checksum value.
+ *
+ * @returns 0 on success, -1 on error.
+ */
 AARU_EXPORT int AARU_CALL spamsum_final(spamsum_ctx* ctx, uint8_t* result)
 {
    uint32_t bi     = ctx->bh_start;