Add documentation.

This commit is contained in:
2023-09-23 18:10:44 +01:00
parent 3358d66f0a
commit 33f021fd54
22 changed files with 658 additions and 48 deletions

101
adler32.c
View File

@@ -29,13 +29,22 @@
#include "adler32.h"
#include "simd.h"
AARU_EXPORT adler32_ctx* AARU_CALL adler32_init()
/**
* @brief Initializes the Adler-32 checksum algorithm.
*
* This function initializes the state variables required for the Adler-32
* checksum algorithm. It prepares the algorithm to calculate the checksum
* for a new data set.
*
* @return Pointer to a structure containing the checksum state.
*/
AARU_EXPORT adler32_ctx *AARU_CALL adler32_init()
{
adler32_ctx* ctx;
adler32_ctx *ctx;
ctx = (adler32_ctx*)malloc(sizeof(adler32_ctx));
ctx = (adler32_ctx *) malloc(sizeof(adler32_ctx));
if(!ctx) return NULL;
if (!ctx) return NULL;
ctx->sum1 = 1;
ctx->sum2 = 0;
@@ -43,18 +52,31 @@ AARU_EXPORT adler32_ctx* AARU_CALL adler32_init()
return ctx;
}
AARU_EXPORT int AARU_CALL adler32_update(adler32_ctx* ctx, const uint8_t* data, uint32_t len)
/**
* @brief Updates the Adler-32 checksum with new data.
*
* This function updates the Adler-32 checksum.
* The checksum is updated for the given data by iterating through each byte and
* applying the corresponding calculations to the rolling checksum values.
*
* @param ctx Pointer to the Adler-32 context structure.
* @param data Pointer to the input data buffer.
* @param len The length of the input data buffer.
*
* @returns 0 on success, -1 on error.
*/
AARU_EXPORT int AARU_CALL adler32_update(adler32_ctx *ctx, const uint8_t *data, uint32_t len)
{
if(!ctx || !data) return -1;
if (!ctx || !data) return -1;
#if defined(__aarch64__) || defined(_M_ARM64) || ((defined(__arm__) || defined(_M_ARM)) && !defined(__MINGW32__))
if(have_neon())
if (have_neon())
{
adler32_neon(&ctx->sum1, &ctx->sum2, data, len);
return 0;
}
#endif
#if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \
#if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \
defined(__i386__) || defined(__THW_INTEL) || defined(_M_IX86)
if(have_avx2())
{
@@ -76,7 +98,15 @@ AARU_EXPORT int AARU_CALL adler32_update(adler32_ctx* ctx, const uint8_t* data,
return 0;
}
AARU_EXPORT void AARU_CALL adler32_slicing(uint16_t* sum1, uint16_t* sum2, const uint8_t* data, long len)
/**
* @brief Calculates Adler-32 checksum for a given data using slicing algorithm.
*
* @param sum1 Pointer to a 16-bit unsigned integer to store the first sum value.
* @param sum2 Pointer to a 16-bit unsigned integer to store the second sum value.
* @param data Pointer to the data for which the checksum is to be calculated.
* @param len The length of the data in bytes.
*/
AARU_EXPORT void AARU_CALL adler32_slicing(uint16_t *sum1, uint16_t *sum2, const uint8_t *data, long len)
{
uint32_t s1 = *sum1;
uint32_t s2 = *sum2;
@@ -84,12 +114,12 @@ AARU_EXPORT void AARU_CALL adler32_slicing(uint16_t* sum1, uint16_t* sum2, const
unsigned n;
/* in case user likes doing a byte at a time, keep it fast */
if(len == 1)
if (len == 1)
{
s1 += data[0];
if(s1 >= ADLER_MODULE) s1 -= ADLER_MODULE;
if (s1 >= ADLER_MODULE) s1 -= ADLER_MODULE;
s2 += s1;
if(s2 >= ADLER_MODULE) s2 -= ADLER_MODULE;
if (s2 >= ADLER_MODULE) s2 -= ADLER_MODULE;
*sum1 = s1 & 0xFFFF;
*sum2 = s2 & 0xFFFF;
@@ -98,14 +128,14 @@ AARU_EXPORT void AARU_CALL adler32_slicing(uint16_t* sum1, uint16_t* sum2, const
}
/* in case short lengths are provided, keep it somewhat fast */
if(len < 16)
if (len < 16)
{
while(len--)
while (len--)
{
s1 += *data++;
s2 += s1;
}
if(s1 >= ADLER_MODULE) s1 -= ADLER_MODULE;
if (s1 >= ADLER_MODULE) s1 -= ADLER_MODULE;
s2 %= ADLER_MODULE; /* only added so many ADLER_MODULE's */
*sum1 = s1 & 0xFFFF;
*sum2 = s2 & 0xFFFF;
@@ -114,11 +144,12 @@ AARU_EXPORT void AARU_CALL adler32_slicing(uint16_t* sum1, uint16_t* sum2, const
}
/* do length NMAX blocks -- requires just one modulo operation */
while(len >= NMAX)
while (len >= NMAX)
{
len -= NMAX;
n = NMAX / 16; /* NMAX is divisible by 16 */
do {
do
{
s1 += (data)[0];
s2 += s1;
s1 += (data)[0 + 1];
@@ -154,15 +185,16 @@ AARU_EXPORT void AARU_CALL adler32_slicing(uint16_t* sum1, uint16_t* sum2, const
/* 16 sums unrolled */
data += 16;
} while(--n);
}
while (--n);
s1 %= ADLER_MODULE;
s2 %= ADLER_MODULE;
}
/* do remaining bytes (less than NMAX, still just one modulo) */
if(len)
if (len)
{ /* avoid modulos if none remaining */
while(len >= 16)
while (len >= 16)
{
len -= 16;
s1 += (data)[0];
@@ -200,7 +232,7 @@ AARU_EXPORT void AARU_CALL adler32_slicing(uint16_t* sum1, uint16_t* sum2, const
data += 16;
}
while(len--)
while (len--)
{
s1 += *data++;
s2 += s1;
@@ -213,17 +245,36 @@ AARU_EXPORT void AARU_CALL adler32_slicing(uint16_t* sum1, uint16_t* sum2, const
*sum2 = s2 & 0xFFFF;
}
AARU_EXPORT int AARU_CALL adler32_final(adler32_ctx* ctx, uint32_t* checksum)
/**
* @brief Finalizes the calculation of the Adler-32 checksum.
*
* This function finalizes the calculation of the Adler-32 checksum and returns
* its value.
*
* @param[in] ctx Pointer to the Adler-32 context structure.
* @param[out] checksum Pointer to a 32-bit unsigned integer to store the checksum value.
*
* @returns 0 on success, -1 on error.
*/
AARU_EXPORT int AARU_CALL adler32_final(adler32_ctx *ctx, uint32_t *checksum)
{
if(!ctx) return -1;
if (!ctx) return -1;
*checksum = (ctx->sum2 << 16) | ctx->sum1;
return 0;
}
AARU_EXPORT void AARU_CALL adler32_free(adler32_ctx* ctx)
/**
* @brief Frees the resources allocated for the Adler-32 checksum context.
*
* This function should be called to release the memory used by the Adler-32 checksum
* context structure after it is no longer needed.
*
* @param ctx The Adler-32 checksum context structure, to be freed.
*/
AARU_EXPORT void AARU_CALL adler32_free(adler32_ctx *ctx)
{
if(!ctx) return;
if (!ctx) return;
free(ctx);
}

View File

@@ -29,13 +29,13 @@ typedef struct
uint16_t sum2;
} adler32_ctx;
AARU_EXPORT adler32_ctx* AARU_CALL adler32_init();
AARU_EXPORT int AARU_CALL adler32_update(adler32_ctx* ctx, const uint8_t* data, uint32_t len);
AARU_EXPORT int AARU_CALL adler32_final(adler32_ctx* ctx, uint32_t* checksum);
AARU_EXPORT void AARU_CALL adler32_free(adler32_ctx* ctx);
AARU_EXPORT void AARU_CALL adler32_slicing(uint16_t* sum1, uint16_t* sum2, const uint8_t* data, long len);
AARU_EXPORT adler32_ctx *AARU_CALL adler32_init();
AARU_EXPORT int AARU_CALL adler32_update(adler32_ctx *ctx, const uint8_t *data, uint32_t len);
AARU_EXPORT int AARU_CALL adler32_final(adler32_ctx *ctx, uint32_t *checksum);
AARU_EXPORT void AARU_CALL adler32_free(adler32_ctx *ctx);
AARU_EXPORT void AARU_CALL adler32_slicing(uint16_t *sum1, uint16_t *sum2, const uint8_t *data, long len);
#if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \
#if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \
defined(__i386__) || defined(__THW_INTEL) || defined(_M_IX86)
AARU_EXPORT SSSE3 void AARU_CALL adler32_ssse3(uint16_t* sum1, uint16_t* sum2, const uint8_t* data, long len);
@@ -45,7 +45,7 @@ AARU_EXPORT AVX2 void AARU_CALL adler32_avx2(uint16_t* sum1, uint16_t* sum2, co
#if defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM)
AARU_EXPORT void AARU_CALL adler32_neon(uint16_t* sum1, uint16_t* sum2, const uint8_t* data, uint32_t len);
AARU_EXPORT void AARU_CALL adler32_neon(uint16_t *sum1, uint16_t *sum2, const uint8_t *data, uint32_t len);
#endif

View File

@@ -32,6 +32,16 @@
#include "adler32.h"
#include "simd.h"
/**
* @brief Calculate Adler-32 checksum for a given data using AVX2 instructions.
*
* This function calculates the Adler-32 checksum for a block of data using AVX2 vector instructions.
*
* @param sum1 Pointer to the variable where the first 16-bit checksum value is stored.
* @param sum2 Pointer to the variable where the second 16-bit checksum value is stored.
* @param data Pointer to the data buffer.
* @param len Length of the data buffer in bytes.
*/
AARU_EXPORT AVX2 void AARU_CALL adler32_avx2(uint16_t* sum1, uint16_t* sum2, const uint8_t* data, long len)
{
uint32_t s1 = *sum1;

View File

@@ -38,6 +38,16 @@
#include "adler32.h"
#include "simd.h"
/**
* @brief Calculate Adler-32 checksum for a given data using NEON instructions.
*
* This function calculates the Adler-32 checksum for a block of data using NEON vector instructions.
*
* @param sum1 Pointer to the variable where the first 16-bit checksum value is stored.
* @param sum2 Pointer to the variable where the second 16-bit checksum value is stored.
* @param data Pointer to the data buffer.
* @param len Length of the data buffer in bytes.
*/
TARGET_WITH_SIMD void adler32_neon(uint16_t *sum1, uint16_t *sum2, const uint8_t *data, uint32_t len)
{
/*

View File

@@ -39,6 +39,17 @@
#include "library.h"
#include "adler32.h"
/**
* @brief Calculate Adler-32 checksum for a given data using SSSE3 instructions.
*
* This function calculates the Adler-32 checksum for a block of data using SSSE3 vector instructions.
*
* @param sum1 Pointer to the variable where the first 16-bit checksum value is stored.
* @param sum2 Pointer to the variable where the second 16-bit checksum value is stored.
* @param data Pointer to the data buffer.
* @param len Length of the data buffer in bytes.
*/
AARU_EXPORT SSSE3 void AARU_CALL adler32_ssse3(uint16_t* sum1, uint16_t* sum2, const uint8_t* data, long len)
{
uint32_t s1 = *sum1;

0
build.sh Executable file → Normal file
View File

42
crc16.c
View File

@@ -22,6 +22,15 @@
#include "library.h"
#include "crc16.h"
/**
* @brief Initializes the CRC-16 checksum algorithm with the IBM polynomial.
*
* This function initializes the state variables required for the CRC-16
* checksum algorithm using the IBM polynomial. It prepares the algorithm
* to calculate the checksum for a new data set.
*
* @return Pointer to a structure containing the checksum state.
*/
AARU_EXPORT crc16_ctx* AARU_CALL crc16_init(void)
{
crc16_ctx* ctx = (crc16_ctx*)malloc(sizeof(crc16_ctx));
@@ -33,6 +42,20 @@ AARU_EXPORT crc16_ctx* AARU_CALL crc16_init(void)
return ctx;
}
/**
* @brief Updates the CRC-16 checksum with new data.
*
* This function updates the CRC-16 checksum.
* The checksum is updated for the given data by using the IBM polynomial.
* The algorithm continues the checksum calculation from the previous state,
* so it can be used to update the checksum with new data as it is read.
*
* @param ctx Pointer to the CRC-16 context structure.
* @param data Pointer to the input data buffer.
* @param len The length of the input data buffer.
*
* @returns 0 on success, -1 on error.
*/
AARU_EXPORT int AARU_CALL crc16_update(crc16_ctx* ctx, const uint8_t* data, uint32_t len)
{
// Unroll according to Intel slicing by uint8_t
@@ -83,6 +106,17 @@ AARU_EXPORT int AARU_CALL crc16_update(crc16_ctx* ctx, const uint8_t* data, uint
return 0;
}
/**
* @brief Finalizes the calculation of the CRC-16 checksum.
*
* This function finalizes the calculation of the CRC-16 checksum and returns
* its value.
*
* @param[in] ctx Pointer to the CRC-16 context structure.
* @param[out] crc Pointer to a 16-bit unsigned integer to store the checksum value.
*
* @returns 0 on success, -1 on error.
*/
AARU_EXPORT int AARU_CALL crc16_final(crc16_ctx* ctx, uint16_t* crc)
{
if(!ctx) return -1;
@@ -92,6 +126,14 @@ AARU_EXPORT int AARU_CALL crc16_final(crc16_ctx* ctx, uint16_t* crc)
return 0;
}
/**
* @brief Frees the resources allocated for the CRC-16 checksum context.
*
* This function should be called to release the memory used by the CRC-16 checksum
* context structure after it is no longer needed.
*
* @param ctx The CRC-16 checksum context structure, to be freed.
*/
AARU_EXPORT void AARU_CALL crc16_free(crc16_ctx* ctx)
{
if(ctx) free(ctx);

View File

@@ -22,6 +22,15 @@
#include "library.h"
#include "crc16_ccitt.h"
/**
* @brief Initializes the CRC-16 checksum algorithm with the CCITT polynomial.
*
* This function initializes the state variables required for the CRC-16
* checksum algorithm using the CCITT polynomial. It prepares the algorithm
* to calculate the checksum for a new data set.
*
* @return Pointer to a structure containing the checksum state.
*/
AARU_EXPORT crc16_ccitt_ctx* AARU_CALL crc16_ccitt_init(void)
{
crc16_ccitt_ctx* ctx = (crc16_ccitt_ctx*)malloc(sizeof(crc16_ccitt_ctx));
@@ -33,6 +42,20 @@ AARU_EXPORT crc16_ccitt_ctx* AARU_CALL crc16_ccitt_init(void)
return ctx;
}
/**
* @brief Updates the CRC-16 checksum with new data.
*
* This function updates the CRC-16 checksum.
* The checksum is updated for the given data by using the CCITT polynomial.
* The algorithm continues the checksum calculation from the previous state,
* so it can be used to update the checksum with new data as it is read.
*
* @param ctx Pointer to the CRC-16 context structure.
* @param data Pointer to the input data buffer.
* @param len The length of the input data buffer.
*
* @returns 0 on success, -1 on error.
*/
AARU_EXPORT int AARU_CALL crc16_ccitt_update(crc16_ccitt_ctx* ctx, const uint8_t* data, uint32_t len)
{
// Unroll according to Intel slicing by uint8_t
@@ -79,6 +102,17 @@ AARU_EXPORT int AARU_CALL crc16_ccitt_update(crc16_ccitt_ctx* ctx, const uint8_t
return 0;
}
/**
* @brief Finalizes the calculation of the CRC-16 checksum.
*
* This function finalizes the calculation of the CRC-16 checksum and returns
* its value.
*
* @param[in] ctx Pointer to the CRC-16 context structure.
* @param[out] crc Pointer to a 16-bit unsigned integer to store the checksum value.
*
* @returns 0 on success, -1 on error.
*/
AARU_EXPORT int AARU_CALL crc16_ccitt_final(crc16_ccitt_ctx* ctx, uint16_t* crc)
{
if(!ctx) return -1;
@@ -88,6 +122,14 @@ AARU_EXPORT int AARU_CALL crc16_ccitt_final(crc16_ccitt_ctx* ctx, uint16_t* crc)
return 0;
}
/**
* @brief Frees the resources allocated for the CRC-16 checksum context.
*
* This function should be called to release the memory used by the CRC-16 checksum
* context structure after it is no longer needed.
*
* @param ctx The CRC-16 checksum context structure, to be freed.
*/
AARU_EXPORT void AARU_CALL crc16_ccitt_free(crc16_ccitt_ctx* ctx)
{
if(ctx) free(ctx);

53
crc32.c
View File

@@ -22,6 +22,15 @@
#include "library.h"
#include "crc32.h"
/**
* @brief Initializes the CRC-32 checksum algorithm with the ISO polynomial.
*
* This function initializes the state variables required for the CRC-32
* checksum algorithm using the ISO polynomial. It prepares the algorithm
* to calculate the checksum for a new data set.
*
* @return Pointer to a structure containing the checksum state.
*/
AARU_EXPORT crc32_ctx* AARU_CALL crc32_init(void)
{
crc32_ctx* ctx = (crc32_ctx*)malloc(sizeof(crc32_ctx));
@@ -33,6 +42,20 @@ AARU_EXPORT crc32_ctx* AARU_CALL crc32_init(void)
return ctx;
}
/**
* @brief Updates the CRC-32 checksum with new data.
*
* This function updates the CRC-32 checksum.
* The checksum is updated for the given data by using the ISO polynomial.
* The algorithm continues the checksum calculation from the previous state,
* so it can be used to update the checksum with new data as it is read.
*
* @param ctx Pointer to the CRC-32 context structure.
* @param data Pointer to the input data buffer.
* @param len The length of the input data buffer.
*
* @returns 0 on success, -1 on error.
*/
AARU_EXPORT int AARU_CALL crc32_update(crc32_ctx* ctx, const uint8_t* data, uint32_t len)
{
if(!ctx || !data) return -1;
@@ -67,6 +90,17 @@ AARU_EXPORT int AARU_CALL crc32_update(crc32_ctx* ctx, const uint8_t* data, uint
return 0;
}
/**
* @brief Computes the CRC-32 checksum using slicing-by-8 algorithm.
*
* This function calculates the CRC-32 value for the given data using slicing-by-8 algorithm.
*
* @param previous_crc A pointer to the previous CRC-32 value, and where the updated value gets stored.
* @param data The pointer to the data buffer.
* @param len The length of the data in bytes.
*
* @note This function assumes little-endian byte order.
*/
AARU_EXPORT void AARU_CALL crc32_slicing(uint32_t* previous_crc, const uint8_t* data, long len)
{
// Unroll according to Intel slicing by uint8_t
@@ -113,6 +147,17 @@ AARU_EXPORT void AARU_CALL crc32_slicing(uint32_t* previous_crc, const uint8_t*
*previous_crc = c;
}
/**
* @brief Finalizes the calculation of the CRC-32 checksum.
*
* This function finalizes the calculation of the CRC-32 checksum and returns
* its value.
*
* @param[in] ctx Pointer to the CRC-32 context structure.
* @param[out] crc Pointer to a 32-bit unsigned integer to store the checksum value.
*
* @returns 0 on success, -1 on error.
*/
AARU_EXPORT int AARU_CALL crc32_final(crc32_ctx* ctx, uint32_t* crc)
{
if(!ctx) return -1;
@@ -122,6 +167,14 @@ AARU_EXPORT int AARU_CALL crc32_final(crc32_ctx* ctx, uint32_t* crc)
return 0;
}
/**
* @brief Frees the resources allocated for the CRC-32 checksum context.
*
* This function should be called to release the memory used by the CRC-32 checksum
* context structure after it is no longer needed.
*
* @param ctx The CRC-32 checksum context structure, to be freed.
*/
AARU_EXPORT void AARU_CALL crc32_free(crc32_ctx* ctx)
{
if(ctx) free(ctx);

View File

@@ -41,6 +41,17 @@
#include "library.h"
#include "crc32.h"
/**
* @brief Calculates the CRC-32 using the ARMv8 instruction set in little endian mode.
*
* This function takes the previous CRC value, data and length as inputs and calculates
* the new CRC-32 using the ARMv8 instruction set in little endian mode.
*
* @param previous_crc The previous CRC value.
* @param data The input data to calculate the CRC over.
* @param len The length of the input data.
* @return The new CRC-32 value.
*/
TARGET_ARMV8_WITH_CRC uint32_t armv8_crc32_little(uint32_t previous_crc, const uint8_t* data, uint32_t len)
{
uint32_t c = (uint32_t)previous_crc;

View File

@@ -34,8 +34,7 @@
#include "crc32.h"
#include "crc32_simd.h"
CLMUL
static void fold_1(__m128i* xmm_crc0, __m128i* xmm_crc1, __m128i* xmm_crc2, __m128i* xmm_crc3)
CLMUL static void fold_1(__m128i* xmm_crc0, __m128i* xmm_crc1, __m128i* xmm_crc2, __m128i* xmm_crc3)
{
const __m128i xmm_fold4 = _mm_set_epi32(0x00000001, 0x54442bd4, 0x00000001, 0xc6e41596);
@@ -57,8 +56,7 @@ static void fold_1(__m128i* xmm_crc0, __m128i* xmm_crc1, __m128i* xmm_crc2, __m1
*xmm_crc3 = _mm_castps_si128(ps_res);
}
CLMUL
static void fold_2(__m128i* xmm_crc0, __m128i* xmm_crc1, __m128i* xmm_crc2, __m128i* xmm_crc3)
CLMUL static void fold_2(__m128i* xmm_crc0, __m128i* xmm_crc1, __m128i* xmm_crc2, __m128i* xmm_crc3)
{
const __m128i xmm_fold4 = _mm_set_epi32(0x00000001, 0x54442bd4, 0x00000001, 0xc6e41596);
@@ -88,8 +86,7 @@ static void fold_2(__m128i* xmm_crc0, __m128i* xmm_crc1, __m128i* xmm_crc2, __m1
*xmm_crc3 = _mm_castps_si128(ps_res31);
}
CLMUL
static void fold_3(__m128i* xmm_crc0, __m128i* xmm_crc1, __m128i* xmm_crc2, __m128i* xmm_crc3)
CLMUL static void fold_3(__m128i* xmm_crc0, __m128i* xmm_crc1, __m128i* xmm_crc2, __m128i* xmm_crc3)
{
const __m128i xmm_fold4 = _mm_set_epi32(0x00000001, 0x54442bd4, 0x00000001, 0xc6e41596);
@@ -125,8 +122,7 @@ static void fold_3(__m128i* xmm_crc0, __m128i* xmm_crc1, __m128i* xmm_crc2, __m1
*xmm_crc3 = _mm_castps_si128(ps_res32);
}
CLMUL
static void fold_4(__m128i* xmm_crc0, __m128i* xmm_crc1, __m128i* xmm_crc2, __m128i* xmm_crc3)
CLMUL static void fold_4(__m128i* xmm_crc0, __m128i* xmm_crc1, __m128i* xmm_crc2, __m128i* xmm_crc3)
{
const __m128i xmm_fold4 = _mm_set_epi32(0x00000001, 0x54442bd4, 0x00000001, 0xc6e41596);
@@ -170,8 +166,7 @@ static void fold_4(__m128i* xmm_crc0, __m128i* xmm_crc1, __m128i* xmm_crc2, __m1
*xmm_crc3 = _mm_castps_si128(ps_res3);
}
CLMUL
static void partial_fold(const size_t len,
CLMUL static void partial_fold(const size_t len,
__m128i* xmm_crc0,
__m128i* xmm_crc1,
__m128i* xmm_crc2,
@@ -228,6 +223,15 @@ static void partial_fold(const size_t len,
*/
#define XOR_INITIAL(where) ONCE(where = _mm_xor_si128(where, xmm_initial))
/**
* @brief Calculate the CRC32 checksum using CLMUL instruction extension.
*
* @param previous_crc The previously calculated CRC32 checksum.
* @param data Pointer to the input data buffer.
* @param len Length of the input data in bytes.
*
* @return The calculated CRC32 checksum.
*/
AARU_EXPORT CLMUL uint32_t AARU_CALL crc32_clmul(uint32_t previous_crc, const uint8_t* data, long len)
{
unsigned long algn_diff;

View File

@@ -233,6 +233,20 @@ TARGET_WITH_SIMD FORCE_INLINE void partial_fold(const size_t len,
*q_crc3 = vreinterpretq_u64_u32(ps_res);
}
/**
* @brief Calculates the CRC-32 checksum using the vmull instruction.
*
* This function calculates the CRC-32 checksum of the given data using the
* vmull instruction for optimized performance. It takes the previous CRC value,
* the data buffer, and the length of data as parameters. The function returns
* the resulting CRC-32 checksum.
*
* @param previous_crc The previous CRC value.
* @param data The data buffer.
* @param len The length of the data buffer.
*
* @return The CRC-32 checksum of the given data.
*/
TARGET_WITH_SIMD uint32_t crc32_vmull(uint32_t previous_crc, const uint8_t* data, long len)
{
unsigned long algn_diff;

42
crc64.c
View File

@@ -23,6 +23,15 @@
#include "crc64.h"
#include "simd.h"
/**
* @brief Initializes the CRC-64 checksum algorithm with the ECMA polynomial.
*
* This function initializes the state variables required for the CRC-64
* checksum algorithm using the ECMA polynomial. It prepares the algorithm
* to calculate the checksum for a new data set.
*
* @return Pointer to a structure containing the checksum state.
*/
AARU_EXPORT crc64_ctx* AARU_CALL crc64_init(void)
{
int i, slice;
@@ -35,6 +44,20 @@ AARU_EXPORT crc64_ctx* AARU_CALL crc64_init(void)
return ctx;
}
/**
* @brief Updates the CRC-64 checksum with new data.
*
* This function updates the CRC-64 checksum.
* The checksum is updated for the given data by using the ECMA polynomial.
* The algorithm continues the checksum calculation from the previous state,
* so it can be used to update the checksum with new data as it is read.
*
* @param ctx Pointer to the CRC-64 context structure.
* @param data Pointer to the input data buffer.
* @param len The length of the input data buffer.
*
* @returns 0 on success, -1 on error.
*/
AARU_EXPORT int AARU_CALL crc64_update(crc64_ctx* ctx, const uint8_t* data, uint32_t len)
{
if(!ctx || !data) return -1;
@@ -97,6 +120,17 @@ AARU_EXPORT void AARU_CALL crc64_slicing(uint64_t* previous_crc, const uint8_t*
*previous_crc = c;
}
/**
* @brief Finalizes the calculation of the CRC-64 checksum.
*
* This function finalizes the calculation of the CRC-64 checksum and returns
* its value.
*
* @param[in] ctx Pointer to the CRC-64 context structure.
* @param[out] crc Pointer to a 64-bit unsigned integer to store the checksum value.
*
* @returns 0 on success, -1 on error.
*/
AARU_EXPORT int AARU_CALL crc64_final(crc64_ctx* ctx, uint64_t* crc)
{
if(!ctx) return -1;
@@ -106,6 +140,14 @@ AARU_EXPORT int AARU_CALL crc64_final(crc64_ctx* ctx, uint64_t* crc)
return 0;
}
/**
* @brief Frees the resources allocated for the CRC-64 checksum context.
*
* This function should be called to release the memory used by the CRC-64 checksum
* context structure after it is no longer needed.
*
* @param ctx The CRC-64 checksum context structure, to be freed.
*/
AARU_EXPORT void AARU_CALL crc64_free(crc64_ctx* ctx)
{
if(ctx) free(ctx);

View File

@@ -72,6 +72,15 @@ CLMUL static __m128i fold(__m128i in, __m128i foldConstants)
return _mm_xor_si128(_mm_clmulepi64_si128(in, foldConstants, 0x00), _mm_clmulepi64_si128(in, foldConstants, 0x11));
}
/**
* @brief Calculate the CRC-64 checksum using CLMUL instruction extension.
*
* @param crc The previously calculated CRC-64 checksum.
* @param data Pointer to the input data buffer.
* @param length Length of the input data in bytes.
*
* @return The calculated CRC-64 checksum.
*/
AARU_EXPORT CLMUL uint64_t AARU_CALL crc64_clmul(uint64_t crc, const uint8_t* data, long length)
{
const uint64_t k1 = 0xe05dd497ca393ae4; // bitReflect(expMod65(128 + 64, poly, 1)) << 1;

View File

@@ -39,6 +39,20 @@ TARGET_WITH_SIMD FORCE_INLINE uint64x2_t fold(uint64x2_t in, uint64x2_t foldCons
sse2neon_vmull_p64(vget_high_u64(in), vget_high_u64(foldConstants)));
}
/**
* @brief Calculates the CRC-64 checksum using the vmull instruction.
*
* This function calculates the CRC-64 checksum of the given data using the
* vmull instruction for optimized performance. It takes the previous CRC value,
* the data buffer, and the length of data as parameters. The function returns
* the resulting CRC-64 checksum.
*
* @param previous_crc The previous CRC value.
* @param data The data buffer.
* @param len The length of the data buffer.
*
* @return The CRC-64 checksum of the given data.
*/
AARU_EXPORT TARGET_WITH_SIMD uint64_t AARU_CALL crc64_vmull(uint64_t previous_crc, const uint8_t* data, long len)
{
const uint64_t k1 = 0xe05dd497ca393ae4; // bitReflect(expMod65(128 + 64, poly, 1)) << 1;

View File

@@ -28,6 +28,15 @@
#include "library.h"
#include "fletcher16.h"
/**
* @brief Initializes the Fletcher-16 checksum algorithm.
*
* This function initializes the state variables required for the Fletcher-16
* checksum algorithm. It prepares the algorithm to calculate the checksum
* for a new data set.
*
* @return Pointer to a structure containing the checksum state.
*/
AARU_EXPORT fletcher16_ctx* AARU_CALL fletcher16_init()
{
fletcher16_ctx* ctx;
@@ -42,6 +51,17 @@ AARU_EXPORT fletcher16_ctx* AARU_CALL fletcher16_init()
return ctx;
}
/**
* @brief Updates the Fletcher-16 checksum with new data.
*
* This function updates the Fletcher-16 checksum.
* The checksum is updated for the given data by iterating through each byte and
* applying the corresponding calculations to the rolling checksum values.
*
* @param ctx Pointer to the Fletcher-16 context structure.
* @param data Pointer to the input data buffer.
* @param len The length of the input data buffer.
*
* @returns 0 on success, -1 on error.
*/
AARU_EXPORT int AARU_CALL fletcher16_update(fletcher16_ctx* ctx, const uint8_t* data, uint32_t len)
{
if(!ctx || !data) return -1;
@@ -159,6 +179,17 @@ AARU_EXPORT int AARU_CALL fletcher16_update(fletcher16_ctx* ctx, const uint8_t*
return 0;
}
/**
* @brief Finalizes the calculation of the Fletcher-16 checksum.
*
* This function finalizes the calculation of the Fletcher-16 checksum and returns
* its value.
*
* @param[in] ctx Pointer to the Fletcher-16 context structure.
* @param[out] checksum Pointer to a 16-bit unsigned integer to store the checksum value.
*
* @returns 0 on success, -1 on error.
*/
AARU_EXPORT int AARU_CALL fletcher16_final(fletcher16_ctx* ctx, uint16_t* checksum)
{
if(!ctx) return -1;
@@ -167,6 +198,14 @@ AARU_EXPORT int AARU_CALL fletcher16_final(fletcher16_ctx* ctx, uint16_t* checks
return 0;
}
/**
* @brief Frees the resources allocated for the Fletcher-16 checksum context.
*
* This function should be called to release the memory used by the Fletcher-16 checksum
* context structure after it is no longer needed.
*
* @param ctx The Fletcher-16 checksum context structure, to be freed.
*/
AARU_EXPORT void AARU_CALL fletcher16_free(fletcher16_ctx* ctx)
{
if(!ctx) return;

View File

@@ -28,6 +28,15 @@
#include "library.h"
#include "fletcher32.h"
/**
* @brief Initializes the Fletcher-32 checksum algorithm.
*
* This function initializes the state variables required for the Fletcher-32
* checksum algorithm. It prepares the algorithm to calculate the checksum
* for a new data set.
*
* @return Pointer to a structure containing the checksum state.
*/
AARU_EXPORT fletcher32_ctx* AARU_CALL fletcher32_init()
{
fletcher32_ctx* ctx;
@@ -42,6 +51,17 @@ AARU_EXPORT fletcher32_ctx* AARU_CALL fletcher32_init()
return ctx;
}
/**
* @brief Updates the Fletcher-32 checksum with new data.
*
* This function updates the Fletcher-32 checksum.
* The checksum is updated for the given data by iterating through each byte and
* applying the corresponding calculations to the rolling checksum values.
*
* @param ctx Pointer to the Fletcher-32 context structure.
* @param data Pointer to the input data buffer.
* @param len The length of the input data buffer.
*
* @returns 0 on success, -1 on error.
*/
AARU_EXPORT int AARU_CALL fletcher32_update(fletcher32_ctx* ctx, const uint8_t* data, uint32_t len)
{
if(!ctx || !data) return -1;
@@ -206,6 +226,17 @@ AARU_EXPORT int AARU_CALL fletcher32_update(fletcher32_ctx* ctx, const uint8_t*
return 0;
}
/**
* @brief Finalizes the calculation of the Fletcher-32 checksum.
*
* This function finalizes the calculation of the Fletcher-32 checksum and returns
* its value.
*
* @param[in] ctx Pointer to the Fletcher-32 context structure.
* @param[out] checksum Pointer to a 32-bit unsigned integer to store the checksum value.
*
* @returns 0 on success, -1 on error.
*/
AARU_EXPORT int AARU_CALL fletcher32_final(fletcher32_ctx* ctx, uint32_t* checksum)
{
if(!ctx) return -1;
@@ -214,6 +245,14 @@ AARU_EXPORT int AARU_CALL fletcher32_final(fletcher32_ctx* ctx, uint32_t* checks
return 0;
}
/**
* @brief Frees the resources allocated for the Fletcher-32 checksum context.
*
* This function should be called to release the memory used by the Fletcher-32 checksum
* context structure after it is no longer needed.
*
* @param ctx The Fletcher-32 checksum context structure, to be freed.
*/
AARU_EXPORT void AARU_CALL fletcher32_free(fletcher32_ctx* ctx)
{
if(!ctx) return;

View File

@@ -32,6 +32,16 @@
#include "fletcher32.h"
#include "simd.h"
/**
* @brief Calculate Fletcher-32 checksum for a given data using AVX2 instructions.
*
* This function calculates the Fletcher-32 checksum for a block of data using AVX2 vector instructions.
*
* @param sum1 Pointer to the variable where the first 16-bit checksum value is stored.
* @param sum2 Pointer to the variable where the second 16-bit checksum value is stored.
* @param data Pointer to the data buffer.
* @param len Length of the data buffer in bytes.
*/
AARU_EXPORT AVX2 void AARU_CALL fletcher32_avx2(uint16_t *sum1, uint16_t *sum2, const uint8_t *data, long len) {
uint32_t s1 = *sum1;
uint32_t s2 = *sum2;

View File

@@ -41,14 +41,15 @@
TARGET_WITH_SIMD /***/
/**
* @brief Compute the Fletcher-32 checksum using NEON instructions.
* @brief Calculate Fletcher-32 checksum for a given data using NEON instructions.
*
* @param[out] sum1 Pointer to the first sum value.
* @param[out] sum2 Pointer to the second sum value.
* @param[in] data Pointer to the input data.
* @param[in] len The length of the input data.
* This function calculates the Fletcher-32 checksum for a block of data using NEON vector instructions.
*
* @param sum1 Pointer to the variable where the first 16-bit checksum value is stored.
* @param sum2 Pointer to the variable where the second 16-bit checksum value is stored.
* @param data Pointer to the data buffer.
* @param len Length of the data buffer in bytes.
*/
void fletcher32_neon(uint16_t *sum1, uint16_t *sum2, const uint8_t *data, uint32_t len) {
/*
* Split Fletcher-32 into component sums.

View File

@@ -39,6 +39,16 @@
#include "library.h"
#include "fletcher32.h"
/**
* @brief Calculate Fletcher-32 checksum for a given data using SSSE3 instructions.
*
* This function calculates the Fletcher-32 checksum for a block of data using SSSE3 vector instructions.
*
* @param sum1 Pointer to the variable where the first 16-bit checksum value is stored.
* @param sum2 Pointer to the variable where the second 16-bit checksum value is stored.
* @param data Pointer to the data buffer.
* @param len Length of the data buffer in bytes.
*/
AARU_EXPORT SSSE3 void AARU_CALL fletcher32_ssse3(uint16_t* sum1, uint16_t* sum2, const uint8_t* data, long len)
{
uint32_t s1 = *sum1;

159
simd.c
View File

@@ -36,6 +36,26 @@
#endif
/**
* @brief Gets the CPUID information for the given info value.
*
* This function retrieves the CPUID information for the specified info argument
* and stores the results in the provided pointers: eax, ebx, ecx, and edx.
* Each register represents a 32-bit value returned by the CPUID instruction.
*
* @param info The CPUID info value specifying the desired information to retrieve.
* @param eax Pointer to store the value of the EAX register.
* @param ebx Pointer to store the value of the EBX register.
* @param ecx Pointer to store the value of the ECX register.
* @param edx Pointer to store the value of the EDX register.
*
* @note It is important to ensure that the provided pointers are valid and point
* to a memory location that can be modified by this function.
*
* @see https://en.wikipedia.org/wiki/CPUID
*/
static void cpuid(int info, unsigned* eax, unsigned* ebx, unsigned* ecx, unsigned* edx)
{
#ifdef _MSC_VER
@@ -59,6 +79,26 @@ static void cpuid(int info, unsigned* eax, unsigned* ebx, unsigned* ecx, unsigne
#endif
}
/**
* @brief Get the CPU extended information using CPUID instruction.
*
* This function retrieves the extended information from the CPU by using the CPUID instruction.
* It reads the result into the output parameters eax, ebx, ecx, and edx based on the input parameters info and count.
*
* @param info The CPUID function number to be executed.
* @param count The sub-leaf index for certain CPUID functions.
* @param eax Pointer to store the value of the EAX register.
* @param ebx Pointer to store the value of the EBX register.
* @param ecx Pointer to store the value of the ECX register.
* @param edx Pointer to store the value of the EDX register.
*
* @note It is important to ensure that the provided pointers are valid and point
* to a memory location that can be modified by this function.
*
* @see https://en.wikipedia.org/wiki/CPUID
*/
static void cpuidex(int info, int count, unsigned* eax, unsigned* ebx, unsigned* ecx, unsigned* edx)
{
#ifdef _MSC_VER
@@ -82,6 +122,18 @@ static void cpuidex(int info, int count, unsigned* eax, unsigned* ebx, unsigned*
#endif
}
/**
* @brief Checks if the hardware supports the CLMUL instruction set.
*
* The function checks if the system's CPU supports the CLMUL (Carry-Less Multiplication) instruction set.
* CLMUL is an extension to the x86 instruction set architecture and provides hardware acceleration for
* carry-less multiplication operations.
*
* @return 1 if both the PCLMULQDQ and the SSE4.1 checks pass, 0 otherwise.
*
* @see https://software.intel.com/sites/landingpage/IntrinsicsGuide/#techs=CLMUL
* @see https://en.wikipedia.org/wiki/Carry-less_multiplication
*/
int have_clmul(void)
{
unsigned eax, ebx, ecx, edx;
@@ -95,6 +147,19 @@ int have_clmul(void)
return has_pclmulqdq && has_sse41;
}
/**
* @brief Checks if the current processor supports SSSE3 instructions.
*
* The function detects whether the current processor supports SSSE3 instructions by
* checking the CPU feature flags. SSSE3 (Supplemental Streaming SIMD Extensions 3)
* is an extension to the x86 instruction set architecture that introduces
* additional SIMD instructions useful for multimedia and signal processing tasks.
*
* @return Non-zero if the current processor supports SSSE3 instructions, 0 otherwise.
*
* @see https://software.intel.com/sites/landingpage/IntrinsicsGuide/#techs=SSSE3
* @see https://en.wikipedia.org/wiki/SSSE3
*/
int have_ssse3(void)
{
unsigned eax, ebx, ecx, edx;
@@ -103,6 +168,20 @@ int have_ssse3(void)
return ecx & 0x200;
}
/**
* @brief Checks if the current processor supports AVX2 instructions.
*
* The function detects whether the current processor supports AVX2 instructions by
* checking the CPU feature flags. AVX2 (Advanced Vector Extensions 2) is an extension
* to the x86 instruction set architecture that introduces additional SIMD instructions
* useful for multimedia and signal processing tasks.
*
* @return true if the current processor supports AVX2 instructions, false otherwise.
*
* @see https://software.intel.com/sites/landingpage/IntrinsicsGuide/#techs=AVX2
* @see https://en.wikipedia.org/wiki/Advanced_Vector_Extensions
*/
int have_avx2(void)
{
unsigned eax, ebx, ecx, edx;
@@ -125,6 +204,19 @@ int have_avx2(void)
#endif
#if(defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM)) && defined(__APPLE__)
/**
* @brief Checks if the current processor supports NEON instructions.
*
* The function detects whether the current processor supports NEON instructions by
* checking the CPU feature flags. NEON is an extension to the ARM instruction set
* architecture that introduces additional SIMD instructions useful for multimedia
* and signal processing tasks.
*
* @return true if the current processor supports NEON instructions, false otherwise.
*
* @see https://developer.arm.com/architectures/instruction-sets/simd-isas/neon
* @see https://en.wikipedia.org/wiki/ARM_architecture#Advanced_SIMD_(NEON)
*/
int have_neon_apple()
{
int value;
@@ -136,6 +228,15 @@ int have_neon_apple()
return value == 1;
}
/**
* @brief Checks if the current processor supports CRC32 instructions.
*
* The function detects whether the current processor supports CRC32 instructions by
* checking the CPU feature flags. CRC32 is an extension to the ARM instruction set
* architecture that introduces additional instructions for calculating CRC32 checksums.
*
* @return true if the current processor supports CRC32 instructions, false otherwise.
*/
int have_crc32_apple()
{
int value;
@@ -147,6 +248,15 @@ int have_crc32_apple()
return value == 1;
}
/**
 * @brief Reports whether cryptographic instructions are usable on Apple ARM targets.
 *
 * No runtime detection is performed here: this implementation unconditionally
 * reports the cryptographic extensions as unavailable.
 *
 * @return Always 0.
 */
int have_crypto_apple(void)
{
    return 0;
}
#endif
@@ -156,6 +266,15 @@ int have_neon(void)
return 1; // ARMv8-A made it mandatory
}
/**
* @brief Checks if the current processor supports CRC32 instructions.
*
* The function detects whether the current processor supports CRC32 instructions by
* checking the CPU feature flags. CRC32 is an extension to the ARM instruction set
* architecture that introduces additional instructions for calculating CRC32 checksums.
*
* @return true if the current processor supports CRC32 instructions, false otherwise.
*/
int have_arm_crc32(void)
{
#if defined(_WIN32)
@@ -167,6 +286,15 @@ int have_arm_crc32(void)
#endif
}
/**
* @brief Checks if the current processor supports cryptographic instructions.
*
* The function detects whether the current processor supports cryptographic instructions by
* checking the CPU feature flags. Cryptographic instructions are an extension to the ARM instruction set
* architecture that introduces additional instructions for cryptographic operations.
*
* @return true if the current processor supports cryptographic instructions, false otherwise.
*/
int have_arm_crypto(void)
{
#if defined(_WIN32)
@@ -180,6 +308,19 @@ int have_arm_crypto(void)
#endif
#if defined(__arm__) || defined(_M_ARM)
/**
* @brief Checks if the current processor supports NEON instructions.
*
* The function detects whether the current processor supports NEON instructions by
* checking the CPU feature flags. NEON is an extension to the ARM instruction set
* architecture that introduces additional SIMD instructions useful for multimedia
* and signal processing tasks.
*
* @return true if the current processor supports NEON instructions, false otherwise.
*
* @see https://developer.arm.com/architectures/instruction-sets/simd-isas/neon
* @see https://en.wikipedia.org/wiki/ARM_architecture#Advanced_SIMD_(NEON)
*/
int have_neon(void)
{
#if defined(_WIN32)
@@ -191,6 +332,15 @@ int have_neon(void)
#endif
}
/**
* @brief Checks if the current processor supports CRC32 instructions.
*
* The function detects whether the current processor supports CRC32 instructions by
* checking the CPU feature flags. CRC32 is an extension to the ARM instruction set
* architecture that introduces additional instructions for calculating CRC32 checksums.
*
* @return true if the current processor supports CRC32 instructions, false otherwise.
*/
int have_arm_crc32(void)
{
#if defined(_WIN32)
@@ -208,6 +358,15 @@ int have_arm_crc32(void)
#endif
}
/**
* @brief Checks if the current processor supports cryptographic instructions.
*
* The function detects whether the current processor supports cryptographic instructions by
* checking the CPU feature flags. Cryptographic instructions are an extension to the ARM instruction set
* architecture that introduces additional instructions for cryptographic operations.
*
* @return true if the current processor supports cryptographic instructions, false otherwise.
*/
int have_arm_crypto(void)
{
#if defined(_WIN32)

View File

@@ -33,6 +33,15 @@ static uint8_t b64[] = {0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x
0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76,
0x77, 0x78, 0x79, 0x7A, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x2B, 0x2F};
/**
* @brief Initializes the SpamSum checksum algorithm.
*
* This function initializes the state variables required for the SpamSum
* checksum algorithm. It prepares the algorithm to calculate the checksum
* for a new data set.
*
* @return Pointer to a structure containing the checksum state.
*/
AARU_EXPORT spamsum_ctx* AARU_CALL spamsum_init(void)
{
spamsum_ctx* ctx = (spamsum_ctx*)malloc(sizeof(spamsum_ctx));
@@ -47,6 +56,17 @@ AARU_EXPORT spamsum_ctx* AARU_CALL spamsum_init(void)
return ctx;
}
/**
* @brief Updates the SpamSum checksum with new data.
*
* This function updates the SpamSum checksum.
*
* @param ctx Pointer to the SpamSum context structure.
* @param data Pointer to the input data buffer.
* @param len The length of the input data buffer.
*
* @returns 0 on success, -1 on error.
*/
AARU_EXPORT int AARU_CALL spamsum_update(spamsum_ctx* ctx, const uint8_t* data, uint32_t len)
{
int i;
@@ -59,6 +79,14 @@ AARU_EXPORT int AARU_CALL spamsum_update(spamsum_ctx* ctx, const uint8_t* data,
return 0;
}
/**
* @brief Frees the resources allocated for the SpamSum checksum context.
*
* This function should be called to release the memory used by the SpamSum checksum
* context structure after it is no longer needed.
*
* @param ctx Pointer to the SpamSum checksum context structure to be freed. A NULL pointer is ignored.
*/
AARU_EXPORT void AARU_CALL spamsum_free(spamsum_ctx* ctx)
{
if(ctx) free(ctx);
@@ -175,6 +203,17 @@ AARU_LOCAL inline void fuzzy_try_fork_blockhash(spamsum_ctx* ctx)
++ctx->bh_end;
}
/**
* @brief Finalizes the calculation of the SpamSum checksum.
*
* This function finalizes the calculation of the SpamSum checksum and returns
* its value.
*
* @param[in] ctx Pointer to the SpamSum context structure.
* @param[out] result Pointer to a buffer to store the checksum value.
*
* @returns 0 on success, -1 on error.
*/
AARU_EXPORT int AARU_CALL spamsum_final(spamsum_ctx* ctx, uint8_t* result)
{
uint32_t bi = ctx->bh_start;