Fix TARGET_WITH in wrong places.

2023-09-23 19:51:50 +01:00
parent 0bcfa9c5e5
commit d325d82cf1
8 changed files with 27 additions and 29 deletions

View File

@@ -33,9 +33,9 @@
#include "simd.h" #include "simd.h"
/** /**
* @brief Calculate Adler-32 checksum for a given data using TARGET_WITH_AVX2 instructions. * @brief Calculate Adler-32 checksum for a given data using AVX2 instructions.
* *
* This function calculates the Adler-32 checksum for a block of data using TARGET_WITH_AVX2 vector instructions. * This function calculates the Adler-32 checksum for a block of data using AVX2 vector instructions.
* *
* @param sum1 Pointer to the variable where the first 16-bit checksum value is stored. * @param sum1 Pointer to the variable where the first 16-bit checksum value is stored.
* @param sum2 Pointer to the variable where the second 16-bit checksum value is stored. * @param sum2 Pointer to the variable where the second 16-bit checksum value is stored.
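For context, the checksum these SIMD routines vectorize can be written as a plain scalar loop over the data. The sketch below is illustrative only: it mirrors the documented sum1/sum2 interface but is not the project's fallback path, and it applies the modulo on every byte instead of the usual deferred batching.

#include <stdint.h>

/* Illustrative scalar Adler-32 (hypothetical helper, not this repository's code):
 * sum1 accumulates the bytes, sum2 accumulates the running sum1 values,
 * both reduced modulo 65521, the largest prime below 2^16. */
static void adler32_scalar(uint16_t *sum1, uint16_t *sum2, const uint8_t *data, uint32_t len)
{
    uint32_t s1 = *sum1, s2 = *sum2;

    for (uint32_t i = 0; i < len; i++) {
        s1 = (s1 + data[i]) % 65521;
        s2 = (s2 + s1) % 65521;
    }

    *sum1 = (uint16_t)s1;
    *sum2 = (uint16_t)s2;
}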

View File

@@ -41,9 +41,9 @@
 /**
- * @brief Calculate Adler-32 checksum for a given data using TARGET_WITH_SSSE3 instructions.
+ * @brief Calculate Adler-32 checksum for a given data using SSSE3 instructions.
  *
- * This function calculates the Adler-32 checksum for a block of data using TARGET_WITH_SSSE3 vector instructions.
+ * This function calculates the Adler-32 checksum for a block of data using SSSE3 vector instructions.
  *
  * @param sum1 Pointer to the variable where the first 16-bit checksum value is stored.
  * @param sum2 Pointer to the variable where the second 16-bit checksum value is stored.

View File

@@ -224,7 +224,7 @@ TARGET_WITH_CLMUL static void partial_fold(const size_t len,
 #define XOR_INITIAL(where) ONCE(where = _mm_xor_si128(where, xmm_initial))
 /**
- * @brief Calculate the CRC32 checksum using TARGET_WITH_CLMUL instruction extension.
+ * @brief Calculate the CRC32 checksum using CLMUL instruction extension.
  *
  * @param previous_crc The previously calculated CRC32 checksum.
  * @param data Pointer to the input data buffer.
@@ -437,7 +437,7 @@ done:
 /*
  * could just as well write xmm_crc3[2], doing a movaps and truncating, but
  * no real advantage - it's a tiny bit slower per call, while no additional CPUs
- * would be supported by only requiring TARGET_WITH_SSSE3 and TARGET_WITH_CLMUL instead of SSE4.1 + TARGET_WITH_CLMUL
+ * would be supported by only requiring SSSE3 and CLMUL instead of SSE4.1 + CLMUL
  */
 crc = _mm_extract_epi32(xmm_crc3, 2);
 return ~crc;
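The comment above weighs SSE4.1's _mm_extract_epi32 against an alternative that needs no SSE4.1 at all. For illustration, one SSE2-only way to read lane 2 is a shuffle followed by a truncating move; this is a sketch of the trade-off being described, not code from this repository.

#include <stdint.h>
#include <emmintrin.h>  /* SSE2 */

/* Lane extraction without SSE4.1 (illustrative only): PSHUFD broadcasts
 * element 2 into the low lane, then MOVD truncates to 32 bits. */
static uint32_t extract_lane2_sse2(__m128i v)
{
    return (uint32_t)_mm_cvtsi128_si32(_mm_shuffle_epi32(v, _MM_SHUFFLE(2, 2, 2, 2)));
}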

View File

@@ -460,7 +460,7 @@ done:
 /*
  * could just as well write q_crc3[2], doing a movaps and truncating, but
  * no real advantage - it's a tiny bit slower per call, while no additional CPUs
- * would be supported by only requiring TARGET_WITH_SSSE3 and TARGET_WITH_CLMUL instead of SSE4.1 + TARGET_WITH_CLMUL
+ * would be supported by only requiring SSSE3 and CLMUL instead of SSE4.1 + CLMUL
  */
 crc = vgetq_lane_u32(vreinterpretq_u32_u64(q_crc3), (2));
 return ~crc;

View File

@@ -75,7 +75,7 @@ TARGET_WITH_CLMUL static __m128i fold(__m128i in, __m128i foldConstants)
 }
 /**
- * @brief Calculate the CRC-64 checksum using TARGET_WITH_CLMUL instruction extension.
+ * @brief Calculate the CRC-64 checksum using CLMUL instruction extension.
  *
  * @param previous_crc The previously calculated CRC-64 checksum.
  * @param data Pointer to the input data buffer.
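The fold() helper named in the hunk header is, in most PCLMULQDQ-based CRC implementations, a single carry-less multiply-and-XOR step. The following is a generic sketch under that assumption; the constants, naming, and any extra XOR with incoming data are omitted and may differ from this project's version.

#include <wmmintrin.h>  /* PCLMULQDQ intrinsics */

/* Generic CLMUL fold step (sketch): multiply the low and high 64-bit halves
 * of the accumulator by two precomputed fold constants and XOR the products.
 * The imm8 selects which 64-bit half of each operand feeds the multiply. */
static __m128i fold_sketch(__m128i in, __m128i foldConstants)
{
    __m128i lo = _mm_clmulepi64_si128(in, foldConstants, 0x00); /* in.lo * k.lo */
    __m128i hi = _mm_clmulepi64_si128(in, foldConstants, 0x11); /* in.hi * k.hi */
    return _mm_xor_si128(lo, hi);
}

Because _mm_clmulepi64_si128 only compiles when PCLMULQDQ code generation is enabled, such a definition typically needs a TARGET_WITH_CLMUL-style attribute in front of it unless the whole translation unit is built with -mpclmul.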

View File

@@ -38,8 +38,6 @@
#include "fletcher32.h" #include "fletcher32.h"
#include "simd.h" #include "simd.h"
TARGET_WITH_NEON /***/
/** /**
* @brief Calculate Fletcher-32 checksum for a given data using NEON instructions. * @brief Calculate Fletcher-32 checksum for a given data using NEON instructions.
* *
@@ -50,7 +48,7 @@ TARGET_WITH_NEON /***/
  * @param data Pointer to the data buffer.
  * @param len Length of the data buffer in bytes.
  */
-void fletcher32_neon(uint16_t *sum1, uint16_t *sum2, const uint8_t *data, uint32_t len)
+TARGET_WITH_NEON void fletcher32_neon(uint16_t *sum1, uint16_t *sum2, const uint8_t *data, uint32_t len)
 {
 /*
  * Split Fletcher-32 into component sums.
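The other half of this commit moves TARGET_WITH_NEON from a free-standing line onto the function definition. The macro's real definition is not part of this diff; presumably it expands to a per-function target attribute roughly like the hypothetical sketch below, which only has an effect when attached to a declaration or definition rather than sitting on a line of its own.

#include <stdint.h>

/* Hypothetical definition, for illustration only -- the real macro lives
 * elsewhere in this repository (likely simd.h) and may be spelled differently. */
#if defined(__GNUC__) && defined(__arm__) && !defined(__ARM_NEON)
#define TARGET_WITH_NEON __attribute__((target("fpu=neon")))
#else
#define TARGET_WITH_NEON
#endif

/* The attribute must annotate the function it applies to, hence the move: */
TARGET_WITH_NEON void fletcher32_neon(uint16_t *sum1, uint16_t *sum2,
                                      const uint8_t *data, uint32_t len);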

View File

@@ -40,9 +40,9 @@
#include "fletcher32.h" #include "fletcher32.h"
/** /**
* @brief Calculate Fletcher-32 checksum for a given data using TARGET_WITH_SSSE3 instructions. * @brief Calculate Fletcher-32 checksum for a given data using SSSE3 instructions.
* *
* This function calculates the Fletcher-32 checksum for a block of data using TARGET_WITH_SSSE3 vector instructions. * This function calculates the Fletcher-32 checksum for a block of data using SSSE3 vector instructions.
* *
* @param sum1 Pointer to the variable where the first 16-bit checksum value is stored. * @param sum1 Pointer to the variable where the first 16-bit checksum value is stored.
* @param sum2 Pointer to the variable where the second 16-bit checksum value is stored. * @param sum2 Pointer to the variable where the second 16-bit checksum value is stored.

simd.c
View File

@@ -123,15 +123,15 @@ static void cpuidex(int info, int count, unsigned* eax, unsigned* ebx, unsigned*
 }
 /**
- * @brief Checks if the hardware supports the TARGET_WITH_CLMUL instruction set.
+ * @brief Checks if the hardware supports the CLMUL instruction set.
  *
- * The function checks if the system's CPU supports the TARGET_WITH_CLMUL (Carry-Less Multiplication) instruction set.
- * TARGET_WITH_CLMUL is an extension to the x86 instruction set architecture and provides hardware acceleration for
+ * The function checks if the system's CPU supports the CLMUL (Carry-Less Multiplication) instruction set.
+ * CLMUL is an extension to the x86 instruction set architecture and provides hardware acceleration for
  * carry-less multiplication operations.
  *
- * @return True if TARGET_WITH_CLMUL instruction set is supported, False otherwise.
+ * @return True if CLMUL instruction set is supported, False otherwise.
  *
- * @see https://software.intel.com/sites/landingpage/IntrinsicsGuide/#techs=TARGET_WITH_CLMUL
+ * @see https://software.intel.com/sites/landingpage/IntrinsicsGuide/#techs=CLMUL
  * @see https://en.wikipedia.org/wiki/Carry-less_multiplication
  */
 int have_clmul(void)
@@ -148,17 +148,17 @@ int have_clmul(void)
 }
 /**
- * @brief Checks if the current processor supports TARGET_WITH_SSSE3 instructions.
+ * @brief Checks if the current processor supports SSSE3 instructions.
  *
- * The function detects whether the current processor supports TARGET_WITH_SSSE3 instructions by
- * checking the CPU feature flags. TARGET_WITH_SSSE3 (Supplemental Streaming SIMD Extensions 3)
+ * The function detects whether the current processor supports SSSE3 instructions by
+ * checking the CPU feature flags. SSSE3 (Supplemental Streaming SIMD Extensions 3)
  * is an extension to the x86 instruction set architecture that introduces
  * additional SIMD instructions useful for multimedia and signal processing tasks.
  *
- * @return true if the current processor supports TARGET_WITH_SSSE3 instructions, false otherwise.
+ * @return true if the current processor supports SSSE3 instructions, false otherwise.
  *
- * @see https://software.intel.com/sites/landingpage/IntrinsicsGuide/#techs=TARGET_WITH_SSSE3
- * @see https://en.wikipedia.org/wiki/TARGET_WITH_SSSE3
+ * @see https://software.intel.com/sites/landingpage/IntrinsicsGuide/#techs=SSSE3
+ * @see https://en.wikipedia.org/wiki/SSSE3
  */
 int have_ssse3(void)
 {
@@ -169,16 +169,16 @@ int have_ssse3(void)
 }
 /**
- * @brief Checks if the current processor supports TARGET_WITH_AVX2 instructions.
+ * @brief Checks if the current processor supports AVX2 instructions.
  *
- * The function detects whether the current processor supports TARGET_WITH_AVX2 instructions by
- * checking the CPU feature flags. TARGET_WITH_AVX2 (Advanced Vector Extensions 2) is an extension
+ * The function detects whether the current processor supports AVX2 instructions by
+ * checking the CPU feature flags. AVX2 (Advanced Vector Extensions 2) is an extension
  * to the x86 instruction set architecture that introduces additional SIMD instructions
  * useful for multimedia and signal processing tasks.
  *
- * @return true if the current processor supports TARGET_WITH_AVX2 instructions, false otherwise.
+ * @return true if the current processor supports AVX2 instructions, false otherwise.
  *
- * @see https://software.intel.com/sites/landingpage/IntrinsicsGuide/#techs=TARGET_WITH_AVX2
+ * @see https://software.intel.com/sites/landingpage/IntrinsicsGuide/#techs=AVX2
  * @see https://en.wikipedia.org/wiki/Advanced_Vector_Extensions
  */
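The three detection functions touched above reduce to reading well-known CPUID feature bits. A minimal sketch using GCC/Clang's <cpuid.h> is shown below for orientation; the project's own cpuidex() wrapper (visible in the hunk header) is not reproduced, so these helper names are hypothetical.

#include <cpuid.h>

/* Minimal sketch only; the real have_*() functions use the project's own
 * cpuidex() wrapper. Feature bits per the Intel SDM:
 *   PCLMULQDQ (CLMUL): leaf 1, ECX bit 1
 *   SSSE3:             leaf 1, ECX bit 9
 *   AVX2:              leaf 7 (subleaf 0), EBX bit 5 */
static int sketch_have_clmul(void)
{
    unsigned a, b, c, d;
    return __get_cpuid(1, &a, &b, &c, &d) && ((c >> 1) & 1);
}

static int sketch_have_ssse3(void)
{
    unsigned a, b, c, d;
    return __get_cpuid(1, &a, &b, &c, &d) && ((c >> 9) & 1);
}

static int sketch_have_avx2(void)
{
    unsigned a, b, c, d;
    /* A production check should also confirm OS support for YMM state
     * (OSXSAVE + XGETBV); omitted here for brevity. */
    return __get_cpuid_count(7, 0, &a, &b, &c, &d) && ((b >> 5) & 1);
}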