Add ARM special instructions implementation for CRC32.

This commit is contained in:
2021-09-29 02:49:40 +01:00
parent 2458863cb4
commit d433af7987
6 changed files with 101 additions and 1 deletions

View File

@@ -24,4 +24,4 @@ if("${CMAKE_BUILD_TYPE}" MATCHES "Release")
endif()
endif()
add_library("Aaru.Checksums.Native" SHARED adler32.h adler32.c crc16.h crc16.c crc16_ccitt.h crc16_ccitt.c crc32.c crc32.h crc64.c crc64.h fletcher16.h fletcher16.c fletcher32.h fletcher32.c library.h spamsum.c spamsum.h crc32_clmul.c crc64_clmul.c simd.c simd.h adler32_ssse3.c adler32_avx2.c adler32_neon.c)
add_library("Aaru.Checksums.Native" SHARED adler32.h adler32.c crc16.h crc16.c crc16_ccitt.h crc16_ccitt.c crc32.c crc32.h crc64.c crc64.h fletcher16.h fletcher16.c fletcher32.h fletcher32.c library.h spamsum.c spamsum.h crc32_clmul.c crc64_clmul.c simd.c simd.h adler32_ssse3.c adler32_avx2.c adler32_neon.c crc32_arm_simd.c)

View File

@@ -48,6 +48,15 @@ AARU_EXPORT int AARU_CALL crc32_update(crc32_ctx* ctx, const uint8_t* data, uint
}
#endif
#if defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM)
if(have_arm_crc32())
{
ctx->crc = armv8_crc32_little(ctx->crc, data, len);
return 0;
}
#endif
// Unroll according to Intel slicing by uint8_t
// http://www.intel.com/technology/comms/perfnet/download/CRC_generators.pdf
// http://sourceforge.net/projects/slicing-by-8/

10
crc32.h
View File

@@ -276,4 +276,14 @@ AARU_EXPORT void AARU_CALL crc32_free(crc32_ctx* ctx);
#endif
CLMUL uint32_t crc32_clmul(const uint8_t* src, long len, uint32_t initial_crc);
#endif
// Function-level target attribute (GCC/Clang `__attribute__` syntax) applied to the hardware CRC32
// routine declared below, so the CRC extension is requested for that function rather than globally.
#if defined(__aarch64__)
#define TARGET_ARMV8_WITH_CRC __attribute__((target("+crc")))
#else // !defined(__aarch64__)
// NOTE(review): this branch is taken for every target that is not __aarch64__ (including _M_ARM64 and
// non-ARM builds), not just 32-bit ARM; the declaration that uses the macro is ARM-guarded below.
#define TARGET_ARMV8_WITH_CRC __attribute__((target("armv8-a,crc")))
#endif // defined(__aarch64__)
#if defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM)
// CRC32 update using the ARM CRC32 instructions: takes the running CRC, a byte buffer and its length,
// and returns the updated CRC.
TARGET_ARMV8_WITH_CRC uint32_t armv8_crc32_little(uint32_t crc, const unsigned char* buf, uint32_t len);
#endif

73
crc32_arm_simd.c Normal file
View File

@@ -0,0 +1,73 @@
//
// Created by claunia on 29/9/21.
//
#if defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM)
#include <arm_acle.h>
#include "library.h"
#include "crc32.h"
/**
 * @brief Updates a CRC32 value over a buffer using the ARM CRC32 instructions (ACLE intrinsics).
 *
 * The incoming CRC is bit-inverted before processing and the result is bit-inverted again before
 * being returned, so the caller passes and receives the CRC in the same representation.
 *
 * Leading bytes are consumed one at a time until the pointer is aligned to the widest load used
 * (8 bytes on AArch64, 4 bytes on 32-bit ARM); the bulk is then processed in unrolled blocks of
 * eight loads, then single loads, and finally any trailing bytes one at a time.
 *
 * @param crc Running CRC value to continue from.
 * @param buf Pointer to the data to checksum.
 * @param len Number of bytes available at @p buf.
 * @return The updated CRC value.
 */
TARGET_ARMV8_WITH_CRC uint32_t armv8_crc32_little(uint32_t crc, const unsigned char* buf, uint32_t len)
{
    uint32_t c = (uint32_t)~crc;

#if defined(__aarch64__) || defined(_M_ARM64)
    // Byte-wise until the pointer is 8-byte aligned for the 64-bit loads below.
    while(len && ((uintptr_t)buf & 7))
    {
        c = __crc32b(c, *buf++);
        --len;
    }

    const uint64_t* buf8 = (const uint64_t*)buf;

    // Unrolled main loop: eight doublewords (64 bytes) per iteration.
    while(len >= 64)
    {
        c = __crc32d(c, *buf8++);
        c = __crc32d(c, *buf8++);
        c = __crc32d(c, *buf8++);
        c = __crc32d(c, *buf8++);
        c = __crc32d(c, *buf8++);
        c = __crc32d(c, *buf8++);
        c = __crc32d(c, *buf8++);
        c = __crc32d(c, *buf8++);
        len -= 64;
    }

    // Remaining whole doublewords.
    while(len >= 8)
    {
        c = __crc32d(c, *buf8++);
        len -= 8;
    }

    buf = (const unsigned char*)buf8;
#else // 32-bit ARM
    // Byte-wise until the pointer is 4-byte aligned for the 32-bit loads below.
    while(len && ((uintptr_t)buf & 3))
    {
        c = __crc32b(c, *buf++);
        --len;
    }

    const uint32_t* buf4 = (const uint32_t*)buf;

    // Unrolled main loop: eight words (32 bytes) per iteration.
    while(len >= 32)
    {
        c = __crc32w(c, *buf4++);
        c = __crc32w(c, *buf4++);
        c = __crc32w(c, *buf4++);
        c = __crc32w(c, *buf4++);
        c = __crc32w(c, *buf4++);
        c = __crc32w(c, *buf4++);
        c = __crc32w(c, *buf4++);
        c = __crc32w(c, *buf4++);
        len -= 32;
    }

    // Remaining whole words. This must be the 32-bit intrinsic: __crc32d would zero-extend the
    // word to 64 bits and fold in 8 bytes while only 4 are consumed, corrupting the checksum.
    while(len >= 4)
    {
        c = __crc32w(c, *buf4++);
        len -= 4;
    }

    buf = (const unsigned char*)buf4;
#endif

    // Trailing bytes that do not fill a whole word/doubleword.
    while(len--) { c = __crc32b(c, *buf++); }

    return ~c;
}
#endif

5
simd.c
View File

@@ -99,8 +99,13 @@ int have_neon(void)
{
return 1; // ARMv8-A made it mandatory
}
// Reports whether the CRC32 capability bit is set in the AT_HWCAP auxiliary vector entry.
// Returns non-zero when the bit is present, zero otherwise.
int have_arm_crc32(void)
{
    const unsigned long hwcaps = getauxval(AT_HWCAP);

    return hwcaps & HWCAP_CRC32;
}
#endif
#if defined(__arm__) || defined(_M_ARM)
// Reports whether the NEON capability bit is set in the AT_HWCAP auxiliary vector entry.
// Returns non-zero when the bit is present, zero otherwise.
int have_neon(void)
{
    const unsigned long hwcaps = getauxval(AT_HWCAP);

    return hwcaps & HWCAP_NEON;
}
// Reports whether the CRC32 capability bit is set in the AT_HWCAP2 auxiliary vector entry.
// Returns non-zero when the bit is present, zero otherwise.
int have_arm_crc32(void)
{
    const unsigned long hwcaps2 = getauxval(AT_HWCAP2);

    return hwcaps2 & HWCAP2_CRC32;
}
}
#endif

3
simd.h
View File

@@ -10,12 +10,15 @@ int have_avx2(void);
#if defined(__arm__) || defined(_M_ARM)
// Capability bit masks for 32-bit ARM, tested against getauxval() results.
// NOTE(review): values presumably mirror the Linux kernel's ARM hwcap bits — confirm against the kernel headers.
#define HWCAP_NEON (1 << 12)
// CRC32 bit; unlike NEON this one is looked up in the AT_HWCAP2 word (hence the HWCAP2_ prefix).
#define HWCAP2_CRC32 (1 << 4)
#endif
#if defined(__aarch64__) || defined(_M_ARM64)
// Capability bit masks for AArch64.
// NOTE(review): values presumably mirror the Linux kernel's arm64 hwcap bits — confirm against the kernel headers.
#define HWCAP_NEON (1 << 1)
#define HWCAP_CRC32 (1 << 7)
#endif
#if defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM)
// Runtime CPU feature probes; each returns non-zero when the feature is available.
int have_neon(void);
int have_arm_crc32(void);
#endif