diff --git a/adler32.c b/adler32.c index eed4ec8..9a831b8 100644 --- a/adler32.c +++ b/adler32.c @@ -73,22 +73,30 @@ AARU_EXPORT int AARU_CALL adler32_update(adler32_ctx* ctx, const uint8_t* data, } #endif - uint32_t sum1 = ctx->sum1; - uint32_t sum2 = ctx->sum2; + adler32_slicing(&ctx->sum1, &ctx->sum2, data, len); + + return 0; +} + +void adler32_slicing(uint16_t* sum1, uint16_t* sum2, const unsigned char* data, long len) +{ + uint32_t s1 = *sum1; + uint32_t s2 = *sum2; unsigned n; /* in case user likes doing a byte at a time, keep it fast */ if(len == 1) { - sum1 += data[0]; - if(sum1 >= ADLER_MODULE) sum1 -= ADLER_MODULE; - sum2 += sum1; - if(sum2 >= ADLER_MODULE) sum2 -= ADLER_MODULE; + s1 += data[0]; + if(s1 >= ADLER_MODULE) s1 -= ADLER_MODULE; + s2 += s1; + if(s2 >= ADLER_MODULE) s2 -= ADLER_MODULE; - ctx->sum1 = sum1 & 0xFFFF; - ctx->sum2 = sum2 & 0xFFFF; - return 0; + *sum1 = s1 & 0xFFFF; + *sum2 = s2 & 0xFFFF; + + return; } /* in case short lengths are provided, keep it somewhat fast */ @@ -96,14 +104,15 @@ AARU_EXPORT int AARU_CALL adler32_update(adler32_ctx* ctx, const uint8_t* data, { while(len--) { - sum1 += *data++; - sum2 += sum1; + s1 += *data++; + s2 += s1; } - if(sum1 >= ADLER_MODULE) sum1 -= ADLER_MODULE; - sum2 %= ADLER_MODULE; /* only added so many ADLER_MODULE's */ - ctx->sum1 = sum1 & 0xFFFF; - ctx->sum2 = sum2 & 0xFFFF; - return 0; + if(s1 >= ADLER_MODULE) s1 -= ADLER_MODULE; + s2 %= ADLER_MODULE; /* only added so many ADLER_MODULE's */ + *sum1 = s1 & 0xFFFF; + *sum2 = s2 & 0xFFFF; + + return; } /* do length NMAX blocks -- requires just one modulo operation */ @@ -112,44 +121,44 @@ AARU_EXPORT int AARU_CALL adler32_update(adler32_ctx* ctx, const uint8_t* data, len -= NMAX; n = NMAX / 16; /* NMAX is divisible by 16 */ do { - sum1 += (data)[0]; - sum2 += sum1; - sum1 += (data)[0 + 1]; - sum2 += sum1; - sum1 += (data)[0 + 2]; - sum2 += sum1; - sum1 += (data)[0 + 2 + 1]; - sum2 += sum1; - sum1 += (data)[0 + 4]; - sum2 += sum1; - sum1 += (data)[0 + 4 + 1]; - sum2 += sum1; - sum1 += (data)[0 + 4 + 2]; - sum2 += sum1; - sum1 += (data)[0 + 4 + 2 + 1]; - sum2 += sum1; - sum1 += (data)[8]; - sum2 += sum1; - sum1 += (data)[8 + 1]; - sum2 += sum1; - sum1 += (data)[8 + 2]; - sum2 += sum1; - sum1 += (data)[8 + 2 + 1]; - sum2 += sum1; - sum1 += (data)[8 + 4]; - sum2 += sum1; - sum1 += (data)[8 + 4 + 1]; - sum2 += sum1; - sum1 += (data)[8 + 4 + 2]; - sum2 += sum1; - sum1 += (data)[8 + 4 + 2 + 1]; - sum2 += sum1; + s1 += (data)[0]; + s2 += s1; + s1 += (data)[0 + 1]; + s2 += s1; + s1 += (data)[0 + 2]; + s2 += s1; + s1 += (data)[0 + 2 + 1]; + s2 += s1; + s1 += (data)[0 + 4]; + s2 += s1; + s1 += (data)[0 + 4 + 1]; + s2 += s1; + s1 += (data)[0 + 4 + 2]; + s2 += s1; + s1 += (data)[0 + 4 + 2 + 1]; + s2 += s1; + s1 += (data)[8]; + s2 += s1; + s1 += (data)[8 + 1]; + s2 += s1; + s1 += (data)[8 + 2]; + s2 += s1; + s1 += (data)[8 + 2 + 1]; + s2 += s1; + s1 += (data)[8 + 4]; + s2 += s1; + s1 += (data)[8 + 4 + 1]; + s2 += s1; + s1 += (data)[8 + 4 + 2]; + s2 += s1; + s1 += (data)[8 + 4 + 2 + 1]; + s2 += s1; /* 16 sums unrolled */ data += 16; } while(--n); - sum1 %= ADLER_MODULE; - sum2 %= ADLER_MODULE; + s1 %= ADLER_MODULE; + s2 %= ADLER_MODULE; } /* do remaining bytes (less than NMAX, still just one modulo) */ @@ -158,53 +167,52 @@ AARU_EXPORT int AARU_CALL adler32_update(adler32_ctx* ctx, const uint8_t* data, while(len >= 16) { len -= 16; - sum1 += (data)[0]; - sum2 += sum1; - sum1 += (data)[0 + 1]; - sum2 += sum1; - sum1 += (data)[0 + 2]; - sum2 += sum1; - sum1 += (data)[0 + 2 + 1]; - sum2 += sum1; - sum1 += (data)[0 + 4]; - sum2 += sum1; - sum1 += (data)[0 + 4 + 1]; - sum2 += sum1; - sum1 += (data)[0 + 4 + 2]; - sum2 += sum1; - sum1 += (data)[0 + 4 + 2 + 1]; - sum2 += sum1; - sum1 += (data)[8]; - sum2 += sum1; - sum1 += (data)[8 + 1]; - sum2 += sum1; - sum1 += (data)[8 + 2]; - sum2 += sum1; - sum1 += (data)[8 + 2 + 1]; - sum2 += sum1; - sum1 += (data)[8 + 4]; - sum2 += sum1; - sum1 += (data)[8 + 4 + 1]; - sum2 += sum1; - sum1 += (data)[8 + 4 + 2]; - sum2 += sum1; - sum1 += (data)[8 + 4 + 2 + 1]; - sum2 += sum1; + s1 += (data)[0]; + s2 += s1; + s1 += (data)[0 + 1]; + s2 += s1; + s1 += (data)[0 + 2]; + s2 += s1; + s1 += (data)[0 + 2 + 1]; + s2 += s1; + s1 += (data)[0 + 4]; + s2 += s1; + s1 += (data)[0 + 4 + 1]; + s2 += s1; + s1 += (data)[0 + 4 + 2]; + s2 += s1; + s1 += (data)[0 + 4 + 2 + 1]; + s2 += s1; + s1 += (data)[8]; + s2 += s1; + s1 += (data)[8 + 1]; + s2 += s1; + s1 += (data)[8 + 2]; + s2 += s1; + s1 += (data)[8 + 2 + 1]; + s2 += s1; + s1 += (data)[8 + 4]; + s2 += s1; + s1 += (data)[8 + 4 + 1]; + s2 += s1; + s1 += (data)[8 + 4 + 2]; + s2 += s1; + s1 += (data)[8 + 4 + 2 + 1]; + s2 += s1; data += 16; } while(len--) { - sum1 += *data++; - sum2 += sum1; + s1 += *data++; + s2 += s1; } - sum1 %= ADLER_MODULE; - sum2 %= ADLER_MODULE; + s1 %= ADLER_MODULE; + s2 %= ADLER_MODULE; } - ctx->sum1 = sum1 & 0xFFFF; - ctx->sum2 = sum2 & 0xFFFF; - return 0; + *sum1 = s1 & 0xFFFF; + *sum2 = s2 & 0xFFFF; } AARU_EXPORT int AARU_CALL adler32_final(adler32_ctx* ctx, uint32_t* checksum) diff --git a/adler32.h b/adler32.h index 1097b07..49094b3 100644 --- a/adler32.h +++ b/adler32.h @@ -33,18 +33,19 @@ AARU_EXPORT adler32_ctx* AARU_CALL adler32_init(); AARU_EXPORT int AARU_CALL adler32_update(adler32_ctx* ctx, const uint8_t* data, uint32_t len); AARU_EXPORT int AARU_CALL adler32_final(adler32_ctx* ctx, uint32_t* checksum); AARU_EXPORT void AARU_CALL adler32_free(adler32_ctx* ctx); +AARU_EXPORT void AARU_CALL adler32_slicing(uint16_t* sum1, uint16_t* sum2, const unsigned char* data, long len); #if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \ defined(__i386__) || defined(__THW_INTEL) || defined(_M_IX86) -void adler32_ssse3(uint16_t* sum1, uint16_t* sum2, const unsigned char* buf, size_t len); -void adler32_avx2(uint16_t* sum1, uint16_t* sum2, const unsigned char* buf, size_t len); +AARU_EXPORT void AARU_CALL adler32_ssse3(uint16_t* sum1, uint16_t* sum2, const unsigned char* buf, long len); +AARU_EXPORT void AARU_CALL adler32_avx2(uint16_t* sum1, uint16_t* sum2, const unsigned char* buf, long len); #endif #if defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM) -void adler32_neon(uint16_t* sum1, uint16_t* sum2, const unsigned char* buf, uint32_t len); +AARU_EXPORT void AARU_CALL adler32_neon(uint16_t* sum1, uint16_t* sum2, const unsigned char* buf, uint32_t len); #endif diff --git a/adler32_avx2.c b/adler32_avx2.c index 735d535..f3f3536 100644 --- a/adler32_avx2.c +++ b/adler32_avx2.c @@ -12,7 +12,7 @@ #include "adler32.h" #include "simd.h" -AVX2 void adler32_avx2(uint16_t* sum1, uint16_t* sum2, const unsigned char* buf, size_t len) +AVX2 void adler32_avx2(uint16_t* sum1, uint16_t* sum2, const unsigned char* buf, long len) { uint32_t s1 = *sum1; uint32_t s2 = *sum2; @@ -21,7 +21,7 @@ AVX2 void adler32_avx2(uint16_t* sum1, uint16_t* sum2, const unsigned char* buf, * Process the data in blocks. */ const unsigned BLOCK_SIZE = 1 << 5; - size_t blocks = len / BLOCK_SIZE; + long blocks = len / BLOCK_SIZE; len -= blocks * BLOCK_SIZE; while(blocks) diff --git a/adler32_ssse3.c b/adler32_ssse3.c index 107c03f..93b1d7f 100644 --- a/adler32_ssse3.c +++ b/adler32_ssse3.c @@ -51,7 +51,7 @@ #include "library.h" #include "adler32.h" -SSSE3 void adler32_ssse3(uint16_t* sum1, uint16_t* sum2, const unsigned char* buf, size_t len) +SSSE3 void adler32_ssse3(uint16_t* sum1, uint16_t* sum2, const unsigned char* buf, long len) { uint32_t s1 = *sum1; uint32_t s2 = *sum2; @@ -60,7 +60,7 @@ SSSE3 void adler32_ssse3(uint16_t* sum1, uint16_t* sum2, const unsigned char* bu * Process the data in blocks. */ const unsigned BLOCK_SIZE = 1 << 5; - size_t blocks = len / BLOCK_SIZE; + long blocks = len / BLOCK_SIZE; len -= blocks * BLOCK_SIZE; while(blocks) { diff --git a/simd.c b/simd.c index fab4b8e..c32b74c 100644 --- a/simd.c +++ b/simd.c @@ -1,3 +1,4 @@ +#include "library.h" #include "simd.h" #if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \ diff --git a/simd.h b/simd.h index dfbc6ef..639dd02 100644 --- a/simd.h +++ b/simd.h @@ -16,9 +16,9 @@ #define CLMUL __attribute__((target("pclmul,sse4.1"))) #endif -int have_clmul(void); -int have_ssse3(void); -int have_avx2(void); +AARU_EXPORT int have_clmul(void); +AARU_EXPORT int have_ssse3(void); +AARU_EXPORT int have_avx2(void); #endif #if defined(__arm__) || defined(_M_ARM) @@ -34,9 +34,9 @@ int have_avx2(void); #endif #if defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM) -int have_neon(void); -int have_arm_crc32(void); -int have_arm_crypto(void); +AARU_EXPORT int have_neon(void); +AARU_EXPORT int have_arm_crc32(void); +AARU_EXPORT int have_arm_crypto(void); #endif #if defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index d8bc8f3..cdccb9a 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -10,5 +10,5 @@ file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/data/random # 'Google_Tests_run' is the target name # 'test1.cpp tests2.cpp' are source files with tests -add_executable(tests_run crc32.cpp) +add_executable(tests_run adler32.cpp crc32.cpp) target_link_libraries(tests_run gtest gtest_main "Aaru.Checksums.Native") \ No newline at end of file diff --git a/tests/adler32.cpp b/tests/adler32.cpp new file mode 100644 index 0000000..2362294 --- /dev/null +++ b/tests/adler32.cpp @@ -0,0 +1,130 @@ +// +// Created by claunia on 5/10/21. +// + +#include + +#include "../library.h" +#include "../adler32.h" +#include "gtest/gtest.h" + +#define EXPECTED_ADLER32 0x3728d186 + +static const uint8_t* buffer; + +class adler32Fixture : public ::testing::Test +{ + public: + adler32Fixture() + { + // initialization; + // can also be done in SetUp() + } + + protected: + void SetUp() + { + FILE* file = fopen("/home/claunia/random", "rb"); + buffer = (const uint8_t*)malloc(1048576); + fread((void*)buffer, 1, 1048576, file); + fclose(file); + } + + void TearDown() { free((void*)buffer); } + + ~adler32Fixture() + { + // resources cleanup, no exceptions allowed + } + + // shared user data +}; + +TEST_F(adler32Fixture, adler32_auto) +{ + adler32_ctx* ctx = adler32_init(); + uint32_t adler32; + + EXPECT_NE(ctx, nullptr); + + adler32_update(ctx, buffer, 1048576); + adler32_final(ctx, &adler32); + + EXPECT_EQ(adler32, EXPECTED_ADLER32); +} + +TEST_F(adler32Fixture, adler32_slicing) +{ + uint16_t sum1; + uint16_t sum2; + uint32_t adler32; + + sum1 = 1; + sum2 = 0; + + adler32_slicing(&sum1, &sum2, buffer, 1048576); + + adler32 = (sum2 << 16) | sum1; + + EXPECT_EQ(adler32, EXPECTED_ADLER32); +} + +#if defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM) +TEST_F(adler32Fixture, adler32_neon) +{ + if(!have_neon()) return; + + uint16_t sum1; + uint16_t sum2; + uint32_t adler32; + + sum1 = 1; + sum2 = 0; + + adler32_neon(&sum1, &sum2, buffer, 1048576); + + adler32 = (sum2 << 16) | sum1; + + EXPECT_EQ(adler32, EXPECTED_ADLER32); +} +#endif + +#if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \ + defined(__i386__) || defined(__THW_INTEL) || defined(_M_IX86) + +TEST_F(adler32Fixture, adler32_avx2) +{ + if(!have_avx2()) return; + + uint16_t sum1; + uint16_t sum2; + uint32_t adler32; + + sum1 = 1; + sum2 = 0; + + adler32_avx2(&sum1, &sum2, buffer, 1048576); + + adler32 = (sum2 << 16) | sum1; + + EXPECT_EQ(adler32, EXPECTED_ADLER32); +} + +TEST_F(adler32Fixture, adler32_ssse3) +{ + if(!have_ssse3()) return; + + uint16_t sum1; + uint16_t sum2; + uint32_t adler32; + + sum1 = 1; + sum2 = 0; + + adler32_ssse3(&sum1, &sum2, buffer, 1048576); + + adler32 = (sum2 << 16) | sum1; + + EXPECT_EQ(adler32, EXPECTED_ADLER32); +} +#endif diff --git a/tests/crc32.cpp b/tests/crc32.cpp index 061a302..85950a3 100644 --- a/tests/crc32.cpp +++ b/tests/crc32.cpp @@ -2,7 +2,7 @@ // Created by claunia on 5/10/21. // -#include +#include #include "../library.h" #include "../crc32.h" @@ -10,7 +10,7 @@ #define EXPECTED_CRC32 0x2B6E6854 -const uint8_t* buffer; +static const uint8_t* buffer; class crc32Fixture : public ::testing::Test {