/*
 * This file is part of the Aaru Data Preservation Suite.
 *
 * Copyright (c) 2019-2021 Natalia Portillo.
 */

/* adler32.c -- compute the Adler-32 checksum of a data stream
   Copyright (C) 1995-2011 Mark Adler

   This software is provided 'as-is', without any express or implied
   warranty. In no event will the authors be held liable for any damages
   arising from the use of this software.

   Permission is granted to anyone to use this software for any purpose,
   including commercial applications, and to alter it and redistribute it
   freely, subject to the following restrictions:

   1. The origin of this software must not be misrepresented; you must not
      claim that you wrote the original software. If you use this software
      in a product, an acknowledgment in the product documentation would be
      appreciated but is not required.
   2. Altered source versions must be plainly marked as such, and must not be
      misrepresented as being the original software.
   3. This notice may not be removed or altered from any source distribution.

   Jean-loup Gailly        Mark Adler
   jloup@gzip.org          madler@alumni.caltech.edu
 */

#include <stdint.h>
#include <stdlib.h>

#include "library.h"
#include "adler32.h"
#include "simd.h"

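/* Adler-32 maintains two running 16-bit sums: sum1 is 1 plus the sum of all
   input bytes, and sum2 is the sum of every intermediate sum1 value, both
   reduced modulo ADLER_MODULE. ADLER_MODULE and NMAX are presumed to be
   defined in adler32.h; in the zlib reference they are 65521 (the largest
   prime below 2^16) and 5552 (the largest block length whose accumulated
   sums cannot overflow 32 bits before a reduction). */
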
AARU_EXPORT adler32_ctx* AARU_CALL adler32_init()
{
    adler32_ctx* ctx;

    ctx = (adler32_ctx*)malloc(sizeof(adler32_ctx));

    if(!ctx) return NULL;

    ctx->sum1 = 1;
    ctx->sum2 = 0;

    return ctx;
}

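/* Feeds len bytes from data into the running checksum. Dispatches to a SIMD
   implementation (NEON, AVX2 or SSSE3) when the CPU supports one, and falls
   back to the scalar zlib algorithm otherwise. Returns 0 on success or -1 on
   invalid arguments. */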
AARU_EXPORT int AARU_CALL adler32_update(adler32_ctx* ctx, const uint8_t* data, uint32_t len)
{
    if(!ctx || !data) return -1;

#if defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM)
    if(have_neon())
    {
        adler32_neon(&ctx->sum1, &ctx->sum2, data, len);

        return 0;
    }
#endif

#if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \
    defined(__i386__) || defined(__THW_INTEL) || defined(_M_IX86)
    if(have_avx2())
    {
        adler32_avx2(&ctx->sum1, &ctx->sum2, data, len);

        return 0;
    }

    if(have_ssse3())
    {
        adler32_ssse3(&ctx->sum1, &ctx->sum2, data, len);

        return 0;
    }
#endif

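    /* Portable scalar path below, following the zlib reference
       implementation. */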
    uint32_t sum1 = ctx->sum1;
    uint32_t sum2 = ctx->sum2;

    unsigned n;

    /* in case user likes doing a byte at a time, keep it fast */
    if(len == 1)
    {
        sum1 += data[0];
        if(sum1 >= ADLER_MODULE) sum1 -= ADLER_MODULE;
        sum2 += sum1;
        if(sum2 >= ADLER_MODULE) sum2 -= ADLER_MODULE;

        ctx->sum1 = sum1 & 0xFFFF;
        ctx->sum2 = sum2 & 0xFFFF;
        return 0;
    }

    /* in case short lengths are provided, keep it somewhat fast */
    if(len < 16)
    {
        while(len--)
        {
            sum1 += *data++;
            sum2 += sum1;
        }
        if(sum1 >= ADLER_MODULE) sum1 -= ADLER_MODULE;
        sum2 %= ADLER_MODULE; /* only added so many ADLER_MODULE's */
        ctx->sum1 = sum1 & 0xFFFF;
        ctx->sum2 = sum2 & 0xFFFF;
        return 0;
    }

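    /* NMAX is sized (5552 in the zlib reference) so that a full block of
       accumulation cannot overflow a 32-bit sum2, so each NMAX-byte block
       needs only one modulo at the end instead of one per byte. */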
    /* do length NMAX blocks -- requires just one modulo operation */
    while(len >= NMAX)
    {
        len -= NMAX;
        n = NMAX / 16; /* NMAX is divisible by 16 */
        do {
            sum1 += data[0];
            sum2 += sum1;
            sum1 += data[1];
            sum2 += sum1;
            sum1 += data[2];
            sum2 += sum1;
            sum1 += data[3];
            sum2 += sum1;
            sum1 += data[4];
            sum2 += sum1;
            sum1 += data[5];
            sum2 += sum1;
            sum1 += data[6];
            sum2 += sum1;
            sum1 += data[7];
            sum2 += sum1;
            sum1 += data[8];
            sum2 += sum1;
            sum1 += data[9];
            sum2 += sum1;
            sum1 += data[10];
            sum2 += sum1;
            sum1 += data[11];
            sum2 += sum1;
            sum1 += data[12];
            sum2 += sum1;
            sum1 += data[13];
            sum2 += sum1;
            sum1 += data[14];
            sum2 += sum1;
            sum1 += data[15];
            sum2 += sum1;

            /* 16 sums unrolled */
            data += 16;
        } while(--n);
        sum1 %= ADLER_MODULE;
        sum2 %= ADLER_MODULE;
    }

    /* do remaining bytes (less than NMAX, still just one modulo) */
    if(len)
    { /* avoid modulos if none remaining */
        while(len >= 16)
        {
            len -= 16;
            sum1 += data[0];
            sum2 += sum1;
            sum1 += data[1];
            sum2 += sum1;
            sum1 += data[2];
            sum2 += sum1;
            sum1 += data[3];
            sum2 += sum1;
            sum1 += data[4];
            sum2 += sum1;
            sum1 += data[5];
            sum2 += sum1;
            sum1 += data[6];
            sum2 += sum1;
            sum1 += data[7];
            sum2 += sum1;
            sum1 += data[8];
            sum2 += sum1;
            sum1 += data[9];
            sum2 += sum1;
            sum1 += data[10];
            sum2 += sum1;
            sum1 += data[11];
            sum2 += sum1;
            sum1 += data[12];
            sum2 += sum1;
            sum1 += data[13];
            sum2 += sum1;
            sum1 += data[14];
            sum2 += sum1;
            sum1 += data[15];
            sum2 += sum1;

            data += 16;
        }
        while(len--)
        {
            sum1 += *data++;
            sum2 += sum1;
        }
        sum1 %= ADLER_MODULE;
        sum2 %= ADLER_MODULE;
    }

    ctx->sum1 = sum1 & 0xFFFF;
    ctx->sum2 = sum2 & 0xFFFF;
    return 0;
}

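/* Combines the two 16-bit sums into the final 32-bit Adler-32 value:
   (sum2 << 16) | sum1. */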
AARU_EXPORT int AARU_CALL adler32_final(adler32_ctx* ctx, uint32_t* checksum)
{
    if(!ctx || !checksum) return -1;

    *checksum = (ctx->sum2 << 16) | ctx->sum1;
    return 0;
}

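/* Releases a context allocated by adler32_init(). Safe to call with NULL. */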
AARU_EXPORT void AARU_CALL adler32_free(adler32_ctx* ctx)
{
    if(!ctx) return;

    free(ctx);
}
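
/* A minimal usage sketch (illustrative only; buf and buf_len stand in for
   caller-supplied data and are not part of this library):

       adler32_ctx* ctx = adler32_init();
       uint32_t     checksum;

       if(ctx)
       {
           adler32_update(ctx, buf, buf_len); // may be called repeatedly
           adler32_final(ctx, &checksum);     // checksum == (sum2 << 16) | sum1
           adler32_free(ctx);
       }
*/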