/* Aaru.Checksums.Native/adler32.c */
/*
* This file is part of the Aaru Data Preservation Suite.
* Copyright (c) 2019-2021 Natalia Portillo.
*/
/* adler32.c -- compute the Adler-32 checksum of a data stream
Copyright (C) 1995-2011 Mark Adler
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not
claim that you wrote the original software. If you use this software
in a product, an acknowledgment in the product documentation would be
appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
Jean-loup Gailly Mark Adler
jloup@gzip.org madler@alumni.caltech.edu
*/
#include <stdint.h>
#include <stdlib.h>
#include "library.h"
#include "adler32.h"
#include "simd.h"
/**
 * Allocates and initializes an Adler-32 context.
 *
 * The running sums start at the standard Adler-32 seed: sum1 = 1, sum2 = 0
 * (i.e. an initial checksum value of 1).
 *
 * @return Pointer to a new context, or NULL if allocation failed.
 *         Caller owns the context and must release it with adler32_free().
 */
AARU_EXPORT adler32_ctx* AARU_CALL adler32_init()
{
    /* No cast on malloc in C; sizeof *ctx stays correct if the type changes. */
    adler32_ctx* ctx = malloc(sizeof *ctx);

    if(!ctx) return NULL;

    ctx->sum1 = 1;
    ctx->sum2 = 0;

    return ctx;
}
/**
 * Feeds a buffer of bytes into the running Adler-32 computation.
 *
 * Dispatches to the fastest implementation compiled in for the current CPU:
 * NEON on ARM targets, then AVX2 or SSSE3 on x86 targets, falling back to
 * the portable slicing implementation. All variants update ctx->sum1 and
 * ctx->sum2 in place (the have_*() runtime checks and SIMD kernels are
 * declared in the project headers).
 *
 * @param ctx  Context created by adler32_init().
 * @param data Bytes to process.
 * @param len  Number of bytes in data.
 * @return 0 on success, -1 if ctx or data is NULL.
 */
AARU_EXPORT int AARU_CALL adler32_update(adler32_ctx* ctx, const uint8_t* data, uint32_t len)
{
if(!ctx || !data) return -1;
#if defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM)
/* ARM: use NEON when the CPU supports it. */
if(have_neon())
{
adler32_neon(&ctx->sum1, &ctx->sum2, data, len);
return 0;
}
#endif
#if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \
defined(__i386__) || defined(__THW_INTEL) || defined(_M_IX86)
/* x86: prefer AVX2 over SSSE3 when both are available. */
if(have_avx2())
{
adler32_avx2(&ctx->sum1, &ctx->sum2, data, len);
return 0;
}
if(have_ssse3())
{
adler32_ssse3(&ctx->sum1, &ctx->sum2, data, len);
return 0;
}
#endif
/* Portable fallback for all other CPUs. */
adler32_slicing(&ctx->sum1, &ctx->sum2, data, len);
return 0;
}
/**
 * Portable Adler-32 core (derived from Mark Adler's zlib implementation).
 *
 * Accumulates len bytes of data into the two 16-bit running sums, keeping
 * intermediate values in 32-bit locals so that reductions modulo
 * ADLER_MODULE are only needed every NMAX bytes.
 *
 * @param sum1 In/out low 16-bit sum (seeded with 1 for a fresh checksum).
 * @param sum2 In/out high 16-bit sum (seeded with 0 for a fresh checksum).
 * @param data Bytes to process.
 * @param len  Number of bytes in data.
 */
AARU_EXPORT void AARU_CALL adler32_slicing(uint16_t* sum1, uint16_t* sum2, const unsigned char* data, long len)
{
    uint32_t a = *sum1;
    uint32_t b = *sum2;

    /* Single byte: a conditional subtraction is cheaper than a modulo. */
    if(len == 1)
    {
        a += data[0];
        if(a >= ADLER_MODULE) a -= ADLER_MODULE;
        b += a;
        if(b >= ADLER_MODULE) b -= ADLER_MODULE;
        *sum1 = a & 0xFFFF;
        *sum2 = b & 0xFFFF;
        return;
    }

    /* Short input: plain byte loop, then one reduction of each sum. */
    if(len < 16)
    {
        while(len--)
        {
            a += *data++;
            b += a;
        }
        if(a >= ADLER_MODULE) a -= ADLER_MODULE;
        b %= ADLER_MODULE; /* only accumulated a few multiples of ADLER_MODULE */
        *sum1 = a & 0xFFFF;
        *sum2 = b & 0xFFFF;
        return;
    }

    /* Bulk: NMAX is the largest byte count whose sums cannot overflow
       32 bits, so a single pair of modulos per NMAX bytes suffices. */
    while(len >= NMAX)
    {
        unsigned blocks = NMAX / 16; /* NMAX is divisible by 16 */
        len -= NMAX;
        do {
            for(int i = 0; i < 16; i++)
            {
                a += data[i];
                b += a;
            }
            data += 16;
        } while(--blocks);
        a %= ADLER_MODULE;
        b %= ADLER_MODULE;
    }

    /* Tail (< NMAX bytes): still needs only one final reduction. */
    if(len)
    {
        while(len >= 16)
        {
            for(int i = 0; i < 16; i++)
            {
                a += data[i];
                b += a;
            }
            data += 16;
            len -= 16;
        }
        while(len--)
        {
            a += *data++;
            b += a;
        }
        a %= ADLER_MODULE;
        b %= ADLER_MODULE;
    }

    *sum1 = a & 0xFFFF;
    *sum2 = b & 0xFFFF;
}
/**
 * Combines the running sums into the final Adler-32 value.
 *
 * @param ctx      Context created by adler32_init().
 * @param checksum Receives (sum2 << 16) | sum1.
 * @return 0 on success, -1 if ctx or checksum is NULL.
 */
AARU_EXPORT int AARU_CALL adler32_final(adler32_ctx* ctx, uint32_t* checksum)
{
    if(!ctx || !checksum) return -1;

    /* Cast before shifting: uint16_t promotes to signed int, and left-shifting
       a value >= 0x8000 by 16 would overflow int — undefined behavior in C. */
    *checksum = ((uint32_t)ctx->sum2 << 16) | ctx->sum1;

    return 0;
}
/**
 * Releases a context allocated by adler32_init().
 *
 * @param ctx Context to free; may be NULL, in which case nothing happens.
 */
AARU_EXPORT void AARU_CALL adler32_free(adler32_ctx* ctx)
{
    /* free(NULL) is a defined no-op, so no guard is needed. */
    free(ctx);
}