Mirror of https://github.com/aaru-dps/libaaruformat.git (synced 2025-12-16 19:24:40 +00:00)
Add SIMD implementations of CRC64.
src/crc64.c (deleted, 110 lines)
@@ -1,110 +0,0 @@
// /***************************************************************************
// Aaru Data Preservation Suite
// ----------------------------------------------------------------------------
//
// Filename    : crc64.c
// Author(s)   : Natalia Portillo <claunia@claunia.com>
//
// Component   : Checksums.
//
// --[ Description ] ----------------------------------------------------------
//
//     Implements a CRC64 algorithm.
//
// --[ License ] --------------------------------------------------------------
//
//     This library is free software; you can redistribute it and/or modify
//     it under the terms of the GNU Lesser General Public License as
//     published by the Free Software Foundation; either version 2.1 of the
//     License, or (at your option) any later version.
//
//     This library is distributed in the hope that it will be useful, but
//     WITHOUT ANY WARRANTY; without even the implied warranty of
//     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
//     Lesser General Public License for more details.
//
//     You should have received a copy of the GNU Lesser General Public
//     License along with this library; if not, see <http://www.gnu.org/licenses/>.
//
// ----------------------------------------------------------------------------
// Copyright © 2011-2022 Natalia Portillo
// ****************************************************************************/

#include <malloc.h>
#include <stdint.h>
#include <string.h>

#include <aaruformat.h>

void* aaruf_crc64_init(uint64_t polynomial, uint64_t seed)
{
    Crc64Context* ctx;
    int           i, j;

    ctx = malloc(sizeof(Crc64Context));

    if(ctx == NULL) return NULL;

    memset(ctx, 1, sizeof(Crc64Context));

    ctx->finalSeed = seed;
    ctx->hashInt   = seed;

    for(i = 0; i < 256; i++)
    {
        uint64_t entry = (uint64_t)i;
        for(j = 0; j < 8; j++)
            if((entry & 1) == 1) entry = (entry >> 1) ^ polynomial;
            else
                entry = entry >> 1;

        ctx->table[i] = entry;
    }

    return ctx;
}

void* aaruf_crc64_init_ecma(void) { return aaruf_crc64_init(CRC64_ECMA_POLY, CRC64_ECMA_SEED); }

void aaruf_crc64_update(void* context, const uint8_t* data, size_t len)
{
    Crc64Context* ctx = context;
    size_t        i;

    for(i = 0; i < len; i++) ctx->hashInt = (ctx->hashInt >> 8) ^ ctx->table[data[i] ^ (ctx->hashInt & 0xFF)];
}

uint64_t aaruf_crc64_final(void* context)
{
    Crc64Context* ctx = context;

    return ctx->hashInt ^ ctx->finalSeed;
}

uint64_t aaruf_crc64_data(const uint8_t* data, size_t len, uint64_t polynomial, uint64_t seed)
{
    uint64_t table[256];
    uint64_t hashInt = seed;
    int      i, j;
    size_t   s;

    for(i = 0; i < 256; i++)
    {
        uint64_t entry = (uint64_t)i;
        for(j = 0; j < 8; j++)
            if((entry & 1) == 1) entry = (entry >> 1) ^ polynomial;
            else
                entry = entry >> 1;

        table[i] = entry;
    }

    for(s = 0; s < len; s++) hashInt = (hashInt >> 8) ^ table[data[s] ^ (hashInt & 0xFF)];

    return hashInt ^ seed;
}

uint64_t aaruf_crc64_data_ecma(const uint8_t* data, size_t len)
{
    return aaruf_crc64_data(data, len, CRC64_ECMA_POLY, CRC64_ECMA_SEED);
}
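For reference, the deleted file computes a reflected, table-driven CRC-64 one byte at a time. The standalone sketch below shows the same technique outside the library; crc64_sketch, SKETCH_POLY and SKETCH_SEED are illustrative names, and the constants are assumptions (the reflected ECMA-182 polynomial and an all-ones seed, as used by CRC-64/XZ), since CRC64_ECMA_POLY and CRC64_ECMA_SEED live in aaruformat.h, which is not part of this diff.

/* Standalone sketch of the table-driven CRC-64 removed above.
 * Assumed constants: reflected ECMA-182 polynomial and all-ones seed (CRC-64/XZ). */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define SKETCH_POLY 0xC96C5795D7870F42ULL /* assumption: value of CRC64_ECMA_POLY */
#define SKETCH_SEED 0xFFFFFFFFFFFFFFFFULL /* assumption: value of CRC64_ECMA_SEED */

static uint64_t crc64_sketch(const uint8_t* data, size_t len)
{
    uint64_t table[256];
    uint64_t crc = SKETCH_SEED;

    /* Build the 256-entry table one bit at a time, same loop as aaruf_crc64_init */
    for(int i = 0; i < 256; i++)
    {
        uint64_t entry = (uint64_t)i;
        for(int j = 0; j < 8; j++) entry = (entry & 1) ? (entry >> 1) ^ SKETCH_POLY : entry >> 1;
        table[i] = entry;
    }

    /* One byte per step: the low byte of the running CRC selects the table entry */
    for(size_t s = 0; s < len; s++) crc = (crc >> 8) ^ table[data[s] ^ (crc & 0xFF)];

    return crc ^ SKETCH_SEED; /* final XOR, as in aaruf_crc64_final */
}

int main(void)
{
    const char* msg = "123456789";
    /* CRC-64/XZ check value of "123456789" is 0x995DC9BBDF1939FA */
    printf("%016llX\n", (unsigned long long)crc64_sketch((const uint8_t*)msg, strlen(msg)));
    return 0;
}

Any SIMD path added by this commit has to produce the same value as this scalar baseline for arbitrary buffer lengths and alignments.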
src/crc64/arm_vmull.c (new file, 166 lines)
@@ -0,0 +1,166 @@
/*
 * This file is part of the Aaru Data Preservation Suite.
 * Copyright (c) 2019-2022 Natalia Portillo.
 * Copyright sse2neon.h contributors
 *
 * sse2neon is freely redistributable under the MIT License.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#if defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM)

#include <arm_neon.h>

#include <aaruformat.h>

#include "arm_vmull.h"

#if !defined(__MINGW32__) && (!defined(__ANDROID__) || !defined(__arm__))
TARGET_WITH_CRYPTO static uint64x2_t sse2neon_vmull_p64_crypto(uint64x1_t _a, uint64x1_t _b)
{
    poly64_t a = vget_lane_p64(vreinterpret_p64_u64(_a), 0);
    poly64_t b = vget_lane_p64(vreinterpret_p64_u64(_b), 0);
    return vreinterpretq_u64_p128(vmull_p64(a, b));
}
#endif

TARGET_WITH_SIMD uint64x2_t sse2neon_vmull_p64(uint64x1_t _a, uint64x1_t _b)
{
#if !defined(__MINGW32__) && (!defined(__ANDROID__) || !defined(__arm__))
    // Wraps vmull_p64
    if(have_arm_crypto()) return sse2neon_vmull_p64_crypto(_a, _b);
#endif

    // ARMv7 polyfill
    // ARMv7/some A64 lacks vmull_p64, but it has vmull_p8.
    //
    // vmull_p8 calculates 8 8-bit->16-bit polynomial multiplies, but we need a
    // 64-bit->128-bit polynomial multiply.
    //
    // It needs some work and is somewhat slow, but it is still faster than all
    // known scalar methods.
    //
    // Algorithm adapted to C from
    // https://www.workofard.com/2017/07/ghash-for-low-end-cores/, which is adapted
    // from "Fast Software Polynomial Multiplication on ARM Processors Using the
    // NEON Engine" by Danilo Camara, Conrado Gouvea, Julio Lopez and Ricardo Dahab
    // (https://hal.inria.fr/hal-01506572)

    poly8x8_t a = vreinterpret_p8_u64(_a);
    poly8x8_t b = vreinterpret_p8_u64(_b);

    // Masks
    uint8x16_t k48_32 = vcombine_u8(vcreate_u8(0x0000ffffffffffff), vcreate_u8(0x00000000ffffffff));
    uint8x16_t k16_00 = vcombine_u8(vcreate_u8(0x000000000000ffff), vcreate_u8(0x0000000000000000));

    // Do the multiplies, rotating with vext to get all combinations
    uint8x16_t d = vreinterpretq_u8_p16(vmull_p8(a, b));                // D = A0 * B0
    uint8x16_t e = vreinterpretq_u8_p16(vmull_p8(a, vext_p8(b, b, 1))); // E = A0 * B1
    uint8x16_t f = vreinterpretq_u8_p16(vmull_p8(vext_p8(a, a, 1), b)); // F = A1 * B0
    uint8x16_t g = vreinterpretq_u8_p16(vmull_p8(a, vext_p8(b, b, 2))); // G = A0 * B2
    uint8x16_t h = vreinterpretq_u8_p16(vmull_p8(vext_p8(a, a, 2), b)); // H = A2 * B0
    uint8x16_t i = vreinterpretq_u8_p16(vmull_p8(a, vext_p8(b, b, 3))); // I = A0 * B3
    uint8x16_t j = vreinterpretq_u8_p16(vmull_p8(vext_p8(a, a, 3), b)); // J = A3 * B0
    uint8x16_t k = vreinterpretq_u8_p16(vmull_p8(a, vext_p8(b, b, 4))); // L = A0 * B4

    // Add cross products
    uint8x16_t l = veorq_u8(e, f); // L = E + F
    uint8x16_t m = veorq_u8(g, h); // M = G + H
    uint8x16_t n = veorq_u8(i, j); // N = I + J

    // Interleave. Using vzip1 and vzip2 prevents Clang from emitting TBL
    // instructions.
#if defined(__aarch64__)
    uint8x16_t lm_p0 = vreinterpretq_u8_u64(vzip1q_u64(vreinterpretq_u64_u8(l), vreinterpretq_u64_u8(m)));
    uint8x16_t lm_p1 = vreinterpretq_u8_u64(vzip2q_u64(vreinterpretq_u64_u8(l), vreinterpretq_u64_u8(m)));
    uint8x16_t nk_p0 = vreinterpretq_u8_u64(vzip1q_u64(vreinterpretq_u64_u8(n), vreinterpretq_u64_u8(k)));
    uint8x16_t nk_p1 = vreinterpretq_u8_u64(vzip2q_u64(vreinterpretq_u64_u8(n), vreinterpretq_u64_u8(k)));
#else
    uint8x16_t lm_p0 = vcombine_u8(vget_low_u8(l), vget_low_u8(m));
    uint8x16_t lm_p1 = vcombine_u8(vget_high_u8(l), vget_high_u8(m));
    uint8x16_t nk_p0 = vcombine_u8(vget_low_u8(n), vget_low_u8(k));
    uint8x16_t nk_p1 = vcombine_u8(vget_high_u8(n), vget_high_u8(k));
#endif
    // t0 = (L) (P0 + P1) << 8
    // t1 = (M) (P2 + P3) << 16
    uint8x16_t t0t1_tmp = veorq_u8(lm_p0, lm_p1);
    uint8x16_t t0t1_h   = vandq_u8(lm_p1, k48_32);
    uint8x16_t t0t1_l   = veorq_u8(t0t1_tmp, t0t1_h);

    // t2 = (N) (P4 + P5) << 24
    // t3 = (K) (P6 + P7) << 32
    uint8x16_t t2t3_tmp = veorq_u8(nk_p0, nk_p1);
    uint8x16_t t2t3_h   = vandq_u8(nk_p1, k16_00);
    uint8x16_t t2t3_l   = veorq_u8(t2t3_tmp, t2t3_h);

    // De-interleave
#if defined(__aarch64__)
    uint8x16_t t0 = vreinterpretq_u8_u64(vuzp1q_u64(vreinterpretq_u64_u8(t0t1_l), vreinterpretq_u64_u8(t0t1_h)));
    uint8x16_t t1 = vreinterpretq_u8_u64(vuzp2q_u64(vreinterpretq_u64_u8(t0t1_l), vreinterpretq_u64_u8(t0t1_h)));
    uint8x16_t t2 = vreinterpretq_u8_u64(vuzp1q_u64(vreinterpretq_u64_u8(t2t3_l), vreinterpretq_u64_u8(t2t3_h)));
    uint8x16_t t3 = vreinterpretq_u8_u64(vuzp2q_u64(vreinterpretq_u64_u8(t2t3_l), vreinterpretq_u64_u8(t2t3_h)));
#else
    uint8x16_t t1 = vcombine_u8(vget_high_u8(t0t1_l), vget_high_u8(t0t1_h));
    uint8x16_t t0 = vcombine_u8(vget_low_u8(t0t1_l), vget_low_u8(t0t1_h));
    uint8x16_t t3 = vcombine_u8(vget_high_u8(t2t3_l), vget_high_u8(t2t3_h));
    uint8x16_t t2 = vcombine_u8(vget_low_u8(t2t3_l), vget_low_u8(t2t3_h));
#endif
    // Shift the cross products
    uint8x16_t t0_shift = vextq_u8(t0, t0, 15); // t0 << 8
    uint8x16_t t1_shift = vextq_u8(t1, t1, 14); // t1 << 16
    uint8x16_t t2_shift = vextq_u8(t2, t2, 13); // t2 << 24
    uint8x16_t t3_shift = vextq_u8(t3, t3, 12); // t3 << 32

    // Accumulate the products
    uint8x16_t cross1 = veorq_u8(t0_shift, t1_shift);
    uint8x16_t cross2 = veorq_u8(t2_shift, t3_shift);
    uint8x16_t mix    = veorq_u8(d, cross1);
    uint8x16_t r      = veorq_u8(mix, cross2);
    return vreinterpretq_u64_u8(r);
}

TARGET_WITH_SIMD uint64x2_t mm_shuffle_epi8(uint64x2_t a, uint64x2_t b)
{
    uint8x16_t tbl        = vreinterpretq_u8_u64(a);         // input a
    uint8x16_t idx        = vreinterpretq_u8_u64(b);         // input b
    uint8x16_t idx_masked = vandq_u8(idx, vdupq_n_u8(0x8F)); // avoid using meaningless bits
#if defined(__aarch64__)
    return vreinterpretq_u64_u8(vqtbl1q_u8(tbl, idx_masked));
#else
    // use this line if testing on aarch64
    uint8x8x2_t a_split = {vget_low_u8(tbl), vget_high_u8(tbl)};
    return vreinterpretq_u64_u8(
        vcombine_u8(vtbl2_u8(a_split, vget_low_u8(idx_masked)), vtbl2_u8(a_split, vget_high_u8(idx_masked))));
#endif
}

TARGET_WITH_SIMD uint64x2_t mm_srli_si128(uint64x2_t a, int imm)
{
    uint8x16_t tmp[2] = {vreinterpretq_u8_u64(a), vdupq_n_u8(0)};
    return vreinterpretq_u64_u8(vld1q_u8(((uint8_t const*)tmp) + imm));
}

TARGET_WITH_SIMD uint64x2_t mm_slli_si128(uint64x2_t a, int imm)
{
    uint8x16_t tmp[2] = {vdupq_n_u8(0), vreinterpretq_u8_u64(a)};
    return vreinterpretq_u64_u8(vld1q_u8(((uint8_t const*)tmp) + (16 - imm)));
}

#endif
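The polyfill above reassembles a 64x64 -> 128-bit carry-less product from eight vmull_p8 partial products when the Crypto extension (vmull_p64) is unavailable. As a plain-C reference for what that product is, useful when unit-testing the NEON path, here is a shift-and-XOR sketch; clmul64_ref is an illustrative name, not part of the library.

/* Reference 64x64 -> 128-bit carry-less multiplication (what vmull_p64 computes),
 * written as plain shift-and-XOR; *hi:*lo is the 128-bit product. */
#include <stdint.h>

static void clmul64_ref(uint64_t a, uint64_t b, uint64_t* hi, uint64_t* lo)
{
    uint64_t h = 0, l = 0;

    for(int i = 0; i < 64; i++)
    {
        if((b >> i) & 1)
        {
            l ^= a << i;                   /* low 64 bits of a * x^i           */
            if(i != 0) h ^= a >> (64 - i); /* bits that spill into the high word */
        }
    }

    *hi = h;
    *lo = l;
}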
src/crc64/arm_vmull.h (new file, 32 lines)
@@ -0,0 +1,32 @@
/*
 * This file is part of the Aaru Data Preservation Suite.
 * Copyright (c) 2019-2022 Natalia Portillo.
 *
 * This library is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of the
 * License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#ifndef LIBAARUFORMAT_NATIVE_ARM_VMULL_H
#define LIBAARUFORMAT_NATIVE_ARM_VMULL_H

#if defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM)

TARGET_WITH_CRYPTO static uint64x2_t sse2neon_vmull_p64_crypto(uint64x1_t _a, uint64x1_t _b);
TARGET_WITH_SIMD uint64x2_t sse2neon_vmull_p64(uint64x1_t _a, uint64x1_t _b);
TARGET_WITH_SIMD uint64x2_t mm_shuffle_epi8(uint64x2_t a, uint64x2_t b);
TARGET_WITH_SIMD uint64x2_t mm_srli_si128(uint64x2_t a, int imm);
TARGET_WITH_SIMD uint64x2_t mm_slli_si128(uint64x2_t a, int imm);

#endif

#endif // LIBAARUFORMAT_NATIVE_ARM_VMULL_H
src/crc64/crc64.c (new file, 138 lines)
@@ -0,0 +1,138 @@
// /***************************************************************************
// Aaru Data Preservation Suite
// ----------------------------------------------------------------------------
//
// Filename    : crc64.c
// Author(s)   : Natalia Portillo <claunia@claunia.com>
//
// Component   : libaaruformat.
//
// --[ Description ] ----------------------------------------------------------
//
//     Calculates CRC64-ECMA checksums.
//
// --[ License ] --------------------------------------------------------------
//
//     This library is free software; you can redistribute it and/or modify
//     it under the terms of the GNU Lesser General Public License as
//     published by the Free Software Foundation; either version 2.1 of the
//     License, or (at your option) any later version.
//
//     This library is distributed in the hope that it will be useful, but
//     WITHOUT ANY WARRANTY; without even the implied warranty of
//     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
//     Lesser General Public License for more details.
//
//     You should have received a copy of the GNU Lesser General Public
//     License along with this library; if not, see <http://www.gnu.org/licenses/>.
//
// ----------------------------------------------------------------------------
// Copyright © 2011-2022 Natalia Portillo
// ****************************************************************************/

#include <stdlib.h>

#include <aaruformat.h>

AARU_EXPORT crc64_ctx* AARU_CALL aaruf_crc64_init(void)
{
    int        i, slice;
    crc64_ctx* ctx = (crc64_ctx*)malloc(sizeof(crc64_ctx));

    if(!ctx) return NULL;

    ctx->crc = CRC64_ECMA_SEED;

    return ctx;
}

AARU_EXPORT int AARU_CALL aaruf_crc64_update(crc64_ctx* ctx, const uint8_t* data, uint32_t len)
{
    if(!ctx || !data) return -1;

#if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \
    defined(__i386__) || defined(__THW_INTEL) || defined(_M_IX86)
    if(have_clmul())
    {
        ctx->crc = ~aaruf_crc64_clmul(~ctx->crc, data, len);
        return 0;
    }
#endif

#if defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM)
    if(have_neon())
    {
        ctx->crc = ~aaruf_crc64_vmull(~ctx->crc, data, len);
        return 0;
    }
#endif

    // Unroll according to Intel's slicing-by-8 technique
    // http://www.intel.com/technology/comms/perfnet/download/CRC_generators.pdf
    // http://sourceforge.net/projects/slicing-by-8/

    aaruf_crc64_slicing(&ctx->crc, data, len);

    return 0;
}

AARU_EXPORT void AARU_CALL aaruf_crc64_slicing(uint64_t* previous_crc, const uint8_t* data, uint32_t len)
{
    uint64_t c = *previous_crc;

    if(len > 4)
    {
        const uint8_t* limit;

        while((uintptr_t)(data)&3)
        {
            c = crc64_table[0][*data++ ^ ((c)&0xFF)] ^ ((c) >> 8);
            --len;
        }

        limit = data + (len & ~(uint32_t)(3));
        len &= (uint32_t)(3);

        while(data < limit)
        {
            const uint32_t tmp = c ^ *(const uint32_t*)(data);
            data += 4;

            c = crc64_table[3][((tmp)&0xFF)] ^ crc64_table[2][(((tmp) >> 8) & 0xFF)] ^ ((c) >> 32) ^
                crc64_table[1][(((tmp) >> 16) & 0xFF)] ^ crc64_table[0][((tmp) >> 24)];
        }
    }

    while(len-- != 0) c = crc64_table[0][*data++ ^ ((c)&0xFF)] ^ ((c) >> 8);

    *previous_crc = c;
}

AARU_EXPORT int AARU_CALL aaruf_crc64_final(crc64_ctx* ctx, uint64_t* crc)
{
    if(!ctx) return -1;

    *crc = ctx->crc ^ CRC64_ECMA_SEED;

    return 0;
}

AARU_EXPORT void AARU_CALL aaruf_crc64_free(crc64_ctx* ctx)
{
    if(ctx) free(ctx);
}

AARU_EXPORT uint64_t AARU_CALL aaruf_crc64_data(const uint8_t* data, uint32_t len)
{
    crc64_ctx* ctx = aaruf_crc64_init();
    uint64_t   crc = 0;

    if(!ctx) return crc;

    aaruf_crc64_update(ctx, data, len);
    aaruf_crc64_final(ctx, &crc);
    aaruf_crc64_free(ctx);

    return crc;
}
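The file above replaces the old polynomial-parameterised API with a fixed CRC64-ECMA context that dispatches to CLMUL, NEON or the table-driven slicing fallback at run time. A minimal usage sketch of that streaming API follows; it only uses the functions defined in this diff, but assumes aaruformat.h declares crc64_ctx and these prototypes, and hash_in_two_chunks is an illustrative name.

/* Sketch: hashing a buffer in two chunks with the streaming API added above.
 * Assumes <aaruformat.h> declares crc64_ctx and the aaruf_crc64_* prototypes. */
#include <stdint.h>

#include <aaruformat.h>

uint64_t hash_in_two_chunks(const uint8_t* buf, uint32_t len)
{
    uint64_t   crc = 0;
    crc64_ctx* ctx = aaruf_crc64_init();

    if(!ctx) return 0;

    uint32_t half = len / 2;
    aaruf_crc64_update(ctx, buf, half);              /* first chunk */
    aaruf_crc64_update(ctx, buf + half, len - half); /* remainder */
    aaruf_crc64_final(ctx, &crc);                    /* applies the final XOR with the seed */
    aaruf_crc64_free(ctx);

    return crc; /* should equal the one-shot aaruf_crc64_data(buf, len) */
}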
src/crc64/crc64_clmul.c (new file, 221 lines)
@@ -0,0 +1,221 @@
// /***************************************************************************
// Aaru Data Preservation Suite
// ----------------------------------------------------------------------------
//
// Filename    : crc64.c
// Author(s)   : Natalia Portillo <claunia@claunia.com>
//
// Component   : libaaruformat.
//
// --[ Description ] ----------------------------------------------------------
//
//     Calculates CRC64-ECMA checksums using CLMUL.
//
// --[ License ] --------------------------------------------------------------
//
//     This library is free software; you can redistribute it and/or modify
//     it under the terms of the GNU Lesser General Public License as
//     published by the Free Software Foundation; either version 2.1 of the
//     License, or (at your option) any later version.
//
//     This library is distributed in the hope that it will be useful, but
//     WITHOUT ANY WARRANTY; without even the implied warranty of
//     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
//     Lesser General Public License for more details.
//
//     You should have received a copy of the GNU Lesser General Public
//     License along with this library; if not, see <http://www.gnu.org/licenses/>.
//
// ----------------------------------------------------------------------------
// Copyright © 2011-2022 Natalia Portillo
// ****************************************************************************/

#if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \
    defined(__i386__) || defined(__THW_INTEL) || defined(_M_IX86)

#include <inttypes.h>
#include <smmintrin.h>
#include <wmmintrin.h>

#ifdef _MSC_VER
#include <intrin.h>
#endif

#include <aaruformat.h>

// Reverses bits
static uint64_t bitReflect(uint64_t v)
{
    v = ((v >> 1) & 0x5555555555555555) | ((v & 0x5555555555555555) << 1);
    v = ((v >> 2) & 0x3333333333333333) | ((v & 0x3333333333333333) << 2);
    v = ((v >> 4) & 0x0F0F0F0F0F0F0F0F) | ((v & 0x0F0F0F0F0F0F0F0F) << 4);
    v = ((v >> 8) & 0x00FF00FF00FF00FF) | ((v & 0x00FF00FF00FF00FF) << 8);
    v = ((v >> 16) & 0x0000FFFF0000FFFF) | ((v & 0x0000FFFF0000FFFF) << 16);
    v = (v >> 32) | (v << 32);
    return v;
}

// Computes r*x^N mod p(x)
static uint64_t expMod65(uint32_t n, uint64_t p, uint64_t r)
{
    return n == 0 ? r : expMod65(n - 1, p, (r << 1) ^ (p & ((int64_t)r >> 63)));
}

// Computes x^129 / p(x); the result has an implicit 65th bit.
static uint64_t div129by65(uint64_t poly)
{
    uint64_t q = 0;
    uint64_t h = poly;
    uint32_t i;
    for(i = 0; i < 64; ++i)
    {
        q |= (h & (1ull << 63)) >> i;
        h = (h << 1) ^ (poly & ((int64_t)h >> 63));
    }
    return q;
}

static const uint8_t shuffleMasks[] = {
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    0x8f, 0x8e, 0x8d, 0x8c, 0x8b, 0x8a, 0x89, 0x88, 0x87, 0x86, 0x85, 0x84, 0x83, 0x82, 0x81, 0x80,
};

CLMUL static void shiftRight128(__m128i in, size_t n, __m128i* outLeft, __m128i* outRight)
{
    const __m128i maskA = _mm_loadu_si128((const __m128i*)(shuffleMasks + (16 - n)));
    const __m128i maskB = _mm_xor_si128(maskA, _mm_cmpeq_epi8(_mm_setzero_si128(), _mm_setzero_si128()));

    *outLeft  = _mm_shuffle_epi8(in, maskB);
    *outRight = _mm_shuffle_epi8(in, maskA);
}

CLMUL static __m128i fold(__m128i in, __m128i foldConstants)
{
    return _mm_xor_si128(_mm_clmulepi64_si128(in, foldConstants, 0x00), _mm_clmulepi64_si128(in, foldConstants, 0x11));
}

AARU_EXPORT CLMUL uint64_t AARU_CALL aaruf_crc64_clmul(uint64_t crc, const uint8_t* data, long length)
{
    const uint64_t k1 = 0xe05dd497ca393ae4; // bitReflect(expMod65(128 + 64, poly, 1)) << 1;
    const uint64_t k2 = 0xdabe95afc7875f40; // bitReflect(expMod65(128, poly, 1)) << 1;
    const uint64_t mu = 0x9c3e466c172963d5; // (bitReflect(div129by65(poly)) << 1) | 1;
    const uint64_t p  = 0x92d8af2baf0e1e85; // (bitReflect(poly) << 1) | 1;

    const __m128i foldConstants1 = _mm_set_epi64x(k2, k1);
    const __m128i foldConstants2 = _mm_set_epi64x(p, mu);

    const uint8_t* end = data + length;

    // Align pointers
    const __m128i* alignedData = (const __m128i*)((uintptr_t)data & ~(uintptr_t)15);
    const __m128i* alignedEnd  = (const __m128i*)(((uintptr_t)end + 15) & ~(uintptr_t)15);

    const size_t leadInSize  = data - (const uint8_t*)alignedData;
    const size_t leadOutSize = (const uint8_t*)alignedEnd - end;

    const size_t alignedLength = alignedEnd - alignedData;

    const __m128i leadInMask = _mm_loadu_si128((const __m128i*)(shuffleMasks + (16 - leadInSize)));
    const __m128i data0      = _mm_blendv_epi8(_mm_setzero_si128(), _mm_load_si128(alignedData), leadInMask);

#if defined(_WIN64)
    const __m128i initialCrc = _mm_cvtsi64x_si128(~crc);
#else
    const __m128i initialCrc = _mm_set_epi64x(0, ~crc);
#endif

    __m128i R;
    if(alignedLength == 1)
    {
        // Single data block, initial CRC possibly bleeds into zero padding
        __m128i crc0, crc1;
        shiftRight128(initialCrc, 16 - length, &crc0, &crc1);

        __m128i A, B;
        shiftRight128(data0, leadOutSize, &A, &B);

        const __m128i P = _mm_xor_si128(A, crc0);
        R               = _mm_xor_si128(_mm_clmulepi64_si128(P, foldConstants1, 0x10),
                                        _mm_xor_si128(_mm_srli_si128(P, 8), _mm_slli_si128(crc1, 8)));
    }
    else if(alignedLength == 2)
    {
        const __m128i data1 = _mm_load_si128(alignedData + 1);

        if(length < 8)
        {
            // Initial CRC bleeds into the zero padding
            __m128i crc0, crc1;
            shiftRight128(initialCrc, 16 - length, &crc0, &crc1);

            __m128i A, B, C, D;
            shiftRight128(data0, leadOutSize, &A, &B);
            shiftRight128(data1, leadOutSize, &C, &D);

            const __m128i P = _mm_xor_si128(_mm_xor_si128(B, C), crc0);
            R               = _mm_xor_si128(_mm_clmulepi64_si128(P, foldConstants1, 0x10),
                                            _mm_xor_si128(_mm_srli_si128(P, 8), _mm_slli_si128(crc1, 8)));
        }
        else
        {
            // We can fit the initial CRC into the data without bleeding into the zero padding
            __m128i crc0, crc1;
            shiftRight128(initialCrc, leadInSize, &crc0, &crc1);

            __m128i A, B, C, D;
            shiftRight128(_mm_xor_si128(data0, crc0), leadOutSize, &A, &B);
            shiftRight128(_mm_xor_si128(data1, crc1), leadOutSize, &C, &D);

            const __m128i P = _mm_xor_si128(fold(A, foldConstants1), _mm_xor_si128(B, C));
            R               = _mm_xor_si128(_mm_clmulepi64_si128(P, foldConstants1, 0x10), _mm_srli_si128(P, 8));
        }
    }
    else
    {
        alignedData++;
        length -= 16 - leadInSize;

        // Initial CRC can simply be added to data
        __m128i crc0, crc1;
        shiftRight128(initialCrc, leadInSize, &crc0, &crc1);

        __m128i accumulator = _mm_xor_si128(fold(_mm_xor_si128(crc0, data0), foldConstants1), crc1);

        while(length >= 32)
        {
            accumulator = fold(_mm_xor_si128(_mm_load_si128(alignedData), accumulator), foldConstants1);

            length -= 16;
            alignedData++;
        }

        __m128i P;
        if(length == 16) { P = _mm_xor_si128(accumulator, _mm_load_si128(alignedData)); }
        else
        {
            const __m128i end0 = _mm_xor_si128(accumulator, _mm_load_si128(alignedData));
            const __m128i end1 = _mm_load_si128(alignedData + 1);

            __m128i A, B, C, D;
            shiftRight128(end0, leadOutSize, &A, &B);
            shiftRight128(end1, leadOutSize, &C, &D);

            P = _mm_xor_si128(fold(A, foldConstants1), _mm_or_si128(B, C));
        }

        R = _mm_xor_si128(_mm_clmulepi64_si128(P, foldConstants1, 0x10), _mm_srli_si128(P, 8));
    }

    // Final Barrett reduction
    const __m128i T1 = _mm_clmulepi64_si128(R, foldConstants2, 0x00);
    const __m128i T2 =
        _mm_xor_si128(_mm_xor_si128(_mm_clmulepi64_si128(T1, foldConstants2, 0x10), _mm_slli_si128(T1, 8)), R);

#if defined(_WIN64)
    return ~_mm_extract_epi64(T2, 1);
#else
    return ~(((uint64_t)(uint32_t)_mm_extract_epi32(T2, 3) << 32) | (uint64_t)(uint32_t)_mm_extract_epi32(T2, 2));
#endif
}

#endif
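The fold constants k1, k2, mu and p in aaruf_crc64_clmul are hard-coded, with their derivations given in the comments beside them. The standalone sketch below recomputes them using the bitReflect, expMod65 and div129by65 helpers defined in this file, so the hard-coded values can be checked; the only assumption is that poly is the unreflected ECMA-182 polynomial 0x42F0E1EBA9EA3693, which is consistent with the comment on p.

/* Sketch: recompute the CLMUL fold constants from the (assumed) ECMA-182
 * polynomial. The three helpers are copied verbatim from crc64_clmul.c. */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t bitReflect(uint64_t v)
{
    v = ((v >> 1) & 0x5555555555555555) | ((v & 0x5555555555555555) << 1);
    v = ((v >> 2) & 0x3333333333333333) | ((v & 0x3333333333333333) << 2);
    v = ((v >> 4) & 0x0F0F0F0F0F0F0F0F) | ((v & 0x0F0F0F0F0F0F0F0F) << 4);
    v = ((v >> 8) & 0x00FF00FF00FF00FF) | ((v & 0x00FF00FF00FF00FF) << 8);
    v = ((v >> 16) & 0x0000FFFF0000FFFF) | ((v & 0x0000FFFF0000FFFF) << 16);
    return (v >> 32) | (v << 32);
}

static uint64_t expMod65(uint32_t n, uint64_t p, uint64_t r)
{
    return n == 0 ? r : expMod65(n - 1, p, (r << 1) ^ (p & ((int64_t)r >> 63)));
}

static uint64_t div129by65(uint64_t poly)
{
    uint64_t q = 0, h = poly;
    for(uint32_t i = 0; i < 64; ++i)
    {
        q |= (h & (1ull << 63)) >> i;
        h = (h << 1) ^ (poly & ((int64_t)h >> 63));
    }
    return q;
}

int main(void)
{
    const uint64_t poly = 0x42F0E1EBA9EA3693; /* assumption: unreflected ECMA-182 polynomial */

    printf("k1 = %016" PRIx64 "\n", bitReflect(expMod65(128 + 64, poly, 1)) << 1); /* expect e05dd497ca393ae4 */
    printf("k2 = %016" PRIx64 "\n", bitReflect(expMod65(128, poly, 1)) << 1);      /* expect dabe95afc7875f40 */
    printf("mu = %016" PRIx64 "\n", (bitReflect(div129by65(poly)) << 1) | 1);      /* expect 9c3e466c172963d5 */
    printf("p  = %016" PRIx64 "\n", (bitReflect(poly) << 1) | 1);                  /* expect 92d8af2baf0e1e85 */
    return 0;
}

The expected values in the comments are taken directly from the constants in the source above; the same constants are reused by the NEON implementation in the next file.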
src/crc64/crc64_vmull.c (new file, 192 lines)
@@ -0,0 +1,192 @@
// /***************************************************************************
// Aaru Data Preservation Suite
// ----------------------------------------------------------------------------
//
// Filename    : crc64.c
// Author(s)   : Natalia Portillo <claunia@claunia.com>
//
// Component   : libaaruformat.
//
// --[ Description ] ----------------------------------------------------------
//
//     Calculates CRC64-ECMA checksums using VMULL.
//
// --[ License ] --------------------------------------------------------------
//
//     This library is free software; you can redistribute it and/or modify
//     it under the terms of the GNU Lesser General Public License as
//     published by the Free Software Foundation; either version 2.1 of the
//     License, or (at your option) any later version.
//
//     This library is distributed in the hope that it will be useful, but
//     WITHOUT ANY WARRANTY; without even the implied warranty of
//     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
//     Lesser General Public License for more details.
//
//     You should have received a copy of the GNU Lesser General Public
//     License along with this library; if not, see <http://www.gnu.org/licenses/>.
//
// ----------------------------------------------------------------------------
// Copyright © 2011-2022 Natalia Portillo
// ****************************************************************************/

#if defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM)

#include <arm_neon.h>
#include <stddef.h>
#include <stdint.h>

#include <aaruformat.h>

#include "arm_vmull.h"

static const uint8_t shuffleMasks[] = {
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    0x8f, 0x8e, 0x8d, 0x8c, 0x8b, 0x8a, 0x89, 0x88, 0x87, 0x86, 0x85, 0x84, 0x83, 0x82, 0x81, 0x80,
};

TARGET_WITH_SIMD FORCE_INLINE void shiftRight128(uint64x2_t in, size_t n, uint64x2_t* outLeft, uint64x2_t* outRight)
{
    const uint64x2_t maskA =
        vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)(const uint64x2_t*)(shuffleMasks + (16 - n))));
    uint64x2_t b = vreinterpretq_u64_u8(vceqq_u8(vreinterpretq_u8_u64(vreinterpretq_u64_u32(vdupq_n_u32(0))),
                                                 vreinterpretq_u8_u64(vreinterpretq_u64_u32(vdupq_n_u32(0)))));
    const uint64x2_t maskB = vreinterpretq_u64_u32(veorq_u32(vreinterpretq_u32_u64(maskA), vreinterpretq_u32_u64(b)));

    *outLeft  = mm_shuffle_epi8(in, maskB);
    *outRight = mm_shuffle_epi8(in, maskA);
}

TARGET_WITH_SIMD FORCE_INLINE uint64x2_t fold(uint64x2_t in, uint64x2_t foldConstants)
{
    return veorq_u64(sse2neon_vmull_p64(vget_low_u64(in), vget_low_u64(foldConstants)),
                     sse2neon_vmull_p64(vget_high_u64(in), vget_high_u64(foldConstants)));
}

AARU_EXPORT TARGET_WITH_SIMD uint64_t AARU_CALL aaruf_crc64_vmull(uint64_t previous_crc, const uint8_t* data, long len)
{
    const uint64_t k1 = 0xe05dd497ca393ae4; // bitReflect(expMod65(128 + 64, poly, 1)) << 1;
    const uint64_t k2 = 0xdabe95afc7875f40; // bitReflect(expMod65(128, poly, 1)) << 1;
    const uint64_t mu = 0x9c3e466c172963d5; // (bitReflect(div129by65(poly)) << 1) | 1;
    const uint64_t p  = 0x92d8af2baf0e1e85; // (bitReflect(poly) << 1) | 1;

    const uint64x2_t foldConstants1 = vcombine_u64(vcreate_u64(k1), vcreate_u64(k2));
    const uint64x2_t foldConstants2 = vcombine_u64(vcreate_u64(mu), vcreate_u64(p));

    const uint8_t* end = data + len;

    // Align pointers
    const uint64x2_t* alignedData = (const uint64x2_t*)((uintptr_t)data & ~(uintptr_t)15);
    const uint64x2_t* alignedEnd  = (const uint64x2_t*)(((uintptr_t)end + 15) & ~(uintptr_t)15);

    const size_t leadInSize  = data - (const uint8_t*)alignedData;
    const size_t leadOutSize = (const uint8_t*)alignedEnd - end;

    const size_t alignedLength = alignedEnd - alignedData;

    const uint64x2_t leadInMask =
        vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)(const uint64x2_t*)(shuffleMasks + (16 - leadInSize))));
    uint64x2_t a = vreinterpretq_u64_u32(vdupq_n_u32(0));
    uint64x2_t b = vreinterpretq_u64_u32(
        vld1q_u32((const uint32_t*)alignedData)); // Use a signed shift right to create a mask with the sign bit
    const uint64x2_t data0 =
        vreinterpretq_u64_u8(vbslq_u8(vreinterpretq_u8_s8(vshrq_n_s8(vreinterpretq_s8_u64(leadInMask), 7)),
                                      vreinterpretq_u8_u64(b),
                                      vreinterpretq_u8_u64(a)));

    const uint64x2_t initialCrc = vsetq_lane_u64(~previous_crc, vdupq_n_u64(0), 0);

    uint64x2_t R;
    if(alignedLength == 1)
    {
        // Single data block, initial CRC possibly bleeds into zero padding
        uint64x2_t crc0, crc1;
        shiftRight128(initialCrc, 16 - len, &crc0, &crc1);

        uint64x2_t A, B;
        shiftRight128(data0, leadOutSize, &A, &B);

        const uint64x2_t P = veorq_u64(A, crc0);
        R                  = veorq_u64(sse2neon_vmull_p64(vget_low_u64(P), vget_high_u64(foldConstants1)),
                                       veorq_u64(mm_srli_si128(P, 8), mm_slli_si128(crc1, 8)));
    }
    else if(alignedLength == 2)
    {
        const uint64x2_t data1 = vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)(alignedData + 1)));

        if(len < 8)
        {
            // Initial CRC bleeds into the zero padding
            uint64x2_t crc0, crc1;
            shiftRight128(initialCrc, 16 - len, &crc0, &crc1);

            uint64x2_t A, B, C, D;
            shiftRight128(data0, leadOutSize, &A, &B);
            shiftRight128(data1, leadOutSize, &C, &D);

            const uint64x2_t P = veorq_u64(veorq_u64(B, C), crc0);
            R                  = veorq_u64(sse2neon_vmull_p64(vget_low_u64(P), vget_high_u64(foldConstants1)),
                                           veorq_u64(mm_srli_si128(P, 8), mm_slli_si128(crc1, 8)));
        }
        else
        {
            // We can fit the initial CRC into the data without bleeding into the zero padding
            uint64x2_t crc0, crc1;
            shiftRight128(initialCrc, leadInSize, &crc0, &crc1);

            uint64x2_t A, B, C, D;
            shiftRight128(veorq_u64(data0, crc0), leadOutSize, &A, &B);
            shiftRight128(veorq_u64(data1, crc1), leadOutSize, &C, &D);

            const uint64x2_t P = veorq_u64(fold(A, foldConstants1), veorq_u64(B, C));
            R = veorq_u64(sse2neon_vmull_p64(vget_low_u64(P), vget_high_u64(foldConstants1)), mm_srli_si128(P, 8));
        }
    }
    else
    {
        alignedData++;
        len -= 16 - leadInSize;

        // Initial CRC can simply be added to data
        uint64x2_t crc0, crc1;
        shiftRight128(initialCrc, leadInSize, &crc0, &crc1);

        uint64x2_t accumulator = veorq_u64(fold(veorq_u64(crc0, data0), foldConstants1), crc1);

        while(len >= 32)
        {
            accumulator = fold(veorq_u64(vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)alignedData)), accumulator),
                               foldConstants1);

            len -= 16;
            alignedData++;
        }

        uint64x2_t P;
        if(len == 16) P = veorq_u64(accumulator, vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)alignedData)));
        else
        {
            const uint64x2_t end0 =
                veorq_u64(accumulator, vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)alignedData)));
            const uint64x2_t end1 = vreinterpretq_u64_u32(vld1q_u32((const uint32_t*)(alignedData + 1)));

            uint64x2_t A, B, C, D;
            shiftRight128(end0, leadOutSize, &A, &B);
            shiftRight128(end1, leadOutSize, &C, &D);

            P = veorq_u64(fold(A, foldConstants1),
                          vreinterpretq_u64_u32(vorrq_u32(vreinterpretq_u32_u64(B), vreinterpretq_u32_u64(C))));
        }

        R = veorq_u64(sse2neon_vmull_p64(vget_low_u64(P), vget_high_u64(foldConstants1)), mm_srli_si128(P, 8));
    }

    // Final Barrett reduction
    const uint64x2_t T1 = sse2neon_vmull_p64(vget_low_u64(R), vget_low_u64(foldConstants2));
    const uint64x2_t T2 = veorq_u64(
        veorq_u64(sse2neon_vmull_p64(vget_low_u64(T1), vget_high_u64(foldConstants2)), mm_slli_si128(T1, 8)), R);

    return ~vgetq_lane_u64(T2, 1);
}

#endif
@@ -291,7 +291,7 @@ void* aaruf_open(const char* filepath)
                break;
        }

-        crc64 = aaruf_crc64_data_ecma(data, blockHeader.length);
+        crc64 = aaruf_crc64_data(data, blockHeader.length);
        if(crc64 != blockHeader.crc64)
        {
            fprintf(stderr,

@@ -758,7 +758,7 @@ void* aaruf_open(const char* filepath)
            fprintf(stderr, "libaaruformat: Could not read metadata block, continuing...");
        }

-        crc64 = aaruf_crc64_data_ecma((const uint8_t*)ctx->trackEntries,
+        crc64 = aaruf_crc64_data((const uint8_t*)ctx->trackEntries,
                                 ctx->tracksHeader.entries * sizeof(TrackEntry));
        if(crc64 != ctx->tracksHeader.crc64)
        {

@@ -873,7 +873,7 @@ void* aaruf_open(const char* filepath)

        if(readBytes == ctx->dumpHardwareHeader.length)
        {
-            crc64 = aaruf_crc64_data_ecma(data, ctx->dumpHardwareHeader.length);
+            crc64 = aaruf_crc64_data(data, ctx->dumpHardwareHeader.length);
            if(crc64 != ctx->dumpHardwareHeader.crc64)
            {
                free(data);