Implement SpamSum.

This commit is contained in:
2022-05-28 13:17:05 +01:00
parent 3c09e60d93
commit 807e1cf244
8 changed files with 560 additions and 3 deletions

View File

@@ -29,6 +29,7 @@
#include "aaruformat/enums.h"
#include "aaruformat/errors.h"
#include "aaruformat/simd.h"
#include "aaruformat/spamsum.h"
#include "aaruformat/structs.h"
#endif // LIBAARUFORMAT_AARUFORMAT_H

View File

@@ -20,6 +20,7 @@
#define LIBAARUFORMAT_DECLS_H
#include "simd.h"
#include "spamsum.h"
#ifdef __cplusplus
#define EXTERNC extern "C"
#else
@@ -131,6 +132,16 @@ AARU_LOCAL int32_t AARU_CALL aaruf_get_media_tag_type_for_datatype(int32_t type)
AARU_LOCAL int32_t AARU_CALL aaruf_get_xml_mediatype(int32_t type);
/*
 * SpamSum hashing declarations. The four aaruf_spamsum_* functions are declared AARU_EXPORT;
 * the four helpers that follow them are declared AARU_LOCAL.
 * NOTE(review): the behaviour sketched below is inferred from the signatures only - confirm
 * each point against the implementation file.
 */
// Takes no arguments and returns a pointer to a spamsum_ctx. Presumably a newly created context
// that is later handed to aaruf_spamsum_free() - TODO confirm, including the value on failure.
AARU_EXPORT spamsum_ctx* AARU_CALL aaruf_spamsum_init(void);
// Takes a context plus a read-only buffer `data` of `len` bytes. Presumably feeds those bytes
// into the running hash; the meaning of the int return value is not visible here - TODO confirm.
AARU_EXPORT int AARU_CALL aaruf_spamsum_update(spamsum_ctx* ctx, const uint8_t* data, uint32_t len);
// Takes a context and a writable `result` buffer. Presumably writes the finished hash there.
// NOTE(review): FUZZY_MAX_RESULT in spamsum.h looks like the size `result` must have - confirm.
AARU_EXPORT int AARU_CALL aaruf_spamsum_final(spamsum_ctx* ctx, uint8_t* result);
// Takes a context and returns nothing. Presumably releases a context obtained from
// aaruf_spamsum_init() - TODO confirm.
AARU_EXPORT void AARU_CALL aaruf_spamsum_free(spamsum_ctx* ctx);
// Library-local helpers. Each receives the context; the first two also receive one byte `c`.
// NOTE(review): unlike the AARU_LOCAL prototypes immediately before this group, these four
// omit AARU_CALL - confirm that this is intentional and matches their definitions.
AARU_LOCAL void fuzzy_engine_step(spamsum_ctx* ctx, uint8_t c);
AARU_LOCAL void roll_hash(spamsum_ctx* ctx, uint8_t c);
AARU_LOCAL void fuzzy_try_reduce_blockhash(spamsum_ctx* ctx);
AARU_LOCAL void fuzzy_try_fork_blockhash(spamsum_ctx* ctx);
#if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \
defined(__i386__) || defined(__THW_INTEL) || defined(_M_IX86)

View File

@@ -0,0 +1,61 @@
/*
* This file is part of the Aaru Data Preservation Suite.
* Copyright (c) 2019-2022 Natalia Portillo.
*
* This library is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of the
* License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef LIBAARUFORMAT_SPAMSUM_H_
#define LIBAARUFORMAT_SPAMSUM_H_
// Fixed-width integer types (uint8_t, uint32_t, uint64_t) used by the structures in this header.
#include <stdint.h>
// NOTE(review): decls.h itself includes spamsum.h and declares prototypes that take spamsum_ctx*.
// If this header is the first of the two to be included, decls.h is processed at this point,
// before the typedefs below exist, so those prototypes would see an undeclared type. Nothing in
// this header appears to use a name from decls.h - confirm whether this include can be dropped.
#include "decls.h"
// Size in bytes of blockhash_ctx.digest; also the base term of FUZZY_MAX_RESULT.
#define SPAMSUM_LENGTH 64
// Number of blockhash_ctx entries held in spamsum_ctx.bh.
#define NUM_BLOCKHASHES 31
// Size in bytes of roll_state.window.
#define ROLLING_WINDOW 7
// Not referenced inside this header. NOTE(review): presumably the initial value of the
// blockhash_ctx h / half_h hashes - confirm in the implementation.
#define HASH_INIT 0x28021967
// 0x01000193 == 16777619, the 32-bit FNV prime. NOTE(review): presumably the multiplier used
// when updating the block hashes - confirm in the implementation.
#define HASH_PRIME 0x01000193
// Not referenced inside this header. NOTE(review): presumably the smallest block size the
// algorithm works with - confirm in the implementation.
#define MIN_BLOCKSIZE 3
// Evaluates to (2 * 64) + 20 = 148. NOTE(review): looks like the maximum size of the result
// produced by aaruf_spamsum_final() - confirm against the implementation.
#define FUZZY_MAX_RESULT ((2 * SPAMSUM_LENGTH) + 20)
/*
 * Per-block-hash state. spamsum_ctx embeds an array of NUM_BLOCKHASHES of these.
 * NOTE(review): the field roles below are inferred from the names only - confirm in the
 * implementation before relying on them.
 */
typedef struct
{
// 32-bit hash value; presumably the running hash behind `digest` - TODO confirm.
uint32_t h;
// 32-bit hash value; presumably the running hash behind `half_digest` - TODO confirm.
uint32_t half_h;
// Buffer of SPAMSUM_LENGTH (64) bytes; presumably the digest built so far.
uint8_t digest[SPAMSUM_LENGTH];
// A single byte (not an array); presumably one pending digest character - TODO confirm.
uint8_t half_digest;
// 32-bit count; presumably the number of bytes of `digest` currently in use - TODO confirm.
uint32_t d_len;
} blockhash_ctx;
/*
 * Rolling-hash state, embedded in spamsum_ctx as its `roll` member.
 * NOTE(review): the field roles below are inferred from the names only - confirm in the
 * implementation before relying on them.
 */
typedef struct
{
// Buffer of ROLLING_WINDOW (7) bytes; presumably the most recent input bytes.
uint8_t window[ROLLING_WINDOW];
// Three 32-bit values; presumably the components of the rolling hash - TODO confirm.
uint32_t h1;
uint32_t h2;
uint32_t h3;
// 32-bit value; presumably a position or counter related to `window` - TODO confirm.
uint32_t n;
} roll_state;
/*
 * Context type passed to every aaruf_spamsum_* function and to the fuzzy_* / roll_hash helpers
 * declared in decls.h.
 * NOTE(review): the field roles below are inferred from the names only - confirm in the
 * implementation before relying on them.
 */
typedef struct
{
// 32-bit values; presumably the first and one-past-last indices of the entries of `bh`
// currently in use - TODO confirm.
uint32_t bh_start;
uint32_t bh_end;
// NUM_BLOCKHASHES (31) block-hash states, stored inline.
blockhash_ctx bh[NUM_BLOCKHASHES];
// 64-bit counter; presumably the total number of input bytes processed - TODO confirm.
uint64_t total_size;
// Rolling-hash state, stored inline.
roll_state roll;
} spamsum_ctx;
#endif // LIBAARUFORMAT_SPAMSUM_H_