diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0a05e24..e324259 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -3,4 +3,4 @@ project("Aaru.Checksums.Native" C)
set(CMAKE_C_STANDARD 90)
-add_library("Aaru.Checksums.Native" SHARED adler32.h adler32.c crc16.h crc16.c crc16_ccitt.h crc16_ccitt.c crc32.c crc32.h crc64.c crc64.h fletcher16.h fletcher16.c fletcher32.h fletcher32.c library.h)
+add_library("Aaru.Checksums.Native" SHARED adler32.h adler32.c crc16.h crc16.c crc16_ccitt.h crc16_ccitt.c crc32.c crc32.h crc64.c crc64.h fletcher16.h fletcher16.c fletcher32.h fletcher32.c library.h spamsum.c spamsum.h)
diff --git a/spamsum.c b/spamsum.c
new file mode 100644
index 0000000..0d16cfb
--- /dev/null
+++ b/spamsum.c
@@ -0,0 +1,312 @@
+/*
+ * This file is part of the Aaru Data Preservation Suite.
+ * Copyright (c) 2019-2021 Natalia Portillo.
+ *
+ * This library is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of the
+ * License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see .
+ */
+#include
+#include
+#include
+#include
+#include
+
+#include "spamsum.h"
+
+#include "library.h"
+
+static uint8_t _b64[] = {0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D,
+ 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A,
+ 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D,
+ 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A,
+ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x2B, 0x2F};
+
+spamsum_ctx* spamsum_init(void)
+{
+ spamsum_ctx* ctx = (spamsum_ctx*)malloc(sizeof(spamsum_ctx));
+ if(!ctx) return NULL;
+
+ memset(ctx, 0, sizeof(spamsum_ctx));
+
+ ctx->Bhend = 1;
+ ctx->Bh[0].H = HASH_INIT;
+ ctx->Bh[0].Halfh = HASH_INIT;
+
+ return ctx;
+}
+
+int spamsum_update(spamsum_ctx* ctx, const uint8_t* data, uint32_t len)
+{
+ if(!ctx || !data) return -1;
+
+ for(int i = 0; i < len; i++) fuzzy_engine_step(ctx, data[i]);
+
+ ctx->TotalSize += len;
+
+ return 0;
+}
+
+void spamsum_free(spamsum_ctx* ctx)
+{
+ if(ctx) free(ctx);
+}
+
+#define roll_sum(ctx) (ctx->Roll.H1 + ctx->Roll.H2 + ctx->Roll.H3)
+#define sum_hash(c, h) ((h * HASH_PRIME) ^ c);
+#define SSDEEP_BS(index) (MIN_BLOCKSIZE << index)
+
+void fuzzy_engine_step(spamsum_ctx* ctx, uint8_t c)
+{
+ uint32_t i;
+ /* At each character we update the rolling hash and the normal hashes.
+ * When the rolling hash hits a reset value then we emit a normal hash
+ * as a element of the signature and reset the normal hash. */
+ roll_hash(ctx, c);
+ uint64_t h = roll_sum(ctx);
+
+ for(i = ctx->Bhstart; i < ctx->Bhend; ++i)
+ {
+ ctx->Bh[i].H = sum_hash(c, ctx->Bh[i].H);
+ ctx->Bh[i].Halfh = sum_hash(c, ctx->Bh[i].Halfh);
+ }
+
+ for(i = ctx->Bhstart; i < ctx->Bhend; ++i)
+ {
+ /* With growing blocksize almost no runs fail the next test. */
+ if(h % SSDEEP_BS(i) != SSDEEP_BS(i) - 1)
+ /* Once this condition is false for one bs, it is
+ * automatically false for all further bs. I.e. if
+ * h === -1 (mod 2*bs) then h === -1 (mod bs). */
+ break;
+
+ /* We have hit a reset point. We now emit hashes which are
+ * based on all characters in the piece of the message between
+ * the last reset point and this one */
+ if(0 == ctx->Bh[i].Dlen) fuzzy_try_fork_blockhash(ctx);
+
+ ctx->Bh[i].Digest[ctx->Bh[i].Dlen] = _b64[ctx->Bh[i].H % 64];
+ ctx->Bh[i].Halfdigest = _b64[ctx->Bh[i].Halfh % 64];
+
+ if(ctx->Bh[i].Dlen < SPAMSUM_LENGTH - 1)
+ {
+ /* We can have a problem with the tail overflowing. The
+ * easiest way to cope with this is to only reset the
+ * normal hash if we have room for more characters in
+ * our signature. This has the effect of combining the
+ * last few pieces of the message into a single piece
+ * */
+ ctx->Bh[i].Digest[++ctx->Bh[i].Dlen] = 0;
+ ctx->Bh[i].H = HASH_INIT;
+
+ if(ctx->Bh[i].Dlen >= SPAMSUM_LENGTH / 2) continue;
+
+ ctx->Bh[i].Halfh = HASH_INIT;
+ ctx->Bh[i].Halfdigest = 0;
+ }
+ else
+ fuzzy_try_reduce_blockhash(ctx);
+ }
+}
+
+void roll_hash(spamsum_ctx* ctx, uint8_t c)
+{
+ ctx->Roll.H2 -= ctx->Roll.H1;
+ ctx->Roll.H2 += ROLLING_WINDOW * c;
+
+ ctx->Roll.H1 += c;
+ ctx->Roll.H1 -= ctx->Roll.Window[ctx->Roll.N % ROLLING_WINDOW];
+
+ ctx->Roll.Window[ctx->Roll.N % ROLLING_WINDOW] = c;
+ ctx->Roll.N++;
+
+ /* The original spamsum AND'ed this value with 0xFFFFFFFF which
+ * in theory should have no effect. This AND has been removed
+ * for performance (jk) */
+ ctx->Roll.H3 <<= 5;
+ ctx->Roll.H3 ^= c;
+}
+
+void fuzzy_try_reduce_blockhash(spamsum_ctx* ctx)
+{
+ assert(ctx->Bhstart < ctx->Bhend);
+
+ if(ctx->Bhend - ctx->Bhstart < 2) /* Need at least two working hashes. */
+ return;
+
+ if((uint64_t)SSDEEP_BS(ctx->Bhstart) * SPAMSUM_LENGTH >= ctx->TotalSize)
+ /* Initial blocksize estimate would select this or a smaller
+ * blocksize. */
+ return;
+
+ if(ctx->Bh[ctx->Bhstart + 1].Dlen < SPAMSUM_LENGTH / 2) /* Estimate adjustment would select this blocksize. */
+ return;
+
+ /* At this point we are clearly no longer interested in the
+ * start_blocksize. Get rid of it. */
+ ++ctx->Bhstart;
+}
+
+void fuzzy_try_fork_blockhash(spamsum_ctx* ctx)
+{
+ if(ctx->Bhend >= NUM_BLOCKHASHES) return;
+
+ assert(ctx->Bhend != 0);
+
+ uint32_t obh = ctx->Bhend - 1;
+ uint32_t nbh = ctx->Bhend;
+ ctx->Bh[nbh].H = ctx->Bh[obh].H;
+ ctx->Bh[nbh].Halfh = ctx->Bh[obh].Halfh;
+ ctx->Bh[nbh].Digest[0] = 0;
+ ctx->Bh[nbh].Halfdigest = 0;
+ ctx->Bh[nbh].Dlen = 0;
+ ++ctx->Bhend;
+}
+
+uint8_t* spamsum_final(spamsum_ctx* ctx)
+{
+ uint32_t bi = ctx->Bhstart;
+ uint32_t h = roll_sum(ctx);
+ int remain = (int)(FUZZY_MAX_RESULT - 1); /* Exclude terminating '\0'. */
+ uint8_t* result = (uint8_t*)malloc(FUZZY_MAX_RESULT);
+
+ if(!result) return NULL;
+
+ /* Verify that our elimination was not overeager. */
+ assert(bi == 0 || (uint64_t)SSDEEP_BS(bi) / 2 * SPAMSUM_LENGTH < ctx->TotalSize);
+
+ /* Initial blocksize guess. */
+ while((uint64_t)SSDEEP_BS(bi) * SPAMSUM_LENGTH < ctx->TotalSize)
+ {
+ ++bi;
+
+ if(bi >= NUM_BLOCKHASHES)
+ {
+ errno = EOVERFLOW;
+ return NULL;
+ }
+ }
+
+ /* Adapt blocksize guess to actual digest length. */
+ while(bi >= ctx->Bhend) --bi;
+
+ while(bi > ctx->Bhstart && ctx->Bh[bi].Dlen < SPAMSUM_LENGTH / 2) --bi;
+
+ assert(!(bi > 0 && ctx->Bh[bi].Dlen < SPAMSUM_LENGTH / 2));
+
+ int i = snprintf((char*)result, (size_t)remain, "%lu:", (unsigned long)SSDEEP_BS(bi));
+
+ if(i <= 0) /* Maybe snprintf has set errno here? */
+ return NULL;
+
+ assert(i < remain);
+
+ remain -= i;
+ result += i;
+
+ i = (int)ctx->Bh[bi].Dlen;
+
+ assert(i <= remain);
+
+ memcpy(result, ctx->Bh[bi].Digest, (size_t)i);
+ result += i;
+ remain -= i;
+
+ if(h != 0)
+ {
+ assert(remain > 0);
+
+ *result = _b64[ctx->Bh[bi].H % 64];
+
+ if(i < 3 || *result != result[-1] || *result != result[-2] || *result != result[-3])
+ {
+ ++result;
+ --remain;
+ }
+ }
+ else if(ctx->Bh[bi].Digest[i] != 0)
+ {
+ assert(remain > 0);
+
+ *result = ctx->Bh[bi].Digest[i];
+
+ if(i < 3 || *result != result[-1] || *result != result[-2] || *result != result[-3])
+ {
+ ++result;
+ --remain;
+ }
+ }
+
+ assert(remain > 0);
+
+ *result++ = ':';
+ --remain;
+
+ if(bi < ctx->Bhend - 1)
+ {
+ ++bi;
+ i = (int)ctx->Bh[bi].Dlen;
+
+ if(i <= remain)
+ ;
+
+ memcpy(result, ctx->Bh[bi].Digest, (size_t)i);
+ result += i;
+ remain -= i;
+
+ if(h != 0)
+ {
+ assert(remain > 0);
+
+ h = ctx->Bh[bi].Halfh;
+ *result = _b64[h % 64];
+
+ if(i < 3 || *result != result[-1] || *result != result[-2] || *result != result[-3])
+ {
+ ++result;
+ --remain;
+ }
+ }
+ else
+ {
+ i = ctx->Bh[bi].Halfdigest;
+
+ if(i != 0)
+ {
+ assert(remain > 0);
+
+ *result = (uint8_t)i;
+
+ if(i < 3 || *result != result[-1] || *result != result[-2] || *result != result[-3])
+ {
+ ++result;
+ --remain;
+ }
+ }
+ }
+ }
+ else if(h != 0)
+ {
+ assert(ctx->Bh[bi].Dlen == 0);
+
+ assert(remain > 0);
+
+ *result++ = _b64[ctx->Bh[bi].H % 64];
+ /* No need to bother with FUZZY_FLAG_ELIMSEQ, because this
+ * digest has length 1. */
+ --remain;
+ }
+
+ *result = 0;
+
+ return result;
+}
diff --git a/spamsum.h b/spamsum.h
new file mode 100644
index 0000000..46e8dbb
--- /dev/null
+++ b/spamsum.h
@@ -0,0 +1,63 @@
+/*
+ * This file is part of the Aaru Data Preservation Suite.
+ * Copyright (c) 2019-2021 Natalia Portillo.
+ *
+ * This library is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of the
+ * License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see .
+ */
+
+#define SPAMSUM_LENGTH 64
+#define NUM_BLOCKHASHES 31
+#define ROLLING_WINDOW 7
+#define HASH_INIT 0x28021967
+#define HASH_PRIME 0x01000193
+#define MIN_BLOCKSIZE 3
+#define FUZZY_MAX_RESULT ((2 * SPAMSUM_LENGTH) + 20)
+
+typedef struct
+{
+ uint32_t H;
+ uint32_t Halfh;
+ uint8_t Digest[SPAMSUM_LENGTH];
+ uint8_t Halfdigest;
+ uint32_t Dlen;
+} BlockhashContext;
+
+typedef struct
+{
+ uint8_t Window[ROLLING_WINDOW];
+ uint32_t H1;
+ uint32_t H2;
+ uint32_t H3;
+ uint32_t N;
+}RollState;
+
+typedef struct
+{
+ uint32_t Bhstart;
+ uint32_t Bhend;
+ BlockhashContext Bh[NUM_BLOCKHASHES];
+ uint64_t TotalSize;
+ RollState Roll;
+} spamsum_ctx;
+
+spamsum_ctx* spamsum_init(void);
+int spamsum_update(spamsum_ctx* ctx, const uint8_t* data, uint32_t len);
+uint8_t* spamsum_final(spamsum_ctx* ctx);
+void spamsum_free(spamsum_ctx* ctx);
+
+void fuzzy_engine_step(spamsum_ctx* ctx, uint8_t c);
+void roll_hash(spamsum_ctx* ctx, uint8_t c);
+void fuzzy_try_reduce_blockhash(spamsum_ctx* ctx);
+void fuzzy_try_fork_blockhash(spamsum_ctx* ctx);
+