Code restyling.

This commit is contained in:
2020-02-29 18:03:33 +00:00
parent 2d5bc1f8ed
commit 5f2ddad918
15 changed files with 690 additions and 739 deletions

View File

@@ -46,9 +46,7 @@ using Aaru.CommonTypes.Interfaces;
namespace Aaru.Checksums
{
/// <summary>
/// Implements the SpamSum fuzzy hashing algorithm.
/// </summary>
/// <summary>Implements the SpamSum fuzzy hashing algorithm.</summary>
public class SpamSumContext : IChecksum
{
const uint ROLLING_WINDOW = 7;
@@ -57,25 +55,29 @@ namespace Aaru.Checksums
const uint HASH_INIT = 0x28021967;
const uint NUM_BLOCKHASHES = 31;
const uint SPAMSUM_LENGTH = 64;
const uint FUZZY_MAX_RESULT = 2 * SPAMSUM_LENGTH + 20;
const uint FUZZY_MAX_RESULT = (2 * SPAMSUM_LENGTH) + 20;
//"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
readonly byte[] b64 =
{
0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51,
0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
0x7A, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x2B, 0x2F
0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52,
0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A,
0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x30, 0x31,
0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x2B, 0x2F
};
FuzzyState self;
/// <summary>
/// Initializes the SpamSum structures
/// </summary>
/// <summary>Initializes the SpamSum structures</summary>
public SpamSumContext()
{
self = new FuzzyState {Bh = new BlockhashContext[NUM_BLOCKHASHES]};
for(int i = 0; i < NUM_BLOCKHASHES; i++) self.Bh[i].Digest = new byte[SPAMSUM_LENGTH];
self = new FuzzyState
{
Bh = new BlockhashContext[NUM_BLOCKHASHES]
};
for(int i = 0; i < NUM_BLOCKHASHES; i++)
self.Bh[i].Digest = new byte[SPAMSUM_LENGTH];
self.Bhstart = 0;
self.Bhend = 1;
@@ -88,34 +90,25 @@ namespace Aaru.Checksums
roll_init();
}
/// <summary>
/// Updates the hash with data.
/// </summary>
/// <summary>Updates the hash with data.</summary>
/// <param name="data">Data buffer.</param>
/// <param name="len">Length of buffer to hash.</param>
public void Update(byte[] data, uint len)
{
self.TotalSize += len;
for(int i = 0; i < len; i++) fuzzy_engine_step(data[i]);
for(int i = 0; i < len; i++)
fuzzy_engine_step(data[i]);
}
/// <summary>
/// Updates the hash with data.
/// </summary>
/// <summary>Updates the hash with data.</summary>
/// <param name="data">Data buffer.</param>
public void Update(byte[] data)
{
Update(data, (uint)data.Length);
}
public void Update(byte[] data) => Update(data, (uint)data.Length);
/// <summary>
/// Returns a byte array of the hash value.
/// </summary>
/// <summary>Returns a byte array of the hash value.</summary>
public byte[] Final()
{
    // No binary form of this hash exists (see the message below); End() yields the textual result instead.
    throw new NotImplementedException("SpamSum does not have a binary representation.");
}
/// <summary>
/// Returns a base64 representation of the hash value.
/// </summary>
/// <summary>Returns a base64 representation of the hash value.</summary>
public string End()
{
FuzzyDigest(out byte[] result);
@@ -124,10 +117,10 @@ namespace Aaru.Checksums
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
void roll_init()
void roll_init() => self.Roll = new RollState
{
self.Roll = new RollState {Window = new byte[ROLLING_WINDOW]};
}
Window = new byte[ROLLING_WINDOW]
};
/*
* a rolling hash, based on the Adler checksum. By using a rolling hash
@@ -171,7 +164,8 @@ namespace Aaru.Checksums
[MethodImpl(MethodImplOptions.AggressiveInlining)]
void fuzzy_try_fork_blockhash()
{
if(self.Bhend >= NUM_BLOCKHASHES) return;
if(self.Bhend >= NUM_BLOCKHASHES)
return;
if(self.Bhend == 0) // assert
throw new Exception("Assertion failed");
@@ -189,15 +183,21 @@ namespace Aaru.Checksums
[MethodImpl(MethodImplOptions.AggressiveInlining)]
void fuzzy_try_reduce_blockhash()
{
if(self.Bhstart >= self.Bhend) throw new Exception("Assertion failed");
if(self.Bhstart >= self.Bhend)
throw new Exception("Assertion failed");
if(self.Bhend - self.Bhstart < 2)
/* Need at least two working hashes. */ return;
/* Need at least two working hashes. */
return;
if((ulong)SSDEEP_BS(self.Bhstart) * SPAMSUM_LENGTH >= self.TotalSize)
/* Initial blocksize estimate would select this or a smaller
* blocksize. */ return;
* blocksize. */
return;
if(self.Bh[self.Bhstart + 1].Dlen < SPAMSUM_LENGTH / 2)
/* Estimate adjustment would select this blocksize. */ return;
/* Estimate adjustment would select this blocksize. */
return;
/* At this point we are clearly no longer interested in the
* start_blocksize. Get rid of it. */
@@ -226,14 +226,18 @@ namespace Aaru.Checksums
if(h % SSDEEP_BS(i) != SSDEEP_BS(i) - 1)
/* Once this condition is false for one bs, it is
* automatically false for all further bs. I.e. if
* h === -1 (mod 2*bs) then h === -1 (mod bs). */ break;
* h === -1 (mod 2*bs) then h === -1 (mod bs). */
break;
/* We have hit a reset point. We now emit hashes which are
* based on all characters in the piece of the message between
* the last reset point and this one */
if(0 == self.Bh[i].Dlen) fuzzy_try_fork_blockhash();
if(0 == self.Bh[i].Dlen)
fuzzy_try_fork_blockhash();
self.Bh[i].Digest[self.Bh[i].Dlen] = b64[self.Bh[i].H % 64];
self.Bh[i].Halfdigest = b64[self.Bh[i].Halfh % 64];
if(self.Bh[i].Dlen < SPAMSUM_LENGTH - 1)
{
/* We can have a problem with the tail overflowing. The
@@ -244,12 +248,15 @@ namespace Aaru.Checksums
* */
self.Bh[i].Digest[++self.Bh[i].Dlen] = 0;
self.Bh[i].H = HASH_INIT;
if(self.Bh[i].Dlen >= SPAMSUM_LENGTH / 2) continue;
if(self.Bh[i].Dlen >= SPAMSUM_LENGTH / 2)
continue;
self.Bh[i].Halfh = HASH_INIT;
self.Bh[i].Halfdigest = 0;
}
else fuzzy_try_reduce_blockhash();
else
fuzzy_try_reduce_blockhash();
}
}
@@ -257,13 +264,14 @@ namespace Aaru.Checksums
[MethodImpl(MethodImplOptions.AggressiveInlining)]
uint FuzzyDigest(out byte[] result)
{
StringBuilder sb = new StringBuilder();
uint bi = self.Bhstart;
uint h = roll_sum();
int remain = (int)(FUZZY_MAX_RESULT - 1); /* Exclude terminating '\0'. */
var sb = new StringBuilder();
uint bi = self.Bhstart;
uint h = roll_sum();
int remain = (int)(FUZZY_MAX_RESULT - 1); /* Exclude terminating '\0'. */
result = new byte[FUZZY_MAX_RESULT];
/* Verify that our elimination was not overeager. */
if(!(bi == 0 || (ulong)SSDEEP_BS(bi) / 2 * SPAMSUM_LENGTH < self.TotalSize))
if(!(bi == 0 || ((ulong)SSDEEP_BS(bi) / 2) * SPAMSUM_LENGTH < self.TotalSize))
throw new Exception("Assertion failed");
int resultOff = 0;
@@ -272,20 +280,32 @@ namespace Aaru.Checksums
while((ulong)SSDEEP_BS(bi) * SPAMSUM_LENGTH < self.TotalSize)
{
++bi;
if(bi >= NUM_BLOCKHASHES) throw new OverflowException("The input exceeds data types.");
if(bi >= NUM_BLOCKHASHES)
throw new OverflowException("The input exceeds data types.");
}
/* Adapt blocksize guess to actual digest length. */
while(bi >= self.Bhend) --bi;
while(bi > self.Bhstart && self.Bh[bi].Dlen < SPAMSUM_LENGTH / 2) --bi;
while(bi >= self.Bhend)
--bi;
if(bi > 0 && self.Bh[bi].Dlen < SPAMSUM_LENGTH / 2) throw new Exception("Assertion failed");
while(bi > self.Bhstart &&
self.Bh[bi].Dlen < SPAMSUM_LENGTH / 2)
--bi;
if(bi > 0 &&
self.Bh[bi].Dlen < SPAMSUM_LENGTH / 2)
throw new Exception("Assertion failed");
sb.AppendFormat("{0}:", SSDEEP_BS(bi));
int i = Encoding.ASCII.GetBytes(sb.ToString()).Length;
if(i <= 0)
/* Maybe snprintf has set errno here? */ throw new OverflowException("The input exceeds data types.");
if(i >= remain) throw new Exception("Assertion failed");
/* Maybe snprintf has set errno here? */
throw new OverflowException("The input exceeds data types.");
if(i >= remain)
throw new Exception("Assertion failed");
remain -= i;
@@ -294,19 +314,25 @@ namespace Aaru.Checksums
resultOff += i;
i = (int)self.Bh[bi].Dlen;
if(i > remain) throw new Exception("Assertion failed");
if(i > remain)
throw new Exception("Assertion failed");
Array.Copy(self.Bh[bi].Digest, 0, result, resultOff, i);
resultOff += i;
remain -= i;
if(h != 0)
{
if(remain <= 0) throw new Exception("Assertion failed");
if(remain <= 0)
throw new Exception("Assertion failed");
result[resultOff] = b64[self.Bh[bi].H % 64];
if(i < 3 || result[resultOff] != result[resultOff - 1] ||
result[resultOff] != result[resultOff - 2] ||
result[resultOff] != result[resultOff - 3])
if(i < 3 ||
result[resultOff] != result[resultOff - 1] ||
result[resultOff] != result[resultOff - 2] ||
result[resultOff] != result[resultOff - 3])
{
++resultOff;
--remain;
@@ -314,27 +340,34 @@ namespace Aaru.Checksums
}
else if(self.Bh[bi].Digest[i] != 0)
{
if(remain <= 0) throw new Exception("Assertion failed");
if(remain <= 0)
throw new Exception("Assertion failed");
result[resultOff] = self.Bh[bi].Digest[i];
if(i < 3 || result[resultOff] != result[resultOff - 1] ||
result[resultOff] != result[resultOff - 2] ||
result[resultOff] != result[resultOff - 3])
if(i < 3 ||
result[resultOff] != result[resultOff - 1] ||
result[resultOff] != result[resultOff - 2] ||
result[resultOff] != result[resultOff - 3])
{
++resultOff;
--remain;
}
}
if(remain <= 0) throw new Exception("Assertion failed");
if(remain <= 0)
throw new Exception("Assertion failed");
result[resultOff++] = 0x3A; // ':'
--remain;
if(bi < self.Bhend - 1)
{
++bi;
i = (int)self.Bh[bi].Dlen;
if(i > remain) throw new Exception("Assertion failed");
if(i > remain)
throw new Exception("Assertion failed");
Array.Copy(self.Bh[bi].Digest, 0, result, resultOff, i);
resultOff += i;
@@ -342,13 +375,16 @@ namespace Aaru.Checksums
if(h != 0)
{
if(remain <= 0) throw new Exception("Assertion failed");
if(remain <= 0)
throw new Exception("Assertion failed");
h = self.Bh[bi].Halfh;
result[resultOff] = b64[h % 64];
if(i < 3 || result[resultOff] != result[resultOff - 1] ||
result[resultOff] != result[resultOff - 2] ||
result[resultOff] != result[resultOff - 3])
if(i < 3 ||
result[resultOff] != result[resultOff - 1] ||
result[resultOff] != result[resultOff - 2] ||
result[resultOff] != result[resultOff - 3])
{
++resultOff;
--remain;
@@ -357,14 +393,18 @@ namespace Aaru.Checksums
else
{
i = self.Bh[bi].Halfdigest;
if(i != 0)
{
if(remain <= 0) throw new Exception("Assertion failed");
if(remain <= 0)
throw new Exception("Assertion failed");
result[resultOff] = (byte)i;
if(i < 3 || result[resultOff] != result[resultOff - 1] ||
result[resultOff] != result[resultOff - 2] ||
result[resultOff] != result[resultOff - 3])
if(i < 3 ||
result[resultOff] != result[resultOff - 1] ||
result[resultOff] != result[resultOff - 2] ||
result[resultOff] != result[resultOff - 3])
{
++resultOff;
--remain;
@@ -374,8 +414,11 @@ namespace Aaru.Checksums
}
else if(h != 0)
{
if(self.Bh[bi].Dlen != 0) throw new Exception("Assertion failed");
if(remain <= 0) throw new Exception("Assertion failed");
if(self.Bh[bi].Dlen != 0)
throw new Exception("Assertion failed");
if(remain <= 0)
throw new Exception("Assertion failed");
result[resultOff++] = b64[self.Bh[bi].H % 64];
/* No need to bother with FUZZY_FLAG_ELIMSEQ, because this
@@ -384,34 +427,29 @@ namespace Aaru.Checksums
}
result[resultOff] = 0;
return 0;
}
/// <summary>
/// Gets the hash of a file
/// </summary>
/// <summary>Gets the hash of a file</summary>
/// <param name="filename">File path.</param>
public static byte[] File(string filename)
{
    // The file name is never read: this overload always refuses, because no binary hash form exists.
    throw new NotImplementedException("SpamSum does not have a binary representation.");
}
/// <summary>
/// Gets the hash of a file in hexadecimal and as a byte array.
/// </summary>
/// <summary>Gets the hash of a file in hexadecimal and as a byte array.</summary>
/// <param name="filename">File path.</param>
/// <param name="hash">Byte array of the hash value.</param>
public static string File(string filename, out byte[] hash)
{
    // File hashing is not implemented: neither argument is used and hash is never assigned,
    // which is legal because the method cannot return normally.
    throw new NotImplementedException("Not yet implemented.");
}
/// <summary>
/// Gets the hash of the specified data buffer.
/// </summary>
/// <summary>Gets the hash of the specified data buffer.</summary>
/// <param name="data">Data buffer.</param>
/// <param name="len">Length of the data buffer to hash.</param>
/// <param name="hash">Always null: SpamSum does not have a binary representation.</param>
/// <returns>Base64 representation of SpamSum $blocksize:$hash:$hash</returns>
public static string Data(byte[] data, uint len, out byte[] hash)
{
SpamSumContext fuzzyContext = new SpamSumContext();
var fuzzyContext = new SpamSumContext();
fuzzyContext.Update(data, len);
@@ -420,9 +458,7 @@ namespace Aaru.Checksums
return fuzzyContext.End();
}
/// <summary>
/// Gets the hash of the specified data buffer.
/// </summary>
/// <summary>Gets the hash of the specified data buffer.</summary>
/// <param name="data">Data buffer.</param>
/// <param name="hash">Always null: SpamSum does not have a binary representation.</param>
/// <returns>Base64 representation of SpamSum $blocksize:$hash:$hash</returns>
@@ -438,7 +474,8 @@ namespace Aaru.Checksums
// LINQ is six times slower
foreach(byte c in cString)
{
if(c == 0) break;
if(c == 0)
break;
count++;
}
@@ -449,6 +486,7 @@ namespace Aaru.Checksums
struct RollState
{
public byte[] Window;
// ROLLING_WINDOW
public uint H1;
public uint H2;
@@ -466,6 +504,7 @@ namespace Aaru.Checksums
public uint H;
public uint Halfh;
public byte[] Digest;
// SPAMSUM_LENGTH
public byte Halfdigest;
public uint Dlen;
@@ -476,6 +515,7 @@ namespace Aaru.Checksums
public uint Bhstart;
public uint Bhend;
public BlockhashContext[] Bh;
//NUM_BLOCKHASHES
public ulong TotalSize;
public RollState Roll;