Use SabreTools.Hashing

This commit is contained in:
Matt Nadareski
2024-03-04 21:20:39 -05:00
parent 17c023efaa
commit 076b80e805
7 changed files with 52 additions and 1085 deletions

View File

@@ -1,58 +0,0 @@
// /***************************************************************************
// Aaru Data Preservation Suite
// ----------------------------------------------------------------------------
//
// Filename : IChecksum.cs
// Author(s) : Natalia Portillo <claunia@claunia.com>
//
// Component : Checksums.
//
// --[ Description ] ----------------------------------------------------------
//
// Provides an interface for implementing checksums and hashes.
//
// --[ License ] --------------------------------------------------------------
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the
// "Software"), to deal in the Software without restriction, including
// without limitation the rights to use, copy, modify, merge, publish,
// distribute, sublicense, and/or sell copies of the Software, and to
// permit persons to whom the Software is furnished to do so, subject to
// the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
// IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
// SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//
// ----------------------------------------------------------------------------
// Copyright © 2011-2020 Natalia Portillo
// ****************************************************************************/
namespace Aaru.CommonTypes.Interfaces
{
/// <summary>
/// Common contract for incremental checksum and hash implementations: data is fed in with one or more
/// <c>Update</c> calls and the result is then read either as raw bytes or as text.
/// </summary>
public interface IChecksum
{
/// <summary>Feeds part of a buffer into the running hash.</summary>
/// <param name="data">Buffer holding the bytes to hash.</param>
/// <param name="len">Number of bytes of <paramref name="data" /> to hash.</param>
void Update(byte[] data, uint len);
/// <summary>Feeds a buffer into the running hash.</summary>
/// <param name="data">Buffer holding the bytes to hash.</param>
void Update(byte[] data);
/// <summary>Returns the hash value computed so far.</summary>
/// <returns>The hash value as a byte array.</returns>
byte[] Final();
/// <summary>Returns the hash value computed so far in textual form.</summary>
/// <returns>The hash value as a string, documented here as a hexadecimal representation.</returns>
string End();
}
}

View File

@@ -1,583 +0,0 @@
// /***************************************************************************
// Aaru Data Preservation Suite
// ----------------------------------------------------------------------------
//
// Filename : SpamSumContext.cs
// Author(s) : Natalia Portillo <claunia@claunia.com>
//
// Component : Checksums.
//
// --[ Description ] ----------------------------------------------------------
//
// Implements the SpamSum fuzzy hashing algorithm.
//
// --[ License ] --------------------------------------------------------------
//
// This library is free software; you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as
// published by the Free Software Foundation; either version 2.1 of the
// License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, see <http://www.gnu.org/licenses/>.
//
// ----------------------------------------------------------------------------
// Copyright © 2011-2020 Natalia Portillo
// ****************************************************************************/
// Based on ssdeep
// Copyright (C) 2002 Andrew Tridgell <tridge@samba.org>
// Copyright (C) 2006 ManTech International Corporation
// Copyright (C) 2013 Helmut Grohne <helmut@subdivi.de>
//
// Earlier versions of this code were named fuzzy.c and can be found at:
// http://www.samba.org/ftp/unpacked/junkcode/spamsum/
// http://ssdeep.sf.net/
using System;
using System.Runtime.CompilerServices;
using System.Text;
using Aaru.CommonTypes.Interfaces;
namespace Aaru.Checksums
{
    /// <summary>
    /// Implements the SpamSum fuzzy hashing algorithm (a port of ssdeep's <c>fuzzy.c</c>).
    /// Feed data with <see cref="Update(byte[], uint)" /> and read the signature, formatted as
    /// <c>blocksize:hash:hash</c>, with <see cref="End" /> or <see cref="Final" />.
    /// </summary>
    public sealed class SpamSumContext : IChecksum, IDisposable
    {
        /// <summary>Number of bytes covered by the rolling hash window.</summary>
        const uint ROLLING_WINDOW = 7;
        /// <summary>Block size of the first block hash; block hash <c>n</c> uses <c>MIN_BLOCKSIZE &lt;&lt; n</c>.</summary>
        const uint MIN_BLOCKSIZE = 3;
        /// <summary>Multiplier used by <see cref="sum_hash" />.</summary>
        const uint HASH_PRIME = 0x01000193;
        /// <summary>Value the non-rolling hashes are (re)initialised to.</summary>
        const uint HASH_INIT = 0x28021967;
        /// <summary>Number of block hash slots kept in the state.</summary>
        const uint NUM_BLOCKHASHES = 31;
        /// <summary>Capacity, in characters, of a single block hash digest.</summary>
        const uint SPAMSUM_LENGTH = 64;
        /// <summary>Size of the buffer the textual result is rendered into (including the terminator).</summary>
        const uint FUZZY_MAX_RESULT = (2 * SPAMSUM_LENGTH) + 20;

        //"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
        // Immutable lookup table, so it is shared by all instances instead of being allocated per context.
        static readonly byte[] _b64 =
        {
            0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52,
            0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A,
            0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x30, 0x31,
            0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x2B, 0x2F
        };

        FuzzyState _self;

        /// <summary>Initializes the SpamSum structures</summary>
        public SpamSumContext()
        {
            _self = new FuzzyState
            {
                Bh = new BlockhashContext[NUM_BLOCKHASHES]
            };

            for (int i = 0; i < NUM_BLOCKHASHES; i++)
                _self.Bh[i].Digest = new byte[SPAMSUM_LENGTH];

            // Only the first block hash is active at the start: [Bhstart, Bhend) == [0, 1).
            _self.Bhstart = 0;
            _self.Bhend = 1;
            _self.Bh[0].H = HASH_INIT;
            _self.Bh[0].Halfh = HASH_INIT;
            _self.Bh[0].Digest[0] = 0;
            _self.Bh[0].Halfdigest = 0;
            _self.Bh[0].Dlen = 0;
            _self.TotalSize = 0;
            roll_init();
        }

        /// <inheritdoc />
        /// <summary>Updates the hash with the first <paramref name="len" /> bytes of a buffer.</summary>
        /// <param name="data">Data buffer.</param>
        /// <param name="len">Number of bytes, counted from the start of <paramref name="data" />, to hash.</param>
        /// <exception cref="ArgumentNullException"><paramref name="data" /> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="len" /> is larger than the buffer.</exception>
        public void Update(byte[] data, uint len)
        {
            // Validate before touching any state: previously TotalSize was bumped first, so a bad
            // argument failed halfway through the loop and left the context inconsistent.
            if (data == null)
                throw new ArgumentNullException(nameof(data));

            if (len > data.Length)
                throw new ArgumentOutOfRangeException(nameof(len), "Length exceeds the size of the data buffer.");

            _self.TotalSize += len;

            for (int i = 0; i < len; i++)
                fuzzy_engine_step(data[i]);
        }

        /// <inheritdoc />
        /// <summary>Updates the hash with a whole buffer.</summary>
        /// <param name="data">Data buffer.</param>
        /// <exception cref="ArgumentNullException"><paramref name="data" /> is null.</exception>
        public void Update(byte[] data)
        {
            if (data == null)
                throw new ArgumentNullException(nameof(data));

            Update(data, (uint)data.Length);
        }

        /// <inheritdoc />
        /// <summary>Returns the textual hash value as ASCII bytes (without a terminator).</summary>
        public byte[] Final()
        {
            FuzzyDigest(out byte[] result);
            return CToArray(result);
        }

        /// <inheritdoc />
        /// <summary>Returns a base64 representation of the hash value.</summary>
        public string End()
        {
            FuzzyDigest(out byte[] result);
            return CToString(result);
        }

        /// <summary>Resets the rolling hash to an all-zero state with an empty window.</summary>
#if NET452_OR_GREATER || NETCOREAPP
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
#endif
        void roll_init() => _self.Roll = new RollState
        {
            Window = new byte[ROLLING_WINDOW]
        };

        /*
         * a rolling hash, based on the Adler checksum. By using a rolling hash
         * we can perform auto resynchronisation after inserts/deletes
         * internally, h1 is the sum of the bytes in the window and h2
         * is the sum of the bytes times the index
         * h3 is a shift/xor based rolling hash, and is mostly needed to ensure that
         * we can cope with large blocksize values
         */
#if NET452_OR_GREATER || NETCOREAPP
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
#endif
        void roll_hash(byte c)
        {
            _self.Roll.H2 -= _self.Roll.H1;
            _self.Roll.H2 += ROLLING_WINDOW * c;
            _self.Roll.H1 += c;
            // Drop the byte that falls out of the window, then store the new one in its slot.
            _self.Roll.H1 -= _self.Roll.Window[_self.Roll.N % ROLLING_WINDOW];
            _self.Roll.Window[_self.Roll.N % ROLLING_WINDOW] = c;
            _self.Roll.N++;
            /* The original spamsum AND'ed this value with 0xFFFFFFFF which
             * in theory should have no effect. This AND has been removed
             * for performance (jk) */
            _self.Roll.H3 <<= 5;
            _self.Roll.H3 ^= c;
        }

        /// <summary>Combines the three rolling hash components into the current rolling hash value.</summary>
#if NET452_OR_GREATER || NETCOREAPP
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
#endif
        uint roll_sum() => _self.Roll.H1 + _self.Roll.H2 + _self.Roll.H3;

        /* A simple non-rolling hash, based on the FNV hash. */
#if NET452_OR_GREATER || NETCOREAPP
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
#endif
        static uint sum_hash(byte c, uint h) => (h * HASH_PRIME) ^ c;

        /// <summary>Block size associated with the block hash at <paramref name="index" />.</summary>
#if NET452_OR_GREATER || NETCOREAPP
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
#endif
        static uint SSDEEP_BS(uint index) => MIN_BLOCKSIZE << (int)index;

        /// <summary>
        /// Activates the next block hash slot, seeding it from the last active one.
        /// Does nothing when every slot is already in use.
        /// </summary>
#if NET452_OR_GREATER || NETCOREAPP
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
#endif
        void fuzzy_try_fork_blockhash()
        {
            if (_self.Bhend >= NUM_BLOCKHASHES)
                return;

            if (_self.Bhend == 0) // assert
                throw new InvalidOperationException("Assertion failed");

            uint obh = _self.Bhend - 1;
            uint nbh = _self.Bhend;
            _self.Bh[nbh].H = _self.Bh[obh].H;
            _self.Bh[nbh].Halfh = _self.Bh[obh].Halfh;
            _self.Bh[nbh].Digest[0] = 0;
            _self.Bh[nbh].Halfdigest = 0;
            _self.Bh[nbh].Dlen = 0;
            ++_self.Bhend;
        }

        /// <summary>
        /// Retires the smallest active block hash once it can no longer be selected for the output.
        /// </summary>
#if NET452_OR_GREATER || NETCOREAPP
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
#endif
        void fuzzy_try_reduce_blockhash()
        {
            if (_self.Bhstart >= _self.Bhend)
                throw new InvalidOperationException("Assertion failed");

            if (_self.Bhend - _self.Bhstart < 2)
                /* Need at least two working hashes. */
                return;

            if ((ulong)SSDEEP_BS(_self.Bhstart) * SPAMSUM_LENGTH >= _self.TotalSize)
                /* Initial blocksize estimate would select this or a smaller
                 * blocksize. */
                return;

            if (_self.Bh[_self.Bhstart + 1].Dlen < SPAMSUM_LENGTH / 2)
                /* Estimate adjustment would select this blocksize. */
                return;

            /* At this point we are clearly no longer interested in the
             * start_blocksize. Get rid of it. */
            ++_self.Bhstart;
        }

        /// <summary>Feeds one byte through the rolling hash and every active block hash.</summary>
#if NET452_OR_GREATER || NETCOREAPP
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
#endif
        void fuzzy_engine_step(byte c)
        {
            uint i;

            /* At each character we update the rolling hash and the normal hashes.
             * When the rolling hash hits a reset value then we emit a normal hash
             * as a element of the signature and reset the normal hash. */
            roll_hash(c);
            ulong h = roll_sum();

            for (i = _self.Bhstart; i < _self.Bhend; ++i)
            {
                _self.Bh[i].H = sum_hash(c, _self.Bh[i].H);
                _self.Bh[i].Halfh = sum_hash(c, _self.Bh[i].Halfh);
            }

            for (i = _self.Bhstart; i < _self.Bhend; ++i)
            {
                /* With growing blocksize almost no runs fail the next test. */
                if (h % SSDEEP_BS(i) != SSDEEP_BS(i) - 1)
                    /* Once this condition is false for one bs, it is
                     * automatically false for all further bs. I.e. if
                     * h === -1 (mod 2*bs) then h === -1 (mod bs). */
                    break;

                /* We have hit a reset point. We now emit hashes which are
                 * based on all characters in the piece of the message between
                 * the last reset point and this one */
                if (0 == _self.Bh[i].Dlen)
                    fuzzy_try_fork_blockhash();

                _self.Bh[i].Digest[_self.Bh[i].Dlen] = _b64[_self.Bh[i].H % 64];
                _self.Bh[i].Halfdigest = _b64[_self.Bh[i].Halfh % 64];

                if (_self.Bh[i].Dlen < SPAMSUM_LENGTH - 1)
                {
                    /* We can have a problem with the tail overflowing. The
                     * easiest way to cope with this is to only reset the
                     * normal hash if we have room for more characters in
                     * our signature. This has the effect of combining the
                     * last few pieces of the message into a single piece
                     * */
                    _self.Bh[i].Digest[++_self.Bh[i].Dlen] = 0;
                    _self.Bh[i].H = HASH_INIT;

                    if (_self.Bh[i].Dlen >= SPAMSUM_LENGTH / 2)
                        continue;

                    _self.Bh[i].Halfh = HASH_INIT;
                    _self.Bh[i].Halfdigest = 0;
                }
                else
                    fuzzy_try_reduce_blockhash();
            }
        }

        // CLAUNIA: Flags seems to never be used in ssdeep, so I just removed it for code simplicity
        /// <summary>
        /// Renders the current state into a zero-terminated ASCII buffer of the form
        /// <c>blocksize:hash:hash</c>. Only reads the hashing state, so it can be called repeatedly.
        /// </summary>
        /// <param name="result">Receives a buffer of <c>FUZZY_MAX_RESULT</c> bytes holding the text.</param>
        /// <exception cref="OverflowException">The amount of hashed data exceeds the largest block size.</exception>
        /// <exception cref="InvalidOperationException">An internal consistency check failed.</exception>
#if NET452_OR_GREATER || NETCOREAPP
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
#endif
        void FuzzyDigest(out byte[] result)
        {
            var sb = new StringBuilder();
            uint bi = _self.Bhstart;
            uint h = roll_sum();
            int remain = (int)(FUZZY_MAX_RESULT - 1); /* Exclude terminating '\0'. */
            result = new byte[FUZZY_MAX_RESULT];

            /* Verify that our elimination was not overeager. */
            if (!(bi == 0 || ((ulong)SSDEEP_BS(bi) / 2) * SPAMSUM_LENGTH < _self.TotalSize))
                throw new InvalidOperationException("Assertion failed");

            /* Initial blocksize guess. */
            while ((ulong)SSDEEP_BS(bi) * SPAMSUM_LENGTH < _self.TotalSize)
            {
                ++bi;

                if (bi >= NUM_BLOCKHASHES)
                    throw new OverflowException("The input exceeds data types.");
            }

            /* Adapt blocksize guess to actual digest length. */
            while (bi >= _self.Bhend)
                --bi;

            while (bi > _self.Bhstart &&
                   _self.Bh[bi].Dlen < SPAMSUM_LENGTH / 2)
                --bi;

            if (bi > 0 &&
                _self.Bh[bi].Dlen < SPAMSUM_LENGTH / 2)
                throw new InvalidOperationException("Assertion failed");

            // Block size prefix ("<blocksize>:"), encoded once and reused for both length and copy.
            sb.AppendFormat("{0}:", SSDEEP_BS(bi));
            byte[] prefix = Encoding.ASCII.GetBytes(sb.ToString());
            int i = prefix.Length;

            if (i <= 0)
                /* Maybe snprintf has set errno here? */
                throw new OverflowException("The input exceeds data types.");

            if (i >= remain)
                throw new InvalidOperationException("Assertion failed");

            remain -= i;
            Array.Copy(prefix, 0, result, 0, i);
            int resultOff = i;

            // First hash: digest of the selected block size.
            i = (int)_self.Bh[bi].Dlen;

            if (i > remain)
                throw new InvalidOperationException("Assertion failed");

            Array.Copy(_self.Bh[bi].Digest, 0, result, resultOff, i);
            resultOff += i;
            remain -= i;

            if (h != 0)
            {
                if (remain <= 0)
                    throw new InvalidOperationException("Assertion failed");

                // Emit a character for the data seen since the last reset point, unless it
                // would be the fourth identical character in a row.
                result[resultOff] = _b64[_self.Bh[bi].H % 64];

                if (i < 3 ||
                    result[resultOff] != result[resultOff - 1] ||
                    result[resultOff] != result[resultOff - 2] ||
                    result[resultOff] != result[resultOff - 3])
                {
                    ++resultOff;
                    --remain;
                }
            }
            else if (_self.Bh[bi].Digest[i] != 0)
            {
                if (remain <= 0)
                    throw new InvalidOperationException("Assertion failed");

                result[resultOff] = _self.Bh[bi].Digest[i];

                if (i < 3 ||
                    result[resultOff] != result[resultOff - 1] ||
                    result[resultOff] != result[resultOff - 2] ||
                    result[resultOff] != result[resultOff - 3])
                {
                    ++resultOff;
                    --remain;
                }
            }

            if (remain <= 0)
                throw new InvalidOperationException("Assertion failed");

            result[resultOff++] = 0x3A; // ':'
            --remain;

            // Second hash: digest of the next larger block size, when one is active.
            if (bi < _self.Bhend - 1)
            {
                ++bi;
                i = (int)_self.Bh[bi].Dlen;

                if (i > remain)
                    throw new InvalidOperationException("Assertion failed");

                Array.Copy(_self.Bh[bi].Digest, 0, result, resultOff, i);
                resultOff += i;
                remain -= i;

                if (h != 0)
                {
                    if (remain <= 0)
                        throw new InvalidOperationException("Assertion failed");

                    h = _self.Bh[bi].Halfh;
                    result[resultOff] = _b64[h % 64];

                    if (i < 3 ||
                        result[resultOff] != result[resultOff - 1] ||
                        result[resultOff] != result[resultOff - 2] ||
                        result[resultOff] != result[resultOff - 3])
                    {
                        ++resultOff;
                        --remain;
                    }
                }
                else
                {
                    // NOTE(review): from here on `i` holds the half-digest character rather than
                    // a length, so the `i < 3` test below compares a character code. Kept as-is
                    // to preserve the existing output.
                    i = _self.Bh[bi].Halfdigest;

                    if (i != 0)
                    {
                        if (remain <= 0)
                            throw new InvalidOperationException("Assertion failed");

                        result[resultOff] = (byte)i;

                        if (i < 3 ||
                            result[resultOff] != result[resultOff - 1] ||
                            result[resultOff] != result[resultOff - 2] ||
                            result[resultOff] != result[resultOff - 3])
                        {
                            ++resultOff;
                            --remain;
                        }
                    }
                }
            }
            else if (h != 0)
            {
                if (_self.Bh[bi].Dlen != 0)
                    throw new InvalidOperationException("Assertion failed");

                if (remain <= 0)
                    throw new InvalidOperationException("Assertion failed");

                result[resultOff++] = _b64[_self.Bh[bi].H % 64];
                /* No need to bother with FUZZY_FLAG_ELIMSEQ, because this
                 * digest has length 1. */
                --remain;
            }

            result[resultOff] = 0;
        }

        /// <summary>Not implemented: always throws.</summary>
        /// <param name="filename">File path.</param>
        /// <exception cref="NotImplementedException">Always thrown.</exception>
        public static byte[] File(string filename) =>
            throw new NotImplementedException("SpamSum does not have a binary representation.");

        /// <summary>Not implemented: always throws.</summary>
        /// <param name="filename">File path.</param>
        /// <param name="hash">Never assigned, because the method always throws.</param>
        /// <exception cref="NotImplementedException">Always thrown.</exception>
        public static string File(string filename, out byte[] hash) =>
            throw new NotImplementedException("Not yet implemented.");

        /// <summary>Gets the hash of the specified data buffer.</summary>
        /// <param name="data">Data buffer.</param>
        /// <param name="len">Length of the data buffer to hash.</param>
        /// <param name="hash">null</param>
        /// <returns>Base64 representation of SpamSum $blocksize:$hash:$hash</returns>
        public static string Data(byte[] data, uint len, out byte[]? hash)
        {
            hash = null;

            // The context is IDisposable, so release it deterministically even though
            // Dispose is currently a no-op.
            using (var fuzzyContext = new SpamSumContext())
            {
                fuzzyContext.Update(data, len);
                return fuzzyContext.End();
            }
        }

        /// <summary>Gets the hash of the specified data buffer.</summary>
        /// <param name="data">Data buffer.</param>
        /// <param name="hash">null</param>
        /// <returns>Base64 representation of SpamSum $blocksize:$hash:$hash</returns>
        public static string Data(byte[] data, out byte[]? hash) => Data(data, (uint)data.Length, out hash);

        // Converts an ASCII null-terminated string to .NET string
#if NET452_OR_GREATER || NETCOREAPP
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
#endif
        static string CToString(byte[] cString)
        {
            int count = 0;

            // ReSharper disable once LoopCanBeConvertedToQuery
            // LINQ is six times slower
            foreach (byte c in cString)
            {
                if (c == 0)
                    break;

                count++;
            }

            return Encoding.ASCII.GetString(cString, 0, count);
        }

        // Copies an ASCII null-terminated string into a new byte array, dropping the terminator
        // and everything after it
#if NET452_OR_GREATER || NETCOREAPP
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
#endif
        static byte[] CToArray(byte[] cString)
        {
            int count = 0;

            // ReSharper disable once LoopCanBeConvertedToQuery
            // LINQ is six times slower
            foreach (byte c in cString)
            {
                if (c == 0)
                    break;

                count++;
            }

#if NETFRAMEWORK
            byte[] temp = new byte[count];
            Array.Copy(cString, temp, count);
            return temp;
#else
            return new ReadOnlySpan<byte>(cString, 0, count).ToArray();
#endif
        }

        /// <summary>Releases the context. There is currently nothing to release.</summary>
        public void Dispose()
        {
            // TODO: No-op to dispose for now
        }

        /// <summary>State of the rolling hash.</summary>
        struct RollState
        {
            public byte[] Window;
            // ROLLING_WINDOW
            public uint H1;
            public uint H2;
            public uint H3;
            public uint N;
        }

        /* A blockhash contains a signature state for a specific (implicit) blocksize.
         * The blocksize is given by SSDEEP_BS(index). The h and halfh members are the
         * FNV hashes, where halfh stops to be reset after digest is SPAMSUM_LENGTH/2
         * long. The halfh hash is needed be able to truncate digest for the second
         * output hash to stay compatible with ssdeep output. */
        struct BlockhashContext
        {
            public uint H;
            public uint Halfh;
            public byte[] Digest;
            // SPAMSUM_LENGTH
            public byte Halfdigest;
            public uint Dlen;
        }

        /// <summary>Complete hashing state: the active block hash range, total input size and rolling hash.</summary>
        struct FuzzyState
        {
            public uint Bhstart;
            public uint Bhend;
            public BlockhashContext[] Bh;
            //NUM_BLOCKHASHES
            public ulong TotalSize;
            public RollState Roll;
        }
    }
}

View File

@@ -1,164 +0,0 @@
using System;
using System.Linq;
using System.Security.Cryptography;
using Aaru.Checksums;
namespace SabreTools.Core.Tools
{
    /// <summary>
    /// Hashing class wrapper that drives a single hash algorithm incrementally:
    /// <see cref="Process" /> for each chunk, <see cref="Terminate" /> once, then <see cref="GetHash" />.
    /// </summary>
    public class Hasher : IDisposable
    {
        /// <summary>
        /// Hash type this instance was created for
        /// </summary>
        public Hash HashType { get; private set; }

        /// <summary>
        /// Underlying hash implementation; null when the hash type is not handled
        /// </summary>
        private IDisposable? _hasher;

        /// <summary>
        /// Create a hasher for the given hash type
        /// </summary>
        public Hasher(Hash hashType)
        {
            this.HashType = hashType;
            GetHasher();
        }

        /// <summary>
        /// Generate the correct hashing class based on the hash type
        /// </summary>
        private void GetHasher()
        {
            switch (HashType)
            {
                case Hash.CRC:
                    _hasher = new OptimizedCRC.OptimizedCRC();
                    break;

                case Hash.MD5:
                    _hasher = MD5.Create();
                    break;

                case Hash.SHA1:
                    _hasher = SHA1.Create();
                    break;

                case Hash.SHA256:
                    _hasher = SHA256.Create();
                    break;

                case Hash.SHA384:
                    _hasher = SHA384.Create();
                    break;

                case Hash.SHA512:
                    _hasher = SHA512.Create();
                    break;

                case Hash.SpamSum:
                    _hasher = new SpamSumContext();
                    break;
            }
        }

        /// <summary>
        /// Dispose of the underlying hash implementation, if any
        /// </summary>
        public void Dispose()
        {
            _hasher?.Dispose();
        }

        /// <summary>
        /// Process a buffer of some length with the internal hash algorithm
        /// </summary>
        /// <param name="buffer">Buffer containing the data to hash</param>
        /// <param name="size">Number of bytes at the start of the buffer to hash</param>
        public void Process(byte[] buffer, int size)
        {
            if (_hasher == null)
                return;

            switch (_hasher)
            {
                case OptimizedCRC.OptimizedCRC crc:
                    crc.Update(buffer, 0, size);
                    break;

                case MD5 md5:
                    md5.TransformBlock(buffer, 0, size, null, 0);
                    break;

                case SHA1 sha1:
                    sha1.TransformBlock(buffer, 0, size, null, 0);
                    break;

                case SHA256 sha256:
                    sha256.TransformBlock(buffer, 0, size, null, 0);
                    break;

                case SHA384 sha384:
                    sha384.TransformBlock(buffer, 0, size, null, 0);
                    break;

                case SHA512 sha512:
                    sha512.TransformBlock(buffer, 0, size, null, 0);
                    break;

                case SpamSumContext spamSum:
                    // Honor the size like every other algorithm: the buffer is reused between
                    // reads, so anything past `size` is stale data from an earlier chunk.
                    spamSum.Update(buffer, (uint)size);
                    break;
            }
        }

        /// <summary>
        /// Terminate the internal hash algorithm
        /// </summary>
        public void Terminate()
        {
            if (_hasher == null)
                return;

            byte[] emptyBuffer = [];
            switch (_hasher)
            {
                case OptimizedCRC.OptimizedCRC crc:
                    crc.Update(emptyBuffer, 0, 0);
                    break;

                case MD5 md5:
                    md5.TransformFinalBlock(emptyBuffer, 0, 0);
                    break;

                case SHA1 sha1:
                    sha1.TransformFinalBlock(emptyBuffer, 0, 0);
                    break;

                case SHA256 sha256:
                    sha256.TransformFinalBlock(emptyBuffer, 0, 0);
                    break;

                case SHA384 sha384:
                    sha384.TransformFinalBlock(emptyBuffer, 0, 0);
                    break;

                case SHA512 sha512:
                    sha512.TransformFinalBlock(emptyBuffer, 0, 0);
                    break;

                case SpamSumContext:
                    // No finalization step needed
                    break;
            }
        }

        /// <summary>
        /// Get internal hash as a byte array
        /// </summary>
        /// <returns>The hash bytes, or null when no hash implementation is available</returns>
        public byte[]? GetHash()
        {
            return _hasher switch
            {
                OptimizedCRC.OptimizedCRC crc => ToBigEndianBytes(crc.Value),
                MD5 md5 => md5.Hash,
                SHA1 sha1 => sha1.Hash,
                SHA256 sha256 => sha256.Hash,
                SHA384 sha384 => sha384.Hash,
                SHA512 sha512 => sha512.Hash,
                SpamSumContext spamSum => spamSum.Final(),
                _ => null,
            };
        }

        /// <summary>
        /// Convert a 32-bit value to its bytes, most significant byte first
        /// </summary>
        /// <remarks>
        /// Matches what BitConverter.GetBytes followed by Reverse yields on a little-endian host,
        /// without depending on the endianness of the machine.
        /// </remarks>
        private static byte[] ToBigEndianBytes(int value)
        {
            uint bits = unchecked((uint)value);
            return
            [
                (byte)(bits >> 24),
                (byte)(bits >> 16),
                (byte)(bits >> 8),
                (byte)bits,
            ];
        }
    }
}

View File

@@ -1,153 +0,0 @@
/*
Copyright (c) 2012-2015 Eugene Larchenko (spct@mail.ru)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
using System;
namespace OptimizedCRC
{
    /// <summary>
    /// Table-driven CRC-32 calculator (polynomial 0xEDB88320) that consumes eight input bytes
    /// per iteration on the bulk of the data.
    /// </summary>
    internal class OptimizedCRC : IDisposable
    {
        private const uint kCrcPoly = 0xEDB88320;
        private const uint kInitial = 0xFFFFFFFF;
        private const int CRC_NUM_TABLES = 8;

        /// <summary>
        /// Eight consecutive 256-entry lookup tables; table k starts at index k * 256.
        /// </summary>
        private static readonly uint[] Table;

        static OptimizedCRC()
        {
            var lookup = new uint[256 * CRC_NUM_TABLES];

            // Table 0: classic bit-by-bit CRC of every possible byte value.
            for (uint value = 0; value < 256; value++)
            {
                uint r = value;
                for (int bit = 0; bit < 8; bit++)
                {
                    r = (r & 1) != 0 ? (r >> 1) ^ kCrcPoly : r >> 1;
                }

                lookup[value] = r;
            }

            // Tables 1..7: each entry is derived from the entry 256 slots earlier.
            for (int index = 256; index < lookup.Length; index++)
            {
                uint previous = lookup[index - 256];
                lookup[index] = lookup[previous & 0xFF] ^ (previous >> 8);
            }

            Table = lookup;
        }

        /// <summary>
        /// Raw running register; the finished CRC is its bitwise complement (see <see cref="Value" />).
        /// </summary>
        public uint UnsignedValue;

        public OptimizedCRC()
        {
            Init();
        }

        /// <summary>
        /// Reset CRC
        /// </summary>
        public void Init()
        {
            UnsignedValue = kInitial;
        }

        /// <summary>
        /// CRC of all data processed so far, as a signed 32-bit value
        /// </summary>
        public int Value => (int)~UnsignedValue;

        /// <summary>
        /// Fold <paramref name="count" /> bytes of <paramref name="data" />, starting at
        /// <paramref name="offset" />, into the running CRC
        /// </summary>
        public void Update(byte[] data, int offset, int count)
        {
            // Building the segment is only done for its argument checks.
            _ = new ArraySegment<byte>(data, offset, count);

            if (count == 0)
                return;

            uint[] table = Table;
            uint crc = UnsignedValue;
            int position = offset;
            int remaining = count;

            // Byte-at-a-time until the index is a multiple of eight (or the data runs out).
            while ((position & 7) != 0 && remaining != 0)
            {
                crc = (crc >> 8) ^ table[(byte)crc ^ data[position++]];
                remaining--;
            }

            if (remaining >= 8)
            {
                /*
                 * Idea from 7-zip project sources (http://7-zip.org/sdk.html)
                 */
                int bulk = (remaining - 8) & ~7;
                remaining -= bulk;
                int stop = position + bulk;

                while (position != stop)
                {
                    // First four bytes are mixed into the register, the next four stand alone.
                    uint low = crc ^ (uint)(data[position] + (data[position + 1] << 8) + (data[position + 2] << 16) + (data[position + 3] << 24));
                    uint high = (uint)(data[position + 4] + (data[position + 5] << 8) + (data[position + 6] << 16) + (data[position + 7] << 24));
                    position += 8;

                    crc = table[0x700 + (low & 0xFF)]
                        ^ table[0x600 + ((low >> 8) & 0xFF)]
                        ^ table[0x500 + ((low >> 16) & 0xFF)]
                        ^ table[0x400 + (low >> 24)]
                        ^ table[0x300 + (high & 0xFF)]
                        ^ table[0x200 + ((high >> 8) & 0xFF)]
                        ^ table[0x100 + ((high >> 16) & 0xFF)]
                        ^ table[high >> 24];
                }
            }

            // Whatever is left goes through the single-byte path.
            for (; remaining != 0; remaining--)
            {
                crc = (crc >> 8) ^ table[(byte)crc ^ data[position++]];
            }

            UnsignedValue = crc;
        }

        /// <summary>
        /// Compute the CRC of part of a buffer in one call
        /// </summary>
        public static int Compute(byte[] data, int offset, int count)
        {
            var calculator = new OptimizedCRC();
            calculator.Update(data, offset, count);
            return calculator.Value;
        }

        /// <summary>
        /// Compute the CRC of a whole buffer in one call
        /// </summary>
        public static int Compute(byte[] data) => Compute(data, 0, data.Length);

        /// <summary>
        /// Compute the CRC of an array segment in one call
        /// </summary>
        public static int Compute(ArraySegment<byte> block) => Compute(block.Array!, block.Offset, block.Count);

        /// <summary>
        /// Clear the running register
        /// </summary>
        public void Dispose()
        {
            UnsignedValue = 0;
        }
    }
}

View File

@@ -2,10 +2,12 @@
using System.IO; using System.IO;
using System.Linq; using System.Linq;
using System.Threading.Tasks; using System.Threading.Tasks;
using Compress.Support.Compression.LZMA;
using SabreTools.Core; using SabreTools.Core;
using SabreTools.Core.Tools; using SabreTools.Core.Tools;
using SabreTools.FileTypes.Aaru; using SabreTools.FileTypes.Aaru;
using SabreTools.FileTypes.CHD; using SabreTools.FileTypes.CHD;
using SabreTools.Hashing;
using SabreTools.IO; using SabreTools.IO;
using SabreTools.Logging; using SabreTools.Logging;
using SabreTools.Matching; using SabreTools.Matching;
@@ -345,139 +347,62 @@ namespace SabreTools.FileTypes
if (size == -1) if (size == -1)
size = input.Length; size = input.Length;
try // Get a list of hash types to run
{ List<HashType> hashTypes = [];
// Get a list of hashers to run over the buffer
List<Hasher> hashers = [];
#if NETFRAMEWORK #if NETFRAMEWORK
if ((hashes & Hash.CRC) != 0) if ((hashes & Hash.CRC) != 0) hashTypes.Add(HashType.CRC32);
hashers.Add(new Hasher(Hash.CRC)); if ((hashes & Hash.MD5) != 0) hashTypes.Add(HashType.MD5);
if ((hashes & Hash.MD5) != 0) if ((hashes & Hash.SHA1) != 0) hashTypes.Add(HashType.SHA1);
hashers.Add(new Hasher(Hash.MD5)); if ((hashes & Hash.SHA256) != 0) hashTypes.Add(HashType.SHA256);
if ((hashes & Hash.SHA1) != 0) if ((hashes & Hash.SHA384) != 0) hashTypes.Add(HashType.SHA384);
hashers.Add(new Hasher(Hash.SHA1)); if ((hashes & Hash.SHA512) != 0) hashTypes.Add(HashType.SHA512);
if ((hashes & Hash.SHA256) != 0) if ((hashes & Hash.SpamSum) != 0) hashTypes.Add(HashType.SpamSum);
hashers.Add(new Hasher(Hash.SHA256));
if ((hashes & Hash.SHA384) != 0)
hashers.Add(new Hasher(Hash.SHA384));
if ((hashes & Hash.SHA512) != 0)
hashers.Add(new Hasher(Hash.SHA512));
if ((hashes & Hash.SpamSum) != 0)
hashers.Add(new Hasher(Hash.SpamSum));
#else #else
if (hashes.HasFlag(Hash.CRC)) if (hashes.HasFlag(Hash.CRC)) hashTypes.Add(HashType.CRC32);
hashers.Add(new Hasher(Hash.CRC)); if (hashes.HasFlag(Hash.MD5)) hashTypes.Add(HashType.MD5);
if (hashes.HasFlag(Hash.MD5)) if (hashes.HasFlag(Hash.SHA1)) hashTypes.Add(HashType.SHA1);
hashers.Add(new Hasher(Hash.MD5)); if (hashes.HasFlag(Hash.SHA256)) hashTypes.Add(HashType.SHA256);
if (hashes.HasFlag(Hash.SHA1)) if (hashes.HasFlag(Hash.SHA384)) hashTypes.Add(HashType.SHA384);
hashers.Add(new Hasher(Hash.SHA1)); if (hashes.HasFlag(Hash.SHA512)) hashTypes.Add(HashType.SHA512);
if (hashes.HasFlag(Hash.SHA256)) if (hashes.HasFlag(Hash.SpamSum)) hashTypes.Add(HashType.SpamSum);
hashers.Add(new Hasher(Hash.SHA256));
if (hashes.HasFlag(Hash.SHA384))
hashers.Add(new Hasher(Hash.SHA384));
if (hashes.HasFlag(Hash.SHA512))
hashers.Add(new Hasher(Hash.SHA512));
if (hashes.HasFlag(Hash.SpamSum))
hashers.Add(new Hasher(Hash.SpamSum));
#endif #endif
// Initialize the hashing helpers // Run the hashing on the input stream
int buffersize = 3 * 1024 * 1024; var hashDict = HashTool.GetStreamHashes(input, hashTypes.ToArray());
byte[] buffer = new byte[buffersize]; if (hashDict == null)
return new BaseFile();
/* // Create a base file with the resulting hashes
Please note that some of the following code is adapted from var baseFile = new BaseFile()
RomVault. This is a modified version of how RomVault does
threaded hashing. As such, some of the terminology and code
is the same, though variable names and comments may have
been tweaked to better fit this code base.
*/
// Pre load the buffer
int next = buffersize > size ? (int)size : buffersize;
int current = input.Read(buffer, 0, next);
long refsize = size;
while (refsize > 0)
{
// Run hashes in parallel
if (current > 0)
#if NET452_OR_GREATER || NETCOREAPP
Parallel.ForEach(hashers, Globals.ParallelOptions, h => h.Process(buffer, current));
#elif NET40_OR_GREATER
Parallel.ForEach(hashers, h => h.Process(buffer, current));
#else
foreach (var h in hashers)
{
h.Process(buffer, current);
}
#endif
// Load the next buffer
refsize -= current;
next = buffersize > refsize ? (int)refsize : buffersize;
if (next > 0)
{
current = input.Read(buffer, 0, next);
if (current == 0)
break;
}
}
// Finalize all hashing helpers
#if NET452_OR_GREATER || NETCOREAPP
Parallel.ForEach(hashers, Globals.ParallelOptions, h => h.Terminate());
#elif NET40_OR_GREATER
Parallel.ForEach(hashers, h => h.Terminate());
#else
foreach (var h in hashers)
{
h.Terminate();
}
#endif
// Get the results
BaseFile baseFile = new()
{ {
Size = size, Size = size,
#if NETFRAMEWORK #if NETFRAMEWORK
CRC = (hashes & Hash.CRC) != 0 ? hashers.First(h => h.HashType == Hash.CRC).GetHash() : null, CRC = (hashes & Hash.CRC) != 0 ? TextHelper.StringToByteArray(hashDict[HashType.CRC32]) : null,
MD5 = (hashes & Hash.MD5) != 0 ? hashers.First(h => h.HashType == Hash.MD5).GetHash() : null, MD5 = (hashes & Hash.MD5) != 0 ? TextHelper.StringToByteArray(hashDict[HashType.MD5]) : null,
SHA1 = (hashes & Hash.SHA1) != 0 ? hashers.First(h => h.HashType == Hash.SHA1).GetHash() : null, SHA1 = (hashes & Hash.SHA1) != 0 ? TextHelper.StringToByteArray(hashDict[HashType.SHA1]) : null,
SHA256 = (hashes & Hash.SHA256) != 0 ? hashers.First(h => h.HashType == Hash.SHA256).GetHash() : null, SHA256 = (hashes & Hash.SHA256) != 0 ? TextHelper.StringToByteArray(hashDict[HashType.SHA256]) : null,
SHA384 = (hashes & Hash.SHA384) != 0 ? hashers.First(h => h.HashType == Hash.SHA384).GetHash() : null, SHA384 = (hashes & Hash.SHA384) != 0 ? TextHelper.StringToByteArray(hashDict[HashType.SHA384]) : null,
SHA512 = (hashes & Hash.SHA512) != 0 ? hashers.First(h => h.HashType == Hash.SHA512).GetHash() : null, SHA512 = (hashes & Hash.SHA512) != 0 ? TextHelper.StringToByteArray(hashDict[HashType.SHA512]) : null,
SpamSum = (hashes & Hash.SpamSum) != 0 ? hashers.First(h => h.HashType == Hash.SpamSum).GetHash() : null, SpamSum = (hashes & Hash.SpamSum) != 0 ? TextHelper.StringToByteArray(hashDict[HashType.SpamSum]) : null,
#else #else
CRC = hashes.HasFlag(Hash.CRC) ? hashers.First(h => h.HashType == Hash.CRC).GetHash() : null, CRC = hashes.HasFlag(Hash.CRC) ? TextHelper.StringToByteArray(hashDict[HashType.CRC32]) : null,
MD5 = hashes.HasFlag(Hash.MD5) ? hashers.First(h => h.HashType == Hash.MD5).GetHash() : null, MD5 = hashes.HasFlag(Hash.MD5) ? TextHelper.StringToByteArray(hashDict[HashType.MD5]) : null,
SHA1 = hashes.HasFlag(Hash.SHA1) ? hashers.First(h => h.HashType == Hash.SHA1).GetHash() : null, SHA1 = hashes.HasFlag(Hash.SHA1) ? TextHelper.StringToByteArray(hashDict[HashType.SHA1]) : null,
SHA256 = hashes.HasFlag(Hash.SHA256) ? hashers.First(h => h.HashType == Hash.SHA256).GetHash() : null, SHA256 = hashes.HasFlag(Hash.SHA256) ? TextHelper.StringToByteArray(hashDict[HashType.SHA256]) : null,
SHA384 = hashes.HasFlag(Hash.SHA384) ? hashers.First(h => h.HashType == Hash.SHA384).GetHash() : null, SHA384 = hashes.HasFlag(Hash.SHA384) ? TextHelper.StringToByteArray(hashDict[HashType.SHA384]) : null,
SHA512 = hashes.HasFlag(Hash.SHA512) ? hashers.First(h => h.HashType == Hash.SHA512).GetHash() : null, SHA512 = hashes.HasFlag(Hash.SHA512) ? TextHelper.StringToByteArray(hashDict[HashType.SHA512]) : null,
SpamSum = hashes.HasFlag(Hash.SpamSum) ? hashers.First(h => h.HashType == Hash.SpamSum).GetHash() : null, SpamSum = hashes.HasFlag(Hash.SpamSum) ? TextHelper.StringToByteArray(hashDict[HashType.SpamSum]) : null,
#endif #endif
}; };
// Dispose of the hashers // Deal with the input stream
hashers.ForEach(h => h.Dispose());
return baseFile;
}
catch (IOException ex)
{
LoggerImpl.Warning(ex, "An exception occurred during hashing.");
return new BaseFile();
}
finally
{
if (!keepReadOpen) if (!keepReadOpen)
input.Dispose(); input.Dispose();
else else
input.SeekIfPossible(); input.SeekIfPossible();
}
return baseFile;
} }
/// <summary> /// <summary>

View File

@@ -33,6 +33,7 @@
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<PackageReference Include="SabreTools.Hashing" Version="1.1.2" />
<PackageReference Include="SabreTools.IO" Version="1.3.0" /> <PackageReference Include="SabreTools.IO" Version="1.3.0" />
<PackageReference Include="SabreTools.Matching" Version="1.3.1" /> <PackageReference Include="SabreTools.Matching" Version="1.3.1" />
<PackageReference Include="SabreTools.Skippers" Version="1.1.2" /> <PackageReference Include="SabreTools.Skippers" Version="1.1.2" />

View File

@@ -1,7 +1,6 @@
using System; using System;
using System.IO; using System.IO;
using System.Text; using System.Text;
using SabreTools.Core; using SabreTools.Core;
using SabreTools.Core.Tools; using SabreTools.Core.Tools;
using SabreTools.DatItems.Formats; using SabreTools.DatItems.Formats;