From 076b80e8058da7ecc0e2d33c265c361514eb7af6 Mon Sep 17 00:00:00 2001 From: Matt Nadareski Date: Mon, 4 Mar 2024 21:20:39 -0500 Subject: [PATCH] Use SabreTools.Hashing --- SabreTools.Core/Tools/Aaru/IChecksum.cs | 58 -- SabreTools.Core/Tools/Aaru/SpamSumContext.cs | 583 ------------------ SabreTools.Core/Tools/Hasher.cs | 164 ----- SabreTools.Core/Tools/OptimizedCRC.cs | 153 ----- SabreTools.FileTypes/BaseFile.cs | 177 ++---- .../SabreTools.FileTypes.csproj | 1 + SabreTools.Test/FileTypes/BaseFileTests.cs | 1 - 7 files changed, 52 insertions(+), 1085 deletions(-) delete mode 100644 SabreTools.Core/Tools/Aaru/IChecksum.cs delete mode 100644 SabreTools.Core/Tools/Aaru/SpamSumContext.cs delete mode 100644 SabreTools.Core/Tools/Hasher.cs delete mode 100644 SabreTools.Core/Tools/OptimizedCRC.cs diff --git a/SabreTools.Core/Tools/Aaru/IChecksum.cs b/SabreTools.Core/Tools/Aaru/IChecksum.cs deleted file mode 100644 index 94acdc8a..00000000 --- a/SabreTools.Core/Tools/Aaru/IChecksum.cs +++ /dev/null @@ -1,58 +0,0 @@ -// /*************************************************************************** -// Aaru Data Preservation Suite -// ---------------------------------------------------------------------------- -// -// Filename : IChecksum.cs -// Author(s) : Natalia Portillo -// -// Component : Checksums. -// -// --[ Description ] ---------------------------------------------------------- -// -// Provides an interface for implementing checksums and hashes. -// -// --[ License ] -------------------------------------------------------------- -// -// Permission is hereby granted, free of charge, to any person obtaining a -// copy of this software and associated documentation files (the -// "Software"), to deal in the Software without restriction, including -// without limitation the rights to use, copy, modify, merge, publish, -// distribute, sublicense, and/or sell copies of the Software, and to -// permit persons to whom the Software is furnished to do so, subject to -// the following conditions: -// -// The above copyright notice and this permission notice shall be included -// in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -// IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -// SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -// -// ---------------------------------------------------------------------------- -// Copyright © 2011-2020 Natalia Portillo -// ****************************************************************************/ - -namespace Aaru.CommonTypes.Interfaces -{ - public interface IChecksum - { - /// Updates the hash with data. - /// Data buffer. - /// Length of buffer to hash. - void Update(byte[] data, uint len); - - /// Updates the hash with data. - /// Data buffer. - void Update(byte[] data); - - /// Returns a byte array of the hash value. - byte[] Final(); - - /// Returns a hexadecimal representation of the hash value. - string End(); - } -} \ No newline at end of file diff --git a/SabreTools.Core/Tools/Aaru/SpamSumContext.cs b/SabreTools.Core/Tools/Aaru/SpamSumContext.cs deleted file mode 100644 index ea1d4f5e..00000000 --- a/SabreTools.Core/Tools/Aaru/SpamSumContext.cs +++ /dev/null @@ -1,583 +0,0 @@ -// /*************************************************************************** -// Aaru Data Preservation Suite -// ---------------------------------------------------------------------------- -// -// Filename : SpamSumContext.cs -// Author(s) : Natalia Portillo -// -// Component : Checksums. -// -// --[ Description ] ---------------------------------------------------------- -// -// Implements the SpamSum fuzzy hashing algorithm. -// -// --[ License ] -------------------------------------------------------------- -// -// This library is free software; you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as -// published by the Free Software Foundation; either version 2.1 of the -// License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, but -// WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, see . -// -// ---------------------------------------------------------------------------- -// Copyright © 2011-2020 Natalia Portillo -// ****************************************************************************/ - -// Based on ssdeep -// Copyright (C) 2002 Andrew Tridgell -// Copyright (C) 2006 ManTech International Corporation -// Copyright (C) 2013 Helmut Grohne -// -// Earlier versions of this code were named fuzzy.c and can be found at: -// http://www.samba.org/ftp/unpacked/junkcode/spamsum/ -// http://ssdeep.sf.net/ - -using System; -using System.Runtime.CompilerServices; -using System.Text; -using Aaru.CommonTypes.Interfaces; - -namespace Aaru.Checksums -{ - /// Implements the SpamSum fuzzy hashing algorithm. - public sealed class SpamSumContext : IChecksum, IDisposable - { - const uint ROLLING_WINDOW = 7; - const uint MIN_BLOCKSIZE = 3; - const uint HASH_PRIME = 0x01000193; - const uint HASH_INIT = 0x28021967; - const uint NUM_BLOCKHASHES = 31; - const uint SPAMSUM_LENGTH = 64; - const uint FUZZY_MAX_RESULT = (2 * SPAMSUM_LENGTH) + 20; - - //"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; - readonly byte[] _b64 = - { - 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, - 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, - 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x30, 0x31, - 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x2B, 0x2F - }; - - FuzzyState _self; - - /// Initializes the SpamSum structures - public SpamSumContext() - { - _self = new FuzzyState - { - Bh = new BlockhashContext[NUM_BLOCKHASHES] - }; - - for (int i = 0; i < NUM_BLOCKHASHES; i++) - _self.Bh[i].Digest = new byte[SPAMSUM_LENGTH]; - - _self.Bhstart = 0; - _self.Bhend = 1; - _self.Bh[0].H = HASH_INIT; - _self.Bh[0].Halfh = HASH_INIT; - _self.Bh[0].Digest[0] = 0; - _self.Bh[0].Halfdigest = 0; - _self.Bh[0].Dlen = 0; - _self.TotalSize = 0; - roll_init(); - } - - /// - /// Updates the hash with data. - /// Data buffer. - /// Length of buffer to hash. - public void Update(byte[] data, uint len) - { - _self.TotalSize += len; - - for (int i = 0; i < len; i++) - fuzzy_engine_step(data[i]); - } - - /// - /// Updates the hash with data. - /// Data buffer. - public void Update(byte[] data) => Update(data, (uint)data.Length); - - /// - /// Returns a byte array of the hash value. - public byte[] Final() - { - FuzzyDigest(out byte[] result); - - return CToArray(result); - } - - /// - /// Returns a base64 representation of the hash value. - public string End() - { - FuzzyDigest(out byte[] result); - - return CToString(result); - } - -#if NET452_OR_GREATER || NETCOREAPP - [MethodImpl(MethodImplOptions.AggressiveInlining)] -#endif - void roll_init() => _self.Roll = new RollState - { - Window = new byte[ROLLING_WINDOW] - }; - - /* - * a rolling hash, based on the Adler checksum. By using a rolling hash - * we can perform auto resynchronisation after inserts/deletes - - * internally, h1 is the sum of the bytes in the window and h2 - * is the sum of the bytes times the index - - * h3 is a shift/xor based rolling hash, and is mostly needed to ensure that - * we can cope with large blocksize values - */ -#if NET452_OR_GREATER || NETCOREAPP - [MethodImpl(MethodImplOptions.AggressiveInlining)] -#endif - void roll_hash(byte c) - { - _self.Roll.H2 -= _self.Roll.H1; - _self.Roll.H2 += ROLLING_WINDOW * c; - - _self.Roll.H1 += c; - _self.Roll.H1 -= _self.Roll.Window[_self.Roll.N % ROLLING_WINDOW]; - - _self.Roll.Window[_self.Roll.N % ROLLING_WINDOW] = c; - _self.Roll.N++; - - /* The original spamsum AND'ed this value with 0xFFFFFFFF which - * in theory should have no effect. This AND has been removed - * for performance (jk) */ - _self.Roll.H3 <<= 5; - _self.Roll.H3 ^= c; - } - -#if NET452_OR_GREATER || NETCOREAPP - [MethodImpl(MethodImplOptions.AggressiveInlining)] -#endif - uint roll_sum() => _self.Roll.H1 + _self.Roll.H2 + _self.Roll.H3; - - /* A simple non-rolling hash, based on the FNV hash. */ -#if NET452_OR_GREATER || NETCOREAPP - [MethodImpl(MethodImplOptions.AggressiveInlining)] -#endif - static uint sum_hash(byte c, uint h) => (h * HASH_PRIME) ^ c; - -#if NET452_OR_GREATER || NETCOREAPP - [MethodImpl(MethodImplOptions.AggressiveInlining)] -#endif - static uint SSDEEP_BS(uint index) => MIN_BLOCKSIZE << (int)index; - -#if NET452_OR_GREATER || NETCOREAPP - [MethodImpl(MethodImplOptions.AggressiveInlining)] -#endif - void fuzzy_try_fork_blockhash() - { - if (_self.Bhend >= NUM_BLOCKHASHES) - return; - - if (_self.Bhend == 0) // assert - throw new Exception("Assertion failed"); - - uint obh = _self.Bhend - 1; - uint nbh = _self.Bhend; - _self.Bh[nbh].H = _self.Bh[obh].H; - _self.Bh[nbh].Halfh = _self.Bh[obh].Halfh; - _self.Bh[nbh].Digest[0] = 0; - _self.Bh[nbh].Halfdigest = 0; - _self.Bh[nbh].Dlen = 0; - ++_self.Bhend; - } - -#if NET452_OR_GREATER || NETCOREAPP - [MethodImpl(MethodImplOptions.AggressiveInlining)] -#endif - void fuzzy_try_reduce_blockhash() - { - if (_self.Bhstart >= _self.Bhend) - throw new Exception("Assertion failed"); - - if (_self.Bhend - _self.Bhstart < 2) - /* Need at least two working hashes. */ - return; - - if ((ulong)SSDEEP_BS(_self.Bhstart) * SPAMSUM_LENGTH >= _self.TotalSize) - /* Initial blocksize estimate would select this or a smaller - * blocksize. */ - return; - - if (_self.Bh[_self.Bhstart + 1].Dlen < SPAMSUM_LENGTH / 2) - /* Estimate adjustment would select this blocksize. */ - return; - - /* At this point we are clearly no longer interested in the - * start_blocksize. Get rid of it. */ - ++_self.Bhstart; - } - -#if NET452_OR_GREATER || NETCOREAPP - [MethodImpl(MethodImplOptions.AggressiveInlining)] -#endif - void fuzzy_engine_step(byte c) - { - uint i; - /* At each character we update the rolling hash and the normal hashes. - * When the rolling hash hits a reset value then we emit a normal hash - * as a element of the signature and reset the normal hash. */ - roll_hash(c); - ulong h = roll_sum(); - - for (i = _self.Bhstart; i < _self.Bhend; ++i) - { - _self.Bh[i].H = sum_hash(c, _self.Bh[i].H); - _self.Bh[i].Halfh = sum_hash(c, _self.Bh[i].Halfh); - } - - for (i = _self.Bhstart; i < _self.Bhend; ++i) - { - /* With growing blocksize almost no runs fail the next test. */ - if (h % SSDEEP_BS(i) != SSDEEP_BS(i) - 1) - /* Once this condition is false for one bs, it is - * automatically false for all further bs. I.e. if - * h === -1 (mod 2*bs) then h === -1 (mod bs). */ - break; - - /* We have hit a reset point. We now emit hashes which are - * based on all characters in the piece of the message between - * the last reset point and this one */ - if (0 == _self.Bh[i].Dlen) - fuzzy_try_fork_blockhash(); - - _self.Bh[i].Digest[_self.Bh[i].Dlen] = _b64[_self.Bh[i].H % 64]; - _self.Bh[i].Halfdigest = _b64[_self.Bh[i].Halfh % 64]; - - if (_self.Bh[i].Dlen < SPAMSUM_LENGTH - 1) - { - /* We can have a problem with the tail overflowing. The - * easiest way to cope with this is to only reset the - * normal hash if we have room for more characters in - * our signature. This has the effect of combining the - * last few pieces of the message into a single piece - * */ - _self.Bh[i].Digest[++_self.Bh[i].Dlen] = 0; - _self.Bh[i].H = HASH_INIT; - - if (_self.Bh[i].Dlen >= SPAMSUM_LENGTH / 2) - continue; - - _self.Bh[i].Halfh = HASH_INIT; - _self.Bh[i].Halfdigest = 0; - } - else - fuzzy_try_reduce_blockhash(); - } - } - - // CLAUNIA: Flags seems to never be used in ssdeep, so I just removed it for code simplicity -#if NET452_OR_GREATER || NETCOREAPP - [MethodImpl(MethodImplOptions.AggressiveInlining)] -#endif - void FuzzyDigest(out byte[] result) - { - var sb = new StringBuilder(); - uint bi = _self.Bhstart; - uint h = roll_sum(); - int remain = (int)(FUZZY_MAX_RESULT - 1); /* Exclude terminating '\0'. */ - result = new byte[FUZZY_MAX_RESULT]; - - /* Verify that our elimination was not overeager. */ - if (!(bi == 0 || ((ulong)SSDEEP_BS(bi) / 2) * SPAMSUM_LENGTH < _self.TotalSize)) - throw new Exception("Assertion failed"); - - int resultOff = 0; - - /* Initial blocksize guess. */ - while ((ulong)SSDEEP_BS(bi) * SPAMSUM_LENGTH < _self.TotalSize) - { - ++bi; - - if (bi >= NUM_BLOCKHASHES) - throw new OverflowException("The input exceeds data types."); - } - - /* Adapt blocksize guess to actual digest length. */ - while (bi >= _self.Bhend) - --bi; - - while (bi > _self.Bhstart && - _self.Bh[bi].Dlen < SPAMSUM_LENGTH / 2) - --bi; - - if (bi > 0 && - _self.Bh[bi].Dlen < SPAMSUM_LENGTH / 2) - throw new Exception("Assertion failed"); - - sb.AppendFormat("{0}:", SSDEEP_BS(bi)); - int i = Encoding.ASCII.GetBytes(sb.ToString()).Length; - - if (i <= 0) - /* Maybe snprintf has set errno here? */ - throw new OverflowException("The input exceeds data types."); - - if (i >= remain) - throw new Exception("Assertion failed"); - - remain -= i; - - Array.Copy(Encoding.ASCII.GetBytes(sb.ToString()), 0, result, 0, i); - - resultOff = i; - - i = (int)_self.Bh[bi].Dlen; - - if (i > remain) - throw new Exception("Assertion failed"); - - Array.Copy(_self.Bh[bi].Digest, 0, result, resultOff, i); - resultOff += i; - remain -= i; - - if (h != 0) - { - if (remain <= 0) - throw new Exception("Assertion failed"); - - result[resultOff] = _b64[_self.Bh[bi].H % 64]; - - if (i < 3 || - result[resultOff] != result[resultOff - 1] || - result[resultOff] != result[resultOff - 2] || - result[resultOff] != result[resultOff - 3]) - { - ++resultOff; - --remain; - } - } - else if (_self.Bh[bi].Digest[i] != 0) - { - if (remain <= 0) - throw new Exception("Assertion failed"); - - result[resultOff] = _self.Bh[bi].Digest[i]; - - if (i < 3 || - result[resultOff] != result[resultOff - 1] || - result[resultOff] != result[resultOff - 2] || - result[resultOff] != result[resultOff - 3]) - { - ++resultOff; - --remain; - } - } - - if (remain <= 0) - throw new Exception("Assertion failed"); - - result[resultOff++] = 0x3A; // ':' - --remain; - - if (bi < _self.Bhend - 1) - { - ++bi; - i = (int)_self.Bh[bi].Dlen; - - if (i > remain) - throw new Exception("Assertion failed"); - - Array.Copy(_self.Bh[bi].Digest, 0, result, resultOff, i); - resultOff += i; - remain -= i; - - if (h != 0) - { - if (remain <= 0) - throw new Exception("Assertion failed"); - - h = _self.Bh[bi].Halfh; - result[resultOff] = _b64[h % 64]; - - if (i < 3 || - result[resultOff] != result[resultOff - 1] || - result[resultOff] != result[resultOff - 2] || - result[resultOff] != result[resultOff - 3]) - { - ++resultOff; - --remain; - } - } - else - { - i = _self.Bh[bi].Halfdigest; - - if (i != 0) - { - if (remain <= 0) - throw new Exception("Assertion failed"); - - result[resultOff] = (byte)i; - - if (i < 3 || - result[resultOff] != result[resultOff - 1] || - result[resultOff] != result[resultOff - 2] || - result[resultOff] != result[resultOff - 3]) - { - ++resultOff; - --remain; - } - } - } - } - else if (h != 0) - { - if (_self.Bh[bi].Dlen != 0) - throw new Exception("Assertion failed"); - - if (remain <= 0) - throw new Exception("Assertion failed"); - - result[resultOff++] = _b64[_self.Bh[bi].H % 64]; - /* No need to bother with FUZZY_FLAG_ELIMSEQ, because this - * digest has length 1. */ - --remain; - } - - result[resultOff] = 0; - } - - /// Gets the hash of a file - /// File path. - public static byte[] File(string filename) => - throw new NotImplementedException("SpamSum does not have a binary representation."); - - /// Gets the hash of a file in hexadecimal and as a byte array. - /// File path. - /// Byte array of the hash value. - public static string File(string filename, out byte[] hash) => - throw new NotImplementedException("Not yet implemented."); - - /// Gets the hash of the specified data buffer. - /// Data buffer. - /// Length of the data buffer to hash. - /// null - /// Base64 representation of SpamSum $blocksize:$hash:$hash - public static string Data(byte[] data, uint len, out byte[]? hash) - { - var fuzzyContext = new SpamSumContext(); - - fuzzyContext.Update(data, len); - - hash = null; - - return fuzzyContext.End(); - } - - /// Gets the hash of the specified data buffer. - /// Data buffer. - /// null - /// Base64 representation of SpamSum $blocksize:$hash:$hash - public static string Data(byte[] data, out byte[]? hash) => Data(data, (uint)data.Length, out hash); - - // Converts an ASCII null-terminated string to .NET string -#if NET452_OR_GREATER || NETCOREAPP - [MethodImpl(MethodImplOptions.AggressiveInlining)] -#endif - static string CToString(byte[] cString) - { - int count = 0; - - // ReSharper disable once LoopCanBeConvertedToQuery - // LINQ is six times slower - foreach (byte c in cString) - { - if (c == 0) - break; - - count++; - } - - return Encoding.ASCII.GetString(cString, 0, count); - } - - // Converts an ASCII null-terminated string to .NET string -#if NET452_OR_GREATER || NETCOREAPP - [MethodImpl(MethodImplOptions.AggressiveInlining)] -#endif - static byte[] CToArray(byte[] cString) - { - int count = 0; - - // ReSharper disable once LoopCanBeConvertedToQuery - // LINQ is six times slower - foreach (byte c in cString) - { - if (c == 0) - break; - - count++; - } - -#if NETFRAMEWORK - byte[] temp = new byte[count]; - Array.Copy(cString, temp, count); - return temp; -#else - return new ReadOnlySpan(cString, 0, count).ToArray(); -#endif - } - - public void Dispose() - { - // TODO: No-op to dispose for now - } - - struct RollState - { - public byte[] Window; - - // ROLLING_WINDOW - public uint H1; - public uint H2; - public uint H3; - public uint N; - } - - /* A blockhash contains a signature state for a specific (implicit) blocksize. - * The blocksize is given by SSDEEP_BS(index). The h and halfh members are the - * FNV hashes, where halfh stops to be reset after digest is SPAMSUM_LENGTH/2 - * long. The halfh hash is needed be able to truncate digest for the second - * output hash to stay compatible with ssdeep output. */ - struct BlockhashContext - { - public uint H; - public uint Halfh; - public byte[] Digest; - - // SPAMSUM_LENGTH - public byte Halfdigest; - public uint Dlen; - } - - struct FuzzyState - { - public uint Bhstart; - public uint Bhend; - public BlockhashContext[] Bh; - - //NUM_BLOCKHASHES - public ulong TotalSize; - public RollState Roll; - } - } -} \ No newline at end of file diff --git a/SabreTools.Core/Tools/Hasher.cs b/SabreTools.Core/Tools/Hasher.cs deleted file mode 100644 index 3594d346..00000000 --- a/SabreTools.Core/Tools/Hasher.cs +++ /dev/null @@ -1,164 +0,0 @@ -using System; -using System.Linq; -using System.Security.Cryptography; - -using Aaru.Checksums; - -namespace SabreTools.Core.Tools -{ - /// - /// Async hashing class wraper - /// - public class Hasher - { - public Hash HashType { get; private set; } - private IDisposable? _hasher; - - public Hasher(Hash hashType) - { - this.HashType = hashType; - GetHasher(); - } - - /// - /// Generate the correct hashing class based on the hash type - /// - private void GetHasher() - { - switch (HashType) - { - case Hash.CRC: - _hasher = new OptimizedCRC.OptimizedCRC(); - break; - - case Hash.MD5: - _hasher = MD5.Create(); - break; - - case Hash.SHA1: - _hasher = SHA1.Create(); - break; - - case Hash.SHA256: - _hasher = SHA256.Create(); - break; - - case Hash.SHA384: - _hasher = SHA384.Create(); - break; - - case Hash.SHA512: - _hasher = SHA512.Create(); - break; - - case Hash.SpamSum: - _hasher = new SpamSumContext(); - break; - } - } - - public void Dispose() - { - _hasher?.Dispose(); - } - - /// - /// Process a buffer of some length with the internal hash algorithm - /// - public void Process(byte[] buffer, int size) - { - if (_hasher == null) - return; - - switch (_hasher) - { - case OptimizedCRC.OptimizedCRC crc: - crc.Update(buffer, 0, size); - break; - - case MD5 md5: - md5.TransformBlock(buffer, 0, size, null, 0); - break; - - case SHA1 sha1: - sha1.TransformBlock(buffer, 0, size, null, 0); - break; - - case SHA256 sha256: - sha256.TransformBlock(buffer, 0, size, null, 0); - break; - - case SHA384 sha384: - sha384.TransformBlock(buffer, 0, size, null, 0); - break; - - case SHA512 sha512: - sha512.TransformBlock(buffer, 0, size, null, 0); - break; - - case SpamSumContext spamSum: - spamSum.Update(buffer); - break; - } - } - - /// - /// Terminate the internal hash algorigthm - /// - public void Terminate() - { - if (_hasher == null) - return; - - byte[] emptyBuffer = []; - switch (_hasher) - { - case OptimizedCRC.OptimizedCRC crc: - crc.Update(emptyBuffer, 0, 0); - break; - - case MD5 md5: - md5.TransformFinalBlock(emptyBuffer, 0, 0); - break; - - case SHA1 sha1: - sha1.TransformFinalBlock(emptyBuffer, 0, 0); - break; - - case SHA256 sha256: - sha256.TransformFinalBlock(emptyBuffer, 0, 0); - break; - - case SHA384 sha384: - sha384.TransformFinalBlock(emptyBuffer, 0, 0); - break; - - case SHA512 sha512: - sha512.TransformFinalBlock(emptyBuffer, 0, 0); - break; - - case SpamSumContext: - // No finalization step needed - break; - } - } - - /// - /// Get internal hash as a byte array - /// - public byte[]? GetHash() - { - return _hasher switch - { - OptimizedCRC.OptimizedCRC crc => BitConverter.GetBytes(crc.Value).Reverse().ToArray(), - MD5 md5 => md5.Hash, - SHA1 sha1 => sha1.Hash, - SHA256 sha256 => sha256.Hash, - SHA384 sha384 => sha384.Hash, - SHA512 sha512 => sha512.Hash, - SpamSumContext spamSum => spamSum.Final(), - _ => null, - }; - } - } -} diff --git a/SabreTools.Core/Tools/OptimizedCRC.cs b/SabreTools.Core/Tools/OptimizedCRC.cs deleted file mode 100644 index 700800be..00000000 --- a/SabreTools.Core/Tools/OptimizedCRC.cs +++ /dev/null @@ -1,153 +0,0 @@ -/* - - Copyright (c) 2012-2015 Eugene Larchenko (spct@mail.ru) - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. - -*/ - -using System; - -namespace OptimizedCRC -{ - internal class OptimizedCRC : IDisposable - { - private const uint kCrcPoly = 0xEDB88320; - private const uint kInitial = 0xFFFFFFFF; - private const int CRC_NUM_TABLES = 8; - private static readonly uint[] Table; - - static OptimizedCRC() - { - unchecked - { - Table = new uint[256 * CRC_NUM_TABLES]; - int i; - for (i = 0; i < 256; i++) - { - uint r = (uint)i; - for (int j = 0; j < 8; j++) - { - r = (r >> 1) ^ (kCrcPoly & ~((r & 1) - 1)); - } - Table[i] = r; - } - for (; i < 256 * CRC_NUM_TABLES; i++) - { - uint r = Table[i - 256]; - Table[i] = Table[r & 0xFF] ^ (r >> 8); - } - } - } - - public uint UnsignedValue; - - public OptimizedCRC() - { - Init(); - } - - /// - /// Reset CRC - /// - public void Init() - { - UnsignedValue = kInitial; - } - - public int Value - { - get { return (int)~UnsignedValue; } - } - - public void Update(byte[] data, int offset, int count) - { - new ArraySegment(data, offset, count); // check arguments - if (count == 0) - { - return; - } - - var table = OptimizedCRC.Table; - - uint crc = UnsignedValue; - - for (; (offset & 7) != 0 && count != 0; count--) - { - crc = (crc >> 8) ^ table[(byte)crc ^ data[offset++]]; - } - - if (count >= 8) - { - /* - * Idea from 7-zip project sources (http://7-zip.org/sdk.html) - */ - - int end = (count - 8) & ~7; - count -= end; - end += offset; - - while (offset != end) - { - crc ^= (uint)(data[offset] + (data[offset + 1] << 8) + (data[offset + 2] << 16) + (data[offset + 3] << 24)); - uint high = (uint)(data[offset + 4] + (data[offset + 5] << 8) + (data[offset + 6] << 16) + (data[offset + 7] << 24)); - offset += 8; - - crc = table[(byte)crc + 0x700] - ^ table[(byte)(crc >>= 8) + 0x600] - ^ table[(byte)(crc >>= 8) + 0x500] - ^ table[/*(byte)*/(crc >> 8) + 0x400] - ^ table[(byte)(high) + 0x300] - ^ table[(byte)(high >>= 8) + 0x200] - ^ table[(byte)(high >>= 8) + 0x100] - ^ table[/*(byte)*/(high >> 8) + 0x000]; - } - } - - while (count-- != 0) - { - crc = (crc >> 8) ^ table[(byte)crc ^ data[offset++]]; - } - - UnsignedValue = crc; - } - - static public int Compute(byte[] data, int offset, int count) - { - var crc = new OptimizedCRC(); - crc.Update(data, offset, count); - return crc.Value; - } - - static public int Compute(byte[] data) - { - return Compute(data, 0, data.Length); - } - - static public int Compute(ArraySegment block) - { - return Compute(block.Array!, block.Offset, block.Count); - } - - public void Dispose() - { - UnsignedValue = 0; - } - } -} \ No newline at end of file diff --git a/SabreTools.FileTypes/BaseFile.cs b/SabreTools.FileTypes/BaseFile.cs index 8cc6ac2a..a06f4c64 100644 --- a/SabreTools.FileTypes/BaseFile.cs +++ b/SabreTools.FileTypes/BaseFile.cs @@ -2,10 +2,12 @@ using System.IO; using System.Linq; using System.Threading.Tasks; +using Compress.Support.Compression.LZMA; using SabreTools.Core; using SabreTools.Core.Tools; using SabreTools.FileTypes.Aaru; using SabreTools.FileTypes.CHD; +using SabreTools.Hashing; using SabreTools.IO; using SabreTools.Logging; using SabreTools.Matching; @@ -345,139 +347,62 @@ namespace SabreTools.FileTypes if (size == -1) size = input.Length; - try - { - // Get a list of hashers to run over the buffer - List hashers = []; + // Get a list of hash types to run + List hashTypes = []; #if NETFRAMEWORK - if ((hashes & Hash.CRC) != 0) - hashers.Add(new Hasher(Hash.CRC)); - if ((hashes & Hash.MD5) != 0) - hashers.Add(new Hasher(Hash.MD5)); - if ((hashes & Hash.SHA1) != 0) - hashers.Add(new Hasher(Hash.SHA1)); - if ((hashes & Hash.SHA256) != 0) - hashers.Add(new Hasher(Hash.SHA256)); - if ((hashes & Hash.SHA384) != 0) - hashers.Add(new Hasher(Hash.SHA384)); - if ((hashes & Hash.SHA512) != 0) - hashers.Add(new Hasher(Hash.SHA512)); - if ((hashes & Hash.SpamSum) != 0) - hashers.Add(new Hasher(Hash.SpamSum)); + if ((hashes & Hash.CRC) != 0) hashTypes.Add(HashType.CRC32); + if ((hashes & Hash.MD5) != 0) hashTypes.Add(HashType.MD5); + if ((hashes & Hash.SHA1) != 0) hashTypes.Add(HashType.SHA1); + if ((hashes & Hash.SHA256) != 0) hashTypes.Add(HashType.SHA256); + if ((hashes & Hash.SHA384) != 0) hashTypes.Add(HashType.SHA384); + if ((hashes & Hash.SHA512) != 0) hashTypes.Add(HashType.SHA512); + if ((hashes & Hash.SpamSum) != 0) hashTypes.Add(HashType.SpamSum); #else - if (hashes.HasFlag(Hash.CRC)) - hashers.Add(new Hasher(Hash.CRC)); - if (hashes.HasFlag(Hash.MD5)) - hashers.Add(new Hasher(Hash.MD5)); - if (hashes.HasFlag(Hash.SHA1)) - hashers.Add(new Hasher(Hash.SHA1)); - if (hashes.HasFlag(Hash.SHA256)) - hashers.Add(new Hasher(Hash.SHA256)); - if (hashes.HasFlag(Hash.SHA384)) - hashers.Add(new Hasher(Hash.SHA384)); - if (hashes.HasFlag(Hash.SHA512)) - hashers.Add(new Hasher(Hash.SHA512)); - if (hashes.HasFlag(Hash.SpamSum)) - hashers.Add(new Hasher(Hash.SpamSum)); + if (hashes.HasFlag(Hash.CRC)) hashTypes.Add(HashType.CRC32); + if (hashes.HasFlag(Hash.MD5)) hashTypes.Add(HashType.MD5); + if (hashes.HasFlag(Hash.SHA1)) hashTypes.Add(HashType.SHA1); + if (hashes.HasFlag(Hash.SHA256)) hashTypes.Add(HashType.SHA256); + if (hashes.HasFlag(Hash.SHA384)) hashTypes.Add(HashType.SHA384); + if (hashes.HasFlag(Hash.SHA512)) hashTypes.Add(HashType.SHA512); + if (hashes.HasFlag(Hash.SpamSum)) hashTypes.Add(HashType.SpamSum); #endif - // Initialize the hashing helpers - int buffersize = 3 * 1024 * 1024; - byte[] buffer = new byte[buffersize]; - - /* - Please note that some of the following code is adapted from - RomVault. This is a modified version of how RomVault does - threaded hashing. As such, some of the terminology and code - is the same, though variable names and comments may have - been tweaked to better fit this code base. - */ - - // Pre load the buffer - int next = buffersize > size ? (int)size : buffersize; - int current = input.Read(buffer, 0, next); - long refsize = size; - - while (refsize > 0) - { - // Run hashes in parallel - if (current > 0) -#if NET452_OR_GREATER || NETCOREAPP - Parallel.ForEach(hashers, Globals.ParallelOptions, h => h.Process(buffer, current)); -#elif NET40_OR_GREATER - Parallel.ForEach(hashers, h => h.Process(buffer, current)); -#else - foreach (var h in hashers) - { - h.Process(buffer, current); - } -#endif - - // Load the next buffer - refsize -= current; - next = buffersize > refsize ? (int)refsize : buffersize; - - if (next > 0) - { - current = input.Read(buffer, 0, next); - if (current == 0) - break; - } - } - - // Finalize all hashing helpers -#if NET452_OR_GREATER || NETCOREAPP - Parallel.ForEach(hashers, Globals.ParallelOptions, h => h.Terminate()); -#elif NET40_OR_GREATER - Parallel.ForEach(hashers, h => h.Terminate()); -#else - foreach (var h in hashers) - { - h.Terminate(); - } -#endif - - // Get the results - BaseFile baseFile = new() - { - Size = size, -#if NETFRAMEWORK - CRC = (hashes & Hash.CRC) != 0 ? hashers.First(h => h.HashType == Hash.CRC).GetHash() : null, - MD5 = (hashes & Hash.MD5) != 0 ? hashers.First(h => h.HashType == Hash.MD5).GetHash() : null, - SHA1 = (hashes & Hash.SHA1) != 0 ? hashers.First(h => h.HashType == Hash.SHA1).GetHash() : null, - SHA256 = (hashes & Hash.SHA256) != 0 ? hashers.First(h => h.HashType == Hash.SHA256).GetHash() : null, - SHA384 = (hashes & Hash.SHA384) != 0 ? hashers.First(h => h.HashType == Hash.SHA384).GetHash() : null, - SHA512 = (hashes & Hash.SHA512) != 0 ? hashers.First(h => h.HashType == Hash.SHA512).GetHash() : null, - SpamSum = (hashes & Hash.SpamSum) != 0 ? hashers.First(h => h.HashType == Hash.SpamSum).GetHash() : null, -#else - CRC = hashes.HasFlag(Hash.CRC) ? hashers.First(h => h.HashType == Hash.CRC).GetHash() : null, - MD5 = hashes.HasFlag(Hash.MD5) ? hashers.First(h => h.HashType == Hash.MD5).GetHash() : null, - SHA1 = hashes.HasFlag(Hash.SHA1) ? hashers.First(h => h.HashType == Hash.SHA1).GetHash() : null, - SHA256 = hashes.HasFlag(Hash.SHA256) ? hashers.First(h => h.HashType == Hash.SHA256).GetHash() : null, - SHA384 = hashes.HasFlag(Hash.SHA384) ? hashers.First(h => h.HashType == Hash.SHA384).GetHash() : null, - SHA512 = hashes.HasFlag(Hash.SHA512) ? hashers.First(h => h.HashType == Hash.SHA512).GetHash() : null, - SpamSum = hashes.HasFlag(Hash.SpamSum) ? hashers.First(h => h.HashType == Hash.SpamSum).GetHash() : null, -#endif - }; - - // Dispose of the hashers - hashers.ForEach(h => h.Dispose()); - - return baseFile; - } - catch (IOException ex) - { - LoggerImpl.Warning(ex, "An exception occurred during hashing."); + // Run the hashing on the input stream + var hashDict = HashTool.GetStreamHashes(input, hashTypes.ToArray()); + if (hashDict == null) return new BaseFile(); - } - finally + + // Create a base file with the resulting hashes + var baseFile = new BaseFile() { - if (!keepReadOpen) - input.Dispose(); - else - input.SeekIfPossible(); - } + Size = size, +#if NETFRAMEWORK + CRC = (hashes & Hash.CRC) != 0 ? TextHelper.StringToByteArray(hashDict[HashType.CRC32]) : null, + MD5 = (hashes & Hash.MD5) != 0 ? TextHelper.StringToByteArray(hashDict[HashType.MD5]) : null, + SHA1 = (hashes & Hash.SHA1) != 0 ? TextHelper.StringToByteArray(hashDict[HashType.SHA1]) : null, + SHA256 = (hashes & Hash.SHA256) != 0 ? TextHelper.StringToByteArray(hashDict[HashType.SHA256]) : null, + SHA384 = (hashes & Hash.SHA384) != 0 ? TextHelper.StringToByteArray(hashDict[HashType.SHA384]) : null, + SHA512 = (hashes & Hash.SHA512) != 0 ? TextHelper.StringToByteArray(hashDict[HashType.SHA512]) : null, + SpamSum = (hashes & Hash.SpamSum) != 0 ? TextHelper.StringToByteArray(hashDict[HashType.SpamSum]) : null, +#else + CRC = hashes.HasFlag(Hash.CRC) ? TextHelper.StringToByteArray(hashDict[HashType.CRC32]) : null, + MD5 = hashes.HasFlag(Hash.MD5) ? TextHelper.StringToByteArray(hashDict[HashType.MD5]) : null, + SHA1 = hashes.HasFlag(Hash.SHA1) ? TextHelper.StringToByteArray(hashDict[HashType.SHA1]) : null, + SHA256 = hashes.HasFlag(Hash.SHA256) ? TextHelper.StringToByteArray(hashDict[HashType.SHA256]) : null, + SHA384 = hashes.HasFlag(Hash.SHA384) ? TextHelper.StringToByteArray(hashDict[HashType.SHA384]) : null, + SHA512 = hashes.HasFlag(Hash.SHA512) ? TextHelper.StringToByteArray(hashDict[HashType.SHA512]) : null, + SpamSum = hashes.HasFlag(Hash.SpamSum) ? TextHelper.StringToByteArray(hashDict[HashType.SpamSum]) : null, +#endif + }; + + // Deal with the input stream + if (!keepReadOpen) + input.Dispose(); + else + input.SeekIfPossible(); + + return baseFile; } /// diff --git a/SabreTools.FileTypes/SabreTools.FileTypes.csproj b/SabreTools.FileTypes/SabreTools.FileTypes.csproj index e209aa41..d94d736e 100644 --- a/SabreTools.FileTypes/SabreTools.FileTypes.csproj +++ b/SabreTools.FileTypes/SabreTools.FileTypes.csproj @@ -33,6 +33,7 @@ + diff --git a/SabreTools.Test/FileTypes/BaseFileTests.cs b/SabreTools.Test/FileTypes/BaseFileTests.cs index d815418a..5ee132b1 100644 --- a/SabreTools.Test/FileTypes/BaseFileTests.cs +++ b/SabreTools.Test/FileTypes/BaseFileTests.cs @@ -1,7 +1,6 @@ using System; using System.IO; using System.Text; - using SabreTools.Core; using SabreTools.Core.Tools; using SabreTools.DatItems.Formats;