Distinguish IO extensions and helpers

This commit is contained in:
Matt Nadareski
2020-12-10 22:16:53 -08:00
parent 7141904fc8
commit 58b3cda921
47 changed files with 325 additions and 353 deletions

View File

@@ -1,58 +0,0 @@
// /***************************************************************************
// Aaru Data Preservation Suite
// ----------------------------------------------------------------------------
//
// Filename : IChecksum.cs
// Author(s) : Natalia Portillo <claunia@claunia.com>
//
// Component : Checksums.
//
// --[ Description ] ----------------------------------------------------------
//
// Provides an interface for implementing checksums and hashes.
//
// --[ License ] --------------------------------------------------------------
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the
// "Software"), to deal in the Software without restriction, including
// without limitation the rights to use, copy, modify, merge, publish,
// distribute, sublicense, and/or sell copies of the Software, and to
// permit persons to whom the Software is furnished to do so, subject to
// the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
// IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
// SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//
// ----------------------------------------------------------------------------
// Copyright © 2011-2020 Natalia Portillo
// ****************************************************************************/
namespace Aaru.CommonTypes.Interfaces
{
public interface IChecksum
{
/// <summary>Updates the hash with data.</summary>
/// <param name="data">Data buffer.</param>
/// <param name="len">Length of buffer to hash.</param>
void Update(byte[] data, uint len);
/// <summary>Updates the hash with data.</summary>
/// <param name="data">Data buffer.</param>
void Update(byte[] data);
/// <summary>Returns a byte array of the hash value.</summary>
byte[] Final();
/// <summary>Returns a hexadecimal representation of the hash value.</summary>
string End();
}
}

View File

@@ -1,561 +0,0 @@
// /***************************************************************************
// Aaru Data Preservation Suite
// ----------------------------------------------------------------------------
//
// Filename : SpamSumContext.cs
// Author(s) : Natalia Portillo <claunia@claunia.com>
//
// Component : Checksums.
//
// --[ Description ] ----------------------------------------------------------
//
// Implements the SpamSum fuzzy hashing algorithm.
//
// --[ License ] --------------------------------------------------------------
//
// This library is free software; you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as
// published by the Free Software Foundation; either version 2.1 of the
// License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, see <http://www.gnu.org/licenses/>.
//
// ----------------------------------------------------------------------------
// Copyright © 2011-2020 Natalia Portillo
// ****************************************************************************/
// Based on ssdeep
// Copyright (C) 2002 Andrew Tridgell <tridge@samba.org>
// Copyright (C) 2006 ManTech International Corporation
// Copyright (C) 2013 Helmut Grohne <helmut@subdivi.de>
//
// Earlier versions of this code were named fuzzy.c and can be found at:
// http://www.samba.org/ftp/unpacked/junkcode/spamsum/
// http://ssdeep.sf.net/
using System;
using System.Runtime.CompilerServices;
using System.Text;
using Aaru.CommonTypes.Interfaces;
namespace Aaru.Checksums
{
/// <summary>Implements the SpamSum fuzzy hashing algorithm.</summary>
public sealed class SpamSumContext : IChecksum, IDisposable
{
const uint ROLLING_WINDOW = 7;
const uint MIN_BLOCKSIZE = 3;
const uint HASH_PRIME = 0x01000193;
const uint HASH_INIT = 0x28021967;
const uint NUM_BLOCKHASHES = 31;
const uint SPAMSUM_LENGTH = 64;
const uint FUZZY_MAX_RESULT = (2 * SPAMSUM_LENGTH) + 20;
//"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
readonly byte[] _b64 =
{
0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52,
0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A,
0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x30, 0x31,
0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x2B, 0x2F
};
FuzzyState _self;
/// <summary>Initializes the SpamSum structures</summary>
public SpamSumContext()
{
_self = new FuzzyState
{
Bh = new BlockhashContext[NUM_BLOCKHASHES]
};
for(int i = 0; i < NUM_BLOCKHASHES; i++)
_self.Bh[i].Digest = new byte[SPAMSUM_LENGTH];
_self.Bhstart = 0;
_self.Bhend = 1;
_self.Bh[0].H = HASH_INIT;
_self.Bh[0].Halfh = HASH_INIT;
_self.Bh[0].Digest[0] = 0;
_self.Bh[0].Halfdigest = 0;
_self.Bh[0].Dlen = 0;
_self.TotalSize = 0;
roll_init();
}
/// <inheritdoc />
/// <summary>Updates the hash with data.</summary>
/// <param name="data">Data buffer.</param>
/// <param name="len">Length of buffer to hash.</param>
public void Update(byte[] data, uint len)
{
_self.TotalSize += len;
for(int i = 0; i < len; i++)
fuzzy_engine_step(data[i]);
}
/// <inheritdoc />
/// <summary>Updates the hash with data.</summary>
/// <param name="data">Data buffer.</param>
public void Update(byte[] data) => Update(data, (uint)data.Length);
/// <inheritdoc />
/// <summary>Returns a byte array of the hash value.</summary>
public byte[] Final()
{
FuzzyDigest(out byte[] result);
return CToArray(result);
}
/// <inheritdoc />
/// <summary>Returns a base64 representation of the hash value.</summary>
public string End()
{
FuzzyDigest(out byte[] result);
return CToString(result);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
void roll_init() => _self.Roll = new RollState
{
Window = new byte[ROLLING_WINDOW]
};
/*
* a rolling hash, based on the Adler checksum. By using a rolling hash
* we can perform auto resynchronisation after inserts/deletes
* internally, h1 is the sum of the bytes in the window and h2
* is the sum of the bytes times the index
* h3 is a shift/xor based rolling hash, and is mostly needed to ensure that
* we can cope with large blocksize values
*/
[MethodImpl(MethodImplOptions.AggressiveInlining)]
void roll_hash(byte c)
{
_self.Roll.H2 -= _self.Roll.H1;
_self.Roll.H2 += ROLLING_WINDOW * c;
_self.Roll.H1 += c;
_self.Roll.H1 -= _self.Roll.Window[_self.Roll.N % ROLLING_WINDOW];
_self.Roll.Window[_self.Roll.N % ROLLING_WINDOW] = c;
_self.Roll.N++;
/* The original spamsum AND'ed this value with 0xFFFFFFFF which
* in theory should have no effect. This AND has been removed
* for performance (jk) */
_self.Roll.H3 <<= 5;
_self.Roll.H3 ^= c;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
uint roll_sum() => _self.Roll.H1 + _self.Roll.H2 + _self.Roll.H3;
/* A simple non-rolling hash, based on the FNV hash. */
[MethodImpl(MethodImplOptions.AggressiveInlining)]
static uint sum_hash(byte c, uint h) => (h * HASH_PRIME) ^ c;
[MethodImpl(MethodImplOptions.AggressiveInlining)]
static uint SSDEEP_BS(uint index) => MIN_BLOCKSIZE << (int)index;
[MethodImpl(MethodImplOptions.AggressiveInlining)]
void fuzzy_try_fork_blockhash()
{
if(_self.Bhend >= NUM_BLOCKHASHES)
return;
if(_self.Bhend == 0) // assert
throw new Exception("Assertion failed");
uint obh = _self.Bhend - 1;
uint nbh = _self.Bhend;
_self.Bh[nbh].H = _self.Bh[obh].H;
_self.Bh[nbh].Halfh = _self.Bh[obh].Halfh;
_self.Bh[nbh].Digest[0] = 0;
_self.Bh[nbh].Halfdigest = 0;
_self.Bh[nbh].Dlen = 0;
++_self.Bhend;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
void fuzzy_try_reduce_blockhash()
{
if(_self.Bhstart >= _self.Bhend)
throw new Exception("Assertion failed");
if(_self.Bhend - _self.Bhstart < 2)
/* Need at least two working hashes. */
return;
if((ulong)SSDEEP_BS(_self.Bhstart) * SPAMSUM_LENGTH >= _self.TotalSize)
/* Initial blocksize estimate would select this or a smaller
* blocksize. */
return;
if(_self.Bh[_self.Bhstart + 1].Dlen < SPAMSUM_LENGTH / 2)
/* Estimate adjustment would select this blocksize. */
return;
/* At this point we are clearly no longer interested in the
* start_blocksize. Get rid of it. */
++_self.Bhstart;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
void fuzzy_engine_step(byte c)
{
uint i;
/* At each character we update the rolling hash and the normal hashes.
* When the rolling hash hits a reset value then we emit a normal hash
* as a element of the signature and reset the normal hash. */
roll_hash(c);
ulong h = roll_sum();
for(i = _self.Bhstart; i < _self.Bhend; ++i)
{
_self.Bh[i].H = sum_hash(c, _self.Bh[i].H);
_self.Bh[i].Halfh = sum_hash(c, _self.Bh[i].Halfh);
}
for(i = _self.Bhstart; i < _self.Bhend; ++i)
{
/* With growing blocksize almost no runs fail the next test. */
if(h % SSDEEP_BS(i) != SSDEEP_BS(i) - 1)
/* Once this condition is false for one bs, it is
* automatically false for all further bs. I.e. if
* h === -1 (mod 2*bs) then h === -1 (mod bs). */
break;
/* We have hit a reset point. We now emit hashes which are
* based on all characters in the piece of the message between
* the last reset point and this one */
if(0 == _self.Bh[i].Dlen)
fuzzy_try_fork_blockhash();
_self.Bh[i].Digest[_self.Bh[i].Dlen] = _b64[_self.Bh[i].H % 64];
_self.Bh[i].Halfdigest = _b64[_self.Bh[i].Halfh % 64];
if(_self.Bh[i].Dlen < SPAMSUM_LENGTH - 1)
{
/* We can have a problem with the tail overflowing. The
* easiest way to cope with this is to only reset the
* normal hash if we have room for more characters in
* our signature. This has the effect of combining the
* last few pieces of the message into a single piece
* */
_self.Bh[i].Digest[++_self.Bh[i].Dlen] = 0;
_self.Bh[i].H = HASH_INIT;
if(_self.Bh[i].Dlen >= SPAMSUM_LENGTH / 2)
continue;
_self.Bh[i].Halfh = HASH_INIT;
_self.Bh[i].Halfdigest = 0;
}
else
fuzzy_try_reduce_blockhash();
}
}
// CLAUNIA: Flags seems to never be used in ssdeep, so I just removed it for code simplicity
[MethodImpl(MethodImplOptions.AggressiveInlining)]
void FuzzyDigest(out byte[] result)
{
var sb = new StringBuilder();
uint bi = _self.Bhstart;
uint h = roll_sum();
int remain = (int)(FUZZY_MAX_RESULT - 1); /* Exclude terminating '\0'. */
result = new byte[FUZZY_MAX_RESULT];
/* Verify that our elimination was not overeager. */
if(!(bi == 0 || ((ulong)SSDEEP_BS(bi) / 2) * SPAMSUM_LENGTH < _self.TotalSize))
throw new Exception("Assertion failed");
int resultOff = 0;
/* Initial blocksize guess. */
while((ulong)SSDEEP_BS(bi) * SPAMSUM_LENGTH < _self.TotalSize)
{
++bi;
if(bi >= NUM_BLOCKHASHES)
throw new OverflowException("The input exceeds data types.");
}
/* Adapt blocksize guess to actual digest length. */
while(bi >= _self.Bhend)
--bi;
while(bi > _self.Bhstart &&
_self.Bh[bi].Dlen < SPAMSUM_LENGTH / 2)
--bi;
if(bi > 0 &&
_self.Bh[bi].Dlen < SPAMSUM_LENGTH / 2)
throw new Exception("Assertion failed");
sb.AppendFormat("{0}:", SSDEEP_BS(bi));
int i = Encoding.ASCII.GetBytes(sb.ToString()).Length;
if(i <= 0)
/* Maybe snprintf has set errno here? */
throw new OverflowException("The input exceeds data types.");
if(i >= remain)
throw new Exception("Assertion failed");
remain -= i;
Array.Copy(Encoding.ASCII.GetBytes(sb.ToString()), 0, result, 0, i);
resultOff = i;
i = (int)_self.Bh[bi].Dlen;
if(i > remain)
throw new Exception("Assertion failed");
Array.Copy(_self.Bh[bi].Digest, 0, result, resultOff, i);
resultOff += i;
remain -= i;
if(h != 0)
{
if(remain <= 0)
throw new Exception("Assertion failed");
result[resultOff] = _b64[_self.Bh[bi].H % 64];
if(i < 3 ||
result[resultOff] != result[resultOff - 1] ||
result[resultOff] != result[resultOff - 2] ||
result[resultOff] != result[resultOff - 3])
{
++resultOff;
--remain;
}
}
else if(_self.Bh[bi].Digest[i] != 0)
{
if(remain <= 0)
throw new Exception("Assertion failed");
result[resultOff] = _self.Bh[bi].Digest[i];
if(i < 3 ||
result[resultOff] != result[resultOff - 1] ||
result[resultOff] != result[resultOff - 2] ||
result[resultOff] != result[resultOff - 3])
{
++resultOff;
--remain;
}
}
if(remain <= 0)
throw new Exception("Assertion failed");
result[resultOff++] = 0x3A; // ':'
--remain;
if(bi < _self.Bhend - 1)
{
++bi;
i = (int)_self.Bh[bi].Dlen;
if(i > remain)
throw new Exception("Assertion failed");
Array.Copy(_self.Bh[bi].Digest, 0, result, resultOff, i);
resultOff += i;
remain -= i;
if(h != 0)
{
if(remain <= 0)
throw new Exception("Assertion failed");
h = _self.Bh[bi].Halfh;
result[resultOff] = _b64[h % 64];
if(i < 3 ||
result[resultOff] != result[resultOff - 1] ||
result[resultOff] != result[resultOff - 2] ||
result[resultOff] != result[resultOff - 3])
{
++resultOff;
--remain;
}
}
else
{
i = _self.Bh[bi].Halfdigest;
if(i != 0)
{
if(remain <= 0)
throw new Exception("Assertion failed");
result[resultOff] = (byte)i;
if(i < 3 ||
result[resultOff] != result[resultOff - 1] ||
result[resultOff] != result[resultOff - 2] ||
result[resultOff] != result[resultOff - 3])
{
++resultOff;
--remain;
}
}
}
}
else if(h != 0)
{
if(_self.Bh[bi].Dlen != 0)
throw new Exception("Assertion failed");
if(remain <= 0)
throw new Exception("Assertion failed");
result[resultOff++] = _b64[_self.Bh[bi].H % 64];
/* No need to bother with FUZZY_FLAG_ELIMSEQ, because this
* digest has length 1. */
--remain;
}
result[resultOff] = 0;
}
/// <summary>Gets the hash of a file</summary>
/// <param name="filename">File path.</param>
public static byte[] File(string filename) =>
throw new NotImplementedException("SpamSum does not have a binary representation.");
/// <summary>Gets the hash of a file in hexadecimal and as a byte array.</summary>
/// <param name="filename">File path.</param>
/// <param name="hash">Byte array of the hash value.</param>
public static string File(string filename, out byte[] hash) =>
throw new NotImplementedException("Not yet implemented.");
/// <summary>Gets the hash of the specified data buffer.</summary>
/// <param name="data">Data buffer.</param>
/// <param name="len">Length of the data buffer to hash.</param>
/// <param name="hash">null</param>
/// <returns>Base64 representation of SpamSum $blocksize:$hash:$hash</returns>
public static string Data(byte[] data, uint len, out byte[] hash)
{
var fuzzyContext = new SpamSumContext();
fuzzyContext.Update(data, len);
hash = null;
return fuzzyContext.End();
}
/// <summary>Gets the hash of the specified data buffer.</summary>
/// <param name="data">Data buffer.</param>
/// <param name="hash">null</param>
/// <returns>Base64 representation of SpamSum $blocksize:$hash:$hash</returns>
public static string Data(byte[] data, out byte[] hash) => Data(data, (uint)data.Length, out hash);
// Converts an ASCII null-terminated string to .NET string
[MethodImpl(MethodImplOptions.AggressiveInlining)]
static string CToString(byte[] cString)
{
int count = 0;
// ReSharper disable once LoopCanBeConvertedToQuery
// LINQ is six times slower
foreach(byte c in cString)
{
if(c == 0)
break;
count++;
}
return Encoding.ASCII.GetString(cString, 0, count);
}
// Converts an ASCII null-terminated string to .NET string
[MethodImpl(MethodImplOptions.AggressiveInlining)]
static byte[] CToArray(byte[] cString)
{
int count = 0;
// ReSharper disable once LoopCanBeConvertedToQuery
// LINQ is six times slower
foreach (byte c in cString)
{
if (c == 0)
break;
count++;
}
#if NET_FRAMEWORK
byte[] newString = new byte[count];
Array.Copy(cString, newString, count);
return newString;
#else
return new ReadOnlySpan<byte>(cString, 0, count).ToArray();
#endif
}
public void Dispose()
{
// TODO: No-op to dispose for now
}
struct RollState
{
public byte[] Window;
// ROLLING_WINDOW
public uint H1;
public uint H2;
public uint H3;
public uint N;
}
/* A blockhash contains a signature state for a specific (implicit) blocksize.
* The blocksize is given by SSDEEP_BS(index). The h and halfh members are the
* FNV hashes, where halfh stops to be reset after digest is SPAMSUM_LENGTH/2
* long. The halfh hash is needed be able to truncate digest for the second
* output hash to stay compatible with ssdeep output. */
struct BlockhashContext
{
public uint H;
public uint Halfh;
public byte[] Digest;
// SPAMSUM_LENGTH
public byte Halfdigest;
public uint Dlen;
}
struct FuzzyState
{
public uint Bhstart;
public uint Bhend;
public BlockhashContext[] Bh;
//NUM_BLOCKHASHES
public ulong TotalSize;
public RollState Roll;
}
}
}

View File

@@ -1,77 +0,0 @@
using System.IO;
using System.Text;
namespace SabreTools.IO
{
/// <summary>
/// Extensions to File functionality
/// </summary>
public static class FileExtensions
{
/// <summary>
/// Determines a text file's encoding by analyzing its byte order mark (BOM).
/// Defaults to ASCII when detection of the text file's endianness fails.
/// </summary>
/// <param name="filename">The text file to analyze.</param>
/// <returns>The detected encoding.</returns>
/// <link>http://stackoverflow.com/questions/3825390/effective-way-to-find-any-files-encoding</link>
public static Encoding GetEncoding(string filename)
{
if (!File.Exists(filename))
return Encoding.Default;
// Try to open the file
try
{
FileStream file = File.OpenRead(filename);
if (file == null)
return Encoding.Default;
// Read the BOM
var bom = new byte[4];
file.Read(bom, 0, 4);
file.Dispose();
// Analyze the BOM
if (bom[0] == 0x2b && bom[1] == 0x2f && bom[2] == 0x76) return Encoding.UTF7;
if (bom[0] == 0xef && bom[1] == 0xbb && bom[2] == 0xbf) return Encoding.UTF8;
if (bom[0] == 0xff && bom[1] == 0xfe) return Encoding.Unicode; //UTF-16LE
if (bom[0] == 0xfe && bom[1] == 0xff) return Encoding.BigEndianUnicode; //UTF-16BE
if (bom[0] == 0 && bom[1] == 0 && bom[2] == 0xfe && bom[3] == 0xff) return Encoding.UTF32;
return Encoding.Default;
}
catch
{
return Encoding.Default;
}
}
/// <summary>
/// Returns if the first byte array starts with the second array
/// </summary>
/// <param name="arr1">First byte array to compare</param>
/// <param name="arr2">Second byte array to compare</param>
/// <param name="exact">True if the input arrays should match exactly, false otherwise (default)</param>
/// <returns>True if the first byte array starts with the second, false otherwise</returns>
public static bool StartsWith(this byte[] arr1, byte[] arr2, bool exact = false)
{
// If we have any invalid inputs, we return false
if (arr1 == null || arr2 == null
|| arr1.Length == 0 || arr2.Length == 0
|| arr2.Length > arr1.Length
|| (exact && arr1.Length != arr2.Length))
{
return false;
}
// Otherwise, loop through and see
for (int i = 0; i < arr2.Length; i++)
{
if (arr1[i] != arr2[i])
return false;
}
return true;
}
}
}

View File

@@ -1,156 +0,0 @@
using System;
using System.Linq;
using System.Security.Cryptography;
using Aaru.Checksums;
using SabreTools.Core;
namespace SabreTools.IO
{
/// <summary>
/// Async hashing class wraper
/// </summary>
public class Hasher
{
public Hash HashType { get; private set; }
private IDisposable _hasher;
public Hasher(Hash hashType)
{
this.HashType = hashType;
GetHasher();
}
/// <summary>
/// Generate the correct hashing class based on the hash type
/// </summary>
private void GetHasher()
{
switch (HashType)
{
case Hash.CRC:
_hasher = new OptimizedCRC.OptimizedCRC();
break;
case Hash.MD5:
_hasher = MD5.Create();
break;
#if NET_FRAMEWORK
case Hash.RIPEMD160:
_hasher = RIPEMD160.Create();
break;
#endif
case Hash.SHA1:
_hasher = SHA1.Create();
break;
case Hash.SHA256:
_hasher = SHA256.Create();
break;
case Hash.SHA384:
_hasher = SHA384.Create();
break;
case Hash.SHA512:
_hasher = SHA512.Create();
break;
case Hash.SpamSum:
_hasher = new SpamSumContext();
break;
}
}
public void Dispose()
{
_hasher.Dispose();
}
/// <summary>
/// Process a buffer of some length with the internal hash algorithm
/// </summary>
public void Process(byte[] buffer, int size)
{
switch (HashType)
{
case Hash.CRC:
(_hasher as OptimizedCRC.OptimizedCRC).Update(buffer, 0, size);
break;
case Hash.MD5:
#if NET_FRAMEWORK
case Hash.RIPEMD160:
#endif
case Hash.SHA1:
case Hash.SHA256:
case Hash.SHA384:
case Hash.SHA512:
(_hasher as HashAlgorithm).TransformBlock(buffer, 0, size, null, 0);
break;
case Hash.SpamSum:
(_hasher as SpamSumContext).Update(buffer);
break;
}
}
/// <summary>
/// Finalize the internal hash algorigthm
/// </summary>
public void Finalize()
{
byte[] emptyBuffer = new byte[0];
switch (HashType)
{
case Hash.CRC:
(_hasher as OptimizedCRC.OptimizedCRC).Update(emptyBuffer, 0, 0);
break;
case Hash.MD5:
#if NET_FRAMEWORK
case Hash.RIPEMD160:
#endif
case Hash.SHA1:
case Hash.SHA256:
case Hash.SHA384:
case Hash.SHA512:
(_hasher as HashAlgorithm).TransformFinalBlock(emptyBuffer, 0, 0);
break;
case Hash.SpamSum:
// No finalization step needed
break;
}
}
/// <summary>
/// Get internal hash as a byte array
/// </summary>
public byte[] GetHash()
{
switch (HashType)
{
case Hash.CRC:
return BitConverter.GetBytes((_hasher as OptimizedCRC.OptimizedCRC).Value).Reverse().ToArray();
case Hash.MD5:
#if NET_FRAMEWORK
case Hash.RIPEMD160:
#endif
case Hash.SHA1:
case Hash.SHA256:
case Hash.SHA384:
case Hash.SHA512:
return (_hasher as HashAlgorithm).Hash;
case Hash.SpamSum:
return (_hasher as SpamSumContext).Final();
}
return null;
}
}
}

View File

@@ -0,0 +1,132 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
namespace SabreTools.IO
{
/// <summary>
/// Methods around path operations
/// </summary>
public static class IOExtensions
{
/// <summary>
/// Ensure the output directory is a proper format and can be created
/// </summary>
/// <param name="dir">Directory to check</param>
/// <param name="create">True if the directory should be created, false otherwise (default)</param>
/// <param name="temp">True if this is a temp directory, false otherwise</param>
/// <returns>Full path to the directory</returns>
public static string Ensure(this string dir, bool create = false, bool temp = false)
{
// If the output directory is invalid
if (string.IsNullOrWhiteSpace(dir))
{
if (temp)
dir = Path.GetTempPath();
else
dir = Environment.CurrentDirectory;
}
// Get the full path for the output directory
dir = Path.GetFullPath(dir);
// If we're creating the output folder, do so
if (create)
Directory.CreateDirectory(dir);
return dir;
}
/// <summary>
/// Determines a text file's encoding by analyzing its byte order mark (BOM).
/// Defaults to ASCII when detection of the text file's endianness fails.
/// </summary>
/// <param name="filename">The text file to analyze.</param>
/// <returns>The detected encoding.</returns>
/// <link>http://stackoverflow.com/questions/3825390/effective-way-to-find-any-files-encoding</link>
public static Encoding GetEncoding(this string filename)
{
if (string.IsNullOrEmpty(filename))
return Encoding.Default;
if (!File.Exists(filename))
return Encoding.Default;
// Try to open the file
try
{
FileStream file = File.OpenRead(filename);
if (file == null)
return Encoding.Default;
// Read the BOM
var bom = new byte[4];
file.Read(bom, 0, 4);
file.Dispose();
// Analyze the BOM
if (bom[0] == 0x2b && bom[1] == 0x2f && bom[2] == 0x76) return Encoding.UTF7;
if (bom[0] == 0xef && bom[1] == 0xbb && bom[2] == 0xbf) return Encoding.UTF8;
if (bom[0] == 0xff && bom[1] == 0xfe) return Encoding.Unicode; //UTF-16LE
if (bom[0] == 0xfe && bom[1] == 0xff) return Encoding.BigEndianUnicode; //UTF-16BE
if (bom[0] == 0 && bom[1] == 0 && bom[2] == 0xfe && bom[3] == 0xff) return Encoding.UTF32;
return Encoding.Default;
}
catch
{
return Encoding.Default;
}
}
/// <summary>
/// Get the extension from the path, if possible
/// </summary>
/// <param name="path">Path to get extension from</param>
/// <returns>Extension, if possible</returns>
public static string GetNormalizedExtension(this string path)
{
// Check null or empty first
if (string.IsNullOrWhiteSpace(path))
return null;
// Get the extension from the path, if possible
string ext = Path.GetExtension(path)?.ToLowerInvariant();
// Check if the extension is null or empty
if (string.IsNullOrWhiteSpace(ext))
return null;
// Make sure that extensions are valid
ext = ext.TrimStart('.');
return ext;
}
/// <summary>
/// Get all empty folders within a root folder
/// </summary>
/// <param name="root">Root directory to parse</param>
/// <returns>IEumerable containing all directories that are empty, an empty enumerable if the root is empty, null otherwise</returns>
public static List<string> ListEmpty(this string root)
{
// Check null or empty first
if (string.IsNullOrEmpty(root))
return null;
// Then, check if the root exists
if (!Directory.Exists(root))
return null;
// If it does and it is empty, return a blank enumerable
if (Directory.EnumerateFileSystemEntries(root, "*", SearchOption.AllDirectories).Count() == 0)
return new List<string>();
// Otherwise, get the complete list
return Directory.EnumerateDirectories(root, "*", SearchOption.AllDirectories)
.Where(dir => Directory.EnumerateFileSystemEntries(dir, "*", SearchOption.AllDirectories).Count() == 0)
.ToList();
}
}
}

View File

@@ -1,153 +0,0 @@
/*
Copyright (c) 2012-2015 Eugene Larchenko (spct@mail.ru)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
using System;
namespace OptimizedCRC
{
internal class OptimizedCRC : IDisposable
{
private const uint kCrcPoly = 0xEDB88320;
private const uint kInitial = 0xFFFFFFFF;
private const int CRC_NUM_TABLES = 8;
private static readonly uint[] Table;
static OptimizedCRC()
{
unchecked
{
Table = new uint[256 * CRC_NUM_TABLES];
int i;
for (i = 0; i < 256; i++)
{
uint r = (uint)i;
for (int j = 0; j < 8; j++)
{
r = (r >> 1) ^ (kCrcPoly & ~((r & 1) - 1));
}
Table[i] = r;
}
for (; i < 256 * CRC_NUM_TABLES; i++)
{
uint r = Table[i - 256];
Table[i] = Table[r & 0xFF] ^ (r >> 8);
}
}
}
public uint UnsignedValue;
public OptimizedCRC()
{
Init();
}
/// <summary>
/// Reset CRC
/// </summary>
public void Init()
{
UnsignedValue = kInitial;
}
public int Value
{
get { return (int)~UnsignedValue; }
}
public void Update(byte[] data, int offset, int count)
{
new ArraySegment<byte>(data, offset, count); // check arguments
if (count == 0)
{
return;
}
var table = OptimizedCRC.Table;
uint crc = UnsignedValue;
for (; (offset & 7) != 0 && count != 0; count--)
{
crc = (crc >> 8) ^ table[(byte)crc ^ data[offset++]];
}
if (count >= 8)
{
/*
* Idea from 7-zip project sources (http://7-zip.org/sdk.html)
*/
int end = (count - 8) & ~7;
count -= end;
end += offset;
while (offset != end)
{
crc ^= (uint)(data[offset] + (data[offset + 1] << 8) + (data[offset + 2] << 16) + (data[offset + 3] << 24));
uint high = (uint)(data[offset + 4] + (data[offset + 5] << 8) + (data[offset + 6] << 16) + (data[offset + 7] << 24));
offset += 8;
crc = table[(byte)crc + 0x700]
^ table[(byte)(crc >>= 8) + 0x600]
^ table[(byte)(crc >>= 8) + 0x500]
^ table[/*(byte)*/(crc >> 8) + 0x400]
^ table[(byte)(high) + 0x300]
^ table[(byte)(high >>= 8) + 0x200]
^ table[(byte)(high >>= 8) + 0x100]
^ table[/*(byte)*/(high >> 8) + 0x000];
}
}
while (count-- != 0)
{
crc = (crc >> 8) ^ table[(byte)crc ^ data[offset++]];
}
UnsignedValue = crc;
}
static public int Compute(byte[] data, int offset, int count)
{
var crc = new OptimizedCRC();
crc.Update(data, offset, count);
return crc.Value;
}
static public int Compute(byte[] data)
{
return Compute(data, 0, data.Length);
}
static public int Compute(ArraySegment<byte> block)
{
return Compute(block.Array, block.Offset, block.Count);
}
public void Dispose()
{
UnsignedValue = 0;
}
}
}

View File

@@ -71,7 +71,7 @@ namespace SabreTools.IO
public string GetOutputPath(string outDir, bool inplace)
{
// First, we need to ensure the output directory
outDir = DirectoryExtensions.Ensure(outDir);
outDir = outDir.Ensure();
// Check if we have a split path or not
bool splitpath = !string.IsNullOrWhiteSpace(ParentPath);

View File

@@ -1,141 +0,0 @@
using System.IO;
using SabreTools.Core;
namespace SabreTools.IO
{
/// <summary>
/// Extensions to Path functionality
/// </summary>
public static class PathExtensions
{
/// <summary>
/// Get the extension from the path, if possible
/// </summary>
/// <param name="path">Path to get extension from</param>
/// <returns>Extension, if possible</returns>
public static string GetNormalizedExtension(string path)
{
// Check null or empty first
if (string.IsNullOrWhiteSpace(path))
return null;
// Get the extension from the path, if possible
string ext = Path.GetExtension(path)?.ToLowerInvariant();
// Check if the extension is null or empty
if (string.IsNullOrWhiteSpace(ext))
return null;
// Make sure that extensions are valid
ext = ext.TrimStart('.');
return ext;
}
/// <summary>
/// Get a proper romba sub path
/// </summary>
/// <param name="hash">SHA-1 hash to get the path for</param>
/// <param name="depth">Positive value representing the depth of the depot</param>
/// <returns>Subfolder path for the given hash</returns>
public static string GetDepotPath(string hash, int depth)
{
// If the hash isn't the right size, then we return null
if (hash.Length != Constants.SHA1Length)
return null;
// Cap the depth between 0 and 20, for now
if (depth < 0)
depth = 0;
else if (depth > Constants.SHA1ZeroBytes.Length)
depth = Constants.SHA1ZeroBytes.Length;
// Loop through and generate the subdirectory
string path = string.Empty;
for (int i = 0; i < depth; i++)
{
path += hash.Substring(i * 2, 2) + Path.DirectorySeparatorChar;
}
// Now append the filename
path += $"{hash}.gz";
return path;
}
/// <summary>
/// Get if the given path has a valid DAT extension
/// </summary>
/// <param name="path">Path to check</param>
/// <returns>True if the extension is valid, false otherwise</returns>
public static bool HasValidArchiveExtension(string path)
{
// Get the extension from the path, if possible
string ext = GetNormalizedExtension(path);
// Check against the list of known archive extensions
switch (ext)
{
// Aaruformat
case "aaru":
case "aaruf":
case "aaruformat":
case "aif":
case "dicf":
// Archives
case "7z":
case "gz":
case "lzma":
case "rar":
case "rev":
case "r00":
case "r01":
case "tar":
case "tgz":
case "tlz":
case "zip":
case "zipx":
// CHD
case "chd":
return true;
default:
return false;
}
}
/// <summary>
/// Get if the given path has a valid DAT extension
/// </summary>
/// <param name="path">Path to check</param>
/// <returns>True if the extension is valid, false otherwise</returns>
public static bool HasValidDatExtension(string path)
{
// Get the extension from the path, if possible
string ext = GetNormalizedExtension(path);
// Check against the list of known DAT extensions
switch (ext)
{
case "csv":
case "dat":
case "json":
case "md5":
case "ripemd160":
case "sfv":
case "sha1":
case "sha256":
case "sha384":
case "sha512":
case "ssv":
case "tsv":
case "txt":
case "xml":
return true;
default:
return false;
}
}
}
}

View File

@@ -1,4 +1,3 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
@@ -8,65 +7,10 @@ using NaturalSort;
namespace SabreTools.IO
{
/// <summary>
/// Extensions to Directory functionality
/// Methods around path operations
/// </summary>
public static class DirectoryExtensions
public static class PathTool
{
/// <summary>
/// Cleans out the temporary directory
/// </summary>
/// <param name="dir">Name of the directory to clean out</param>
public static void Clean(string dir)
{
foreach (string file in Directory.EnumerateFiles(dir, "*", SearchOption.TopDirectoryOnly))
{
try
{
if (File.Exists(file))
File.Delete(file);
}
catch { }
}
foreach (string subdir in Directory.EnumerateDirectories(dir, "*", SearchOption.TopDirectoryOnly))
{
try
{
if (Directory.Exists(subdir))
Directory.Delete(subdir);
}
catch { }
}
}
/// <summary>
/// Ensure the output directory is a proper format and can be created
/// </summary>
/// <param name="dir">Directory to check</param>
/// <param name="create">True if the directory should be created, false otherwise (default)</param>
/// <param name="temp">True if this is a temp directory, false otherwise</param>
/// <returns>Full path to the directory</returns>
public static string Ensure(string dir, bool create = false, bool temp = false)
{
// If the output directory is invalid
if (string.IsNullOrWhiteSpace(dir))
{
if (temp)
dir = Path.GetTempPath();
else
dir = Environment.CurrentDirectory;
}
// Get the full path for the output directory
dir = Path.GetFullPath(dir);
// If we're creating the output folder, do so
if (create)
Directory.CreateDirectory(dir);
return dir;
}
/// <summary>
/// Retrieve a list of just directories from inputs
/// </summary>
@@ -222,26 +166,5 @@ namespace SabreTools.IO
// Return the new list
return infiles;
}
/// <summary>
/// Get all empty folders within a root folder
/// </summary>
/// <param name="root">Root directory to parse</param>
/// <returns>IEumerable containing all directories that are empty, an empty enumerable if the root is empty, null otherwise</returns>
public static List<string> ListEmpty(string root)
{
// Check if the root exists first
if (!Directory.Exists(root))
return null;
// If it does and it is empty, return a blank enumerable
if (Directory.EnumerateFileSystemEntries(root, "*", SearchOption.AllDirectories).Count() == 0)
return new List<string>();
// Otherwise, get the complete list
return Directory.EnumerateDirectories(root, "*", SearchOption.AllDirectories)
.Where(dir => Directory.EnumerateFileSystemEntries(dir, "*", SearchOption.AllDirectories).Count() == 0)
.ToList();
}
}
}