Distinguish IO extensions and helpers

2025-12-16 19:14:27 +00:00 · 2020-12-10 22:16:53 -08:00
parent 7141904fc8
commit 58b3cda921
47 changed files with 325 additions and 353 deletions
--- a/SabreTools.IO/Aaru/IChecksum.cs
+++ b/SabreTools.IO/Aaru/IChecksum.cs
@@ -1,58 +0,0 @@
-// /***************************************************************************
-// Aaru Data Preservation Suite
-// ----------------------------------------------------------------------------
-//
-// Filename       : IChecksum.cs
-// Author(s)      : Natalia Portillo <claunia@claunia.com>
-//
-// Component      : Checksums.
-//
-// --[ Description ] ----------------------------------------------------------
-//
-//     Provides an interface for implementing checksums and hashes.
-//
-// --[ License ] --------------------------------------------------------------
-//
-//     Permission is hereby granted, free of charge, to any person obtaining a
-//     copy of this software and associated documentation files (the
-//     "Software"), to deal in the Software without restriction, including
-//     without limitation the rights to use, copy, modify, merge, publish,
-//     distribute, sublicense, and/or sell copies of the Software, and to
-//     permit persons to whom the Software is furnished to do so, subject to
-//     the following conditions:
-//
-//     The above copyright notice and this permission notice shall be included
-//     in all copies or substantial portions of the Software.
-//
-//     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-//     OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-//     MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-//     IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
-//     CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
-//     TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
-//     SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-//
-// ----------------------------------------------------------------------------
-// Copyright © 2011-2020 Natalia Portillo
-// ****************************************************************************/
-
-namespace Aaru.CommonTypes.Interfaces
-{
-    public interface IChecksum
-    {
-        /// <summary>Updates the hash with data.</summary>
-        /// <param name="data">Data buffer.</param>
-        /// <param name="len">Length of buffer to hash.</param>
-        void Update(byte[] data, uint len);
-
-        /// <summary>Updates the hash with data.</summary>
-        /// <param name="data">Data buffer.</param>
-        void Update(byte[] data);
-
-        /// <summary>Returns a byte array of the hash value.</summary>
-        byte[] Final();
-
-        /// <summary>Returns a hexadecimal representation of the hash value.</summary>
-        string End();
-    }
-}
--- a/SabreTools.IO/Aaru/SpamSumContext.cs
+++ b/SabreTools.IO/Aaru/SpamSumContext.cs
@@ -1,561 +0,0 @@
-// /***************************************************************************
-// Aaru Data Preservation Suite
-// ----------------------------------------------------------------------------
-//
-// Filename       : SpamSumContext.cs
-// Author(s)      : Natalia Portillo <claunia@claunia.com>
-//
-// Component      : Checksums.
-//
-// --[ Description ] ----------------------------------------------------------
-//
-//     Implements the SpamSum fuzzy hashing algorithm.
-//
-// --[ License ] --------------------------------------------------------------
-//
-//     This library is free software; you can redistribute it and/or modify
-//     it under the terms of the GNU Lesser General Public License as
-//     published by the Free Software Foundation; either version 2.1 of the
-//     License, or (at your option) any later version.
-//
-//     This library is distributed in the hope that it will be useful, but
-//     WITHOUT ANY WARRANTY; without even the implied warranty of
-//     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-//     Lesser General Public License for more details.
-//
-//     You should have received a copy of the GNU Lesser General Public
-//     License along with this library; if not, see <http://www.gnu.org/licenses/>.
-//
-// ----------------------------------------------------------------------------
-// Copyright © 2011-2020 Natalia Portillo
-// ****************************************************************************/
-
-//  Based on ssdeep
-//  Copyright (C) 2002 Andrew Tridgell <tridge@samba.org>
-//  Copyright (C) 2006 ManTech International Corporation
-//  Copyright (C) 2013 Helmut Grohne <helmut@subdivi.de>
-//
-//  Earlier versions of this code were named fuzzy.c and can be found at:
-//      http://www.samba.org/ftp/unpacked/junkcode/spamsum/
-//      http://ssdeep.sf.net/
-
-using System;
-using System.Runtime.CompilerServices;
-using System.Text;
-using Aaru.CommonTypes.Interfaces;
-
-namespace Aaru.Checksums
-{
-    /// <summary>Implements the SpamSum fuzzy hashing algorithm.</summary>
-    public sealed class SpamSumContext : IChecksum, IDisposable
-    {
-        const uint ROLLING_WINDOW   = 7;
-        const uint MIN_BLOCKSIZE    = 3;
-        const uint HASH_PRIME       = 0x01000193;
-        const uint HASH_INIT        = 0x28021967;
-        const uint NUM_BLOCKHASHES  = 31;
-        const uint SPAMSUM_LENGTH   = 64;
-        const uint FUZZY_MAX_RESULT = (2 * SPAMSUM_LENGTH) + 20;
-
-        //"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
-        readonly byte[] _b64 =
-        {
-            0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52,
-            0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A,
-            0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x30, 0x31,
-            0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x2B, 0x2F
-        };
-
-        FuzzyState _self;
-
-        /// <summary>Initializes the SpamSum structures</summary>
-        public SpamSumContext()
-        {
-            _self = new FuzzyState
-            {
-                Bh = new BlockhashContext[NUM_BLOCKHASHES]
-            };
-
-            for(int i = 0; i < NUM_BLOCKHASHES; i++)
-                _self.Bh[i].Digest = new byte[SPAMSUM_LENGTH];
-
-            _self.Bhstart          = 0;
-            _self.Bhend            = 1;
-            _self.Bh[0].H          = HASH_INIT;
-            _self.Bh[0].Halfh      = HASH_INIT;
-            _self.Bh[0].Digest[0]  = 0;
-            _self.Bh[0].Halfdigest = 0;
-            _self.Bh[0].Dlen       = 0;
-            _self.TotalSize        = 0;
-            roll_init();
-        }
-
-        /// <inheritdoc />
-        /// <summary>Updates the hash with data.</summary>
-        /// <param name="data">Data buffer.</param>
-        /// <param name="len">Length of buffer to hash.</param>
-        public void Update(byte[] data, uint len)
-        {
-            _self.TotalSize += len;
-
-            for(int i = 0; i < len; i++)
-                fuzzy_engine_step(data[i]);
-        }
-
-        /// <inheritdoc />
-        /// <summary>Updates the hash with data.</summary>
-        /// <param name="data">Data buffer.</param>
-        public void Update(byte[] data) => Update(data, (uint)data.Length);
-
-        /// <inheritdoc />
-        /// <summary>Returns a byte array of the hash value.</summary>
-        public byte[] Final()
-        {
-            FuzzyDigest(out byte[] result);
-
-            return CToArray(result);
-        }
-
-        /// <inheritdoc />
-        /// <summary>Returns a base64 representation of the hash value.</summary>
-        public string End()
-        {
-            FuzzyDigest(out byte[] result);
-
-            return CToString(result);
-        }
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        void roll_init() => _self.Roll = new RollState
-        {
-            Window = new byte[ROLLING_WINDOW]
-        };
-
-        /*
-         * a rolling hash, based on the Adler checksum. By using a rolling hash
-         * we can perform auto resynchronisation after inserts/deletes
-
-         * internally, h1 is the sum of the bytes in the window and h2
-         * is the sum of the bytes times the index
-
-         * h3 is a shift/xor based rolling hash, and is mostly needed to ensure that
-         * we can cope with large blocksize values
-         */
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        void roll_hash(byte c)
-        {
-            _self.Roll.H2 -= _self.Roll.H1;
-            _self.Roll.H2 += ROLLING_WINDOW * c;
-
-            _self.Roll.H1 += c;
-            _self.Roll.H1 -= _self.Roll.Window[_self.Roll.N % ROLLING_WINDOW];
-
-            _self.Roll.Window[_self.Roll.N % ROLLING_WINDOW] = c;
-            _self.Roll.N++;
-
-            /* The original spamsum AND'ed this value with 0xFFFFFFFF which
-             * in theory should have no effect. This AND has been removed
-             * for performance (jk) */
-            _self.Roll.H3 <<= 5;
-            _self.Roll.H3 ^=  c;
-        }
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        uint roll_sum() => _self.Roll.H1 + _self.Roll.H2 + _self.Roll.H3;
-
-        /* A simple non-rolling hash, based on the FNV hash. */
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        static uint sum_hash(byte c, uint h) => (h * HASH_PRIME) ^ c;
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        static uint SSDEEP_BS(uint index) => MIN_BLOCKSIZE << (int)index;
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        void fuzzy_try_fork_blockhash()
-        {
-            if(_self.Bhend >= NUM_BLOCKHASHES)
-                return;
-
-            if(_self.Bhend == 0) // assert
-                throw new Exception("Assertion failed");
-
-            uint obh = _self.Bhend - 1;
-            uint nbh = _self.Bhend;
-            _self.Bh[nbh].H          = _self.Bh[obh].H;
-            _self.Bh[nbh].Halfh      = _self.Bh[obh].Halfh;
-            _self.Bh[nbh].Digest[0]  = 0;
-            _self.Bh[nbh].Halfdigest = 0;
-            _self.Bh[nbh].Dlen       = 0;
-            ++_self.Bhend;
-        }
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        void fuzzy_try_reduce_blockhash()
-        {
-            if(_self.Bhstart >= _self.Bhend)
-                throw new Exception("Assertion failed");
-
-            if(_self.Bhend - _self.Bhstart < 2)
-                /* Need at least two working hashes. */
-                return;
-
-            if((ulong)SSDEEP_BS(_self.Bhstart) * SPAMSUM_LENGTH >= _self.TotalSize)
-                /* Initial blocksize estimate would select this or a smaller
-                 * blocksize. */
-                return;
-
-            if(_self.Bh[_self.Bhstart + 1].Dlen < SPAMSUM_LENGTH / 2)
-                /* Estimate adjustment would select this blocksize. */
-                return;
-
-            /* At this point we are clearly no longer interested in the
-             * start_blocksize. Get rid of it. */
-            ++_self.Bhstart;
-        }
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        void fuzzy_engine_step(byte c)
-        {
-            uint i;
-            /* At each character we update the rolling hash and the normal hashes.
-             * When the rolling hash hits a reset value then we emit a normal hash
-             * as a element of the signature and reset the normal hash. */
-            roll_hash(c);
-            ulong h = roll_sum();
-
-            for(i = _self.Bhstart; i < _self.Bhend; ++i)
-            {
-                _self.Bh[i].H     = sum_hash(c, _self.Bh[i].H);
-                _self.Bh[i].Halfh = sum_hash(c, _self.Bh[i].Halfh);
-            }
-
-            for(i = _self.Bhstart; i < _self.Bhend; ++i)
-            {
-                /* With growing blocksize almost no runs fail the next test. */
-                if(h % SSDEEP_BS(i) != SSDEEP_BS(i) - 1)
-                    /* Once this condition is false for one bs, it is
-                     * automatically false for all further bs. I.e. if
-                     * h === -1 (mod 2*bs) then h === -1 (mod bs). */
-                    break;
-
-                /* We have hit a reset point. We now emit hashes which are
-                 * based on all characters in the piece of the message between
-                 * the last reset point and this one */
-                if(0 == _self.Bh[i].Dlen)
-                    fuzzy_try_fork_blockhash();
-
-                _self.Bh[i].Digest[_self.Bh[i].Dlen] = _b64[_self.Bh[i].H     % 64];
-                _self.Bh[i].Halfdigest               = _b64[_self.Bh[i].Halfh % 64];
-
-                if(_self.Bh[i].Dlen < SPAMSUM_LENGTH - 1)
-                {
-                    /* We can have a problem with the tail overflowing. The
-                     * easiest way to cope with this is to only reset the
-                     * normal hash if we have room for more characters in
-                     * our signature. This has the effect of combining the
-                     * last few pieces of the message into a single piece
-                     * */
-                    _self.Bh[i].Digest[++_self.Bh[i].Dlen] = 0;
-                    _self.Bh[i].H                          = HASH_INIT;
-
-                    if(_self.Bh[i].Dlen >= SPAMSUM_LENGTH / 2)
-                        continue;
-
-                    _self.Bh[i].Halfh      = HASH_INIT;
-                    _self.Bh[i].Halfdigest = 0;
-                }
-                else
-                    fuzzy_try_reduce_blockhash();
-            }
-        }
-
-        // CLAUNIA: Flags seems to never be used in ssdeep, so I just removed it for code simplicity
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        void FuzzyDigest(out byte[] result)
-        {
-            var  sb     = new StringBuilder();
-            uint bi     = _self.Bhstart;
-            uint h      = roll_sum();
-            int  remain = (int)(FUZZY_MAX_RESULT - 1); /* Exclude terminating '\0'. */
-            result = new byte[FUZZY_MAX_RESULT];
-
-            /* Verify that our elimination was not overeager. */
-            if(!(bi == 0 || ((ulong)SSDEEP_BS(bi) / 2) * SPAMSUM_LENGTH < _self.TotalSize))
-                throw new Exception("Assertion failed");
-
-            int resultOff = 0;
-
-            /* Initial blocksize guess. */
-            while((ulong)SSDEEP_BS(bi) * SPAMSUM_LENGTH < _self.TotalSize)
-            {
-                ++bi;
-
-                if(bi >= NUM_BLOCKHASHES)
-                    throw new OverflowException("The input exceeds data types.");
-            }
-
-            /* Adapt blocksize guess to actual digest length. */
-            while(bi >= _self.Bhend)
-                --bi;
-
-            while(bi                > _self.Bhstart &&
-                  _self.Bh[bi].Dlen < SPAMSUM_LENGTH / 2)
-                --bi;
-
-            if(bi                > 0 &&
-               _self.Bh[bi].Dlen < SPAMSUM_LENGTH / 2)
-                throw new Exception("Assertion failed");
-
-            sb.AppendFormat("{0}:", SSDEEP_BS(bi));
-            int i = Encoding.ASCII.GetBytes(sb.ToString()).Length;
-
-            if(i <= 0)
-                /* Maybe snprintf has set errno here? */
-                throw new OverflowException("The input exceeds data types.");
-
-            if(i >= remain)
-                throw new Exception("Assertion failed");
-
-            remain -= i;
-
-            Array.Copy(Encoding.ASCII.GetBytes(sb.ToString()), 0, result, 0, i);
-
-            resultOff = i;
-
-            i = (int)_self.Bh[bi].Dlen;
-
-            if(i > remain)
-                throw new Exception("Assertion failed");
-
-            Array.Copy(_self.Bh[bi].Digest, 0, result, resultOff, i);
-            resultOff += i;
-            remain    -= i;
-
-            if(h != 0)
-            {
-                if(remain <= 0)
-                    throw new Exception("Assertion failed");
-
-                result[resultOff] = _b64[_self.Bh[bi].H % 64];
-
-                if(i                 < 3                      ||
-                   result[resultOff] != result[resultOff - 1] ||
-                   result[resultOff] != result[resultOff - 2] ||
-                   result[resultOff] != result[resultOff - 3])
-                {
-                    ++resultOff;
-                    --remain;
-                }
-            }
-            else if(_self.Bh[bi].Digest[i] != 0)
-            {
-                if(remain <= 0)
-                    throw new Exception("Assertion failed");
-
-                result[resultOff] = _self.Bh[bi].Digest[i];
-
-                if(i                 < 3                      ||
-                   result[resultOff] != result[resultOff - 1] ||
-                   result[resultOff] != result[resultOff - 2] ||
-                   result[resultOff] != result[resultOff - 3])
-                {
-                    ++resultOff;
-                    --remain;
-                }
-            }
-
-            if(remain <= 0)
-                throw new Exception("Assertion failed");
-
-            result[resultOff++] = 0x3A; // ':'
-            --remain;
-
-            if(bi < _self.Bhend - 1)
-            {
-                ++bi;
-                i = (int)_self.Bh[bi].Dlen;
-
-                if(i > remain)
-                    throw new Exception("Assertion failed");
-
-                Array.Copy(_self.Bh[bi].Digest, 0, result, resultOff, i);
-                resultOff += i;
-                remain    -= i;
-
-                if(h != 0)
-                {
-                    if(remain <= 0)
-                        throw new Exception("Assertion failed");
-
-                    h                 = _self.Bh[bi].Halfh;
-                    result[resultOff] = _b64[h % 64];
-
-                    if(i                 < 3                      ||
-                       result[resultOff] != result[resultOff - 1] ||
-                       result[resultOff] != result[resultOff - 2] ||
-                       result[resultOff] != result[resultOff - 3])
-                    {
-                        ++resultOff;
-                        --remain;
-                    }
-                }
-                else
-                {
-                    i = _self.Bh[bi].Halfdigest;
-
-                    if(i != 0)
-                    {
-                        if(remain <= 0)
-                            throw new Exception("Assertion failed");
-
-                        result[resultOff] = (byte)i;
-
-                        if(i                 < 3                      ||
-                           result[resultOff] != result[resultOff - 1] ||
-                           result[resultOff] != result[resultOff - 2] ||
-                           result[resultOff] != result[resultOff - 3])
-                        {
-                            ++resultOff;
-                            --remain;
-                        }
-                    }
-                }
-            }
-            else if(h != 0)
-            {
-                if(_self.Bh[bi].Dlen != 0)
-                    throw new Exception("Assertion failed");
-
-                if(remain <= 0)
-                    throw new Exception("Assertion failed");
-
-                result[resultOff++] = _b64[_self.Bh[bi].H % 64];
-                /* No need to bother with FUZZY_FLAG_ELIMSEQ, because this
-                 * digest has length 1. */
-                --remain;
-            }
-
-            result[resultOff] = 0;
-        }
-
-        /// <summary>Gets the hash of a file</summary>
-        /// <param name="filename">File path.</param>
-        public static byte[] File(string filename) =>
-            throw new NotImplementedException("SpamSum does not have a binary representation.");
-
-        /// <summary>Gets the hash of a file in hexadecimal and as a byte array.</summary>
-        /// <param name="filename">File path.</param>
-        /// <param name="hash">Byte array of the hash value.</param>
-        public static string File(string filename, out byte[] hash) =>
-            throw new NotImplementedException("Not yet implemented.");
-
-        /// <summary>Gets the hash of the specified data buffer.</summary>
-        /// <param name="data">Data buffer.</param>
-        /// <param name="len">Length of the data buffer to hash.</param>
-        /// <param name="hash">null</param>
-        /// <returns>Base64 representation of SpamSum $blocksize:$hash:$hash</returns>
-        public static string Data(byte[] data, uint len, out byte[] hash)
-        {
-            var fuzzyContext = new SpamSumContext();
-
-            fuzzyContext.Update(data, len);
-
-            hash = null;
-
-            return fuzzyContext.End();
-        }
-
-        /// <summary>Gets the hash of the specified data buffer.</summary>
-        /// <param name="data">Data buffer.</param>
-        /// <param name="hash">null</param>
-        /// <returns>Base64 representation of SpamSum $blocksize:$hash:$hash</returns>
-        public static string Data(byte[] data, out byte[] hash) => Data(data, (uint)data.Length, out hash);
-
-        // Converts an ASCII null-terminated string to .NET string
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        static string CToString(byte[] cString)
-        {
-            int count = 0;
-
-            // ReSharper disable once LoopCanBeConvertedToQuery
-            // LINQ is six times slower
-            foreach(byte c in cString)
-            {
-                if(c == 0)
-                    break;
-
-                count++;
-            }
-
-            return Encoding.ASCII.GetString(cString, 0, count);
-        }
-
-        // Converts an ASCII null-terminated string to .NET string
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        static byte[] CToArray(byte[] cString)
-        {
-            int count = 0;
-
-            // ReSharper disable once LoopCanBeConvertedToQuery
-            // LINQ is six times slower
-            foreach (byte c in cString)
-            {
-                if (c == 0)
-                    break;
-
-                count++;
-            }
-
-#if NET_FRAMEWORK
-            byte[] newString = new byte[count];
-            Array.Copy(cString, newString, count);
-            return newString;
-#else
-            return new ReadOnlySpan<byte>(cString, 0, count).ToArray();
-#endif
-        }
-
-        public void Dispose()
-        {
-            // TODO: No-op to dispose for now
-        }
-
-        struct RollState
-        {
-            public byte[] Window;
-
-            // ROLLING_WINDOW
-            public uint H1;
-            public uint H2;
-            public uint H3;
-            public uint N;
-        }
-
-        /* A blockhash contains a signature state for a specific (implicit) blocksize.
-         * The blocksize is given by SSDEEP_BS(index). The h and halfh members are the
-         * FNV hashes, where halfh stops to be reset after digest is SPAMSUM_LENGTH/2
-         * long. The halfh hash is needed be able to truncate digest for the second
-         * output hash to stay compatible with ssdeep output. */
-        struct BlockhashContext
-        {
-            public uint   H;
-            public uint   Halfh;
-            public byte[] Digest;
-
-            // SPAMSUM_LENGTH
-            public byte Halfdigest;
-            public uint Dlen;
-        }
-
-        struct FuzzyState
-        {
-            public uint               Bhstart;
-            public uint               Bhend;
-            public BlockhashContext[] Bh;
-
-            //NUM_BLOCKHASHES
-            public ulong     TotalSize;
-            public RollState Roll;
-        }
-    }
-}
--- a/SabreTools.IO/FileExtensions.cs
+++ b/SabreTools.IO/FileExtensions.cs
@@ -1,77 +0,0 @@
-using System.IO;
-using System.Text;
-
-namespace SabreTools.IO
-{
-    /// <summary>
-    /// Extensions to File functionality
-    /// </summary>
-    public static class FileExtensions
-    {
-        /// <summary>
-        /// Determines a text file's encoding by analyzing its byte order mark (BOM).
-        /// Defaults to ASCII when detection of the text file's endianness fails.
-        /// </summary>
-        /// <param name="filename">The text file to analyze.</param>
-        /// <returns>The detected encoding.</returns>
-        /// <link>http://stackoverflow.com/questions/3825390/effective-way-to-find-any-files-encoding</link>
-        public static Encoding GetEncoding(string filename)
-        {
-            if (!File.Exists(filename))
-                return Encoding.Default;
-
-            // Try to open the file
-            try
-            {
-                FileStream file = File.OpenRead(filename);
-                if (file == null)
-                    return Encoding.Default;
-
-                // Read the BOM
-                var bom = new byte[4];
-                file.Read(bom, 0, 4);
-                file.Dispose();
-
-                // Analyze the BOM
-                if (bom[0] == 0x2b && bom[1] == 0x2f && bom[2] == 0x76) return Encoding.UTF7;
-                if (bom[0] == 0xef && bom[1] == 0xbb && bom[2] == 0xbf) return Encoding.UTF8;
-                if (bom[0] == 0xff && bom[1] == 0xfe) return Encoding.Unicode; //UTF-16LE
-                if (bom[0] == 0xfe && bom[1] == 0xff) return Encoding.BigEndianUnicode; //UTF-16BE
-                if (bom[0] == 0 && bom[1] == 0 && bom[2] == 0xfe && bom[3] == 0xff) return Encoding.UTF32;
-                return Encoding.Default;
-            }
-            catch
-            {
-                return Encoding.Default;
-            }
-        }
-
-        /// <summary>
-        /// Returns if the first byte array starts with the second array
-        /// </summary>
-        /// <param name="arr1">First byte array to compare</param>
-        /// <param name="arr2">Second byte array to compare</param>
-        /// <param name="exact">True if the input arrays should match exactly, false otherwise (default)</param>
-        /// <returns>True if the first byte array starts with the second, false otherwise</returns>
-        public static bool StartsWith(this byte[] arr1, byte[] arr2, bool exact = false)
-        {
-            // If we have any invalid inputs, we return false
-            if (arr1 == null || arr2 == null
-                || arr1.Length == 0 || arr2.Length == 0
-                || arr2.Length > arr1.Length
-                || (exact && arr1.Length != arr2.Length))
-            {
-                return false;
-            }
-
-            // Otherwise, loop through and see
-            for (int i = 0; i < arr2.Length; i++)
-            {
-                if (arr1[i] != arr2[i])
-                    return false;
-            }
-
-            return true;
-        }
-    }
-}
--- a/SabreTools.IO/Hasher.cs
+++ b/SabreTools.IO/Hasher.cs
@@ -1,156 +0,0 @@
-using System;
-using System.Linq;
-using System.Security.Cryptography;
-
-using Aaru.Checksums;
-using SabreTools.Core;
-
-namespace SabreTools.IO
-{
-    /// <summary>
-    /// Async hashing class wraper
-    /// </summary>
-    public class Hasher
-    {
-        public Hash HashType { get; private set; }
-        private IDisposable _hasher; 
-
-        public Hasher(Hash hashType)
-        {
-            this.HashType = hashType;
-            GetHasher();
-        }
-
-        /// <summary>
-        /// Generate the correct hashing class based on the hash type
-        /// </summary>
-        private void GetHasher()
-        {
-            switch (HashType)
-            {
-                case Hash.CRC:
-                    _hasher = new OptimizedCRC.OptimizedCRC();
-                    break;
-
-                case Hash.MD5:
-                    _hasher = MD5.Create();
-                    break;
-
-#if NET_FRAMEWORK
-                case Hash.RIPEMD160:
-                    _hasher = RIPEMD160.Create();
-                    break;
-#endif
-
-                case Hash.SHA1:
-                    _hasher = SHA1.Create();
-                    break;
-
-                case Hash.SHA256:
-                    _hasher = SHA256.Create();
-                    break;
-
-                case Hash.SHA384:
-                    _hasher = SHA384.Create();
-                    break;
-
-                case Hash.SHA512:
-                    _hasher = SHA512.Create();
-                    break;
-
-                case Hash.SpamSum:
-                    _hasher = new SpamSumContext();
-                    break;
-            }
-        }
-
-        public void Dispose()
-        {
-            _hasher.Dispose();
-        }
-
-        /// <summary>
-        /// Process a buffer of some length with the internal hash algorithm
-        /// </summary>
-        public void Process(byte[] buffer, int size)
-        {
-            switch (HashType)
-            {
-                case Hash.CRC:
-                    (_hasher as OptimizedCRC.OptimizedCRC).Update(buffer, 0, size);
-                    break;
-
-                case Hash.MD5:
-#if NET_FRAMEWORK
-                case Hash.RIPEMD160:
-#endif
-                case Hash.SHA1:
-                case Hash.SHA256:
-                case Hash.SHA384:
-                case Hash.SHA512:
-                    (_hasher as HashAlgorithm).TransformBlock(buffer, 0, size, null, 0);
-                    break;
-
-                case Hash.SpamSum:
-                    (_hasher as SpamSumContext).Update(buffer);
-                    break;
-            }
-        }
-
-        /// <summary>
-        /// Finalize the internal hash algorigthm
-        /// </summary>
-        public void Finalize()
-        {
-            byte[] emptyBuffer = new byte[0];
-            switch (HashType)
-            {
-                case Hash.CRC:
-                    (_hasher as OptimizedCRC.OptimizedCRC).Update(emptyBuffer, 0, 0);
-                    break;
-
-                case Hash.MD5:
-#if NET_FRAMEWORK
-                case Hash.RIPEMD160:
-#endif
-                case Hash.SHA1:
-                case Hash.SHA256:
-                case Hash.SHA384:
-                case Hash.SHA512:
-                    (_hasher as HashAlgorithm).TransformFinalBlock(emptyBuffer, 0, 0);
-                    break;
-
-                case Hash.SpamSum:
-                    // No finalization step needed
-                    break;
-            }
-        }
-
-        /// <summary>
-        /// Get internal hash as a byte array
-        /// </summary>
-        public byte[] GetHash()
-        {
-            switch (HashType)
-            {
-                case Hash.CRC:
-                    return BitConverter.GetBytes((_hasher as OptimizedCRC.OptimizedCRC).Value).Reverse().ToArray();
-
-                case Hash.MD5:
-#if NET_FRAMEWORK
-                case Hash.RIPEMD160:
-#endif
-                case Hash.SHA1:
-                case Hash.SHA256:
-                case Hash.SHA384:
-                case Hash.SHA512:
-                    return (_hasher as HashAlgorithm).Hash;
-
-                case Hash.SpamSum:
-                    return (_hasher as SpamSumContext).Final();
-            }
-
-            return null;
-        }
-    }
-}
--- a/SabreTools.IO/IOExtensions.cs
+++ b/SabreTools.IO/IOExtensions.cs
@@ -0,0 +1,132 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Text;
+
+namespace SabreTools.IO
+{
+    /// <summary>
+    /// Extension methods for strings that represent file and directory paths
+    /// </summary>
+    public static class IOExtensions
+    {
+        /// <summary>
+        /// Ensure the output directory is a proper format and can be created
+        /// </summary>
+        /// <param name="dir">Directory to check</param>
+        /// <param name="create">True if the directory should be created, false otherwise (default)</param>
+        /// <param name="temp">True if this is a temp directory, false otherwise</param>
+        /// <returns>Full path to the directory</returns>
+        public static string Ensure(this string dir, bool create = false, bool temp = false)
+        {
+            // If the output directory is invalid
+            if (string.IsNullOrWhiteSpace(dir))
+            {
+                if (temp)
+                    dir = Path.GetTempPath();
+                else
+                    dir = Environment.CurrentDirectory;
+            }
+
+            // Get the full path for the output directory
+            dir = Path.GetFullPath(dir);
+
+            // If we're creating the output folder, do so
+            if (create)
+                Directory.CreateDirectory(dir);
+
+            return dir;
+        }
+
+        /// <summary>
+        /// Determines a text file's encoding by analyzing its byte order mark (BOM).
+        /// Falls back to Encoding.Default when no BOM is recognized or the file cannot be read.
+        /// </summary>
+        /// <param name="filename">The text file to analyze.</param>
+        /// <returns>The detected encoding.</returns>
+        /// <link>http://stackoverflow.com/questions/3825390/effective-way-to-find-any-files-encoding</link>
+        public static Encoding GetEncoding(this string filename)
+        {
+            if (string.IsNullOrEmpty(filename))
+                return Encoding.Default;
+
+            if (!File.Exists(filename))
+                return Encoding.Default;
+
+            // Try to open the file
+            try
+            {
+                FileStream file = File.OpenRead(filename);
+                if (file == null)
+                    return Encoding.Default;
+
+                // Read the BOM
+                var bom = new byte[4];
+                file.Read(bom, 0, 4);
+                file.Dispose();
+
+                // Analyze the BOM
+                if (bom[0] == 0x2b && bom[1] == 0x2f && bom[2] == 0x76) return Encoding.UTF7;
+                if (bom[0] == 0xef && bom[1] == 0xbb && bom[2] == 0xbf) return Encoding.UTF8;
+                if (bom[0] == 0xff && bom[1] == 0xfe) return Encoding.Unicode; //UTF-16LE
+                if (bom[0] == 0xfe && bom[1] == 0xff) return Encoding.BigEndianUnicode; //UTF-16BE
+                if (bom[0] == 0 && bom[1] == 0 && bom[2] == 0xfe && bom[3] == 0xff) return Encoding.UTF32;
+                return Encoding.Default;
+            }
+            catch
+            {
+                return Encoding.Default;
+            }
+        }
+
+        /// <summary>
+        /// Get the extension from the path, if possible
+        /// </summary>
+        /// <param name="path">Path to get extension from</param>
+        /// <returns>Lowercased extension without the leading period, null if the path is blank or has no extension</returns>
+        public static string GetNormalizedExtension(this string path)
+        {
+            // Check null or empty first
+            if (string.IsNullOrWhiteSpace(path))
+                return null;
+
+            // Get the extension from the path, if possible
+            string ext = Path.GetExtension(path)?.ToLowerInvariant();
+
+            // Check if the extension is null or empty
+            if (string.IsNullOrWhiteSpace(ext))
+                return null;
+
+            // Strip any leading period so only the bare extension remains
+            ext = ext.TrimStart('.');
+
+            return ext;
+        }
+    
+        /// <summary>
+        /// Get all empty folders within a root folder
+        /// </summary>
+        /// <param name="root">Root directory to parse</param>
+        /// <returns>List containing all directories that are empty, an empty list if the root itself is empty, null if the root is null, empty, or does not exist</returns>
+        public static List<string> ListEmpty(this string root)
+        {
+            // Check null or empty first
+            if (string.IsNullOrEmpty(root))
+                return null;
+
+            // Then, check if the root exists
+            if (!Directory.Exists(root))
+                return null;
+
+            // If it does and it is empty, return an empty list
+            if (Directory.EnumerateFileSystemEntries(root, "*", SearchOption.AllDirectories).Count() == 0)
+                return new List<string>();
+
+            // Otherwise, get the complete list
+            return Directory.EnumerateDirectories(root, "*", SearchOption.AllDirectories)
+                .Where(dir => Directory.EnumerateFileSystemEntries(dir, "*", SearchOption.AllDirectories).Count() == 0)
+                .ToList();
+        }
+    }
+}
--- a/SabreTools.IO/OptimizedCRC.cs
+++ b/SabreTools.IO/OptimizedCRC.cs
@@ -1,153 +0,0 @@
-/*
- 
- Copyright (c) 2012-2015 Eugene Larchenko (spct@mail.ru)
-
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to deal
- in the Software without restriction, including without limitation the rights
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- copies of the Software, and to permit persons to whom the Software is
- furnished to do so, subject to the following conditions:
-
- The above copyright notice and this permission notice shall be included in
- all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- THE SOFTWARE.
- 
-*/
-
-using System;
-
-namespace OptimizedCRC
-{
-    internal class OptimizedCRC : IDisposable
-    {
-        private const uint kCrcPoly = 0xEDB88320;
-        private const uint kInitial = 0xFFFFFFFF;
-        private const int CRC_NUM_TABLES = 8;
-        private static readonly uint[] Table;
-
-        static OptimizedCRC()
-        {
-            unchecked
-            {
-                Table = new uint[256 * CRC_NUM_TABLES];
-                int i;
-                for (i = 0; i < 256; i++)
-                {
-                    uint r = (uint)i;
-                    for (int j = 0; j < 8; j++)
-                    {
-                        r = (r >> 1) ^ (kCrcPoly & ~((r & 1) - 1));
-                    }
-                    Table[i] = r;
-                }
-                for (; i < 256 * CRC_NUM_TABLES; i++)
-                {
-                    uint r = Table[i - 256];
-                    Table[i] = Table[r & 0xFF] ^ (r >> 8);
-                }
-            }
-        }
-
-        public uint UnsignedValue;
-
-        public OptimizedCRC()
-        {
-            Init();
-        }
-
-        /// <summary>
-        /// Reset CRC
-        /// </summary>
-        public void Init()
-        {
-            UnsignedValue = kInitial;
-        }
-
-        public int Value
-        {
-            get { return (int)~UnsignedValue; }
-        }
-
-        public void Update(byte[] data, int offset, int count)
-        {
-            new ArraySegment<byte>(data, offset, count);     // check arguments
-            if (count == 0)
-            {
-                return;
-            }
-
-            var table = OptimizedCRC.Table;
-
-            uint crc = UnsignedValue;
-
-            for (; (offset & 7) != 0 && count != 0; count--)
-            {
-                crc = (crc >> 8) ^ table[(byte)crc ^ data[offset++]];
-            }
-
-            if (count >= 8)
-            {
-                /*
-                 * Idea from 7-zip project sources (http://7-zip.org/sdk.html)
-                 */
-
-                int end = (count - 8) & ~7;
-                count -= end;
-                end += offset;
-
-                while (offset != end)
-                {
-                    crc ^= (uint)(data[offset] + (data[offset + 1] << 8) + (data[offset + 2] << 16) + (data[offset + 3] << 24));
-                    uint high = (uint)(data[offset + 4] + (data[offset + 5] << 8) + (data[offset + 6] << 16) + (data[offset + 7] << 24));
-                    offset += 8;
-
-                    crc = table[(byte)crc + 0x700]
-                        ^ table[(byte)(crc >>= 8) + 0x600]
-                        ^ table[(byte)(crc >>= 8) + 0x500]
-                        ^ table[/*(byte)*/(crc >> 8) + 0x400]
-                        ^ table[(byte)(high) + 0x300]
-                        ^ table[(byte)(high >>= 8) + 0x200]
-                        ^ table[(byte)(high >>= 8) + 0x100]
-                        ^ table[/*(byte)*/(high >> 8) + 0x000];
-                }
-            }
-
-            while (count-- != 0)
-            {
-                crc = (crc >> 8) ^ table[(byte)crc ^ data[offset++]];
-            }
-
-            UnsignedValue = crc;
-        }
-
-        static public int Compute(byte[] data, int offset, int count)
-        {
-            var crc = new OptimizedCRC();
-            crc.Update(data, offset, count);
-            return crc.Value;
-        }
-
-        static public int Compute(byte[] data)
-        {
-            return Compute(data, 0, data.Length);
-        }
-
-        static public int Compute(ArraySegment<byte> block)
-        {
-            return Compute(block.Array, block.Offset, block.Count);
-        }
-
-        public void Dispose()
-        {
-            UnsignedValue = 0;
-        }
-    }
-}
--- a/SabreTools.IO/ParentablePath.cs
+++ b/SabreTools.IO/ParentablePath.cs
@@ -71,7 +71,7 @@ namespace SabreTools.IO
        public string GetOutputPath(string outDir, bool inplace)
        {
            // First, we need to ensure the output directory
-            outDir = DirectoryExtensions.Ensure(outDir);
+            outDir = outDir.Ensure();

            // Check if we have a split path or not
            bool splitpath = !string.IsNullOrWhiteSpace(ParentPath);
--- a/SabreTools.IO/PathExtensions.cs
+++ b/SabreTools.IO/PathExtensions.cs
@@ -1,141 +0,0 @@
-using System.IO;
-
-using SabreTools.Core;
-
-namespace SabreTools.IO
-{
-    /// <summary>
-    /// Extensions to Path functionality
-    /// </summary>
-    public static class PathExtensions
-    {
-        /// <summary>
-        /// Get the extension from the path, if possible
-        /// </summary>
-        /// <param name="path">Path to get extension from</param>
-        /// <returns>Extension, if possible</returns>
-        public static string GetNormalizedExtension(string path)
-        {
-            // Check null or empty first
-            if (string.IsNullOrWhiteSpace(path))
-                return null;
-
-            // Get the extension from the path, if possible
-            string ext = Path.GetExtension(path)?.ToLowerInvariant();
-
-            // Check if the extension is null or empty
-            if (string.IsNullOrWhiteSpace(ext))
-                return null;
-
-            // Make sure that extensions are valid
-            ext = ext.TrimStart('.');
-
-            return ext;
-        }
-
-        /// <summary>
-        /// Get a proper romba sub path
-        /// </summary>
-        /// <param name="hash">SHA-1 hash to get the path for</param>
-        /// <param name="depth">Positive value representing the depth of the depot</param>
-        /// <returns>Subfolder path for the given hash</returns>
-        public static string GetDepotPath(string hash, int depth)
-        {
-            // If the hash isn't the right size, then we return null
-            if (hash.Length != Constants.SHA1Length)
-                return null;
-
-            // Cap the depth between 0 and 20, for now
-            if (depth < 0)
-                depth = 0;
-            else if (depth > Constants.SHA1ZeroBytes.Length)
-                depth = Constants.SHA1ZeroBytes.Length;
-
-            // Loop through and generate the subdirectory
-            string path = string.Empty;
-            for (int i = 0; i < depth; i++)
-            {
-                path += hash.Substring(i * 2, 2) + Path.DirectorySeparatorChar;
-            }
-
-            // Now append the filename
-            path += $"{hash}.gz";
-            return path;
-        }
-
-        /// <summary>
-        /// Get if the given path has a valid DAT extension
-        /// </summary>
-        /// <param name="path">Path to check</param>
-        /// <returns>True if the extension is valid, false otherwise</returns>
-        public static bool HasValidArchiveExtension(string path)
-        {
-            // Get the extension from the path, if possible
-            string ext = GetNormalizedExtension(path);
-
-            // Check against the list of known archive extensions
-            switch (ext)
-            {
-                // Aaruformat
-                case "aaru":
-                case "aaruf":
-                case "aaruformat":
-                case "aif":
-                case "dicf":
-
-                // Archives
-                case "7z":
-                case "gz":
-                case "lzma":
-                case "rar":
-                case "rev":
-                case "r00":
-                case "r01":
-                case "tar":
-                case "tgz":
-                case "tlz":
-                case "zip":
-                case "zipx":
-
-                // CHD
-                case "chd":
-                    return true;
-                default:
-                    return false;
-            }
-        }
-
-        /// <summary>
-        /// Get if the given path has a valid DAT extension
-        /// </summary>
-        /// <param name="path">Path to check</param>
-        /// <returns>True if the extension is valid, false otherwise</returns>
-        public static bool HasValidDatExtension(string path)
-        {
-            // Get the extension from the path, if possible
-            string ext = GetNormalizedExtension(path);
-
-            // Check against the list of known DAT extensions
-            switch (ext)
-            {
-                case "csv":
-                case "dat":
-                case "json":
-                case "md5":
-                case "ripemd160":
-                case "sfv":
-                case "sha1":
-                case "sha256":
-                case "sha384":
-                case "sha512":
-                case "ssv":
-                case "tsv":
-                case "txt":
-                case "xml":
-                    return true;
-                default:
-                    return false;
-            }
-        }
-    }
-}
--- a/SabreTools.IO/DirectoryExtensions.cs
+++ b/SabreTools.IO/DirectoryExtensions.cs
@@ -1,4 +1,3 @@
-using System;
 using System.Collections.Generic;
 using System.IO;
 using System.Linq;
@@ -8,65 +7,10 @@ using NaturalSort;
 namespace SabreTools.IO
 {
    /// <summary>
-    /// Extensions to Directory functionality
+    /// Methods around path operations
    /// </summary>
-    public static class DirectoryExtensions
+    public static class PathTool
    {
-        /// <summary>
-        /// Cleans out the temporary directory
-        /// </summary>
-        /// <param name="dir">Name of the directory to clean out</param>
-        public static void Clean(string dir)
-        {
-            foreach (string file in Directory.EnumerateFiles(dir, "*", SearchOption.TopDirectoryOnly))
-            {
-                try
-                {
-                    if (File.Exists(file))
-                        File.Delete(file);
-                }
-                catch { }
-            }
-
-            foreach (string subdir in Directory.EnumerateDirectories(dir, "*", SearchOption.TopDirectoryOnly))
-            {
-                try
-                {
-                    if (Directory.Exists(subdir))
-                        Directory.Delete(subdir);
-                }
-                catch { }
-            }
-        }
-
-        /// <summary>
-        /// Ensure the output directory is a proper format and can be created
-        /// </summary>
-        /// <param name="dir">Directory to check</param>
-        /// <param name="create">True if the directory should be created, false otherwise (default)</param>
-        /// <param name="temp">True if this is a temp directory, false otherwise</param>
-        /// <returns>Full path to the directory</returns>
-        public static string Ensure(string dir, bool create = false, bool temp = false)
-        {
-            // If the output directory is invalid
-            if (string.IsNullOrWhiteSpace(dir))
-            {
-                if (temp)
-                    dir = Path.GetTempPath();
-                else
-                    dir = Environment.CurrentDirectory;
-            }
-
-            // Get the full path for the output directory
-            dir = Path.GetFullPath(dir);
-
-            // If we're creating the output folder, do so
-            if (create)
-                Directory.CreateDirectory(dir);
-
-            return dir;
-        }
-
        /// <summary>
        /// Retrieve a list of just directories from inputs
        /// </summary>
@@ -222,26 +166,5 @@ namespace SabreTools.IO
            // Return the new list
            return infiles;
        }
-
-        /// <summary>
-        /// Get all empty folders within a root folder
-        /// </summary>
-        /// <param name="root">Root directory to parse</param>
-        /// <returns>IEumerable containing all directories that are empty, an empty enumerable if the root is empty, null otherwise</returns>
-        public static List<string> ListEmpty(string root)
-        {
-            // Check if the root exists first
-            if (!Directory.Exists(root))
-                return null;
-
-            // If it does and it is empty, return a blank enumerable
-            if (Directory.EnumerateFileSystemEntries(root, "*", SearchOption.AllDirectories).Count() == 0)
-                return new List<string>();
-
-            // Otherwise, get the complete list
-            return Directory.EnumerateDirectories(root, "*", SearchOption.AllDirectories)
-                .Where(dir => Directory.EnumerateFileSystemEntries(dir, "*", SearchOption.AllDirectories).Count() == 0)
-                .ToList();
-        }
    }
 }