25 Commits
1.7.3 ... 1.7.4

Author SHA1 Message Date
Matt Nadareski
5054aeb077 Bump version 2025-09-22 17:48:18 -04:00
Matt Nadareski
d2e9b8d6e5 Fix byte array test 2025-09-22 17:44:41 -04:00
Matt Nadareski
2c29aee834 Remove Models from references 2025-09-22 11:03:40 -04:00
Matt Nadareski
576bafcb87 Create minimal model for InflateWrapper 2025-09-22 11:03:07 -04:00
Matt Nadareski
2b310ac528 SZDD no longer uses models 2025-09-22 10:55:14 -04:00
Matt Nadareski
4f6b6d7b59 Reduce Models use another notch 2025-09-22 10:52:26 -04:00
Matt Nadareski
17e55ee233 Move BufferedStream out of SZDD 2025-09-22 10:50:53 -04:00
Matt Nadareski
8b78906d1d Move MoPaQ encryption constants from Models 2025-09-22 10:37:27 -04:00
Matt Nadareski
cff2dcf4cc Move LZX models from Models 2025-09-22 10:35:04 -04:00
Matt Nadareski
a56942cb73 Move Quantum compression models from Models 2025-09-22 10:31:14 -04:00
Matt Nadareski
5ed661b77c Move MSZIP "model" from Models 2025-09-22 10:27:03 -04:00
Matt Nadareski
a0a0cd0386 Add more complete UTF-8 first-byte tests 2025-09-21 16:34:46 -04:00
Matt Nadareski
bcc0fca4ad Ensure 7-bit ASCII never reads above 0x7F 2025-09-21 16:27:03 -04:00
Matt Nadareski
843e821e5f Use extended check in slow path too 2025-09-21 16:07:20 -04:00
Matt Nadareski
630b01283e Latin1 instead of ASCII for .NET 5.0 and beyond 2025-09-21 15:12:16 -04:00
Matt Nadareski
22abb96013 Add remarks about what encodings are used 2025-09-21 15:02:33 -04:00
Matt Nadareski
314de12661 Fix tests, remove UTF-8 checks from irrelevant places 2025-09-21 14:03:45 -04:00
Matt Nadareski
a0b24031b5 Remove duplicate code from Stream implementation 2025-09-21 13:58:46 -04:00
Matt Nadareski
b4628485c3 Sync stream implementation with byte one 2025-09-21 13:58:13 -04:00
Matt Nadareski
4610ddc9b9 Don't read the string unless it's long enough 2025-09-21 13:53:16 -04:00
Matt Nadareski
e392ddc8d7 Fix code formatting 2025-09-21 13:52:05 -04:00
Matt Nadareski
1908d1b32e More generically support single-byte encodings 2025-09-21 13:50:08 -04:00
Matt Nadareski
9d73195f86 Big-endian unicode support because it's there 2025-09-21 13:42:05 -04:00
Matt Nadareski
335a486f17 Special handling of empty string builders 2025-09-21 13:41:06 -04:00
Matt Nadareski
d3e41ac187 Handle invalid offsets in byte array extensions 2025-09-21 11:43:07 -04:00
28 changed files with 941 additions and 167 deletions

View File

@@ -143,27 +143,23 @@ namespace SabreTools.IO.Test.Extensions
];
var actual = arr.ReadStringsFrom(4);
Assert.NotNull(actual);
// ASCII and UTF-8 are identical for the character range
Assert.Equal(2, actual.Count);
}
[Fact]
public void ReadStringsFrom_UTF8_Filled()
public void ReadStringsFrom_Latin1Strings_Filled()
{
byte[]? arr =
[
.. Encoding.UTF8.GetBytes("TEST"),
.. Encoding.Latin1.GetBytes("TEST"),
.. new byte[] { 0x00 },
.. Encoding.UTF8.GetBytes("TWO"),
.. Encoding.Latin1.GetBytes("TWO"),
.. new byte[] { 0x00 },
.. Encoding.UTF8.GetBytes("DATA"),
.. Encoding.Latin1.GetBytes("DATA"),
.. new byte[] { 0x00 },
];
var actual = arr.ReadStringsFrom(4);
Assert.NotNull(actual);
// ASCII and UTF-8 are identical for the character range
Assert.Equal(2, actual.Count);
}
@@ -195,11 +191,11 @@ namespace SabreTools.IO.Test.Extensions
.. new byte[] { 0x00 },
.. Encoding.ASCII.GetBytes("DATA1"),
.. new byte[] { 0x00 },
.. Encoding.UTF8.GetBytes("TEST2"),
.. Encoding.Latin1.GetBytes("TEST2"),
.. new byte[] { 0x00 },
.. Encoding.UTF8.GetBytes("TWO2"),
.. Encoding.Latin1.GetBytes("TWO2"),
.. new byte[] { 0x00 },
.. Encoding.UTF8.GetBytes("DATA2"),
.. Encoding.Latin1.GetBytes("DATA2"),
.. new byte[] { 0x00 },
.. Encoding.Unicode.GetBytes("TEST3"),
.. new byte[] { 0x00 },
@@ -210,8 +206,6 @@ namespace SabreTools.IO.Test.Extensions
];
var actual = arr.ReadStringsFrom(5);
Assert.NotNull(actual);
// ASCII and UTF-8 are identical for the character range
Assert.Equal(6, actual.Count);
}
@@ -319,6 +313,22 @@ namespace SabreTools.IO.Test.Extensions
Assert.Equal(2, actual.Count);
}
[Fact]
public void ReadStringsWithEncoding_InvalidAsciiChars_Empty()
{
byte[]? arr =
[
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
.. Enumerable.Range(0x80, 0x80).Select(i => (byte)i),
];
var actual = arr.ReadStringsWithEncoding(1, Encoding.ASCII);
Assert.NotNull(actual);
Assert.Empty(actual);
}
[Fact]
public void ReadStringsWithEncoding_Latin1_Filled()
{
@@ -337,6 +347,25 @@ namespace SabreTools.IO.Test.Extensions
Assert.Equal(2, actual.Count);
}
[Fact]
public void ReadStringsWithEncoding_InvalidLatin1Chars_Empty()
{
byte[]? arr =
[
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F,
];
var actual = arr.ReadStringsWithEncoding(1, Encoding.Latin1);
Assert.NotNull(actual);
Assert.Empty(actual);
}
[Fact]
public void ReadStringsWithEncoding_UTF8_Filled()
{
@@ -355,6 +384,24 @@ namespace SabreTools.IO.Test.Extensions
Assert.Equal(2, actual.Count);
}
[Fact]
public void ReadStringsWithEncoding_InvalidUTF8Chars_Empty()
{
byte[]? arr =
[
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
.. Enumerable.Range(0x80, 0x42).Select(i => (byte)i),
0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC,
0xFD, 0xFE, 0xFF,
];
var actual = arr.ReadStringsWithEncoding(1, Encoding.UTF8);
Assert.NotNull(actual);
Assert.Empty(actual);
}
[Fact]
public void ReadStringsWithEncoding_UTF16_Filled()
{
@@ -373,6 +420,21 @@ namespace SabreTools.IO.Test.Extensions
Assert.Equal(2, actual.Count);
}
[Fact]
public void ReadStringsWithEncoding_InvalidUTF16Chars_Empty()
{
byte[]? arr =
[
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
];
var actual = arr.ReadStringsWithEncoding(1, Encoding.Unicode);
Assert.NotNull(actual);
Assert.Empty(actual);
}
[Fact]
public void ReadStringsWithEncoding_UTF32_Filled()
{
@@ -391,6 +453,21 @@ namespace SabreTools.IO.Test.Extensions
Assert.Equal(2, actual.Count);
}
[Fact]
public void ReadStringsWithEncoding_InvalidUTF32Chars_Empty()
{
byte[]? arr =
[
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
];
var actual = arr.ReadStringsWithEncoding(1, Encoding.UTF32);
Assert.NotNull(actual);
Assert.Empty(actual);
}
#endregion
}
}

View File

@@ -200,29 +200,25 @@ namespace SabreTools.IO.Test.Extensions
Stream? stream = new MemoryStream(bytes);
var actual = stream.ReadStringsFrom(0, bytes.Length, 4);
Assert.NotNull(actual);
// ASCII and UTF-8 are identical for the character range
Assert.Equal(4, actual.Count);
Assert.Equal(2, actual.Count);
}
[Fact]
public void ReadStringsFrom_UTF8_Filled()
public void ReadStringsFrom_Latin1Strings_Filled()
{
byte[]? bytes =
[
.. Encoding.UTF8.GetBytes("TEST"),
.. Encoding.Latin1.GetBytes("TEST"),
.. new byte[] { 0x00 },
.. Encoding.UTF8.GetBytes("TWO"),
.. Encoding.Latin1.GetBytes("TWO"),
.. new byte[] { 0x00 },
.. Encoding.UTF8.GetBytes("DATA"),
.. Encoding.Latin1.GetBytes("DATA"),
.. new byte[] { 0x00 },
];
Stream? stream = new MemoryStream(bytes);
var actual = stream.ReadStringsFrom(0, bytes.Length, 4);
Assert.NotNull(actual);
// ASCII and UTF-8 are identical for the character range
Assert.Equal(4, actual.Count);
Assert.Equal(2, actual.Count);
}
[Fact]
@@ -254,11 +250,11 @@ namespace SabreTools.IO.Test.Extensions
.. new byte[] { 0x00 },
.. Encoding.ASCII.GetBytes("DATA1"),
.. new byte[] { 0x00 },
.. Encoding.UTF8.GetBytes("TEST2"),
.. Encoding.Latin1.GetBytes("TEST2"),
.. new byte[] { 0x00 },
.. Encoding.UTF8.GetBytes("TWO2"),
.. Encoding.Latin1.GetBytes("TWO2"),
.. new byte[] { 0x00 },
.. Encoding.UTF8.GetBytes("DATA2"),
.. Encoding.Latin1.GetBytes("DATA2"),
.. new byte[] { 0x00 },
.. Encoding.Unicode.GetBytes("TEST3"),
.. new byte[] { 0x00 },
@@ -270,9 +266,7 @@ namespace SabreTools.IO.Test.Extensions
Stream? stream = new MemoryStream(bytes);
var actual = stream.ReadStringsFrom(0, bytes.Length, 5);
Assert.NotNull(actual);
// ASCII and UTF-8 are identical for the character range
Assert.Equal(10, actual.Count);
Assert.Equal(6, actual.Count);
}
#endregion

View File

@@ -0,0 +1,40 @@
using System.IO;
using Xunit;

namespace SabreTools.IO.Test.Streams
{
    public class BufferedStreamTests
    {
        #region ReadNextByte

        [Fact]
        public void ReadNextByte_Empty_Null()
        {
            // Arrange: a backing stream with no data at all
            var backing = new MemoryStream();
            var buffered = new IO.Streams.BufferedStream(backing);

            // Act
            byte? result = buffered.ReadNextByte();

            // Assert: nothing to read yields null
            Assert.Null(result);
        }

        [Fact]
        public void ReadNextByte_Filled_ValidPosition_Byte()
        {
            // Arrange: 1 KiB of zero bytes, position at the start
            var backing = new MemoryStream(new byte[1024]);
            var buffered = new IO.Streams.BufferedStream(backing);

            // Act
            byte? result = buffered.ReadNextByte();

            // Assert: first byte of the zero-filled backing data
            Assert.Equal((byte)0x00, result);
        }

        [Fact]
        public void ReadNextByte_Filled_InvalidPosition_Null()
        {
            // Arrange: seek the backing stream to EOF before wrapping it
            var backing = new MemoryStream(new byte[1024]);
            backing.Seek(0, SeekOrigin.End);
            var buffered = new IO.Streams.BufferedStream(backing);

            // Act
            byte? result = buffered.ReadNextByte();

            // Assert: reading past the end yields null
            Assert.Null(result);
        }

        #endregion
    }
}

View File

@@ -3,8 +3,6 @@ using System.IO;
using System.Text;
using SabreTools.Hashing;
using SabreTools.IO.Extensions;
using SabreTools.Models.PKZIP;
using static SabreTools.Models.PKZIP.Constants;
namespace SabreTools.IO.Compression.Deflate
{
@@ -20,6 +18,46 @@ namespace SabreTools.IO.Compression.Deflate
/// </summary>
private const int BufferSize = 1024 * 1024;
/// <summary>
/// Local file header signature
/// </summary>
private const uint LocalFileHeaderSignature = 0x04034B50;
#endregion
#region Private Classes
/// <summary>
/// Minimal PKZIP local file header information
/// </summary>
private class MinLocalFileHeader
{
/// <summary>
/// Signature (0x04034B50)
/// </summary>
public uint Signature { get; set; }
/// <summary>
/// CRC-32
/// </summary>
public uint CRC32 { get; set; }
/// <summary>
/// Compressed size
/// </summary>
public uint CompressedSize { get; set; }
/// <summary>
/// Uncompressed size
/// </summary>
public uint UncompressedSize { get; set; }
/// <summary>
/// File name (variable size)
/// </summary>
public string? FileName { get; set; }
}
#endregion
#region Extraction
@@ -140,7 +178,7 @@ namespace SabreTools.IO.Compression.Deflate
long current = source.Position;
// Parse the PKZIP header, if it exists
LocalFileHeader? zipHeader = ParseLocalFileHeader(source);
MinLocalFileHeader? zipHeader = ParseLocalFileHeader(source);
long zipHeaderBytes = source.Position - current;
// Always trust the PKZIP CRC-32 value over what is supplied
@@ -269,46 +307,39 @@ namespace SabreTools.IO.Compression.Deflate
}
/// <summary>
/// Parse a Stream into a local file header
/// Parse a Stream into a minimal local file header
/// </summary>
/// <param name="data">Stream to parse</param>
/// <returns>Filled local file header on success, null on error</returns>
/// <remarks>Mirror of method in Serialization</remarks>
private static LocalFileHeader? ParseLocalFileHeader(Stream data)
/// <returns>Filled minimal local file header on success, null on error</returns>
/// <remarks>Partial mirror of method in Serialization</remarks>
private static MinLocalFileHeader? ParseLocalFileHeader(Stream data)
{
var header = new LocalFileHeader();
var header = new MinLocalFileHeader();
header.Signature = data.ReadUInt32LittleEndian();
if (header.Signature != LocalFileHeaderSignature)
return null;
header.Version = data.ReadUInt16LittleEndian();
header.Flags = (GeneralPurposeBitFlags)data.ReadUInt16LittleEndian();
header.CompressionMethod = (CompressionMethod)data.ReadUInt16LittleEndian();
header.LastModifedFileTime = data.ReadUInt16LittleEndian();
header.LastModifiedFileDate = data.ReadUInt16LittleEndian();
_ = data.ReadUInt16LittleEndian(); // Version
_ = data.ReadUInt16LittleEndian(); // Flags
_ = data.ReadUInt16LittleEndian(); // CompressionMethod
_ = data.ReadUInt16LittleEndian(); // LastModifedFileTime
_ = data.ReadUInt16LittleEndian(); // LastModifiedFileDate
header.CRC32 = data.ReadUInt32LittleEndian();
header.CompressedSize = data.ReadUInt32LittleEndian();
header.UncompressedSize = data.ReadUInt32LittleEndian();
header.FileNameLength = data.ReadUInt16LittleEndian();
header.ExtraFieldLength = data.ReadUInt16LittleEndian();
ushort fileNameLength = data.ReadUInt16LittleEndian();
ushort extraFieldLength = data.ReadUInt16LittleEndian();
if (header.FileNameLength > 0 && data.Position + header.FileNameLength <= data.Length)
if (fileNameLength > 0 && data.Position + fileNameLength <= data.Length)
{
byte[] filenameBytes = data.ReadBytes(header.FileNameLength);
if (filenameBytes.Length != header.FileNameLength)
return null;
byte[] filenameBytes = data.ReadBytes(fileNameLength);
header.FileName = Encoding.ASCII.GetString(filenameBytes);
}
// Parsing extras is skipped here, unlike in Serialization
if (header.ExtraFieldLength > 0 && data.Position + header.ExtraFieldLength <= data.Length)
{
byte[] extraBytes = data.ReadBytes(header.ExtraFieldLength);
if (extraBytes.Length != header.ExtraFieldLength)
return null;
}
if (extraFieldLength > 0 && data.Position + extraFieldLength <= data.Length)
_ = data.ReadBytes(extraFieldLength);
return header;
}

View File

@@ -0,0 +1,58 @@
namespace SabreTools.IO.Compression.LZX
{
    /// <summary>
    /// An aligned offset block is identical to the verbatim block except for the presence of the aligned offset
    /// tree preceding the other trees.
    /// </summary>
    /// <see href="https://interoperability.blob.core.windows.net/files/MS-PATCH/%5bMS-PATCH%5d.pdf"/>
    internal class AlignedOffsetBlockData : BlockData
    {
        /// <summary>
        /// Aligned offset tree
        /// </summary>
        /// <remarks>8 elements, 3 bits each</remarks>
        public byte[]? AlignedOffsetTree { get; set; }

        /// <summary>
        /// Pretree for first 256 elements of main tree
        /// </summary>
        /// <remarks>20 elements, 4 bits each</remarks>
        public byte[]? PretreeFirst256 { get; set; }

        /// <summary>
        /// Path lengths of first 256 elements of main tree
        /// </summary>
        /// <remarks>Encoded using pretree</remarks>
        public int[]? PathLengthsFirst256 { get; set; }

        /// <summary>
        /// Pretree for remainder of main tree
        /// </summary>
        /// <remarks>20 elements, 4 bits each</remarks>
        public byte[]? PretreeRemainder { get; set; }

        /// <summary>
        /// Path lengths of remaining elements of main tree
        /// </summary>
        /// <remarks>Encoded using pretree</remarks>
        public int[]? PathLengthsRemainder { get; set; }

        /// <summary>
        /// Pretree for length tree
        /// </summary>
        /// <remarks>20 elements, 4 bits each</remarks>
        public byte[]? PretreeLengthTree { get; set; }

        /// <summary>
        /// Path lengths of elements in length tree
        /// </summary>
        /// <remarks>Encoded using pretree</remarks>
        public int[]? PathLengthsLengthTree { get; set; }

        /// <summary>
        /// Token sequence (matches and literals)
        /// </summary>
        /// <remarks>Variable</remarks>
        public byte[]? TokenSequence { get; set; }
    }
}

View File

@@ -0,0 +1,24 @@
namespace SabreTools.IO.Compression.LZX
{
    /// <summary>
    /// An LZXD block represents a sequence of compressed data that is encoded with the same set of
    /// Huffman trees, or a sequence of uncompressed data. There can be one or more LZXD blocks in a
    /// compressed stream, each with its own set of Huffman trees. Blocks do not have to start or end on a
    /// chunk boundary; blocks can span multiple chunks, or a single chunk can contain multiple blocks. The
    /// number of chunks is related to the size of the data being compressed, while the number of blocks is
    /// related to how well the data is compressed.
    /// </summary>
    /// <see href="https://interoperability.blob.core.windows.net/files/MS-PATCH/%5bMS-PATCH%5d.pdf"/>
    internal class Block
    {
        /// <summary>
        /// Block header
        /// </summary>
        public BlockHeader? Header { get; set; }

        /// <summary>
        /// Block data
        /// </summary>
        /// <remarks>
        /// Concrete type depends on the header's block type:
        /// verbatim, aligned offset, or uncompressed
        /// </remarks>
        public BlockData? BlockData { get; set; }
    }
}

View File

@@ -0,0 +1,8 @@
namespace SabreTools.IO.Compression.LZX
{
    /// <summary>
    /// Base type for the data portion of an LZXD block; concrete layouts are
    /// VerbatimBlockData, AlignedOffsetBlockData, and UncompressedBlockData.
    /// </summary>
    /// <see href="https://interoperability.blob.core.windows.net/files/MS-PATCH/%5bMS-PATCH%5d.pdf"/>
    internal abstract class BlockData
    {
        // No common fields between all block data
    }
}

View File

@@ -0,0 +1,33 @@
namespace SabreTools.IO.Compression.LZX
{
    /// <summary>
    /// The Block Type field, as specified in section 2.3.1.1, indicates which type of block follows,
    /// and the Block Size field, as specified in section 2.3.1.2, indicates the number of
    /// uncompressed bytes represented by the block. Following the generic block
    /// header is a type-specific header that describes the remainder of the block.
    /// </summary>
    /// <see href="https://interoperability.blob.core.windows.net/files/MS-PATCH/%5bMS-PATCH%5d.pdf"/>
    internal class BlockHeader
    {
        /// <remarks>3 bits</remarks>
        public BlockType BlockType { get; set; }

        /// <summary>
        /// Block size is the high 8 bits of 24
        /// </summary>
        /// <remarks>8 bits</remarks>
        public byte BlockSizeMSB { get; set; }

        /// <summary>
        /// Block size is the middle 8 bits of 24
        /// </summary>
        /// <remarks>8 bits</remarks>
        public byte BlockSizeByte2 { get; set; }

        /// <summary>
        /// Block size is the low 8 bits of 24
        /// </summary>
        /// <remarks>
        /// 8 bits.
        /// NOTE(review): casing is inconsistent with BlockSizeMSB/BlockSizeByte2
        /// ("Blocksize" vs "BlockSize"); renaming would break callers, so it is only flagged here.
        /// </remarks>
        public byte BlocksizeLSB { get; set; }
    }
}

View File

@@ -0,0 +1,25 @@
namespace SabreTools.IO.Compression.LZX
{
    /// <summary>
    /// The LZXD compressor emits chunks of compressed data. A chunk represents exactly 32 KB of
    /// uncompressed data until the last chunk in the stream, which can represent less than 32 KB. To
    /// ensure that an exact number of input bytes represent an exact number of output bytes for each
    /// chunk, after each 32 KB of uncompressed data is represented in the output compressed bitstream, the
    /// output bitstream is padded with up to 15 bits of zeros to realign the bitstream on a 16-bit boundary
    /// (even byte boundary) for the next 32 KB of data. This results in a compressed chunk of a byte-aligned
    /// size. The compressed chunk could be smaller than 32 KB or larger than 32 KB if the data is
    /// incompressible when the chunk is not the last one.
    /// </summary>
    internal class Chunk
    {
        /// <summary>
        /// Chunk header
        /// </summary>
        public ChunkHeader? Header { get; set; }

        /// <summary>
        /// Block headers and data
        /// </summary>
        /// <remarks>Blocks may span chunk boundaries; see the class summary on Block</remarks>
        public Block[]? Blocks { get; set; }
    }
}

View File

@@ -0,0 +1,46 @@
namespace SabreTools.IO.Compression.LZX
{
    /// <summary>
    /// The LZXD compressor emits chunks of compressed data. A chunk represents exactly 32 KB of
    /// uncompressed data until the last chunk in the stream, which can represent less than 32 KB. To
    /// ensure that an exact number of input bytes represent an exact number of output bytes for each
    /// chunk, after each 32 KB of uncompressed data is represented in the output compressed bitstream, the
    /// output bitstream is padded with up to 15 bits of zeros to realign the bitstream on a 16-bit boundary
    /// (even byte boundary) for the next 32 KB of data. This results in a compressed chunk of a byte-aligned
    /// size. The compressed chunk could be smaller than 32 KB or larger than 32 KB if the data is
    /// incompressible when the chunk is not the last one.
    /// </summary>
    internal class ChunkHeader
    {
        /// <summary>
        /// The LZXD engine encodes a compressed, chunk-size prefix field preceding each compressed chunk in
        /// the compressed byte stream. The compressed, chunk-size prefix field is a byte aligned, little-endian,
        /// 16-bit field. The chunk prefix chain could be followed in the compressed stream without
        /// decompressing any data. The next chunk prefix is at a location computed by the absolute byte offset
        /// location of this chunk prefix plus 2 (for the size of the chunk-size prefix field) plus the current chunk
        /// size.
        /// </summary>
        public ushort ChunkSize { get; set; }

        /// <summary>
        /// The first bit in the first chunk in the LZXD bitstream (following the 2-byte, chunk-size prefix described
        /// in section 2.2.1) indicates the presence or absence of two 16-bit fields immediately following the
        /// single bit. If the bit is set, E8 translation is enabled for all the following chunks in the stream using the
        /// 32-bit value derived from the two 16-bit fields as the E8_file_size provided to the compressor when E8
        /// translation was enabled. Note that E8_file_size is completely independent of the length of the
        /// uncompressed data. E8 call translation is disabled after the 32,768th chunk (after 1 gigabyte (GB) of
        /// uncompressed data).
        /// </summary>
        /// <remarks>A single flag bit in the stream, stored here as a byte</remarks>
        public byte E8Translation { get; set; }

        /// <summary>
        /// E8 translation size, high WORD
        /// </summary>
        /// <remarks>Only present when E8 translation is enabled, hence nullable</remarks>
        public ushort? TranslationSizeHighWord { get; set; }

        /// <summary>
        /// E8 translation size, low WORD
        /// </summary>
        /// <remarks>Only present when E8 translation is enabled, hence nullable</remarks>
        public ushort? TranslationSizeLowWord { get; set; }
    }
}

View File

@@ -0,0 +1,38 @@
namespace SabreTools.IO.Compression.LZX
{
    /// <summary>
    /// Constants defined by the LZX specification, plus Huffman table tuning values
    /// </summary>
    internal static class Constants
    {
        /* some constants defined by the LZX specification */

        /// <summary>
        /// Minimum match length
        /// </summary>
        public const int LZX_MIN_MATCH = 2;

        /// <summary>
        /// Maximum match length
        /// </summary>
        public const int LZX_MAX_MATCH = 257;

        /// <summary>
        /// Number of literal character symbols
        /// </summary>
        public const int LZX_NUM_CHARS = 256;

        /// <summary>
        /// Pretree element count
        /// </summary>
        public const int LZX_PRETREE_NUM_ELEMENTS = 20;

        /// <summary>
        /// aligned offset tree #elements
        /// </summary>
        public const int LZX_ALIGNED_NUM_ELEMENTS = 8;

        /// <summary>
        /// this one missing from spec!
        /// </summary>
        public const int LZX_NUM_PRIMARY_LENGTHS = 7;

        /// <summary>
        /// length tree #elements
        /// </summary>
        public const int LZX_NUM_SECONDARY_LENGTHS = 249;

        /* LZX huffman defines: tweak tablebits as desired */

        public const int LZX_PRETREE_MAXSYMBOLS = LZX_PRETREE_NUM_ELEMENTS;
        public const int LZX_PRETREE_TABLEBITS = 6;
        public const int LZX_MAINTREE_MAXSYMBOLS = LZX_NUM_CHARS + 50 * 8;
        public const int LZX_MAINTREE_TABLEBITS = 12;
        public const int LZX_LENGTH_MAXSYMBOLS = LZX_NUM_SECONDARY_LENGTHS + 1;
        public const int LZX_LENGTH_TABLEBITS = 12;
        public const int LZX_ALIGNED_MAXSYMBOLS = LZX_ALIGNED_NUM_ELEMENTS;
        public const int LZX_ALIGNED_TABLEBITS = 7;

        public const int LZX_LENTABLE_SAFETY = 64; /* we allow length table decoding overruns */
    }
}

View File

@@ -0,0 +1,48 @@
namespace SabreTools.IO.Compression.LZX
{
    /// <summary>
    /// 3-bit block type
    /// </summary>
    /// <remarks>Only values 1-3 are valid block types per the MS-PATCH specification</remarks>
    internal enum BlockType : byte
    {
        /// <summary>
        /// Not valid
        /// </summary>
        INVALID_0 = 0b000,

        /// <summary>
        /// Verbatim block
        /// </summary>
        Verbatim = 0b001,

        /// <summary>
        /// Aligned offset block
        /// </summary>
        AlignedOffset = 0b010,

        /// <summary>
        /// Uncompressed block
        /// </summary>
        Uncompressed = 0b011,

        /// <summary>
        /// Not valid
        /// </summary>
        INVALID_4 = 0b100,

        /// <summary>
        /// Not valid
        /// </summary>
        INVALID_5 = 0b101,

        /// <summary>
        /// Not valid
        /// </summary>
        INVALID_6 = 0b110,

        /// <summary>
        /// Not valid
        /// </summary>
        INVALID_7 = 0b111,
    }
}

View File

@@ -0,0 +1,54 @@
namespace SabreTools.IO.Compression.LZX
{
    /// <summary>
    /// Following the generic block header, an uncompressed block begins with 1 to 16 bits of zero padding
    /// to align the bit buffer on a 16-bit boundary. At this point, the bitstream ends and a byte stream
    /// begins. Following the zero padding, new 32-bit values for R0, R1, and R2 are output in little-endian
    /// form, followed by the uncompressed data bytes themselves. Finally, if the uncompressed data length
    /// is odd, one extra byte of zero padding is encoded to realign the following bitstream.
    ///
    /// Then the bitstream of byte-swapped 16-bit integers resumes for the next Block Type field (if there
    /// are subsequent blocks).
    ///
    /// The decoded R0, R1, and R2 values are used as initial repeated offset values to decode the
    /// subsequent compressed block if present.
    /// </summary>
    /// <see href="https://interoperability.blob.core.windows.net/files/MS-PATCH/%5bMS-PATCH%5d.pdf"/>
    internal class UncompressedBlockData : BlockData
    {
        /// <summary>
        /// Padding to align following field on 16-bit boundary
        /// </summary>
        /// <remarks>Bits have a value of zero</remarks>
        public ushort PaddingBits { get; set; }

        /// <summary>
        /// Least significant to most significant byte (little-endian DWORD ([MS-DTYP]))
        /// </summary>
        /// <remarks>Encoded directly in the byte stream, not in the bitstream of byte-swapped 16-bit words</remarks>
        public uint R0 { get; set; }

        /// <summary>
        /// Least significant to most significant byte (little-endian DWORD)
        /// </summary>
        /// <remarks>Encoded directly in the byte stream, not in the bitstream of byte-swapped 16-bit words</remarks>
        public uint R1 { get; set; }

        /// <summary>
        /// Least significant to most significant byte (little-endian DWORD)
        /// </summary>
        /// <remarks>Encoded directly in the byte stream, not in the bitstream of byte-swapped 16-bit words</remarks>
        public uint R2 { get; set; }

        /// <summary>
        /// Can use the direct memcpy function, as specified in [IEEE1003.1]
        /// </summary>
        /// <remarks>Encoded directly in the byte stream, not in the bitstream of byte-swapped 16-bit words</remarks>
        public byte[]? RawDataBytes { get; set; }

        /// <summary>
        /// Only if uncompressed size is odd
        /// </summary>
        public byte AlignmentByte { get; set; }
    }
}

View File

@@ -0,0 +1,51 @@
namespace SabreTools.IO.Compression.LZX
{
    /// <summary>
    /// The fields of a verbatim block that follow the generic block header
    /// </summary>
    /// <see href="https://interoperability.blob.core.windows.net/files/MS-PATCH/%5bMS-PATCH%5d.pdf"/>
    internal class VerbatimBlockData : BlockData
    {
        /// <summary>
        /// Pretree for first 256 elements of main tree
        /// </summary>
        /// <remarks>20 elements, 4 bits each</remarks>
        public byte[]? PretreeFirst256 { get; set; }

        /// <summary>
        /// Path lengths of first 256 elements of main tree
        /// </summary>
        /// <remarks>Encoded using pretree</remarks>
        public int[]? PathLengthsFirst256 { get; set; }

        /// <summary>
        /// Pretree for remainder of main tree
        /// </summary>
        /// <remarks>20 elements, 4 bits each</remarks>
        public byte[]? PretreeRemainder { get; set; }

        /// <summary>
        /// Path lengths of remaining elements of main tree
        /// </summary>
        /// <remarks>Encoded using pretree</remarks>
        public int[]? PathLengthsRemainder { get; set; }

        /// <summary>
        /// Pretree for length tree
        /// </summary>
        /// <remarks>20 elements, 4 bits each</remarks>
        public byte[]? PretreeLengthTree { get; set; }

        /// <summary>
        /// Path lengths of elements in length tree
        /// </summary>
        /// <remarks>Encoded using pretree</remarks>
        public int[]? PathLengthsLengthTree { get; set; }

        /// <summary>
        /// Token sequence (matches and literals)
        /// </summary>
        /// <remarks>Variable</remarks>
        public byte[]? TokenSequence { get; set; }
    }
}

View File

@@ -0,0 +1,28 @@
namespace SabreTools.IO.Compression.MSZIP
{
    /// <summary>
    /// Each MSZIP block MUST consist of a 2-byte MSZIP signature and one or more RFC 1951 blocks. The
    /// 2-byte MSZIP signature MUST consist of the bytes 0x43 and 0x4B. The MSZIP signature MUST be
    /// the first 2 bytes in the MSZIP block. The MSZIP signature is shown in the following packet diagram.
    ///
    /// Each MSZIP block is the result of a single deflate compression operation, as defined in [RFC1951].
    /// The compressor that performs the compression operation MUST generate one or more RFC 1951
    /// blocks, as defined in [RFC1951]. The number, deflation mode, and type of RFC 1951 blocks in each
    /// MSZIP block is determined by the compressor, as defined in [RFC1951]. The last RFC 1951 block in
    /// each MSZIP block MUST be marked as the "end" of the stream, as defined by [RFC1951]
    /// section 3.2.3. Decoding trees MUST be discarded after each RFC 1951 block, but the history buffer
    /// MUST be maintained. Each MSZIP block MUST represent no more than 32 KB of uncompressed data.
    ///
    /// The maximum compressed size of each MSZIP block is 32 KB + 12 bytes. This enables the MSZIP
    /// block to contain 32 KB of data split between two noncompressed RFC 1951 blocks, each of which
    /// has a value of BTYPE = 00.
    /// </summary>
    /// <see href="https://interoperability.blob.core.windows.net/files/MS-MCI/%5bMS-MCI%5d.pdf"/>
    internal class BlockHeader
    {
        /// <summary>
        /// 'CK'
        /// </summary>
        /// <remarks>Bytes 0x43, 0x4B per the MS-MCI specification</remarks>
        public ushort Signature { get; set; }
    }
}

View File

@@ -1,7 +1,6 @@
using System;
using System.IO;
using SabreTools.IO.Extensions;
using SabreTools.Models.Compression.MSZIP;
namespace SabreTools.IO.Compression.MSZIP
{

View File

@@ -0,0 +1,50 @@
namespace SabreTools.IO.Compression.Quantum
{
    /// <see href="http://www.russotto.net/quantumcomp.html"/>
    internal static class Constants
    {
        /// <summary>
        /// Base value for each position slot
        /// </summary>
        /// <remarks>NOTE(review): presumed from the companion PositionExtraBits table — confirm against the spec</remarks>
        public static readonly int[] PositionSlot =
        [
            0x00000, 0x00001, 0x00002, 0x00003, 0x00004, 0x00006, 0x00008, 0x0000c,
            0x00010, 0x00018, 0x00020, 0x00030, 0x00040, 0x00060, 0x00080, 0x000c0,
            0x00100, 0x00180, 0x00200, 0x00300, 0x00400, 0x00600, 0x00800, 0x00c00,
            0x01000, 0x01800, 0x02000, 0x03000, 0x04000, 0x06000, 0x08000, 0x0c000,
            0x10000, 0x18000, 0x20000, 0x30000, 0x40000, 0x60000, 0x80000, 0xc0000,
            0x100000, 0x180000
        ];

        /// <summary>
        /// Number of extra bits to read for each position slot
        /// </summary>
        public static readonly int[] PositionExtraBits =
        [
            0, 0, 0, 0, 1, 1, 2, 2,
            3, 3, 4, 4, 5, 5, 6, 6,
            7, 7, 8, 8, 9, 9, 10, 10,
            11, 11, 12, 12, 13, 13, 14, 14,
            15, 15, 16, 16, 17, 17, 18, 18,
            19, 19
        ];

        /// <summary>
        /// Base value for each length slot
        /// </summary>
        /// <remarks>NOTE(review): presumed from the companion LengthExtraBits table — confirm against the spec</remarks>
        public static readonly int[] LengthSlot =
        [
            0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x08,
            0x0a, 0x0c, 0x0e, 0x12, 0x16, 0x1a, 0x1e, 0x26,
            0x2e, 0x36, 0x3e, 0x4e, 0x5e, 0x6e, 0x7e, 0x9e,
            0xbe, 0xde, 0xfe
        ];

        /// <summary>
        /// Number of extra bits to read for each length slot
        /// </summary>
        public static readonly int[] LengthExtraBits =
        [
            0, 0, 0, 0, 0, 0, 1, 1,
            1, 1, 2, 2, 2, 2, 3, 3,
            3, 3, 4, 4, 4, 4, 5, 5,
            5, 5, 0
        ];

        /// <summary>
        /// Number of position slots for (tsize - 10)
        /// </summary>
        public static readonly int[] NumPositionSlots =
        [
            20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42
        ];
    }
}

View File

@@ -2,8 +2,7 @@ using System;
using System.Collections.Generic;
using System.IO;
using SabreTools.IO.Streams;
using SabreTools.Models.Compression.Quantum;
using static SabreTools.Models.Compression.Quantum.Constants;
using static SabreTools.IO.Compression.Quantum.Constants;
namespace SabreTools.IO.Compression.Quantum
{

View File

@@ -0,0 +1,45 @@
namespace SabreTools.IO.Compression.Quantum
{
    /// <summary>
    /// Identifies which symbol model a Quantum selector refers to
    /// </summary>
    /// <see href="http://www.russotto.net/quantumcomp.html"/>
    internal enum SelectorModel
    {
        /// <summary>
        /// Literal model, 64 entries, start at symbol 0
        /// </summary>
        SELECTOR_0 = 0,

        /// <summary>
        /// Literal model, 64 entries, start at symbol 64
        /// </summary>
        SELECTOR_1 = 1,

        /// <summary>
        /// Literal model, 64 entries, start at symbol 128
        /// </summary>
        SELECTOR_2 = 2,

        /// <summary>
        /// Literal model, 64 entries, start at symbol 192
        /// </summary>
        SELECTOR_3 = 3,

        /// <summary>
        /// LZ model, 3 character matches, max 24 entries, start at symbol 0
        /// </summary>
        SELECTOR_4 = 4,

        /// <summary>
        /// LZ model, 4 character matches, max 36 entries, start at symbol 0
        /// </summary>
        SELECTOR_5 = 5,

        /// <summary>
        /// LZ model, 5+ character matches, max 42 entries, start at symbol 0
        /// </summary>
        SELECTOR_6_POSITION = 6,

        /// <summary>
        /// LZ model, 5+ character matches, 27 entries, start at symbol 0
        /// </summary>
        SELECTOR_6_LENGTH = 7,
    }
}

View File

@@ -0,0 +1,24 @@
namespace SabreTools.IO.Compression.Quantum
{
    /// <see href="http://www.russotto.net/quantumcomp.html"/>
    internal sealed class Model
    {
        // Number of entries in the symbol table
        public int Entries { get; set; }

        /// <remarks>
        /// All the models are initialized with the symbols in symbol
        /// order in the table, and with every symbol in the table
        /// having a frequency of 1
        /// </remarks>
        public ModelSymbol[]? Symbols { get; set; }

        /// <remarks>
        /// The initial total frequency is equal to the number of entries
        /// in the table
        /// </remarks>
        public int TotalFrequency { get; set; }

        /// <remarks>The initial time_to_reorder value is 4</remarks>
        public int TimeToReorder { get; set; }
    }
}

View File

@@ -0,0 +1,15 @@
namespace SabreTools.IO.Compression.Quantum
{
    /// <see href="http://www.russotto.net/quantumcomp.html"/>
    internal sealed class ModelSymbol
    {
        // The symbol value itself
        public ushort Symbol { get; set; }

        /// <summary>
        /// The cumulative frequency is the frequency of all the symbols
        /// which are at a higher index in the table than that symbol —
        /// thus the last entry in the table has a cumulative frequency of 0.
        /// </summary>
        public ushort CumulativeFrequency { get; set; }
    }
}

View File

@@ -1,6 +1,5 @@
using System;
using System.IO;
using SabreTools.Models.LZ;
namespace SabreTools.IO.Compression.SZDD
{
@@ -15,7 +14,7 @@ namespace SabreTools.IO.Compression.SZDD
/// <summary>
/// Source stream for the decompressor
/// </summary>
private readonly BufferedStream _source;
private readonly Streams.BufferedStream _source;
/// <summary>
/// SZDD format being decompressed
@@ -37,19 +36,19 @@ namespace SabreTools.IO.Compression.SZDD
// Initialize the window with space characters
_window = Array.ConvertAll(_window, b => (byte)0x20);
_source = new BufferedStream(source);
_source = new Streams.BufferedStream(source);
}
/// <summary>
/// Create a KWAJ decompressor
/// </summary>
public static Decompressor CreateKWAJ(byte[] source, KWAJCompressionType compressionType)
public static Decompressor CreateKWAJ(byte[] source, ushort compressionType)
=> CreateKWAJ(new MemoryStream(source), compressionType);
/// <summary>
/// Create a KWAJ decompressor
/// </summary>
public static Decompressor CreateKWAJ(Stream source, KWAJCompressionType compressionType)
public static Decompressor CreateKWAJ(Stream source, ushort compressionType)
{
// Create the decompressor
var decompressor = new Decompressor(source);
@@ -57,11 +56,11 @@ namespace SabreTools.IO.Compression.SZDD
// Set the format and return
decompressor._format = compressionType switch
{
KWAJCompressionType.NoCompression => Format.KWAJNoCompression,
KWAJCompressionType.NoCompressionXor => Format.KWAJXor,
KWAJCompressionType.QBasic => Format.KWAJQBasic,
KWAJCompressionType.LZH => Format.KWAJLZH,
KWAJCompressionType.MSZIP => Format.KWAJMSZIP,
0x0000 => Format.KWAJNoCompression,
0x0001 => Format.KWAJXor,
0x0002 => Format.KWAJQBasic,
0x0003 => Format.KWAJLZH,
0x0004 => Format.KWAJMSZIP,
_ => throw new IndexOutOfRangeException(nameof(source)),
};
return decompressor;
@@ -229,77 +228,5 @@ namespace SabreTools.IO.Compression.SZDD
dest.Flush();
return true;
}
/// <summary>
/// Buffered stream that reads in blocks
/// </summary>
private class BufferedStream
{
/// <summary>
/// Source stream for populating the buffer
/// </summary>
private readonly Stream _source;
/// <summary>
/// Internal buffer to read
/// </summary>
private readonly byte[] _buffer = new byte[2048];
/// <summary>
/// Current pointer into the buffer
/// </summary>
private int _bufferPtr = 0;
/// <summary>
/// Represents the number of available bytes
/// </summary>
private int _available = -1;
/// <summary>
/// Create a new buffered stream
/// </summary>
public BufferedStream(Stream source)
{
_source = source;
}
/// <summary>
/// Read the next byte from the buffer, if possible
/// </summary>
public byte? ReadNextByte()
{
// Ensure the buffer first
if (!EnsureBuffer())
return null;
// Return the next available value
return _buffer[_bufferPtr++];
}
/// <summary>
/// Ensure the buffer has data to read
/// </summary>
private bool EnsureBuffer()
{
// Force an update if in the initial state
if (_available == -1)
{
_available = _source.Read(_buffer, 0, _buffer.Length);
_bufferPtr = 0;
return _available != 0;
}
// If the pointer is out of range
if (_bufferPtr >= _available)
{
_available = _source.Read(_buffer, 0, _buffer.Length);
_bufferPtr = 0;
return _available != 0;
}
// Otherwise, assume data is available
return true;
}
}
}
}

View File

@@ -2,7 +2,6 @@ using System;
using System.IO;
using SabreTools.Hashing;
using SabreTools.Matching;
using static SabreTools.Models.MoPaQ.Constants;
namespace SabreTools.IO.Encryption
{
@@ -11,6 +10,14 @@ namespace SabreTools.IO.Encryption
/// </summary>
public class MoPaQDecrypter
{
#region Constants
private const uint MPQ_HASH_KEY2_MIX = 0x400;
private const uint STORM_BUFFER_SIZE = 0x500;
#endregion
#region Private Instance Variables
/// <summary>

View File

@@ -59,14 +59,24 @@ namespace SabreTools.IO.Extensions
/// </summary>
/// <param name="charLimit">Number of characters needed to be a valid string, default 5</param>
/// <returns>String list containing the requested data, null on error</returns>
#if NET5_0_OR_GREATER
/// <remarks>This reads both Latin1 and UTF-16 strings from the input data</remarks>
#else
/// <remarks>This reads both ASCII and UTF-16 strings from the input data</remarks>
#endif
public static List<string>? ReadStringsFrom(this byte[]? input, int charLimit = 5)
{
// Validate the data
if (input == null || input.Length == 0)
return null;
#if NET5_0_OR_GREATER
// Check for Latin1 strings
var asciiStrings = input.ReadStringsWithEncoding(charLimit, Encoding.Latin1);
#else
// Check for ASCII strings
var asciiStrings = input.ReadStringsWithEncoding(charLimit, Encoding.ASCII);
#endif
// Check for Unicode strings
// We are limiting the check for Unicode characters with a second byte of 0x00 for now
@@ -101,13 +111,17 @@ namespace SabreTools.IO.Extensions
// Short-circuit for some encoding types
if (encoding.CodePage == Encoding.ASCII.CodePage)
return bytes.ReadFixedWidthEncodingStrings(charLimit, Encoding.ASCII, 1);
return bytes.ReadAsciiStrings(charLimit);
#if NET5_0_OR_GREATER
else if (encoding.CodePage == Encoding.Latin1.CodePage)
return bytes.ReadFixedWidthEncodingStrings(charLimit, Encoding.Latin1, 1);
#endif
else if (encoding.IsSingleByte)
return bytes.ReadFixedWidthEncodingStrings(charLimit, encoding, 1);
else if (encoding.CodePage == Encoding.Unicode.CodePage)
return bytes.ReadFixedWidthEncodingStrings(charLimit, Encoding.Unicode, 2);
else if (encoding.CodePage == Encoding.BigEndianUnicode.CodePage)
return bytes.ReadFixedWidthEncodingStrings(charLimit, Encoding.BigEndianUnicode, 2);
else if (encoding.CodePage == Encoding.UTF32.CodePage)
return bytes.ReadFixedWidthEncodingStrings(charLimit, Encoding.UTF32, 4);
@@ -133,7 +147,7 @@ namespace SabreTools.IO.Extensions
char c = (char)reader.Read();
// If the character is invalid
if (char.IsControl(c) || (c & 0xFF00) != 0)
if (char.IsControl(c) || (c & 0xFFFFFF00) != 0)
{
// Seek to the end of the last found string
string str = sb.ToString();
@@ -141,6 +155,10 @@ namespace SabreTools.IO.Extensions
ms.Seek(lastOffset, SeekOrigin.Begin);
reader.DiscardBufferedData();
// If there is no cached string
if (str.Length == 0)
continue;
// Add the string if long enough
if (str.Length >= charLimit)
strings.Add(str);
@@ -209,10 +227,77 @@ namespace SabreTools.IO.Extensions
// Pretend only one byte was read
offset -= width - 1;
// If there is no cached string
if (sb.Length == 0)
continue;
// Add the string if long enough
string str = sb.ToString();
if (str.Length >= charLimit)
strings.Add(str);
if (sb.Length >= charLimit)
strings.Add(sb.ToString());
// Clear the builder and continue
#if NET20 || NET35
sb = new();
#else
sb.Clear();
#endif
continue;
}
// Otherwise, add the character to the builder and continue
sb.Append(c);
}
// Handle any remaining data
if (sb.Length >= charLimit)
strings.Add(sb.ToString());
return strings;
}
/// <summary>
/// Read string data from a byte array using ASCII encoding
/// </summary>
/// <param name="bytes">Byte array representing the source data</param>
/// <param name="charLimit">Number of characters needed to be a valid string</param>
/// <returns>String list containing the requested data, empty on error</returns>
/// <remarks>Handling for 7-bit ASCII needs to be done differently than other fixed-width encodings</remarks>
#if NET20
private static List<string> ReadAsciiStrings(this byte[] bytes, int charLimit)
#else
private static HashSet<string> ReadAsciiStrings(this byte[] bytes, int charLimit)
#endif
{
if (charLimit <= 0 || charLimit > bytes.Length)
return [];
// Create the string set to return
#if NET20
var strings = new List<string>();
#else
var strings = new HashSet<string>();
#endif
// Create a string builder for the loop
var sb = new StringBuilder();
// Check for strings
int offset = 0;
while (offset < bytes.Length)
{
// Read the next character from the stream
char c = bytes.ReadChar(ref offset);
// If the character is invalid
if (char.IsControl(c) || c > 0x7F)
{
// If there is no cached string
if (sb.Length == 0)
continue;
// Add the string if long enough
if (sb.Length >= charLimit)
strings.Add(sb.ToString());
// Clear the builder and continue
#if NET20 || NET35

View File

@@ -1065,6 +1065,10 @@ namespace SabreTools.IO.Extensions
/// </summary>
private static byte[] ReadExactlyToBuffer(byte[] content, ref int offset, int length)
{
// If we have an invalid offset
if (offset < 0 || offset >= content.Length)
throw new ArgumentOutOfRangeException($"{nameof(offset)} must be between 0 and {content.Length}, {offset} provided");
// If we have an invalid length
if (length < 0)
throw new ArgumentOutOfRangeException($"{nameof(length)} must be 0 or a positive value, {length} requested");

View File

@@ -1,6 +1,5 @@
using System.Collections.Generic;
using System.IO;
using System.Text;
namespace SabreTools.IO.Extensions
{
@@ -76,10 +75,15 @@ namespace SabreTools.IO.Extensions
/// <summary>
/// Read string data from a Stream
/// </summary>
/// <param name="charLimit">Number of characters needed to be a valid string, default 5</param>
/// <param name="position">Position in the source to read from</param>
/// <param name="length">Length of the requested data</param>
/// <param name="charLimit">Number of characters needed to be a valid string, default 5</param>
/// <returns>String list containing the requested data, null on error</returns>
#if NET5_0_OR_GREATER
/// <remarks>This reads both Latin1 and UTF-16 strings from the input data</remarks>
#else
/// <remarks>This reads both ASCII and UTF-16 strings from the input data</remarks>
#endif
public static List<string>? ReadStringsFrom(this Stream? input, int position, int length, int charLimit = 5)
{
// Read the data as a byte array first
@@ -87,23 +91,7 @@ namespace SabreTools.IO.Extensions
if (data == null)
return null;
// Check for ASCII strings
var asciiStrings = data.ReadStringsWithEncoding(charLimit, Encoding.ASCII);
// Check for UTF-8 strings
// We are limiting the check for Unicode characters with a second byte of 0x00 for now
var utf8Strings = data.ReadStringsWithEncoding(charLimit, Encoding.UTF8);
// Check for Unicode strings
// We are limiting the check for Unicode characters with a second byte of 0x00 for now
var unicodeStrings = data.ReadStringsWithEncoding(charLimit, Encoding.Unicode);
// Ignore duplicate strings across encodings
List<string> sourceStrings = [.. asciiStrings, .. utf8Strings, .. unicodeStrings];
// Sort the strings and return
sourceStrings.Sort();
return sourceStrings;
return data.ReadStringsFrom(charLimit);
}
/// <summary>

View File

@@ -11,7 +11,7 @@
<SuppressTfmSupportBuildWarnings>true</SuppressTfmSupportBuildWarnings>
<SymbolPackageFormat>snupkg</SymbolPackageFormat>
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
<Version>1.7.3</Version>
<Version>1.7.4</Version>
<!-- Package Properties -->
<Authors>Matt Nadareski</Authors>
@@ -31,7 +31,6 @@
<ItemGroup>
<PackageReference Include="SabreTools.Matching" Version="1.6.0" />
<PackageReference Include="SabreTools.Models" Version="1.7.1" />
<PackageReference Include="SabreTools.Hashing" Version="1.5.0" />
</ItemGroup>

View File

@@ -0,0 +1,77 @@
using System.IO;
namespace SabreTools.IO.Streams
{
    /// <summary>
    /// Buffered stream that reads in blocks
    /// </summary>
    /// <remarks>Not a true <see cref="Stream"/> implementation yet</remarks>
    public class BufferedStream
    {
        /// <summary>
        /// Source stream for populating the buffer
        /// </summary>
        private readonly Stream _source;

        /// <summary>
        /// Internal buffer to read
        /// </summary>
        private readonly byte[] _buffer = new byte[2048];

        /// <summary>
        /// Current pointer into the buffer
        /// </summary>
        private int _bufferPtr = 0;

        /// <summary>
        /// Number of valid bytes in <see cref="_buffer"/>, -1 until the first read
        /// </summary>
        private int _available = -1;

        /// <summary>
        /// Create a new buffered stream
        /// </summary>
        /// <param name="source">Stream whose contents populate the buffer</param>
        public BufferedStream(Stream source)
        {
            _source = source;
        }

        /// <summary>
        /// Read the next byte from the buffer, if possible
        /// </summary>
        /// <returns>The next byte, or null when the source is exhausted</returns>
        public byte? ReadNextByte()
        {
            // Ensure the buffer first
            if (!EnsureBuffer())
                return null;

            // Return the next available value
            return _buffer[_bufferPtr++];
        }

        /// <summary>
        /// Ensure the buffer has data to read
        /// </summary>
        /// <returns>True if at least one byte is available, false on end of stream</returns>
        private bool EnsureBuffer()
        {
            // Refill on first use (-1 sentinel) or once all buffered bytes have
            // been consumed. These two cases previously duplicated the same
            // refill logic; they are intentionally merged here.
            if (_available == -1 || _bufferPtr >= _available)
            {
                _available = _source.Read(_buffer, 0, _buffer.Length);
                _bufferPtr = 0;

                // Stream.Read returns 0 only when no more data is available
                return _available != 0;
            }

            // Otherwise, data is still pending in the current block
            return true;
        }
    }
}