Start writing Inflate implementation

2026-04-26 08:10:32 +00:00 · 2022-12-14 10:55:56 -08:00
parent aaee56f44e
commit d715072cbc
1 changed files with 790 additions and 141 deletions
--- a/BurnOutSharp/FileType/MicrosoftCAB.MSZIP.cs
+++ b/BurnOutSharp/FileType/MicrosoftCAB.MSZIP.cs
@@ -1,4 +1,7 @@
-using BurnOutSharp.Tools;
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using BurnOutSharp.Tools;

 /// <see href="https://interoperability.blob.core.windows.net/files/MS-MCI/%5bMS-MCI%5d.pdf"/>
 /// <see href="https://www.rfc-editor.org/rfc/rfc1951"/>
@@ -81,7 +84,7 @@ namespace BurnOutSharp.FileType
    /// <summary>
    /// How the data are compressed
    /// </summary>
-    public enum DeflateCompressionType : byte
+    public enum MSZIPDeflateCompressionType : byte
    {
        /// <summary>
        /// no compression
@@ -104,173 +107,819 @@ namespace BurnOutSharp.FileType
        Reserved = 0b11,
    }

+    public class MSZIPDeflateStream
+    {
+        #region Instance Variables
+
+        /// <summary>
+        /// Original data source to read from
+        /// </summary>
+        private System.IO.Stream _dataStream = null;
+
+        /// <summary>
+        /// Current rolling buffer
+        /// </summary>
+        private byte[] _buffer = null;
+
+        /// <summary>
+        /// Current position in the buffer
+        /// </summary>
+        private int _bufferPointer = -1;
+
+        /// <summary>
+        /// Bit buffer to read bits from when necessary
+        /// </summary>
+        private BitArray _bitBuffer = null;
+
+        /// <summary>
+        /// Number of bits left in the buffer
+        /// </summary>
+        private int _bitsLeft = 0;
+
+        #endregion
+
+        /// <summary>
+        /// Constructor
+        /// </summary>
+        public MSZIPDeflateStream(System.IO.Stream dataStream)
+        {
+            _dataStream = dataStream;
+        }
+
+        /// <summary>
+        /// Read between 0 and 64 bits of data from the stream assuming LSB
+        /// </summary>
+        /// <exception cref="ArgumentOutOfRangeException"></exception>
+        public ulong ReadBitsLSB(int numBits)
+        {
+            // If we are reading an invalid number of bits
+            if (numBits < 0 || numBits > 64)
+                throw new ArgumentOutOfRangeException();
+
+            // Allocate the bit buffer
+            ulong bitBuffer = 0;
+
+            // If the bit buffer has the right number remaining
+            if (_bitsLeft >= numBits)
+            {
+                for (int i = 0; i < numBits; i++)
+                {
+                    bitBuffer |= _bitBuffer[i + _bitBuffer.Length - _bitsLeft--] ? 1u : 0;
+                    bitBuffer <<= 1;
+                }
+
+                return bitBuffer;
+            }
+
+            // Otherwise, we need to read what we can
+            int bitsRemaining = _bitsLeft;
+            for (int i = 0; i < bitsRemaining; i++)
+            {
+                bitBuffer |= _bitBuffer[i + _bitBuffer.Length - _bitsLeft--] ? 1u : 0;
+                bitBuffer <<= 1;
+            }
+
+            // Fill the bit buffer, if possible
+            FillBitBuffer();
+
+            // If we couldn't read anything, throw an exception
+            if (_buffer == null)
+                throw new IndexOutOfRangeException();
+
+            // Otherwise, read in the remaining bits needed
+            for (int i = 0; i < bitsRemaining; i++)
+            {
+                bitBuffer |= _bitBuffer[i + _bitBuffer.Length - _bitsLeft--] ? 1u : 0;
+                bitBuffer <<= 1;
+            }
+
+            return bitBuffer;
+        }
+
+        /// <summary>
+        /// Read between 0 and 64 bits of data from the stream assuming MSB
+        /// </summary>
+        /// <exception cref="ArgumentOutOfRangeException"></exception>
+        public ulong ReadBitsMSB(int numBits)
+        {
+            // If we are reading an invalid number of bits
+            if (numBits < 0 || numBits > 64)
+                throw new ArgumentOutOfRangeException();
+
+            // Allocate the bit buffer
+            ulong bitBuffer = 0;
+
+            // If the bit buffer has the right number remaining
+            if (_bitsLeft >= numBits)
+            {
+                for (int i = 0; i < numBits; i++)
+                {
+                    bitBuffer |= _bitBuffer[i + _bitsLeft--] ? 1u : 0;
+                    bitBuffer <<= 1;
+                }
+
+                return bitBuffer;
+            }
+
+            // Otherwise, we need to read what we can
+            int bitsRemaining = _bitsLeft;
+            for (int i = 0; i < bitsRemaining; i++)
+            {
+                bitBuffer |= _bitBuffer[i + _bitsLeft--] ? 1u : 0;
+                bitBuffer <<= 1;
+            }
+
+            // Fill the bit buffer, if possible
+            FillBitBuffer();
+
+            // If we couldn't read anything, throw an exception
+            if (_buffer == null)
+                throw new IndexOutOfRangeException();
+
+            // Otherwise, read in the remaining bits needed
+            for (int i = 0; i < bitsRemaining; i++)
+            {
+                bitBuffer |= _bitBuffer[i + _bitsLeft--] ? 1u : 0;
+                bitBuffer <<= 1;
+            }
+
+            return bitBuffer;
+        }
+
+        /// <summary>
+        /// Read 0 or more bytes of data from the stream assuming LSB
+        /// </summary>
+        public byte[] ReadBytesLSB(int numBytes)
+        {
+            // If we are reading an invalid number of bytes
+            if (numBytes < 0)
+                throw new ArgumentOutOfRangeException();
+
+            // Allocate the byte buffer
+            byte[] byteBuffer = new byte[numBytes];
+            int byteBufferPtr = 0;
+
+            // If the bit buffer has the right number remaining
+            if (_bitsLeft >= numBytes * 8)
+            {
+                byte fullBitBuffer = 0;
+                for (int i = 0; i < numBytes * 8; i++)
+                {
+                    fullBitBuffer |= (byte)(_bitBuffer[i + _bitBuffer.Length - _bitsLeft--] ? 1 : 0);
+                    if (i % 8 == 7)
+                    {
+                        byteBuffer[byteBufferPtr++] = fullBitBuffer;
+                        fullBitBuffer = 0;
+                    }
+                    else
+                    {
+                        fullBitBuffer <<= 1;
+                    }
+                }
+
+                byteBuffer[byteBufferPtr++] = fullBitBuffer;
+                return byteBuffer;
+            }
+
+            // Otherwise, we need to read what we can
+            int bitsRemaining = _bitsLeft;
+
+            byte bitBuffer = 0;
+            for (int i = 0; i < numBytes * 8; i++)
+            {
+                bitBuffer |= (byte)(_bitBuffer[i + _bitBuffer.Length - _bitsLeft--] ? 1 : 0);
+                if (i % 8 == 7)
+                {
+                    byteBuffer[byteBufferPtr++] = bitBuffer;
+                    bitBuffer = 0;
+                }
+                else
+                {
+                    bitBuffer <<= 1;
+                }
+            }
+
+            // Fill the bit buffer, if possible
+            FillBitBuffer();
+
+            // If we couldn't read anything, throw an exception
+            if (_buffer == null)
+                throw new IndexOutOfRangeException();
+
+            // Otherwise, read in the remaining bits needed
+            for (int i = 0; i < bitsRemaining; i++)
+            {
+                bitBuffer |= (byte)(_bitBuffer[i + _bitBuffer.Length - _bitsLeft--] ? 1 : 0);
+                if (i % 8 == 7)
+                {
+                    byteBuffer[byteBufferPtr++] = bitBuffer;
+                    bitBuffer = 0;
+                }
+                else
+                {
+                    bitBuffer <<= 1;
+                }
+            }
+
+            byteBuffer[byteBufferPtr++] = bitBuffer;
+            return byteBuffer;
+        }
+
+        /// <summary>
+        /// Read 0 or more bytes of data from the stream assuming MSB
+        /// </summary>
+        public byte[] ReadBytesMSB(int numBytes)
+        {
+            // If we are reading an invalid number of bytes
+            if (numBytes < 0)
+                throw new ArgumentOutOfRangeException();
+
+            // Allocate the byte buffer
+            byte[] byteBuffer = new byte[numBytes];
+            int byteBufferPtr = 0;
+
+            // If the bit buffer has the right number remaining
+            if (_bitsLeft >= numBytes * 8)
+            {
+                byte fullBitBuffer = 0;
+                for (int i = 0; i < numBytes * 8; i++)
+                {
+                    fullBitBuffer |= (byte)(_bitBuffer[i + _bitsLeft--] ? 1 : 0);
+                    if (i % 8 == 7)
+                    {
+                        byteBuffer[byteBufferPtr++] = fullBitBuffer;
+                        fullBitBuffer = 0;
+                    }
+                    else
+                    {
+                        fullBitBuffer <<= 1;
+                    }
+                }
+
+                byteBuffer[byteBufferPtr++] = fullBitBuffer;
+                return byteBuffer;
+            }
+
+            // Otherwise, we need to read what we can
+            int bitsRemaining = _bitsLeft;
+
+            byte bitBuffer = 0;
+            for (int i = 0; i < numBytes * 8; i++)
+            {
+                bitBuffer |= (byte)(_bitBuffer[i + _bitsLeft--] ? 1 : 0);
+                if (i % 8 == 7)
+                {
+                    byteBuffer[byteBufferPtr++] = bitBuffer;
+                    bitBuffer = 0;
+                }
+                else
+                {
+                    bitBuffer <<= 1;
+                }
+            }
+
+            // Fill the bit buffer, if possible
+            FillBitBuffer();
+
+            // If we couldn't read anything, throw an exception
+            if (_buffer == null)
+                throw new IndexOutOfRangeException();
+
+            // Otherwise, read in the remaining bits needed
+            for (int i = 0; i < bitsRemaining; i++)
+            {
+                bitBuffer |= (byte)(_bitBuffer[i + _bitsLeft--] ? 1 : 0);
+                if (i % 8 == 7)
+                {
+                    byteBuffer[byteBufferPtr++] = bitBuffer;
+                    bitBuffer = 0;
+                }
+                else
+                {
+                    bitBuffer <<= 1;
+                }
+            }
+
+            byteBuffer[byteBufferPtr++] = bitBuffer;
+            return byteBuffer;
+        }
+
+        /// <summary>
+        /// Discard bits in the array up to the next byte boundary
+        /// </summary>
+        public void DiscardToByteBoundary()
+        {
+            int bitsToDiscard = _bitsLeft & 7;
+            _bitsLeft -= bitsToDiscard;
+        }
+
+        /// <summary>
+        /// Fill the internal bit buffer from the internal buffer
+        /// </summary>
+        /// <remarks>Fills up to 4 bytes worth of data at a time</remarks>
+        private void FillBitBuffer()
+        {
+            // If we have 4 bytes left, just create the bit buffer directly
+            if (_bufferPointer < _buffer.Length - 4)
+            {
+                // Read all 4 bytes directly
+                byte[] readAllBytes = new ReadOnlySpan<byte>(_buffer, _bufferPointer, 4).ToArray();
+                _bufferPointer += 4;
+
+                // Create the new bit buffer
+                _bitBuffer = new BitArray(readAllBytes);
+                _bitsLeft = 32;
+                return;
+            }
+
+            // If we have less than 4 bytes left, we need to get creative
+            // Create the byte array to hold the data
+            byte[] bytes = new byte[4];
+
+            // Read what we can first
+            int bytesRemaining = _buffer.Length - _bufferPointer;
+            if (bytesRemaining > 0)
+            {
+                byte[] readBytesRemaining = new ReadOnlySpan<byte>(_buffer, _bufferPointer, bytesRemaining).ToArray();
+                Array.Copy(readBytesRemaining, 0, bytes, 0, bytesRemaining);
+                _bufferPointer += bytesRemaining;
+            }
+
+            // Fill the buffer, if we can
+            FillBuffer();
+
+            // If we couldn't read anything, reset the buffer
+            if (_buffer == null && bytesRemaining == 4)
+            {
+                _bitBuffer = null;
+                _bitsLeft = 0;
+                return;
+            }
+
+            // If we don't have anything left, just create a bit array
+            if (_buffer == null)
+            {
+                byte[] readBytesRemaining = new ReadOnlySpan<byte>(bytes, 0, bytesRemaining).ToArray();
+                _bitBuffer = new BitArray(readBytesRemaining);
+                _bitsLeft = 8 * bytesRemaining;
+                return;
+            }
+
+            // Otherwise, we want to read in the remaining necessary bytes
+            int bytesToRead = 4 - bytesRemaining;
+            byte[] bytesRead = new ReadOnlySpan<byte>(_buffer, _bufferPointer, bytesToRead).ToArray();
+            _bufferPointer += bytesToRead;
+
+            Array.Copy(bytesRead, 0, bytes, bytesRemaining, bytesToRead);
+            _bitBuffer = new BitArray(bytes);
+            _bitsLeft = 32;
+        }
+
+        /// <summary>
+        /// Fill the internal buffer from the original data source
+        /// </summary>
+        /// <remarks>Reads up to 4096 bytes at a time</remarks>
+        private void FillBuffer()
+        {
+            // Get the amount of bytes to read
+            int bytesRemaining = (int)(_dataStream.Length - _dataStream.Position);
+            int bytesToRead = Math.Min(bytesRemaining, 4096);
+
+            // If we can't read any bytes, reset the buffer
+            if (bytesToRead == 0)
+            {
+                _buffer = null;
+                _bufferPointer = -1;
+                return;
+            }
+
+            // Otherwise, read and reset the position
+            _buffer = _dataStream.ReadBytes(bytesToRead);
+            _bufferPointer = 0;
+        }
+    }
+
    public class MSZIPDeflate
    {
-        /*
-        3.2.5. Compressed blocks (length and distance codes)
+        #region Properties

-         As noted above, encoded data blocks in the "deflate" format
-         consist of sequences of symbols drawn from three conceptually
-         distinct alphabets: either literal bytes, from the alphabet of
-         byte values(0..255), or<length, backward distance> pairs,
-         where the length is drawn from(3..258) and the distance is
-         drawn from(1..32,768).  In fact, the literal and length
-         alphabets are merged into a single alphabet(0..285), where
-         values 0..255 represent literal bytes, the value 256 indicates
-         end-of-block, and values 257..285 represent length codes
-         (possibly in conjunction with extra bits following the symbol
-         code) as follows:
+        /// <summary>
+        /// Match lengths for literal codes 257..285
+        /// </summary>
+        /// <remarks>Each value here is the lower bound for lengths represented</remarks>
+        public Dictionary<int, int> LiteralLengths
+        {
+            get
+            {
+                // If we have cached length mappings, use those
+                if (_literalLengths != null)
+                    return _literalLengths;

-                 Extra Extra               Extra
-            Code Bits Length(s) Code Bits Lengths Code Bits Length(s)
-            ---- ---- ------     ---- ---- -------   ---- ---- -------
-             257   0     3       267   1   15,16     277   4   67-82
-             258   0     4       268   1   17,18     278   4   83-98
-             259   0     5       269   2   19-22     279   4   99-114
-             260   0     6       270   2   23-26     280   4  115-130
-             261   0     7       271   2   27-30     281   5  131-162
-             262   0     8       272   2   31-34     282   5  163-194
-             263   0     9       273   3   35-42     283   5  195-226
-             264   0    10       274   3   43-50     284   5  227-257
-             265   1  11,12      275   3   51-58     285   0    258
-             266   1  13,14      276   3   59-66
+                // Otherwise, build it from scratch
+                _literalLengths = new Dictionary<int, int>
+                {
+                    [257] = 3,
+                    [258] = 4,
+                    [259] = 5,
+                    [260] = 6,
+                    [261] = 7,
+                    [262] = 8,
+                    [263] = 9,
+                    [264] = 10,
+                    [265] = 11, // 11,12
+                    [266] = 13, // 13,14
+                    [267] = 15, // 15,16
+                    [268] = 17, // 17,18
+                    [269] = 19, // 19-22
+                    [270] = 23, // 23-26
+                    [271] = 27, // 27-30
+                    [272] = 31, // 31-34
+                    [273] = 35, // 35-42
+                    [274] = 43, // 43-50
+                    [275] = 51, // 51-58
+                    [276] = 59, // 59-66
+                    [277] = 67, // 67-82
+                    [278] = 83, // 83-98
+                    [279] = 99, // 99-114
+                    [280] = 115, // 115-130
+                    [281] = 131, // 131-162
+                    [282] = 163, // 163-194
+                    [283] = 195, // 195-226
+                    [284] = 227, // 227-257
+                    [285] = 258,
+                };

-         The extra bits should be interpreted as a machine integer
-         stored with the most-significant bit first, e.g., bits 1110
-         represent the value 14.
+                return _literalLengths;
+            }
+        }

-                  Extra Extra               Extra
-             Code Bits Dist  Code Bits   Dist Code Bits Distance
-             ---- ---- ----  ---- ----  ------    ---- ---- --------
-               0   0    1     10   4     33-48    20    9   1025-1536
-               1   0    2     11   4     49-64    21    9   1537-2048
-               2   0    3     12   5     65-96    22   10   2049-3072
-               3   0    4     13   5     97-128   23   10   3073-4096
-               4   1   5,6    14   6    129-192   24   11   4097-6144
-               5   1   7,8    15   6    193-256   25   11   6145-8192
-               6   2   9-12   16   7    257-384   26   12  8193-12288
-               7   2  13-16   17   7    385-512   27   12 12289-16384
-               8   3  17-24   18   8    513-768   28   13 16385-24576
-               9   3  25-32   19   8   769-1024   29   13 24577-32768
-        */
+        /// <summary>
+        /// Extra bits for literal codes 257..285
+        /// </summary>
+        public Dictionary<int, int> LiteralExtraBits
+        {
+            get
+            {
+                // If we have cached bit mappings, use those
+                if (_literalExtraBits != null)
+                    return _literalExtraBits;
+
+                // Otherwise, build it from scratch
+                _literalExtraBits = new Dictionary<int, int>();
+
+                // Literal Value 257 - 264, 0 bits
+                for (int i = 257; i < 265; i++)
+                    _literalExtraBits[i] = 0;
+
+                // Literal Value 265 - 268, 1 bit
+                for (int i = 265; i < 269; i++)
+                    _literalExtraBits[i] = 1;
+
+                // Literal Value 269 - 272, 2 bits
+                for (int i = 269; i < 273; i++)
+                    _literalExtraBits[i] = 2;
+
+                // Literal Value 273 - 276, 3 bits
+                for (int i = 273; i < 277; i++)
+                    _literalExtraBits[i] = 3;
+
+                // Literal Value 277 - 280, 4 bits
+                for (int i = 277; i < 281; i++)
+                    _literalExtraBits[i] = 4;
+
+                // Literal Value 281 - 284, 5 bits
+                for (int i = 281; i < 285; i++)
+                    _literalExtraBits[i] = 5;
+
+                // Literal Value 285, 0 bits
+                _literalExtraBits[285] = 0;
+
+                return _literalExtraBits;
+            }
+        }
+
+        /// <summary>
+        /// Match offsets for distance codes 0..29
+        /// </summary>
+        /// <remarks>Each value here is the lower bound for distances represented</remarks>
+        public static readonly int[] DistanceOffsets = new int[30]
+        {
+            1, 2, 3, 4, 5, 7, 9, 13, 17, 25,
+            33, 49, 65, 97, 129, 193, 257, 385, 513, 769,
+            1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577,
+        };
+
+        /// <summary>
+        /// Extra bits for distance codes 0..29
+        /// </summary>
+        public static readonly int[] DistanceExtraBits = new int[30]
+        {
+            0, 0, 0, 0, 1, 1, 2, 2, 3, 3,
+            4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
+            9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
+        };
+
+        /// <summary>
+        /// The order of the bit length Huffman code lengths
+        /// </summary>
+        public static readonly int[] BitLengthOrder = new int[19]
+        {
+            16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15,
+        };
+
+        #endregion
+
+        #region Instance Variables
+
+        /// <summary>
+        /// Match lengths for literal codes 257..285
+        /// </summary>
+        private Dictionary<int, int> _literalLengths = null;
+
+        /// <summary>
+        /// Extra bits for literal codes 257..285
+        /// </summary>
+        private Dictionary<int, int> _literalExtraBits = null;
+
+        #endregion
+
+        /// <summary>
+        /// The decoding algorithm for the actual data
+        /// </summary>
+        public static void Decode(MSZIPDeflateStream data)
+        {
+            // Create the output byte array
+            List<byte> decodedBytes = new List<byte>();
+
+            // Create the loop variable block
+            MSZIPDeflateBlock block;
+
+            do
+            {
+                ulong header = data.ReadBitsLSB(3);
+                block = new MSZIPDeflateBlock(header);
+
+                // We should never get a reserved block
+                if (block.BTYPE == MSZIPDeflateCompressionType.Reserved)
+                    throw new Exception();
+
+                // If stored with no compression
+                if (block.BTYPE == MSZIPDeflateCompressionType.NoCompression)
+                {
+                    // Skip any remaining bits in current partially processed byte
+                    data.DiscardToByteBoundary();
+
+                    // Read LEN and NLEN
+                    byte[] nonCompressedHeader = data.ReadBytesLSB(4);
+                    block.BlockData = new MSZIPNonCompressedBlock(nonCompressedHeader);
+
+                    // Copy LEN bytes of data to output
+                    ushort length = ((MSZIPNonCompressedBlock)block.BlockData).LEN;
+                    ((MSZIPNonCompressedBlock)block.BlockData).Data = data.ReadBytesLSB(length);
+                    decodedBytes.AddRange(((MSZIPNonCompressedBlock)block.BlockData).Data);
+                }
+
+                // Otherwise
+                else
+                {
+                    block.BlockData = block.BTYPE == MSZIPDeflateCompressionType.DynamicHuffman
+                        ? (IMSZIPBlockData)new MSZIPDynamicHuffmanCompressedBlock()
+                        : (IMSZIPBlockData)new MSZIPFixedHuffmanCompressedBlock();
+
+                    // If compressed with dynamic Huffman codes
+                    if (block.BTYPE == MSZIPDeflateCompressionType.DynamicHuffman)
+                    {
+                        // read representation of code trees (see subsection below)
+                    }
+
+                    // Loop until end of block code recognized
+                    while (true)
+                    {
+                        /*
+                        decode literal/length value from input stream
+                        if value < 256
+                            copy value (literal byte) to output stream
+                        otherwise
+                            if value = end of block (256)
+                                break from loop
+                            otherwise (value = 257..285)
+                                decode distance from input stream
+
+                            move backwards distance bytes in the output
+                            stream, and copy length bytes from this
+                            position to the output stream.
+                        */
+                    }
+                }
+            } while (!block.BFINAL);
+
+            /*
+             Note that a duplicated string reference may refer to a string
+             in a previous block; i.e., the backward distance may cross one
+             or more block boundaries.  However a distance cannot refer past
+             the beginning of the output stream.  (An application using a
+             preset dictionary might discard part of the output stream; a
+             distance can refer to that part of the output stream anyway)
+             Note also that the referenced string may overlap the current
+             position; for example, if the last 2 bytes decoded have values
+             X and Y, a string reference with <length = 5, distance = 2>
+             adds X,Y,X,Y,X to the output stream.
+            */
+        }
    }

    public class MSZIPDeflateBlock
    {
-        /*
-        3.2.3. Details of block format
+        #region Properties

-         Each block of compressed data begins with 3 header bits
-         containing the following data:
+        /// <summary>
+        /// Set if and only if this is the last block of the data set.
+        /// </summary>
+        /// <remarks>Bit 0</remarks>
+        public bool BFINAL { get; set; }

-            first bit       BFINAL
-            next 2 bits     BTYPE
+        /// <summary>
+        /// Specifies how the data are compressed
+        /// </summary>
+        /// <remarks>Bits 1-2</remarks>
+        public MSZIPDeflateCompressionType BTYPE { get; set; }

-         Note that the header bits do not necessarily begin on a byte
-         boundary, since a block does not necessarily occupy an integral
-         number of bytes.
+        /// <summary>
+        /// Block data as defined by the compression type
+        /// </summary>
+        public IMSZIPBlockData BlockData { get; set; }

-         BFINAL is set if and only if this is the last block of the data
-         set.
+        #endregion

-         BTYPE specifies how the data are compressed, as follows:
-
-            00 - no compression
-            01 - compressed with fixed Huffman codes
-            10 - compressed with dynamic Huffman codes
-            11 - reserved (error)
-
-         The only difference between the two compressed cases is how the
-         Huffman codes for the literal/length and distance alphabets are
-         defined.
-
-         In all cases, the decoding algorithm for the actual data is as
-         follows:
-
-            do
-               read block header from input stream.
-               if stored with no compression
-                  skip any remaining bits in current partially
-                     processed byte
-                  read LEN and NLEN (see next section)
-                  copy LEN bytes of data to output
-               otherwise
-                  if compressed with dynamic Huffman codes
-                     read representation of code trees (see
-                        subsection below)
-                  loop (until end of block code recognized)
-                     decode literal/length value from input stream
-                     if value < 256
-                        copy value (literal byte) to output stream
-                     otherwise
-                        if value = end of block (256)
-                           break from loop
-                        otherwise (value = 257..285)
-                           decode distance from input stream
-
-                           move backwards distance bytes in the output
-                           stream, and copy length bytes from this
-                           position to the output stream.
-                  end loop
-            while not last block
-
-         Note that a duplicated string reference may refer to a string
-         in a previous block; i.e., the backward distance may cross one
-         or more block boundaries.  However a distance cannot refer past
-         the beginning of the output stream.  (An application using a
-         preset dictionary might discard part of the output stream; a
-         distance can refer to that part of the output stream anyway)
-         Note also that the referenced string may overlap the current
-         position; for example, if the last 2 bytes decoded have values
-         X and Y, a string reference with <length = 5, distance = 2>
-         adds X,Y,X,Y,X to the output stream.
-
-         We now specify each compression method in turn.
-        */
+        /// <summary>
+        /// Constructor
+        /// </summary>
+        public MSZIPDeflateBlock(ulong header)
+        {
+            BFINAL = (header & 0b100) != 0;
+            BTYPE = (MSZIPDeflateCompressionType)(header & 0b011);
+        }
    }

-    public class MSZIPNonCompressedBlock
+    /// <summary>
+    /// Empty interface defining block types
+    /// </summary>
+    public interface IMSZIPBlockData { }
+
+    /// <summary>
+    /// Non-compressed blocks (BTYPE=00)
+    /// </summary>
+    public class MSZIPNonCompressedBlock : IMSZIPBlockData
    {
-        /*
-        3.2.4. Non-compressed blocks (BTYPE=00)
+        #region Properties

-         Any bits of input up to the next byte boundary are ignored.
-         The rest of the block consists of the following information:
+        /// <summary>
+        /// The number of data bytes in the block
+        /// </summary>
+        /// <remarks>Bytes 0-1</remarks>
+        public ushort LEN { get; set; }

-              0   1   2   3   4...
-            +---+---+---+---+================================+
-            |  LEN  | NLEN  |... LEN bytes of literal data...|
-            +---+---+---+---+================================+
+        /// <summary>
+        /// The one's complement of LEN
+        /// </summary>
+        /// <remarks>Bytes 2-3</remarks>
+        public ushort NLEN { get; set; }

-         LEN is the number of data bytes in the block.  NLEN is the
-         one's complement of LEN.
-        */
+        /// <summary>
+        /// <see cref="LEN"/> bytes of literal data
+        /// </summary>
+        public byte[] Data { get; set; }
+
+        #endregion
+
+        /// <summary>
+        /// Constructor
+        /// </summary>
+        public MSZIPNonCompressedBlock(byte[] header)
+        {
+            // If we have invalid header data
+            if (header == null || header.Length < 4)
+                throw new ArgumentException();
+
+            int offset = 0;
+            LEN = header.ReadUInt16(ref offset);
+            NLEN = header.ReadUInt16(ref offset);
+
+            // TODO: Confirm NLEN is 1's complement of LEN
+        }
    }

-    public class MSZIPFixedHuffmanCompressedBlock
+    /// <summary>
+    /// Base class for Huffman-compressed blocks; derived classes supply the code length tables
+    /// </summary>
+    public abstract class MSZIPCompressedBlock : IMSZIPBlockData
    {
-        /*
-        3.2.6. Compression with fixed Huffman codes (BTYPE = 01)
+        /// <summary>
+        /// Huffman code lengths (in bits) for the literal / length alphabet, indexed by symbol value
+        /// </summary>
+        public abstract int[] LiteralLengths { get; }

-         The Huffman codes for the two alphabets are fixed, and are not
-         represented explicitly in the data.The Huffman code lengths
-         for the literal / length alphabet are:
-
-                   Lit Value    Bits        Codes
-                   -------- - ---------
-                     0 - 143     8          00110000 through
-                                            10111111
-                   144 - 255     9          110010000 through
-                                            111111111
-                   256 - 279     7          0000000 through
-                                            0010111
-                   280 - 287     8          11000000 through
-                                            11000111
-        */
+        /// <summary>
+        /// Huffman code lengths (in bits) for the distance alphabet, indexed by distance symbol
+        /// </summary>
+        public abstract int[] DistanceCodes { get; }
    }

-    public class MSZIPDynamicHuffmanCompressedBlock
+    /// <summary>
+    /// Block compressed with the fixed Huffman code tables (BTYPE=01)
+    /// </summary>
+    public class MSZIPFixedHuffmanCompressedBlock : MSZIPCompressedBlock
    {
+        #region Instance Variables
+
+        /// <summary>
+        /// Cached table returned by <see cref="LiteralLengths"/>; null until first requested
+        /// </summary>
+        private int[] _literalLengths = null;
+
+        /// <summary>
+        /// Cached table returned by <see cref="DistanceCodes"/>; null until first requested
+        /// </summary>
+        private int[] _distanceCodes = null;
+
+        #endregion
+
+        #region Properties
+
+        /// <summary>
+        /// Fixed code lengths for the 288 literal / length symbols
+        /// </summary>
+        /// <remarks>Built on first access, then the same array instance is returned</remarks>
+        public override int[] LiteralLengths
+        {
+            get
+            {
+                if (_literalLengths == null)
+                    _literalLengths = BuildLiteralLengths();
+
+                return _literalLengths;
+            }
+        }
+
+        /// <summary>
+        /// Fixed code lengths for the 32 distance symbols, all 5 bits
+        /// </summary>
+        /// <remarks>Built on first access, then the same array instance is returned</remarks>
+        public override int[] DistanceCodes
+        {
+            get
+            {
+                if (_distanceCodes == null)
+                    _distanceCodes = BuildDistanceCodes();
+
+                return _distanceCodes;
+            }
+        }
+
+        #endregion
+
+        #region Table Builders
+
+        /// <summary>
+        /// Create the 288-entry literal / length table
+        /// </summary>
+        private static int[] BuildLiteralLengths()
+        {
+            int[] table = new int[288];
+
+            FillRange(table, 0, 144, 8);   // Literal Value 0 - 143, 8 bits
+            FillRange(table, 144, 256, 9); // Literal Value 144 - 255, 9 bits
+            FillRange(table, 256, 280, 7); // Literal Value 256 - 279, 7 bits
+            FillRange(table, 280, 288, 8); // Literal Value 280 - 287, 8 bits
+
+            return table;
+        }
+
+        /// <summary>
+        /// Create the 32-entry distance table
+        /// </summary>
+        private static int[] BuildDistanceCodes()
+        {
+            int[] table = new int[32];
+
+            // Fixed length, 5 bits
+            FillRange(table, 0, table.Length, 5);
+
+            return table;
+        }
+
+        /// <summary>
+        /// Set every entry in [start, end) to the given value
+        /// </summary>
+        private static void FillRange(int[] table, int start, int end, int value)
+        {
+            for (int symbol = start; symbol < end; symbol++)
+                table[symbol] = value;
+        }
+
+        #endregion
+    }
+
+    /// <summary>
+    /// Compression with dynamic Huffman codes (BTYPE=10)
+    /// </summary>
+    public class MSZIPDynamicHuffmanCompressedBlock : MSZIPCompressedBlock
+    {
+        /// <summary>
+        /// The Huffman codes for the literal/length code
+        /// </summary>
+        public override int[] LiteralLengths => new int[19];
+
+        /// <summary>
+        /// The Huffman codes for the distance code
+        /// </summary>
+        public override int[] DistanceCodes => new int[19];
+
        /*
        3.2.7. Compression with dynamic Huffman codes (BTYPE=10)