using System; using System.Collections.Generic; using System.IO; using System.Linq; using SabreTools.Models.Compression.MSZIP; using static SabreTools.Models.Compression.MSZIP.Constants; namespace SabreTools.Compression.MSZIP { /// public class DeflateDecompressor { /// /// Internal bitstream to use for decompression /// private BitStream _bitStream; /// /// Create a new Decompressor from a byte array /// /// Byte array to decompress public DeflateDecompressor(byte[]? input) { // If we have an invalid stream if (input == null || input.Length == 0) throw new ArgumentException(nameof(input)); // Create a memory stream to wrap var ms = new MemoryStream(input); // Wrap the stream in a BitStream _bitStream = new BitStream(ms); } /// /// Create a new Decompressor from a Stream /// /// Stream to decompress public DeflateDecompressor(Stream? input) { // If we have an invalid stream if (input == null || !input.CanRead || !input.CanSeek) throw new ArgumentException(nameof(input)); // Wrap the stream in a BitStream _bitStream = new BitStream(input); } /// /// Decompress a stream into a /// /// Block containing the decompressed data on success, null on error public Block? Process() { // Create a new block var block = new Block(); // Try to read the header block.BlockHeader = ReadBlockHeader(); if (block.BlockHeader.Signature != 0x4B43) return null; // Loop and read the deflate blocks var deflateBlocks = new List(); while (true) { // Try to read the deflate block var deflateBlock = ReadDeflateBlock(); if (deflateBlock == null) return null; // Add the deflate block to the set deflateBlocks.Add(deflateBlock); // If we're at the final block, exit out of the loop if (deflateBlock.Header!.BFINAL) break; } // Assign the deflate blocks to the block and return block.CompressedBlocks = deflateBlocks.ToArray(); return block; } #region Headers /// /// Read a BlockHeader from the input stream /// private BlockHeader ReadBlockHeader() { var header = new BlockHeader(); header.Signature = _bitStream.ReadUInt16() ?? 0; return header; } /// /// Read a DeflateBlockHeader from the input stream /// private DeflateBlockHeader ReadDeflateBlockHeader() { var header = new DeflateBlockHeader(); header.BFINAL = _bitStream.ReadBit() != 0x01; uint? btype = _bitStream.ReadBitsLSB(2) ?? 0b11; header.BTYPE = (CompressionType)btype; return header; } /// /// Read a NonCompressedBlockHeader from the input stream /// private NonCompressedBlockHeader ReadNonCompressedBlockHeader() { var header = new NonCompressedBlockHeader(); header.LEN = _bitStream.ReadUInt16() ?? 0; header.NLEN = _bitStream.ReadUInt16() ?? 0; return header; } /// /// Read a FixedHuffmanCompressedBlockHeader from the input stream /// private (FixedCompressedDataHeader, uint, uint) RaadFixedCompressedDataHeader() { // Nothing needs to be read, all values are fixed return (new FixedCompressedDataHeader(), 288, 30); } /// /// Read a DynamicHuffmanCompressedBlockHeader from the input stream /// private (DynamicCompressedDataHeader, uint, uint) ReadDynamicCompressedDataHeader() { var header = new DynamicCompressedDataHeader(); // Setup the counts first uint numLiteral = 257 + _bitStream.ReadBitsLSB(5) ?? 0; uint numDistance = 1 + _bitStream.ReadBitsLSB(5) ?? 0; uint numLength = 4 + _bitStream.ReadBitsLSB(4) ?? 0; // Convert the alphabet based on lengths uint[] lengthLengths = new uint[19]; for (int i = 0; i < numLength; i++) { lengthLengths[BitLengthOrder[i]] = (byte)(_bitStream.ReadBitsLSB(3) ?? 0); } for (int i = (int)numLength; i < 19; i++) { lengthLengths[BitLengthOrder[i]] = 0; } // Make the lengths tree HuffmanDecoder lengthTree = new HuffmanDecoder(lengthLengths, 19); // Setup the literal and distance lengths header.LiteralLengths = new uint[288]; header.DistanceCodes = new uint[32]; // Read the literal and distance codes int repeatCode = 1; uint leftover = ReadHuffmanLengths(lengthTree, header.LiteralLengths, numLiteral, 0, ref repeatCode); _ = ReadHuffmanLengths(lengthTree, header.DistanceCodes, numDistance, leftover, ref repeatCode); return (header, numLiteral, numDistance); } #endregion #region Data /// /// Read an RFC1951 block /// private DeflateBlock? ReadDeflateBlock() { var deflateBlock = new DeflateBlock(); // Try to read the deflate block header deflateBlock.Header = ReadDeflateBlockHeader(); switch (deflateBlock.Header.BTYPE) { // If stored with no compression case CompressionType.NoCompression: (var header00, var bytes00) = ReadNoCompression(); if (header00 == null || bytes00 == null) return null; deflateBlock.DataHeader = header00; deflateBlock.Data = bytes00; break; // If compressed with fixed Huffman codes case CompressionType.FixedHuffman: (var header01, var bytes01) = ReadFixedHuffman(); if (header01 == null || bytes01 == null) return null; deflateBlock.DataHeader = header01; deflateBlock.Data = bytes01; break; // If compressed with dynamic Huffman codes case CompressionType.DynamicHuffman: (var header10, var bytes10) = ReadDynamicHuffman(); if (header10 == null || bytes10 == null) return null; deflateBlock.DataHeader = header10; deflateBlock.Data = bytes10; break; // Reserved is not allowed and is treated as an error case CompressionType.Reserved: default: return null; } return deflateBlock; } /// /// Read an RFC1951 block with no compression /// private (NonCompressedBlockHeader?, byte[]?) ReadNoCompression() { // Skip any remaining bits in current partially processed byte _bitStream.Discard(); // Read LEN and NLEN var header = ReadNonCompressedBlockHeader(); if (header.LEN == 0 && header.NLEN == 0) return (null, null); // Copy LEN bytes of data to output return (header, _bitStream.ReadBytes(header.LEN)); } /// /// Read an RFC1951 block with fixed Huffman compression /// private (FixedCompressedDataHeader, byte[]?) ReadFixedHuffman() { var bytes = new List(); // Get the fixed huffman header (var header, uint numLiteral, uint numDistance) = RaadFixedCompressedDataHeader(); // Make the literal and distance trees HuffmanDecoder literalTree = new HuffmanDecoder(header.LiteralLengths, numLiteral); HuffmanDecoder distanceTree = new HuffmanDecoder(header.DistanceCodes, numDistance); // Now loop and decode return (header, ReadHuffmanBlock(literalTree, distanceTree)); } /// /// Read an RFC1951 block with dynamic Huffman compression /// private (DynamicCompressedDataHeader?, byte[]?) ReadDynamicHuffman() { // Get the dynamic huffman header (var header, uint numLiteral, uint numDistance) = ReadDynamicCompressedDataHeader(); // Make the literal and distance trees HuffmanDecoder literalTree = new HuffmanDecoder(header.LiteralLengths, numLiteral); HuffmanDecoder distanceTree = new HuffmanDecoder(header.DistanceCodes, numDistance); // Now loop and decode return (header, ReadHuffmanBlock(literalTree, distanceTree)); } /// /// Read an RFC1951 block with Huffman compression /// private byte[]? ReadHuffmanBlock(HuffmanDecoder literalTree, HuffmanDecoder distanceTree) { // Now loop and decode var bytes = new List(); while (true) { // Decode the next literal value int sym = literalTree.Decode(_bitStream); // If we have an immediate symbol if (sym < 256) { bytes.Add((byte)sym); } // If we have the ending symbol else if (sym == 256) { break; } // If we have a length/distance pair else { sym -= 257; uint? length = CopyLengths[sym] + _bitStream.ReadBitsLSB(LiteralExtraBits[sym]); if (length == null) return null; int distanceCode = distanceTree.Decode(_bitStream); uint? distance = CopyOffsets[distanceCode] + _bitStream.ReadBitsLSB(DistanceExtraBits[distanceCode]); if (distance == null) return null; byte[] arr = bytes.Skip(bytes.Count - (int)distance).Take((int)length).ToArray(); bytes.AddRange(arr); } } // Return the decoded array return bytes.ToArray(); } /// /// Read the huffman lengths /// private uint ReadHuffmanLengths(HuffmanDecoder lengthTree, uint[] lengths, uint numCodes, uint repeat, ref int repeatCode) { int i = 0; // First fill in any repeat codes while (repeat > 0) { lengths[i++] = (byte)repeatCode; repeat--; } // Then process the rest of the table while (i < numCodes) { // Get the next length encoding from the stream int lengthEncoding = lengthTree.Decode(_bitStream); // Values less than 16 are encoded directly if (lengthEncoding < 16) { lengths[i++] = (byte)lengthEncoding; repeatCode = lengthEncoding; } // Otherwise, the repeat count is based on the next values else { // Determine the repeat count and code from the encoding if (lengthEncoding == 16) { repeat = 3 + _bitStream.ReadBitsLSB(2) ?? 0; } else if (lengthEncoding == 17) { repeat = 3 + _bitStream.ReadBitsLSB(3) ?? 0; repeatCode = 0; } else if (lengthEncoding == 18) { repeat = 11 + _bitStream.ReadBitsLSB(7) ?? 0; repeatCode = 0; } // Read in the expected lengths while (i < numCodes && repeat > 0) { lengths[i++] = (byte)repeatCode; repeat--; } } } // Return any repeat value we have left over return repeat; } #endregion } }