using System;
using System.Collections.Generic;
using BurnOutSharp.Models.Compression.MSZIP;
using BurnOutSharp.Utilities;
using ICSharpCode.SharpZipLib.Zip.Compression;

namespace BurnOutSharp.Compression
{
    /// <summary>
    /// Decoder for MSZIP blocks: a 2-byte "CK" signature followed by a DEFLATE (RFC 1951) stream.
    /// </summary>
    /// <remarks>
    /// Huffman symbols are decoded one bit at a time from canonical code tables so that exactly
    /// the number of bits belonging to each code is consumed from the stream.
    ///
    /// NOTE(review): this assumes <c>BitStream.ReadBits</c> hands out bits in DEFLATE order
    /// (least-significant bit of each byte first) and that <c>AsByte</c>/<c>AsUInt16</c> assemble
    /// the bits LSB-first -- confirm against the BitStream implementation.
    ///
    /// TODO: Validate against real cabinet data. Each call decodes a single, self-contained block;
    /// history shared between consecutive MSZIP blocks is not carried over.
    /// </remarks>
    public class MSZIP
    {
        #region Constants

        /// <summary>
        /// Maximum Huffman code bit count
        /// </summary>
        private const int MAX_BITS = 16;

        /// <summary>
        /// Largest number of literal/length code lengths a dynamic block may declare
        /// </summary>
        private const int MAX_LITERAL_CODES = 286;

        /// <summary>
        /// Largest number of distance code lengths a dynamic block may declare
        /// </summary>
        private const int MAX_DISTANCE_CODES = 30;

        /// <summary>
        /// Literal/length symbol that terminates a compressed block
        /// </summary>
        private const int END_OF_BLOCK = 256;

        #endregion

        #region Properties

        /// <summary>
        /// Match lengths for literal codes 257..285
        /// </summary>
        /// <remarks>Each value here is the lower bound for lengths represented</remarks>
        private static Dictionary<int, int> LiteralLengths
        {
            get
            {
                // If we have cached length mappings, use those
                if (_literalLengths != null)
                    return _literalLengths;

                // Otherwise, build it from scratch. The map is fully populated
                // before it is published so no caller can observe a partial table.
                var lengths = new Dictionary<int, int>
                {
                    [257] = 3,
                    [258] = 4,
                    [259] = 5,
                    [260] = 6,
                    [261] = 7,
                    [262] = 8,
                    [263] = 9,
                    [264] = 10,
                    [265] = 11, // 11,12
                    [266] = 13, // 13,14
                    [267] = 15, // 15,16
                    [268] = 17, // 17,18
                    [269] = 19, // 19-22
                    [270] = 23, // 23-26
                    [271] = 27, // 27-30
                    [272] = 31, // 31-34
                    [273] = 35, // 35-42
                    [274] = 43, // 43-50
                    [275] = 51, // 51-58
                    [276] = 59, // 59-66
                    [277] = 67, // 67-82
                    [278] = 83, // 83-98
                    [279] = 99, // 99-114
                    [280] = 115, // 115-130
                    [281] = 131, // 131-162
                    [282] = 163, // 163-194
                    [283] = 195, // 195-226
                    [284] = 227, // 227-257
                    [285] = 258,
                };

                _literalLengths = lengths;
                return lengths;
            }
        }

        /// <summary>
        /// Extra bits for literal codes 257..285
        /// </summary>
        private static Dictionary<int, int> LiteralExtraBits
        {
            get
            {
                // If we have cached bit mappings, use those
                if (_literalExtraBits != null)
                    return _literalExtraBits;

                // Otherwise, build it from scratch in a local and publish it only once complete
                var extraBits = new Dictionary<int, int>();

                // Literal Value 257 - 264, 0 bits
                for (int i = 257; i < 265; i++)
                    extraBits[i] = 0;

                // Literal Value 265 - 268, 1 bit
                for (int i = 265; i < 269; i++)
                    extraBits[i] = 1;

                // Literal Value 269 - 272, 2 bits
                for (int i = 269; i < 273; i++)
                    extraBits[i] = 2;

                // Literal Value 273 - 276, 3 bits
                for (int i = 273; i < 277; i++)
                    extraBits[i] = 3;

                // Literal Value 277 - 280, 4 bits
                for (int i = 277; i < 281; i++)
                    extraBits[i] = 4;

                // Literal Value 281 - 284, 5 bits
                for (int i = 281; i < 285; i++)
                    extraBits[i] = 5;

                // Literal Value 285, 0 bits
                extraBits[285] = 0;

                _literalExtraBits = extraBits;
                return extraBits;
            }
        }

        /// <summary>
        /// Match offsets for distance codes 0..29
        /// </summary>
        /// <remarks>Each value here is the lower bound for distances represented</remarks>
        public static readonly int[] DistanceOffsets = new int[30]
        {
            1, 2, 3, 4, 5, 7, 9, 13, 17, 25,
            33, 49, 65, 97, 129, 193, 257, 385, 513, 769,
            1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577,
        };

        /// <summary>
        /// Extra bits for distance codes 0..29
        /// </summary>
        private static readonly int[] DistanceExtraBits = new int[30]
        {
            0, 0, 0, 0, 1, 1, 2, 2, 3, 3,
            4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
            9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
        };

        /// <summary>
        /// The order of the bit length Huffman code lengths
        /// </summary>
        private static readonly int[] BitLengthOrder = new int[19]
        {
            16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15,
        };

        #endregion

        #region Instance Variables

        /// <summary>
        /// Cached match lengths for literal codes 257..285
        /// </summary>
        private static Dictionary<int, int> _literalLengths = null;

        /// <summary>
        /// Cached extra bits for literal codes 257..285
        /// </summary>
        private static Dictionary<int, int> _literalExtraBits = null;

        #endregion

        #region Parsing

        /// <summary>
        /// Read the MSZIP block header from the block data, if possible
        /// </summary>
        /// <param name="data">BitStream representing the block</param>
        /// <returns>Filled block header on success, null on error</returns>
        private static Models.Compression.MSZIP.BlockHeader AsBlockHeader(BitStream data)
        {
            // If the data is invalid
            if (data == null)
                return null;

            var header = new Models.Compression.MSZIP.BlockHeader();

            // The signature must equal 0x4B43 when read as an aligned 16-bit value
            header.Signature = data.ReadAlignedUInt16();
            if (header.Signature != 0x4B43)
                return null;

            return header;
        }

        /// <summary>
        /// Read the deflate block header (BFINAL + BTYPE) from the block data, if possible
        /// </summary>
        /// <param name="data">BitStream representing the block</param>
        /// <returns>Filled deflate block header on success, null on error</returns>
        private static Models.Compression.MSZIP.DeflateBlockHeader AsDeflateBlockHeader(BitStream data)
        {
            // If the data is invalid
            if (data == null)
                return null;

            var header = new Models.Compression.MSZIP.DeflateBlockHeader();

            header.BFINAL = data.ReadBits(1)[0];
            header.BTYPE = (Models.Compression.MSZIP.CompressionType)data.ReadBits(2).AsByte();

            return header;
        }

        /// <summary>
        /// Read the dynamic Huffman block header (HLIT, HDIST, HCLEN and both sets of code lengths)
        /// </summary>
        /// <param name="data">BitStream representing the block</param>
        /// <returns>Filled dynamic Huffman compressed block header on success, null on error</returns>
        private static Models.Compression.MSZIP.DynamicHuffmanCompressedBlockHeader AsDynamicHuffmanCompressedBlockHeader(BitStream data)
        {
            // If the data is invalid
            if (data == null)
                return null;

            // # of Literal/Length codes - 257
            int literalCount = data.ReadBits(5).AsUInt16() + 257;

            // # of Distance codes - 1
            int distanceCount = data.ReadBits(5).AsByte() + 1;

            // # of Code Length codes - 4
            int codeLengthCount = data.ReadBits(4).AsByte() + 4;

            // The 5-bit fields can express more codes than the alphabets contain
            if (literalCount > MAX_LITERAL_CODES || distanceCount > MAX_DISTANCE_CODES)
                return null;

            // (HCLEN + 4) x 3 bits: code lengths for the code length alphabet, stored in
            // BitLengthOrder. A code length of 0 means the corresponding symbol is not used.
            int[] bitLengths = new int[BitLengthOrder.Length];
            for (int i = 0; i < codeLengthCount; i++)
            {
                bitLengths[BitLengthOrder[i]] = data.ReadBits(3).AsByte();
            }

            // Code length Huffman code
            if (!TryBuildCanonicalCode(bitLengths, out int[] counts, out int[] symbols))
                return null;

            // The literal/length and distance code lengths form ONE sequence: a repeat code may
            // start in the literal/length part and finish in the distance part.
            int[] allLengths = BuildHuffmanTree(data, literalCount + distanceCount, counts, symbols);
            if (allLengths == null)
                return null;

            // A block without an end-of-block code could never terminate
            if (allLengths[END_OF_BLOCK] == 0)
                return null;

            var header = new Models.Compression.MSZIP.DynamicHuffmanCompressedBlockHeader();

            // HLIT + 257 code lengths for the literal/length alphabet
            int[] literalLengths = new int[literalCount];
            Array.Copy(allLengths, 0, literalLengths, 0, literalCount);
            header.LiteralLengths = literalLengths;

            // HDIST + 1 code lengths for the distance alphabet
            int[] distanceLengths = new int[distanceCount];
            Array.Copy(allLengths, literalCount, distanceLengths, 0, distanceCount);
            header.DistanceCodes = distanceLengths;

            return header;
        }

        /// <summary>
        /// Read the stored (non-compressed) block header from the block data, if possible
        /// </summary>
        /// <param name="data">BitStream representing the block</param>
        /// <returns>Filled non-compressed block header on success, null on error</returns>
        private static Models.Compression.MSZIP.NonCompressedBlockHeader AsNonCompressedBlockHeader(BitStream data)
        {
            // If the data is invalid
            if (data == null)
                return null;

            var header = new Models.Compression.MSZIP.NonCompressedBlockHeader();

            header.LEN = data.ReadAlignedUInt16();
            header.NLEN = data.ReadAlignedUInt16();

            // NLEN must be the one's complement of LEN
            if (header.LEN != (~header.NLEN & 0xFFFF))
                return null;

            return header;
        }

        #endregion

        #region Helpers

        /// <summary>
        /// Build the canonical Huffman decoding tables for a set of code lengths
        /// </summary>
        /// <param name="lengths">Code length per symbol; 0 marks an unused symbol</param>
        /// <param name="counts">Receives the number of codes of each bit length (index = length)</param>
        /// <param name="symbols">Receives the used symbols ordered by code length, then by symbol value</param>
        /// <returns>True if the lengths describe a usable code, false if they are invalid or over-subscribed</returns>
        private static bool TryBuildCanonicalCode(int[] lengths, out int[] counts, out int[] symbols)
        {
            counts = new int[MAX_BITS + 1];
            symbols = null;

            if (lengths == null)
                return false;

            // Count the number of codes of each length
            foreach (int length in lengths)
            {
                if (length < 0 || length > MAX_BITS)
                    return false;

                counts[length]++;
            }

            // Reject over-subscribed sets: more codes of some length than remain available
            int available = 1;
            for (int length = 1; length <= MAX_BITS; length++)
            {
                available <<= 1;
                available -= counts[length];
                if (available < 0)
                    return false;
            }

            // First slot in the symbol table for each code length
            int[] offsets = new int[MAX_BITS + 2];
            for (int length = 1; length <= MAX_BITS; length++)
            {
                offsets[length + 1] = offsets[length] + counts[length];
            }

            // Place the used symbols in canonical order
            symbols = new int[lengths.Length];
            for (int symbol = 0; symbol < lengths.Length; symbol++)
            {
                if (lengths[symbol] != 0)
                    symbols[offsets[lengths[symbol]]++] = symbol;
            }

            return true;
        }

        /// <summary>
        /// Decode a single Huffman symbol, consuming only the bits that belong to its code
        /// </summary>
        /// <param name="data">BitStream to read from</param>
        /// <param name="counts">Number of codes of each bit length, from <see cref="TryBuildCanonicalCode"/></param>
        /// <param name="symbols">Symbols in canonical order, from <see cref="TryBuildCanonicalCode"/></param>
        /// <returns>The decoded symbol, or -1 if the bits read do not form a valid code</returns>
        private static int DecodeSymbol(BitStream data, int[] counts, int[] symbols)
        {
            int code = 0;  // Bits of the code accumulated so far
            int first = 0; // First canonical code of the current length
            int index = 0; // Position in symbols of the first code of the current length

            for (int length = 1; length <= MAX_BITS; length++)
            {
                // Huffman codes are packed starting from their most significant bit
                if (data.ReadBits(1)[0])
                    code |= 1;

                int count = counts[length];
                if (code - count < first)
                    return symbols[index + (code - first)];

                index += count;
                first += count;
                first <<= 1;
                code <<= 1;
            }

            // Ran out of code lengths without finding a match
            return -1;
        }

        /// <summary>
        /// Read extra bits as an integer, skipping the read entirely when there are none
        /// </summary>
        /// <param name="data">BitStream to read from</param>
        /// <param name="bitCount">Number of extra bits (0 - 13)</param>
        /// <returns>The value of the extra bits, 0 when <paramref name="bitCount"/> is 0</returns>
        private static int ReadExtraBits(BitStream data, int bitCount)
        {
            if (bitCount <= 0)
                return 0;

            return data.ReadBits(bitCount).AsUInt16();
        }

        /// <summary>
        /// Read a run of code lengths encoded with the code length Huffman code
        /// </summary>
        /// <param name="data">BitStream to read from</param>
        /// <param name="codeCount">Total number of code lengths to produce</param>
        /// <param name="counts">Code length code: number of codes per bit length</param>
        /// <param name="symbols">Code length code: symbols in canonical order</param>
        /// <returns>Array of <paramref name="codeCount"/> code lengths, null if the data is malformed</returns>
        /// <remarks>
        /// The alphabet for code lengths is as follows:
        ///   0 - 15: Represent code lengths of 0 - 15
        ///       16: Copy the previous code length 3 - 6 times (2 bits of length)
        ///       17: Repeat a code length of 0 for 3 - 10 times (3 bits of length)
        ///       18: Repeat a code length of 0 for 11 - 138 times (7 bits of length)
        /// </remarks>
        private static int[] BuildHuffmanTree(BitStream data, int codeCount, int[] counts, int[] symbols)
        {
            int[] lengths = new int[codeCount];

            int index = 0;
            while (index < codeCount)
            {
                int symbol = DecodeSymbol(data, counts, symbols);
                if (symbol < 0)
                    return null;

                // Represent code lengths of 0 - 15
                if (symbol <= 15)
                {
                    lengths[index++] = symbol;
                    continue;
                }

                int repeatValue;
                int repeatLength;
                switch (symbol)
                {
                    // Copy the previous code length 3 - 6 times.
                    // The next 2 bits indicate repeat length (0 = 3, ... , 3 = 6)
                    // Example: Codes 8, 16 (+2 bits 11), 16 (+2 bits 10) will expand to
                    //          12 code lengths of 8 (1 + 6 + 5)
                    case 16:
                        // There must be a previous length to copy
                        if (index == 0)
                            return null;

                        repeatValue = lengths[index - 1];
                        repeatLength = 3 + data.ReadBits(2).AsByte();
                        break;

                    // Repeat a code length of 0 for 3 - 10 times (3 bits of length)
                    case 17:
                        repeatValue = 0;
                        repeatLength = 3 + data.ReadBits(3).AsByte();
                        break;

                    // Repeat a code length of 0 for 11 - 138 times (7 bits of length)
                    case 18:
                        repeatValue = 0;
                        repeatLength = 11 + data.ReadBits(7).AsByte();
                        break;

                    // The code length alphabet has no other symbols
                    default:
                        return null;
                }

                // A repeat may not run past the declared number of code lengths
                if (index + repeatLength > codeCount)
                    return null;

                for (; repeatLength > 0; repeatLength--)
                {
                    lengths[index++] = repeatValue;
                }
            }

            return lengths;
        }

        #endregion

        #region Folders

        /// <summary>
        /// Decompress MSZIP data
        /// </summary>
        /// <param name="data">Byte array holding one MSZIP block, starting at its signature</param>
        /// <returns>The decompressed bytes, or null if the data is missing or malformed</returns>
        /// <exception cref="InvalidOperationException">A deflate block uses the reserved block type</exception>
        protected byte[] DecompressMSZIPData(byte[] data)
        {
            // If the data is invalid
            if (data == null)
                return null;

            // Create the bitstream to read from
            var dataStream = new BitStream(data);

            // Get the block header
            var blockHeader = AsBlockHeader(dataStream);
            if (blockHeader == null)
                return null;

            // Create the output byte list; it doubles as the history window for matches
            List<byte> decodedBytes = new List<byte>();

            // Create the loop variable block
            Models.Compression.MSZIP.DeflateBlockHeader deflateBlockHeader;

            do
            {
                deflateBlockHeader = AsDeflateBlockHeader(dataStream);
                if (deflateBlockHeader == null)
                    return null;

                switch (deflateBlockHeader.BTYPE)
                {
                    // We should never get a reserved block
                    case Models.Compression.MSZIP.CompressionType.Reserved:
                        throw new InvalidOperationException();

                    // If stored with no compression
                    case Models.Compression.MSZIP.CompressionType.NoCompression:
                        // Skip any remaining bits in current partially processed byte
                        dataStream.DiscardBuffer();

                        // Read the block header
                        var storedHeader = AsNonCompressedBlockHeader(dataStream);
                        if (storedHeader == null)
                            return null;

                        deflateBlockHeader.BlockDataHeader = storedHeader;

                        // Copy LEN bytes of data to output
                        var storedBytes = dataStream.ReadAlignedBytes(storedHeader.LEN);
                        if (storedBytes == null)
                            return null;

                        decodedBytes.AddRange(storedBytes);
                        continue;

                    // Fixed codes come with the header type itself
                    case Models.Compression.MSZIP.CompressionType.FixedHuffman:
                        deflateBlockHeader.BlockDataHeader = new Models.Compression.MSZIP.FixedHuffmanCompressedBlockHeader();
                        break;

                    // If compressed with dynamic Huffman codes read representation of code trees
                    case Models.Compression.MSZIP.CompressionType.DynamicHuffman:
                        deflateBlockHeader.BlockDataHeader = AsDynamicHuffmanCompressedBlockHeader(dataStream);
                        break;
                }

                var header = deflateBlockHeader.BlockDataHeader as Models.Compression.MSZIP.CompressedBlockHeader;
                if (header == null)
                    return null;

                // Build the decoders for both alphabets.
                // NOTE(review): for fixed Huffman blocks this presumes the header type supplies the
                // fixed literal/length and distance code lengths -- confirm against the model class.
                if (!TryBuildCanonicalCode(header.LiteralLengths, out int[] literalCounts, out int[] literalSymbols))
                    return null;
                if (!TryBuildCanonicalCode(header.DistanceCodes, out int[] distanceCounts, out int[] distanceSymbols))
                    return null;

                // Loop until end of block code recognized
                while (true)
                {
                    // Decode literal/length value from input stream
                    int symbol = DecodeSymbol(dataStream, literalCounts, literalSymbols);
                    if (symbol < 0)
                        return null;

                    // Copy value (literal byte) to output stream
                    if (symbol < END_OF_BLOCK)
                    {
                        decodedBytes.Add((byte)symbol);
                        continue;
                    }

                    // End of block (256)
                    if (symbol == END_OF_BLOCK)
                        break;

                    // Symbols past 285 exist in the fixed code but never appear in valid data
                    if (symbol > 285)
                        return null;

                    // Match length: base value plus extra bits
                    int length = LiteralLengths[symbol] + ReadExtraBits(dataStream, LiteralExtraBits[symbol]);

                    // Decode distance from input stream
                    int code = DecodeSymbol(dataStream, distanceCounts, distanceSymbols);
                    if (code < 0 || code >= DistanceOffsets.Length)
                        return null;

                    int distance = DistanceOffsets[code] + ReadExtraBits(dataStream, DistanceExtraBits[code]);

                    // A distance cannot refer past the beginning of the output stream
                    int source = decodedBytes.Count - distance;
                    if (source < 0)
                        return null;

                    // Move backwards distance bytes in the output stream, and copy length bytes
                    // from this position to the output stream. The copy goes byte by byte because
                    // the referenced string may overlap the current position; for example, if the
                    // last 2 bytes decoded have values X and Y, a reference with length 5 and
                    // distance 2 adds X,Y,X,Y,X to the output stream.
                    for (int i = 0; i < length; i++)
                    {
                        decodedBytes.Add(decodedBytes[source + i]);
                    }
                }
            } while (!deflateBlockHeader.BFINAL);

            // Note that a duplicated string reference may refer to a string in a previous deflate
            // block; i.e., the backward distance may cross one or more block boundaries. That works
            // here because all deflate blocks of the stream share the same output list.

            return decodedBytes.ToArray();
        }

        #endregion
    }
}