mirror of
https://github.com/SabreTools/BinaryObjectScanner.git
synced 2026-04-26 08:10:32 +00:00
Start writing Inflate implementation
This commit is contained in:
@@ -1,4 +1,7 @@
|
||||
using BurnOutSharp.Tools;
|
||||
using System;
|
||||
using System.Collections;
|
||||
using System.Collections.Generic;
|
||||
using BurnOutSharp.Tools;
|
||||
|
||||
/// <see href="https://interoperability.blob.core.windows.net/files/MS-MCI/%5bMS-MCI%5d.pdf"/>
|
||||
/// <see href="https://www.rfc-editor.org/rfc/rfc1951"/>
|
||||
@@ -81,7 +84,7 @@ namespace BurnOutSharp.FileType
|
||||
/// <summary>
|
||||
/// How the data are compressed
|
||||
/// </summary>
|
||||
public enum DeflateCompressionType : byte
|
||||
public enum MSZIPDeflateCompressionType : byte
|
||||
{
|
||||
/// <summary>
|
||||
/// no compression
|
||||
@@ -104,173 +107,819 @@ namespace BurnOutSharp.FileType
|
||||
Reserved = 0b11,
|
||||
}
|
||||
|
||||
/// <summary>
/// Bit-level reader over a data stream
/// </summary>
/// <remarks>
/// Bits are always consumed in stream order: bytes in the order they come out of the
/// source, and within each byte starting at the least-significant bit. The LSB and MSB
/// method variants differ only in how the consumed bits are assembled into the result.
/// NOTE(review): this follows RFC 1951 section 3.1.1 (plain data elements are packed
/// LSB-first, Huffman codes MSB-first) - confirm this matches the intended use.
/// </remarks>
public class MSZIPDeflateStream
{
    #region Constants

    /// <summary>
    /// Number of bytes loaded into the bit buffer per refill
    /// </summary>
    private const int BitBufferSize = 4;

    /// <summary>
    /// Number of bytes requested from the data source per refill
    /// </summary>
    private const int BufferSize = 4096;

    #endregion

    #region Instance Variables

    /// <summary>
    /// Original data source to read from
    /// </summary>
    private readonly System.IO.Stream _dataStream;

    /// <summary>
    /// Current rolling buffer, null when nothing has been read yet or the source is exhausted
    /// </summary>
    private byte[] _buffer = null;

    /// <summary>
    /// Current position in the buffer, -1 when there is no buffer
    /// </summary>
    private int _bufferPointer = -1;

    /// <summary>
    /// Bit buffer to read bits from when necessary
    /// </summary>
    private BitArray _bitBuffer = null;

    /// <summary>
    /// Number of unread bits left in the bit buffer
    /// </summary>
    /// <remarks>Unread bits are always the last <see cref="_bitsLeft"/> entries of the bit buffer</remarks>
    private int _bitsLeft = 0;

    #endregion

    /// <summary>
    /// Constructor
    /// </summary>
    /// <param name="dataStream">Stream to read the data from</param>
    /// <exception cref="ArgumentNullException">The stream is null</exception>
    public MSZIPDeflateStream(System.IO.Stream dataStream)
    {
        if (dataStream == null)
            throw new ArgumentNullException(nameof(dataStream));

        _dataStream = dataStream;
    }

    /// <summary>
    /// Read between 0 and 64 bits of data from the stream assuming LSB
    /// </summary>
    /// <param name="numBits">Number of bits to read</param>
    /// <returns>Value whose bit 0 is the first bit read, bit 1 the second, and so on</returns>
    /// <exception cref="ArgumentOutOfRangeException">The bit count is not in 0..64</exception>
    /// <exception cref="IndexOutOfRangeException">The data source ran out before all bits were read</exception>
    public ulong ReadBitsLSB(int numBits)
    {
        // If we are reading an invalid number of bits
        if (numBits < 0 || numBits > 64)
            throw new ArgumentOutOfRangeException(nameof(numBits));

        ulong value = 0;
        for (int i = 0; i < numBits; i++)
        {
            if (ReadBit())
                value |= 1UL << i;
        }

        return value;
    }

    /// <summary>
    /// Read between 0 and 64 bits of data from the stream assuming MSB
    /// </summary>
    /// <param name="numBits">Number of bits to read</param>
    /// <returns>Value whose most-significant used bit is the first bit read</returns>
    /// <exception cref="ArgumentOutOfRangeException">The bit count is not in 0..64</exception>
    /// <exception cref="IndexOutOfRangeException">The data source ran out before all bits were read</exception>
    public ulong ReadBitsMSB(int numBits)
    {
        // If we are reading an invalid number of bits
        if (numBits < 0 || numBits > 64)
            throw new ArgumentOutOfRangeException(nameof(numBits));

        ulong value = 0;
        for (int i = 0; i < numBits; i++)
        {
            // Shift first so the final bit lands in bit 0 with no trailing shift
            value <<= 1;
            if (ReadBit())
                value |= 1UL;
        }

        return value;
    }

    /// <summary>
    /// Read 0 or more bytes of data from the stream assuming LSB
    /// </summary>
    /// <param name="numBytes">Number of bytes to read</param>
    /// <returns>
    /// The bytes read; when the reader is on a byte boundary these are the source bytes unchanged
    /// </returns>
    /// <exception cref="ArgumentOutOfRangeException">The byte count is negative</exception>
    /// <exception cref="IndexOutOfRangeException">The data source ran out before all bytes were read</exception>
    public byte[] ReadBytesLSB(int numBytes)
    {
        // If we are reading an invalid number of bytes
        if (numBytes < 0)
            throw new ArgumentOutOfRangeException(nameof(numBytes));

        byte[] byteBuffer = new byte[numBytes];
        for (int i = 0; i < numBytes; i++)
        {
            byteBuffer[i] = (byte)ReadBitsLSB(8);
        }

        return byteBuffer;
    }

    /// <summary>
    /// Read 0 or more bytes of data from the stream assuming MSB
    /// </summary>
    /// <param name="numBytes">Number of bytes to read</param>
    /// <returns>The bytes read, each assembled with its first-read bit as the most-significant bit</returns>
    /// <exception cref="ArgumentOutOfRangeException">The byte count is negative</exception>
    /// <exception cref="IndexOutOfRangeException">The data source ran out before all bytes were read</exception>
    public byte[] ReadBytesMSB(int numBytes)
    {
        // If we are reading an invalid number of bytes
        if (numBytes < 0)
            throw new ArgumentOutOfRangeException(nameof(numBytes));

        byte[] byteBuffer = new byte[numBytes];
        for (int i = 0; i < numBytes; i++)
        {
            byteBuffer[i] = (byte)ReadBitsMSB(8);
        }

        return byteBuffer;
    }

    /// <summary>
    /// Discard bits in the array up to the next byte boundary
    /// </summary>
    public void DiscardToByteBoundary()
    {
        // The bit buffer always holds whole bytes, so the unread bits of the
        // partially-consumed byte are exactly the remainder modulo 8
        int bitsToDiscard = _bitsLeft & 7;
        _bitsLeft -= bitsToDiscard;
    }

    /// <summary>
    /// Read the next single bit, refilling the bit buffer when it is empty
    /// </summary>
    /// <returns>True if the bit is set, false otherwise</returns>
    /// <exception cref="IndexOutOfRangeException">No more data is available</exception>
    private bool ReadBit()
    {
        if (_bitsLeft == 0)
        {
            FillBitBuffer();

            // If we couldn't read anything, throw an exception
            if (_bitsLeft == 0)
                throw new IndexOutOfRangeException("Attempted to read past the end of the data stream");
        }

        int index = _bitBuffer.Length - _bitsLeft;
        _bitsLeft--;
        return _bitBuffer[index];
    }

    /// <summary>
    /// Fill the internal bit buffer from the internal buffer
    /// </summary>
    /// <remarks>
    /// Fills up to 4 bytes worth of data at a time. Replaces the bit buffer, so it
    /// must only be called once every bit of the previous one has been consumed.
    /// </remarks>
    private void FillBitBuffer()
    {
        byte[] bytes = new byte[BitBufferSize];
        int count = 0;

        while (count < BitBufferSize)
        {
            // Refill the rolling buffer if there is none or it has been used up
            if (_buffer == null || _bufferPointer >= _buffer.Length)
            {
                FillBuffer();

                // Nothing left in the data source, keep whatever was gathered so far
                if (_buffer == null)
                    break;
            }

            int available = _buffer.Length - _bufferPointer;
            int toCopy = Math.Min(BitBufferSize - count, available);
            Array.Copy(_buffer, _bufferPointer, bytes, count, toCopy);
            _bufferPointer += toCopy;
            count += toCopy;
        }

        // If we couldn't read anything, reset the bit buffer
        if (count == 0)
        {
            _bitBuffer = null;
            _bitsLeft = 0;
            return;
        }

        // Trim so the bit buffer contains only real data
        if (count < BitBufferSize)
            Array.Resize(ref bytes, count);

        _bitBuffer = new BitArray(bytes);
        _bitsLeft = 8 * count;
    }

    /// <summary>
    /// Fill the internal buffer from the original data source
    /// </summary>
    /// <remarks>Reads up to 4096 bytes at a time</remarks>
    private void FillBuffer()
    {
        byte[] chunk = new byte[BufferSize];
        int total = 0;

        // A single Read call may return fewer bytes than requested, so keep
        // going until the chunk is full or the source reports no more data
        while (total < chunk.Length)
        {
            int read = _dataStream.Read(chunk, total, chunk.Length - total);
            if (read <= 0)
                break;

            total += read;
        }

        // If we can't read any bytes, reset the buffer
        if (total == 0)
        {
            _buffer = null;
            _bufferPointer = -1;
            return;
        }

        // Otherwise, keep only what was read and reset the position
        if (total < chunk.Length)
            Array.Resize(ref chunk, total);

        _buffer = chunk;
        _bufferPointer = 0;
    }
}
|
||||
|
||||
/// <summary>
/// Lookup tables and the block-level decoding loop for deflate data
/// </summary>
/// <see href="https://www.rfc-editor.org/rfc/rfc1951"/>
public class MSZIPDeflate
{
    #region Properties

    /// <summary>
    /// Match lengths for length codes 257..285 of the literal/length alphabet
    /// </summary>
    /// <remarks>Each value here is the lower bound for lengths represented</remarks>
    public Dictionary<int, int> LiteralLengths
    {
        get
        {
            // If we have cached length mappings, use those
            if (_literalLengths != null)
                return _literalLengths;

            // Otherwise, build it from scratch
            _literalLengths = new Dictionary<int, int>
            {
                [257] = 3,
                [258] = 4,
                [259] = 5,
                [260] = 6,
                [261] = 7,
                [262] = 8,
                [263] = 9,
                [264] = 10,
                [265] = 11, // 11,12
                [266] = 13, // 13,14
                [267] = 15, // 15,16
                [268] = 17, // 17,18
                [269] = 19, // 19-22
                [270] = 23, // 23-26
                [271] = 27, // 27-30
                [272] = 31, // 31-34
                [273] = 35, // 35-42
                [274] = 43, // 43-50
                [275] = 51, // 51-58
                [276] = 59, // 59-66
                [277] = 67, // 67-82
                [278] = 83, // 83-98
                [279] = 99, // 99-114
                [280] = 115, // 115-130
                [281] = 131, // 131-162
                [282] = 163, // 163-194
                [283] = 195, // 195-226
                [284] = 227, // 227-257
                [285] = 258,
            };

            return _literalLengths;
        }
    }

    /// <summary>
    /// Extra bits for length codes 257..285 of the literal/length alphabet
    /// </summary>
    public Dictionary<int, int> LiteralExtraBits
    {
        get
        {
            // If we have cached bit mappings, use those
            if (_literalExtraBits != null)
                return _literalExtraBits;

            // Otherwise, build it from scratch
            _literalExtraBits = new Dictionary<int, int>();

            // Literal Value 257 - 264, 0 bits
            for (int i = 257; i < 265; i++)
                _literalExtraBits[i] = 0;

            // Literal Value 265 - 268, 1 bit
            for (int i = 265; i < 269; i++)
                _literalExtraBits[i] = 1;

            // Literal Value 269 - 272, 2 bits
            for (int i = 269; i < 273; i++)
                _literalExtraBits[i] = 2;

            // Literal Value 273 - 276, 3 bits
            for (int i = 273; i < 277; i++)
                _literalExtraBits[i] = 3;

            // Literal Value 277 - 280, 4 bits
            for (int i = 277; i < 281; i++)
                _literalExtraBits[i] = 4;

            // Literal Value 281 - 284, 5 bits
            for (int i = 281; i < 285; i++)
                _literalExtraBits[i] = 5;

            // Literal Value 285, 0 bits
            _literalExtraBits[285] = 0;

            return _literalExtraBits;
        }
    }

    /// <summary>
    /// Match offsets for distance codes 0..29
    /// </summary>
    /// <remarks>Each value here is the lower bound for distances represented</remarks>
    public static readonly int[] DistanceOffsets = new int[30]
    {
        1, 2, 3, 4, 5, 7, 9, 13, 17, 25,
        33, 49, 65, 97, 129, 193, 257, 385, 513, 769,
        1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577,
    };

    /// <summary>
    /// Extra bits for distance codes 0..29
    /// </summary>
    public static readonly int[] DistanceExtraBits = new int[30]
    {
        0, 0, 0, 0, 1, 1, 2, 2, 3, 3,
        4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
        9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
    };

    /// <summary>
    /// The order of the bit length Huffman code lengths
    /// </summary>
    public static readonly int[] BitLengthOrder = new int[19]
    {
        16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15,
    };

    #endregion

    #region Instance Variables

    /// <summary>
    /// Cached backing store for <see cref="LiteralLengths"/>
    /// </summary>
    private Dictionary<int, int> _literalLengths = null;

    /// <summary>
    /// Cached backing store for <see cref="LiteralExtraBits"/>
    /// </summary>
    private Dictionary<int, int> _literalExtraBits = null;

    #endregion

    /// <summary>
    /// The decoding algorithm for the actual data
    /// </summary>
    /// <param name="data">Bit reader positioned at the first block header</param>
    /// <remarks>
    /// Only stored (non-compressed) blocks are decoded so far. The decoded bytes are
    /// collected in a local list and are not yet returned or stored anywhere.
    /// </remarks>
    /// <exception cref="ArgumentNullException">The reader is null</exception>
    /// <exception cref="System.IO.InvalidDataException">A block uses the reserved block type</exception>
    /// <exception cref="NotImplementedException">A block is compressed with Huffman codes</exception>
    public static void Decode(MSZIPDeflateStream data)
    {
        if (data == null)
            throw new ArgumentNullException(nameof(data));

        // Create the output byte array
        List<byte> decodedBytes = new List<byte>();

        // Create the loop variable block
        MSZIPDeflateBlock block;

        do
        {
            ulong header = data.ReadBitsLSB(3);
            block = new MSZIPDeflateBlock(header);

            // We should never get a reserved block
            if (block.BTYPE == MSZIPDeflateCompressionType.Reserved)
                throw new System.IO.InvalidDataException("Reserved deflate block type encountered");

            // If stored with no compression
            if (block.BTYPE == MSZIPDeflateCompressionType.NoCompression)
            {
                // Skip any remaining bits in current partially processed byte
                data.DiscardToByteBoundary();

                // Read LEN and NLEN
                byte[] nonCompressedHeader = data.ReadBytesLSB(4);
                MSZIPNonCompressedBlock stored = new MSZIPNonCompressedBlock(nonCompressedHeader);
                block.BlockData = stored;

                // Copy LEN bytes of data to output
                stored.Data = data.ReadBytesLSB(stored.LEN);
                decodedBytes.AddRange(stored.Data);
            }

            // Otherwise
            else
            {
                block.BlockData = block.BTYPE == MSZIPDeflateCompressionType.DynamicHuffman
                    ? (IMSZIPBlockData)new MSZIPDynamicHuffmanCompressedBlock()
                    : (IMSZIPBlockData)new MSZIPFixedHuffmanCompressedBlock();

                /*
                    TODO: Remaining work for compressed blocks

                    if compressed with dynamic Huffman codes
                        read representation of code trees
                    loop (until end of block code recognized)
                        decode literal/length value from input stream
                        if value < 256
                            copy value (literal byte) to output stream
                        otherwise
                            if value = end of block (256)
                                break from loop
                            otherwise (value = 257..285)
                                decode distance from input stream

                                move backwards distance bytes in the output
                                stream, and copy length bytes from this
                                position to the output stream.

                    A distance may reach back into a previous block but never past
                    the beginning of the output stream, and the referenced string may
                    overlap the current position: with the last 2 bytes decoded being
                    X and Y, <length = 5, distance = 2> adds X,Y,X,Y,X to the output.
                */

                // Fail loudly: the earlier placeholder was an empty infinite loop,
                // which would hang on any compressed block
                throw new NotImplementedException("Huffman-compressed deflate blocks are not supported yet");
            }
        } while (!block.BFINAL);
    }
}
|
||||
|
||||
/// <summary>
/// A single deflate block: the 3 header bits plus the type-specific block data
/// </summary>
/// <remarks>
/// Header bits do not necessarily begin on a byte boundary, since a block does not
/// necessarily occupy an integral number of bytes.
/// </remarks>
public class MSZIPDeflateBlock
{
    #region Properties

    /// <summary>
    /// Set if and only if this is the last block of the data set.
    /// </summary>
    /// <remarks>Bit 0</remarks>
    public bool BFINAL { get; set; }

    /// <summary>
    /// Specifies how the data are compressed
    /// </summary>
    /// <remarks>Bits 1-2</remarks>
    public MSZIPDeflateCompressionType BTYPE { get; set; }

    /// <summary>
    /// Block data as defined by the compression type
    /// </summary>
    public IMSZIPBlockData BlockData { get; set; }

    #endregion

    /// <summary>
    /// Constructor
    /// </summary>
    /// <param name="header">
    /// The 3 header bits, with the first bit read from the input in bit 0 and the
    /// following two bits in bits 1-2. Any higher bits are ignored.
    /// </param>
    public MSZIPDeflateBlock(ulong header)
    {
        // First header bit is BFINAL, the next two are BTYPE
        BFINAL = (header & 0b001) != 0;
        BTYPE = (MSZIPDeflateCompressionType)((header >> 1) & 0b011);
    }
}
|
||||
|
||||
public class MSZIPNonCompressedBlock
|
||||
/// <summary>
|
||||
/// Empty interface defining block types
|
||||
/// </summary>
|
||||
public interface IMSZIPBlockData { }
|
||||
|
||||
/// <summary>
/// Non-compressed blocks (BTYPE=00)
/// </summary>
/// <remarks>
/// Layout after the input has been aligned to a byte boundary:
/// 2 bytes LEN, 2 bytes NLEN, then LEN bytes of literal data.
/// </remarks>
public class MSZIPNonCompressedBlock : IMSZIPBlockData
{
    #region Properties

    /// <summary>
    /// The number of data bytes in the block
    /// </summary>
    /// <remarks>Bytes 0-1</remarks>
    public ushort LEN { get; set; }

    /// <summary>
    /// The one's complement of LEN
    /// </summary>
    /// <remarks>Bytes 2-3</remarks>
    public ushort NLEN { get; set; }

    /// <summary>
    /// <see cref="LEN"/> bytes of literal data
    /// </summary>
    /// <remarks>Not populated by the constructor; set by the caller after reading the data</remarks>
    public byte[] Data { get; set; }

    #endregion

    /// <summary>
    /// Constructor
    /// </summary>
    /// <param name="header">At least 4 bytes holding LEN followed by NLEN</param>
    /// <exception cref="ArgumentException">
    /// The header is null, shorter than 4 bytes, or NLEN is not the one's complement of LEN
    /// </exception>
    public MSZIPNonCompressedBlock(byte[] header)
    {
        // If we have invalid header data
        if (header == null || header.Length < 4)
            throw new ArgumentException("Header must contain at least 4 bytes", nameof(header));

        // NOTE(review): ReadUInt16 is a project helper; presumably it reads
        // little-endian and advances the offset by 2 - confirm
        int offset = 0;
        LEN = header.ReadUInt16(ref offset);
        NLEN = header.ReadUInt16(ref offset);

        // NLEN must be the one's complement of LEN, so every bit differs between them
        if ((LEN ^ NLEN) != 0xFFFF)
            throw new ArgumentException("NLEN is not the one's complement of LEN", nameof(header));
    }
}
|
||||
|
||||
public class MSZIPFixedHuffmanCompressedBlock
|
||||
/// <summary>
|
||||
/// Base class for compressed blocks
|
||||
/// </summary>
|
||||
public abstract class MSZIPCompressedBlock : IMSZIPBlockData
|
||||
{
|
||||
/*
|
||||
3.2.6. Compression with fixed Huffman codes (BTYPE = 01)
|
||||
/// <summary>
|
||||
/// Huffman code lengths for the literal / length alphabet
|
||||
/// </summary>
|
||||
public abstract int[] LiteralLengths { get; }
|
||||
|
||||
The Huffman codes for the two alphabets are fixed, and are not
|
||||
represented explicitly in the data.The Huffman code lengths
|
||||
for the literal / length alphabet are:
|
||||
|
||||
Lit Value Bits Codes
|
||||
-------- - ---------
|
||||
0 - 143 8 00110000 through
|
||||
10111111
|
||||
144 - 255 9 110010000 through
|
||||
111111111
|
||||
256 - 279 7 0000000 through
|
||||
0010111
|
||||
280 - 287 8 11000000 through
|
||||
11000111
|
||||
*/
|
||||
/// <summary>
|
||||
/// Huffman code lengths for the distance alphabet
|
||||
/// </summary>
|
||||
public abstract int[] DistanceCodes { get; }
|
||||
}
|
||||
|
||||
public class MSZIPDynamicHuffmanCompressedBlock
|
||||
/// <summary>
/// Compression with fixed Huffman codes (BTYPE=01)
/// </summary>
public class MSZIPFixedHuffmanCompressedBlock : MSZIPCompressedBlock
{
    #region Instance Variables

    /// <summary>
    /// Lazily-built table returned by <see cref="LiteralLengths"/>
    /// </summary>
    private int[] _literalLengths = null;

    /// <summary>
    /// Lazily-built table returned by <see cref="DistanceCodes"/>
    /// </summary>
    private int[] _distanceCodes = null;

    #endregion

    #region Properties

    /// <summary>
    /// Code bit lengths for the 288 literal / length values
    /// </summary>
    /// <remarks>Built on first access; the same array is returned on every later access</remarks>
    public override int[] LiteralLengths
    {
        get
        {
            if (_literalLengths == null)
            {
                int[] table = new int[288];
                int value = 0;

                // Values 0 - 143 use 8 bits
                while (value < 144)
                    table[value++] = 8;

                // Values 144 - 255 use 9 bits
                while (value < 256)
                    table[value++] = 9;

                // Values 256 - 279 use 7 bits
                while (value < 280)
                    table[value++] = 7;

                // Values 280 - 287 use 8 bits
                while (value < 288)
                    table[value++] = 8;

                _literalLengths = table;
            }

            return _literalLengths;
        }
    }

    /// <summary>
    /// Code bit lengths for the 32 distance values, all fixed at 5 bits
    /// </summary>
    /// <remarks>Built on first access; the same array is returned on every later access</remarks>
    public override int[] DistanceCodes
    {
        get
        {
            if (_distanceCodes == null)
            {
                int[] table = new int[32];
                for (int value = 0; value < table.Length; value++)
                {
                    table[value] = 5;
                }

                _distanceCodes = table;
            }

            return _distanceCodes;
        }
    }

    #endregion
}
|
||||
|
||||
/// <summary>
|
||||
/// Compression with dynamic Huffman codes (BTYPE=10)
|
||||
/// </summary>
|
||||
public class MSZIPDynamicHuffmanCompressedBlock : MSZIPCompressedBlock
|
||||
{
|
||||
/// <summary>
|
||||
/// The Huffman codes for the literal/length code
|
||||
/// </summary>
|
||||
public override int[] LiteralLengths => new int[19];
|
||||
|
||||
/// <summary>
|
||||
/// The Huffman codes for the distance code
|
||||
/// </summary>
|
||||
public override int[] DistanceCodes => new int[19];
|
||||
|
||||
/*
|
||||
3.2.7. Compression with dynamic Huffman codes (BTYPE=10)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user