mirror of
https://github.com/SabreTools/BinaryObjectScanner.git
synced 2026-04-22 06:03:34 +00:00
Start filling out LZX
This commit is contained in:
@@ -1,11 +1,397 @@
|
||||
namespace BurnOutSharp.FileType
|
||||
using System.Collections.Generic;
|
||||
/// <see href="https://learn.microsoft.com/en-us/openspecs/exchange_server_protocols/ms-patch/cc78752a-b4af-4eee-88cb-01f4d8a4c2bf"/>
/// <see href="https://interoperability.blob.core.windows.net/files/MS-PATCH/%5bMS-PATCH%5d.pdf"/>
/// <see href="https://github.com/kyz/libmspack/blob/master/libmspack/mspack/lzx.h"/>
/// <see href="https://github.com/kyz/libmspack/blob/master/libmspack/mspack/lzxc.c"/>
/// <see href="https://github.com/kyz/libmspack/blob/master/libmspack/mspack/lzxd.c"/>
/// <see href="https://wimlib.net/"/>
/// <see href="http://xavprods.free.fr/lzx/"/>
/// <see href="https://github.com/jhermsmeier/node-lzx"/>
/// <see href="https://github.com/jhermsmeier/node-cabarc"/>
|
||||
namespace BurnOutSharp.FileType
|
||||
{
|
||||
/// <summary>
/// 3-bit block type
/// </summary>
/// <remarks>
/// NOTE(review): the type name spells "LXZ" while the other types in this file use "LZX".
/// The name is left unchanged so existing references keep compiling.
/// </remarks>
public enum MSCABLXZBlockType : byte
{
    /// <summary>
    /// Not valid
    /// </summary>
    INVALID_0 = 0b000,

    /// <summary>
    /// Verbatim block
    /// </summary>
    Verbatim = 0b001,

    /// <summary>
    /// Aligned offset block
    /// </summary>
    AlignedOffset = 0b010,

    /// <summary>
    /// Uncompressed block
    /// </summary>
    Uncompressed = 0b011,

    /// <summary>
    /// Not valid
    /// </summary>
    INVALID_4 = 0b100,

    /// <summary>
    /// Not valid
    /// </summary>
    INVALID_5 = 0b101,

    /// <summary>
    /// Not valid
    /// </summary>
    INVALID_6 = 0b110,

    /// <summary>
    /// Not valid
    /// </summary>
    INVALID_7 = 0b111,
}
|
||||
|
||||
/// <summary>
/// Lookup data shared by the LZX model types in this file.
/// </summary>
public class MSCABLZX
{
    /// <summary>
    /// The window size determines the number of window subdivisions, or position slots
    /// </summary>
    /// <remarks>
    /// Keyed by window size in bytes; the value is the number of position slots for that window.
    /// </remarks>
    public static readonly Dictionary<int, int> PositionSlots = new Dictionary<int, int>()
    {
        [128 * 1024] = 34,          // 128 KB
        [256 * 1024] = 36,          // 256 KB
        [512 * 1024] = 38,          // 512 KB
        [1024 * 1024] = 42,         // 1 MB
        [2 * 1024 * 1024] = 50,     // 2 MB
        [4 * 1024 * 1024] = 66,     // 4 MB
        [8 * 1024 * 1024] = 98,     // 8 MB
        [16 * 1024 * 1024] = 162,   // 16 MB
        [32 * 1024 * 1024] = 290,   // 32 MB
    };
}
|
||||
|
||||
/// <summary>
/// Placeholder for the LZXD stream header; it declares no members yet.
/// The specification text it will model is quoted in the comment below.
/// </summary>
public class MSCABLZXHeader
{
    /*
    2.2 Header

    2.2.1 Chunk Size

    The LZXD compressor emits chunks of compressed data. A chunk represents exactly 32 KB of
    uncompressed data until the last chunk in the stream, which can represent less than 32 KB. To
    ensure that an exact number of input bytes represent an exact number of output bytes for each
    chunk, after each 32 KB of uncompressed data is represented in the output compressed bitstream, the
    output bitstream is padded with up to 15 bits of zeros to realign the bitstream on a 16-bit boundary
    (even byte boundary) for the next 32 KB of data. This results in a compressed chunk of a byte-aligned
    size. The compressed chunk could be smaller than 32 KB or larger than 32 KB if the data is
    incompressible when the chunk is not the last one.

    The LZXD engine encodes a compressed, chunk-size prefix field preceding each compressed chunk in
    the compressed byte stream. The compressed, chunk-size prefix field is a byte aligned, little-endian,
    16-bit field. The chunk prefix chain could be followed in the compressed stream without
    decompressing any data. The next chunk prefix is at a location computed by the absolute byte offset
    location of this chunk prefix plus 2 (for the size of the chunk-size prefix field) plus the current chunk
    size.

    2.2.2 E8 Call Translation

    E8 call translation is an optional feature that can be used when the data to compress contains x86
    instruction sequences. E8 translation operates as a preprocessing stage before compressing each
    chunk, and the compressed stream header contains a bit that indicates whether the decoder shall
    reverse the translation as a postprocessing step after decompressing each chunk.

    The x86 instruction beginning with a byte value of 0xE8 is followed by a 32-bit, little-endian relative
    displacement to the call target. When E8 call translation is enabled, the following preprocessing steps
    are performed on the uncompressed input before compression (assuming little-endian byte ordering):

    Let chunk_offset refer to the total number of uncompressed bytes preceding this chunk.

    Let E8_file_size refer to the caller-specified value given to the compressor or decoded from the header
    of the compressed stream during decompression.

    The following example shows how E8 translation is performed for each 32-KB chunk of uncompressed
    data (or less than 32 KB if last chunk to compress).

        if (( chunk_offset < 0x40000000 ) && ( chunk_size > 10 ))
            for ( i = 0; i < (chunk_size - 10); i++ )
                if ( chunk_byte[ i ] == 0xE8 )
                    long current_pointer = chunk_offset + i;
                    long displacement = chunk_byte[ i+1 ] |
                                        chunk_byte[ i+2 ] << 8 |
                                        chunk_byte[ i+3 ] << 16 |
                                        chunk_byte[ i+4 ] << 24;
                    long target = current_pointer + displacement;
                    if (( target >= 0 ) && ( target < E8_file_size+current_pointer))
                        if ( target >= E8_file_size )
                            target = displacement - E8_file_size;
                        endif
                        chunk_byte[ i+1 ] = (byte)( target );
                        chunk_byte[ i+2 ] = (byte)( target >> 8 );
                        chunk_byte[ i+3 ] = (byte)( target >> 16 );
                        chunk_byte[ i+4 ] = (byte)( target >> 24 );
                    endif
                    i += 4;
                endif
            endfor
        endif

    After decompression, the E8 scanning algorithm is the same. The following example shows how E8
    translation reversal is performed.

        long value = chunk_byte[ i+1 ] |
                     chunk_byte[ i+2 ] << 8 |
                     chunk_byte[ i+3 ] << 16 |
                     chunk_byte[ i+4 ] << 24;
        if (( value >= -current_pointer ) && ( value < E8_file_size ))
            if ( value >= 0 )
                displacement = value - current_pointer;
            else
                displacement = value + E8_file_size;
            endif
            chunk_byte[ i+1 ] = (byte)( displacement );
            chunk_byte[ i+2 ] = (byte)( displacement >> 8 );
            chunk_byte[ i+3 ] = (byte)( displacement >> 16 );
            chunk_byte[ i+4 ] = (byte)( displacement >> 24 );
        endif

    The first bit in the first chunk in the LZXD bitstream (following the 2-byte, chunk-size prefix described
    in section 2.2.1) indicates the presence or absence of two 16-bit fields immediately following the
    single bit. If the bit is set, E8 translation is enabled for all the following chunks in the stream using the
    32-bit value derived from the two 16-bit fields as the E8_file_size provided to the compressor when E8
    translation was enabled. Note that E8_file_size is completely independent of the length of the
    uncompressed data. E8 call translation is disabled after the 32,768th chunk (after 1 gigabyte (GB) of
    uncompressed data).

    Field                       Comments                  Size
    ----------------------------------------------------------------
    E8 translation              0-disabled, 1-enabled     1 bit
    Translation size high word  Only present if enabled   0 or 16 bits
    Translation size low word   Only present if enabled   0 or 16 bits
    */
}
|
||||
|
||||
/// <summary>
/// An LZXD block represents a sequence of compressed data that is encoded with the same set of
/// Huffman trees, or a sequence of uncompressed data. There can be one or more LZXD blocks in a
/// compressed stream, each with its own set of Huffman trees. Blocks do not have to start or end on a
/// chunk boundary; blocks can span multiple chunks, or a single chunk can contain multiple blocks. The
/// number of chunks is related to the size of the data being compressed, while the number of blocks is
/// related to how well the data is compressed. The Block Type field, as specified in section 2.3.1.1,
/// indicates which type of block follows, and the Block Size field, as specified in section 2.3.1.2,
/// indicates the number of uncompressed bytes represented by the block. Following the generic block
/// header is a type-specific header that describes the remainder of the block.
/// </summary>
public class MSCABLZXBlockHeader
{
    /// <summary>
    /// Type of the block that follows this header
    /// </summary>
    /// <remarks>3 bits</remarks>
    public MSCABLXZBlockType BlockType;

    /// <summary>
    /// High 8 bits of the 24-bit block size
    /// </summary>
    /// <remarks>8 bits</remarks>
    public byte BlockSizeMSB;

    /// <summary>
    /// Middle 8 bits of the 24-bit block size
    /// </summary>
    /// <remarks>8 bits</remarks>
    public byte BlockSizeByte2;

    /// <summary>
    /// Low 8 bits of the 24-bit block size
    /// </summary>
    /// <remarks>
    /// 8 bits.
    /// NOTE(review): the field name uses "Blocksize" where its siblings use "BlockSize";
    /// the name is left unchanged so existing references keep compiling.
    /// </remarks>
    public byte BlocksizeLSB;

    /*
    2.3.2 Block Data

    2.3.2.3 Aligned Offset Block

    An aligned offset block is identical to the verbatim block except for the presence of the aligned offset
    tree preceding the other trees.

    Entry                                            Comments                  Size
    ----------------------------------------------------------------------------------
    Aligned offset tree                              8 elements, 3 bits each   24 bits
    Pretree for first 256 elements of main tree      20 elements, 4 bits each  80 bits
    Path lengths of first 256 elements of main tree  Encoded using pretree     Variable
    Pretree for remainder of main tree               20 elements, 4 bits each  80 bits
    Path lengths of remaining elements of main tree  Encoded using pretree     Variable
    Pretree for length tree                          20 elements, 4 bits each  80 bits
    Path lengths of elements in length tree          Encoded using pretree     Variable
    Token sequence (matches and literals)            Specified in section 2.6  Variable
    */
}
|
||||
|
||||
/// <summary>
/// Following the generic block header, an uncompressed block begins with 1 to 16 bits of zero padding
/// to align the bit buffer on a 16-bit boundary. At this point, the bitstream ends and a byte stream
/// begins. Following the zero padding, new 32-bit values for R0, R1, and R2 are output in little-endian
/// form, followed by the uncompressed data bytes themselves. Finally, if the uncompressed data length
/// is odd, one extra byte of zero padding is encoded to realign the following bitstream.
///
/// Then the bitstream of byte-swapped 16-bit integers resumes for the next Block Type field (if there
/// are subsequent blocks).
///
/// The decoded R0, R1, and R2 values are used as initial repeated offset values to decode the
/// subsequent compressed block if present.
/// </summary>
public class MSCABLZXUncompressedBlock
{
    /// <summary>
    /// Generic block header
    /// </summary>
    public MSCABLZXBlockHeader Header;

    /// <summary>
    /// Padding to align following field on 16-bit boundary
    /// </summary>
    /// <remarks>Bits have a value of zero</remarks>
    public ushort PaddingBits;

    /// <summary>
    /// Least significant to most significant byte (little-endian DWORD ([MS-DTYP]))
    /// </summary>
    /// <remarks>Encoded directly in the byte stream, not in the bitstream of byte-swapped 16-bit words</remarks>
    public uint R0;

    /// <summary>
    /// Least significant to most significant byte (little-endian DWORD)
    /// </summary>
    /// <remarks>Encoded directly in the byte stream, not in the bitstream of byte-swapped 16-bit words</remarks>
    public uint R1;

    /// <summary>
    /// Least significant to most significant byte (little-endian DWORD)
    /// </summary>
    /// <remarks>Encoded directly in the byte stream, not in the bitstream of byte-swapped 16-bit words</remarks>
    public uint R2;

    /// <summary>
    /// Can use the direct memcpy function, as specified in [IEEE1003.1]
    /// </summary>
    /// <remarks>Encoded directly in the byte stream, not in the bitstream of byte-swapped 16-bit words</remarks>
    public byte[] RawDataBytes;

    /// <summary>
    /// Only if uncompressed size is odd
    /// </summary>
    public byte AlignmentByte;
}
|
||||
|
||||
/// <summary>
/// The fields of a verbatim block that follow the generic block header
/// </summary>
public class MSCABLZXVerbatimBlock
{
    /// <summary>
    /// Generic block header
    /// </summary>
    public MSCABLZXBlockHeader Header;

    /// <summary>
    /// Pretree for first 256 elements of main tree
    /// </summary>
    /// <remarks>20 elements, 4 bits each</remarks>
    public byte[] PretreeFirst256;

    /// <summary>
    /// Path lengths of first 256 elements of main tree
    /// </summary>
    /// <remarks>Encoded using pretree</remarks>
    public int[] PathLengthsFirst256;

    /// <summary>
    /// Pretree for remainder of main tree
    /// </summary>
    /// <remarks>20 elements, 4 bits each</remarks>
    public byte[] PretreeRemainder;

    /// <summary>
    /// Path lengths of remaining elements of main tree
    /// </summary>
    /// <remarks>Encoded using pretree</remarks>
    public int[] PathLengthsRemainder;

    /// <summary>
    /// Pretree for length tree
    /// </summary>
    /// <remarks>20 elements, 4 bits each</remarks>
    public byte[] PretreeLengthTree;

    /// <summary>
    /// Path lengths of elements in length tree
    /// </summary>
    /// <remarks>Encoded using pretree</remarks>
    public int[] PathLengthsLengthTree;

    // Not yet modeled as a field:
    //
    // Entry                                  Comments                  Size
    // ---------------------------------------------------------------------------------------
    // Token sequence (matches and literals)  Specified in section 2.6  Variable
}
|
||||
|
||||
/// <summary>
/// An aligned offset block is identical to the verbatim block except for the presence of the aligned offset
/// tree preceding the other trees.
/// </summary>
public class MSCABLZXAlignedOffsetBlock
{
    /// <summary>
    /// Generic block header
    /// </summary>
    public MSCABLZXBlockHeader Header;

    /// <summary>
    /// Aligned offset tree
    /// </summary>
    /// <remarks>8 elements, 3 bits each</remarks>
    public byte[] AlignedOffsetTree;

    /// <summary>
    /// Pretree for first 256 elements of main tree
    /// </summary>
    /// <remarks>20 elements, 4 bits each</remarks>
    public byte[] PretreeFirst256;

    /// <summary>
    /// Path lengths of first 256 elements of main tree
    /// </summary>
    /// <remarks>Encoded using pretree</remarks>
    public int[] PathLengthsFirst256;

    /// <summary>
    /// Pretree for remainder of main tree
    /// </summary>
    /// <remarks>20 elements, 4 bits each</remarks>
    public byte[] PretreeRemainder;

    /// <summary>
    /// Path lengths of remaining elements of main tree
    /// </summary>
    /// <remarks>Encoded using pretree</remarks>
    public int[] PathLengthsRemainder;

    /// <summary>
    /// Pretree for length tree
    /// </summary>
    /// <remarks>20 elements, 4 bits each</remarks>
    public byte[] PretreeLengthTree;

    /// <summary>
    /// Path lengths of elements in length tree
    /// </summary>
    /// <remarks>Encoded using pretree</remarks>
    public int[] PathLengthsLengthTree;

    // Not yet modeled as a field:
    //
    // Entry                                  Comments                  Size
    // ---------------------------------------------------------------------------------------
    // Token sequence (matches and literals)  Specified in section 2.6  Variable
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user