8 Commits
1.1.3 ... 1.1.4

Author SHA1 Message Date
Matt Nadareski
c000e581c8 Bump version 2023-09-28 23:24:58 -04:00
Matt Nadareski
465cef4224 Add XGD4 identifier for PIC 2023-09-28 23:21:44 -04:00
Matt Nadareski
87cadbfd2b Add documentation around Quantum 2023-09-22 21:24:05 -04:00
Matt Nadareski
648ee2eaa5 Add back two properties 2023-09-22 21:15:52 -04:00
Matt Nadareski
daa814728d Simplify the Quantum models for now 2023-09-22 21:13:31 -04:00
Matt Nadareski
68aac36623 Fully create Chunk and ChunkHeader 2023-09-22 21:00:41 -04:00
Matt Nadareski
0c95cfcde4 More LZX cleanup 2023-09-22 20:47:29 -04:00
Matt Nadareski
6d6361c153 Start making LZX models better 2023-09-22 20:40:22 -04:00
15 changed files with 213 additions and 216 deletions

View File

@@ -5,17 +5,8 @@ namespace SabreTools.Models.Compression.LZX
/// tree preceding the other trees.
/// </summary>
/// <see href="https://interoperability.blob.core.windows.net/files/MS-PATCH/%5bMS-PATCH%5d.pdf"/>
public class AlignedOffsetBlock
public class AlignedOffsetBlockData : BlockData
{
/// <summary>
/// Generic block header
/// </summary>
#if NET48
public BlockHeader Header { get; set; }
#else
public BlockHeader? Header { get; set; }
#endif
/// <summary>
/// Aligned offset tree
/// </summary>
@@ -86,8 +77,14 @@ namespace SabreTools.Models.Compression.LZX
public int[]? PathLengthsLengthTree { get; set; }
#endif
// Entry Comments Size
// ---------------------------------------------------------------------------------------
// Token sequence (matches and literals) Specified in section 2.6 Variable
/// <summary>
/// Token sequence (matches and literals)
/// </summary>
/// <remarks>Variable</remarks>
#if NET48
public byte[] TokenSequence { get; set; }
#else
public byte[]? TokenSequence { get; set; }
#endif
}
}

32
Compression/LZX/Block.cs Normal file
View File

@@ -0,0 +1,32 @@
namespace SabreTools.Models.Compression.LZX
{
/// <summary>
/// An LZXD block represents a sequence of compressed data that is encoded with the same set of
/// Huffman trees, or a sequence of uncompressed data. There can be one or more LZXD blocks in a
/// compressed stream, each with its own set of Huffman trees. Blocks do not have to start or end on a
/// chunk boundary; blocks can span multiple chunks, or a single chunk can contain multiple blocks. The
/// number of chunks is related to the size of the data being compressed, while the number of blocks is
/// related to how well the data is compressed.
/// </summary>
/// <remarks>
/// This model pairs the generic block header with the type-specific data that follows it.
/// </remarks>
/// <see href="https://interoperability.blob.core.windows.net/files/MS-PATCH/%5bMS-PATCH%5d.pdf"/>
public class Block
{
/// <summary>
/// Generic block header that precedes the type-specific block data
/// </summary>
/// <remarks>
/// Declared nullable on targets other than NET48; may be null if it has not been assigned.
/// </remarks>
#if NET48
public BlockHeader Header { get; set; }
#else
public BlockHeader? Header { get; set; }
#endif
/// <summary>
/// Type-specific block data that follows the generic block header
/// </summary>
/// <remarks>
/// Typed as the abstract <see cref="LZX.BlockData"/>; the concrete types deriving from it are
/// <see cref="AlignedOffsetBlockData"/>, <see cref="UncompressedBlockData"/> and
/// <see cref="VerbatimBlockData"/>. Declared nullable on targets other than NET48.
/// </remarks>
#if NET48
public BlockData BlockData { get; set; }
#else
public BlockData? BlockData { get; set; }
#endif
}
}

View File

@@ -0,0 +1,8 @@
namespace SabreTools.Models.Compression.LZX
{
/// <summary>
/// Abstract base type for the type-specific data of an LZX block
/// </summary>
/// <remarks>
/// Exists so that a block can hold any of the concrete block data models
/// (aligned offset, uncompressed or verbatim) through a single property type.
/// </remarks>
/// <see href="https://interoperability.blob.core.windows.net/files/MS-PATCH/%5bMS-PATCH%5d.pdf"/>
public abstract class BlockData
{
// No common fields between all block data; the generic block header
// is modeled separately from the block data.
}
}

View File

@@ -1,14 +1,9 @@
namespace SabreTools.Models.Compression.LZX
{
/// <summary>
/// An LZXD block represents a sequence of compressed data that is encoded with the same set of
/// Huffman trees, or a sequence of uncompressed data. There can be one or more LZXD blocks in a
/// compressed stream, each with its own set of Huffman trees. Blocks do not have to start or end on a
/// chunk boundary; blocks can span multiple chunks, or a single chunk can contain multiple blocks. The
/// number of chunks is related to the size of the data being compressed, while the number of blocks is
/// related to how well the data is compressed. The Block Type field, as specified in section 2.3.1.1,
/// indicates which type of block follows, and the Block Size field, as specified in section 2.3.1.2,
/// indicates the number of uncompressed bytes represented by the block. Following the generic block
/// The Block Type field, as specified in section 2.3.1.1, indicates which type of block follows,
/// and the Block Size field, as specified in section 2.3.1.2, indicates the number of
/// uncompressed bytes represented by the block. Following the generic block
/// header is a type-specific header that describes the remainder of the block.
/// </summary>
/// <see href="https://interoperability.blob.core.windows.net/files/MS-PATCH/%5bMS-PATCH%5d.pdf"/>

25
Compression/LZX/Chunk.cs Normal file
View File

@@ -0,0 +1,25 @@
namespace SabreTools.Models.Compression.LZX
{
    /// <summary>
    /// The LZXD compressor emits chunks of compressed data. A chunk represents exactly 32 KB of
    /// uncompressed data until the last chunk in the stream, which can represent less than 32 KB. To
    /// ensure that an exact number of input bytes represent an exact number of output bytes for each
    /// chunk, after each 32 KB of uncompressed data is represented in the output compressed bitstream, the
    /// output bitstream is padded with up to 15 bits of zeros to realign the bitstream on a 16-bit boundary
    /// (even byte boundary) for the next 32 KB of data. This results in a compressed chunk of a byte-aligned
    /// size. The compressed chunk could be smaller than 32 KB or larger than 32 KB if the data is
    /// incompressible when the chunk is not the last one.
    /// </summary>
    /// <see href="https://interoperability.blob.core.windows.net/files/MS-PATCH/%5bMS-PATCH%5d.pdf"/>
    public class Chunk
    {
        /// <summary>
        /// Chunk header
        /// </summary>
        /// <remarks>
        /// Declared nullable on targets other than NET48, matching the other LZX models;
        /// may be null if it has not been assigned.
        /// </remarks>
#if NET48
        public ChunkHeader Header { get; set; }
#else
        public ChunkHeader? Header { get; set; }
#endif

        /// <summary>
        /// Block headers and data
        /// </summary>
        /// <remarks>
        /// Declared nullable on targets other than NET48, matching the other LZX models;
        /// may be null if it has not been assigned.
        /// </remarks>
#if NET48
        public Block[] Blocks { get; set; }
#else
        public Block[]? Blocks { get; set; }
#endif
    }
}

View File

@@ -0,0 +1,46 @@
namespace SabreTools.Models.Compression.LZX
{
/// <summary>
/// Header fields of an LZXD chunk: the chunk-size prefix and the E8 call translation
/// flag with its optional 32-bit translation size, stored as two 16-bit words.
/// </summary>
/// <remarks>
/// The LZXD compressor emits chunks of compressed data. A chunk represents exactly 32 KB of
/// uncompressed data until the last chunk in the stream, which can represent less than 32 KB. To
/// ensure that an exact number of input bytes represent an exact number of output bytes for each
/// chunk, after each 32 KB of uncompressed data is represented in the output compressed bitstream, the
/// output bitstream is padded with up to 15 bits of zeros to realign the bitstream on a 16-bit boundary
/// (even byte boundary) for the next 32 KB of data. This results in a compressed chunk of a byte-aligned
/// size. The compressed chunk could be smaller than 32 KB or larger than 32 KB if the data is
/// incompressible when the chunk is not the last one.
/// </remarks>
/// <see href="https://interoperability.blob.core.windows.net/files/MS-PATCH/%5bMS-PATCH%5d.pdf"/>
public class ChunkHeader
{
/// <summary>
/// The LZXD engine encodes a compressed, chunk-size prefix field preceding each compressed chunk in
/// the compressed byte stream. The compressed, chunk-size prefix field is a byte aligned, little-endian,
/// 16-bit field. The chunk prefix chain could be followed in the compressed stream without
/// decompressing any data. The next chunk prefix is at a location computed by the absolute byte offset
/// location of this chunk prefix plus 2 (for the size of the chunk-size prefix field) plus the current chunk
/// size.
/// </summary>
/// <remarks>16 bits</remarks>
public ushort ChunkSize { get; set; }
/// <summary>
/// The first bit in the first chunk in the LZXD bitstream (following the 2-byte, chunk-size prefix described
/// in section 2.2.1) indicates the presence or absence of two 16-bit fields immediately following the
/// single bit. If the bit is set, E8 translation is enabled for all the following chunks in the stream using the
/// 32-bit value derived from the two 16-bit fields as the E8_file_size provided to the compressor when E8
/// translation was enabled. Note that E8_file_size is completely independent of the length of the
/// uncompressed data. E8 call translation is disabled after the 32,768th chunk (after 1 gigabyte (GB) of
/// uncompressed data).
/// </summary>
/// <remarks>1 bit in the bitstream; 0 - disabled, 1 - enabled</remarks>
public byte E8Translation { get; set; }
/// <summary>
/// E8 translation size, high WORD
/// </summary>
/// <remarks>Only present if E8 translation is enabled; 0 or 16 bits</remarks>
public ushort? TranslationSizeHighWord { get; set; }
/// <summary>
/// E8 translation size, low WORD
/// </summary>
/// <remarks>Only present if E8 translation is enabled; 0 or 16 bits</remarks>
public ushort? TranslationSizeLowWord { get; set; }
}
}

View File

@@ -3,44 +3,36 @@ namespace SabreTools.Models.Compression.LZX
public static class Constants
{
/* some constants defined by the LZX specification */
public const int LZX_MIN_MATCH = (2);
public const int LZX_MAX_MATCH = (257);
public const int LZX_NUM_CHARS = (256);
/// <summary>
/// also blocktypes 4-7 invalid
/// </summary>
public const int LZX_BLOCKTYPE_INVALID = (0);
public const int LZX_BLOCKTYPE_VERBATIM = (1);
public const int LZX_BLOCKTYPE_ALIGNED = (2);
public const int LZX_BLOCKTYPE_UNCOMPRESSED = (3);
public const int LZX_PRETREE_NUM_ELEMENTS = (20);
public const int LZX_MIN_MATCH = 2;
public const int LZX_MAX_MATCH = 257;
public const int LZX_NUM_CHARS = 256;
public const int LZX_PRETREE_NUM_ELEMENTS = 20;
/// <summary>
/// aligned offset tree #elements
/// </summary>
public const int LZX_ALIGNED_NUM_ELEMENTS = (8);
public const int LZX_ALIGNED_NUM_ELEMENTS = 8;
/// <summary>
/// this one missing from spec!
/// </summary>
public const int LZX_NUM_PRIMARY_LENGTHS = (7);
public const int LZX_NUM_PRIMARY_LENGTHS = 7;
/// <summary>
/// length tree #elements
/// </summary>
public const int LZX_NUM_SECONDARY_LENGTHS = (249);
public const int LZX_NUM_SECONDARY_LENGTHS = 249;
/* LZX huffman defines: tweak tablebits as desired */
public const int LZX_PRETREE_MAXSYMBOLS = (LZX_PRETREE_NUM_ELEMENTS);
public const int LZX_PRETREE_TABLEBITS = (6);
public const int LZX_MAINTREE_MAXSYMBOLS = (LZX_NUM_CHARS + 50 * 8);
public const int LZX_MAINTREE_TABLEBITS = (12);
public const int LZX_LENGTH_MAXSYMBOLS = (LZX_NUM_SECONDARY_LENGTHS + 1);
public const int LZX_LENGTH_TABLEBITS = (12);
public const int LZX_ALIGNED_MAXSYMBOLS = (LZX_ALIGNED_NUM_ELEMENTS);
public const int LZX_ALIGNED_TABLEBITS = (7);
public const int LZX_PRETREE_MAXSYMBOLS = LZX_PRETREE_NUM_ELEMENTS;
public const int LZX_PRETREE_TABLEBITS = 6;
public const int LZX_MAINTREE_MAXSYMBOLS = LZX_NUM_CHARS + 50 * 8;
public const int LZX_MAINTREE_TABLEBITS = 12;
public const int LZX_LENGTH_MAXSYMBOLS = LZX_NUM_SECONDARY_LENGTHS + 1;
public const int LZX_LENGTH_TABLEBITS = 12;
public const int LZX_ALIGNED_MAXSYMBOLS = LZX_ALIGNED_NUM_ELEMENTS;
public const int LZX_ALIGNED_TABLEBITS = 7;
public const int LZX_LENTABLE_SAFETY = (64); /* we allow length table decoding overruns */
public const int LZX_LENTABLE_SAFETY = 64; /* we allow length table decoding overruns */
}
}

View File

@@ -1,102 +0,0 @@
namespace SabreTools.Models.Compression.LZX
{
public class Header
{
/*
2.2 Header
2.2.1 Chunk Size
The LZXD compressor emits chunks of compressed data. A chunk represents exactly 32 KB of
uncompressed data until the last chunk in the stream, which can represent less than 32 KB. To
ensure that an exact number of input bytes represent an exact number of output bytes for each
chunk, after each 32 KB of uncompressed data is represented in the output compressed bitstream, the
output bitstream is padded with up to 15 bits of zeros to realign the bitstream on a 16-bit boundary
(even byte boundary) for the next 32 KB of data. This results in a compressed chunk of a byte-aligned
size. The compressed chunk could be smaller than 32 KB or larger than 32 KB if the data is
incompressible when the chunk is not the last one.
The LZXD engine encodes a compressed, chunk-size prefix field preceding each compressed chunk in
the compressed byte stream. The compressed, chunk-size prefix field is a byte aligned, little-endian,
16-bit field. The chunk prefix chain could be followed in the compressed stream without
decompressing any data. The next chunk prefix is at a location computed by the absolute byte offset
location of this chunk prefix plus 2 (for the size of the chunk-size prefix field) plus the current chunk
size.
2.2.2 E8 Call Translation
E8 call translation is an optional feature that can be used when the data to compress contains x86
instruction sequences. E8 translation operates as a preprocessing stage before compressing each
chunk, and the compressed stream header contains a bit that indicates whether the decoder shall
reverse the translation as a postprocessing step after decompressing each chunk.
The x86 instruction beginning with a byte value of 0xE8 is followed by a 32-bit, little-endian relative
displacement to the call target. When E8 call translation is enabled, the following preprocessing steps
are performed on the uncompressed input before compression (assuming little-endian byte ordering):
Let chunk_offset refer to the total number of uncompressed bytes preceding this chunk.
Let E8_file_size refer to the caller-specified value given to the compressor or decoded from the header
of the compressed stream during decompression.
The following example shows how E8 translation is performed for each 32-KB chunk of uncompressed
data (or less than 32 KB if last chunk to compress).
if (( chunk_offset < 0x40000000 ) && ( chunk_size > 10 ))
for ( i = 0; i < (chunk_size - 10); i++ )
if ( chunk_byte[ i ] == 0xE8 )
long current_pointer = chunk_offset + i;
long displacement = chunk_byte[ i+1 ] |
chunk_byte[ i+2 ] << 8 |
chunk_byte[ i+3 ] << 16 |
chunk_byte[ i+4 ] << 24;
long target = current_pointer + displacement;
if (( target >= 0 ) && ( target < E8_file_size+current_pointer))
if ( target >= E8_file_size )
target = displacement - E8_file_size;
endif
chunk_byte[ i+1 ] = (byte)( target );
chunk_byte[ i+2 ] = (byte)( target >> 8 );
chunk_byte[ i+3 ] = (byte)( target >> 16 );
chunk_byte[ i+4 ] = (byte)( target >> 24 );
endif
i += 4;
endif
endfor
endif
After decompression, the E8 scanning algorithm is the same. The following example shows how E8
translation reversal is performed.
long value = chunk_byte[ i+1 ] |
chunk_byte[ i+2 ] << 8 |
chunk_byte[ i+3 ] << 16 |
chunk_byte[ i+4 ] << 24;
if (( value >= -current_pointer ) && ( value < E8_file_size ))
if ( value >= 0 )
displacement = value - current_pointer;
else
displacement = value + E8_file_size;
endif
chunk_byte[ i+1 ] = (byte)( displacement );
chunk_byte[ i+2 ] = (byte)( displacement >> 8 );
chunk_byte[ i+3 ] = (byte)( displacement >> 16 );
chunk_byte[ i+4 ] = (byte)( displacement >> 24 );
endif
The first bit in the first chunk in the LZXD bitstream (following the 2-byte, chunk-size prefix described
in section 2.2.1) indicates the presence or absence of two 16-bit fields immediately following the
single bit. If the bit is set, E8 translation is enabled for all the following chunks in the stream using the
32-bit value derived from the two 16-bit fields as the E8_file_size provided to the compressor when E8
translation was enabled. Note that E8_file_size is completely independent of the length of the
uncompressed data. E8 call translation is disabled after the 32,768th chunk (after 1 gigabyte (GB) of
uncompressed data).
Field Comments Size
----------------------------------------------------------------
E8 translation 0-disabled, 1-enabled 1 bit
Translation size high word Only present if enabled 0 or 16 bits
Translation size low word Only present if enabled 0 or 16 bits
*/
}
}

View File

@@ -14,17 +14,8 @@ namespace SabreTools.Models.Compression.LZX
/// subsequent compressed block if present.
/// </summary>
/// <see href="https://interoperability.blob.core.windows.net/files/MS-PATCH/%5bMS-PATCH%5d.pdf"/>
public class UncompressedBlock
public class UncompressedBlockData : BlockData
{
/// <summary>
/// Generic block header
/// </summary>
#if NET48
public BlockHeader Header { get; set; }
#else
public BlockHeader? Header { get; set; }
#endif
/// <summary>
/// Padding to align following field on 16-bit boundary
/// </summary>

View File

@@ -4,17 +4,8 @@ namespace SabreTools.Models.Compression.LZX
/// The fields of a verbatim block that follow the generic block header
/// </summary>
/// <see href="https://interoperability.blob.core.windows.net/files/MS-PATCH/%5bMS-PATCH%5d.pdf"/>
public class VerbatimBlock
public class VerbatimBlockData : BlockData
{
/// <summary>
/// Generic block header
/// </summary>
#if NET48
public BlockHeader Header { get; set; }
#else
public BlockHeader? Header { get; set; }
#endif
/// <summary>
/// Pretree for first 256 elements of main tree
/// </summary>
@@ -75,8 +66,14 @@ namespace SabreTools.Models.Compression.LZX
public int[]? PathLengthsLengthTree { get; set; }
#endif
// Entry Comments Size
// ---------------------------------------------------------------------------------------
// Token sequence (matches and literals) Specified in section 2.6 Variable
/// <summary>
/// Token sequence (matches and literals)
/// </summary>
/// <remarks>Variable</remarks>
#if NET48
public byte[] TokenSequence { get; set; }
#else
public byte[]? TokenSequence { get; set; }
#endif
}
}

View File

@@ -1,45 +1,50 @@
namespace SabreTools.Models.Compression.Quantum
{
/// <see href="http://www.russotto.net/quantumcomp.html"/>
public static class Constants
{
/// <summary>
/// Mask for Quantum Compression Level
/// </summary>
public const ushort MASK_QUANTUM_LEVEL = 0x00F0;
public static readonly int[] PositionSlot = new int[]
{
0x00000, 0x00001, 0x00002, 0x00003, 0x00004, 0x00006, 0x00008, 0x0000c,
0x00010, 0x00018, 0x00020, 0x00030, 0x00040, 0x00060, 0x00080, 0x000c0,
0x00100, 0x00180, 0x00200, 0x00300, 0x00400, 0x00600, 0x00800, 0x00c00,
0x01000, 0x01800, 0x02000, 0x03000, 0x04000, 0x06000, 0x08000, 0x0c000,
0x10000, 0x18000, 0x20000, 0x30000, 0x40000, 0x60000, 0x80000, 0xc0000,
0x100000, 0x180000
};
public static readonly int[] PositionExtraBits = new int[]
{
0, 0, 0, 0, 1, 1, 2, 2,
3, 3, 4, 4, 5, 5, 6, 6,
7, 7, 8, 8, 9, 9, 10, 10,
11, 11, 12, 12, 13, 13, 14, 14,
15, 15, 16, 16, 17, 17, 18, 18,
19, 19
};
public static readonly int[] LengthSlot = new int[]
{
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x08,
0x0a, 0x0c, 0x0e, 0x12, 0x16, 0x1a, 0x1e, 0x26,
0x2e, 0x36, 0x3e, 0x4e, 0x5e, 0x6e, 0x7e, 0x9e,
0xbe, 0xde, 0xfe
};
public static readonly int[] LengthExtraBits = new int[]
{
0, 0, 0, 0, 0, 0, 1, 1,
1, 1, 2, 2, 2, 2, 3, 3,
3, 3, 4, 4, 4, 4, 5, 5,
5, 5, 0
};
/// <summary>
/// Lowest Quantum Level (1)
/// Number of position slots for (tsize - 10)
/// </summary>
public const ushort QUANTUM_LEVEL_LO = 0x0010;
/// <summary>
/// Highest Quantum Level (7)
/// </summary>
public const ushort QUANTUM_LEVEL_HI = 0x0070;
/// <summary>
/// Amount to shift over to get int
/// </summary>
public const ushort SHIFT_QUANTUM_LEVEL = 4;
/// <summary>
/// Mask for Quantum Compression Memory
/// </summary>
public const ushort MASK_QUANTUM_MEM = 0x1F00;
/// <summary>
/// Lowest Quantum Memory (10)
/// </summary>
public const ushort QUANTUM_MEM_LO = 0x0A00;
/// <summary>
/// Highest Quantum Memory (21)
/// </summary>
public const ushort QUANTUM_MEM_HI = 0x1500;
/// <summary>
/// Amount to shift over to get int
/// </summary>
public const ushort SHIFT_QUANTUM_MEM = 8;
public static readonly int[] NumPositionSlots = new int[]
{
20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42
};
}
}

View File

@@ -1,23 +1,28 @@
namespace SabreTools.Models.Compression.Quantum
{
/// <see href="https://github.com/wine-mirror/wine/blob/master/dlls/cabinet/cabinet.h"/>
/// <see href="http://www.russotto.net/quantumcomp.html"/>
public sealed class Model
{
public int TimeToReorder { get; set; }
public int Entries { get; set; }
/// <remarks>
/// All the models are initialized with the symbols in symbol
/// order in the table, and with every symbol in the table
/// having a frequency of 1
/// </remarks>
#if NET48
public ModelSymbol[] Symbols { get; set; }
#else
public ModelSymbol?[]? Symbols { get; set; }
#endif
#if NET48
public ushort[] LookupTable { get; set; } = new ushort[256];
#else
public ushort[]? LookupTable { get; set; } = new ushort[256];
#endif
/// <remarks>
/// The initial total frequency is equal to the number of entries
/// in the table
/// </remarks>
public int TotalFrequency { get; set; }
/// <remarks>The initial time_to_reorder value is 4</remarks>
public int TimeToReorder { get; set; }
}
}

View File

@@ -1,11 +1,15 @@
namespace SabreTools.Models.Compression.Quantum
{
/// <see href="https://github.com/wine-mirror/wine/blob/master/dlls/cabinet/cabinet.h"/>
/// <see href="http://www.russotto.net/quantumcomp.html"/>
public sealed class ModelSymbol
{
public ushort Symbol { get; set; }
/// <summary>
/// The cumulative frequency is the frequency of all the symbols
/// which are at a higher index in the table than that symbol —
/// thus the last entry in the table has a cumulative frequency of 0.
/// </summary>
public ushort CumulativeFrequency { get; set; }
}
}

View File

@@ -11,5 +11,7 @@ namespace SabreTools.Models.PIC
public const string DiscTypeIdentifierReWritable = "BDW";
public const string DiscTypeIdentifierRecordable = "BDR";
public const string DiscTypeIdentifierXGD4 = "XG4";
}
}

View File

@@ -4,7 +4,7 @@
<!-- Assembly Properties -->
<TargetFrameworks>net48;net6.0;net7.0;net8.0</TargetFrameworks>
<RuntimeIdentifiers>win-x86;win-x64;linux-x64;osx-x64</RuntimeIdentifiers>
<Version>1.1.3</Version>
<Version>1.1.4</Version>
<!-- Package Properties -->
<Authors>Matt Nadareski</Authors>