mirror of
https://github.com/SabreTools/SabreTools.IO.git
synced 2026-02-10 05:44:31 +00:00
Compare commits
25 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
5054aeb077 | ||
|
|
d2e9b8d6e5 | ||
|
|
2c29aee834 | ||
|
|
576bafcb87 | ||
|
|
2b310ac528 | ||
|
|
4f6b6d7b59 | ||
|
|
17e55ee233 | ||
|
|
8b78906d1d | ||
|
|
cff2dcf4cc | ||
|
|
a56942cb73 | ||
|
|
5ed661b77c | ||
|
|
a0a0cd0386 | ||
|
|
bcc0fca4ad | ||
|
|
843e821e5f | ||
|
|
630b01283e | ||
|
|
22abb96013 | ||
|
|
314de12661 | ||
|
|
a0b24031b5 | ||
|
|
b4628485c3 | ||
|
|
4610ddc9b9 | ||
|
|
e392ddc8d7 | ||
|
|
1908d1b32e | ||
|
|
9d73195f86 | ||
|
|
335a486f17 | ||
|
|
d3e41ac187 |
@@ -143,27 +143,23 @@ namespace SabreTools.IO.Test.Extensions
|
||||
];
|
||||
var actual = arr.ReadStringsFrom(4);
|
||||
Assert.NotNull(actual);
|
||||
|
||||
// ASCII and UTF-8 are identical for the character range
|
||||
Assert.Equal(2, actual.Count);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void ReadStringsFrom_UTF8_Filled()
|
||||
public void ReadStringsFrom_Latin1Strings_Filled()
|
||||
{
|
||||
byte[]? arr =
|
||||
[
|
||||
.. Encoding.UTF8.GetBytes("TEST"),
|
||||
.. Encoding.Latin1.GetBytes("TEST"),
|
||||
.. new byte[] { 0x00 },
|
||||
.. Encoding.UTF8.GetBytes("TWO"),
|
||||
.. Encoding.Latin1.GetBytes("TWO"),
|
||||
.. new byte[] { 0x00 },
|
||||
.. Encoding.UTF8.GetBytes("DATA"),
|
||||
.. Encoding.Latin1.GetBytes("DATA"),
|
||||
.. new byte[] { 0x00 },
|
||||
];
|
||||
var actual = arr.ReadStringsFrom(4);
|
||||
Assert.NotNull(actual);
|
||||
|
||||
// ASCII and UTF-8 are identical for the character range
|
||||
Assert.Equal(2, actual.Count);
|
||||
}
|
||||
|
||||
@@ -195,11 +191,11 @@ namespace SabreTools.IO.Test.Extensions
|
||||
.. new byte[] { 0x00 },
|
||||
.. Encoding.ASCII.GetBytes("DATA1"),
|
||||
.. new byte[] { 0x00 },
|
||||
.. Encoding.UTF8.GetBytes("TEST2"),
|
||||
.. Encoding.Latin1.GetBytes("TEST2"),
|
||||
.. new byte[] { 0x00 },
|
||||
.. Encoding.UTF8.GetBytes("TWO2"),
|
||||
.. Encoding.Latin1.GetBytes("TWO2"),
|
||||
.. new byte[] { 0x00 },
|
||||
.. Encoding.UTF8.GetBytes("DATA2"),
|
||||
.. Encoding.Latin1.GetBytes("DATA2"),
|
||||
.. new byte[] { 0x00 },
|
||||
.. Encoding.Unicode.GetBytes("TEST3"),
|
||||
.. new byte[] { 0x00 },
|
||||
@@ -210,8 +206,6 @@ namespace SabreTools.IO.Test.Extensions
|
||||
];
|
||||
var actual = arr.ReadStringsFrom(5);
|
||||
Assert.NotNull(actual);
|
||||
|
||||
// ASCII and UTF-8 are identical for the character range
|
||||
Assert.Equal(6, actual.Count);
|
||||
}
|
||||
|
||||
@@ -319,6 +313,22 @@ namespace SabreTools.IO.Test.Extensions
|
||||
Assert.Equal(2, actual.Count);
|
||||
}
|
||||
|
||||
[Fact]
public void ReadStringsWithEncoding_InvalidAsciiChars_Empty()
{
    // Input is the byte range 0x00-0x1F followed by every byte with
    // the high bit set (0x80-0xFF)
    byte[]? input =
    [
        .. Enumerable.Range(0x00, 0x20).Select(i => (byte)i),
        .. Enumerable.Range(0x80, 0x80).Select(i => (byte)i),
    ];

    var strings = input.ReadStringsWithEncoding(1, Encoding.ASCII);

    // The result must exist but contain no strings
    Assert.NotNull(strings);
    Assert.Empty(strings);
}
|
||||
|
||||
[Fact]
|
||||
public void ReadStringsWithEncoding_Latin1_Filled()
|
||||
{
|
||||
@@ -337,6 +347,25 @@ namespace SabreTools.IO.Test.Extensions
|
||||
Assert.Equal(2, actual.Count);
|
||||
}
|
||||
|
||||
[Fact]
public void ReadStringsWithEncoding_InvalidLatin1Chars_Empty()
{
    // Input is the byte range 0x00-0x1F followed by 0x80-0x9F
    byte[]? input =
    [
        .. Enumerable.Range(0x00, 0x20).Select(i => (byte)i),
        .. Enumerable.Range(0x80, 0x20).Select(i => (byte)i),
    ];

    var strings = input.ReadStringsWithEncoding(1, Encoding.Latin1);

    // The result must exist but contain no strings
    Assert.NotNull(strings);
    Assert.Empty(strings);
}
|
||||
|
||||
[Fact]
|
||||
public void ReadStringsWithEncoding_UTF8_Filled()
|
||||
{
|
||||
@@ -355,6 +384,24 @@ namespace SabreTools.IO.Test.Extensions
|
||||
Assert.Equal(2, actual.Count);
|
||||
}
|
||||
|
||||
[Fact]
public void ReadStringsWithEncoding_InvalidUTF8Chars_Empty()
{
    // Input is the byte ranges 0x00-0x1F, 0x80-0xC1 and 0xF5-0xFF, in that order
    byte[]? input =
    [
        .. Enumerable.Range(0x00, 0x20).Select(i => (byte)i),
        .. Enumerable.Range(0x80, 0x42).Select(i => (byte)i),
        .. Enumerable.Range(0xF5, 0x0B).Select(i => (byte)i),
    ];

    var strings = input.ReadStringsWithEncoding(1, Encoding.UTF8);

    // The result must exist but contain no strings
    Assert.NotNull(strings);
    Assert.Empty(strings);
}
|
||||
|
||||
[Fact]
|
||||
public void ReadStringsWithEncoding_UTF16_Filled()
|
||||
{
|
||||
@@ -373,6 +420,21 @@ namespace SabreTools.IO.Test.Extensions
|
||||
Assert.Equal(2, actual.Count);
|
||||
}
|
||||
|
||||
[Fact]
public void ReadStringsWithEncoding_InvalidUTF16Chars_Empty()
{
    // Input is the byte range 0x00-0x1F, read as UTF-16 (little-endian)
    byte[]? input =
    [
        .. Enumerable.Range(0x00, 0x20).Select(i => (byte)i),
    ];

    var strings = input.ReadStringsWithEncoding(1, Encoding.Unicode);

    // The result must exist but contain no strings
    Assert.NotNull(strings);
    Assert.Empty(strings);
}
|
||||
|
||||
[Fact]
|
||||
public void ReadStringsWithEncoding_UTF32_Filled()
|
||||
{
|
||||
@@ -391,6 +453,21 @@ namespace SabreTools.IO.Test.Extensions
|
||||
Assert.Equal(2, actual.Count);
|
||||
}
|
||||
|
||||
[Fact]
public void ReadStringsWithEncoding_InvalidUTF32Chars_Empty()
{
    // Input is the byte range 0x00-0x1F, read as UTF-32
    byte[]? input =
    [
        .. Enumerable.Range(0x00, 0x20).Select(i => (byte)i),
    ];

    var strings = input.ReadStringsWithEncoding(1, Encoding.UTF32);

    // The result must exist but contain no strings
    Assert.NotNull(strings);
    Assert.Empty(strings);
}
|
||||
|
||||
#endregion
|
||||
}
|
||||
}
|
||||
|
||||
@@ -200,29 +200,25 @@ namespace SabreTools.IO.Test.Extensions
|
||||
Stream? stream = new MemoryStream(bytes);
|
||||
var actual = stream.ReadStringsFrom(0, bytes.Length, 4);
|
||||
Assert.NotNull(actual);
|
||||
|
||||
// ASCII and UTF-8 are identical for the character range
|
||||
Assert.Equal(4, actual.Count);
|
||||
Assert.Equal(2, actual.Count);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void ReadStringsFrom_UTF8_Filled()
|
||||
public void ReadStringsFrom_Latin1Strings_Filled()
|
||||
{
|
||||
byte[]? bytes =
|
||||
[
|
||||
.. Encoding.UTF8.GetBytes("TEST"),
|
||||
.. Encoding.Latin1.GetBytes("TEST"),
|
||||
.. new byte[] { 0x00 },
|
||||
.. Encoding.UTF8.GetBytes("TWO"),
|
||||
.. Encoding.Latin1.GetBytes("TWO"),
|
||||
.. new byte[] { 0x00 },
|
||||
.. Encoding.UTF8.GetBytes("DATA"),
|
||||
.. Encoding.Latin1.GetBytes("DATA"),
|
||||
.. new byte[] { 0x00 },
|
||||
];
|
||||
Stream? stream = new MemoryStream(bytes);
|
||||
var actual = stream.ReadStringsFrom(0, bytes.Length, 4);
|
||||
Assert.NotNull(actual);
|
||||
|
||||
// ASCII and UTF-8 are identical for the character range
|
||||
Assert.Equal(4, actual.Count);
|
||||
Assert.Equal(2, actual.Count);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
@@ -254,11 +250,11 @@ namespace SabreTools.IO.Test.Extensions
|
||||
.. new byte[] { 0x00 },
|
||||
.. Encoding.ASCII.GetBytes("DATA1"),
|
||||
.. new byte[] { 0x00 },
|
||||
.. Encoding.UTF8.GetBytes("TEST2"),
|
||||
.. Encoding.Latin1.GetBytes("TEST2"),
|
||||
.. new byte[] { 0x00 },
|
||||
.. Encoding.UTF8.GetBytes("TWO2"),
|
||||
.. Encoding.Latin1.GetBytes("TWO2"),
|
||||
.. new byte[] { 0x00 },
|
||||
.. Encoding.UTF8.GetBytes("DATA2"),
|
||||
.. Encoding.Latin1.GetBytes("DATA2"),
|
||||
.. new byte[] { 0x00 },
|
||||
.. Encoding.Unicode.GetBytes("TEST3"),
|
||||
.. new byte[] { 0x00 },
|
||||
@@ -270,9 +266,7 @@ namespace SabreTools.IO.Test.Extensions
|
||||
Stream? stream = new MemoryStream(bytes);
|
||||
var actual = stream.ReadStringsFrom(0, bytes.Length, 5);
|
||||
Assert.NotNull(actual);
|
||||
|
||||
// ASCII and UTF-8 are identical for the character range
|
||||
Assert.Equal(10, actual.Count);
|
||||
Assert.Equal(6, actual.Count);
|
||||
}
|
||||
|
||||
#endregion
|
||||
|
||||
40
SabreTools.IO.Test/Streams/BufferedStreamTests.cs
Normal file
40
SabreTools.IO.Test/Streams/BufferedStreamTests.cs
Normal file
@@ -0,0 +1,40 @@
|
||||
using System.IO;
|
||||
using Xunit;
|
||||
|
||||
namespace SabreTools.IO.Test.Streams
|
||||
{
|
||||
public class BufferedStreamTests
{
    #region ReadNextByte

    /// <summary>
    /// Reading from a wrapper over an empty stream returns null
    /// </summary>
    [Fact]
    public void ReadNextByte_Empty_Null()
    {
        var buffered = new IO.Streams.BufferedStream(new MemoryStream());

        byte? next = buffered.ReadNextByte();

        Assert.Null(next);
    }

    /// <summary>
    /// Reading from the start of a zero-filled 1024-byte stream returns 0x00
    /// </summary>
    [Fact]
    public void ReadNextByte_Filled_ValidPosition_Byte()
    {
        var backing = new MemoryStream(new byte[1024]);
        var buffered = new IO.Streams.BufferedStream(backing);

        byte? next = buffered.ReadNextByte();

        Assert.Equal((byte)0x00, next);
    }

    /// <summary>
    /// Reading after the backing stream was moved to its end returns null
    /// </summary>
    [Fact]
    public void ReadNextByte_Filled_InvalidPosition_Null()
    {
        var backing = new MemoryStream(new byte[1024]);

        // Position the backing stream at its end before wrapping it
        backing.Seek(0, SeekOrigin.End);
        var buffered = new IO.Streams.BufferedStream(backing);

        byte? next = buffered.ReadNextByte();

        Assert.Null(next);
    }

    #endregion
}
|
||||
}
|
||||
@@ -3,8 +3,6 @@ using System.IO;
|
||||
using System.Text;
|
||||
using SabreTools.Hashing;
|
||||
using SabreTools.IO.Extensions;
|
||||
using SabreTools.Models.PKZIP;
|
||||
using static SabreTools.Models.PKZIP.Constants;
|
||||
|
||||
namespace SabreTools.IO.Compression.Deflate
|
||||
{
|
||||
@@ -20,6 +18,46 @@ namespace SabreTools.IO.Compression.Deflate
|
||||
/// </summary>
|
||||
private const int BufferSize = 1024 * 1024;
|
||||
|
||||
/// <summary>
|
||||
/// Local file header signature
|
||||
/// </summary>
|
||||
private const uint LocalFileHeaderSignature = 0x04034B50;
|
||||
|
||||
#endregion
|
||||
|
||||
#region Private Classes
|
||||
|
||||
/// <summary>
/// Reduced PKZIP local file header holding only the signature,
/// CRC-32, sizes, and file name
/// </summary>
private class MinLocalFileHeader
{
    /// <summary>
    /// Header signature (0x04034B50)
    /// </summary>
    public uint Signature { get; set; }

    /// <summary>
    /// CRC-32 value stored in the header
    /// </summary>
    public uint CRC32 { get; set; }

    /// <summary>
    /// Compressed size stored in the header
    /// </summary>
    public uint CompressedSize { get; set; }

    /// <summary>
    /// Uncompressed size stored in the header
    /// </summary>
    public uint UncompressedSize { get; set; }

    /// <summary>
    /// File name (variable size), null when not set
    /// </summary>
    public string? FileName { get; set; }
}
|
||||
|
||||
#endregion
|
||||
|
||||
#region Extraction
|
||||
@@ -140,7 +178,7 @@ namespace SabreTools.IO.Compression.Deflate
|
||||
long current = source.Position;
|
||||
|
||||
// Parse the PKZIP header, if it exists
|
||||
LocalFileHeader? zipHeader = ParseLocalFileHeader(source);
|
||||
MinLocalFileHeader? zipHeader = ParseLocalFileHeader(source);
|
||||
long zipHeaderBytes = source.Position - current;
|
||||
|
||||
// Always trust the PKZIP CRC-32 value over what is supplied
|
||||
@@ -269,46 +307,39 @@ namespace SabreTools.IO.Compression.Deflate
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Parse a Stream into a local file header
|
||||
/// Parse a Stream into a minimal local file header
|
||||
/// </summary>
|
||||
/// <param name="data">Stream to parse</param>
|
||||
/// <returns>Filled local file header on success, null on error</returns>
|
||||
/// <remarks>Mirror of method in Serialization</remarks>
|
||||
private static LocalFileHeader? ParseLocalFileHeader(Stream data)
|
||||
/// <returns>Filled minimal local file header on success, null on error</returns>
|
||||
/// <remarks>Partial mirror of method in Serialization</remarks>
|
||||
private static MinLocalFileHeader? ParseLocalFileHeader(Stream data)
|
||||
{
|
||||
var header = new LocalFileHeader();
|
||||
var header = new MinLocalFileHeader();
|
||||
|
||||
header.Signature = data.ReadUInt32LittleEndian();
|
||||
if (header.Signature != LocalFileHeaderSignature)
|
||||
return null;
|
||||
|
||||
header.Version = data.ReadUInt16LittleEndian();
|
||||
header.Flags = (GeneralPurposeBitFlags)data.ReadUInt16LittleEndian();
|
||||
header.CompressionMethod = (CompressionMethod)data.ReadUInt16LittleEndian();
|
||||
header.LastModifedFileTime = data.ReadUInt16LittleEndian();
|
||||
header.LastModifiedFileDate = data.ReadUInt16LittleEndian();
|
||||
_ = data.ReadUInt16LittleEndian(); // Version
|
||||
_ = data.ReadUInt16LittleEndian(); // Flags
|
||||
_ = data.ReadUInt16LittleEndian(); // CompressionMethod
|
||||
_ = data.ReadUInt16LittleEndian(); // LastModifedFileTime
|
||||
_ = data.ReadUInt16LittleEndian(); // LastModifiedFileDate
|
||||
header.CRC32 = data.ReadUInt32LittleEndian();
|
||||
header.CompressedSize = data.ReadUInt32LittleEndian();
|
||||
header.UncompressedSize = data.ReadUInt32LittleEndian();
|
||||
header.FileNameLength = data.ReadUInt16LittleEndian();
|
||||
header.ExtraFieldLength = data.ReadUInt16LittleEndian();
|
||||
ushort fileNameLength = data.ReadUInt16LittleEndian();
|
||||
ushort extraFieldLength = data.ReadUInt16LittleEndian();
|
||||
|
||||
if (header.FileNameLength > 0 && data.Position + header.FileNameLength <= data.Length)
|
||||
if (fileNameLength > 0 && data.Position + fileNameLength <= data.Length)
|
||||
{
|
||||
byte[] filenameBytes = data.ReadBytes(header.FileNameLength);
|
||||
if (filenameBytes.Length != header.FileNameLength)
|
||||
return null;
|
||||
|
||||
byte[] filenameBytes = data.ReadBytes(fileNameLength);
|
||||
header.FileName = Encoding.ASCII.GetString(filenameBytes);
|
||||
}
|
||||
|
||||
// Parsing extras is skipped here, unlike in Serialization
|
||||
if (header.ExtraFieldLength > 0 && data.Position + header.ExtraFieldLength <= data.Length)
|
||||
{
|
||||
byte[] extraBytes = data.ReadBytes(header.ExtraFieldLength);
|
||||
if (extraBytes.Length != header.ExtraFieldLength)
|
||||
return null;
|
||||
}
|
||||
if (extraFieldLength > 0 && data.Position + extraFieldLength <= data.Length)
|
||||
_ = data.ReadBytes(extraFieldLength);
|
||||
|
||||
return header;
|
||||
}
|
||||
|
||||
58
SabreTools.IO/Compression/LZX/AlignedOffsetBlockData.cs
Normal file
58
SabreTools.IO/Compression/LZX/AlignedOffsetBlockData.cs
Normal file
@@ -0,0 +1,58 @@
|
||||
namespace SabreTools.IO.Compression.LZX
|
||||
{
|
||||
/// <summary>
/// Data of an aligned offset block. The layout is identical to a verbatim block,
/// except that the aligned offset tree precedes the other trees.
/// </summary>
/// <see href="https://interoperability.blob.core.windows.net/files/MS-PATCH/%5bMS-PATCH%5d.pdf"/>
internal class AlignedOffsetBlockData : BlockData
{
    /// <summary>
    /// Aligned offset tree
    /// </summary>
    /// <remarks>8 elements, 3 bits each</remarks>
    public byte[]? AlignedOffsetTree { get; set; }

    /// <summary>
    /// Pretree for the first 256 elements of the main tree
    /// </summary>
    /// <remarks>20 elements, 4 bits each</remarks>
    public byte[]? PretreeFirst256 { get; set; }

    /// <summary>
    /// Path lengths of the first 256 elements of the main tree
    /// </summary>
    /// <remarks>Encoded using pretree</remarks>
    public int[]? PathLengthsFirst256 { get; set; }

    /// <summary>
    /// Pretree for the remainder of the main tree
    /// </summary>
    /// <remarks>20 elements, 4 bits each</remarks>
    public byte[]? PretreeRemainder { get; set; }

    /// <summary>
    /// Path lengths of the remaining elements of the main tree
    /// </summary>
    /// <remarks>Encoded using pretree</remarks>
    public int[]? PathLengthsRemainder { get; set; }

    /// <summary>
    /// Pretree for the length tree
    /// </summary>
    /// <remarks>20 elements, 4 bits each</remarks>
    public byte[]? PretreeLengthTree { get; set; }

    /// <summary>
    /// Path lengths of the elements in the length tree
    /// </summary>
    /// <remarks>Encoded using pretree</remarks>
    public int[]? PathLengthsLengthTree { get; set; }

    /// <summary>
    /// Token sequence (matches and literals)
    /// </summary>
    /// <remarks>Variable</remarks>
    public byte[]? TokenSequence { get; set; }
}
|
||||
}
|
||||
24
SabreTools.IO/Compression/LZX/Block.cs
Normal file
24
SabreTools.IO/Compression/LZX/Block.cs
Normal file
@@ -0,0 +1,24 @@
|
||||
namespace SabreTools.IO.Compression.LZX
|
||||
{
|
||||
/// <summary>
/// An LZXD block is either a run of compressed data encoded with a single set of
/// Huffman trees, or a run of uncompressed data. A compressed stream holds one or
/// more LZXD blocks, each with its own set of Huffman trees. Blocks need not start
/// or end on a chunk boundary: a block can span multiple chunks, and one chunk can
/// contain multiple blocks. The number of chunks is related to the size of the data
/// being compressed, while the number of blocks is related to how well the data is
/// compressed.
/// </summary>
/// <see href="https://interoperability.blob.core.windows.net/files/MS-PATCH/%5bMS-PATCH%5d.pdf"/>
internal class Block
{
    /// <summary>
    /// Header of the block
    /// </summary>
    public BlockHeader? Header { get; set; }

    /// <summary>
    /// Data of the block
    /// </summary>
    public BlockData? BlockData { get; set; }
}
|
||||
}
|
||||
8
SabreTools.IO/Compression/LZX/BlockData.cs
Normal file
8
SabreTools.IO/Compression/LZX/BlockData.cs
Normal file
@@ -0,0 +1,8 @@
|
||||
namespace SabreTools.IO.Compression.LZX
|
||||
{
|
||||
/// <summary>
/// Abstract base for LZX block data types
/// </summary>
/// <see href="https://interoperability.blob.core.windows.net/files/MS-PATCH/%5bMS-PATCH%5d.pdf"/>
internal abstract class BlockData
{
    // Intentionally empty: no fields are common to all block data
}
|
||||
}
|
||||
33
SabreTools.IO/Compression/LZX/BlockHeader.cs
Normal file
33
SabreTools.IO/Compression/LZX/BlockHeader.cs
Normal file
@@ -0,0 +1,33 @@
|
||||
namespace SabreTools.IO.Compression.LZX
|
||||
{
|
||||
/// <summary>
/// Generic block header. The Block Type field (section 2.3.1.1) indicates which
/// type of block follows, and the Block Size field (section 2.3.1.2) indicates the
/// number of uncompressed bytes represented by the block. A type-specific header
/// describing the remainder of the block follows the generic block header.
/// </summary>
/// <see href="https://interoperability.blob.core.windows.net/files/MS-PATCH/%5bMS-PATCH%5d.pdf"/>
internal class BlockHeader
{
    /// <summary>
    /// Type of the block that follows
    /// </summary>
    /// <remarks>3 bits</remarks>
    public BlockType BlockType { get; set; }

    /// <summary>
    /// High 8 bits of the 24-bit block size
    /// </summary>
    /// <remarks>8 bits</remarks>
    public byte BlockSizeMSB { get; set; }

    /// <summary>
    /// Middle 8 bits of the 24-bit block size
    /// </summary>
    /// <remarks>8 bits</remarks>
    public byte BlockSizeByte2 { get; set; }

    /// <summary>
    /// Low 8 bits of the 24-bit block size
    /// </summary>
    /// <remarks>8 bits</remarks>
    public byte BlocksizeLSB { get; set; }
}
|
||||
}
|
||||
25
SabreTools.IO/Compression/LZX/Chunk.cs
Normal file
25
SabreTools.IO/Compression/LZX/Chunk.cs
Normal file
@@ -0,0 +1,25 @@
|
||||
namespace SabreTools.IO.Compression.LZX
|
||||
{
|
||||
/// <summary>
/// The LZXD compressor emits chunks of compressed data. Every chunk represents
/// exactly 32 KB of uncompressed data, except the last chunk in the stream, which
/// can represent less than 32 KB. So that an exact number of input bytes maps to an
/// exact number of output bytes for each chunk, the output bitstream is padded with
/// up to 15 zero bits after each 32 KB of uncompressed data, realigning it on a
/// 16-bit boundary (even byte boundary) for the next 32 KB of data. The result is a
/// compressed chunk of a byte-aligned size. The compressed chunk could be smaller
/// than 32 KB or larger than 32 KB if the data is incompressible when the chunk is
/// not the last one.
/// </summary>
internal class Chunk
{
    /// <summary>
    /// Header of the chunk
    /// </summary>
    public ChunkHeader? Header { get; set; }

    /// <summary>
    /// Block headers and data contained in the chunk
    /// </summary>
    public Block[]? Blocks { get; set; }
}
|
||||
}
|
||||
46
SabreTools.IO/Compression/LZX/ChunkHeader.cs
Normal file
46
SabreTools.IO/Compression/LZX/ChunkHeader.cs
Normal file
@@ -0,0 +1,46 @@
|
||||
namespace SabreTools.IO.Compression.LZX
|
||||
{
|
||||
/// <summary>
/// The LZXD compressor emits chunks of compressed data. Every chunk represents
/// exactly 32 KB of uncompressed data, except the last chunk in the stream, which
/// can represent less than 32 KB. So that an exact number of input bytes maps to an
/// exact number of output bytes for each chunk, the output bitstream is padded with
/// up to 15 zero bits after each 32 KB of uncompressed data, realigning it on a
/// 16-bit boundary (even byte boundary) for the next 32 KB of data. The result is a
/// compressed chunk of a byte-aligned size. The compressed chunk could be smaller
/// than 32 KB or larger than 32 KB if the data is incompressible when the chunk is
/// not the last one.
/// </summary>
internal class ChunkHeader
{
    /// <summary>
    /// Compressed chunk-size prefix. The LZXD engine encodes this field ahead of
    /// each compressed chunk in the compressed byte stream as a byte-aligned,
    /// little-endian, 16-bit value. The chain of chunk prefixes could be followed
    /// without decompressing any data: the next prefix is located at the absolute
    /// byte offset of this prefix, plus 2 (the size of the chunk-size prefix field),
    /// plus the current chunk size.
    /// </summary>
    public ushort ChunkSize { get; set; }

    /// <summary>
    /// The first bit of the first chunk in the LZXD bitstream (following the 2-byte
    /// chunk-size prefix described in section 2.2.1) indicates the presence or
    /// absence of two 16-bit fields immediately following that bit. If the bit is
    /// set, E8 translation is enabled for all the following chunks in the stream,
    /// using the 32-bit value derived from the two 16-bit fields as the E8_file_size
    /// provided to the compressor when E8 translation was enabled. E8_file_size is
    /// completely independent of the length of the uncompressed data. E8 call
    /// translation is disabled after the 32,768th chunk (after 1 gigabyte (GB) of
    /// uncompressed data).
    /// </summary>
    public byte E8Translation { get; set; }

    /// <summary>
    /// High WORD of the E8 translation size
    /// </summary>
    public ushort? TranslationSizeHighWord { get; set; }

    /// <summary>
    /// Low WORD of the E8 translation size
    /// </summary>
    public ushort? TranslationSizeLowWord { get; set; }
}
|
||||
}
|
||||
38
SabreTools.IO/Compression/LZX/Constants.cs
Normal file
38
SabreTools.IO/Compression/LZX/Constants.cs
Normal file
@@ -0,0 +1,38 @@
|
||||
namespace SabreTools.IO.Compression.LZX
|
||||
{
|
||||
internal static class Constants
{
    // Constants defined by the LZX specification

    public const int LZX_MIN_MATCH = 2;

    public const int LZX_MAX_MATCH = 257;

    public const int LZX_NUM_CHARS = 256;

    public const int LZX_PRETREE_NUM_ELEMENTS = 20;

    /// <summary>
    /// Number of elements in the aligned offset tree
    /// </summary>
    public const int LZX_ALIGNED_NUM_ELEMENTS = 8;

    /// <summary>
    /// This value is missing from the spec
    /// </summary>
    public const int LZX_NUM_PRIMARY_LENGTHS = 7;

    /// <summary>
    /// Number of elements in the length tree
    /// </summary>
    public const int LZX_NUM_SECONDARY_LENGTHS = 249;

    // LZX Huffman defines: the table bits values can be tweaked as desired

    public const int LZX_PRETREE_MAXSYMBOLS = LZX_PRETREE_NUM_ELEMENTS;

    public const int LZX_PRETREE_TABLEBITS = 6;

    public const int LZX_MAINTREE_MAXSYMBOLS = LZX_NUM_CHARS + 50 * 8;

    public const int LZX_MAINTREE_TABLEBITS = 12;

    public const int LZX_LENGTH_MAXSYMBOLS = LZX_NUM_SECONDARY_LENGTHS + 1;

    public const int LZX_LENGTH_TABLEBITS = 12;

    public const int LZX_ALIGNED_MAXSYMBOLS = LZX_ALIGNED_NUM_ELEMENTS;

    public const int LZX_ALIGNED_TABLEBITS = 7;

    /// <summary>
    /// Length table decoding overruns are allowed
    /// </summary>
    public const int LZX_LENTABLE_SAFETY = 64;
}
|
||||
}
|
||||
48
SabreTools.IO/Compression/LZX/Enums.cs
Normal file
48
SabreTools.IO/Compression/LZX/Enums.cs
Normal file
@@ -0,0 +1,48 @@
|
||||
namespace SabreTools.IO.Compression.LZX
|
||||
{
|
||||
/// <summary>
/// Block type, stored in 3 bits
/// </summary>
internal enum BlockType : byte
{
    /// <summary>
    /// Invalid block type
    /// </summary>
    INVALID_0 = 0b000,

    /// <summary>
    /// Verbatim block
    /// </summary>
    Verbatim = 0b001,

    /// <summary>
    /// Aligned offset block
    /// </summary>
    AlignedOffset = 0b010,

    /// <summary>
    /// Uncompressed block
    /// </summary>
    Uncompressed = 0b011,

    /// <summary>
    /// Invalid block type
    /// </summary>
    INVALID_4 = 0b100,

    /// <summary>
    /// Invalid block type
    /// </summary>
    INVALID_5 = 0b101,

    /// <summary>
    /// Invalid block type
    /// </summary>
    INVALID_6 = 0b110,

    /// <summary>
    /// Invalid block type
    /// </summary>
    INVALID_7 = 0b111,
}
|
||||
}
|
||||
54
SabreTools.IO/Compression/LZX/UncompressedBlockData.cs
Normal file
54
SabreTools.IO/Compression/LZX/UncompressedBlockData.cs
Normal file
@@ -0,0 +1,54 @@
|
||||
namespace SabreTools.IO.Compression.LZX
|
||||
{
|
||||
/// <summary>
/// After the generic block header, an uncompressed block starts with 1 to 16 bits
/// of zero padding that align the bit buffer on a 16-bit boundary. The bitstream
/// ends at that point and a byte stream begins. New 32-bit values for R0, R1, and R2
/// follow the zero padding in little-endian form, and then the uncompressed data
/// bytes themselves. Finally, if the uncompressed data length is odd, one extra byte
/// of zero padding is encoded to realign the following bitstream.
///
/// The bitstream of byte-swapped 16-bit integers then resumes for the next Block
/// Type field (if there are subsequent blocks).
///
/// The decoded R0, R1, and R2 values are used as initial repeated offset values to
/// decode the subsequent compressed block if present.
/// </summary>
/// <see href="https://interoperability.blob.core.windows.net/files/MS-PATCH/%5bMS-PATCH%5d.pdf"/>
internal class UncompressedBlockData : BlockData
{
    /// <summary>
    /// Padding that aligns the following field on a 16-bit boundary
    /// </summary>
    /// <remarks>Bits have a value of zero</remarks>
    public ushort PaddingBits { get; set; }

    /// <summary>
    /// R0, least significant to most significant byte (little-endian DWORD ([MS-DTYP]))
    /// </summary>
    /// <remarks>Encoded directly in the byte stream, not in the bitstream of byte-swapped 16-bit words</remarks>
    public uint R0 { get; set; }

    /// <summary>
    /// R1, least significant to most significant byte (little-endian DWORD)
    /// </summary>
    /// <remarks>Encoded directly in the byte stream, not in the bitstream of byte-swapped 16-bit words</remarks>
    public uint R1 { get; set; }

    /// <summary>
    /// R2, least significant to most significant byte (little-endian DWORD)
    /// </summary>
    /// <remarks>Encoded directly in the byte stream, not in the bitstream of byte-swapped 16-bit words</remarks>
    public uint R2 { get; set; }

    /// <summary>
    /// Raw data bytes; can use the direct memcpy function, as specified in [IEEE1003.1]
    /// </summary>
    /// <remarks>Encoded directly in the byte stream, not in the bitstream of byte-swapped 16-bit words</remarks>
    public byte[]? RawDataBytes { get; set; }

    /// <summary>
    /// Alignment byte, only if the uncompressed size is odd
    /// </summary>
    public byte AlignmentByte { get; set; }
}
|
||||
}
|
||||
51
SabreTools.IO/Compression/LZX/VerbatimBlockData.cs
Normal file
51
SabreTools.IO/Compression/LZX/VerbatimBlockData.cs
Normal file
@@ -0,0 +1,51 @@
|
||||
namespace SabreTools.IO.Compression.LZX
|
||||
{
|
||||
/// <summary>
/// Fields of a verbatim block, which follow the generic block header
/// </summary>
/// <see href="https://interoperability.blob.core.windows.net/files/MS-PATCH/%5bMS-PATCH%5d.pdf"/>
internal class VerbatimBlockData : BlockData
{
    /// <summary>
    /// Pretree for the first 256 elements of the main tree
    /// </summary>
    /// <remarks>20 elements, 4 bits each</remarks>
    public byte[]? PretreeFirst256 { get; set; }

    /// <summary>
    /// Path lengths of the first 256 elements of the main tree
    /// </summary>
    /// <remarks>Encoded using pretree</remarks>
    public int[]? PathLengthsFirst256 { get; set; }

    /// <summary>
    /// Pretree for the remainder of the main tree
    /// </summary>
    /// <remarks>20 elements, 4 bits each</remarks>
    public byte[]? PretreeRemainder { get; set; }

    /// <summary>
    /// Path lengths of the remaining elements of the main tree
    /// </summary>
    /// <remarks>Encoded using pretree</remarks>
    public int[]? PathLengthsRemainder { get; set; }

    /// <summary>
    /// Pretree for the length tree
    /// </summary>
    /// <remarks>20 elements, 4 bits each</remarks>
    public byte[]? PretreeLengthTree { get; set; }

    /// <summary>
    /// Path lengths of the elements in the length tree
    /// </summary>
    /// <remarks>Encoded using pretree</remarks>
    public int[]? PathLengthsLengthTree { get; set; }

    /// <summary>
    /// Token sequence (matches and literals)
    /// </summary>
    /// <remarks>Variable</remarks>
    public byte[]? TokenSequence { get; set; }
}
|
||||
}
|
||||
28
SabreTools.IO/Compression/MSZIP/BlockHeader.cs
Normal file
28
SabreTools.IO/Compression/MSZIP/BlockHeader.cs
Normal file
@@ -0,0 +1,28 @@
|
||||
namespace SabreTools.IO.Compression.MSZIP
{
    /// <summary>
    /// <para>
    /// Each MSZIP block MUST consist of a 2-byte MSZIP signature and one or more RFC 1951 blocks.
    /// The 2-byte MSZIP signature MUST consist of the bytes 0x43 and 0x4B, and it MUST be the
    /// first 2 bytes in the MSZIP block.
    /// </para>
    /// <para>
    /// Each MSZIP block is the result of a single deflate compression operation, as defined in
    /// [RFC1951]. The compressor that performs the compression operation MUST generate one or
    /// more RFC 1951 blocks, as defined in [RFC1951]. The number, deflation mode, and type of
    /// RFC 1951 blocks in each MSZIP block is determined by the compressor, as defined in
    /// [RFC1951]. The last RFC 1951 block in each MSZIP block MUST be marked as the "end" of the
    /// stream(1), as defined by [RFC1951] section 3.2.3. Decoding trees MUST be discarded after
    /// each RFC 1951 block, but the history buffer MUST be maintained. Each MSZIP block MUST
    /// represent no more than 32 KB of uncompressed data.
    /// </para>
    /// <para>
    /// The maximum compressed size of each MSZIP block is 32 KB + 12 bytes. This enables the
    /// MSZIP block to contain 32 KB of data split between two noncompressed RFC 1951 blocks,
    /// each of which has a value of BTYPE = 00.
    /// </para>
    /// </summary>
    /// <see href="https://interoperability.blob.core.windows.net/files/MS-MCI/%5bMS-MCI%5d.pdf"/>
    internal class BlockHeader
    {
        /// <summary>
        /// MSZIP signature, 'CK' (the bytes 0x43 and 0x4B)
        /// </summary>
        public ushort Signature { get; set; }
    }
}
|
||||
@@ -1,7 +1,6 @@
|
||||
using System;
|
||||
using System.IO;
|
||||
using SabreTools.IO.Extensions;
|
||||
using SabreTools.Models.Compression.MSZIP;
|
||||
|
||||
namespace SabreTools.IO.Compression.MSZIP
|
||||
{
|
||||
|
||||
50
SabreTools.IO/Compression/Quantum/Constants.cs
Normal file
50
SabreTools.IO/Compression/Quantum/Constants.cs
Normal file
@@ -0,0 +1,50 @@
|
||||
namespace SabreTools.IO.Compression.Quantum
{
    /// <summary>
    /// Lookup tables used by the Quantum decompressor
    /// </summary>
    /// <see href="http://www.russotto.net/quantumcomp.html"/>
    internal static class Constants
    {
        /// <summary>
        /// Position slot table, 42 entries
        /// </summary>
        /// <remarks>
        /// Each entry is separated from the next one by 1 &lt;&lt; PositionExtraBits[i].
        /// Presumably the base match position for each slot; confirm against the linked format notes.
        /// </remarks>
        public static readonly int[] PositionSlot =
        [
            0x000000, 0x000001, 0x000002, 0x000003, 0x000004, 0x000006,
            0x000008, 0x00000C, 0x000010, 0x000018, 0x000020, 0x000030,
            0x000040, 0x000060, 0x000080, 0x0000C0, 0x000100, 0x000180,
            0x000200, 0x000300, 0x000400, 0x000600, 0x000800, 0x000C00,
            0x001000, 0x001800, 0x002000, 0x003000, 0x004000, 0x006000,
            0x008000, 0x00C000, 0x010000, 0x018000, 0x020000, 0x030000,
            0x040000, 0x060000, 0x080000, 0x0C0000, 0x100000, 0x180000,
        ];

        /// <summary>
        /// Extra bit counts matching <see cref="PositionSlot"/> index-for-index, 42 entries
        /// </summary>
        public static readonly int[] PositionExtraBits =
        [
             0,  0,  0,  0,  1,  1,
             2,  2,  3,  3,  4,  4,
             5,  5,  6,  6,  7,  7,
             8,  8,  9,  9, 10, 10,
            11, 11, 12, 12, 13, 13,
            14, 14, 15, 15, 16, 16,
            17, 17, 18, 18, 19, 19,
        ];

        /// <summary>
        /// Length slot table, 27 entries
        /// </summary>
        /// <remarks>
        /// Each entry except the last is separated from the next one by 1 &lt;&lt; LengthExtraBits[i].
        /// Presumably the base match length for each slot; confirm against the linked format notes.
        /// </remarks>
        public static readonly int[] LengthSlot =
        [
            0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x08, 0x0A,
            0x0C, 0x0E, 0x12, 0x16, 0x1A, 0x1E, 0x26, 0x2E, 0x36,
            0x3E, 0x4E, 0x5E, 0x6E, 0x7E, 0x9E, 0xBE, 0xDE, 0xFE,
        ];

        /// <summary>
        /// Extra bit counts matching <see cref="LengthSlot"/> index-for-index, 27 entries
        /// </summary>
        public static readonly int[] LengthExtraBits =
        [
            0, 0, 0, 0, 0, 0, 1, 1, 1,
            1, 2, 2, 2, 2, 3, 3, 3, 3,
            4, 4, 4, 4, 5, 5, 5, 5, 0,
        ];

        /// <summary>
        /// Number of position slots for (tsize - 10)
        /// </summary>
        public static readonly int[] NumPositionSlots =
        [
            20, 22, 24, 26, 28, 30,
            32, 34, 36, 38, 40, 42,
        ];
    }
}
|
||||
@@ -2,8 +2,7 @@ using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using SabreTools.IO.Streams;
|
||||
using SabreTools.Models.Compression.Quantum;
|
||||
using static SabreTools.Models.Compression.Quantum.Constants;
|
||||
using static SabreTools.IO.Compression.Quantum.Constants;
|
||||
|
||||
namespace SabreTools.IO.Compression.Quantum
|
||||
{
|
||||
|
||||
45
SabreTools.IO/Compression/Quantum/Enums.cs
Normal file
45
SabreTools.IO/Compression/Quantum/Enums.cs
Normal file
@@ -0,0 +1,45 @@
|
||||
namespace SabreTools.IO.Compression.Quantum
{
    /// <summary>
    /// Identifies which model a selector value refers to
    /// </summary>
    internal enum SelectorModel
    {
        /// <summary>
        /// Literal model: 64 entries, starting at symbol 0
        /// </summary>
        SELECTOR_0 = 0,

        /// <summary>
        /// Literal model: 64 entries, starting at symbol 64
        /// </summary>
        SELECTOR_1 = 1,

        /// <summary>
        /// Literal model: 64 entries, starting at symbol 128
        /// </summary>
        SELECTOR_2 = 2,

        /// <summary>
        /// Literal model: 64 entries, starting at symbol 192
        /// </summary>
        SELECTOR_3 = 3,

        /// <summary>
        /// LZ model for 3 character matches: at most 24 entries, starting at symbol 0
        /// </summary>
        SELECTOR_4 = 4,

        /// <summary>
        /// LZ model for 4 character matches: at most 36 entries, starting at symbol 0
        /// </summary>
        SELECTOR_5 = 5,

        /// <summary>
        /// LZ model for 5+ character matches: at most 42 entries, starting at symbol 0
        /// </summary>
        SELECTOR_6_POSITION = 6,

        /// <summary>
        /// LZ model for 5+ character matches: 27 entries, starting at symbol 0
        /// </summary>
        SELECTOR_6_LENGTH = 7,
    }
}
|
||||
24
SabreTools.IO/Compression/Quantum/Model.cs
Normal file
24
SabreTools.IO/Compression/Quantum/Model.cs
Normal file
@@ -0,0 +1,24 @@
|
||||
namespace SabreTools.IO.Compression.Quantum
{
    /// <summary>
    /// State of a single Quantum model: its symbol table plus bookkeeping counters
    /// </summary>
    /// <see href="http://www.russotto.net/quantumcomp.html"/>
    internal sealed class Model
    {
        /// <summary>
        /// Entry count for this model
        /// </summary>
        /// <remarks>NOTE(review): presumably the number of symbols in the table; confirm against callers</remarks>
        public int Entries { get; set; }

        /// <summary>
        /// Symbol table for this model
        /// </summary>
        /// <remarks>
        /// Every model starts out with its symbols stored in symbol order,
        /// each of them having a frequency of 1
        /// </remarks>
        public ModelSymbol[]? Symbols { get; set; }

        /// <summary>
        /// Total frequency for this model
        /// </summary>
        /// <remarks>
        /// Starts out equal to the number of entries in the table
        /// </remarks>
        public int TotalFrequency { get; set; }

        /// <summary>
        /// Reorder countdown for this model
        /// </summary>
        /// <remarks>The initial time_to_reorder value is 4</remarks>
        public int TimeToReorder { get; set; }
    }
}
|
||||
15
SabreTools.IO/Compression/Quantum/ModelSymbol.cs
Normal file
15
SabreTools.IO/Compression/Quantum/ModelSymbol.cs
Normal file
@@ -0,0 +1,15 @@
|
||||
namespace SabreTools.IO.Compression.Quantum
{
    /// <summary>
    /// One entry of a Quantum model table: a symbol and its cumulative frequency
    /// </summary>
    /// <see href="http://www.russotto.net/quantumcomp.html"/>
    internal sealed class ModelSymbol
    {
        /// <summary>
        /// Symbol value held by this entry
        /// </summary>
        public ushort Symbol { get; set; }

        /// <summary>
        /// Sum of the frequencies of every symbol stored at a higher table
        /// index than this one; consequently the final table entry always
        /// has a cumulative frequency of 0.
        /// </summary>
        public ushort CumulativeFrequency { get; set; }
    }
}
|
||||
@@ -1,6 +1,5 @@
|
||||
using System;
|
||||
using System.IO;
|
||||
using SabreTools.Models.LZ;
|
||||
|
||||
namespace SabreTools.IO.Compression.SZDD
|
||||
{
|
||||
@@ -15,7 +14,7 @@ namespace SabreTools.IO.Compression.SZDD
|
||||
/// <summary>
|
||||
/// Source stream for the decompressor
|
||||
/// </summary>
|
||||
private readonly BufferedStream _source;
|
||||
private readonly Streams.BufferedStream _source;
|
||||
|
||||
/// <summary>
|
||||
/// SZDD format being decompressed
|
||||
@@ -37,19 +36,19 @@ namespace SabreTools.IO.Compression.SZDD
|
||||
|
||||
// Initialize the window with space characters
|
||||
_window = Array.ConvertAll(_window, b => (byte)0x20);
|
||||
_source = new BufferedStream(source);
|
||||
_source = new Streams.BufferedStream(source);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Create a KWAJ decompressor
|
||||
/// </summary>
|
||||
public static Decompressor CreateKWAJ(byte[] source, KWAJCompressionType compressionType)
|
||||
public static Decompressor CreateKWAJ(byte[] source, ushort compressionType)
|
||||
=> CreateKWAJ(new MemoryStream(source), compressionType);
|
||||
|
||||
/// <summary>
|
||||
/// Create a KWAJ decompressor
|
||||
/// </summary>
|
||||
public static Decompressor CreateKWAJ(Stream source, KWAJCompressionType compressionType)
|
||||
public static Decompressor CreateKWAJ(Stream source, ushort compressionType)
|
||||
{
|
||||
// Create the decompressor
|
||||
var decompressor = new Decompressor(source);
|
||||
@@ -57,11 +56,11 @@ namespace SabreTools.IO.Compression.SZDD
|
||||
// Set the format and return
|
||||
decompressor._format = compressionType switch
|
||||
{
|
||||
KWAJCompressionType.NoCompression => Format.KWAJNoCompression,
|
||||
KWAJCompressionType.NoCompressionXor => Format.KWAJXor,
|
||||
KWAJCompressionType.QBasic => Format.KWAJQBasic,
|
||||
KWAJCompressionType.LZH => Format.KWAJLZH,
|
||||
KWAJCompressionType.MSZIP => Format.KWAJMSZIP,
|
||||
0x0000 => Format.KWAJNoCompression,
|
||||
0x0001 => Format.KWAJXor,
|
||||
0x0002 => Format.KWAJQBasic,
|
||||
0x0003 => Format.KWAJLZH,
|
||||
0x0004 => Format.KWAJMSZIP,
|
||||
_ => throw new IndexOutOfRangeException(nameof(source)),
|
||||
};
|
||||
return decompressor;
|
||||
@@ -229,77 +228,5 @@ namespace SabreTools.IO.Compression.SZDD
|
||||
dest.Flush();
|
||||
return true;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Buffered stream that reads in blocks
|
||||
/// </summary>
|
||||
private class BufferedStream
|
||||
{
|
||||
/// <summary>
|
||||
/// Source stream for populating the buffer
|
||||
/// </summary>
|
||||
private readonly Stream _source;
|
||||
|
||||
/// <summary>
|
||||
/// Internal buffer to read
|
||||
/// </summary>
|
||||
private readonly byte[] _buffer = new byte[2048];
|
||||
|
||||
/// <summary>
|
||||
/// Current pointer into the buffer
|
||||
/// </summary>
|
||||
private int _bufferPtr = 0;
|
||||
|
||||
/// <summary>
|
||||
/// Represents the number of available bytes
|
||||
/// </summary>
|
||||
private int _available = -1;
|
||||
|
||||
/// <summary>
|
||||
/// Create a new buffered stream
|
||||
/// </summary>
|
||||
public BufferedStream(Stream source)
|
||||
{
|
||||
_source = source;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Read the next byte from the buffer, if possible
|
||||
/// </summary>
|
||||
public byte? ReadNextByte()
|
||||
{
|
||||
// Ensure the buffer first
|
||||
if (!EnsureBuffer())
|
||||
return null;
|
||||
|
||||
// Return the next available value
|
||||
return _buffer[_bufferPtr++];
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Ensure the buffer has data to read
|
||||
/// </summary>
|
||||
private bool EnsureBuffer()
|
||||
{
|
||||
// Force an update if in the initial state
|
||||
if (_available == -1)
|
||||
{
|
||||
_available = _source.Read(_buffer, 0, _buffer.Length);
|
||||
_bufferPtr = 0;
|
||||
return _available != 0;
|
||||
}
|
||||
|
||||
// If the pointer is out of range
|
||||
if (_bufferPtr >= _available)
|
||||
{
|
||||
_available = _source.Read(_buffer, 0, _buffer.Length);
|
||||
_bufferPtr = 0;
|
||||
return _available != 0;
|
||||
}
|
||||
|
||||
// Otherwise, assume data is available
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,7 +2,6 @@ using System;
|
||||
using System.IO;
|
||||
using SabreTools.Hashing;
|
||||
using SabreTools.Matching;
|
||||
using static SabreTools.Models.MoPaQ.Constants;
|
||||
|
||||
namespace SabreTools.IO.Encryption
|
||||
{
|
||||
@@ -11,6 +10,14 @@ namespace SabreTools.IO.Encryption
|
||||
/// </summary>
|
||||
public class MoPaQDecrypter
|
||||
{
|
||||
#region Constants
|
||||
|
||||
private const uint MPQ_HASH_KEY2_MIX = 0x400;
|
||||
|
||||
private const uint STORM_BUFFER_SIZE = 0x500;
|
||||
|
||||
#endregion
|
||||
|
||||
#region Private Instance Variables
|
||||
|
||||
/// <summary>
|
||||
|
||||
@@ -59,14 +59,24 @@ namespace SabreTools.IO.Extensions
|
||||
/// </summary>
|
||||
/// <param name="charLimit">Number of characters needed to be a valid string, default 5</param>
|
||||
/// <returns>String list containing the requested data, null on error</returns>
|
||||
#if NET5_0_OR_GREATER
|
||||
/// <remarks>This reads both Latin1 and UTF-16 strings from the input data</remarks>
|
||||
#else
|
||||
/// <remarks>This reads both ASCII and UTF-16 strings from the input data</remarks>
|
||||
#endif
|
||||
public static List<string>? ReadStringsFrom(this byte[]? input, int charLimit = 5)
|
||||
{
|
||||
// Validate the data
|
||||
if (input == null || input.Length == 0)
|
||||
return null;
|
||||
|
||||
#if NET5_0_OR_GREATER
|
||||
// Check for Latin1 strings
|
||||
var asciiStrings = input.ReadStringsWithEncoding(charLimit, Encoding.Latin1);
|
||||
#else
|
||||
// Check for ASCII strings
|
||||
var asciiStrings = input.ReadStringsWithEncoding(charLimit, Encoding.ASCII);
|
||||
#endif
|
||||
|
||||
// Check for Unicode strings
|
||||
// We are limiting the check for Unicode characters with a second byte of 0x00 for now
|
||||
@@ -101,13 +111,17 @@ namespace SabreTools.IO.Extensions
|
||||
|
||||
// Short-circuit for some encoding types
|
||||
if (encoding.CodePage == Encoding.ASCII.CodePage)
|
||||
return bytes.ReadFixedWidthEncodingStrings(charLimit, Encoding.ASCII, 1);
|
||||
return bytes.ReadAsciiStrings(charLimit);
|
||||
#if NET5_0_OR_GREATER
|
||||
else if (encoding.CodePage == Encoding.Latin1.CodePage)
|
||||
return bytes.ReadFixedWidthEncodingStrings(charLimit, Encoding.Latin1, 1);
|
||||
#endif
|
||||
else if (encoding.IsSingleByte)
|
||||
return bytes.ReadFixedWidthEncodingStrings(charLimit, encoding, 1);
|
||||
else if (encoding.CodePage == Encoding.Unicode.CodePage)
|
||||
return bytes.ReadFixedWidthEncodingStrings(charLimit, Encoding.Unicode, 2);
|
||||
else if (encoding.CodePage == Encoding.BigEndianUnicode.CodePage)
|
||||
return bytes.ReadFixedWidthEncodingStrings(charLimit, Encoding.BigEndianUnicode, 2);
|
||||
else if (encoding.CodePage == Encoding.UTF32.CodePage)
|
||||
return bytes.ReadFixedWidthEncodingStrings(charLimit, Encoding.UTF32, 4);
|
||||
|
||||
@@ -133,7 +147,7 @@ namespace SabreTools.IO.Extensions
|
||||
char c = (char)reader.Read();
|
||||
|
||||
// If the character is invalid
|
||||
if (char.IsControl(c) || (c & 0xFF00) != 0)
|
||||
if (char.IsControl(c) || (c & 0xFFFFFF00) != 0)
|
||||
{
|
||||
// Seek to the end of the last found string
|
||||
string str = sb.ToString();
|
||||
@@ -141,6 +155,10 @@ namespace SabreTools.IO.Extensions
|
||||
ms.Seek(lastOffset, SeekOrigin.Begin);
|
||||
reader.DiscardBufferedData();
|
||||
|
||||
// If there is no cached string
|
||||
if (str.Length == 0)
|
||||
continue;
|
||||
|
||||
// Add the string if long enough
|
||||
if (str.Length >= charLimit)
|
||||
strings.Add(str);
|
||||
@@ -209,10 +227,77 @@ namespace SabreTools.IO.Extensions
|
||||
// Pretend only one byte was read
|
||||
offset -= width - 1;
|
||||
|
||||
// If there is no cached string
|
||||
if (sb.Length == 0)
|
||||
continue;
|
||||
|
||||
// Add the string if long enough
|
||||
string str = sb.ToString();
|
||||
if (str.Length >= charLimit)
|
||||
strings.Add(str);
|
||||
if (sb.Length >= charLimit)
|
||||
strings.Add(sb.ToString());
|
||||
|
||||
// Clear the builder and continue
|
||||
#if NET20 || NET35
|
||||
sb = new();
|
||||
#else
|
||||
sb.Clear();
|
||||
#endif
|
||||
continue;
|
||||
}
|
||||
|
||||
// Otherwise, add the character to the builder and continue
|
||||
sb.Append(c);
|
||||
}
|
||||
|
||||
// Handle any remaining data
|
||||
if (sb.Length >= charLimit)
|
||||
strings.Add(sb.ToString());
|
||||
|
||||
return strings;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Read string data from a byte array using ASCII encoding
|
||||
/// </summary>
|
||||
/// <param name="bytes">Byte array representing the source data</param>
|
||||
/// <param name="charLimit">Number of characters needed to be a valid string</param>
|
||||
/// <returns>String list containing the requested data, empty on error</returns>
|
||||
/// <remarks>Handling for 7-bit ASCII needs to be done differently than other fixed-width encodings</remarks>
|
||||
#if NET20
|
||||
private static List<string> ReadAsciiStrings(this byte[] bytes, int charLimit)
|
||||
#else
|
||||
private static HashSet<string> ReadAsciiStrings(this byte[] bytes, int charLimit)
|
||||
#endif
|
||||
{
|
||||
if (charLimit <= 0 || charLimit > bytes.Length)
|
||||
return [];
|
||||
|
||||
// Create the string set to return
|
||||
#if NET20
|
||||
var strings = new List<string>();
|
||||
#else
|
||||
var strings = new HashSet<string>();
|
||||
#endif
|
||||
|
||||
// Create a string builder for the loop
|
||||
var sb = new StringBuilder();
|
||||
|
||||
// Check for strings
|
||||
int offset = 0;
|
||||
while (offset < bytes.Length)
|
||||
{
|
||||
// Read the next character from the stream
|
||||
char c = bytes.ReadChar(ref offset);
|
||||
|
||||
// If the character is invalid
|
||||
if (char.IsControl(c) || c > 0x7F)
|
||||
{
|
||||
// If there is no cached string
|
||||
if (sb.Length == 0)
|
||||
continue;
|
||||
|
||||
// Add the string if long enough
|
||||
if (sb.Length >= charLimit)
|
||||
strings.Add(sb.ToString());
|
||||
|
||||
// Clear the builder and continue
|
||||
#if NET20 || NET35
|
||||
|
||||
@@ -1065,6 +1065,10 @@ namespace SabreTools.IO.Extensions
|
||||
/// </summary>
|
||||
private static byte[] ReadExactlyToBuffer(byte[] content, ref int offset, int length)
|
||||
{
|
||||
// If we have an invalid offset
|
||||
if (offset < 0 || offset >= content.Length)
|
||||
throw new ArgumentOutOfRangeException($"{nameof(offset)} must be between 0 and {content.Length}, {offset} provided");
|
||||
|
||||
// If we have an invalid length
|
||||
if (length < 0)
|
||||
throw new ArgumentOutOfRangeException($"{nameof(length)} must be 0 or a positive value, {length} requested");
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Text;
|
||||
|
||||
namespace SabreTools.IO.Extensions
|
||||
{
|
||||
@@ -76,10 +75,15 @@ namespace SabreTools.IO.Extensions
|
||||
/// <summary>
|
||||
/// Read string data from a Stream
|
||||
/// </summary>
|
||||
/// <param name="charLimit">Number of characters needed to be a valid string, default 5</param>
|
||||
/// <param name="position">Position in the source to read from</param>
|
||||
/// <param name="length">Length of the requested data</param>
|
||||
/// <param name="charLimit">Number of characters needed to be a valid string, default 5</param>
|
||||
/// <returns>String list containing the requested data, null on error</returns>
|
||||
#if NET5_0_OR_GREATER
|
||||
/// <remarks>This reads both Latin1 and UTF-16 strings from the input data</remarks>
|
||||
#else
|
||||
/// <remarks>This reads both ASCII and UTF-16 strings from the input data</remarks>
|
||||
#endif
|
||||
public static List<string>? ReadStringsFrom(this Stream? input, int position, int length, int charLimit = 5)
|
||||
{
|
||||
// Read the data as a byte array first
|
||||
@@ -87,23 +91,7 @@ namespace SabreTools.IO.Extensions
|
||||
if (data == null)
|
||||
return null;
|
||||
|
||||
// Check for ASCII strings
|
||||
var asciiStrings = data.ReadStringsWithEncoding(charLimit, Encoding.ASCII);
|
||||
|
||||
// Check for UTF-8 strings
|
||||
// We are limiting the check for Unicode characters with a second byte of 0x00 for now
|
||||
var utf8Strings = data.ReadStringsWithEncoding(charLimit, Encoding.UTF8);
|
||||
|
||||
// Check for Unicode strings
|
||||
// We are limiting the check for Unicode characters with a second byte of 0x00 for now
|
||||
var unicodeStrings = data.ReadStringsWithEncoding(charLimit, Encoding.Unicode);
|
||||
|
||||
// Ignore duplicate strings across encodings
|
||||
List<string> sourceStrings = [.. asciiStrings, .. utf8Strings, .. unicodeStrings];
|
||||
|
||||
// Sort the strings and return
|
||||
sourceStrings.Sort();
|
||||
return sourceStrings;
|
||||
return data.ReadStringsFrom(charLimit);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
<SuppressTfmSupportBuildWarnings>true</SuppressTfmSupportBuildWarnings>
|
||||
<SymbolPackageFormat>snupkg</SymbolPackageFormat>
|
||||
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
|
||||
<Version>1.7.3</Version>
|
||||
<Version>1.7.4</Version>
|
||||
|
||||
<!-- Package Properties -->
|
||||
<Authors>Matt Nadareski</Authors>
|
||||
@@ -31,7 +31,6 @@
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="SabreTools.Matching" Version="1.6.0" />
|
||||
<PackageReference Include="SabreTools.Models" Version="1.7.1" />
|
||||
<PackageReference Include="SabreTools.Hashing" Version="1.5.0" />
|
||||
</ItemGroup>
|
||||
|
||||
|
||||
77
SabreTools.IO/Streams/BufferedStream.cs
Normal file
77
SabreTools.IO/Streams/BufferedStream.cs
Normal file
@@ -0,0 +1,77 @@
|
||||
using System.IO;
|
||||
|
||||
namespace SabreTools.IO.Streams
{
    /// <summary>
    /// Reads a source stream in fixed-size blocks and hands the data out one byte at a time
    /// </summary>
    /// <remarks>Not a true <see cref="Stream"/> implementation yet</remarks>
    public class BufferedStream
    {
        /// <summary>
        /// Stream the internal buffer is filled from
        /// </summary>
        private readonly Stream _source;

        /// <summary>
        /// Block of data most recently read from the source
        /// </summary>
        private readonly byte[] _buffer = new byte[2048];

        /// <summary>
        /// Index of the next unread byte in the buffer
        /// </summary>
        private int _bufferPtr = 0;

        /// <summary>
        /// Count of valid bytes in the buffer, or -1 before the first fill
        /// </summary>
        private int _available = -1;

        /// <summary>
        /// Wrap an existing stream for block-buffered, byte-wise reading
        /// </summary>
        /// <param name="source">Stream to pull data from</param>
        public BufferedStream(Stream source) => _source = source;

        /// <summary>
        /// Get the next byte from the buffer, refilling it from the source when needed
        /// </summary>
        /// <returns>The next byte, or null when the source returned no more data</returns>
        public byte? ReadNextByte()
        {
            // Hand out a byte only when the buffer holds unread data
            if (EnsureBuffer())
                return _buffer[_bufferPtr++];

            return null;
        }

        /// <summary>
        /// Make sure the buffer holds at least one unread byte
        /// </summary>
        /// <returns>True if a byte can be read from the buffer, false otherwise</returns>
        private bool EnsureBuffer()
        {
            bool neverFilled = _available == -1;
            bool consumed = _bufferPtr >= _available;

            // Unread data is still present, so there is nothing to do
            if (!neverFilled && !consumed)
                return true;

            // Initial state or fully consumed block: pull the next block
            _available = _source.Read(_buffer, 0, _buffer.Length);
            _bufferPtr = 0;
            return _available != 0;
        }
    }
}
|
||||
Reference in New Issue
Block a user