25 Commits
1.7.3 ... 1.7.4

Author SHA1 Message Date
Matt Nadareski
5054aeb077 Bump version 2025-09-22 17:48:18 -04:00
Matt Nadareski
d2e9b8d6e5 Fix byte array test 2025-09-22 17:44:41 -04:00
Matt Nadareski
2c29aee834 Remove Models from references 2025-09-22 11:03:40 -04:00
Matt Nadareski
576bafcb87 Create minimal model for InflateWrapper 2025-09-22 11:03:07 -04:00
Matt Nadareski
2b310ac528 SZDD no longer uses models 2025-09-22 10:55:14 -04:00
Matt Nadareski
4f6b6d7b59 Reduce Models use another notch 2025-09-22 10:52:26 -04:00
Matt Nadareski
17e55ee233 Move BufferedStream out of SZDD 2025-09-22 10:50:53 -04:00
Matt Nadareski
8b78906d1d Move MoPaQ encryption constants from Models 2025-09-22 10:37:27 -04:00
Matt Nadareski
cff2dcf4cc Move LZX models from Models 2025-09-22 10:35:04 -04:00
Matt Nadareski
a56942cb73 Move Quantum compression models from Models 2025-09-22 10:31:14 -04:00
Matt Nadareski
5ed661b77c Move MSZIP "model" from Models 2025-09-22 10:27:03 -04:00
Matt Nadareski
a0a0cd0386 Add more complete UTF-8 first-byte tests 2025-09-21 16:34:46 -04:00
Matt Nadareski
bcc0fca4ad Ensure 7-bit ASCII never reads above 0x7F 2025-09-21 16:27:03 -04:00
Matt Nadareski
843e821e5f Use extended check in slow path too 2025-09-21 16:07:20 -04:00
Matt Nadareski
630b01283e Latin1 instead of ASCII for .NET 5.0 and beyond 2025-09-21 15:12:16 -04:00
Matt Nadareski
22abb96013 Add remarks about what encodings are used 2025-09-21 15:02:33 -04:00
Matt Nadareski
314de12661 Fix tests, remove UTF-8 checks from irrelevant places 2025-09-21 14:03:45 -04:00
Matt Nadareski
a0b24031b5 Remove duplicate code from Stream implementation 2025-09-21 13:58:46 -04:00
Matt Nadareski
b4628485c3 Sync stream implementation with byte one 2025-09-21 13:58:13 -04:00
Matt Nadareski
4610ddc9b9 Don't read the string unless it's long enough 2025-09-21 13:53:16 -04:00
Matt Nadareski
e392ddc8d7 Fix code formatting 2025-09-21 13:52:05 -04:00
Matt Nadareski
1908d1b32e More generically support single-byte encodings 2025-09-21 13:50:08 -04:00
Matt Nadareski
9d73195f86 Big-endian unicode support because it's there 2025-09-21 13:42:05 -04:00
Matt Nadareski
335a486f17 Special handling of empty string builders 2025-09-21 13:41:06 -04:00
Matt Nadareski
d3e41ac187 Handle invalid offsets in byte array extensions 2025-09-21 11:43:07 -04:00
28 changed files with 941 additions and 167 deletions

View File

@@ -143,27 +143,23 @@ namespace SabreTools.IO.Test.Extensions
];
var actual = arr.ReadStringsFrom(4);
Assert.NotNull(actual);
// ASCII and UTF-8 are identical for the character range
Assert.Equal(2, actual.Count);
}
[Fact]
public void ReadStringsFrom_UTF8_Filled()
public void ReadStringsFrom_Latin1Strings_Filled()
{
byte[]? arr =
[
.. Encoding.UTF8.GetBytes("TEST"),
.. Encoding.Latin1.GetBytes("TEST"),
.. new byte[] { 0x00 },
.. Encoding.UTF8.GetBytes("TWO"),
.. Encoding.Latin1.GetBytes("TWO"),
.. new byte[] { 0x00 },
.. Encoding.UTF8.GetBytes("DATA"),
.. Encoding.Latin1.GetBytes("DATA"),
.. new byte[] { 0x00 },
];
var actual = arr.ReadStringsFrom(4);
Assert.NotNull(actual);
// ASCII and UTF-8 are identical for the character range
Assert.Equal(2, actual.Count);
}
@@ -195,11 +191,11 @@ namespace SabreTools.IO.Test.Extensions
.. new byte[] { 0x00 },
.. Encoding.ASCII.GetBytes("DATA1"),
.. new byte[] { 0x00 },
.. Encoding.UTF8.GetBytes("TEST2"),
.. Encoding.Latin1.GetBytes("TEST2"),
.. new byte[] { 0x00 },
.. Encoding.UTF8.GetBytes("TWO2"),
.. Encoding.Latin1.GetBytes("TWO2"),
.. new byte[] { 0x00 },
.. Encoding.UTF8.GetBytes("DATA2"),
.. Encoding.Latin1.GetBytes("DATA2"),
.. new byte[] { 0x00 },
.. Encoding.Unicode.GetBytes("TEST3"),
.. new byte[] { 0x00 },
@@ -210,8 +206,6 @@ namespace SabreTools.IO.Test.Extensions
];
var actual = arr.ReadStringsFrom(5);
Assert.NotNull(actual);
// ASCII and UTF-8 are identical for the character range
Assert.Equal(6, actual.Count);
}
@@ -319,6 +313,22 @@ namespace SabreTools.IO.Test.Extensions
Assert.Equal(2, actual.Count);
}
[Fact]
public void ReadStringsWithEncoding_InvalidAsciiChars_Empty()
{
byte[]? arr =
[
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
.. Enumerable.Range(0x80, 0x80).Select(i => (byte)i),
];
var actual = arr.ReadStringsWithEncoding(1, Encoding.ASCII);
Assert.NotNull(actual);
Assert.Empty(actual);
}
[Fact]
public void ReadStringsWithEncoding_Latin1_Filled()
{
@@ -337,6 +347,25 @@ namespace SabreTools.IO.Test.Extensions
Assert.Equal(2, actual.Count);
}
[Fact]
public void ReadStringsWithEncoding_InvalidLatin1Chars_Empty()
{
byte[]? arr =
[
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F,
];
var actual = arr.ReadStringsWithEncoding(1, Encoding.Latin1);
Assert.NotNull(actual);
Assert.Empty(actual);
}
[Fact]
public void ReadStringsWithEncoding_UTF8_Filled()
{
@@ -355,6 +384,24 @@ namespace SabreTools.IO.Test.Extensions
Assert.Equal(2, actual.Count);
}
[Fact]
public void ReadStringsWithEncoding_InvalidUTF8Chars_Empty()
{
byte[]? arr =
[
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
.. Enumerable.Range(0x80, 0x42).Select(i => (byte)i),
0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC,
0xFD, 0xFE, 0xFF,
];
var actual = arr.ReadStringsWithEncoding(1, Encoding.UTF8);
Assert.NotNull(actual);
Assert.Empty(actual);
}
[Fact]
public void ReadStringsWithEncoding_UTF16_Filled()
{
@@ -373,6 +420,21 @@ namespace SabreTools.IO.Test.Extensions
Assert.Equal(2, actual.Count);
}
[Fact]
public void ReadStringsWithEncoding_InvalidUTF16Chars_Empty()
{
byte[]? arr =
[
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
];
var actual = arr.ReadStringsWithEncoding(1, Encoding.Unicode);
Assert.NotNull(actual);
Assert.Empty(actual);
}
[Fact]
public void ReadStringsWithEncoding_UTF32_Filled()
{
@@ -391,6 +453,21 @@ namespace SabreTools.IO.Test.Extensions
Assert.Equal(2, actual.Count);
}
[Fact]
public void ReadStringsWithEncoding_InvalidUTF32Chars_Empty()
{
byte[]? arr =
[
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
];
var actual = arr.ReadStringsWithEncoding(1, Encoding.UTF32);
Assert.NotNull(actual);
Assert.Empty(actual);
}
#endregion
}
}

View File

@@ -200,29 +200,25 @@ namespace SabreTools.IO.Test.Extensions
Stream? stream = new MemoryStream(bytes);
var actual = stream.ReadStringsFrom(0, bytes.Length, 4);
Assert.NotNull(actual);
// ASCII and UTF-8 are identical for the character range
Assert.Equal(4, actual.Count);
Assert.Equal(2, actual.Count);
}
[Fact]
public void ReadStringsFrom_UTF8_Filled()
public void ReadStringsFrom_Latin1Strings_Filled()
{
byte[]? bytes =
[
.. Encoding.UTF8.GetBytes("TEST"),
.. Encoding.Latin1.GetBytes("TEST"),
.. new byte[] { 0x00 },
.. Encoding.UTF8.GetBytes("TWO"),
.. Encoding.Latin1.GetBytes("TWO"),
.. new byte[] { 0x00 },
.. Encoding.UTF8.GetBytes("DATA"),
.. Encoding.Latin1.GetBytes("DATA"),
.. new byte[] { 0x00 },
];
Stream? stream = new MemoryStream(bytes);
var actual = stream.ReadStringsFrom(0, bytes.Length, 4);
Assert.NotNull(actual);
// ASCII and UTF-8 are identical for the character range
Assert.Equal(4, actual.Count);
Assert.Equal(2, actual.Count);
}
[Fact]
@@ -254,11 +250,11 @@ namespace SabreTools.IO.Test.Extensions
.. new byte[] { 0x00 },
.. Encoding.ASCII.GetBytes("DATA1"),
.. new byte[] { 0x00 },
.. Encoding.UTF8.GetBytes("TEST2"),
.. Encoding.Latin1.GetBytes("TEST2"),
.. new byte[] { 0x00 },
.. Encoding.UTF8.GetBytes("TWO2"),
.. Encoding.Latin1.GetBytes("TWO2"),
.. new byte[] { 0x00 },
.. Encoding.UTF8.GetBytes("DATA2"),
.. Encoding.Latin1.GetBytes("DATA2"),
.. new byte[] { 0x00 },
.. Encoding.Unicode.GetBytes("TEST3"),
.. new byte[] { 0x00 },
@@ -270,9 +266,7 @@ namespace SabreTools.IO.Test.Extensions
Stream? stream = new MemoryStream(bytes);
var actual = stream.ReadStringsFrom(0, bytes.Length, 5);
Assert.NotNull(actual);
// ASCII and UTF-8 are identical for the character range
Assert.Equal(10, actual.Count);
Assert.Equal(6, actual.Count);
}
#endregion

View File

@@ -0,0 +1,40 @@
using System.IO;
using Xunit;

namespace SabreTools.IO.Test.Streams
{
    public class BufferedStreamTests
    {
        #region ReadNextByte

        [Fact]
        public void ReadNextByte_Empty_Null()
        {
            // Arrange: a backing stream with no data at all
            var backing = new MemoryStream();
            var buffered = new IO.Streams.BufferedStream(backing);

            // Act
            byte? result = buffered.ReadNextByte();

            // Assert: nothing to read yields null
            Assert.Null(result);
        }

        [Fact]
        public void ReadNextByte_Filled_ValidPosition_Byte()
        {
            // Arrange: 1 KiB of zero bytes, position at the start
            var backing = new MemoryStream(new byte[1024]);
            var buffered = new IO.Streams.BufferedStream(backing);

            // Act
            byte? result = buffered.ReadNextByte();

            // Assert: first byte of the zero-filled backing data
            Assert.Equal((byte)0x00, result);
        }

        [Fact]
        public void ReadNextByte_Filled_InvalidPosition_Null()
        {
            // Arrange: seek the backing stream to EOF before wrapping it
            var backing = new MemoryStream(new byte[1024]);
            backing.Seek(0, SeekOrigin.End);
            var buffered = new IO.Streams.BufferedStream(backing);

            // Act
            byte? result = buffered.ReadNextByte();

            // Assert: reading past the end yields null
            Assert.Null(result);
        }

        #endregion
    }
}

View File

@@ -3,8 +3,6 @@ using System.IO;
using System.Text;
using SabreTools.Hashing;
using SabreTools.IO.Extensions;
using SabreTools.Models.PKZIP;
using static SabreTools.Models.PKZIP.Constants;
namespace SabreTools.IO.Compression.Deflate
{
@@ -20,6 +18,46 @@ namespace SabreTools.IO.Compression.Deflate
/// </summary>
private const int BufferSize = 1024 * 1024;
/// <summary>
/// Local file header signature
/// </summary>
private const uint LocalFileHeaderSignature = 0x04034B50;
#endregion
#region Private Classes
/// <summary>
/// Minimal PKZIP local file header information
/// </summary>
private class MinLocalFileHeader
{
/// <summary>
/// Signature (0x04034B50)
/// </summary>
public uint Signature { get; set; }
/// <summary>
/// CRC-32
/// </summary>
public uint CRC32 { get; set; }
/// <summary>
/// Compressed size
/// </summary>
public uint CompressedSize { get; set; }
/// <summary>
/// Uncompressed size
/// </summary>
public uint UncompressedSize { get; set; }
/// <summary>
/// File name (variable size)
/// </summary>
public string? FileName { get; set; }
}
#endregion
#region Extraction
@@ -140,7 +178,7 @@ namespace SabreTools.IO.Compression.Deflate
long current = source.Position;
// Parse the PKZIP header, if it exists
LocalFileHeader? zipHeader = ParseLocalFileHeader(source);
MinLocalFileHeader? zipHeader = ParseLocalFileHeader(source);
long zipHeaderBytes = source.Position - current;
// Always trust the PKZIP CRC-32 value over what is supplied
@@ -269,46 +307,39 @@ namespace SabreTools.IO.Compression.Deflate
}
/// <summary>
/// Parse a Stream into a local file header
/// Parse a Stream into a minimal local file header
/// </summary>
/// <param name="data">Stream to parse</param>
/// <returns>Filled local file header on success, null on error</returns>
/// <remarks>Mirror of method in Serialization</remarks>
private static LocalFileHeader? ParseLocalFileHeader(Stream data)
/// <returns>Filled minimal local file header on success, null on error</returns>
/// <remarks>Partial mirror of method in Serialization</remarks>
private static MinLocalFileHeader? ParseLocalFileHeader(Stream data)
{
var header = new LocalFileHeader();
var header = new MinLocalFileHeader();
header.Signature = data.ReadUInt32LittleEndian();
if (header.Signature != LocalFileHeaderSignature)
return null;
header.Version = data.ReadUInt16LittleEndian();
header.Flags = (GeneralPurposeBitFlags)data.ReadUInt16LittleEndian();
header.CompressionMethod = (CompressionMethod)data.ReadUInt16LittleEndian();
header.LastModifedFileTime = data.ReadUInt16LittleEndian();
header.LastModifiedFileDate = data.ReadUInt16LittleEndian();
_ = data.ReadUInt16LittleEndian(); // Version
_ = data.ReadUInt16LittleEndian(); // Flags
_ = data.ReadUInt16LittleEndian(); // CompressionMethod
_ = data.ReadUInt16LittleEndian(); // LastModifedFileTime
_ = data.ReadUInt16LittleEndian(); // LastModifiedFileDate
header.CRC32 = data.ReadUInt32LittleEndian();
header.CompressedSize = data.ReadUInt32LittleEndian();
header.UncompressedSize = data.ReadUInt32LittleEndian();
header.FileNameLength = data.ReadUInt16LittleEndian();
header.ExtraFieldLength = data.ReadUInt16LittleEndian();
ushort fileNameLength = data.ReadUInt16LittleEndian();
ushort extraFieldLength = data.ReadUInt16LittleEndian();
if (header.FileNameLength > 0 && data.Position + header.FileNameLength <= data.Length)
if (fileNameLength > 0 && data.Position + fileNameLength <= data.Length)
{
byte[] filenameBytes = data.ReadBytes(header.FileNameLength);
if (filenameBytes.Length != header.FileNameLength)
return null;
byte[] filenameBytes = data.ReadBytes(fileNameLength);
header.FileName = Encoding.ASCII.GetString(filenameBytes);
}
// Parsing extras is skipped here, unlike in Serialization
if (header.ExtraFieldLength > 0 && data.Position + header.ExtraFieldLength <= data.Length)
{
byte[] extraBytes = data.ReadBytes(header.ExtraFieldLength);
if (extraBytes.Length != header.ExtraFieldLength)
return null;
}
if (extraFieldLength > 0 && data.Position + extraFieldLength <= data.Length)
_ = data.ReadBytes(extraFieldLength);
return header;
}

View File

@@ -0,0 +1,58 @@
namespace SabreTools.IO.Compression.LZX
{
    /// <summary>
    /// An aligned offset block is identical to the verbatim block except for the presence of the aligned offset
    /// tree preceding the other trees.
    /// </summary>
    /// <see href="https://interoperability.blob.core.windows.net/files/MS-PATCH/%5bMS-PATCH%5d.pdf"/>
    internal class AlignedOffsetBlockData : BlockData
    {
        /// <summary>
        /// Aligned offset tree
        /// </summary>
        /// <remarks>8 elements, 3 bits each</remarks>
        public byte[]? AlignedOffsetTree { get; set; }

        /// <summary>
        /// Pretree for first 256 elements of main tree
        /// </summary>
        /// <remarks>20 elements, 4 bits each</remarks>
        public byte[]? PretreeFirst256 { get; set; }

        /// <summary>
        /// Path lengths of first 256 elements of main tree
        /// </summary>
        /// <remarks>Encoded using pretree</remarks>
        public int[]? PathLengthsFirst256 { get; set; }

        /// <summary>
        /// Pretree for remainder of main tree
        /// </summary>
        /// <remarks>20 elements, 4 bits each</remarks>
        public byte[]? PretreeRemainder { get; set; }

        /// <summary>
        /// Path lengths of remaining elements of main tree
        /// </summary>
        /// <remarks>Encoded using pretree</remarks>
        public int[]? PathLengthsRemainder { get; set; }

        /// <summary>
        /// Pretree for length tree
        /// </summary>
        /// <remarks>20 elements, 4 bits each</remarks>
        public byte[]? PretreeLengthTree { get; set; }

        /// <summary>
        /// Path lengths of elements in length tree
        /// </summary>
        /// <remarks>Encoded using pretree</remarks>
        public int[]? PathLengthsLengthTree { get; set; }

        /// <summary>
        /// Token sequence (matches and literals)
        /// </summary>
        /// <remarks>Variable</remarks>
        public byte[]? TokenSequence { get; set; }
    }
}

View File

@@ -0,0 +1,24 @@
namespace SabreTools.IO.Compression.LZX
{
    /// <summary>
    /// An LZXD block represents a sequence of compressed data that is encoded with the same set of
    /// Huffman trees, or a sequence of uncompressed data. There can be one or more LZXD blocks in a
    /// compressed stream, each with its own set of Huffman trees. Blocks do not have to start or end on a
    /// chunk boundary; blocks can span multiple chunks, or a single chunk can contain multiple blocks. The
    /// number of chunks is related to the size of the data being compressed, while the number of blocks is
    /// related to how well the data is compressed.
    /// </summary>
    /// <see href="https://interoperability.blob.core.windows.net/files/MS-PATCH/%5bMS-PATCH%5d.pdf"/>
    internal class Block
    {
        /// <summary>
        /// Block header
        /// </summary>
        public BlockHeader? Header { get; set; }

        /// <summary>
        /// Block data
        /// </summary>
        /// <remarks>
        /// Concrete type depends on the header's block type:
        /// verbatim, aligned offset, or uncompressed
        /// </remarks>
        public BlockData? BlockData { get; set; }
    }
}

View File

@@ -0,0 +1,8 @@
namespace SabreTools.IO.Compression.LZX
{
    /// <summary>
    /// Base type for the data portion of an LZXD block; concrete layouts are
    /// VerbatimBlockData, AlignedOffsetBlockData, and UncompressedBlockData.
    /// </summary>
    /// <see href="https://interoperability.blob.core.windows.net/files/MS-PATCH/%5bMS-PATCH%5d.pdf"/>
    internal abstract class BlockData
    {
        // No common fields between all block data
    }
}

View File

@@ -0,0 +1,33 @@
namespace SabreTools.IO.Compression.LZX
{
    /// <summary>
    /// The Block Type field, as specified in section 2.3.1.1, indicates which type of block follows,
    /// and the Block Size field, as specified in section 2.3.1.2, indicates the number of
    /// uncompressed bytes represented by the block. Following the generic block
    /// header is a type-specific header that describes the remainder of the block.
    /// </summary>
    /// <see href="https://interoperability.blob.core.windows.net/files/MS-PATCH/%5bMS-PATCH%5d.pdf"/>
    internal class BlockHeader
    {
        /// <remarks>3 bits</remarks>
        public BlockType BlockType { get; set; }

        /// <summary>
        /// Block size is the high 8 bits of 24
        /// </summary>
        /// <remarks>8 bits</remarks>
        public byte BlockSizeMSB { get; set; }

        /// <summary>
        /// Block size is the middle 8 bits of 24
        /// </summary>
        /// <remarks>8 bits</remarks>
        public byte BlockSizeByte2 { get; set; }

        /// <summary>
        /// Block size is the low 8 bits of 24
        /// </summary>
        /// <remarks>
        /// 8 bits.
        /// NOTE(review): casing is inconsistent with BlockSizeMSB/BlockSizeByte2
        /// ("Blocksize" vs "BlockSize"); renaming would break callers, so it is only flagged here.
        /// </remarks>
        public byte BlocksizeLSB { get; set; }
    }
}

View File

@@ -0,0 +1,25 @@
namespace SabreTools.IO.Compression.LZX
{
    /// <summary>
    /// The LZXD compressor emits chunks of compressed data. A chunk represents exactly 32 KB of
    /// uncompressed data until the last chunk in the stream, which can represent less than 32 KB. To
    /// ensure that an exact number of input bytes represent an exact number of output bytes for each
    /// chunk, after each 32 KB of uncompressed data is represented in the output compressed bitstream, the
    /// output bitstream is padded with up to 15 bits of zeros to realign the bitstream on a 16-bit boundary
    /// (even byte boundary) for the next 32 KB of data. This results in a compressed chunk of a byte-aligned
    /// size. The compressed chunk could be smaller than 32 KB or larger than 32 KB if the data is
    /// incompressible when the chunk is not the last one.
    /// </summary>
    internal class Chunk
    {
        /// <summary>
        /// Chunk header
        /// </summary>
        public ChunkHeader? Header { get; set; }

        /// <summary>
        /// Block headers and data
        /// </summary>
        /// <remarks>Blocks may span chunk boundaries; see the class summary on Block</remarks>
        public Block[]? Blocks { get; set; }
    }
}

View File

@@ -0,0 +1,46 @@
namespace SabreTools.IO.Compression.LZX
{
    /// <summary>
    /// The LZXD compressor emits chunks of compressed data. A chunk represents exactly 32 KB of
    /// uncompressed data until the last chunk in the stream, which can represent less than 32 KB. To
    /// ensure that an exact number of input bytes represent an exact number of output bytes for each
    /// chunk, after each 32 KB of uncompressed data is represented in the output compressed bitstream, the
    /// output bitstream is padded with up to 15 bits of zeros to realign the bitstream on a 16-bit boundary
    /// (even byte boundary) for the next 32 KB of data. This results in a compressed chunk of a byte-aligned
    /// size. The compressed chunk could be smaller than 32 KB or larger than 32 KB if the data is
    /// incompressible when the chunk is not the last one.
    /// </summary>
    internal class ChunkHeader
    {
        /// <summary>
        /// The LZXD engine encodes a compressed, chunk-size prefix field preceding each compressed chunk in
        /// the compressed byte stream. The compressed, chunk-size prefix field is a byte aligned, little-endian,
        /// 16-bit field. The chunk prefix chain could be followed in the compressed stream without
        /// decompressing any data. The next chunk prefix is at a location computed by the absolute byte offset
        /// location of this chunk prefix plus 2 (for the size of the chunk-size prefix field) plus the current chunk
        /// size.
        /// </summary>
        public ushort ChunkSize { get; set; }

        /// <summary>
        /// The first bit in the first chunk in the LZXD bitstream (following the 2-byte, chunk-size prefix described
        /// in section 2.2.1) indicates the presence or absence of two 16-bit fields immediately following the
        /// single bit. If the bit is set, E8 translation is enabled for all the following chunks in the stream using the
        /// 32-bit value derived from the two 16-bit fields as the E8_file_size provided to the compressor when E8
        /// translation was enabled. Note that E8_file_size is completely independent of the length of the
        /// uncompressed data. E8 call translation is disabled after the 32,768th chunk (after 1 gigabyte (GB) of
        /// uncompressed data).
        /// </summary>
        /// <remarks>A single flag bit in the stream, stored here as a byte</remarks>
        public byte E8Translation { get; set; }

        /// <summary>
        /// E8 translation size, high WORD
        /// </summary>
        /// <remarks>Only present when E8 translation is enabled, hence nullable</remarks>
        public ushort? TranslationSizeHighWord { get; set; }

        /// <summary>
        /// E8 translation size, low WORD
        /// </summary>
        /// <remarks>Only present when E8 translation is enabled, hence nullable</remarks>
        public ushort? TranslationSizeLowWord { get; set; }
    }
}

View File

@@ -0,0 +1,38 @@
namespace SabreTools.IO.Compression.LZX
{
    /// <summary>
    /// Constants defined by the LZX specification, plus Huffman table tuning values
    /// </summary>
    internal static class Constants
    {
        /* some constants defined by the LZX specification */

        /// <summary>
        /// Minimum match length
        /// </summary>
        public const int LZX_MIN_MATCH = 2;

        /// <summary>
        /// Maximum match length
        /// </summary>
        public const int LZX_MAX_MATCH = 257;

        /// <summary>
        /// Number of literal character symbols
        /// </summary>
        public const int LZX_NUM_CHARS = 256;

        /// <summary>
        /// Pretree element count
        /// </summary>
        public const int LZX_PRETREE_NUM_ELEMENTS = 20;

        /// <summary>
        /// aligned offset tree #elements
        /// </summary>
        public const int LZX_ALIGNED_NUM_ELEMENTS = 8;

        /// <summary>
        /// this one missing from spec!
        /// </summary>
        public const int LZX_NUM_PRIMARY_LENGTHS = 7;

        /// <summary>
        /// length tree #elements
        /// </summary>
        public const int LZX_NUM_SECONDARY_LENGTHS = 249;

        /* LZX huffman defines: tweak tablebits as desired */

        public const int LZX_PRETREE_MAXSYMBOLS = LZX_PRETREE_NUM_ELEMENTS;
        public const int LZX_PRETREE_TABLEBITS = 6;
        public const int LZX_MAINTREE_MAXSYMBOLS = LZX_NUM_CHARS + 50 * 8;
        public const int LZX_MAINTREE_TABLEBITS = 12;
        public const int LZX_LENGTH_MAXSYMBOLS = LZX_NUM_SECONDARY_LENGTHS + 1;
        public const int LZX_LENGTH_TABLEBITS = 12;
        public const int LZX_ALIGNED_MAXSYMBOLS = LZX_ALIGNED_NUM_ELEMENTS;
        public const int LZX_ALIGNED_TABLEBITS = 7;

        public const int LZX_LENTABLE_SAFETY = 64; /* we allow length table decoding overruns */
    }
}

View File

@@ -0,0 +1,48 @@
namespace SabreTools.IO.Compression.LZX
{
    /// <summary>
    /// 3-bit block type
    /// </summary>
    /// <remarks>Only values 1-3 are valid block types per the MS-PATCH specification</remarks>
    internal enum BlockType : byte
    {
        /// <summary>
        /// Not valid
        /// </summary>
        INVALID_0 = 0b000,

        /// <summary>
        /// Verbatim block
        /// </summary>
        Verbatim = 0b001,

        /// <summary>
        /// Aligned offset block
        /// </summary>
        AlignedOffset = 0b010,

        /// <summary>
        /// Uncompressed block
        /// </summary>
        Uncompressed = 0b011,

        /// <summary>
        /// Not valid
        /// </summary>
        INVALID_4 = 0b100,

        /// <summary>
        /// Not valid
        /// </summary>
        INVALID_5 = 0b101,

        /// <summary>
        /// Not valid
        /// </summary>
        INVALID_6 = 0b110,

        /// <summary>
        /// Not valid
        /// </summary>
        INVALID_7 = 0b111,
    }
}

View File

@@ -0,0 +1,54 @@
namespace SabreTools.IO.Compression.LZX
{
    /// <summary>
    /// Following the generic block header, an uncompressed block begins with 1 to 16 bits of zero padding
    /// to align the bit buffer on a 16-bit boundary. At this point, the bitstream ends and a byte stream
    /// begins. Following the zero padding, new 32-bit values for R0, R1, and R2 are output in little-endian
    /// form, followed by the uncompressed data bytes themselves. Finally, if the uncompressed data length
    /// is odd, one extra byte of zero padding is encoded to realign the following bitstream.
    ///
    /// Then the bitstream of byte-swapped 16-bit integers resumes for the next Block Type field (if there
    /// are subsequent blocks).
    ///
    /// The decoded R0, R1, and R2 values are used as initial repeated offset values to decode the
    /// subsequent compressed block if present.
    /// </summary>
    /// <see href="https://interoperability.blob.core.windows.net/files/MS-PATCH/%5bMS-PATCH%5d.pdf"/>
    internal class UncompressedBlockData : BlockData
    {
        /// <summary>
        /// Padding to align following field on 16-bit boundary
        /// </summary>
        /// <remarks>Bits have a value of zero</remarks>
        public ushort PaddingBits { get; set; }

        /// <summary>
        /// Least significant to most significant byte (little-endian DWORD ([MS-DTYP]))
        /// </summary>
        /// <remarks>Encoded directly in the byte stream, not in the bitstream of byte-swapped 16-bit words</remarks>
        public uint R0 { get; set; }

        /// <summary>
        /// Least significant to most significant byte (little-endian DWORD)
        /// </summary>
        /// <remarks>Encoded directly in the byte stream, not in the bitstream of byte-swapped 16-bit words</remarks>
        public uint R1 { get; set; }

        /// <summary>
        /// Least significant to most significant byte (little-endian DWORD)
        /// </summary>
        /// <remarks>Encoded directly in the byte stream, not in the bitstream of byte-swapped 16-bit words</remarks>
        public uint R2 { get; set; }

        /// <summary>
        /// Can use the direct memcpy function, as specified in [IEEE1003.1]
        /// </summary>
        /// <remarks>Encoded directly in the byte stream, not in the bitstream of byte-swapped 16-bit words</remarks>
        public byte[]? RawDataBytes { get; set; }

        /// <summary>
        /// Only if uncompressed size is odd
        /// </summary>
        public byte AlignmentByte { get; set; }
    }
}

View File

@@ -0,0 +1,51 @@
namespace SabreTools.IO.Compression.LZX
{
    /// <summary>
    /// The fields of a verbatim block that follow the generic block header
    /// </summary>
    /// <see href="https://interoperability.blob.core.windows.net/files/MS-PATCH/%5bMS-PATCH%5d.pdf"/>
    internal class VerbatimBlockData : BlockData
    {
        /// <summary>
        /// Pretree for first 256 elements of main tree
        /// </summary>
        /// <remarks>20 elements, 4 bits each</remarks>
        public byte[]? PretreeFirst256 { get; set; }

        /// <summary>
        /// Path lengths of first 256 elements of main tree
        /// </summary>
        /// <remarks>Encoded using pretree</remarks>
        public int[]? PathLengthsFirst256 { get; set; }

        /// <summary>
        /// Pretree for remainder of main tree
        /// </summary>
        /// <remarks>20 elements, 4 bits each</remarks>
        public byte[]? PretreeRemainder { get; set; }

        /// <summary>
        /// Path lengths of remaining elements of main tree
        /// </summary>
        /// <remarks>Encoded using pretree</remarks>
        public int[]? PathLengthsRemainder { get; set; }

        /// <summary>
        /// Pretree for length tree
        /// </summary>
        /// <remarks>20 elements, 4 bits each</remarks>
        public byte[]? PretreeLengthTree { get; set; }

        /// <summary>
        /// Path lengths of elements in length tree
        /// </summary>
        /// <remarks>Encoded using pretree</remarks>
        public int[]? PathLengthsLengthTree { get; set; }

        /// <summary>
        /// Token sequence (matches and literals)
        /// </summary>
        /// <remarks>Variable</remarks>
        public byte[]? TokenSequence { get; set; }
    }
}

View File

@@ -0,0 +1,28 @@
namespace SabreTools.IO.Compression.MSZIP
{
    /// <summary>
    /// Each MSZIP block MUST consist of a 2-byte MSZIP signature and one or more RFC 1951 blocks. The
    /// 2-byte MSZIP signature MUST consist of the bytes 0x43 and 0x4B. The MSZIP signature MUST be
    /// the first 2 bytes in the MSZIP block. The MSZIP signature is shown in the following packet diagram.
    ///
    /// Each MSZIP block is the result of a single deflate compression operation, as defined in [RFC1951].
    /// The compressor that performs the compression operation MUST generate one or more RFC 1951
    /// blocks, as defined in [RFC1951]. The number, deflation mode, and type of RFC 1951 blocks in each
    /// MSZIP block is determined by the compressor, as defined in [RFC1951]. The last RFC 1951 block in
    /// each MSZIP block MUST be marked as the "end" of the stream, as defined by [RFC1951]
    /// section 3.2.3. Decoding trees MUST be discarded after each RFC 1951 block, but the history buffer
    /// MUST be maintained. Each MSZIP block MUST represent no more than 32 KB of uncompressed data.
    ///
    /// The maximum compressed size of each MSZIP block is 32 KB + 12 bytes. This enables the MSZIP
    /// block to contain 32 KB of data split between two noncompressed RFC 1951 blocks, each of which
    /// has a value of BTYPE = 00.
    /// </summary>
    /// <see href="https://interoperability.blob.core.windows.net/files/MS-MCI/%5bMS-MCI%5d.pdf"/>
    internal class BlockHeader
    {
        /// <summary>
        /// 'CK'
        /// </summary>
        /// <remarks>Bytes 0x43, 0x4B per the MS-MCI specification</remarks>
        public ushort Signature { get; set; }
    }
}

View File

@@ -1,7 +1,6 @@
using System;
using System.IO;
using SabreTools.IO.Extensions;
using SabreTools.Models.Compression.MSZIP;
namespace SabreTools.IO.Compression.MSZIP
{

View File

@@ -0,0 +1,50 @@
namespace SabreTools.IO.Compression.Quantum
{
    /// <see href="http://www.russotto.net/quantumcomp.html"/>
    internal static class Constants
    {
        /// <summary>
        /// Base value for each position slot
        /// </summary>
        /// <remarks>NOTE(review): presumed from the companion PositionExtraBits table — confirm against the spec</remarks>
        public static readonly int[] PositionSlot =
        [
            0x00000, 0x00001, 0x00002, 0x00003, 0x00004, 0x00006, 0x00008, 0x0000c,
            0x00010, 0x00018, 0x00020, 0x00030, 0x00040, 0x00060, 0x00080, 0x000c0,
            0x00100, 0x00180, 0x00200, 0x00300, 0x00400, 0x00600, 0x00800, 0x00c00,
            0x01000, 0x01800, 0x02000, 0x03000, 0x04000, 0x06000, 0x08000, 0x0c000,
            0x10000, 0x18000, 0x20000, 0x30000, 0x40000, 0x60000, 0x80000, 0xc0000,
            0x100000, 0x180000
        ];

        /// <summary>
        /// Number of extra bits to read for each position slot
        /// </summary>
        public static readonly int[] PositionExtraBits =
        [
            0, 0, 0, 0, 1, 1, 2, 2,
            3, 3, 4, 4, 5, 5, 6, 6,
            7, 7, 8, 8, 9, 9, 10, 10,
            11, 11, 12, 12, 13, 13, 14, 14,
            15, 15, 16, 16, 17, 17, 18, 18,
            19, 19
        ];

        /// <summary>
        /// Base value for each length slot
        /// </summary>
        /// <remarks>NOTE(review): presumed from the companion LengthExtraBits table — confirm against the spec</remarks>
        public static readonly int[] LengthSlot =
        [
            0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x08,
            0x0a, 0x0c, 0x0e, 0x12, 0x16, 0x1a, 0x1e, 0x26,
            0x2e, 0x36, 0x3e, 0x4e, 0x5e, 0x6e, 0x7e, 0x9e,
            0xbe, 0xde, 0xfe
        ];

        /// <summary>
        /// Number of extra bits to read for each length slot
        /// </summary>
        public static readonly int[] LengthExtraBits =
        [
            0, 0, 0, 0, 0, 0, 1, 1,
            1, 1, 2, 2, 2, 2, 3, 3,
            3, 3, 4, 4, 4, 4, 5, 5,
            5, 5, 0
        ];

        /// <summary>
        /// Number of position slots for (tsize - 10)
        /// </summary>
        public static readonly int[] NumPositionSlots =
        [
            20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42
        ];
    }
}

View File

@@ -2,8 +2,7 @@ using System;
using System.Collections.Generic;
using System.IO;
using SabreTools.IO.Streams;
using SabreTools.Models.Compression.Quantum;
using static SabreTools.Models.Compression.Quantum.Constants;
using static SabreTools.IO.Compression.Quantum.Constants;
namespace SabreTools.IO.Compression.Quantum
{

View File

@@ -0,0 +1,45 @@
namespace SabreTools.IO.Compression.Quantum
{
    /// <summary>
    /// Identifies which symbol model a Quantum selector refers to
    /// </summary>
    /// <see href="http://www.russotto.net/quantumcomp.html"/>
    internal enum SelectorModel
    {
        /// <summary>
        /// Literal model, 64 entries, start at symbol 0
        /// </summary>
        SELECTOR_0 = 0,

        /// <summary>
        /// Literal model, 64 entries, start at symbol 64
        /// </summary>
        SELECTOR_1 = 1,

        /// <summary>
        /// Literal model, 64 entries, start at symbol 128
        /// </summary>
        SELECTOR_2 = 2,

        /// <summary>
        /// Literal model, 64 entries, start at symbol 192
        /// </summary>
        SELECTOR_3 = 3,

        /// <summary>
        /// LZ model, 3 character matches, max 24 entries, start at symbol 0
        /// </summary>
        SELECTOR_4 = 4,

        /// <summary>
        /// LZ model, 4 character matches, max 36 entries, start at symbol 0
        /// </summary>
        SELECTOR_5 = 5,

        /// <summary>
        /// LZ model, 5+ character matches, max 42 entries, start at symbol 0
        /// </summary>
        SELECTOR_6_POSITION = 6,

        /// <summary>
        /// LZ model, 5+ character matches, 27 entries, start at symbol 0
        /// </summary>
        SELECTOR_6_LENGTH = 7,
    }
}

View File

@@ -0,0 +1,24 @@
namespace SabreTools.IO.Compression.Quantum
{
    /// <see href="http://www.russotto.net/quantumcomp.html"/>
    internal sealed class Model
    {
        // Number of entries in the symbol table
        public int Entries { get; set; }

        /// <remarks>
        /// All the models are initialized with the symbols in symbol
        /// order in the table, and with every symbol in the table
        /// having a frequency of 1
        /// </remarks>
        public ModelSymbol[]? Symbols { get; set; }

        /// <remarks>
        /// The initial total frequency is equal to the number of entries
        /// in the table
        /// </remarks>
        public int TotalFrequency { get; set; }

        /// <remarks>The initial time_to_reorder value is 4</remarks>
        public int TimeToReorder { get; set; }
    }
}

View File

@@ -0,0 +1,15 @@
namespace SabreTools.IO.Compression.Quantum
{
    /// <see href="http://www.russotto.net/quantumcomp.html"/>
    internal sealed class ModelSymbol
    {
        // The symbol value itself
        public ushort Symbol { get; set; }

        /// <summary>
        /// The cumulative frequency is the frequency of all the symbols
        /// which are at a higher index in the table than that symbol —
        /// thus the last entry in the table has a cumulative frequency of 0.
        /// </summary>
        public ushort CumulativeFrequency { get; set; }
    }
}

View File

@@ -1,6 +1,5 @@
using System;
using System.IO;
using SabreTools.Models.LZ;
namespace SabreTools.IO.Compression.SZDD
{
@@ -15,7 +14,7 @@ namespace SabreTools.IO.Compression.SZDD
/// <summary>
/// Source stream for the decompressor
/// </summary>
private readonly BufferedStream _source;
private readonly Streams.BufferedStream _source;
/// <summary>
/// SZDD format being decompressed
@@ -37,19 +36,19 @@ namespace SabreTools.IO.Compression.SZDD
// Initialize the window with space characters
_window = Array.ConvertAll(_window, b => (byte)0x20);
_source = new BufferedStream(source);
_source = new Streams.BufferedStream(source);
}
/// <summary>
/// Create a KWAJ decompressor
/// </summary>
public static Decompressor CreateKWAJ(byte[] source, KWAJCompressionType compressionType)
public static Decompressor CreateKWAJ(byte[] source, ushort compressionType)
=> CreateKWAJ(new MemoryStream(source), compressionType);
/// <summary>
/// Create a KWAJ decompressor
/// </summary>
public static Decompressor CreateKWAJ(Stream source, KWAJCompressionType compressionType)
public static Decompressor CreateKWAJ(Stream source, ushort compressionType)
{
// Create the decompressor
var decompressor = new Decompressor(source);
@@ -57,11 +56,11 @@ namespace SabreTools.IO.Compression.SZDD
// Set the format and return
decompressor._format = compressionType switch
{
KWAJCompressionType.NoCompression => Format.KWAJNoCompression,
KWAJCompressionType.NoCompressionXor => Format.KWAJXor,
KWAJCompressionType.QBasic => Format.KWAJQBasic,
KWAJCompressionType.LZH => Format.KWAJLZH,
KWAJCompressionType.MSZIP => Format.KWAJMSZIP,
0x0000 => Format.KWAJNoCompression,
0x0001 => Format.KWAJXor,
0x0002 => Format.KWAJQBasic,
0x0003 => Format.KWAJLZH,
0x0004 => Format.KWAJMSZIP,
_ => throw new IndexOutOfRangeException(nameof(source)),
};
return decompressor;
@@ -229,77 +228,5 @@ namespace SabreTools.IO.Compression.SZDD
dest.Flush();
return true;
}
/// <summary>
/// Buffered stream that reads in blocks
/// </summary>
private class BufferedStream
{
/// <summary>
/// Source stream for populating the buffer
/// </summary>
private readonly Stream _source;
/// <summary>
/// Internal buffer to read
/// </summary>
private readonly byte[] _buffer = new byte[2048];
/// <summary>
/// Current pointer into the buffer
/// </summary>
private int _bufferPtr = 0;
/// <summary>
/// Represents the number of available bytes
/// </summary>
private int _available = -1;
/// <summary>
/// Create a new buffered stream
/// </summary>
public BufferedStream(Stream source)
{
_source = source;
}
/// <summary>
/// Read the next byte from the buffer, if possible
/// </summary>
public byte? ReadNextByte()
{
// Ensure the buffer first
if (!EnsureBuffer())
return null;
// Return the next available value
return _buffer[_bufferPtr++];
}
/// <summary>
/// Ensure the buffer has data to read
/// </summary>
private bool EnsureBuffer()
{
// Force an update if in the initial state
if (_available == -1)
{
_available = _source.Read(_buffer, 0, _buffer.Length);
_bufferPtr = 0;
return _available != 0;
}
// If the pointer is out of range
if (_bufferPtr >= _available)
{
_available = _source.Read(_buffer, 0, _buffer.Length);
_bufferPtr = 0;
return _available != 0;
}
// Otherwise, assume data is available
return true;
}
}
}
}

View File

@@ -2,7 +2,6 @@ using System;
using System.IO;
using SabreTools.Hashing;
using SabreTools.Matching;
using static SabreTools.Models.MoPaQ.Constants;
namespace SabreTools.IO.Encryption
{
@@ -11,6 +10,14 @@ namespace SabreTools.IO.Encryption
/// </summary>
public class MoPaQDecrypter
{
#region Constants
private const uint MPQ_HASH_KEY2_MIX = 0x400;
private const uint STORM_BUFFER_SIZE = 0x500;
#endregion
#region Private Instance Variables
/// <summary>

View File

@@ -59,14 +59,24 @@ namespace SabreTools.IO.Extensions
/// </summary>
/// <param name="charLimit">Number of characters needed to be a valid string, default 5</param>
/// <returns>String list containing the requested data, null on error</returns>
#if NET5_0_OR_GREATER
/// <remarks>This reads both Latin1 and UTF-16 strings from the input data</remarks>
#else
/// <remarks>This reads both ASCII and UTF-16 strings from the input data</remarks>
#endif
public static List<string>? ReadStringsFrom(this byte[]? input, int charLimit = 5)
{
// Validate the data
if (input == null || input.Length == 0)
return null;
#if NET5_0_OR_GREATER
// Check for Latin1 strings
var asciiStrings = input.ReadStringsWithEncoding(charLimit, Encoding.Latin1);
#else
// Check for ASCII strings
var asciiStrings = input.ReadStringsWithEncoding(charLimit, Encoding.ASCII);
#endif
// Check for Unicode strings
// We are limiting the check for Unicode characters with a second byte of 0x00 for now
@@ -101,13 +111,17 @@ namespace SabreTools.IO.Extensions
// Short-circuit for some encoding types
if (encoding.CodePage == Encoding.ASCII.CodePage)
return bytes.ReadFixedWidthEncodingStrings(charLimit, Encoding.ASCII, 1);
return bytes.ReadAsciiStrings(charLimit);
#if NET5_0_OR_GREATER
else if (encoding.CodePage == Encoding.Latin1.CodePage)
return bytes.ReadFixedWidthEncodingStrings(charLimit, Encoding.Latin1, 1);
#endif
else if (encoding.IsSingleByte)
return bytes.ReadFixedWidthEncodingStrings(charLimit, encoding, 1);
else if (encoding.CodePage == Encoding.Unicode.CodePage)
return bytes.ReadFixedWidthEncodingStrings(charLimit, Encoding.Unicode, 2);
else if (encoding.CodePage == Encoding.BigEndianUnicode.CodePage)
return bytes.ReadFixedWidthEncodingStrings(charLimit, Encoding.BigEndianUnicode, 2);
else if (encoding.CodePage == Encoding.UTF32.CodePage)
return bytes.ReadFixedWidthEncodingStrings(charLimit, Encoding.UTF32, 4);
@@ -133,7 +147,7 @@ namespace SabreTools.IO.Extensions
char c = (char)reader.Read();
// If the character is invalid
if (char.IsControl(c) || (c & 0xFF00) != 0)
if (char.IsControl(c) || (c & 0xFFFFFF00) != 0)
{
// Seek to the end of the last found string
string str = sb.ToString();
@@ -141,6 +155,10 @@ namespace SabreTools.IO.Extensions
ms.Seek(lastOffset, SeekOrigin.Begin);
reader.DiscardBufferedData();
// If there is no cached string
if (str.Length == 0)
continue;
// Add the string if long enough
if (str.Length >= charLimit)
strings.Add(str);
@@ -209,10 +227,77 @@ namespace SabreTools.IO.Extensions
// Pretend only one byte was read
offset -= width - 1;
// If there is no cached string
if (sb.Length == 0)
continue;
// Add the string if long enough
string str = sb.ToString();
if (str.Length >= charLimit)
strings.Add(str);
if (sb.Length >= charLimit)
strings.Add(sb.ToString());
// Clear the builder and continue
#if NET20 || NET35
sb = new();
#else
sb.Clear();
#endif
continue;
}
// Otherwise, add the character to the builder and continue
sb.Append(c);
}
// Handle any remaining data
if (sb.Length >= charLimit)
strings.Add(sb.ToString());
return strings;
}
/// <summary>
/// Read string data from a byte array using ASCII encoding
/// </summary>
/// <param name="bytes">Byte array representing the source data</param>
/// <param name="charLimit">Number of characters needed to be a valid string</param>
/// <returns>String list containing the requested data, empty on error</returns>
/// <remarks>Handling for 7-bit ASCII needs to be done differently than other fixed-width encodings</remarks>
#if NET20
private static List<string> ReadAsciiStrings(this byte[] bytes, int charLimit)
#else
private static HashSet<string> ReadAsciiStrings(this byte[] bytes, int charLimit)
#endif
{
if (charLimit <= 0 || charLimit > bytes.Length)
return [];
// Create the string set to return
#if NET20
var strings = new List<string>();
#else
var strings = new HashSet<string>();
#endif
// Create a string builder for the loop
var sb = new StringBuilder();
// Check for strings
int offset = 0;
while (offset < bytes.Length)
{
// Read the next character from the stream
char c = bytes.ReadChar(ref offset);
// If the character is invalid
if (char.IsControl(c) || c > 0x7F)
{
// If there is no cached string
if (sb.Length == 0)
continue;
// Add the string if long enough
if (sb.Length >= charLimit)
strings.Add(sb.ToString());
// Clear the builder and continue
#if NET20 || NET35

View File

@@ -1065,6 +1065,10 @@ namespace SabreTools.IO.Extensions
/// </summary>
private static byte[] ReadExactlyToBuffer(byte[] content, ref int offset, int length)
{
// If we have an invalid offset
if (offset < 0 || offset >= content.Length)
throw new ArgumentOutOfRangeException($"{nameof(offset)} must be between 0 and {content.Length}, {offset} provided");
// If we have an invalid length
if (length < 0)
throw new ArgumentOutOfRangeException($"{nameof(length)} must be 0 or a positive value, {length} requested");

View File

@@ -1,6 +1,5 @@
using System.Collections.Generic;
using System.IO;
using System.Text;
namespace SabreTools.IO.Extensions
{
@@ -76,10 +75,15 @@ namespace SabreTools.IO.Extensions
/// <summary>
/// Read string data from a Stream
/// </summary>
/// <param name="charLimit">Number of characters needed to be a valid string, default 5</param>
/// <param name="position">Position in the source to read from</param>
/// <param name="length">Length of the requested data</param>
/// <param name="charLimit">Number of characters needed to be a valid string, default 5</param>
/// <returns>String list containing the requested data, null on error</returns>
#if NET5_0_OR_GREATER
/// <remarks>This reads both Latin1 and UTF-16 strings from the input data</remarks>
#else
/// <remarks>This reads both ASCII and UTF-16 strings from the input data</remarks>
#endif
public static List<string>? ReadStringsFrom(this Stream? input, int position, int length, int charLimit = 5)
{
// Read the data as a byte array first
@@ -87,23 +91,7 @@ namespace SabreTools.IO.Extensions
if (data == null)
return null;
// Check for ASCII strings
var asciiStrings = data.ReadStringsWithEncoding(charLimit, Encoding.ASCII);
// Check for UTF-8 strings
// We are limiting the check for Unicode characters with a second byte of 0x00 for now
var utf8Strings = data.ReadStringsWithEncoding(charLimit, Encoding.UTF8);
// Check for Unicode strings
// We are limiting the check for Unicode characters with a second byte of 0x00 for now
var unicodeStrings = data.ReadStringsWithEncoding(charLimit, Encoding.Unicode);
// Ignore duplicate strings across encodings
List<string> sourceStrings = [.. asciiStrings, .. utf8Strings, .. unicodeStrings];
// Sort the strings and return
sourceStrings.Sort();
return sourceStrings;
return data.ReadStringsFrom(charLimit);
}
/// <summary>

View File

@@ -11,7 +11,7 @@
<SuppressTfmSupportBuildWarnings>true</SuppressTfmSupportBuildWarnings>
<SymbolPackageFormat>snupkg</SymbolPackageFormat>
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
<Version>1.7.3</Version>
<Version>1.7.4</Version>
<!-- Package Properties -->
<Authors>Matt Nadareski</Authors>
@@ -31,7 +31,6 @@
<ItemGroup>
<PackageReference Include="SabreTools.Matching" Version="1.6.0" />
<PackageReference Include="SabreTools.Models" Version="1.7.1" />
<PackageReference Include="SabreTools.Hashing" Version="1.5.0" />
</ItemGroup>

View File

@@ -0,0 +1,77 @@
using System.IO;
namespace SabreTools.IO.Streams
{
    /// <summary>
    /// Buffered stream that reads in blocks
    /// </summary>
    /// <remarks>Not a true <see cref="Stream"/> implementation yet</remarks>
    public class BufferedStream
    {
        /// <summary>
        /// Source stream for populating the buffer
        /// </summary>
        private readonly Stream _source;

        /// <summary>
        /// Internal buffer to read
        /// </summary>
        private readonly byte[] _buffer = new byte[2048];

        /// <summary>
        /// Current pointer into the buffer
        /// </summary>
        private int _bufferPtr = 0;

        /// <summary>
        /// Number of valid bytes in <see cref="_buffer"/>, -1 until the first read
        /// </summary>
        private int _available = -1;

        /// <summary>
        /// Create a new buffered stream
        /// </summary>
        /// <param name="source">Stream whose contents populate the buffer</param>
        public BufferedStream(Stream source)
        {
            _source = source;
        }

        /// <summary>
        /// Read the next byte from the buffer, if possible
        /// </summary>
        /// <returns>The next byte, or null when the source is exhausted</returns>
        public byte? ReadNextByte()
        {
            // Ensure the buffer first
            if (!EnsureBuffer())
                return null;

            // Return the next available value
            return _buffer[_bufferPtr++];
        }

        /// <summary>
        /// Ensure the buffer has data to read
        /// </summary>
        /// <returns>True if at least one byte is available, false on end of stream</returns>
        private bool EnsureBuffer()
        {
            // Refill on first use (-1 sentinel) or once all buffered bytes have
            // been consumed. These two cases previously duplicated the same
            // refill logic; they are intentionally merged here.
            if (_available == -1 || _bufferPtr >= _available)
            {
                _available = _source.Read(_buffer, 0, _buffer.Length);
                _bufferPtr = 0;

                // Stream.Read returns 0 only when no more data is available
                return _available != 0;
            }

            // Otherwise, data is still pending in the current block
            return true;
        }
    }
}