32 Commits
1.7.2 ... 1.7.4

Author SHA1 Message Date
Matt Nadareski
5054aeb077 Bump version 2025-09-22 17:48:18 -04:00
Matt Nadareski
d2e9b8d6e5 Fix byte array test 2025-09-22 17:44:41 -04:00
Matt Nadareski
2c29aee834 Remove Models from references 2025-09-22 11:03:40 -04:00
Matt Nadareski
576bafcb87 Create minimal model for InflateWrapper 2025-09-22 11:03:07 -04:00
Matt Nadareski
2b310ac528 SZDD no longer uses models 2025-09-22 10:55:14 -04:00
Matt Nadareski
4f6b6d7b59 Reduce Models use another notch 2025-09-22 10:52:26 -04:00
Matt Nadareski
17e55ee233 Move BufferedStream out of SZDD 2025-09-22 10:50:53 -04:00
Matt Nadareski
8b78906d1d Move MoPaQ encryption constants from Models 2025-09-22 10:37:27 -04:00
Matt Nadareski
cff2dcf4cc Move LZX models from Models 2025-09-22 10:35:04 -04:00
Matt Nadareski
a56942cb73 Move Quantum compression models from Models 2025-09-22 10:31:14 -04:00
Matt Nadareski
5ed661b77c Move MSZIP "model" from Models 2025-09-22 10:27:03 -04:00
Matt Nadareski
a0a0cd0386 Add more complete UTF-8 first-byte tests 2025-09-21 16:34:46 -04:00
Matt Nadareski
bcc0fca4ad Ensure 7-bit ASCII never reads above 0x7F 2025-09-21 16:27:03 -04:00
Matt Nadareski
843e821e5f Use extended check in slow path too 2025-09-21 16:07:20 -04:00
Matt Nadareski
630b01283e Latin1 instead of ASCII for .NET 5.0 and beyond 2025-09-21 15:12:16 -04:00
Matt Nadareski
22abb96013 Add remarks about what encodings are used 2025-09-21 15:02:33 -04:00
Matt Nadareski
314de12661 Fix tests, remove UTF-8 checks from irrelevant places 2025-09-21 14:03:45 -04:00
Matt Nadareski
a0b24031b5 Remove duplicate code from Stream implementation 2025-09-21 13:58:46 -04:00
Matt Nadareski
b4628485c3 Sync stream implementation with byte one 2025-09-21 13:58:13 -04:00
Matt Nadareski
4610ddc9b9 Don't read the string unless it's long enough 2025-09-21 13:53:16 -04:00
Matt Nadareski
e392ddc8d7 Fix code formatting 2025-09-21 13:52:05 -04:00
Matt Nadareski
1908d1b32e More generically support single-byte encodings 2025-09-21 13:50:08 -04:00
Matt Nadareski
9d73195f86 Big-endian unicode support because it's there 2025-09-21 13:42:05 -04:00
Matt Nadareski
335a486f17 Special handling of empty string builders 2025-09-21 13:41:06 -04:00
Matt Nadareski
d3e41ac187 Handle invalid offsets in byte array extensions 2025-09-21 11:43:07 -04:00
Matt Nadareski
8ddd9f3f78 Bump version 2025-09-20 22:16:06 -04:00
Matt Nadareski
54ad538c08 Short-circuit fixed-width encodings 2025-09-20 22:10:54 -04:00
Matt Nadareski
e6bc9ab3e3 Add OptionalEndsWith string extension 2025-09-20 18:04:37 -04:00
Matt Nadareski
94934b00a9 There 2025-09-10 21:53:52 -04:00
Matt Nadareski
e49f56fccc Add an enumerable extension from BOS 2025-09-06 15:42:48 -04:00
Matt Nadareski
79c64ddfa8 .NET Standard had issues with that last one 2025-09-06 15:37:24 -04:00
Matt Nadareski
b22384d5f3 Add neat string extensions from BOS 2025-09-06 15:32:36 -04:00
33 changed files with 1256 additions and 184 deletions

7
LICENSE Normal file
View File

@@ -0,0 +1,7 @@
Copyright (c) 2018-2025 Matt Nadareski
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice (including the next paragraph) shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

View File

@@ -143,28 +143,24 @@ namespace SabreTools.IO.Test.Extensions
];
var actual = arr.ReadStringsFrom(4);
Assert.NotNull(actual);
// ASCII and UTF-8 are identical for the character range
Assert.Equal(4, actual.Count);
Assert.Equal(2, actual.Count);
}
[Fact]
public void ReadStringsFrom_UTF8_Filled()
public void ReadStringsFrom_Latin1Strings_Filled()
{
byte[]? arr =
[
.. Encoding.UTF8.GetBytes("TEST"),
.. Encoding.Latin1.GetBytes("TEST"),
.. new byte[] { 0x00 },
.. Encoding.UTF8.GetBytes("TWO"),
.. Encoding.Latin1.GetBytes("TWO"),
.. new byte[] { 0x00 },
.. Encoding.UTF8.GetBytes("DATA"),
.. Encoding.Latin1.GetBytes("DATA"),
.. new byte[] { 0x00 },
];
var actual = arr.ReadStringsFrom(4);
Assert.NotNull(actual);
// ASCII and UTF-8 are identical for the character range
Assert.Equal(4, actual.Count);
Assert.Equal(2, actual.Count);
}
[Fact]
@@ -195,11 +191,11 @@ namespace SabreTools.IO.Test.Extensions
.. new byte[] { 0x00 },
.. Encoding.ASCII.GetBytes("DATA1"),
.. new byte[] { 0x00 },
.. Encoding.UTF8.GetBytes("TEST2"),
.. Encoding.Latin1.GetBytes("TEST2"),
.. new byte[] { 0x00 },
.. Encoding.UTF8.GetBytes("TWO2"),
.. Encoding.Latin1.GetBytes("TWO2"),
.. new byte[] { 0x00 },
.. Encoding.UTF8.GetBytes("DATA2"),
.. Encoding.Latin1.GetBytes("DATA2"),
.. new byte[] { 0x00 },
.. Encoding.Unicode.GetBytes("TEST3"),
.. new byte[] { 0x00 },
@@ -210,11 +206,56 @@ namespace SabreTools.IO.Test.Extensions
];
var actual = arr.ReadStringsFrom(5);
Assert.NotNull(actual);
// ASCII and UTF-8 are identical for the character range
Assert.Equal(10, actual.Count);
Assert.Equal(6, actual.Count);
}
/// <summary>
/// This test is here mainly for performance testing
/// and should not be enabled unless there are changes
/// to the core reading methods that need comparison.
/// </summary>
// [Fact]
// public void ReadStringsFrom_Mixed_MASSIVE()
// {
// byte[]? arr =
// [
// .. Encoding.ASCII.GetBytes("TEST1"),
// .. new byte[] { 0x00 },
// .. Encoding.ASCII.GetBytes("TWO1"),
// .. new byte[] { 0x00 },
// .. Encoding.ASCII.GetBytes("DATA1"),
// .. new byte[] { 0x00 },
// .. Encoding.UTF8.GetBytes("TEST2"),
// .. new byte[] { 0x00 },
// .. Encoding.UTF8.GetBytes("TWO2"),
// .. new byte[] { 0x00 },
// .. Encoding.UTF8.GetBytes("DATA2"),
// .. new byte[] { 0x00 },
// .. Encoding.Unicode.GetBytes("TEST3"),
// .. new byte[] { 0x00 },
// .. Encoding.Unicode.GetBytes("TWO3"),
// .. new byte[] { 0x00 },
// .. Encoding.Unicode.GetBytes("DATA3"),
// .. new byte[] { 0x00 },
// ];
// arr = [.. arr, .. arr, .. arr, .. arr];
// arr = [.. arr, .. arr, .. arr, .. arr];
// arr = [.. arr, .. arr, .. arr, .. arr];
// arr = [.. arr, .. arr, .. arr, .. arr];
// arr = [.. arr, .. arr, .. arr, .. arr];
// arr = [.. arr, .. arr, .. arr, .. arr];
// arr = [.. arr, .. arr, .. arr, .. arr];
// arr = [.. arr, .. arr, .. arr, .. arr];
// arr = [.. arr, .. arr, .. arr, .. arr];
// arr = [.. arr, .. arr, .. arr, .. arr];
// // arr = [.. arr, .. arr, .. arr, .. arr];
// // arr = [.. arr, .. arr, .. arr, .. arr];
// var actual = arr.ReadStringsFrom(5);
// Assert.NotNull(actual);
// Assert.NotEmpty(actual);
// }
#endregion
#region ReadStringsWithEncoding
@@ -272,6 +313,22 @@ namespace SabreTools.IO.Test.Extensions
Assert.Equal(2, actual.Count);
}
[Fact]
public void ReadStringsWithEncoding_InvalidAsciiChars_Empty()
{
// Buffer contains only bytes that are not printable 7-bit ASCII:
// C0 control characters (0x00-0x1F) plus every value 0x80-0xFF,
// so the reader is expected to recover no strings at all
byte[]? arr =
[
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
.. Enumerable.Range(0x80, 0x80).Select(i => (byte)i),
];
var actual = arr.ReadStringsWithEncoding(1, Encoding.ASCII);
Assert.NotNull(actual);
Assert.Empty(actual);
}
[Fact]
public void ReadStringsWithEncoding_Latin1_Filled()
{
@@ -290,6 +347,25 @@ namespace SabreTools.IO.Test.Extensions
Assert.Equal(2, actual.Count);
}
[Fact]
public void ReadStringsWithEncoding_InvalidLatin1Chars_Empty()
{
// Buffer contains only the C0 (0x00-0x1F) and C1 (0x80-0x9F) control
// ranges, which the Latin-1 reader is expected to treat as invalid,
// so no strings should be recovered
byte[]? arr =
[
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F,
];
var actual = arr.ReadStringsWithEncoding(1, Encoding.Latin1);
Assert.NotNull(actual);
Assert.Empty(actual);
}
[Fact]
public void ReadStringsWithEncoding_UTF8_Filled()
{
@@ -308,6 +384,24 @@ namespace SabreTools.IO.Test.Extensions
Assert.Equal(2, actual.Count);
}
[Fact]
public void ReadStringsWithEncoding_InvalidUTF8Chars_Empty()
{
// Buffer contains only bytes that cannot begin a valid UTF-8 sequence:
// C0 controls (0x00-0x1F, not printable), 0x80-0xBF (continuation
// bytes), 0xC0-0xC1 (overlong lead bytes), and 0xF5-0xFF (never valid
// in UTF-8). No strings should be recovered.
byte[]? arr =
[
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
.. Enumerable.Range(0x80, 0x42).Select(i => (byte)i),
0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC,
0xFD, 0xFE, 0xFF,
];
var actual = arr.ReadStringsWithEncoding(1, Encoding.UTF8);
Assert.NotNull(actual);
Assert.Empty(actual);
}
[Fact]
public void ReadStringsWithEncoding_UTF16_Filled()
{
@@ -326,6 +420,21 @@ namespace SabreTools.IO.Test.Extensions
Assert.Equal(2, actual.Count);
}
[Fact]
public void ReadStringsWithEncoding_InvalidUTF16Chars_Empty()
{
// 32 bytes decode to 16 UTF-16 code units, all of which fall in the
// control-character range, so no strings should be recovered
byte[]? arr =
[
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
];
var actual = arr.ReadStringsWithEncoding(1, Encoding.Unicode);
Assert.NotNull(actual);
Assert.Empty(actual);
}
[Fact]
public void ReadStringsWithEncoding_UTF32_Filled()
{
@@ -344,6 +453,21 @@ namespace SabreTools.IO.Test.Extensions
Assert.Equal(2, actual.Count);
}
[Fact]
public void ReadStringsWithEncoding_InvalidUTF32Chars_Empty()
{
// 32 bytes decode to 8 UTF-32 code points, all of which fall in the
// control-character range, so no strings should be recovered
byte[]? arr =
[
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
];
var actual = arr.ReadStringsWithEncoding(1, Encoding.UTF32);
Assert.NotNull(actual);
Assert.Empty(actual);
}
#endregion
}
}

View File

@@ -2,6 +2,7 @@ using System;
using System.Collections;
using System.Collections.Generic;
using System.Linq;
using System.Threading;
using SabreTools.IO.Extensions;
using Xunit;
@@ -9,6 +10,22 @@ namespace SabreTools.IO.Test.Extensions
{
public class EnumerableExtensionsTests
{
#region IterateWithAction
[Fact]
public void IterateWithActionTest()
{
// Sum every element through the supplied action; Interlocked.Add keeps
// the accumulator safe in case the extension iterates concurrently
List<int> source = [1, 2, 3, 4];
int actual = 0;
source.IterateWithAction(i => Interlocked.Add(ref actual, i));
Assert.Equal(10, actual);
}
#endregion
#region SafeEnumerate
[Fact]
public void SafeEnumerate_Empty()
{
@@ -60,6 +77,8 @@ namespace SabreTools.IO.Test.Extensions
Assert.Equal(2, list.Count);
}
#endregion
/// <summary>
/// Fake enumerable that uses <see cref="ErrorEnumerator"/>
/// </summary>

View File

@@ -200,29 +200,25 @@ namespace SabreTools.IO.Test.Extensions
Stream? stream = new MemoryStream(bytes);
var actual = stream.ReadStringsFrom(0, bytes.Length, 4);
Assert.NotNull(actual);
// ASCII and UTF-8 are identical for the character range
Assert.Equal(4, actual.Count);
Assert.Equal(2, actual.Count);
}
[Fact]
public void ReadStringsFrom_UTF8_Filled()
public void ReadStringsFrom_Latin1Strings_Filled()
{
byte[]? bytes =
[
.. Encoding.UTF8.GetBytes("TEST"),
.. Encoding.Latin1.GetBytes("TEST"),
.. new byte[] { 0x00 },
.. Encoding.UTF8.GetBytes("TWO"),
.. Encoding.Latin1.GetBytes("TWO"),
.. new byte[] { 0x00 },
.. Encoding.UTF8.GetBytes("DATA"),
.. Encoding.Latin1.GetBytes("DATA"),
.. new byte[] { 0x00 },
];
Stream? stream = new MemoryStream(bytes);
var actual = stream.ReadStringsFrom(0, bytes.Length, 4);
Assert.NotNull(actual);
// ASCII and UTF-8 are identical for the character range
Assert.Equal(4, actual.Count);
Assert.Equal(2, actual.Count);
}
[Fact]
@@ -254,11 +250,11 @@ namespace SabreTools.IO.Test.Extensions
.. new byte[] { 0x00 },
.. Encoding.ASCII.GetBytes("DATA1"),
.. new byte[] { 0x00 },
.. Encoding.UTF8.GetBytes("TEST2"),
.. Encoding.Latin1.GetBytes("TEST2"),
.. new byte[] { 0x00 },
.. Encoding.UTF8.GetBytes("TWO2"),
.. Encoding.Latin1.GetBytes("TWO2"),
.. new byte[] { 0x00 },
.. Encoding.UTF8.GetBytes("DATA2"),
.. Encoding.Latin1.GetBytes("DATA2"),
.. new byte[] { 0x00 },
.. Encoding.Unicode.GetBytes("TEST3"),
.. new byte[] { 0x00 },
@@ -270,9 +266,7 @@ namespace SabreTools.IO.Test.Extensions
Stream? stream = new MemoryStream(bytes);
var actual = stream.ReadStringsFrom(0, bytes.Length, 5);
Assert.NotNull(actual);
// ASCII and UTF-8 are identical for the character range
Assert.Equal(10, actual.Count);
Assert.Equal(6, actual.Count);
}
#endregion

View File

@@ -0,0 +1,76 @@
using SabreTools.IO.Extensions;
using Xunit;

namespace SabreTools.IO.Test.Extensions
{
    /// <summary>
    /// Tests for the null-tolerant "Optional*" string extension methods
    /// </summary>
    public class StringExtensionsTests
    {
        #region OptionalContains

        [Theory]
        [InlineData(null, "ANY", false)]
        [InlineData("", "ANY", false)]
        [InlineData("ANY", "ANY", true)]
        [InlineData("ANYTHING", "ANY", true)]
        [InlineData("THING", "ANY", false)]
        [InlineData("THINGANY", "ANY", true)]
        public void OptionalContainsTest(string? self, string value, bool expected)
            => Assert.Equal(expected, self.OptionalContains(value));

        #endregion

        #region OptionalEndsWith

        [Theory]
        [InlineData(null, "ANY", false)]
        [InlineData("", "ANY", false)]
        [InlineData("ANY", "ANY", true)]
        [InlineData("ANYTHING", "ANY", false)]
        [InlineData("THING", "ANY", false)]
        [InlineData("THINGANY", "ANY", true)]
        public void OptionalEndsWithTest(string? self, string value, bool expected)
            => Assert.Equal(expected, self.OptionalEndsWith(value));

        #endregion

        #region OptionalEquals

        [Theory]
        [InlineData(null, "ANY", false)]
        [InlineData("", "ANY", false)]
        [InlineData("ANY", "ANY", true)]
        [InlineData("ANYTHING", "ANY", false)]
        [InlineData("THING", "ANY", false)]
        [InlineData("THINGANY", "ANY", false)]
        public void OptionalEqualsTest(string? self, string value, bool expected)
            => Assert.Equal(expected, self.OptionalEquals(value));

        #endregion

        #region OptionalStartsWith

        [Theory]
        [InlineData(null, "ANY", false)]
        [InlineData("", "ANY", false)]
        [InlineData("ANY", "ANY", true)]
        [InlineData("ANYTHING", "ANY", true)]
        [InlineData("THING", "ANY", false)]
        [InlineData("THINGANY", "ANY", false)]
        public void OptionalStartsWithTest(string? self, string value, bool expected)
            => Assert.Equal(expected, self.OptionalStartsWith(value));

        #endregion
    }
}

View File

@@ -0,0 +1,40 @@
using System.IO;
using Xunit;

namespace SabreTools.IO.Test.Streams
{
    /// <summary>
    /// Tests for <see cref="IO.Streams.BufferedStream"/>
    /// </summary>
    public class BufferedStreamTests
    {
        #region ReadNextByte

        [Fact]
        public void ReadNextByte_Empty_Null()
        {
            // An empty backing stream has no bytes to hand out
            var wrapper = new IO.Streams.BufferedStream(new MemoryStream());

            byte? actual = wrapper.ReadNextByte();

            Assert.Null(actual);
        }

        [Fact]
        public void ReadNextByte_Filled_ValidPosition_Byte()
        {
            // A zero-filled backing stream read from position 0 yields 0x00
            var wrapper = new IO.Streams.BufferedStream(new MemoryStream(new byte[1024]));

            byte? actual = wrapper.ReadNextByte();

            Assert.Equal((byte)0x00, actual);
        }

        [Fact]
        public void ReadNextByte_Filled_InvalidPosition_Null()
        {
            // Seeking the source to its end before wrapping leaves nothing to read
            var source = new MemoryStream(new byte[1024]);
            source.Seek(0, SeekOrigin.End);
            var wrapper = new IO.Streams.BufferedStream(source);

            byte? actual = wrapper.ReadNextByte();

            Assert.Null(actual);
        }

        #endregion
    }
}

View File

@@ -3,8 +3,6 @@ using System.IO;
using System.Text;
using SabreTools.Hashing;
using SabreTools.IO.Extensions;
using SabreTools.Models.PKZIP;
using static SabreTools.Models.PKZIP.Constants;
namespace SabreTools.IO.Compression.Deflate
{
@@ -20,6 +18,46 @@ namespace SabreTools.IO.Compression.Deflate
/// </summary>
private const int BufferSize = 1024 * 1024;
/// <summary>
/// Local file header signature
/// </summary>
private const uint LocalFileHeaderSignature = 0x04034B50;
#endregion
#region Private Classes
/// <summary>
/// Minimal PKZIP local file header information
/// </summary>
/// <remarks>
/// Only the fields needed for extraction are kept; all other header
/// fields are read and discarded during parsing
/// </remarks>
private class MinLocalFileHeader
{
/// <summary>
/// Signature (0x04034B50)
/// </summary>
public uint Signature { get; set; }
/// <summary>
/// CRC-32
/// </summary>
/// <remarks>Trusted over any externally supplied CRC-32 value</remarks>
public uint CRC32 { get; set; }
/// <summary>
/// Compressed size
/// </summary>
public uint CompressedSize { get; set; }
/// <summary>
/// Uncompressed size
/// </summary>
public uint UncompressedSize { get; set; }
/// <summary>
/// File name (variable size)
/// </summary>
/// <remarks>Null when the header contains no file name</remarks>
public string? FileName { get; set; }
}
#endregion
#region Extraction
@@ -140,7 +178,7 @@ namespace SabreTools.IO.Compression.Deflate
long current = source.Position;
// Parse the PKZIP header, if it exists
LocalFileHeader? zipHeader = ParseLocalFileHeader(source);
MinLocalFileHeader? zipHeader = ParseLocalFileHeader(source);
long zipHeaderBytes = source.Position - current;
// Always trust the PKZIP CRC-32 value over what is supplied
@@ -269,46 +307,39 @@ namespace SabreTools.IO.Compression.Deflate
}
/// <summary>
/// Parse a Stream into a local file header
/// Parse a Stream into a minimal local file header
/// </summary>
/// <param name="data">Stream to parse</param>
/// <returns>Filled local file header on success, null on error</returns>
/// <remarks>Mirror of method in Serialization</remarks>
private static LocalFileHeader? ParseLocalFileHeader(Stream data)
/// <returns>Filled minimal local file header on success, null on error</returns>
/// <remarks>Partial mirror of method in Serialization</remarks>
private static MinLocalFileHeader? ParseLocalFileHeader(Stream data)
{
var header = new LocalFileHeader();
var header = new MinLocalFileHeader();
header.Signature = data.ReadUInt32LittleEndian();
if (header.Signature != LocalFileHeaderSignature)
return null;
header.Version = data.ReadUInt16LittleEndian();
header.Flags = (GeneralPurposeBitFlags)data.ReadUInt16LittleEndian();
header.CompressionMethod = (CompressionMethod)data.ReadUInt16LittleEndian();
header.LastModifedFileTime = data.ReadUInt16LittleEndian();
header.LastModifiedFileDate = data.ReadUInt16LittleEndian();
_ = data.ReadUInt16LittleEndian(); // Version
_ = data.ReadUInt16LittleEndian(); // Flags
_ = data.ReadUInt16LittleEndian(); // CompressionMethod
_ = data.ReadUInt16LittleEndian(); // LastModifedFileTime
_ = data.ReadUInt16LittleEndian(); // LastModifiedFileDate
header.CRC32 = data.ReadUInt32LittleEndian();
header.CompressedSize = data.ReadUInt32LittleEndian();
header.UncompressedSize = data.ReadUInt32LittleEndian();
header.FileNameLength = data.ReadUInt16LittleEndian();
header.ExtraFieldLength = data.ReadUInt16LittleEndian();
ushort fileNameLength = data.ReadUInt16LittleEndian();
ushort extraFieldLength = data.ReadUInt16LittleEndian();
if (header.FileNameLength > 0 && data.Position + header.FileNameLength <= data.Length)
if (fileNameLength > 0 && data.Position + fileNameLength <= data.Length)
{
byte[] filenameBytes = data.ReadBytes(header.FileNameLength);
if (filenameBytes.Length != header.FileNameLength)
return null;
byte[] filenameBytes = data.ReadBytes(fileNameLength);
header.FileName = Encoding.ASCII.GetString(filenameBytes);
}
// Parsing extras is skipped here, unlike in Serialization
if (header.ExtraFieldLength > 0 && data.Position + header.ExtraFieldLength <= data.Length)
{
byte[] extraBytes = data.ReadBytes(header.ExtraFieldLength);
if (extraBytes.Length != header.ExtraFieldLength)
return null;
}
if (extraFieldLength > 0 && data.Position + extraFieldLength <= data.Length)
_ = data.ReadBytes(extraFieldLength);
return header;
}

View File

@@ -0,0 +1,58 @@
namespace SabreTools.IO.Compression.LZX
{
    /// <summary>
    /// Data portion of an aligned offset block. The layout matches a verbatim
    /// block, except that an aligned offset tree precedes the other trees.
    /// </summary>
    /// <see href="https://interoperability.blob.core.windows.net/files/MS-PATCH/%5bMS-PATCH%5d.pdf"/>
    internal class AlignedOffsetBlockData : BlockData
    {
        /// <summary>
        /// Aligned offset tree (8 elements, 3 bits each)
        /// </summary>
        public byte[]? AlignedOffsetTree { get; set; }

        /// <summary>
        /// Pretree for the first 256 elements of the main tree (20 elements, 4 bits each)
        /// </summary>
        public byte[]? PretreeFirst256 { get; set; }

        /// <summary>
        /// Path lengths of the first 256 elements of the main tree, encoded using the pretree
        /// </summary>
        public int[]? PathLengthsFirst256 { get; set; }

        /// <summary>
        /// Pretree for the remainder of the main tree (20 elements, 4 bits each)
        /// </summary>
        public byte[]? PretreeRemainder { get; set; }

        /// <summary>
        /// Path lengths of the remaining elements of the main tree, encoded using the pretree
        /// </summary>
        public int[]? PathLengthsRemainder { get; set; }

        /// <summary>
        /// Pretree for the length tree (20 elements, 4 bits each)
        /// </summary>
        public byte[]? PretreeLengthTree { get; set; }

        /// <summary>
        /// Path lengths of the elements in the length tree, encoded using the pretree
        /// </summary>
        public int[]? PathLengthsLengthTree { get; set; }

        /// <summary>
        /// Variable-length token sequence (matches and literals)
        /// </summary>
        public byte[]? TokenSequence { get; set; }
    }
}

View File

@@ -0,0 +1,24 @@
namespace SabreTools.IO.Compression.LZX
{
    /// <summary>
    /// A single LZXD block: either a run of data encoded with one set of
    /// Huffman trees or a run of uncompressed data. A compressed stream may
    /// contain any number of blocks, each with its own trees, and blocks need
    /// not start or end on chunk boundaries — a block can span several chunks
    /// and a chunk can hold several blocks. The block count depends on how
    /// well the data compresses; the chunk count depends on the data size.
    /// </summary>
    /// <see href="https://interoperability.blob.core.windows.net/files/MS-PATCH/%5bMS-PATCH%5d.pdf"/>
    internal class Block
    {
        /// <summary>
        /// Generic header describing the block type and size
        /// </summary>
        public BlockHeader? Header { get; set; }

        /// <summary>
        /// Type-specific data following the header
        /// </summary>
        public BlockData? BlockData { get; set; }
    }
}

View File

@@ -0,0 +1,8 @@
namespace SabreTools.IO.Compression.LZX
{
    /// <summary>
    /// Base type for the data portion of an LZXD block; the concrete layout
    /// depends on the block type
    /// </summary>
    /// <see href="https://interoperability.blob.core.windows.net/files/MS-PATCH/%5bMS-PATCH%5d.pdf"/>
    internal abstract class BlockData
    {
        // Intentionally empty: the block data variants share no common fields
    }
}

View File

@@ -0,0 +1,33 @@
namespace SabreTools.IO.Compression.LZX
{
    /// <summary>
    /// Generic LZXD block header: a 3-bit block type (section 2.3.1.1)
    /// followed by a 24-bit count of the uncompressed bytes the block
    /// represents (section 2.3.1.2), stored most significant byte first.
    /// A type-specific header describing the rest of the block follows.
    /// </summary>
    /// <see href="https://interoperability.blob.core.windows.net/files/MS-PATCH/%5bMS-PATCH%5d.pdf"/>
    internal class BlockHeader
    {
        /// <summary>
        /// Type of the block that follows
        /// </summary>
        /// <remarks>3 bits</remarks>
        public BlockType BlockType { get; set; }

        /// <summary>
        /// High 8 bits of the 24-bit block size
        /// </summary>
        /// <remarks>8 bits</remarks>
        public byte BlockSizeMSB { get; set; }

        /// <summary>
        /// Middle 8 bits of the 24-bit block size
        /// </summary>
        /// <remarks>8 bits</remarks>
        public byte BlockSizeByte2 { get; set; }

        /// <summary>
        /// Low 8 bits of the 24-bit block size
        /// </summary>
        /// <remarks>8 bits; property name casing preserved for compatibility</remarks>
        public byte BlocksizeLSB { get; set; }
    }
}

View File

@@ -0,0 +1,25 @@
namespace SabreTools.IO.Compression.LZX
{
    /// <summary>
    /// One unit of compressed LZXD output. Every chunk except the last
    /// represents exactly 32 KB of uncompressed data. After each 32 KB of
    /// uncompressed data is emitted, the bitstream is padded with up to 15
    /// zero bits so the next chunk starts on an even 16-bit boundary, giving
    /// each compressed chunk a byte-aligned size. A non-final chunk may be
    /// smaller or larger than 32 KB depending on how compressible the data is.
    /// </summary>
    internal class Chunk
    {
        /// <summary>
        /// Chunk header
        /// </summary>
        public ChunkHeader? Header { get; set; }

        /// <summary>
        /// Block headers and data contained in this chunk
        /// </summary>
        public Block[]? Blocks { get; set; }
    }
}

View File

@@ -0,0 +1,46 @@
namespace SabreTools.IO.Compression.LZX
{
    /// <summary>
    /// Header information preceding each compressed LZXD chunk
    /// </summary>
    internal class ChunkHeader
    {
        /// <summary>
        /// Byte-aligned, little-endian, 16-bit compressed chunk size that
        /// prefixes every chunk. The prefix chain can be walked without
        /// decompressing: the next prefix sits at this prefix's absolute
        /// offset, plus 2 for the prefix itself, plus the chunk size.
        /// </summary>
        public ushort ChunkSize { get; set; }

        /// <summary>
        /// First bit of the first chunk in the bitstream (after the 2-byte
        /// chunk-size prefix). When set, two 16-bit fields follow immediately
        /// and E8 translation is enabled for all subsequent chunks, using the
        /// 32-bit value built from those fields as the E8_file_size (which is
        /// independent of the uncompressed data length). E8 call translation
        /// is disabled after the 32,768th chunk, i.e. after 1 GB of
        /// uncompressed data.
        /// </summary>
        public byte E8Translation { get; set; }

        /// <summary>
        /// High WORD of the E8 translation size; only present when
        /// <see cref="E8Translation"/> is set
        /// </summary>
        public ushort? TranslationSizeHighWord { get; set; }

        /// <summary>
        /// Low WORD of the E8 translation size; only present when
        /// <see cref="E8Translation"/> is set
        /// </summary>
        public ushort? TranslationSizeLowWord { get; set; }
    }
}

View File

@@ -0,0 +1,38 @@
namespace SabreTools.IO.Compression.LZX
{
    /// <summary>
    /// Constants defined by (or derived from) the LZX specification
    /// </summary>
    internal static class Constants
    {
        // Values taken directly from the LZX specification

        public const int LZX_MIN_MATCH = 2;
        public const int LZX_MAX_MATCH = 257;
        public const int LZX_NUM_CHARS = 256;
        public const int LZX_PRETREE_NUM_ELEMENTS = 20;

        /// <summary>
        /// Number of elements in the aligned offset tree
        /// </summary>
        public const int LZX_ALIGNED_NUM_ELEMENTS = 8;

        /// <summary>
        /// Number of primary lengths; this one is missing from the spec
        /// </summary>
        public const int LZX_NUM_PRIMARY_LENGTHS = 7;

        /// <summary>
        /// Number of elements in the length tree
        /// </summary>
        public const int LZX_NUM_SECONDARY_LENGTHS = 249;

        // Huffman decode table parameters: tweak the TABLEBITS values as desired

        public const int LZX_PRETREE_MAXSYMBOLS = LZX_PRETREE_NUM_ELEMENTS;
        public const int LZX_PRETREE_TABLEBITS = 6;
        public const int LZX_MAINTREE_MAXSYMBOLS = LZX_NUM_CHARS + 50 * 8;
        public const int LZX_MAINTREE_TABLEBITS = 12;
        public const int LZX_LENGTH_MAXSYMBOLS = LZX_NUM_SECONDARY_LENGTHS + 1;
        public const int LZX_LENGTH_TABLEBITS = 12;
        public const int LZX_ALIGNED_MAXSYMBOLS = LZX_ALIGNED_NUM_ELEMENTS;
        public const int LZX_ALIGNED_TABLEBITS = 7;

        /// <summary>
        /// Safety margin allowing length table decoding overruns
        /// </summary>
        public const int LZX_LENTABLE_SAFETY = 64;
    }
}

View File

@@ -0,0 +1,48 @@
namespace SabreTools.IO.Compression.LZX
{
    /// <summary>
    /// 3-bit LZXD block type; only values 1-3 are valid
    /// </summary>
    internal enum BlockType : byte
    {
        /// <summary>
        /// Not valid
        /// </summary>
        INVALID_0 = 0b000,

        /// <summary>
        /// Verbatim block
        /// </summary>
        Verbatim = 0b001,

        /// <summary>
        /// Aligned offset block
        /// </summary>
        AlignedOffset = 0b010,

        /// <summary>
        /// Uncompressed block
        /// </summary>
        Uncompressed = 0b011,

        /// <summary>
        /// Not valid
        /// </summary>
        INVALID_4 = 0b100,

        /// <summary>
        /// Not valid
        /// </summary>
        INVALID_5 = 0b101,

        /// <summary>
        /// Not valid
        /// </summary>
        INVALID_6 = 0b110,

        /// <summary>
        /// Not valid
        /// </summary>
        INVALID_7 = 0b111,
    }
}

View File

@@ -0,0 +1,54 @@
namespace SabreTools.IO.Compression.LZX
{
    /// <summary>
    /// Data portion of an uncompressed block. After the generic block header,
    /// 1 to 16 zero bits align the bit buffer on a 16-bit boundary; at that
    /// point the bitstream ends and a byte stream begins. New little-endian
    /// 32-bit values for R0, R1, and R2 follow the padding, then the raw
    /// uncompressed bytes, then one extra zero byte if the uncompressed length
    /// is odd. The bitstream of byte-swapped 16-bit integers then resumes for
    /// the next Block Type field, if any. The decoded R0/R1/R2 seed the
    /// repeated offsets for any subsequent compressed block.
    /// </summary>
    /// <see href="https://interoperability.blob.core.windows.net/files/MS-PATCH/%5bMS-PATCH%5d.pdf"/>
    internal class UncompressedBlockData : BlockData
    {
        /// <summary>
        /// Zero-valued padding bits aligning the following field on a 16-bit boundary
        /// </summary>
        public ushort PaddingBits { get; set; }

        /// <summary>
        /// Repeated offset R0, little-endian DWORD ([MS-DTYP])
        /// </summary>
        /// <remarks>Encoded directly in the byte stream, not in the bitstream of byte-swapped 16-bit words</remarks>
        public uint R0 { get; set; }

        /// <summary>
        /// Repeated offset R1, little-endian DWORD
        /// </summary>
        /// <remarks>Encoded directly in the byte stream, not in the bitstream of byte-swapped 16-bit words</remarks>
        public uint R1 { get; set; }

        /// <summary>
        /// Repeated offset R2, little-endian DWORD
        /// </summary>
        /// <remarks>Encoded directly in the byte stream, not in the bitstream of byte-swapped 16-bit words</remarks>
        public uint R2 { get; set; }

        /// <summary>
        /// Raw uncompressed bytes; can use a direct memcpy, as specified in [IEEE1003.1]
        /// </summary>
        /// <remarks>Encoded directly in the byte stream, not in the bitstream of byte-swapped 16-bit words</remarks>
        public byte[]? RawDataBytes { get; set; }

        /// <summary>
        /// Realignment byte, present only when the uncompressed size is odd
        /// </summary>
        public byte AlignmentByte { get; set; }
    }
}

View File

@@ -0,0 +1,51 @@
namespace SabreTools.IO.Compression.LZX
{
    /// <summary>
    /// Data portion of a verbatim block, following the generic block header
    /// </summary>
    /// <see href="https://interoperability.blob.core.windows.net/files/MS-PATCH/%5bMS-PATCH%5d.pdf"/>
    internal class VerbatimBlockData : BlockData
    {
        /// <summary>
        /// Pretree for the first 256 elements of the main tree (20 elements, 4 bits each)
        /// </summary>
        public byte[]? PretreeFirst256 { get; set; }

        /// <summary>
        /// Path lengths of the first 256 elements of the main tree, encoded using the pretree
        /// </summary>
        public int[]? PathLengthsFirst256 { get; set; }

        /// <summary>
        /// Pretree for the remainder of the main tree (20 elements, 4 bits each)
        /// </summary>
        public byte[]? PretreeRemainder { get; set; }

        /// <summary>
        /// Path lengths of the remaining elements of the main tree, encoded using the pretree
        /// </summary>
        public int[]? PathLengthsRemainder { get; set; }

        /// <summary>
        /// Pretree for the length tree (20 elements, 4 bits each)
        /// </summary>
        public byte[]? PretreeLengthTree { get; set; }

        /// <summary>
        /// Path lengths of the elements in the length tree, encoded using the pretree
        /// </summary>
        public int[]? PathLengthsLengthTree { get; set; }

        /// <summary>
        /// Variable-length token sequence (matches and literals)
        /// </summary>
        public byte[]? TokenSequence { get; set; }
    }
}

View File

@@ -0,0 +1,28 @@
namespace SabreTools.IO.Compression.MSZIP
{
    /// <summary>
    /// Header of a single MSZIP block.
    ///
    /// Each MSZIP block MUST consist of a 2-byte MSZIP signature and one or more RFC 1951 blocks.
    /// The 2-byte MSZIP signature MUST consist of the bytes 0x43 and 0x4B, and MUST be the first
    /// 2 bytes in the MSZIP block.
    ///
    /// Each MSZIP block is the result of a single deflate compression operation, as defined in
    /// [RFC1951]. The compressor that performs the compression operation MUST generate one or
    /// more RFC 1951 blocks; their number, deflation mode, and type are determined by the
    /// compressor. The last RFC 1951 block in each MSZIP block MUST be marked as the "end" of
    /// the stream, as defined by [RFC1951] section 3.2.3. Decoding trees MUST be discarded
    /// after each RFC 1951 block, but the history buffer MUST be maintained. Each MSZIP block
    /// MUST represent no more than 32 KB of uncompressed data.
    ///
    /// The maximum compressed size of each MSZIP block is 32 KB + 12 bytes. This enables the
    /// MSZIP block to contain 32 KB of data split between two noncompressed RFC 1951 blocks,
    /// each of which has a value of BTYPE = 00.
    /// </summary>
    /// <see href="https://interoperability.blob.core.windows.net/files/MS-MCI/%5bMS-MCI%5d.pdf"/>
    internal class BlockHeader
    {
        /// <summary>
        /// Block signature; expected to be the bytes 'CK' (0x43, 0x4B)
        /// </summary>
        public ushort Signature { get; set; }
    }
}

View File

@@ -1,7 +1,6 @@
using System;
using System.IO;
using SabreTools.IO.Extensions;
using SabreTools.Models.Compression.MSZIP;
namespace SabreTools.IO.Compression.MSZIP
{

View File

@@ -0,0 +1,50 @@
namespace SabreTools.IO.Compression.Quantum
{
    /// <summary>
    /// Lookup tables used during Quantum decompression
    /// </summary>
    /// <see href="http://www.russotto.net/quantumcomp.html"/>
    internal static class Constants
    {
        /// <summary>
        /// Base value for each position slot; each entry pairs with the
        /// matching entry in <see cref="PositionExtraBits"/>
        /// </summary>
        public static readonly int[] PositionSlot =
        [
            0x00000, 0x00001, 0x00002, 0x00003, 0x00004, 0x00006, 0x00008, 0x0000c,
            0x00010, 0x00018, 0x00020, 0x00030, 0x00040, 0x00060, 0x00080, 0x000c0,
            0x00100, 0x00180, 0x00200, 0x00300, 0x00400, 0x00600, 0x00800, 0x00c00,
            0x01000, 0x01800, 0x02000, 0x03000, 0x04000, 0x06000, 0x08000, 0x0c000,
            0x10000, 0x18000, 0x20000, 0x30000, 0x40000, 0x60000, 0x80000, 0xc0000,
            0x100000, 0x180000
        ];

        /// <summary>
        /// Number of extra bits read after each position slot
        /// </summary>
        public static readonly int[] PositionExtraBits =
        [
            0, 0, 0, 0, 1, 1, 2, 2,
            3, 3, 4, 4, 5, 5, 6, 6,
            7, 7, 8, 8, 9, 9, 10, 10,
            11, 11, 12, 12, 13, 13, 14, 14,
            15, 15, 16, 16, 17, 17, 18, 18,
            19, 19
        ];

        /// <summary>
        /// Base value for each length slot; each entry pairs with the
        /// matching entry in <see cref="LengthExtraBits"/>
        /// </summary>
        public static readonly int[] LengthSlot =
        [
            0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x08,
            0x0a, 0x0c, 0x0e, 0x12, 0x16, 0x1a, 0x1e, 0x26,
            0x2e, 0x36, 0x3e, 0x4e, 0x5e, 0x6e, 0x7e, 0x9e,
            0xbe, 0xde, 0xfe
        ];

        /// <summary>
        /// Number of extra bits read after each length slot
        /// </summary>
        public static readonly int[] LengthExtraBits =
        [
            0, 0, 0, 0, 0, 0, 1, 1,
            1, 1, 2, 2, 2, 2, 3, 3,
            3, 3, 4, 4, 4, 4, 5, 5,
            5, 5, 0
        ];

        /// <summary>
        /// Number of position slots for (tsize - 10)
        /// </summary>
        public static readonly int[] NumPositionSlots =
        [
            20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42
        ];
    }
}

View File

@@ -2,8 +2,7 @@ using System;
using System.Collections.Generic;
using System.IO;
using SabreTools.IO.Streams;
using SabreTools.Models.Compression.Quantum;
using static SabreTools.Models.Compression.Quantum.Constants;
using static SabreTools.IO.Compression.Quantum.Constants;
namespace SabreTools.IO.Compression.Quantum
{

View File

@@ -0,0 +1,45 @@
namespace SabreTools.IO.Compression.Quantum
{
    /// <summary>
    /// Identifies which frequency model a Quantum selector refers to
    /// </summary>
    internal enum SelectorModel
    {
        /// <summary>
        /// Literal model, 64 entries, starting at symbol 0
        /// </summary>
        SELECTOR_0 = 0,

        /// <summary>
        /// Literal model, 64 entries, starting at symbol 64
        /// </summary>
        SELECTOR_1 = 1,

        /// <summary>
        /// Literal model, 64 entries, starting at symbol 128
        /// </summary>
        SELECTOR_2 = 2,

        /// <summary>
        /// Literal model, 64 entries, starting at symbol 192
        /// </summary>
        SELECTOR_3 = 3,

        /// <summary>
        /// LZ model, 3-character matches, max 24 entries, starting at symbol 0
        /// </summary>
        SELECTOR_4 = 4,

        /// <summary>
        /// LZ model, 4-character matches, max 36 entries, starting at symbol 0
        /// </summary>
        SELECTOR_5 = 5,

        /// <summary>
        /// LZ model, 5+ character matches, max 42 entries, starting at symbol 0
        /// </summary>
        SELECTOR_6_POSITION = 6,

        /// <summary>
        /// LZ model, 5+ character matches, 27 entries, starting at symbol 0
        /// </summary>
        SELECTOR_6_LENGTH = 7,
    }
}

View File

@@ -0,0 +1,24 @@
namespace SabreTools.IO.Compression.Quantum
{
    /// <summary>
    /// Adaptive frequency model used during Quantum decompression
    /// </summary>
    /// <see href="http://www.russotto.net/quantumcomp.html"/>
    internal sealed class Model
    {
        /// <summary>
        /// Number of entries in the symbol table
        /// </summary>
        public int Entries { get; set; }

        /// <summary>
        /// Symbol table backing the model
        /// </summary>
        /// <remarks>
        /// All the models are initialized with the symbols in symbol
        /// order in the table, and with every symbol in the table
        /// having a frequency of 1
        /// </remarks>
        public ModelSymbol[]? Symbols { get; set; }

        /// <summary>
        /// Running total frequency across the table
        /// </summary>
        /// <remarks>
        /// The initial total frequency is equal to the number of entries
        /// in the table
        /// </remarks>
        public int TotalFrequency { get; set; }

        /// <summary>
        /// Countdown until the model reorders its symbols
        /// </summary>
        /// <remarks>The initial time_to_reorder value is 4</remarks>
        public int TimeToReorder { get; set; }
    }
}

View File

@@ -0,0 +1,15 @@
namespace SabreTools.IO.Compression.Quantum
{
    /// <summary>
    /// A single entry in a Quantum frequency model's symbol table
    /// </summary>
    /// <see href="http://www.russotto.net/quantumcomp.html"/>
    internal sealed class ModelSymbol
    {
        /// <summary>
        /// Symbol value represented by this entry
        /// </summary>
        public ushort Symbol { get; set; }

        /// <summary>
        /// The cumulative frequency is the frequency of all the symbols
        /// which are at a higher index in the table than that symbol —
        /// thus the last entry in the table has a cumulative frequency of 0.
        /// </summary>
        public ushort CumulativeFrequency { get; set; }
    }
}

View File

@@ -1,6 +1,5 @@
using System;
using System.IO;
using SabreTools.Models.LZ;
namespace SabreTools.IO.Compression.SZDD
{
@@ -15,7 +14,7 @@ namespace SabreTools.IO.Compression.SZDD
/// <summary>
/// Source stream for the decompressor
/// </summary>
private readonly BufferedStream _source;
private readonly Streams.BufferedStream _source;
/// <summary>
/// SZDD format being decompressed
@@ -37,19 +36,19 @@ namespace SabreTools.IO.Compression.SZDD
// Initialize the window with space characters
_window = Array.ConvertAll(_window, b => (byte)0x20);
_source = new BufferedStream(source);
_source = new Streams.BufferedStream(source);
}
/// <summary>
/// Create a KWAJ decompressor
/// </summary>
public static Decompressor CreateKWAJ(byte[] source, KWAJCompressionType compressionType)
public static Decompressor CreateKWAJ(byte[] source, ushort compressionType)
=> CreateKWAJ(new MemoryStream(source), compressionType);
/// <summary>
/// Create a KWAJ decompressor
/// </summary>
public static Decompressor CreateKWAJ(Stream source, KWAJCompressionType compressionType)
public static Decompressor CreateKWAJ(Stream source, ushort compressionType)
{
// Create the decompressor
var decompressor = new Decompressor(source);
@@ -57,11 +56,11 @@ namespace SabreTools.IO.Compression.SZDD
// Set the format and return
decompressor._format = compressionType switch
{
KWAJCompressionType.NoCompression => Format.KWAJNoCompression,
KWAJCompressionType.NoCompressionXor => Format.KWAJXor,
KWAJCompressionType.QBasic => Format.KWAJQBasic,
KWAJCompressionType.LZH => Format.KWAJLZH,
KWAJCompressionType.MSZIP => Format.KWAJMSZIP,
0x0000 => Format.KWAJNoCompression,
0x0001 => Format.KWAJXor,
0x0002 => Format.KWAJQBasic,
0x0003 => Format.KWAJLZH,
0x0004 => Format.KWAJMSZIP,
_ => throw new IndexOutOfRangeException(nameof(source)),
};
return decompressor;
@@ -229,77 +228,5 @@ namespace SabreTools.IO.Compression.SZDD
dest.Flush();
return true;
}
/// <summary>
/// Buffered stream that reads in blocks
/// </summary>
private class BufferedStream
{
/// <summary>
/// Source stream for populating the buffer
/// </summary>
private readonly Stream _source;
/// <summary>
/// Internal buffer to read
/// </summary>
private readonly byte[] _buffer = new byte[2048];
/// <summary>
/// Current pointer into the buffer
/// </summary>
private int _bufferPtr = 0;
/// <summary>
/// Represents the number of available bytes
/// </summary>
private int _available = -1;
/// <summary>
/// Create a new buffered stream
/// </summary>
public BufferedStream(Stream source)
{
_source = source;
}
/// <summary>
/// Read the next byte from the buffer, if possible
/// </summary>
public byte? ReadNextByte()
{
// Ensure the buffer first
if (!EnsureBuffer())
return null;
// Return the next available value
return _buffer[_bufferPtr++];
}
/// <summary>
/// Ensure the buffer has data to read
/// </summary>
private bool EnsureBuffer()
{
// Force an update if in the initial state
if (_available == -1)
{
_available = _source.Read(_buffer, 0, _buffer.Length);
_bufferPtr = 0;
return _available != 0;
}
// If the pointer is out of range
if (_bufferPtr >= _available)
{
_available = _source.Read(_buffer, 0, _buffer.Length);
_bufferPtr = 0;
return _available != 0;
}
// Otherwise, assume data is available
return true;
}
}
}
}

View File

@@ -2,7 +2,6 @@ using System;
using System.IO;
using SabreTools.Hashing;
using SabreTools.Matching;
using static SabreTools.Models.MoPaQ.Constants;
namespace SabreTools.IO.Encryption
{
@@ -11,6 +10,14 @@ namespace SabreTools.IO.Encryption
/// </summary>
public class MoPaQDecrypter
{
#region Constants
private const uint MPQ_HASH_KEY2_MIX = 0x400;
private const uint STORM_BUFFER_SIZE = 0x500;
#endregion
#region Private Instance Variables
/// <summary>

View File

@@ -59,33 +59,31 @@ namespace SabreTools.IO.Extensions
/// </summary>
/// <param name="charLimit">Number of characters needed to be a valid string, default 5</param>
/// <returns>String list containing the requested data, null on error</returns>
/// <remarks>A maximum of 16KiB of data can be scanned at a time</remarks>
#if NET5_0_OR_GREATER
/// <remarks>This reads both Latin1 and UTF-16 strings from the input data</remarks>
#else
/// <remarks>This reads both ASCII and UTF-16 strings from the input data</remarks>
#endif
public static List<string>? ReadStringsFrom(this byte[]? input, int charLimit = 5)
{
// Validate the data
if (input == null || input.Length == 0)
return null;
// Limit to 16KiB of data
if (input.Length > 16384)
{
int offset = 0;
input = input.ReadBytes(ref offset, 16384);
}
#if NET5_0_OR_GREATER
// Check for Latin1 strings
var asciiStrings = input.ReadStringsWithEncoding(charLimit, Encoding.Latin1);
#else
// Check for ASCII strings
var asciiStrings = input.ReadStringsWithEncoding(charLimit, Encoding.ASCII);
// Check for UTF-8 strings
// We are limiting the check for Unicode characters with a second byte of 0x00 for now
var utf8Strings = input.ReadStringsWithEncoding(charLimit, Encoding.UTF8);
#endif
// Check for Unicode strings
// We are limiting the check for Unicode characters with a second byte of 0x00 for now
var unicodeStrings = input.ReadStringsWithEncoding(charLimit, Encoding.Unicode);
// Ignore duplicate strings across encodings
List<string> sourceStrings = [.. asciiStrings, .. utf8Strings, .. unicodeStrings];
List<string> sourceStrings = [.. asciiStrings, .. unicodeStrings];
// Sort the strings and return
sourceStrings.Sort();
@@ -99,11 +97,7 @@ namespace SabreTools.IO.Extensions
/// <param name="charLimit">Number of characters needed to be a valid string</param>
/// <param name="encoding">Character encoding to use for checking</param>
/// <returns>String list containing the requested data, empty on error</returns>
/// <remarks>
/// This method has a couple of notable implementation details:
/// - Strings can only have a maximum of 64 characters
/// - Characters that fall outside of the extended ASCII set will be unused
/// </remarks>
/// <remarks>Characters with the higher bytes set are unused</remarks>
#if NET20
public static List<string> ReadStringsWithEncoding(this byte[]? bytes, int charLimit, Encoding encoding)
#else
@@ -115,6 +109,22 @@ namespace SabreTools.IO.Extensions
if (charLimit <= 0 || charLimit > bytes.Length)
return [];
// Short-circuit for some encoding types
if (encoding.CodePage == Encoding.ASCII.CodePage)
return bytes.ReadAsciiStrings(charLimit);
#if NET5_0_OR_GREATER
else if (encoding.CodePage == Encoding.Latin1.CodePage)
return bytes.ReadFixedWidthEncodingStrings(charLimit, Encoding.Latin1, 1);
#endif
else if (encoding.IsSingleByte)
return bytes.ReadFixedWidthEncodingStrings(charLimit, encoding, 1);
else if (encoding.CodePage == Encoding.Unicode.CodePage)
return bytes.ReadFixedWidthEncodingStrings(charLimit, Encoding.Unicode, 2);
else if (encoding.CodePage == Encoding.BigEndianUnicode.CodePage)
return bytes.ReadFixedWidthEncodingStrings(charLimit, Encoding.BigEndianUnicode, 2);
else if (encoding.CodePage == Encoding.UTF32.CodePage)
return bytes.ReadFixedWidthEncodingStrings(charLimit, Encoding.UTF32, 4);
// Create the string set to return
#if NET20
var strings = new List<string>();
@@ -137,7 +147,7 @@ namespace SabreTools.IO.Extensions
char c = (char)reader.Read();
// If the character is invalid
if (char.IsControl(c) || (c & 0xFF00) != 0)
if (char.IsControl(c) || (c & 0xFFFFFF00) != 0)
{
// Seek to the end of the last found string
string str = sb.ToString();
@@ -145,6 +155,10 @@ namespace SabreTools.IO.Extensions
ms.Seek(lastOffset, SeekOrigin.Begin);
reader.DiscardBufferedData();
// If there is no cached string
if (str.Length == 0)
continue;
// Add the string if long enough
if (str.Length >= charLimit)
strings.Add(str);
@@ -168,5 +182,143 @@ namespace SabreTools.IO.Extensions
return strings;
}
#region Fixed Byte-Width Encoding Helpers
        /// <summary>
        /// Read string data from a byte array using an encoding with a fixed width
        /// </summary>
        /// <param name="bytes">Byte array representing the source data</param>
        /// <param name="charLimit">Number of characters needed to be a valid string</param>
        /// <param name="encoding">Character encoding to use for checking</param>
        /// <param name="width">Character width of the encoding, in bytes</param>
        /// <returns>String list containing the requested data, empty on error</returns>
        /// <remarks>Characters with the higher bytes set are unused</remarks>
#if NET20
        private static List<string> ReadFixedWidthEncodingStrings(this byte[] bytes, int charLimit, Encoding encoding, int width)
#else
        private static HashSet<string> ReadFixedWidthEncodingStrings(this byte[] bytes, int charLimit, Encoding encoding, int width)
#endif
        {
            // Reject invalid limits: non-positive, or longer than the data itself
            if (charLimit <= 0 || charLimit > bytes.Length)
                return [];

            // Create the string set to return
            // (NET20 has no HashSet, so a List is used there instead)
#if NET20
            var strings = new List<string>();
#else
            var strings = new HashSet<string>();
#endif

            // Create a string builder that accumulates the current candidate string
            var sb = new StringBuilder();

            // Check for strings, stopping when fewer than `width` bytes remain
            int offset = 0;
            while (offset <= bytes.Length - width)
            {
                // Decode the next character from the byte stream
                // (for multi-char decodes, only the first char is considered)
                char c = encoding.GetChars(bytes, offset, width)[0];
                offset += width;

                // If the character is invalid (a control character, or any bits
                // above the low byte set)
                if (char.IsControl(c) || (c & 0xFFFFFF00) != 0)
                {
                    // Pretend only one byte was read so scanning resumes
                    // byte-by-byte; a valid string may begin at any byte
                    // offset, not only on a width-aligned boundary
                    offset -= width - 1;

                    // If there is no cached string, nothing needs flushing
                    if (sb.Length == 0)
                        continue;

                    // Add the cached string if it meets the length requirement
                    if (sb.Length >= charLimit)
                        strings.Add(sb.ToString());

                    // Clear the builder and continue scanning
#if NET20 || NET35
                    sb = new();
#else
                    sb.Clear();
#endif
                    continue;
                }

                // Otherwise, add the character to the builder and continue
                sb.Append(c);
            }

            // Flush any candidate string left over at the end of the data
            if (sb.Length >= charLimit)
                strings.Add(sb.ToString());

            return strings;
        }
        /// <summary>
        /// Read string data from a byte array using ASCII encoding
        /// </summary>
        /// <param name="bytes">Byte array representing the source data</param>
        /// <param name="charLimit">Number of characters needed to be a valid string</param>
        /// <returns>String list containing the requested data, empty on error</returns>
        /// <remarks>
        /// Handling for 7-bit ASCII needs to be done differently than other
        /// fixed-width encodings: any byte above 0x7F terminates the current
        /// candidate string instead of being decoded
        /// </remarks>
#if NET20
        private static List<string> ReadAsciiStrings(this byte[] bytes, int charLimit)
#else
        private static HashSet<string> ReadAsciiStrings(this byte[] bytes, int charLimit)
#endif
        {
            // Reject invalid limits: non-positive, or longer than the data itself
            if (charLimit <= 0 || charLimit > bytes.Length)
                return [];

            // Create the string set to return
            // (NET20 has no HashSet, so a List is used there instead)
#if NET20
            var strings = new List<string>();
#else
            var strings = new HashSet<string>();
#endif

            // Create a string builder that accumulates the current candidate string
            var sb = new StringBuilder();

            // Check for strings
            int offset = 0;
            while (offset < bytes.Length)
            {
                // Read the next character from the stream
                // (ReadChar presumably advances offset by one byte — confirm
                // against the ReadChar extension's implementation)
                char c = bytes.ReadChar(ref offset);

                // If the character is invalid (a control character, or outside
                // the 7-bit ASCII range)
                if (char.IsControl(c) || c > 0x7F)
                {
                    // If there is no cached string, nothing needs flushing
                    if (sb.Length == 0)
                        continue;

                    // Add the cached string if it meets the length requirement
                    if (sb.Length >= charLimit)
                        strings.Add(sb.ToString());

                    // Clear the builder and continue scanning
#if NET20 || NET35
                    sb = new();
#else
                    sb.Clear();
#endif
                    continue;
                }

                // Otherwise, add the character to the builder and continue
                sb.Append(c);
            }

            // Flush any candidate string left over at the end of the data
            if (sb.Length >= charLimit)
                strings.Add(sb.ToString());

            return strings;
        }
#endregion
}
}

View File

@@ -1065,6 +1065,10 @@ namespace SabreTools.IO.Extensions
/// </summary>
private static byte[] ReadExactlyToBuffer(byte[] content, ref int offset, int length)
{
// If we have an invalid offset
if (offset < 0 || offset >= content.Length)
throw new ArgumentOutOfRangeException($"{nameof(offset)} must be between 0 and {content.Length}, {offset} provided");
// If we have an invalid length
if (length < 0)
throw new ArgumentOutOfRangeException($"{nameof(length)} must be 0 or a positive value, {length} requested");

View File

@@ -5,6 +5,25 @@ namespace SabreTools.IO.Extensions
{
public static class EnumerableExtensions
{
/// <summary>
/// Wrap iterating through an enumerable with an action
/// </summary>
/// <remarks>
/// .NET Frameworks 2.0 and 3.5 process in series.
/// .NET Frameworks 4.0 onward process in parallel.
/// </remarks>
public static void IterateWithAction<T>(this IEnumerable<T> source, Action<T> action)
{
#if NET20 || NET35
foreach (var item in source)
{
action(item);
}
#else
System.Threading.Tasks.Parallel.ForEach(source, action);
#endif
}
/// <summary>
/// Safely iterate through an enumerable, skipping any errors
/// </summary>

View File

@@ -1,6 +1,5 @@
using System.Collections.Generic;
using System.IO;
using System.Text;
namespace SabreTools.IO.Extensions
{
@@ -76,10 +75,15 @@ namespace SabreTools.IO.Extensions
/// <summary>
/// Read string data from a Stream
/// </summary>
/// <param name="charLimit">Number of characters needed to be a valid string, default 5</param>
/// <param name="position">Position in the source to read from</param>
/// <param name="length">Length of the requested data</param>
/// <param name="charLimit">Number of characters needed to be a valid string, default 5</param>
/// <returns>String list containing the requested data, null on error</returns>
#if NET5_0_OR_GREATER
/// <remarks>This reads both Latin1 and UTF-16 strings from the input data</remarks>
#else
/// <remarks>This reads both ASCII and UTF-16 strings from the input data</remarks>
#endif
public static List<string>? ReadStringsFrom(this Stream? input, int position, int length, int charLimit = 5)
{
// Read the data as a byte array first
@@ -87,23 +91,7 @@ namespace SabreTools.IO.Extensions
if (data == null)
return null;
// Check for ASCII strings
var asciiStrings = data.ReadStringsWithEncoding(charLimit, Encoding.ASCII);
// Check for UTF-8 strings
// We are limiting the check for Unicode characters with a second byte of 0x00 for now
var utf8Strings = data.ReadStringsWithEncoding(charLimit, Encoding.UTF8);
// Check for Unicode strings
// We are limiting the check for Unicode characters with a second byte of 0x00 for now
var unicodeStrings = data.ReadStringsWithEncoding(charLimit, Encoding.Unicode);
// Ignore duplicate strings across encodings
List<string> sourceStrings = [.. asciiStrings, .. utf8Strings, .. unicodeStrings];
// Sort the strings and return
sourceStrings.Sort();
return sourceStrings;
return data.ReadStringsFrom(charLimit);
}
/// <summary>

View File

@@ -0,0 +1,63 @@
using System;

namespace SabreTools.IO.Extensions
{
    /// <summary>
    /// Null-tolerant wrappers around common <see cref="string"/> comparison helpers.
    /// Each method returns false instead of throwing when the receiver is null.
    /// </summary>
    public static class StringExtensions
    {
        /// <inheritdoc cref="string.Contains(string)"/>
        public static bool OptionalContains(this string? self, string value)
            => OptionalContains(self, value, StringComparison.Ordinal);

        /// <inheritdoc cref="string.Contains(string, StringComparison)"/>
        public static bool OptionalContains(this string? self, string value, StringComparison comparisonType)
        {
            if (self == null)
                return false;

            // string.Contains(string, StringComparison) does not exist before
            // .NET Core 2.1, and the previous framework fallback silently
            // ignored comparisonType. IndexOf honors the requested comparison
            // on every supported target, so it is used unconditionally here.
            return self.IndexOf(value, comparisonType) >= 0;
        }

        /// <inheritdoc cref="string.EndsWith(string)"/>
        public static bool OptionalEndsWith(this string? self, string value)
            => OptionalEndsWith(self, value, StringComparison.Ordinal);

        /// <inheritdoc cref="string.EndsWith(string, StringComparison)"/>
        public static bool OptionalEndsWith(this string? self, string value, StringComparison comparisonType)
        {
            if (self == null)
                return false;

            return self.EndsWith(value, comparisonType);
        }

        /// <inheritdoc cref="string.Equals(string)"/>
        public static bool OptionalEquals(this string? self, string value)
            => OptionalEquals(self, value, StringComparison.Ordinal);

        /// <inheritdoc cref="string.Equals(string, StringComparison)"/>
        public static bool OptionalEquals(this string? self, string value, StringComparison comparisonType)
        {
            if (self == null)
                return false;

            return self.Equals(value, comparisonType);
        }

        /// <inheritdoc cref="string.StartsWith(string)"/>
        public static bool OptionalStartsWith(this string? self, string value)
            => OptionalStartsWith(self, value, StringComparison.Ordinal);

        /// <inheritdoc cref="string.StartsWith(string, StringComparison)"/>
        public static bool OptionalStartsWith(this string? self, string value, StringComparison comparisonType)
        {
            if (self == null)
                return false;

            return self.StartsWith(value, comparisonType);
        }
    }
}

View File

@@ -11,7 +11,7 @@
<SuppressTfmSupportBuildWarnings>true</SuppressTfmSupportBuildWarnings>
<SymbolPackageFormat>snupkg</SymbolPackageFormat>
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
<Version>1.7.2</Version>
<Version>1.7.4</Version>
<!-- Package Properties -->
<Authors>Matt Nadareski</Authors>
@@ -31,7 +31,6 @@
<ItemGroup>
<PackageReference Include="SabreTools.Matching" Version="1.6.0" />
<PackageReference Include="SabreTools.Models" Version="1.7.1" />
<PackageReference Include="SabreTools.Hashing" Version="1.5.0" />
</ItemGroup>

View File

@@ -0,0 +1,77 @@
using System.IO;

namespace SabreTools.IO.Streams
{
    /// <summary>
    /// Buffered stream that reads in blocks
    /// </summary>
    /// <remarks>Not a true <see cref="Stream"/> implementation yet</remarks>
    public class BufferedStream
    {
        /// <summary>
        /// Underlying stream used to refill the buffer
        /// </summary>
        private readonly Stream _source;

        /// <summary>
        /// Backing block buffer
        /// </summary>
        private readonly byte[] _buffer = new byte[2048];

        /// <summary>
        /// Index of the next unread byte within <see cref="_buffer"/>
        /// </summary>
        private int _bufferPtr = 0;

        /// <summary>
        /// Count of valid bytes currently buffered; -1 until the first read
        /// </summary>
        private int _available = -1;

        /// <summary>
        /// Create a new buffered stream wrapping <paramref name="source"/>
        /// </summary>
        public BufferedStream(Stream source)
        {
            _source = source;
        }

        /// <summary>
        /// Read the next byte, refilling the internal buffer as needed
        /// </summary>
        /// <returns>The next byte, or null when the source is exhausted</returns>
        public byte? ReadNextByte()
        {
            if (!EnsureBuffer())
                return null;

            return _buffer[_bufferPtr++];
        }

        /// <summary>
        /// Guarantee at least one unread byte is buffered, refilling from the
        /// source when in the initial state or when the buffer is consumed
        /// </summary>
        /// <returns>True if a byte is available to read, false otherwise</returns>
        private bool EnsureBuffer()
        {
            // Refill on first use or once the previous block is fully consumed
            if (_available == -1 || _bufferPtr >= _available)
            {
                _available = _source.Read(_buffer, 0, _buffer.Length);
                _bufferPtr = 0;
            }

            return _bufferPtr < _available;
        }
    }
}