mirror of
https://github.com/SabreTools/SabreTools.IO.git
synced 2026-02-08 05:37:52 +00:00
Compare commits
32 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
5054aeb077 | ||
|
|
d2e9b8d6e5 | ||
|
|
2c29aee834 | ||
|
|
576bafcb87 | ||
|
|
2b310ac528 | ||
|
|
4f6b6d7b59 | ||
|
|
17e55ee233 | ||
|
|
8b78906d1d | ||
|
|
cff2dcf4cc | ||
|
|
a56942cb73 | ||
|
|
5ed661b77c | ||
|
|
a0a0cd0386 | ||
|
|
bcc0fca4ad | ||
|
|
843e821e5f | ||
|
|
630b01283e | ||
|
|
22abb96013 | ||
|
|
314de12661 | ||
|
|
a0b24031b5 | ||
|
|
b4628485c3 | ||
|
|
4610ddc9b9 | ||
|
|
e392ddc8d7 | ||
|
|
1908d1b32e | ||
|
|
9d73195f86 | ||
|
|
335a486f17 | ||
|
|
d3e41ac187 | ||
|
|
8ddd9f3f78 | ||
|
|
54ad538c08 | ||
|
|
e6bc9ab3e3 | ||
|
|
94934b00a9 | ||
|
|
e49f56fccc | ||
|
|
79c64ddfa8 | ||
|
|
b22384d5f3 |
7
LICENSE
Normal file
7
LICENSE
Normal file
@@ -0,0 +1,7 @@
|
||||
Copyright (c) 2018-2025 Matt Nadareski
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice (including the next paragraph) shall be included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
@@ -143,28 +143,24 @@ namespace SabreTools.IO.Test.Extensions
|
||||
];
|
||||
var actual = arr.ReadStringsFrom(4);
|
||||
Assert.NotNull(actual);
|
||||
|
||||
// ASCII and UTF-8 are identical for the character range
|
||||
Assert.Equal(4, actual.Count);
|
||||
Assert.Equal(2, actual.Count);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void ReadStringsFrom_UTF8_Filled()
|
||||
public void ReadStringsFrom_Latin1Strings_Filled()
|
||||
{
|
||||
byte[]? arr =
|
||||
[
|
||||
.. Encoding.UTF8.GetBytes("TEST"),
|
||||
.. Encoding.Latin1.GetBytes("TEST"),
|
||||
.. new byte[] { 0x00 },
|
||||
.. Encoding.UTF8.GetBytes("TWO"),
|
||||
.. Encoding.Latin1.GetBytes("TWO"),
|
||||
.. new byte[] { 0x00 },
|
||||
.. Encoding.UTF8.GetBytes("DATA"),
|
||||
.. Encoding.Latin1.GetBytes("DATA"),
|
||||
.. new byte[] { 0x00 },
|
||||
];
|
||||
var actual = arr.ReadStringsFrom(4);
|
||||
Assert.NotNull(actual);
|
||||
|
||||
// ASCII and UTF-8 are identical for the character range
|
||||
Assert.Equal(4, actual.Count);
|
||||
Assert.Equal(2, actual.Count);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
@@ -195,11 +191,11 @@ namespace SabreTools.IO.Test.Extensions
|
||||
.. new byte[] { 0x00 },
|
||||
.. Encoding.ASCII.GetBytes("DATA1"),
|
||||
.. new byte[] { 0x00 },
|
||||
.. Encoding.UTF8.GetBytes("TEST2"),
|
||||
.. Encoding.Latin1.GetBytes("TEST2"),
|
||||
.. new byte[] { 0x00 },
|
||||
.. Encoding.UTF8.GetBytes("TWO2"),
|
||||
.. Encoding.Latin1.GetBytes("TWO2"),
|
||||
.. new byte[] { 0x00 },
|
||||
.. Encoding.UTF8.GetBytes("DATA2"),
|
||||
.. Encoding.Latin1.GetBytes("DATA2"),
|
||||
.. new byte[] { 0x00 },
|
||||
.. Encoding.Unicode.GetBytes("TEST3"),
|
||||
.. new byte[] { 0x00 },
|
||||
@@ -210,11 +206,56 @@ namespace SabreTools.IO.Test.Extensions
|
||||
];
|
||||
var actual = arr.ReadStringsFrom(5);
|
||||
Assert.NotNull(actual);
|
||||
|
||||
// ASCII and UTF-8 are identical for the character range
|
||||
Assert.Equal(10, actual.Count);
|
||||
Assert.Equal(6, actual.Count);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// This test is here mainly for performance testing
|
||||
/// and should not be enabled unless there are changes
|
||||
/// to the core reading methods that need comparison.
|
||||
/// </summary>
|
||||
// [Fact]
|
||||
// public void ReadStringsFrom_Mixed_MASSIVE()
|
||||
// {
|
||||
// byte[]? arr =
|
||||
// [
|
||||
// .. Encoding.ASCII.GetBytes("TEST1"),
|
||||
// .. new byte[] { 0x00 },
|
||||
// .. Encoding.ASCII.GetBytes("TWO1"),
|
||||
// .. new byte[] { 0x00 },
|
||||
// .. Encoding.ASCII.GetBytes("DATA1"),
|
||||
// .. new byte[] { 0x00 },
|
||||
// .. Encoding.UTF8.GetBytes("TEST2"),
|
||||
// .. new byte[] { 0x00 },
|
||||
// .. Encoding.UTF8.GetBytes("TWO2"),
|
||||
// .. new byte[] { 0x00 },
|
||||
// .. Encoding.UTF8.GetBytes("DATA2"),
|
||||
// .. new byte[] { 0x00 },
|
||||
// .. Encoding.Unicode.GetBytes("TEST3"),
|
||||
// .. new byte[] { 0x00 },
|
||||
// .. Encoding.Unicode.GetBytes("TWO3"),
|
||||
// .. new byte[] { 0x00 },
|
||||
// .. Encoding.Unicode.GetBytes("DATA3"),
|
||||
// .. new byte[] { 0x00 },
|
||||
// ];
|
||||
// arr = [.. arr, .. arr, .. arr, .. arr];
|
||||
// arr = [.. arr, .. arr, .. arr, .. arr];
|
||||
// arr = [.. arr, .. arr, .. arr, .. arr];
|
||||
// arr = [.. arr, .. arr, .. arr, .. arr];
|
||||
// arr = [.. arr, .. arr, .. arr, .. arr];
|
||||
// arr = [.. arr, .. arr, .. arr, .. arr];
|
||||
// arr = [.. arr, .. arr, .. arr, .. arr];
|
||||
// arr = [.. arr, .. arr, .. arr, .. arr];
|
||||
// arr = [.. arr, .. arr, .. arr, .. arr];
|
||||
// arr = [.. arr, .. arr, .. arr, .. arr];
|
||||
// // arr = [.. arr, .. arr, .. arr, .. arr];
|
||||
// // arr = [.. arr, .. arr, .. arr, .. arr];
|
||||
|
||||
// var actual = arr.ReadStringsFrom(5);
|
||||
// Assert.NotNull(actual);
|
||||
// Assert.NotEmpty(actual);
|
||||
// }
|
||||
|
||||
#endregion
|
||||
|
||||
#region ReadStringsWithEncoding
|
||||
@@ -272,6 +313,22 @@ namespace SabreTools.IO.Test.Extensions
|
||||
Assert.Equal(2, actual.Count);
|
||||
}
|
||||
|
||||
/// <summary>
/// Feeds bytes 0x00-0x1F followed by 0x80-0xFF to the ASCII reader
/// and expects a non-null, empty result.
/// </summary>
[Fact]
public void ReadStringsWithEncoding_InvalidAsciiChars_Empty()
{
    // Ascending run of 0x00-0x1F, then ascending run of 0x80-0xFF
    byte[]? arr =
    [
        .. Enumerable.Range(0x00, 0x20).Select(i => (byte)i),
        .. Enumerable.Range(0x80, 0x80).Select(i => (byte)i),
    ];

    var strings = arr.ReadStringsWithEncoding(1, Encoding.ASCII);

    Assert.NotNull(strings);
    Assert.Empty(strings);
}
|
||||
|
||||
[Fact]
|
||||
public void ReadStringsWithEncoding_Latin1_Filled()
|
||||
{
|
||||
@@ -290,6 +347,25 @@ namespace SabreTools.IO.Test.Extensions
|
||||
Assert.Equal(2, actual.Count);
|
||||
}
|
||||
|
||||
/// <summary>
/// Feeds bytes 0x00-0x1F followed by 0x80-0x9F to the Latin1 reader
/// and expects a non-null, empty result.
/// </summary>
[Fact]
public void ReadStringsWithEncoding_InvalidLatin1Chars_Empty()
{
    // Ascending run of 0x00-0x1F, then ascending run of 0x80-0x9F
    byte[]? arr =
    [
        .. Enumerable.Range(0x00, 0x20).Select(i => (byte)i),
        .. Enumerable.Range(0x80, 0x20).Select(i => (byte)i),
    ];

    var strings = arr.ReadStringsWithEncoding(1, Encoding.Latin1);

    Assert.NotNull(strings);
    Assert.Empty(strings);
}
|
||||
|
||||
[Fact]
|
||||
public void ReadStringsWithEncoding_UTF8_Filled()
|
||||
{
|
||||
@@ -308,6 +384,24 @@ namespace SabreTools.IO.Test.Extensions
|
||||
Assert.Equal(2, actual.Count);
|
||||
}
|
||||
|
||||
/// <summary>
/// Feeds bytes 0x00-0x1F, 0x80-0xC1 and 0xF5-0xFF to the UTF-8 reader
/// and expects a non-null, empty result.
/// </summary>
[Fact]
public void ReadStringsWithEncoding_InvalidUTF8Chars_Empty()
{
    // Three ascending runs: 0x00-0x1F, 0x80-0xC1, 0xF5-0xFF
    byte[]? arr =
    [
        .. Enumerable.Range(0x00, 0x20).Select(i => (byte)i),
        .. Enumerable.Range(0x80, 0x42).Select(i => (byte)i),
        .. Enumerable.Range(0xF5, 0x0B).Select(i => (byte)i),
    ];

    var strings = arr.ReadStringsWithEncoding(1, Encoding.UTF8);

    Assert.NotNull(strings);
    Assert.Empty(strings);
}
|
||||
|
||||
[Fact]
|
||||
public void ReadStringsWithEncoding_UTF16_Filled()
|
||||
{
|
||||
@@ -326,6 +420,21 @@ namespace SabreTools.IO.Test.Extensions
|
||||
Assert.Equal(2, actual.Count);
|
||||
}
|
||||
|
||||
/// <summary>
/// Feeds bytes 0x00-0x1F to the UTF-16 (little-endian) reader
/// and expects a non-null, empty result.
/// </summary>
[Fact]
public void ReadStringsWithEncoding_InvalidUTF16Chars_Empty()
{
    // Single ascending run of 0x00-0x1F
    byte[]? arr = [.. Enumerable.Range(0x00, 0x20).Select(i => (byte)i)];

    var strings = arr.ReadStringsWithEncoding(1, Encoding.Unicode);

    Assert.NotNull(strings);
    Assert.Empty(strings);
}
|
||||
|
||||
[Fact]
|
||||
public void ReadStringsWithEncoding_UTF32_Filled()
|
||||
{
|
||||
@@ -344,6 +453,21 @@ namespace SabreTools.IO.Test.Extensions
|
||||
Assert.Equal(2, actual.Count);
|
||||
}
|
||||
|
||||
/// <summary>
/// Feeds bytes 0x00-0x1F to the UTF-32 reader
/// and expects a non-null, empty result.
/// </summary>
[Fact]
public void ReadStringsWithEncoding_InvalidUTF32Chars_Empty()
{
    // Single ascending run of 0x00-0x1F
    byte[]? arr = [.. Enumerable.Range(0x00, 0x20).Select(i => (byte)i)];

    var strings = arr.ReadStringsWithEncoding(1, Encoding.UTF32);

    Assert.NotNull(strings);
    Assert.Empty(strings);
}
|
||||
|
||||
#endregion
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,6 +2,7 @@ using System;
|
||||
using System.Collections;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Threading;
|
||||
using SabreTools.IO.Extensions;
|
||||
using Xunit;
|
||||
|
||||
@@ -9,6 +10,22 @@ namespace SabreTools.IO.Test.Extensions
|
||||
{
|
||||
public class EnumerableExtensionsTests
|
||||
{
|
||||
#region IterateWithAction
|
||||
|
||||
/// <summary>
/// Sums the elements of a four-item list through IterateWithAction
/// and checks that the accumulated total is 10.
/// </summary>
[Fact]
public void IterateWithActionTest()
{
    int total = 0;
    List<int> values = [1, 2, 3, 4];

    // Interlocked.Add keeps the accumulation atomic for each callback
    values.IterateWithAction(value => Interlocked.Add(ref total, value));

    Assert.Equal(10, total);
}
|
||||
|
||||
#endregion
|
||||
|
||||
#region SafeEnumerate
|
||||
|
||||
[Fact]
|
||||
public void SafeEnumerate_Empty()
|
||||
{
|
||||
@@ -60,6 +77,8 @@ namespace SabreTools.IO.Test.Extensions
|
||||
Assert.Equal(2, list.Count);
|
||||
}
|
||||
|
||||
#endregion
|
||||
|
||||
/// <summary>
|
||||
/// Fake enumerable that uses <see cref="ErrorEnumerator"/>
|
||||
/// </summary>
|
||||
|
||||
@@ -200,29 +200,25 @@ namespace SabreTools.IO.Test.Extensions
|
||||
Stream? stream = new MemoryStream(bytes);
|
||||
var actual = stream.ReadStringsFrom(0, bytes.Length, 4);
|
||||
Assert.NotNull(actual);
|
||||
|
||||
// ASCII and UTF-8 are identical for the character range
|
||||
Assert.Equal(4, actual.Count);
|
||||
Assert.Equal(2, actual.Count);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void ReadStringsFrom_UTF8_Filled()
|
||||
public void ReadStringsFrom_Latin1Strings_Filled()
|
||||
{
|
||||
byte[]? bytes =
|
||||
[
|
||||
.. Encoding.UTF8.GetBytes("TEST"),
|
||||
.. Encoding.Latin1.GetBytes("TEST"),
|
||||
.. new byte[] { 0x00 },
|
||||
.. Encoding.UTF8.GetBytes("TWO"),
|
||||
.. Encoding.Latin1.GetBytes("TWO"),
|
||||
.. new byte[] { 0x00 },
|
||||
.. Encoding.UTF8.GetBytes("DATA"),
|
||||
.. Encoding.Latin1.GetBytes("DATA"),
|
||||
.. new byte[] { 0x00 },
|
||||
];
|
||||
Stream? stream = new MemoryStream(bytes);
|
||||
var actual = stream.ReadStringsFrom(0, bytes.Length, 4);
|
||||
Assert.NotNull(actual);
|
||||
|
||||
// ASCII and UTF-8 are identical for the character range
|
||||
Assert.Equal(4, actual.Count);
|
||||
Assert.Equal(2, actual.Count);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
@@ -254,11 +250,11 @@ namespace SabreTools.IO.Test.Extensions
|
||||
.. new byte[] { 0x00 },
|
||||
.. Encoding.ASCII.GetBytes("DATA1"),
|
||||
.. new byte[] { 0x00 },
|
||||
.. Encoding.UTF8.GetBytes("TEST2"),
|
||||
.. Encoding.Latin1.GetBytes("TEST2"),
|
||||
.. new byte[] { 0x00 },
|
||||
.. Encoding.UTF8.GetBytes("TWO2"),
|
||||
.. Encoding.Latin1.GetBytes("TWO2"),
|
||||
.. new byte[] { 0x00 },
|
||||
.. Encoding.UTF8.GetBytes("DATA2"),
|
||||
.. Encoding.Latin1.GetBytes("DATA2"),
|
||||
.. new byte[] { 0x00 },
|
||||
.. Encoding.Unicode.GetBytes("TEST3"),
|
||||
.. new byte[] { 0x00 },
|
||||
@@ -270,9 +266,7 @@ namespace SabreTools.IO.Test.Extensions
|
||||
Stream? stream = new MemoryStream(bytes);
|
||||
var actual = stream.ReadStringsFrom(0, bytes.Length, 5);
|
||||
Assert.NotNull(actual);
|
||||
|
||||
// ASCII and UTF-8 are identical for the character range
|
||||
Assert.Equal(10, actual.Count);
|
||||
Assert.Equal(6, actual.Count);
|
||||
}
|
||||
|
||||
#endregion
|
||||
|
||||
76
SabreTools.IO.Test/Extensions/StringExtensionsTests.cs
Normal file
76
SabreTools.IO.Test/Extensions/StringExtensionsTests.cs
Normal file
@@ -0,0 +1,76 @@
|
||||
using SabreTools.IO.Extensions;
|
||||
using Xunit;
|
||||
|
||||
namespace SabreTools.IO.Test.Extensions
{
    /// <summary>
    /// Data-driven tests for the Optional* string extension methods.
    /// Every theory runs the same six haystacks against the needle "ANY".
    /// </summary>
    public class StringExtensionsTests
    {
        #region OptionalContains

        /// <summary>
        /// Compares the result of OptionalContains with the expected value
        /// </summary>
        [Theory]
        [InlineData(null, "ANY", false)]
        [InlineData("", "ANY", false)]
        [InlineData("ANY", "ANY", true)]
        [InlineData("ANYTHING", "ANY", true)]
        [InlineData("THING", "ANY", false)]
        [InlineData("THINGANY", "ANY", true)]
        public void OptionalContainsTest(string? haystack, string needle, bool expected)
            => Assert.Equal(expected, haystack.OptionalContains(needle));

        #endregion

        #region OptionalEndsWith

        /// <summary>
        /// Compares the result of OptionalEndsWith with the expected value
        /// </summary>
        [Theory]
        [InlineData(null, "ANY", false)]
        [InlineData("", "ANY", false)]
        [InlineData("ANY", "ANY", true)]
        [InlineData("ANYTHING", "ANY", false)]
        [InlineData("THING", "ANY", false)]
        [InlineData("THINGANY", "ANY", true)]
        public void OptionalEndsWithTest(string? haystack, string needle, bool expected)
            => Assert.Equal(expected, haystack.OptionalEndsWith(needle));

        #endregion

        #region OptionalEquals

        /// <summary>
        /// Compares the result of OptionalEquals with the expected value
        /// </summary>
        [Theory]
        [InlineData(null, "ANY", false)]
        [InlineData("", "ANY", false)]
        [InlineData("ANY", "ANY", true)]
        [InlineData("ANYTHING", "ANY", false)]
        [InlineData("THING", "ANY", false)]
        [InlineData("THINGANY", "ANY", false)]
        public void OptionalEqualsTest(string? haystack, string needle, bool expected)
            => Assert.Equal(expected, haystack.OptionalEquals(needle));

        #endregion

        #region OptionalStartsWith

        /// <summary>
        /// Compares the result of OptionalStartsWith with the expected value
        /// </summary>
        [Theory]
        [InlineData(null, "ANY", false)]
        [InlineData("", "ANY", false)]
        [InlineData("ANY", "ANY", true)]
        [InlineData("ANYTHING", "ANY", true)]
        [InlineData("THING", "ANY", false)]
        [InlineData("THINGANY", "ANY", false)]
        public void OptionalStartsWithTest(string? haystack, string needle, bool expected)
            => Assert.Equal(expected, haystack.OptionalStartsWith(needle));

        #endregion
    }
}
|
||||
40
SabreTools.IO.Test/Streams/BufferedStreamTests.cs
Normal file
40
SabreTools.IO.Test/Streams/BufferedStreamTests.cs
Normal file
@@ -0,0 +1,40 @@
|
||||
using System.IO;
|
||||
using Xunit;
|
||||
|
||||
namespace SabreTools.IO.Test.Streams
{
    /// <summary>
    /// Tests for ReadNextByte on the project's BufferedStream wrapper
    /// </summary>
    public class BufferedStreamTests
    {
        #region ReadNextByte

        /// <summary>
        /// A wrapper over an empty source yields null
        /// </summary>
        [Fact]
        public void ReadNextByte_Empty_Null()
        {
            var buffered = new IO.Streams.BufferedStream(new MemoryStream());

            byte? next = buffered.ReadNextByte();

            Assert.Null(next);
        }

        /// <summary>
        /// A wrapper over a zero-filled source positioned at the start
        /// yields the first byte (0x00)
        /// </summary>
        [Fact]
        public void ReadNextByte_Filled_ValidPosition_Byte()
        {
            byte[] zeroes = new byte[1024];
            var buffered = new IO.Streams.BufferedStream(new MemoryStream(zeroes));

            byte? next = buffered.ReadNextByte();

            Assert.Equal((byte)0x00, next);
        }

        /// <summary>
        /// A wrapper over a source already positioned at its end yields null
        /// </summary>
        [Fact]
        public void ReadNextByte_Filled_InvalidPosition_Null()
        {
            byte[] zeroes = new byte[1024];
            var backing = new MemoryStream(zeroes);

            // Move to the end of the data before wrapping
            backing.Position = backing.Length;
            var buffered = new IO.Streams.BufferedStream(backing);

            byte? next = buffered.ReadNextByte();

            Assert.Null(next);
        }

        #endregion
    }
}
|
||||
@@ -3,8 +3,6 @@ using System.IO;
|
||||
using System.Text;
|
||||
using SabreTools.Hashing;
|
||||
using SabreTools.IO.Extensions;
|
||||
using SabreTools.Models.PKZIP;
|
||||
using static SabreTools.Models.PKZIP.Constants;
|
||||
|
||||
namespace SabreTools.IO.Compression.Deflate
|
||||
{
|
||||
@@ -20,6 +18,46 @@ namespace SabreTools.IO.Compression.Deflate
|
||||
/// </summary>
|
||||
private const int BufferSize = 1024 * 1024;
|
||||
|
||||
/// <summary>
|
||||
/// Local file header signature
|
||||
/// </summary>
|
||||
private const uint LocalFileHeaderSignature = 0x04034B50;
|
||||
|
||||
#endregion
|
||||
|
||||
#region Private Classes
|
||||
|
||||
/// <summary>
/// Reduced view of a PKZIP local file header that keeps only
/// the signature, CRC-32, the two sizes and the file name
/// </summary>
private class MinLocalFileHeader
{
    /// <summary>
    /// Header signature; the expected value is 0x04034B50
    /// </summary>
    public uint Signature { get; set; }

    /// <summary>
    /// CRC-32 value stored in the header
    /// </summary>
    public uint CRC32 { get; set; }

    /// <summary>
    /// Size of the compressed data
    /// </summary>
    public uint CompressedSize { get; set; }

    /// <summary>
    /// Size of the uncompressed data
    /// </summary>
    public uint UncompressedSize { get; set; }

    /// <summary>
    /// Name of the file; variable length, may be absent
    /// </summary>
    public string? FileName { get; set; }
}
|
||||
|
||||
#endregion
|
||||
|
||||
#region Extraction
|
||||
@@ -140,7 +178,7 @@ namespace SabreTools.IO.Compression.Deflate
|
||||
long current = source.Position;
|
||||
|
||||
// Parse the PKZIP header, if it exists
|
||||
LocalFileHeader? zipHeader = ParseLocalFileHeader(source);
|
||||
MinLocalFileHeader? zipHeader = ParseLocalFileHeader(source);
|
||||
long zipHeaderBytes = source.Position - current;
|
||||
|
||||
// Always trust the PKZIP CRC-32 value over what is supplied
|
||||
@@ -269,46 +307,39 @@ namespace SabreTools.IO.Compression.Deflate
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Parse a Stream into a local file header
|
||||
/// Parse a Stream into a minimal local file header
|
||||
/// </summary>
|
||||
/// <param name="data">Stream to parse</param>
|
||||
/// <returns>Filled local file header on success, null on error</returns>
|
||||
/// <remarks>Mirror of method in Serialization</remarks>
|
||||
private static LocalFileHeader? ParseLocalFileHeader(Stream data)
|
||||
/// <returns>Filled minimal local file header on success, null on error</returns>
|
||||
/// <remarks>Partial mirror of method in Serialization</remarks>
|
||||
private static MinLocalFileHeader? ParseLocalFileHeader(Stream data)
|
||||
{
|
||||
var header = new LocalFileHeader();
|
||||
var header = new MinLocalFileHeader();
|
||||
|
||||
header.Signature = data.ReadUInt32LittleEndian();
|
||||
if (header.Signature != LocalFileHeaderSignature)
|
||||
return null;
|
||||
|
||||
header.Version = data.ReadUInt16LittleEndian();
|
||||
header.Flags = (GeneralPurposeBitFlags)data.ReadUInt16LittleEndian();
|
||||
header.CompressionMethod = (CompressionMethod)data.ReadUInt16LittleEndian();
|
||||
header.LastModifedFileTime = data.ReadUInt16LittleEndian();
|
||||
header.LastModifiedFileDate = data.ReadUInt16LittleEndian();
|
||||
_ = data.ReadUInt16LittleEndian(); // Version
|
||||
_ = data.ReadUInt16LittleEndian(); // Flags
|
||||
_ = data.ReadUInt16LittleEndian(); // CompressionMethod
|
||||
_ = data.ReadUInt16LittleEndian(); // LastModifedFileTime
|
||||
_ = data.ReadUInt16LittleEndian(); // LastModifiedFileDate
|
||||
header.CRC32 = data.ReadUInt32LittleEndian();
|
||||
header.CompressedSize = data.ReadUInt32LittleEndian();
|
||||
header.UncompressedSize = data.ReadUInt32LittleEndian();
|
||||
header.FileNameLength = data.ReadUInt16LittleEndian();
|
||||
header.ExtraFieldLength = data.ReadUInt16LittleEndian();
|
||||
ushort fileNameLength = data.ReadUInt16LittleEndian();
|
||||
ushort extraFieldLength = data.ReadUInt16LittleEndian();
|
||||
|
||||
if (header.FileNameLength > 0 && data.Position + header.FileNameLength <= data.Length)
|
||||
if (fileNameLength > 0 && data.Position + fileNameLength <= data.Length)
|
||||
{
|
||||
byte[] filenameBytes = data.ReadBytes(header.FileNameLength);
|
||||
if (filenameBytes.Length != header.FileNameLength)
|
||||
return null;
|
||||
|
||||
byte[] filenameBytes = data.ReadBytes(fileNameLength);
|
||||
header.FileName = Encoding.ASCII.GetString(filenameBytes);
|
||||
}
|
||||
|
||||
// Parsing extras is skipped here, unlike in Serialization
|
||||
if (header.ExtraFieldLength > 0 && data.Position + header.ExtraFieldLength <= data.Length)
|
||||
{
|
||||
byte[] extraBytes = data.ReadBytes(header.ExtraFieldLength);
|
||||
if (extraBytes.Length != header.ExtraFieldLength)
|
||||
return null;
|
||||
}
|
||||
if (extraFieldLength > 0 && data.Position + extraFieldLength <= data.Length)
|
||||
_ = data.ReadBytes(extraFieldLength);
|
||||
|
||||
return header;
|
||||
}
|
||||
|
||||
58
SabreTools.IO/Compression/LZX/AlignedOffsetBlockData.cs
Normal file
58
SabreTools.IO/Compression/LZX/AlignedOffsetBlockData.cs
Normal file
@@ -0,0 +1,58 @@
|
||||
namespace SabreTools.IO.Compression.LZX
{
    /// <summary>
    /// Data for an aligned offset block. The layout matches a verbatim block,
    /// with one addition: an aligned offset tree comes before the other trees.
    /// </summary>
    /// <see href="https://interoperability.blob.core.windows.net/files/MS-PATCH/%5bMS-PATCH%5d.pdf"/>
    internal class AlignedOffsetBlockData : BlockData
    {
        /// <summary>
        /// The aligned offset tree
        /// </summary>
        /// <remarks>8 elements, 3 bits each</remarks>
        public byte[]? AlignedOffsetTree { get; set; }

        /// <summary>
        /// Pretree covering the first 256 elements of the main tree
        /// </summary>
        /// <remarks>20 elements, 4 bits each</remarks>
        public byte[]? PretreeFirst256 { get; set; }

        /// <summary>
        /// Path lengths for the first 256 elements of the main tree
        /// </summary>
        /// <remarks>Encoded using pretree</remarks>
        public int[]? PathLengthsFirst256 { get; set; }

        /// <summary>
        /// Pretree covering the rest of the main tree
        /// </summary>
        /// <remarks>20 elements, 4 bits each</remarks>
        public byte[]? PretreeRemainder { get; set; }

        /// <summary>
        /// Path lengths for the remaining elements of the main tree
        /// </summary>
        /// <remarks>Encoded using pretree</remarks>
        public int[]? PathLengthsRemainder { get; set; }

        /// <summary>
        /// Pretree covering the length tree
        /// </summary>
        /// <remarks>20 elements, 4 bits each</remarks>
        public byte[]? PretreeLengthTree { get; set; }

        /// <summary>
        /// Path lengths for the elements of the length tree
        /// </summary>
        /// <remarks>Encoded using pretree</remarks>
        public int[]? PathLengthsLengthTree { get; set; }

        /// <summary>
        /// Sequence of tokens (matches and literals)
        /// </summary>
        /// <remarks>Variable</remarks>
        public byte[]? TokenSequence { get; set; }
    }
}
|
||||
24
SabreTools.IO/Compression/LZX/Block.cs
Normal file
24
SabreTools.IO/Compression/LZX/Block.cs
Normal file
@@ -0,0 +1,24 @@
|
||||
namespace SabreTools.IO.Compression.LZX
{
    /// <summary>
    /// A single LZXD block: either a run of compressed data that shares one set
    /// of Huffman trees, or a run of uncompressed data. A compressed stream holds
    /// one or more blocks, and each block carries its own Huffman trees. Blocks
    /// are not tied to chunk boundaries: one block may span several chunks and
    /// one chunk may hold several blocks. The chunk count follows the size of
    /// the data being compressed, whereas the block count follows how well the
    /// data is compressed.
    /// </summary>
    /// <see href="https://interoperability.blob.core.windows.net/files/MS-PATCH/%5bMS-PATCH%5d.pdf"/>
    internal class Block
    {
        /// <summary>
        /// Header of the block
        /// </summary>
        public BlockHeader? Header { get; set; }

        /// <summary>
        /// Data of the block
        /// </summary>
        public BlockData? BlockData { get; set; }
    }
}
|
||||
8
SabreTools.IO/Compression/LZX/BlockData.cs
Normal file
8
SabreTools.IO/Compression/LZX/BlockData.cs
Normal file
@@ -0,0 +1,8 @@
|
||||
namespace SabreTools.IO.Compression.LZX
{
    /// <summary>
    /// Abstract base type for block data; it declares no members
    /// </summary>
    /// <see href="https://interoperability.blob.core.windows.net/files/MS-PATCH/%5bMS-PATCH%5d.pdf"/>
    internal abstract class BlockData
    {
        // Intentionally empty: no field is shared by every kind of block data
    }
}
|
||||
33
SabreTools.IO/Compression/LZX/BlockHeader.cs
Normal file
33
SabreTools.IO/Compression/LZX/BlockHeader.cs
Normal file
@@ -0,0 +1,33 @@
|
||||
namespace SabreTools.IO.Compression.LZX
{
    /// <summary>
    /// Generic block header. The Block Type field (section 2.3.1.1) tells which
    /// type of block follows. The Block Size field (section 2.3.1.2) gives the
    /// number of uncompressed bytes the block represents. A type-specific header
    /// describing the rest of the block comes after this generic header.
    /// </summary>
    /// <see href="https://interoperability.blob.core.windows.net/files/MS-PATCH/%5bMS-PATCH%5d.pdf"/>
    internal class BlockHeader
    {
        /// <summary>
        /// Type of the block that follows
        /// </summary>
        /// <remarks>3 bits</remarks>
        public BlockType BlockType { get; set; }

        /// <summary>
        /// High 8 bits of the 24-bit block size
        /// </summary>
        /// <remarks>8 bits</remarks>
        public byte BlockSizeMSB { get; set; }

        /// <summary>
        /// Middle 8 bits of the 24-bit block size
        /// </summary>
        /// <remarks>8 bits</remarks>
        public byte BlockSizeByte2 { get; set; }

        /// <summary>
        /// Low 8 bits of the 24-bit block size
        /// </summary>
        /// <remarks>8 bits</remarks>
        public byte BlocksizeLSB { get; set; }
    }
}
|
||||
25
SabreTools.IO/Compression/LZX/Chunk.cs
Normal file
25
SabreTools.IO/Compression/LZX/Chunk.cs
Normal file
@@ -0,0 +1,25 @@
|
||||
namespace SabreTools.IO.Compression.LZX
{
    /// <summary>
    /// One chunk of compressed data emitted by the LZXD compressor. Every chunk
    /// represents exactly 32 KB of uncompressed data, except the last chunk in
    /// the stream, which can represent less. So that an exact number of input
    /// bytes maps to an exact number of output bytes per chunk, the compressed
    /// bitstream is padded with up to 15 zero bits after each 32 KB of
    /// uncompressed data, realigning it on a 16-bit boundary (even byte
    /// boundary) for the next 32 KB. The compressed chunk therefore has a
    /// byte-aligned size. That size could be smaller than 32 KB, or larger than
    /// 32 KB if the data is incompressible when the chunk is not the last one.
    /// </summary>
    internal class Chunk
    {
        /// <summary>
        /// Header of the chunk
        /// </summary>
        public ChunkHeader? Header { get; set; }

        /// <summary>
        /// Headers and data of the blocks
        /// </summary>
        public Block[]? Blocks { get; set; }
    }
}
|
||||
46
SabreTools.IO/Compression/LZX/ChunkHeader.cs
Normal file
46
SabreTools.IO/Compression/LZX/ChunkHeader.cs
Normal file
@@ -0,0 +1,46 @@
|
||||
namespace SabreTools.IO.Compression.LZX
{
    /// <summary>
    /// Header of a chunk emitted by the LZXD compressor. Every chunk represents
    /// exactly 32 KB of uncompressed data, except the last chunk in the stream,
    /// which can represent less. So that an exact number of input bytes maps to
    /// an exact number of output bytes per chunk, the compressed bitstream is
    /// padded with up to 15 zero bits after each 32 KB of uncompressed data,
    /// realigning it on a 16-bit boundary (even byte boundary) for the next
    /// 32 KB. The compressed chunk therefore has a byte-aligned size. That size
    /// could be smaller than 32 KB, or larger than 32 KB if the data is
    /// incompressible when the chunk is not the last one.
    /// </summary>
    internal class ChunkHeader
    {
        /// <summary>
        /// Compressed chunk-size prefix. The LZXD engine writes this field ahead
        /// of each compressed chunk in the compressed byte stream; it is byte
        /// aligned, little-endian and 16 bits wide. The chain of prefixes could
        /// be followed without decompressing any data: the next prefix is found
        /// at the absolute byte offset of this prefix, plus 2 (the size of the
        /// prefix field), plus the current chunk size.
        /// </summary>
        public ushort ChunkSize { get; set; }

        /// <summary>
        /// E8 translation indicator. The first bit of the first chunk in the
        /// LZXD bitstream (after the 2-byte chunk-size prefix described in
        /// section 2.2.1) signals whether two 16-bit fields follow that bit.
        /// When the bit is set, E8 translation is enabled for all following
        /// chunks in the stream, and the 32-bit value derived from the two
        /// 16-bit fields is used as the E8_file_size that was provided to the
        /// compressor when E8 translation was enabled. E8_file_size is
        /// completely independent of the length of the uncompressed data.
        /// E8 call translation is disabled after the 32,768th chunk (after
        /// 1 gigabyte (GB) of uncompressed data).
        /// </summary>
        public byte E8Translation { get; set; }

        /// <summary>
        /// High WORD of the E8 translation size
        /// </summary>
        public ushort? TranslationSizeHighWord { get; set; }

        /// <summary>
        /// Low WORD of the E8 translation size
        /// </summary>
        public ushort? TranslationSizeLowWord { get; set; }
    }
}
|
||||
38
SabreTools.IO/Compression/LZX/Constants.cs
Normal file
38
SabreTools.IO/Compression/LZX/Constants.cs
Normal file
@@ -0,0 +1,38 @@
|
||||
namespace SabreTools.IO.Compression.LZX
{
    /// <summary>
    /// Numeric constants used by the LZX code
    /// </summary>
    internal static class Constants
    {
        // Values defined by the LZX specification

        public const int LZX_MIN_MATCH = 2;
        public const int LZX_MAX_MATCH = 257;
        public const int LZX_NUM_CHARS = 256;
        public const int LZX_PRETREE_NUM_ELEMENTS = 20;

        /// <summary>
        /// Number of elements in the aligned offset tree
        /// </summary>
        public const int LZX_ALIGNED_NUM_ELEMENTS = 8;

        /// <summary>
        /// Not listed in the spec
        /// </summary>
        public const int LZX_NUM_PRIMARY_LENGTHS = 7;

        /// <summary>
        /// Number of elements in the length tree
        /// </summary>
        public const int LZX_NUM_SECONDARY_LENGTHS = 249;

        // Huffman table definitions; the *_TABLEBITS values may be tuned

        public const int LZX_PRETREE_MAXSYMBOLS = LZX_PRETREE_NUM_ELEMENTS;
        public const int LZX_PRETREE_TABLEBITS = 6;
        public const int LZX_MAINTREE_MAXSYMBOLS = LZX_NUM_CHARS + 50 * 8;
        public const int LZX_MAINTREE_TABLEBITS = 12;
        public const int LZX_LENGTH_MAXSYMBOLS = LZX_NUM_SECONDARY_LENGTHS + 1;
        public const int LZX_LENGTH_TABLEBITS = 12;
        public const int LZX_ALIGNED_MAXSYMBOLS = LZX_ALIGNED_NUM_ELEMENTS;
        public const int LZX_ALIGNED_TABLEBITS = 7;

        // Length table decoding is allowed to overrun by this margin
        public const int LZX_LENTABLE_SAFETY = 64;
    }
}
|
||||
48
SabreTools.IO/Compression/LZX/Enums.cs
Normal file
48
SabreTools.IO/Compression/LZX/Enums.cs
Normal file
@@ -0,0 +1,48 @@
|
||||
namespace SabreTools.IO.Compression.LZX
|
||||
{
|
||||
/// <summary>
/// Block type, stored in 3 bits; every one of the eight possible
/// bit patterns has a named member
/// </summary>
internal enum BlockType : byte
{
    /// <summary>
    /// Invalid block type (bits 000)
    /// </summary>
    INVALID_0 = 0,

    /// <summary>
    /// Verbatim block (bits 001)
    /// </summary>
    Verbatim = 1,

    /// <summary>
    /// Aligned offset block (bits 010)
    /// </summary>
    AlignedOffset = 2,

    /// <summary>
    /// Uncompressed block (bits 011)
    /// </summary>
    Uncompressed = 3,

    /// <summary>
    /// Invalid block type (bits 100)
    /// </summary>
    INVALID_4 = 4,

    /// <summary>
    /// Invalid block type (bits 101)
    /// </summary>
    INVALID_5 = 5,

    /// <summary>
    /// Invalid block type (bits 110)
    /// </summary>
    INVALID_6 = 6,

    /// <summary>
    /// Invalid block type (bits 111)
    /// </summary>
    INVALID_7 = 7,
}
|
||||
}
|
||||
54
SabreTools.IO/Compression/LZX/UncompressedBlockData.cs
Normal file
54
SabreTools.IO/Compression/LZX/UncompressedBlockData.cs
Normal file
@@ -0,0 +1,54 @@
|
||||
namespace SabreTools.IO.Compression.LZX
|
||||
{
|
||||
/// <summary>
/// Layout of an uncompressed block, i.e. the data that follows the generic block header.
///
/// The block opens with 1 to 16 bits of zero padding that align the bit buffer on a 16-bit
/// boundary. At that point the bitstream ends and a byte stream begins. The byte stream holds
/// new 32-bit values for R0, R1, and R2 in little-endian form, followed by the uncompressed
/// data bytes themselves. If the uncompressed data length is odd, one extra byte of zero
/// padding is encoded so that the following bitstream is realigned.
///
/// The bitstream of byte-swapped 16-bit integers then resumes with the next Block Type field
/// (if there are subsequent blocks).
///
/// The decoded R0, R1, and R2 values are the initial repeated offset values used to decode a
/// subsequent compressed block, if one is present.
/// </summary>
/// <see href="https://interoperability.blob.core.windows.net/files/MS-PATCH/%5bMS-PATCH%5d.pdf"/>
internal class UncompressedBlockData : BlockData
{
    /// <summary>
    /// Zero-valued padding bits that align the following field on a 16-bit boundary
    /// </summary>
    /// <remarks>Every padding bit is zero</remarks>
    public ushort PaddingBits { get; set; }

    /// <summary>
    /// R0 value, ordered from least significant to most significant byte
    /// (little-endian DWORD ([MS-DTYP]))
    /// </summary>
    /// <remarks>Part of the byte stream itself, not of the bitstream of byte-swapped 16-bit words</remarks>
    public uint R0 { get; set; }

    /// <summary>
    /// R1 value, ordered from least significant to most significant byte
    /// (little-endian DWORD)
    /// </summary>
    /// <remarks>Part of the byte stream itself, not of the bitstream of byte-swapped 16-bit words</remarks>
    public uint R1 { get; set; }

    /// <summary>
    /// R2 value, ordered from least significant to most significant byte
    /// (little-endian DWORD)
    /// </summary>
    /// <remarks>Part of the byte stream itself, not of the bitstream of byte-swapped 16-bit words</remarks>
    public uint R2 { get; set; }

    /// <summary>
    /// The uncompressed data bytes; the direct memcpy function, as specified
    /// in [IEEE1003.1], can be used on them
    /// </summary>
    /// <remarks>Part of the byte stream itself, not of the bitstream of byte-swapped 16-bit words</remarks>
    public byte[]? RawDataBytes { get; set; }

    /// <summary>
    /// Alignment byte that exists only when the uncompressed size is odd
    /// </summary>
    public byte AlignmentByte { get; set; }
}
|
||||
}
|
||||
51
SabreTools.IO/Compression/LZX/VerbatimBlockData.cs
Normal file
51
SabreTools.IO/Compression/LZX/VerbatimBlockData.cs
Normal file
@@ -0,0 +1,51 @@
|
||||
namespace SabreTools.IO.Compression.LZX
|
||||
{
|
||||
/// <summary>
/// Fields of a verbatim block, i.e. everything that follows the generic block header
/// </summary>
/// <see href="https://interoperability.blob.core.windows.net/files/MS-PATCH/%5bMS-PATCH%5d.pdf"/>
internal class VerbatimBlockData : BlockData
{
    #region Main tree, first 256 elements

    /// <summary>
    /// Pretree describing the first 256 elements of the main tree
    /// </summary>
    /// <remarks>Holds 20 elements of 4 bits each</remarks>
    public byte[]? PretreeFirst256 { get; set; }

    /// <summary>
    /// Path lengths for the first 256 elements of the main tree
    /// </summary>
    /// <remarks>Stored encoded with the pretree</remarks>
    public int[]? PathLengthsFirst256 { get; set; }

    #endregion

    #region Main tree, remaining elements

    /// <summary>
    /// Pretree describing the remainder of the main tree
    /// </summary>
    /// <remarks>Holds 20 elements of 4 bits each</remarks>
    public byte[]? PretreeRemainder { get; set; }

    /// <summary>
    /// Path lengths for the remaining elements of the main tree
    /// </summary>
    /// <remarks>Stored encoded with the pretree</remarks>
    public int[]? PathLengthsRemainder { get; set; }

    #endregion

    #region Length tree

    /// <summary>
    /// Pretree describing the length tree
    /// </summary>
    /// <remarks>Holds 20 elements of 4 bits each</remarks>
    public byte[]? PretreeLengthTree { get; set; }

    /// <summary>
    /// Path lengths for the elements of the length tree
    /// </summary>
    /// <remarks>Stored encoded with the pretree</remarks>
    public int[]? PathLengthsLengthTree { get; set; }

    #endregion

    /// <summary>
    /// The token sequence, made up of matches and literals
    /// </summary>
    /// <remarks>Variable length</remarks>
    public byte[]? TokenSequence { get; set; }
}
|
||||
}
|
||||
28
SabreTools.IO/Compression/MSZIP/BlockHeader.cs
Normal file
28
SabreTools.IO/Compression/MSZIP/BlockHeader.cs
Normal file
@@ -0,0 +1,28 @@
|
||||
namespace SabreTools.IO.Compression.MSZIP
|
||||
{
|
||||
/// <summary>
/// Header found at the start of every MSZIP block.
///
/// Each MSZIP block MUST consist of a 2-byte MSZIP signature and one or more RFC 1951 blocks.
/// The signature MUST consist of the bytes 0x43 and 0x4B, and it MUST be the first 2 bytes in
/// the MSZIP block.
///
/// Each MSZIP block is the result of a single deflate compression operation, as defined in
/// [RFC1951]. The compressor performing that operation MUST generate one or more RFC 1951
/// blocks; the number, deflation mode, and type of those blocks is determined by the
/// compressor, as defined in [RFC1951]. The last RFC 1951 block in each MSZIP block MUST be
/// marked as the "end" of the stream(1), as defined by [RFC1951] section 3.2.3. Decoding trees
/// MUST be discarded after each RFC 1951 block, but the history buffer MUST be maintained.
/// Each MSZIP block MUST represent no more than 32 KB of uncompressed data.
///
/// The maximum compressed size of each MSZIP block is 32 KB + 12 bytes. This allows an MSZIP
/// block to contain 32 KB of data split between two noncompressed RFC 1951 blocks, each of
/// which has a value of BTYPE = 00.
/// </summary>
/// <see href="https://interoperability.blob.core.windows.net/files/MS-MCI/%5bMS-MCI%5d.pdf"/>
internal class BlockHeader
{
    /// <summary>
    /// The 2-byte MSZIP signature, 'CK'
    /// </summary>
    public ushort Signature { get; set; }
}
|
||||
}
|
||||
@@ -1,7 +1,6 @@
|
||||
using System;
|
||||
using System.IO;
|
||||
using SabreTools.IO.Extensions;
|
||||
using SabreTools.Models.Compression.MSZIP;
|
||||
|
||||
namespace SabreTools.IO.Compression.MSZIP
|
||||
{
|
||||
|
||||
50
SabreTools.IO/Compression/Quantum/Constants.cs
Normal file
50
SabreTools.IO/Compression/Quantum/Constants.cs
Normal file
@@ -0,0 +1,50 @@
|
||||
namespace SabreTools.IO.Compression.Quantum
|
||||
{
|
||||
/// <summary>
/// Lookup tables used by the Quantum implementation
/// </summary>
/// <see href="http://www.russotto.net/quantumcomp.html"/>
internal static class Constants
{
    /// <summary>
    /// Position slot table with 42 entries
    /// </summary>
    /// <remarks>
    /// Each entry equals the previous entry plus 2 raised to the previous
    /// entry's value in <see cref="PositionExtraBits"/>
    /// </remarks>
    public static readonly int[] PositionSlot =
    [
        0x00000, 0x00001, 0x00002, 0x00003, 0x00004, 0x00006, 0x00008, 0x0000c,
        0x00010, 0x00018, 0x00020, 0x00030, 0x00040, 0x00060, 0x00080, 0x000c0,
        0x00100, 0x00180, 0x00200, 0x00300, 0x00400, 0x00600, 0x00800, 0x00c00,
        0x01000, 0x01800, 0x02000, 0x03000, 0x04000, 0x06000, 0x08000, 0x0c000,
        0x10000, 0x18000, 0x20000, 0x30000, 0x40000, 0x60000, 0x80000, 0xc0000,
        0x100000, 0x180000
    ];

    /// <summary>
    /// Extra bit counts, one per entry of <see cref="PositionSlot"/> (42 entries)
    /// </summary>
    public static readonly int[] PositionExtraBits =
    [
        0, 0, 0, 0, 1, 1, 2, 2,
        3, 3, 4, 4, 5, 5, 6, 6,
        7, 7, 8, 8, 9, 9, 10, 10,
        11, 11, 12, 12, 13, 13, 14, 14,
        15, 15, 16, 16, 17, 17, 18, 18,
        19, 19
    ];

    /// <summary>
    /// Length slot table with 27 entries
    /// </summary>
    /// <remarks>
    /// Each entry equals the previous entry plus 2 raised to the previous
    /// entry's value in <see cref="LengthExtraBits"/>
    /// </remarks>
    public static readonly int[] LengthSlot =
    [
        0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x08,
        0x0a, 0x0c, 0x0e, 0x12, 0x16, 0x1a, 0x1e, 0x26,
        0x2e, 0x36, 0x3e, 0x4e, 0x5e, 0x6e, 0x7e, 0x9e,
        0xbe, 0xde, 0xfe
    ];

    /// <summary>
    /// Extra bit counts, one per entry of <see cref="LengthSlot"/> (27 entries)
    /// </summary>
    public static readonly int[] LengthExtraBits =
    [
        0, 0, 0, 0, 0, 0, 1, 1,
        1, 1, 2, 2, 2, 2, 3, 3,
        3, 3, 4, 4, 4, 4, 5, 5,
        5, 5, 0
    ];

    /// <summary>
    /// Number of position slots, looked up by (tsize - 10)
    /// </summary>
    public static readonly int[] NumPositionSlots =
    [
        20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42
    ];
}
|
||||
}
|
||||
@@ -2,8 +2,7 @@ using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using SabreTools.IO.Streams;
|
||||
using SabreTools.Models.Compression.Quantum;
|
||||
using static SabreTools.Models.Compression.Quantum.Constants;
|
||||
using static SabreTools.IO.Compression.Quantum.Constants;
|
||||
|
||||
namespace SabreTools.IO.Compression.Quantum
|
||||
{
|
||||
|
||||
45
SabreTools.IO/Compression/Quantum/Enums.cs
Normal file
45
SabreTools.IO/Compression/Quantum/Enums.cs
Normal file
@@ -0,0 +1,45 @@
|
||||
namespace SabreTools.IO.Compression.Quantum
|
||||
{
|
||||
/// <summary>
/// Selector values; four of them name literal models and four name LZ models
/// </summary>
internal enum SelectorModel
{
    #region Literal models

    /// <summary>
    /// Literal model with 64 entries, starting at symbol 0
    /// </summary>
    SELECTOR_0 = 0,

    /// <summary>
    /// Literal model with 64 entries, starting at symbol 64
    /// </summary>
    SELECTOR_1 = 1,

    /// <summary>
    /// Literal model with 64 entries, starting at symbol 128
    /// </summary>
    SELECTOR_2 = 2,

    /// <summary>
    /// Literal model with 64 entries, starting at symbol 192
    /// </summary>
    SELECTOR_3 = 3,

    #endregion

    #region LZ models

    /// <summary>
    /// LZ model for 3 character matches; at most 24 entries, starting at symbol 0
    /// </summary>
    SELECTOR_4 = 4,

    /// <summary>
    /// LZ model for 4 character matches; at most 36 entries, starting at symbol 0
    /// </summary>
    SELECTOR_5 = 5,

    /// <summary>
    /// LZ model for 5+ character matches; at most 42 entries, starting at symbol 0
    /// </summary>
    SELECTOR_6_POSITION = 6,

    /// <summary>
    /// LZ model for 5+ character matches; 27 entries, starting at symbol 0
    /// </summary>
    SELECTOR_6_LENGTH = 7,

    #endregion
}
|
||||
}
|
||||
24
SabreTools.IO/Compression/Quantum/Model.cs
Normal file
24
SabreTools.IO/Compression/Quantum/Model.cs
Normal file
@@ -0,0 +1,24 @@
|
||||
namespace SabreTools.IO.Compression.Quantum
|
||||
{
|
||||
/// <summary>
/// State of a single model: its symbol table plus the bookkeeping values kept alongside it
/// </summary>
/// <see href="http://www.russotto.net/quantumcomp.html"/>
internal sealed class Model
{
    /// <summary>
    /// Entry count recorded for this model
    /// </summary>
    public int Entries { get; set; }

    /// <summary>
    /// Symbol table of the model
    /// </summary>
    /// <remarks>
    /// Every model starts out with its symbols placed in the table in
    /// symbol order, each of them with a frequency of 1
    /// </remarks>
    public ModelSymbol[]? Symbols { get; set; }

    /// <summary>
    /// Total frequency of the model
    /// </summary>
    /// <remarks>
    /// Starts out equal to the number of entries in the table
    /// </remarks>
    public int TotalFrequency { get; set; }

    /// <summary>
    /// The time_to_reorder value of the model
    /// </summary>
    /// <remarks>Starts out at 4</remarks>
    public int TimeToReorder { get; set; }
}
|
||||
}
|
||||
15
SabreTools.IO/Compression/Quantum/ModelSymbol.cs
Normal file
15
SabreTools.IO/Compression/Quantum/ModelSymbol.cs
Normal file
@@ -0,0 +1,15 @@
|
||||
namespace SabreTools.IO.Compression.Quantum
|
||||
{
|
||||
/// <summary>
/// One entry of a model's symbol table
/// </summary>
/// <see href="http://www.russotto.net/quantumcomp.html"/>
internal sealed class ModelSymbol
{
    /// <summary>
    /// Symbol value held by this entry
    /// </summary>
    public ushort Symbol { get; set; }

    /// <summary>
    /// Cumulative frequency: the frequency of all symbols located at a
    /// higher index in the table than this one. As a consequence, the
    /// last entry in the table has a cumulative frequency of 0.
    /// </summary>
    public ushort CumulativeFrequency { get; set; }
}
|
||||
}
|
||||
@@ -1,6 +1,5 @@
|
||||
using System;
|
||||
using System.IO;
|
||||
using SabreTools.Models.LZ;
|
||||
|
||||
namespace SabreTools.IO.Compression.SZDD
|
||||
{
|
||||
@@ -15,7 +14,7 @@ namespace SabreTools.IO.Compression.SZDD
|
||||
/// <summary>
|
||||
/// Source stream for the decompressor
|
||||
/// </summary>
|
||||
private readonly BufferedStream _source;
|
||||
private readonly Streams.BufferedStream _source;
|
||||
|
||||
/// <summary>
|
||||
/// SZDD format being decompressed
|
||||
@@ -37,19 +36,19 @@ namespace SabreTools.IO.Compression.SZDD
|
||||
|
||||
// Initialize the window with space characters
|
||||
_window = Array.ConvertAll(_window, b => (byte)0x20);
|
||||
_source = new BufferedStream(source);
|
||||
_source = new Streams.BufferedStream(source);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Create a KWAJ decompressor
|
||||
/// </summary>
|
||||
public static Decompressor CreateKWAJ(byte[] source, KWAJCompressionType compressionType)
|
||||
public static Decompressor CreateKWAJ(byte[] source, ushort compressionType)
|
||||
=> CreateKWAJ(new MemoryStream(source), compressionType);
|
||||
|
||||
/// <summary>
|
||||
/// Create a KWAJ decompressor
|
||||
/// </summary>
|
||||
public static Decompressor CreateKWAJ(Stream source, KWAJCompressionType compressionType)
|
||||
public static Decompressor CreateKWAJ(Stream source, ushort compressionType)
|
||||
{
|
||||
// Create the decompressor
|
||||
var decompressor = new Decompressor(source);
|
||||
@@ -57,11 +56,11 @@ namespace SabreTools.IO.Compression.SZDD
|
||||
// Set the format and return
|
||||
decompressor._format = compressionType switch
|
||||
{
|
||||
KWAJCompressionType.NoCompression => Format.KWAJNoCompression,
|
||||
KWAJCompressionType.NoCompressionXor => Format.KWAJXor,
|
||||
KWAJCompressionType.QBasic => Format.KWAJQBasic,
|
||||
KWAJCompressionType.LZH => Format.KWAJLZH,
|
||||
KWAJCompressionType.MSZIP => Format.KWAJMSZIP,
|
||||
0x0000 => Format.KWAJNoCompression,
|
||||
0x0001 => Format.KWAJXor,
|
||||
0x0002 => Format.KWAJQBasic,
|
||||
0x0003 => Format.KWAJLZH,
|
||||
0x0004 => Format.KWAJMSZIP,
|
||||
_ => throw new IndexOutOfRangeException(nameof(source)),
|
||||
};
|
||||
return decompressor;
|
||||
@@ -229,77 +228,5 @@ namespace SabreTools.IO.Compression.SZDD
|
||||
dest.Flush();
|
||||
return true;
|
||||
}
|
||||
|
||||
/// <summary>
/// Byte-at-a-time reader that pulls data from a source stream in
/// blocks of up to 2048 bytes
/// </summary>
private class BufferedStream
{
    /// <summary>
    /// Stream the buffer is refilled from
    /// </summary>
    private readonly Stream _source;

    /// <summary>
    /// Block of data most recently read from the source
    /// </summary>
    private readonly byte[] _buffer = new byte[2048];

    /// <summary>
    /// Index of the next unread byte in the buffer
    /// </summary>
    private int _bufferPtr = 0;

    /// <summary>
    /// Number of valid bytes in the buffer; -1 until the first read happens
    /// </summary>
    private int _available = -1;

    /// <summary>
    /// Wrap a source stream; nothing is read until the first byte is requested
    /// </summary>
    public BufferedStream(Stream source)
    {
        _source = source;
    }

    /// <summary>
    /// Get the next byte, refilling the buffer from the source when needed
    /// </summary>
    /// <returns>The next byte, or null when the source yields no more data</returns>
    public byte? ReadNextByte()
    {
        if (EnsureBuffer())
        {
            byte next = _buffer[_bufferPtr];
            _bufferPtr++;
            return next;
        }

        return null;
    }

    /// <summary>
    /// Make sure at least one unread byte is buffered
    /// </summary>
    /// <returns>False when a refill attempt reads zero bytes, true otherwise</returns>
    private bool EnsureBuffer()
    {
        // A refill is needed before the very first read and whenever
        // every buffered byte has already been handed out
        bool neverFilled = _available == -1;
        bool exhausted = _bufferPtr >= _available;
        if (!neverFilled && !exhausted)
            return true;

        _available = _source.Read(_buffer, 0, _buffer.Length);
        _bufferPtr = 0;
        return _available != 0;
    }
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,7 +2,6 @@ using System;
|
||||
using System.IO;
|
||||
using SabreTools.Hashing;
|
||||
using SabreTools.Matching;
|
||||
using static SabreTools.Models.MoPaQ.Constants;
|
||||
|
||||
namespace SabreTools.IO.Encryption
|
||||
{
|
||||
@@ -11,6 +10,14 @@ namespace SabreTools.IO.Encryption
|
||||
/// </summary>
|
||||
public class MoPaQDecrypter
|
||||
{
|
||||
#region Constants
|
||||
|
||||
private const uint MPQ_HASH_KEY2_MIX = 0x400;
|
||||
|
||||
private const uint STORM_BUFFER_SIZE = 0x500;
|
||||
|
||||
#endregion
|
||||
|
||||
#region Private Instance Variables
|
||||
|
||||
/// <summary>
|
||||
|
||||
@@ -59,33 +59,31 @@ namespace SabreTools.IO.Extensions
|
||||
/// </summary>
|
||||
/// <param name="charLimit">Number of characters needed to be a valid string, default 5</param>
|
||||
/// <returns>String list containing the requested data, null on error</returns>
|
||||
/// <remarks>A maximum of 16KiB of data can be scanned at a time</remarks>
|
||||
#if NET5_0_OR_GREATER
|
||||
/// <remarks>This reads both Latin1 and UTF-16 strings from the input data</remarks>
|
||||
#else
|
||||
/// <remarks>This reads both ASCII and UTF-16 strings from the input data</remarks>
|
||||
#endif
|
||||
public static List<string>? ReadStringsFrom(this byte[]? input, int charLimit = 5)
|
||||
{
|
||||
// Validate the data
|
||||
if (input == null || input.Length == 0)
|
||||
return null;
|
||||
|
||||
// Limit to 16KiB of data
|
||||
if (input.Length > 16384)
|
||||
{
|
||||
int offset = 0;
|
||||
input = input.ReadBytes(ref offset, 16384);
|
||||
}
|
||||
|
||||
#if NET5_0_OR_GREATER
|
||||
// Check for Latin1 strings
|
||||
var asciiStrings = input.ReadStringsWithEncoding(charLimit, Encoding.Latin1);
|
||||
#else
|
||||
// Check for ASCII strings
|
||||
var asciiStrings = input.ReadStringsWithEncoding(charLimit, Encoding.ASCII);
|
||||
|
||||
// Check for UTF-8 strings
|
||||
// We are limiting the check for Unicode characters with a second byte of 0x00 for now
|
||||
var utf8Strings = input.ReadStringsWithEncoding(charLimit, Encoding.UTF8);
|
||||
#endif
|
||||
|
||||
// Check for Unicode strings
|
||||
// We are limiting the check for Unicode characters with a second byte of 0x00 for now
|
||||
var unicodeStrings = input.ReadStringsWithEncoding(charLimit, Encoding.Unicode);
|
||||
|
||||
// Ignore duplicate strings across encodings
|
||||
List<string> sourceStrings = [.. asciiStrings, .. utf8Strings, .. unicodeStrings];
|
||||
List<string> sourceStrings = [.. asciiStrings, .. unicodeStrings];
|
||||
|
||||
// Sort the strings and return
|
||||
sourceStrings.Sort();
|
||||
@@ -99,11 +97,7 @@ namespace SabreTools.IO.Extensions
|
||||
/// <param name="charLimit">Number of characters needed to be a valid string</param>
|
||||
/// <param name="encoding">Character encoding to use for checking</param>
|
||||
/// <returns>String list containing the requested data, empty on error</returns>
|
||||
/// <remarks>
|
||||
/// This method has a couple of notable implementation details:
|
||||
/// - Strings can only have a maximum of 64 characters
|
||||
/// - Characters that fall outside of the extended ASCII set will be unused
|
||||
/// </remarks>
|
||||
/// <remarks>Characters with the higher bytes set are unused</remarks>
|
||||
#if NET20
|
||||
public static List<string> ReadStringsWithEncoding(this byte[]? bytes, int charLimit, Encoding encoding)
|
||||
#else
|
||||
@@ -115,6 +109,22 @@ namespace SabreTools.IO.Extensions
|
||||
if (charLimit <= 0 || charLimit > bytes.Length)
|
||||
return [];
|
||||
|
||||
// Short-circuit for some encoding types
|
||||
if (encoding.CodePage == Encoding.ASCII.CodePage)
|
||||
return bytes.ReadAsciiStrings(charLimit);
|
||||
#if NET5_0_OR_GREATER
|
||||
else if (encoding.CodePage == Encoding.Latin1.CodePage)
|
||||
return bytes.ReadFixedWidthEncodingStrings(charLimit, Encoding.Latin1, 1);
|
||||
#endif
|
||||
else if (encoding.IsSingleByte)
|
||||
return bytes.ReadFixedWidthEncodingStrings(charLimit, encoding, 1);
|
||||
else if (encoding.CodePage == Encoding.Unicode.CodePage)
|
||||
return bytes.ReadFixedWidthEncodingStrings(charLimit, Encoding.Unicode, 2);
|
||||
else if (encoding.CodePage == Encoding.BigEndianUnicode.CodePage)
|
||||
return bytes.ReadFixedWidthEncodingStrings(charLimit, Encoding.BigEndianUnicode, 2);
|
||||
else if (encoding.CodePage == Encoding.UTF32.CodePage)
|
||||
return bytes.ReadFixedWidthEncodingStrings(charLimit, Encoding.UTF32, 4);
|
||||
|
||||
// Create the string set to return
|
||||
#if NET20
|
||||
var strings = new List<string>();
|
||||
@@ -137,7 +147,7 @@ namespace SabreTools.IO.Extensions
|
||||
char c = (char)reader.Read();
|
||||
|
||||
// If the character is invalid
|
||||
if (char.IsControl(c) || (c & 0xFF00) != 0)
|
||||
if (char.IsControl(c) || (c & 0xFFFFFF00) != 0)
|
||||
{
|
||||
// Seek to the end of the last found string
|
||||
string str = sb.ToString();
|
||||
@@ -145,6 +155,10 @@ namespace SabreTools.IO.Extensions
|
||||
ms.Seek(lastOffset, SeekOrigin.Begin);
|
||||
reader.DiscardBufferedData();
|
||||
|
||||
// If there is no cached string
|
||||
if (str.Length == 0)
|
||||
continue;
|
||||
|
||||
// Add the string if long enough
|
||||
if (str.Length >= charLimit)
|
||||
strings.Add(str);
|
||||
@@ -168,5 +182,143 @@ namespace SabreTools.IO.Extensions
|
||||
|
||||
return strings;
|
||||
}
|
||||
|
||||
#region Fixed Byte-Width Encoding Helpers
|
||||
|
||||
/// <summary>
/// Collect runs of acceptable characters from a byte array, decoding a
/// fixed number of bytes per character
/// </summary>
/// <param name="bytes">Byte array representing the source data</param>
/// <param name="charLimit">Minimum number of characters a run needs in order to be kept</param>
/// <param name="encoding">Character encoding used to decode each character</param>
/// <param name="width">Number of bytes decoded per character</param>
/// <returns>The runs that were long enough; empty when the limit cannot be satisfied</returns>
/// <remarks>
/// A character ends the current run when it is a control character or has
/// any bit above the low byte set. After such a character the scan moves
/// forward by a single byte rather than by a full character width.
/// </remarks>
#if NET20
private static List<string> ReadFixedWidthEncodingStrings(this byte[] bytes, int charLimit, Encoding encoding, int width)
#else
private static HashSet<string> ReadFixedWidthEncodingStrings(this byte[] bytes, int charLimit, Encoding encoding, int width)
#endif
{
    // The limit has to be positive and no larger than the input itself
    bool limitUsable = charLimit > 0 && charLimit <= bytes.Length;
    if (!limitUsable)
        return [];

#if NET20
    List<string> found = new List<string>();
#else
    HashSet<string> found = new HashSet<string>();
#endif

    var current = new StringBuilder();

    // Last position at which a full character can still be decoded
    int lastStart = bytes.Length - width;
    int position = 0;
    while (position <= lastStart)
    {
        char decoded = encoding.GetChars(bytes, position, width)[0];

        bool acceptable = !char.IsControl(decoded) && (decoded & 0xFFFFFF00) == 0;
        if (acceptable)
        {
            current.Append(decoded);
            position += width;
            continue;
        }

        // Rejected character: step forward by one byte only
        position++;

        // Nothing to flush if no run is in progress
        if (current.Length == 0)
            continue;

        // Keep the finished run only when it is long enough
        if (current.Length >= charLimit)
            found.Add(current.ToString());

#if NET20 || NET35
        current = new();
#else
        current.Clear();
#endif
    }

    // A run may still be open when the input ends
    if (current.Length >= charLimit)
        found.Add(current.ToString());

    return found;
}
|
||||
|
||||
/// <summary>
/// Collect runs of non-control 7-bit ASCII characters from a byte array
/// </summary>
/// <param name="bytes">Byte array representing the source data</param>
/// <param name="charLimit">Minimum number of characters a run needs in order to be kept</param>
/// <returns>The runs that were long enough; empty when the limit cannot be satisfied</returns>
/// <remarks>7-bit ASCII is handled separately from the other fixed-width encodings</remarks>
#if NET20
private static List<string> ReadAsciiStrings(this byte[] bytes, int charLimit)
#else
private static HashSet<string> ReadAsciiStrings(this byte[] bytes, int charLimit)
#endif
{
    // The limit has to be positive and no larger than the input itself
    bool limitUsable = charLimit > 0 && charLimit <= bytes.Length;
    if (!limitUsable)
        return [];

#if NET20
    List<string> found = new List<string>();
#else
    HashSet<string> found = new HashSet<string>();
#endif

    var current = new StringBuilder();

    int position = 0;
    while (position < bytes.Length)
    {
        // position is passed by ref; this call is the only place it changes
        char next = bytes.ReadChar(ref position);

        bool acceptable = !char.IsControl(next) && next <= 0x7F;
        if (acceptable)
        {
            current.Append(next);
            continue;
        }

        // Nothing to flush if no run is in progress
        if (current.Length == 0)
            continue;

        // Keep the finished run only when it is long enough
        if (current.Length >= charLimit)
            found.Add(current.ToString());

#if NET20 || NET35
        current = new();
#else
        current.Clear();
#endif
    }

    // A run may still be open when the input ends
    if (current.Length >= charLimit)
        found.Add(current.ToString());

    return found;
}
|
||||
|
||||
#endregion
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1065,6 +1065,10 @@ namespace SabreTools.IO.Extensions
|
||||
/// </summary>
|
||||
private static byte[] ReadExactlyToBuffer(byte[] content, ref int offset, int length)
|
||||
{
|
||||
// If we have an invalid offset
|
||||
if (offset < 0 || offset >= content.Length)
|
||||
throw new ArgumentOutOfRangeException($"{nameof(offset)} must be between 0 and {content.Length}, {offset} provided");
|
||||
|
||||
// If we have an invalid length
|
||||
if (length < 0)
|
||||
throw new ArgumentOutOfRangeException($"{nameof(length)} must be 0 or a positive value, {length} requested");
|
||||
|
||||
@@ -5,6 +5,25 @@ namespace SabreTools.IO.Extensions
|
||||
{
|
||||
public static class EnumerableExtensions
|
||||
{
|
||||
/// <summary>
/// Run an action once for every item of an enumerable
/// </summary>
/// <param name="source">Items to pass to the action</param>
/// <param name="action">Action invoked with each item</param>
/// <remarks>
/// Builds for .NET Framework 2.0 and 3.5 loop over the items in series.
/// All other builds hand the items to Parallel.ForEach.
/// </remarks>
public static void IterateWithAction<T>(this IEnumerable<T> source, Action<T> action)
{
#if !(NET20 || NET35)
    System.Threading.Tasks.Parallel.ForEach(source, action);
#else
    foreach (T element in source)
        action(element);
#endif
}
|
||||
|
||||
/// <summary>
|
||||
/// Safely iterate through an enumerable, skipping any errors
|
||||
/// </summary>
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Text;
|
||||
|
||||
namespace SabreTools.IO.Extensions
|
||||
{
|
||||
@@ -76,10 +75,15 @@ namespace SabreTools.IO.Extensions
|
||||
/// <summary>
|
||||
/// Read string data from a Stream
|
||||
/// </summary>
|
||||
/// <param name="charLimit">Number of characters needed to be a valid string, default 5</param>
|
||||
/// <param name="position">Position in the source to read from</param>
|
||||
/// <param name="length">Length of the requested data</param>
|
||||
/// <param name="charLimit">Number of characters needed to be a valid string, default 5</param>
|
||||
/// <returns>String list containing the requested data, null on error</returns>
|
||||
#if NET5_0_OR_GREATER
|
||||
/// <remarks>This reads both Latin1 and UTF-16 strings from the input data</remarks>
|
||||
#else
|
||||
/// <remarks>This reads both ASCII and UTF-16 strings from the input data</remarks>
|
||||
#endif
|
||||
public static List<string>? ReadStringsFrom(this Stream? input, int position, int length, int charLimit = 5)
|
||||
{
|
||||
// Read the data as a byte array first
|
||||
@@ -87,23 +91,7 @@ namespace SabreTools.IO.Extensions
|
||||
if (data == null)
|
||||
return null;
|
||||
|
||||
// Check for ASCII strings
|
||||
var asciiStrings = data.ReadStringsWithEncoding(charLimit, Encoding.ASCII);
|
||||
|
||||
// Check for UTF-8 strings
|
||||
// We are limiting the check for Unicode characters with a second byte of 0x00 for now
|
||||
var utf8Strings = data.ReadStringsWithEncoding(charLimit, Encoding.UTF8);
|
||||
|
||||
// Check for Unicode strings
|
||||
// We are limiting the check for Unicode characters with a second byte of 0x00 for now
|
||||
var unicodeStrings = data.ReadStringsWithEncoding(charLimit, Encoding.Unicode);
|
||||
|
||||
// Ignore duplicate strings across encodings
|
||||
List<string> sourceStrings = [.. asciiStrings, .. utf8Strings, .. unicodeStrings];
|
||||
|
||||
// Sort the strings and return
|
||||
sourceStrings.Sort();
|
||||
return sourceStrings;
|
||||
return data.ReadStringsFrom(charLimit);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
|
||||
63
SabreTools.IO/Extensions/StringExtensions.cs
Normal file
63
SabreTools.IO/Extensions/StringExtensions.cs
Normal file
@@ -0,0 +1,63 @@
|
||||
using System;
|
||||
|
||||
namespace SabreTools.IO.Extensions
|
||||
{
|
||||
/// <summary>
/// Null-tolerant wrappers around common <see cref="string"/> predicates.
/// Every method returns <c>false</c> when the receiver is <c>null</c>
/// instead of throwing.
/// </summary>
public static class StringExtensions
{
    /// <summary>
    /// Determine if a possibly-null string contains a value using ordinal comparison
    /// </summary>
    /// <param name="self">String to search, may be null</param>
    /// <param name="value">Substring to look for</param>
    /// <returns>False if <paramref name="self"/> is null, otherwise the result of the search</returns>
    public static bool OptionalContains(this string? self, string value)
        => OptionalContains(self, value, StringComparison.Ordinal);

    /// <summary>
    /// Determine if a possibly-null string contains a value using the given comparison
    /// </summary>
    /// <param name="self">String to search, may be null</param>
    /// <param name="value">Substring to look for</param>
    /// <param name="comparisonType">Comparison rules to apply to the search</param>
    /// <returns>False if <paramref name="self"/> is null, otherwise the result of the search</returns>
    public static bool OptionalContains(this string? self, string value, StringComparison comparisonType)
    {
        if (self == null)
            return false;

#if NETFRAMEWORK || NETSTANDARD2_0
        // These targets have no Contains(string, StringComparison) overload.
        // IndexOf honors the requested comparison, where Contains(string)
        // would silently fall back to an ordinal, case-sensitive search.
        return self.IndexOf(value, comparisonType) >= 0;
#else
        return self.Contains(value, comparisonType);
#endif
    }

    /// <summary>
    /// Determine if a possibly-null string ends with a value using ordinal comparison
    /// </summary>
    /// <param name="self">String to check, may be null</param>
    /// <param name="value">Suffix to look for</param>
    /// <returns>False if <paramref name="self"/> is null, otherwise the result of the check</returns>
    public static bool OptionalEndsWith(this string? self, string value)
        => OptionalEndsWith(self, value, StringComparison.Ordinal);

    /// <summary>
    /// Determine if a possibly-null string ends with a value using the given comparison
    /// </summary>
    /// <param name="self">String to check, may be null</param>
    /// <param name="value">Suffix to look for</param>
    /// <param name="comparisonType">Comparison rules to apply to the check</param>
    /// <returns>False if <paramref name="self"/> is null, otherwise the result of the check</returns>
    public static bool OptionalEndsWith(this string? self, string value, StringComparison comparisonType)
    {
        if (self == null)
            return false;

        return self.EndsWith(value, comparisonType);
    }

    /// <summary>
    /// Determine if a possibly-null string equals a value using ordinal comparison
    /// </summary>
    /// <param name="self">String to compare, may be null</param>
    /// <param name="value">String to compare against</param>
    /// <returns>False if <paramref name="self"/> is null, otherwise the result of the comparison</returns>
    public static bool OptionalEquals(this string? self, string value)
        => OptionalEquals(self, value, StringComparison.Ordinal);

    /// <summary>
    /// Determine if a possibly-null string equals a value using the given comparison
    /// </summary>
    /// <param name="self">String to compare, may be null</param>
    /// <param name="value">String to compare against</param>
    /// <param name="comparisonType">Comparison rules to apply</param>
    /// <returns>False if <paramref name="self"/> is null, otherwise the result of the comparison</returns>
    public static bool OptionalEquals(this string? self, string value, StringComparison comparisonType)
    {
        if (self == null)
            return false;

        return self.Equals(value, comparisonType);
    }

    /// <summary>
    /// Determine if a possibly-null string starts with a value using ordinal comparison
    /// </summary>
    /// <param name="self">String to check, may be null</param>
    /// <param name="value">Prefix to look for</param>
    /// <returns>False if <paramref name="self"/> is null, otherwise the result of the check</returns>
    public static bool OptionalStartsWith(this string? self, string value)
        => OptionalStartsWith(self, value, StringComparison.Ordinal);

    /// <summary>
    /// Determine if a possibly-null string starts with a value using the given comparison
    /// </summary>
    /// <param name="self">String to check, may be null</param>
    /// <param name="value">Prefix to look for</param>
    /// <param name="comparisonType">Comparison rules to apply to the check</param>
    /// <returns>False if <paramref name="self"/> is null, otherwise the result of the check</returns>
    public static bool OptionalStartsWith(this string? self, string value, StringComparison comparisonType)
    {
        if (self == null)
            return false;

        return self.StartsWith(value, comparisonType);
    }
}
|
||||
}
|
||||
@@ -11,7 +11,7 @@
|
||||
<SuppressTfmSupportBuildWarnings>true</SuppressTfmSupportBuildWarnings>
|
||||
<SymbolPackageFormat>snupkg</SymbolPackageFormat>
|
||||
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
|
||||
<Version>1.7.2</Version>
|
||||
<Version>1.7.4</Version>
|
||||
|
||||
<!-- Package Properties -->
|
||||
<Authors>Matt Nadareski</Authors>
|
||||
@@ -31,7 +31,6 @@
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="SabreTools.Matching" Version="1.6.0" />
|
||||
<PackageReference Include="SabreTools.Models" Version="1.7.1" />
|
||||
<PackageReference Include="SabreTools.Hashing" Version="1.5.0" />
|
||||
</ItemGroup>
|
||||
|
||||
|
||||
77
SabreTools.IO/Streams/BufferedStream.cs
Normal file
77
SabreTools.IO/Streams/BufferedStream.cs
Normal file
@@ -0,0 +1,77 @@
|
||||
using System.IO;
|
||||
|
||||
namespace SabreTools.IO.Streams
|
||||
{
|
||||
/// <summary>
/// Byte-at-a-time reader that pulls data from a source stream in fixed-size blocks
/// </summary>
/// <remarks>Not a true <see cref="Stream"/> implementation yet</remarks>
public class BufferedStream
{
    /// <summary>
    /// Stream the blocks are read from
    /// </summary>
    private readonly Stream _source;

    /// <summary>
    /// Holds the most recently read block
    /// </summary>
    private readonly byte[] _block = new byte[2048];

    /// <summary>
    /// Index of the next unread byte within the block
    /// </summary>
    private int _cursor = 0;

    /// <summary>
    /// Number of valid bytes in the block; -1 until the first read happens
    /// </summary>
    private int _filled = -1;

    /// <summary>
    /// Wrap a source stream for block-based reading
    /// </summary>
    /// <param name="source">Stream to read blocks from</param>
    public BufferedStream(Stream source)
    {
        _source = source;
    }

    /// <summary>
    /// Read the next byte, refilling the block from the source when needed
    /// </summary>
    /// <returns>The next byte, or null when the source returned no more data</returns>
    public byte? ReadNextByte()
    {
        if (EnsureBuffer())
        {
            byte next = _block[_cursor];
            _cursor++;
            return next;
        }

        return null;
    }

    /// <summary>
    /// Make sure there is at least one unread byte in the block
    /// </summary>
    /// <returns>True if a byte can be read, false if the source read returned zero bytes</returns>
    private bool EnsureBuffer()
    {
        // Data is still pending unless nothing has been read yet
        // or the cursor has moved past the valid region
        bool refillNeeded = _filled == -1 || _cursor >= _filled;
        if (!refillNeeded)
            return true;

        // Pull the next block and restart from its beginning
        _filled = _source.Read(_block, 0, _block.Length);
        _cursor = 0;
        return _filled != 0;
    }
}
|
||||
}
|
||||
Reference in New Issue
Block a user