10 Commits
1.7.1 ... 1.7.2

Author SHA1 Message Date
Matt Nadareski
955c1b5641 Bump version 2025-09-05 09:46:17 -04:00
Matt Nadareski
535f9f928d Update Models to 1.7.1 2025-09-05 09:21:15 -04:00
Matt Nadareski
f0cb15c2e4 Fix comments 2025-09-05 09:15:05 -04:00
Matt Nadareski
ec99304c51 Implement the 16KiB limit 2025-09-03 09:05:08 -04:00
Matt Nadareski
aefc931055 Of all things 2025-09-03 01:29:06 -04:00
Matt Nadareski
e7fe342379 Fix missed compatibility issue in string reading 2025-09-03 01:04:34 -04:00
Matt Nadareski
f372999b1b So that's why 2025-09-03 00:23:46 -04:00
Matt Nadareski
2679975945 TFM support thing 2025-09-03 00:22:46 -04:00
Matt Nadareski
54dd7f2f8f Add new extension tests 2025-09-03 00:20:02 -04:00
Matt Nadareski
aee5891c50 Backport thing 2025-09-03 00:15:41 -04:00
4 changed files with 216 additions and 34 deletions

View File

@@ -90,6 +90,133 @@ namespace SabreTools.IO.Test.Extensions
#endregion
#region ReadStringsFrom
[Fact]
public void ReadStringsFrom_Null_Null()
{
    // A null source array is expected to yield null rather than an empty list
    byte[]? source = null;

    Assert.Null(source.ReadStringsFrom(3));
}
[Fact]
public void ReadStringsFrom_Empty_Null()
{
    // A zero-length source array is expected to yield null, same as a null array
    byte[]? source = [];

    Assert.Null(source.ReadStringsFrom(3));
}
[Theory]
[InlineData(-1)]
[InlineData(0)]
[InlineData(2048)]
public void ReadStringsFrom_InvalidLimit_Empty(int charLimit)
{
    // 1024 zero bytes; each limit under test is either non-positive
    // or larger than the buffer itself
    byte[]? zeroes = new byte[1024];

    var found = zeroes.ReadStringsFrom(charLimit);

    // A usable (non-null) list must still come back, just with nothing in it
    Assert.NotNull(found);
    Assert.Empty(found);
}
[Fact]
public void ReadStringsFrom_NoValidStrings_Empty()
{
    // A buffer of 1024 zero bytes contains no printable characters at all
    byte[]? zeroes = new byte[1024];

    var found = zeroes.ReadStringsFrom(4);

    // Valid limit, but nothing to find: expect an empty, non-null list
    Assert.NotNull(found);
    Assert.Empty(found);
}
[Fact]
public void ReadStringsFrom_AsciiStrings_Filled()
{
    // Three ASCII strings, each terminated by a single null byte
    byte[]? payload =
    [
        .. Encoding.ASCII.GetBytes("TEST"), 0x00,
        .. Encoding.ASCII.GetBytes("TWO"), 0x00,
        .. Encoding.ASCII.GetBytes("DATA"), 0x00,
    ];

    var found = payload.ReadStringsFrom(4);

    Assert.NotNull(found);

    // ASCII and UTF-8 decode these bytes identically, so each string that
    // meets the 4-character minimum (TEST, DATA) is reported by both passes
    Assert.Equal(4, found.Count);
}
[Fact]
public void ReadStringsFrom_UTF8_Filled()
{
    // Three UTF-8 strings, each terminated by a single null byte
    byte[]? payload =
    [
        .. Encoding.UTF8.GetBytes("TEST"), 0x00,
        .. Encoding.UTF8.GetBytes("TWO"), 0x00,
        .. Encoding.UTF8.GetBytes("DATA"), 0x00,
    ];

    var found = payload.ReadStringsFrom(4);

    Assert.NotNull(found);

    // ASCII and UTF-8 decode these bytes identically, so each string that
    // meets the 4-character minimum (TEST, DATA) is reported by both passes
    Assert.Equal(4, found.Count);
}
[Fact]
public void ReadStringsFrom_UTF16_Filled()
{
    // Three UTF-16 (Encoding.Unicode) strings; note that each separator is a
    // single 0x00 byte, not a two-byte UTF-16 null
    byte[]? payload =
    [
        .. Encoding.Unicode.GetBytes("TEST"), 0x00,
        .. Encoding.Unicode.GetBytes("TWO"), 0x00,
        .. Encoding.Unicode.GetBytes("DATA"), 0x00,
    ];

    var found = payload.ReadStringsFrom(4);

    Assert.NotNull(found);

    // Two entries expected: TEST and DATA reach the 4-character minimum,
    // TWO does not
    Assert.Equal(2, found.Count);
}
[Fact]
public void ReadStringsFrom_Mixed_Filled()
{
    // One buffer holding ASCII, UTF-8 and UTF-16 strings back to back,
    // each followed by a single null byte
    byte[]? payload =
    [
        // ASCII section
        .. Encoding.ASCII.GetBytes("TEST1"), 0x00,
        .. Encoding.ASCII.GetBytes("TWO1"), 0x00,
        .. Encoding.ASCII.GetBytes("DATA1"), 0x00,

        // UTF-8 section
        .. Encoding.UTF8.GetBytes("TEST2"), 0x00,
        .. Encoding.UTF8.GetBytes("TWO2"), 0x00,
        .. Encoding.UTF8.GetBytes("DATA2"), 0x00,

        // UTF-16 section
        .. Encoding.Unicode.GetBytes("TEST3"), 0x00,
        .. Encoding.Unicode.GetBytes("TWO3"), 0x00,
        .. Encoding.Unicode.GetBytes("DATA3"), 0x00,
    ];

    var found = payload.ReadStringsFrom(5);

    Assert.NotNull(found);

    // ASCII and UTF-8 are identical for the character range, so the four
    // single-byte strings of 5 characters count twice (8); the UTF-16
    // section adds TEST3 and DATA3 (2)
    Assert.Equal(10, found.Count);
}
#endregion
#region ReadStringsWithEncoding
[Fact]
@@ -134,6 +261,8 @@ namespace SabreTools.IO.Test.Extensions
[
.. Encoding.ASCII.GetBytes("TEST"),
.. new byte[] { 0x00 },
.. Encoding.ASCII.GetBytes("ONE"),
.. new byte[] { 0x00 },
.. Encoding.ASCII.GetBytes("TWO"),
.. new byte[] { 0x00 },
.. Encoding.ASCII.GetBytes("DATA"),
@@ -150,6 +279,8 @@ namespace SabreTools.IO.Test.Extensions
[
.. Encoding.Latin1.GetBytes("TEST"),
.. new byte[] { 0x00 },
.. Encoding.Latin1.GetBytes("ONE"),
.. new byte[] { 0x00 },
.. Encoding.Latin1.GetBytes("TWO"),
.. new byte[] { 0x00 },
.. Encoding.Latin1.GetBytes("DATA"),
@@ -166,6 +297,8 @@ namespace SabreTools.IO.Test.Extensions
[
.. Encoding.UTF8.GetBytes("TEST"),
.. new byte[] { 0x00 },
.. Encoding.UTF8.GetBytes("ONE"),
.. new byte[] { 0x00 },
.. Encoding.UTF8.GetBytes("TWO"),
.. new byte[] { 0x00 },
.. Encoding.UTF8.GetBytes("DATA"),
@@ -182,6 +315,8 @@ namespace SabreTools.IO.Test.Extensions
[
.. Encoding.Unicode.GetBytes("TEST"),
.. new byte[] { 0x00 },
.. Encoding.Unicode.GetBytes("ONE"),
.. new byte[] { 0x00 },
.. Encoding.Unicode.GetBytes("TWO"),
.. new byte[] { 0x00 },
.. Encoding.Unicode.GetBytes("DATA"),
@@ -198,6 +333,8 @@ namespace SabreTools.IO.Test.Extensions
[
.. Encoding.UTF32.GetBytes("TEST"),
.. new byte[] { 0x00 },
.. Encoding.UTF32.GetBytes("ONE"),
.. new byte[] { 0x00 },
.. Encoding.UTF32.GetBytes("TWO"),
.. new byte[] { 0x00 },
.. Encoding.UTF32.GetBytes("DATA"),

View File

@@ -1,17 +1,12 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
namespace SabreTools.IO.Extensions
{
public static class ByteArrayExtensions
{
/// <summary>
/// Defines the maximum number of characters in a string
/// as used in <see cref="ReadStringsWithEncoding"/>
/// </summary>
private const int MaximumCharactersInString = 64;
/// <summary>
/// Indicates whether the specified array is null or has a length of zero
/// </summary>
@@ -60,7 +55,45 @@ namespace SabreTools.IO.Extensions
}
/// <summary>
/// Read string data from the source with an encoding
/// Read string data from a byte array
/// </summary>
/// <param name="input">Byte array to scan for strings</param>
/// <param name="charLimit">Number of characters needed to be a valid string, default 5</param>
/// <returns>
/// Sorted list of the strings found by the ASCII, UTF-8 and UTF-16 passes,
/// null if <paramref name="input"/> is null or empty
/// </returns>
/// <remarks>
/// A maximum of 16KiB of data can be scanned at a time.
/// The results of the three passes are concatenated without de-duplication,
/// so a string found under more than one encoding is listed once per pass.
/// </remarks>
public static List<string>? ReadStringsFrom(this byte[]? input, int charLimit = 5)
{
    // Upper bound, in bytes, on how much of the input gets scanned (16KiB)
    const int scanLimit = 16384;

    // Nothing to scan
    if (input == null || input.Length == 0)
        return null;

    // Oversized input is cut down to its leading scan window
    if (input.Length > scanLimit)
    {
        int start = 0;
        input = input.ReadBytes(ref start, scanLimit);
    }

    // Run one pass per encoding and collect the results in pass order
    var found = new List<string>();

    // ASCII pass
    found.AddRange(input.ReadStringsWithEncoding(charLimit, Encoding.ASCII));

    // UTF-8 pass
    found.AddRange(input.ReadStringsWithEncoding(charLimit, Encoding.UTF8));

    // UTF-16 pass (Encoding.Unicode)
    found.AddRange(input.ReadStringsWithEncoding(charLimit, Encoding.Unicode));

    // Sort with the default string comparer and hand the list back
    found.Sort();
    return found;
}
/// <summary>
/// Read string data from a byte array with an encoding
/// </summary>
/// <param name="bytes">Byte array representing the source data</param>
/// <param name="charLimit">Number of characters needed to be a valid string</param>
@@ -89,39 +122,50 @@ namespace SabreTools.IO.Extensions
var strings = new HashSet<string>();
#endif
// Open the text reader with the correct encoding
using var ms = new MemoryStream(bytes);
using var reader = new StreamReader(ms, encoding);
// Create a string builder for the loop
var sb = new StringBuilder();
// Check for strings
int index = 0;
while (index < bytes.Length)
long lastOffset = 0;
while (!reader.EndOfStream)
{
// Get the maximum number of characters
int maxChars = encoding.GetMaxCharCount(bytes.Length - index);
int maxBytes = encoding.GetMaxByteCount(Math.Min(MaximumCharactersInString, maxChars));
// Read the next character from the stream
char c = (char)reader.Read();
// Read the longest string allowed
int maxRead = Math.Min(maxBytes, bytes.Length - index);
string temp = encoding.GetString(bytes, index, maxRead);
char[] tempArr = temp.ToCharArray();
// Ignore empty strings
if (temp.Length == 0)
// If the character is invalid
if (char.IsControl(c) || (c & 0xFF00) != 0)
{
index++;
// Seek to the end of the last found string
string str = sb.ToString();
lastOffset += encoding.GetByteCount(str) + 1;
ms.Seek(lastOffset, SeekOrigin.Begin);
reader.DiscardBufferedData();
// Add the string if long enough
if (str.Length >= charLimit)
strings.Add(str);
// Clear the builder and continue
#if NET20 || NET35
sb = new();
#else
sb.Clear();
#endif
continue;
}
// Find the first instance of a control character
int endOfString = Array.FindIndex(tempArr, c => char.IsControl(c) || (c & 0xFF00) != 0);
if (endOfString > -1)
temp = temp.Substring(0, endOfString);
// Otherwise, just add the string if long enough
if (temp.Length >= charLimit)
strings.Add(temp);
// Increment and continue
index += Math.Max(encoding.GetByteCount(temp), 1);
// Otherwise, add the character to the builder and continue
sb.Append(c);
}
// Handle any remaining data
if (sb.Length >= charLimit)
strings.Add(sb.ToString());
return strings;
}
}

View File

@@ -74,7 +74,7 @@ namespace SabreTools.IO.Extensions
}
/// <summary>
/// Read string data from the source
/// Read string data from a Stream
/// </summary>
/// <param name="position">Position in the source to read from</param>
/// <param name="length">Length of the requested data</param>

View File

@@ -8,9 +8,10 @@
<LangVersion>latest</LangVersion>
<NoWarn>CS0618</NoWarn>
<Nullable>enable</Nullable>
<SuppressTfmSupportBuildWarnings>true</SuppressTfmSupportBuildWarnings>
<SymbolPackageFormat>snupkg</SymbolPackageFormat>
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
<Version>1.7.1</Version>
<Version>1.7.2</Version>
<!-- Package Properties -->
<Authors>Matt Nadareski</Authors>
@@ -30,7 +31,7 @@
<ItemGroup>
<PackageReference Include="SabreTools.Matching" Version="1.6.0" />
<PackageReference Include="SabreTools.Models" Version="1.7.0" />
<PackageReference Include="SabreTools.Models" Version="1.7.1" />
<PackageReference Include="SabreTools.Hashing" Version="1.5.0" />
</ItemGroup>