mirror of
https://github.com/SabreTools/SabreTools.IO.git
synced 2026-02-08 13:49:55 +00:00
Compare commits
10 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
955c1b5641 | ||
|
|
535f9f928d | ||
|
|
f0cb15c2e4 | ||
|
|
ec99304c51 | ||
|
|
aefc931055 | ||
|
|
e7fe342379 | ||
|
|
f372999b1b | ||
|
|
2679975945 | ||
|
|
54dd7f2f8f | ||
|
|
aee5891c50 |
@@ -90,6 +90,133 @@ namespace SabreTools.IO.Test.Extensions
|
||||
|
||||
#endregion
|
||||
|
||||
#region ReadStringsFrom
|
||||
|
||||
[Fact]
|
||||
public void ReadStringsFrom_Null_Null()
|
||||
{
|
||||
byte[]? arr = null;
|
||||
var actual = arr.ReadStringsFrom(3);
|
||||
Assert.Null(actual);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void ReadStringsFrom_Empty_Null()
|
||||
{
|
||||
byte[]? arr = [];
|
||||
var actual = arr.ReadStringsFrom(3);
|
||||
Assert.Null(actual);
|
||||
}
|
||||
|
||||
[Theory]
|
||||
[InlineData(-1)]
|
||||
[InlineData(0)]
|
||||
[InlineData(2048)]
|
||||
public void ReadStringsFrom_InvalidLimit_Empty(int charLimit)
|
||||
{
|
||||
byte[]? arr = new byte[1024];
|
||||
var actual = arr.ReadStringsFrom(charLimit);
|
||||
Assert.NotNull(actual);
|
||||
Assert.Empty(actual);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void ReadStringsFrom_NoValidStrings_Empty()
|
||||
{
|
||||
byte[]? arr = new byte[1024];
|
||||
var actual = arr.ReadStringsFrom(4);
|
||||
Assert.NotNull(actual);
|
||||
Assert.Empty(actual);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void ReadStringsFrom_AsciiStrings_Filled()
|
||||
{
|
||||
byte[]? arr =
|
||||
[
|
||||
.. Encoding.ASCII.GetBytes("TEST"),
|
||||
.. new byte[] { 0x00 },
|
||||
.. Encoding.ASCII.GetBytes("TWO"),
|
||||
.. new byte[] { 0x00 },
|
||||
.. Encoding.ASCII.GetBytes("DATA"),
|
||||
.. new byte[] { 0x00 },
|
||||
];
|
||||
var actual = arr.ReadStringsFrom(4);
|
||||
Assert.NotNull(actual);
|
||||
|
||||
// ASCII and UTF-8 are identical for the character range
|
||||
Assert.Equal(4, actual.Count);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void ReadStringsFrom_UTF8_Filled()
|
||||
{
|
||||
byte[]? arr =
|
||||
[
|
||||
.. Encoding.UTF8.GetBytes("TEST"),
|
||||
.. new byte[] { 0x00 },
|
||||
.. Encoding.UTF8.GetBytes("TWO"),
|
||||
.. new byte[] { 0x00 },
|
||||
.. Encoding.UTF8.GetBytes("DATA"),
|
||||
.. new byte[] { 0x00 },
|
||||
];
|
||||
var actual = arr.ReadStringsFrom(4);
|
||||
Assert.NotNull(actual);
|
||||
|
||||
// ASCII and UTF-8 are identical for the character range
|
||||
Assert.Equal(4, actual.Count);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void ReadStringsFrom_UTF16_Filled()
|
||||
{
|
||||
byte[]? arr =
|
||||
[
|
||||
.. Encoding.Unicode.GetBytes("TEST"),
|
||||
.. new byte[] { 0x00 },
|
||||
.. Encoding.Unicode.GetBytes("TWO"),
|
||||
.. new byte[] { 0x00 },
|
||||
.. Encoding.Unicode.GetBytes("DATA"),
|
||||
.. new byte[] { 0x00 },
|
||||
];
|
||||
var actual = arr.ReadStringsFrom(4);
|
||||
Assert.NotNull(actual);
|
||||
Assert.Equal(2, actual.Count);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void ReadStringsFrom_Mixed_Filled()
|
||||
{
|
||||
byte[]? arr =
|
||||
[
|
||||
.. Encoding.ASCII.GetBytes("TEST1"),
|
||||
.. new byte[] { 0x00 },
|
||||
.. Encoding.ASCII.GetBytes("TWO1"),
|
||||
.. new byte[] { 0x00 },
|
||||
.. Encoding.ASCII.GetBytes("DATA1"),
|
||||
.. new byte[] { 0x00 },
|
||||
.. Encoding.UTF8.GetBytes("TEST2"),
|
||||
.. new byte[] { 0x00 },
|
||||
.. Encoding.UTF8.GetBytes("TWO2"),
|
||||
.. new byte[] { 0x00 },
|
||||
.. Encoding.UTF8.GetBytes("DATA2"),
|
||||
.. new byte[] { 0x00 },
|
||||
.. Encoding.Unicode.GetBytes("TEST3"),
|
||||
.. new byte[] { 0x00 },
|
||||
.. Encoding.Unicode.GetBytes("TWO3"),
|
||||
.. new byte[] { 0x00 },
|
||||
.. Encoding.Unicode.GetBytes("DATA3"),
|
||||
.. new byte[] { 0x00 },
|
||||
];
|
||||
var actual = arr.ReadStringsFrom(5);
|
||||
Assert.NotNull(actual);
|
||||
|
||||
// ASCII and UTF-8 are identical for the character range
|
||||
Assert.Equal(10, actual.Count);
|
||||
}
|
||||
|
||||
#endregion
|
||||
|
||||
#region ReadStringsWithEncoding
|
||||
|
||||
[Fact]
|
||||
@@ -134,6 +261,8 @@ namespace SabreTools.IO.Test.Extensions
|
||||
[
|
||||
.. Encoding.ASCII.GetBytes("TEST"),
|
||||
.. new byte[] { 0x00 },
|
||||
.. Encoding.ASCII.GetBytes("ONE"),
|
||||
.. new byte[] { 0x00 },
|
||||
.. Encoding.ASCII.GetBytes("TWO"),
|
||||
.. new byte[] { 0x00 },
|
||||
.. Encoding.ASCII.GetBytes("DATA"),
|
||||
@@ -150,6 +279,8 @@ namespace SabreTools.IO.Test.Extensions
|
||||
[
|
||||
.. Encoding.Latin1.GetBytes("TEST"),
|
||||
.. new byte[] { 0x00 },
|
||||
.. Encoding.Latin1.GetBytes("ONE"),
|
||||
.. new byte[] { 0x00 },
|
||||
.. Encoding.Latin1.GetBytes("TWO"),
|
||||
.. new byte[] { 0x00 },
|
||||
.. Encoding.Latin1.GetBytes("DATA"),
|
||||
@@ -166,6 +297,8 @@ namespace SabreTools.IO.Test.Extensions
|
||||
[
|
||||
.. Encoding.UTF8.GetBytes("TEST"),
|
||||
.. new byte[] { 0x00 },
|
||||
.. Encoding.UTF8.GetBytes("ONE"),
|
||||
.. new byte[] { 0x00 },
|
||||
.. Encoding.UTF8.GetBytes("TWO"),
|
||||
.. new byte[] { 0x00 },
|
||||
.. Encoding.UTF8.GetBytes("DATA"),
|
||||
@@ -182,6 +315,8 @@ namespace SabreTools.IO.Test.Extensions
|
||||
[
|
||||
.. Encoding.Unicode.GetBytes("TEST"),
|
||||
.. new byte[] { 0x00 },
|
||||
.. Encoding.Unicode.GetBytes("ONE"),
|
||||
.. new byte[] { 0x00 },
|
||||
.. Encoding.Unicode.GetBytes("TWO"),
|
||||
.. new byte[] { 0x00 },
|
||||
.. Encoding.Unicode.GetBytes("DATA"),
|
||||
@@ -198,6 +333,8 @@ namespace SabreTools.IO.Test.Extensions
|
||||
[
|
||||
.. Encoding.UTF32.GetBytes("TEST"),
|
||||
.. new byte[] { 0x00 },
|
||||
.. Encoding.UTF32.GetBytes("ONE"),
|
||||
.. new byte[] { 0x00 },
|
||||
.. Encoding.UTF32.GetBytes("TWO"),
|
||||
.. new byte[] { 0x00 },
|
||||
.. Encoding.UTF32.GetBytes("DATA"),
|
||||
|
||||
@@ -1,17 +1,12 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Text;
|
||||
|
||||
namespace SabreTools.IO.Extensions
|
||||
{
|
||||
public static class ByteArrayExtensions
|
||||
{
|
||||
/// <summary>
|
||||
/// Defines the maximum number of characters in a string
|
||||
/// as used in <see cref="ReadStringsWithEncoding"/>
|
||||
/// </summary>
|
||||
private const int MaximumCharactersInString = 64;
|
||||
|
||||
/// <summary>
|
||||
/// Indicates whether the specified array is null or has a length of zero
|
||||
/// </summary>
|
||||
@@ -60,7 +55,45 @@ namespace SabreTools.IO.Extensions
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Read string data from the source with an encoding
|
||||
/// Read string data from a byte array
|
||||
/// </summary>
|
||||
/// <param name="charLimit">Number of characters needed to be a valid string, default 5</param>
|
||||
/// <returns>String list containing the requested data, null on error</returns>
|
||||
/// <remarks>A maximum of 16KiB of data can be scanned at a time</remarks>
|
||||
public static List<string>? ReadStringsFrom(this byte[]? input, int charLimit = 5)
|
||||
{
|
||||
// Validate the data
|
||||
if (input == null || input.Length == 0)
|
||||
return null;
|
||||
|
||||
// Limit to 16KiB of data
|
||||
if (input.Length > 16384)
|
||||
{
|
||||
int offset = 0;
|
||||
input = input.ReadBytes(ref offset, 16384);
|
||||
}
|
||||
|
||||
// Check for ASCII strings
|
||||
var asciiStrings = input.ReadStringsWithEncoding(charLimit, Encoding.ASCII);
|
||||
|
||||
// Check for UTF-8 strings
|
||||
// We are limiting the check for Unicode characters with a second byte of 0x00 for now
|
||||
var utf8Strings = input.ReadStringsWithEncoding(charLimit, Encoding.UTF8);
|
||||
|
||||
// Check for Unicode strings
|
||||
// We are limiting the check for Unicode characters with a second byte of 0x00 for now
|
||||
var unicodeStrings = input.ReadStringsWithEncoding(charLimit, Encoding.Unicode);
|
||||
|
||||
// Ignore duplicate strings across encodings
|
||||
List<string> sourceStrings = [.. asciiStrings, .. utf8Strings, .. unicodeStrings];
|
||||
|
||||
// Sort the strings and return
|
||||
sourceStrings.Sort();
|
||||
return sourceStrings;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Read string data from a byte array with an encoding
|
||||
/// </summary>
|
||||
/// <param name="bytes">Byte array representing the source data</param>
|
||||
/// <param name="charLimit">Number of characters needed to be a valid string</param>
|
||||
@@ -89,39 +122,50 @@ namespace SabreTools.IO.Extensions
|
||||
var strings = new HashSet<string>();
|
||||
#endif
|
||||
|
||||
// Open the text reader with the correct encoding
|
||||
using var ms = new MemoryStream(bytes);
|
||||
using var reader = new StreamReader(ms, encoding);
|
||||
|
||||
// Create a string builder for the loop
|
||||
var sb = new StringBuilder();
|
||||
|
||||
// Check for strings
|
||||
int index = 0;
|
||||
while (index < bytes.Length)
|
||||
long lastOffset = 0;
|
||||
while (!reader.EndOfStream)
|
||||
{
|
||||
// Get the maximum number of characters
|
||||
int maxChars = encoding.GetMaxCharCount(bytes.Length - index);
|
||||
int maxBytes = encoding.GetMaxByteCount(Math.Min(MaximumCharactersInString, maxChars));
|
||||
// Read the next character from the stream
|
||||
char c = (char)reader.Read();
|
||||
|
||||
// Read the longest string allowed
|
||||
int maxRead = Math.Min(maxBytes, bytes.Length - index);
|
||||
string temp = encoding.GetString(bytes, index, maxRead);
|
||||
char[] tempArr = temp.ToCharArray();
|
||||
|
||||
// Ignore empty strings
|
||||
if (temp.Length == 0)
|
||||
// If the character is invalid
|
||||
if (char.IsControl(c) || (c & 0xFF00) != 0)
|
||||
{
|
||||
index++;
|
||||
// Seek to the end of the last found string
|
||||
string str = sb.ToString();
|
||||
lastOffset += encoding.GetByteCount(str) + 1;
|
||||
ms.Seek(lastOffset, SeekOrigin.Begin);
|
||||
reader.DiscardBufferedData();
|
||||
|
||||
// Add the string if long enough
|
||||
if (str.Length >= charLimit)
|
||||
strings.Add(str);
|
||||
|
||||
// Clear the builder and continue
|
||||
#if NET20 || NET35
|
||||
sb = new();
|
||||
#else
|
||||
sb.Clear();
|
||||
#endif
|
||||
continue;
|
||||
}
|
||||
|
||||
// Find the first instance of a control character
|
||||
int endOfString = Array.FindIndex(tempArr, c => char.IsControl(c) || (c & 0xFF00) != 0);
|
||||
if (endOfString > -1)
|
||||
temp = temp.Substring(0, endOfString);
|
||||
|
||||
// Otherwise, just add the string if long enough
|
||||
if (temp.Length >= charLimit)
|
||||
strings.Add(temp);
|
||||
|
||||
// Increment and continue
|
||||
index += Math.Max(encoding.GetByteCount(temp), 1);
|
||||
// Otherwise, add the character to the builder and continue
|
||||
sb.Append(c);
|
||||
}
|
||||
|
||||
// Handle any remaining data
|
||||
if (sb.Length >= charLimit)
|
||||
strings.Add(sb.ToString());
|
||||
|
||||
return strings;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -74,7 +74,7 @@ namespace SabreTools.IO.Extensions
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Read string data from the source
|
||||
/// Read string data from a Stream
|
||||
/// </summary>
|
||||
/// <param name="position">Position in the source to read from</param>
|
||||
/// <param name="length">Length of the requested data</param>
|
||||
|
||||
@@ -8,9 +8,10 @@
|
||||
<LangVersion>latest</LangVersion>
|
||||
<NoWarn>CS0618</NoWarn>
|
||||
<Nullable>enable</Nullable>
|
||||
<SuppressTfmSupportBuildWarnings>true</SuppressTfmSupportBuildWarnings>
|
||||
<SymbolPackageFormat>snupkg</SymbolPackageFormat>
|
||||
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
|
||||
<Version>1.7.1</Version>
|
||||
<Version>1.7.2</Version>
|
||||
|
||||
<!-- Package Properties -->
|
||||
<Authors>Matt Nadareski</Authors>
|
||||
@@ -30,7 +31,7 @@
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="SabreTools.Matching" Version="1.6.0" />
|
||||
<PackageReference Include="SabreTools.Models" Version="1.7.0" />
|
||||
<PackageReference Include="SabreTools.Models" Version="1.7.1" />
|
||||
<PackageReference Include="SabreTools.Hashing" Version="1.5.0" />
|
||||
</ItemGroup>
|
||||
|
||||
|
||||
Reference in New Issue
Block a user