Add class to do conversions using Runes, for codepages that contain characters needing UTF-16 surrogates.

This commit is contained in:
2021-04-29 19:37:15 +01:00
parent 01df1f48e1
commit c3304d356d
2 changed files with 375 additions and 0 deletions

View File

@@ -2,6 +2,9 @@ using System;
namespace Claunia.Encoding
{
/// <summary>
/// Implements a class that converts to/from a single byte codepage and UTF-16 representable strings
/// </summary>
public abstract class SingleByteEncoding : Encoding
{
protected abstract char[] CharTable { get; }

View File

@@ -0,0 +1,372 @@
using System;
using System.Globalization;
using System.Linq;
using System.Text;
namespace Claunia.Encoding
{
/// <summary>
/// Implements a class that converts to/from a single byte codepage and strings that contain elements that need
/// surrogates in UTF-16, using runes.
/// </summary>
public abstract class SingleByteEncodingWithRunes : Encoding
{
    /// <summary>Gets the table used to decode a codepage byte (used as the index) into a Unicode scalar value.</summary>
    /// <remarks>
    ///     NOTE(review): decoding indexes this table with arbitrary byte values, so it is presumably expected to
    ///     hold 256 entries; confirm against the implementing classes.
    /// </remarks>
    protected abstract Rune[] CharTable { get; }
    /// <summary>Gets a value indicating whether the current encoding can be used by browser clients for displaying content.</summary>
    public abstract override bool IsBrowserDisplay { get; }
    /// <summary>Gets a value indicating whether the current encoding can be used by browser clients for saving content.</summary>
    public abstract override bool IsBrowserSave { get; }
    /// <summary>
    ///     Gets a value indicating whether the current encoding can be used by mail and news clients for displaying
    ///     content.
    /// </summary>
    public abstract override bool IsMailNewsDisplay { get; }
    /// <summary>Gets a value indicating whether the current encoding can be used by mail and news clients for saving content.</summary>
    public abstract override bool IsMailNewsSave { get; }
    /// <summary>Gets a value indicating whether the current encoding is read-only.</summary>
    public abstract override bool IsReadOnly { get; }
    /// <summary>Gets a value indicating whether the current encoding uses single-byte code points.</summary>
    public abstract override bool IsSingleByte { get; }
    /// <summary>Gets the code page identifier of the current Encoding.</summary>
    public abstract override int CodePage { get; }
    /// <summary>Gets a name for the current encoding that can be used with mail agent body tags</summary>
    public abstract override string BodyName { get; }
    /// <summary>Gets a name for the current encoding that can be used with mail agent header tags</summary>
    public abstract override string HeaderName { get; }
    /// <summary>Gets the name registered with the Internet Assigned Numbers Authority (IANA) for the current encoding.</summary>
    public abstract override string WebName { get; }
    /// <summary>Gets the human-readable description of the current encoding.</summary>
    public abstract override string EncodingName { get; }
    /// <summary>Gets the Windows operating system code page that most closely corresponds to the current encoding.</summary>
    public abstract override int WindowsCodePage { get; }

    /// <summary>Calculates the number of bytes produced by encoding the characters in the specified <see cref="string" />.</summary>
    /// <returns>The number of bytes produced by encoding the specified characters (one byte per rune).</returns>
    /// <param name="s">The <see cref="string" /> containing the set of characters to encode.</param>
    /// <exception cref="ArgumentNullException"><paramref name="s" /> is <c>null</c>.</exception>
    public override int GetByteCount(string s)
    {
        if(s == null)
            throw new ArgumentNullException(nameof(s));

        // One byte is produced per rune, NOT per text element: a grapheme cluster such as "\r\n" or a base
        // character followed by a combining mark is a single text element but several runes.
        return CountRunes(s.AsSpan());
    }

    /// <summary>Calculates the number of bytes produced by encoding a set of characters from the specified character array.</summary>
    /// <returns>The number of bytes produced by encoding the specified characters (one byte per rune).</returns>
    /// <param name="chars">The character array containing the set of characters to encode.</param>
    /// <param name="index">The index, in UTF-16 code units, of the first character to encode.</param>
    /// <param name="count">The number of UTF-16 code units to encode.</param>
    /// <exception cref="ArgumentNullException"><paramref name="chars" /> is <c>null</c>.</exception>
    /// <exception cref="ArgumentOutOfRangeException">
    ///     <paramref name="index" /> and <paramref name="count" /> do not denote a valid range in
    ///     <paramref name="chars" />.
    /// </exception>
    public override int GetByteCount(char[] chars, int index, int count)
    {
        if(chars == null)
            throw new ArgumentNullException(nameof(chars));

        ValidateRange(chars.Length, index, count, nameof(index), nameof(count));

        return CountRunes(new ReadOnlySpan<char>(chars, index, count));
    }

    /// <summary>Calculates the number of bytes produced by encoding all the characters in the specified character array.</summary>
    /// <returns>The number of bytes produced by encoding all the characters in the specified character array.</returns>
    /// <param name="chars">The character array containing the characters to encode.</param>
    /// <exception cref="ArgumentNullException"><paramref name="chars" /> is <c>null</c>.</exception>
    public override int GetByteCount(char[] chars)
    {
        if(chars == null)
            throw new ArgumentNullException(nameof(chars));

        return CountRunes(chars);
    }

    /// <summary>Encodes a set of characters from the specified <see cref="string" /> into the specified byte array.</summary>
    /// <returns>The actual number of bytes written into bytes.</returns>
    /// <param name="s">The <see cref="string" /> containing the set of characters to encode.</param>
    /// <param name="charIndex">The index, in UTF-16 code units, of the first character to encode.</param>
    /// <param name="charCount">The number of UTF-16 code units to encode.</param>
    /// <param name="bytes">The byte array to contain the resulting sequence of bytes.</param>
    /// <param name="byteIndex">The index at which to start writing the resulting sequence of bytes.</param>
    /// <exception cref="ArgumentNullException"><paramref name="s" /> or <paramref name="bytes" /> is <c>null</c>.</exception>
    public override int GetBytes(string s, int charIndex, int charCount, byte[] bytes, int byteIndex)
    {
        if(s == null)
            throw new ArgumentNullException(nameof(s));

        return GetBytes(s.ToCharArray(), charIndex, charCount, bytes, byteIndex);
    }

    /// <summary>Encodes all the characters in the specified string into a sequence of bytes.</summary>
    /// <returns>A byte array containing the results of encoding the specified set of characters.</returns>
    /// <param name="s">The string containing the characters to encode.</param>
    /// <exception cref="ArgumentNullException"><paramref name="s" /> is <c>null</c>.</exception>
    public override byte[] GetBytes(string s)
    {
        if(s == null)
            throw new ArgumentNullException(nameof(s));

        return GetBytes(s.ToCharArray(), 0, s.Length);
    }

    /// <summary>Encodes a set of characters from the specified character array into the specified byte array.</summary>
    /// <returns>The actual number of bytes written into bytes.</returns>
    /// <param name="chars">The character array containing the set of characters to encode.</param>
    /// <param name="charIndex">The index, in UTF-16 code units, of the first character to encode.</param>
    /// <param name="charCount">The number of UTF-16 code units to encode.</param>
    /// <param name="bytes">The byte array to contain the resulting sequence of bytes.</param>
    /// <param name="byteIndex">The index at which to start writing the resulting sequence of bytes.</param>
    /// <exception cref="ArgumentNullException"><paramref name="chars" /> or <paramref name="bytes" /> is <c>null</c>.</exception>
    /// <exception cref="ArgumentOutOfRangeException">An index or count lies outside its array.</exception>
    /// <exception cref="ArgumentException"><paramref name="bytes" /> cannot hold the encoded bytes from <paramref name="byteIndex" />.</exception>
    public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex)
    {
        if(chars == null)
            throw new ArgumentNullException(nameof(chars));

        if(bytes == null)
            throw new ArgumentNullException(nameof(bytes));

        ValidateRange(chars.Length, charIndex, charCount, nameof(charIndex), nameof(charCount));

        if(byteIndex < 0 ||
           byteIndex > bytes.Length)
            throw new ArgumentOutOfRangeException(nameof(byteIndex));

        // Encode into a temporary first so the destination is left untouched if anything fails.
        byte[] encoded = GetBytes(chars, charIndex, charCount);

        // A surrogate pair collapses into a single byte, so the space needed is the encoded length, not charCount.
        if(encoded.Length > bytes.Length - byteIndex)
            throw new ArgumentException("The destination array is too small to hold the encoded bytes.",
                                        nameof(bytes));

        Array.Copy(encoded, 0, bytes, byteIndex, encoded.Length);

        return encoded.Length;
    }

    /// <summary>Encodes a set of characters from the specified character array into a sequence of bytes.</summary>
    /// <returns>A byte array containing one byte per rune found in the specified range.</returns>
    /// <param name="chars">The character array containing the set of characters to encode.</param>
    /// <param name="index">The index, in UTF-16 code units, of the first character to encode.</param>
    /// <param name="count">The number of UTF-16 code units to encode.</param>
    /// <exception cref="ArgumentNullException"><paramref name="chars" /> is <c>null</c>.</exception>
    /// <exception cref="ArgumentOutOfRangeException">
    ///     <paramref name="index" /> and <paramref name="count" /> do not denote a valid range in
    ///     <paramref name="chars" />.
    /// </exception>
    public override byte[] GetBytes(char[] chars, int index, int count)
    {
        if(chars == null)
            throw new ArgumentNullException(nameof(chars));

        ValidateRange(chars.Length, index, count, nameof(index), nameof(count));

        // index/count address UTF-16 code units, as everywhere else in the Encoding API. The range is then walked
        // rune by rune so that a surrogate pair is handed to GetByte as one scalar value. Unpaired surrogates are
        // reported by the enumerator as Rune.ReplacementChar.
        var    source  = new ReadOnlySpan<char>(chars, index, count);
        byte[] encoded = new byte[CountRunes(source)];
        int    outPos  = 0;

        foreach(Rune rune in source.EnumerateRunes())
            encoded[outPos++] = GetByte(rune);

        return encoded;
    }

    /// <summary>Encodes all the characters in the specified character array into a sequence of bytes.</summary>
    /// <returns>A byte array containing the results of encoding the specified set of characters.</returns>
    /// <param name="chars">The character array containing the characters to encode.</param>
    /// <exception cref="ArgumentNullException"><paramref name="chars" /> is <c>null</c>.</exception>
    public override byte[] GetBytes(char[] chars)
    {
        if(chars == null)
            throw new ArgumentNullException(nameof(chars));

        return GetBytes(chars, 0, chars.Length);
    }

    /// <summary>Calculates the number of characters produced by decoding all the bytes in the specified byte array.</summary>
    /// <returns>The number of UTF-16 code units produced by decoding the specified sequence of bytes.</returns>
    /// <param name="bytes">The byte array containing the sequence of bytes to decode.</param>
    /// <exception cref="ArgumentNullException"><paramref name="bytes" /> is <c>null</c>.</exception>
    public override int GetCharCount(byte[] bytes)
    {
        if(bytes == null)
            throw new ArgumentNullException(nameof(bytes));

        return GetCharCount(bytes, 0, bytes.Length);
    }

    /// <summary>Calculates the number of characters produced by decoding a sequence of bytes from the specified byte array.</summary>
    /// <returns>The number of UTF-16 code units produced by decoding the specified sequence of bytes.</returns>
    /// <param name="bytes">The byte array containing the sequence of bytes to decode.</param>
    /// <param name="index">The index of the first byte to decode.</param>
    /// <param name="count">The number of bytes to decode.</param>
    /// <exception cref="ArgumentNullException"><paramref name="bytes" /> is <c>null</c>.</exception>
    /// <exception cref="ArgumentOutOfRangeException">
    ///     <paramref name="index" /> and <paramref name="count" /> do not denote a valid range in
    ///     <paramref name="bytes" />.
    /// </exception>
    public override int GetCharCount(byte[] bytes, int index, int count)
    {
        if(bytes == null)
            throw new ArgumentNullException(nameof(bytes));

        ValidateRange(bytes.Length, index, count, nameof(index), nameof(count));

        // A byte that maps outside the Basic Multilingual Plane decodes to a surrogate pair (two code units),
        // so the result can be larger than the number of bytes.
        int total = 0;

        for(int i = 0; i < count; i++)
            total = checked(total + GetChar(bytes[index + i]).Utf16SequenceLength);

        return total;
    }

    /// <summary>Decodes a sequence of bytes from the specified byte array into the specified character array.</summary>
    /// <returns>The actual number of characters written into chars.</returns>
    /// <param name="bytes">The byte array containing the sequence of bytes to decode.</param>
    /// <param name="byteIndex">The index of the first byte to decode.</param>
    /// <param name="byteCount">The number of bytes to decode.</param>
    /// <param name="chars">The character array to contain the resulting set of characters.</param>
    /// <param name="charIndex">The index at which to start writing the resulting set of characters.</param>
    /// <exception cref="ArgumentNullException"><paramref name="bytes" /> or <paramref name="chars" /> is <c>null</c>.</exception>
    /// <exception cref="ArgumentOutOfRangeException">An index or count lies outside its array.</exception>
    /// <exception cref="ArgumentException"><paramref name="chars" /> cannot hold the decoded characters from <paramref name="charIndex" />.</exception>
    public override int GetChars(byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex)
    {
        if(bytes == null)
            throw new ArgumentNullException(nameof(bytes));

        if(chars == null)
            throw new ArgumentNullException(nameof(chars));

        ValidateRange(bytes.Length, byteIndex, byteCount, nameof(byteIndex), nameof(byteCount));

        if(charIndex < 0 ||
           charIndex > chars.Length)
            throw new ArgumentOutOfRangeException(nameof(charIndex));

        char[] decoded = GetChars(bytes, byteIndex, byteCount);

        // The decoded length, not byteCount, decides whether the destination is large enough: surrogate pairs
        // make the output longer than the input.
        if(decoded.Length > chars.Length - charIndex)
            throw new ArgumentException("The destination array is too small to hold the decoded characters.",
                                        nameof(chars));

        Array.Copy(decoded, 0, chars, charIndex, decoded.Length);

        return decoded.Length;
    }

    /// <summary>Decodes all the bytes in the specified byte array into a set of characters.</summary>
    /// <returns>A character array containing the results of decoding the specified sequence of bytes.</returns>
    /// <param name="bytes">The byte array containing the sequence of bytes to decode.</param>
    /// <exception cref="ArgumentNullException"><paramref name="bytes" /> is <c>null</c>.</exception>
    public override char[] GetChars(byte[] bytes)
    {
        if(bytes == null)
            throw new ArgumentNullException(nameof(bytes));

        return GetChars(bytes, 0, bytes.Length);
    }

    /// <summary>Decodes a sequence of bytes from the specified byte array into a set of characters.</summary>
    /// <returns>A character array containing the results of decoding the specified sequence of bytes.</returns>
    /// <param name="bytes">The byte array containing the sequence of bytes to decode.</param>
    /// <param name="index">The index of the first byte to decode.</param>
    /// <param name="count">The number of bytes to decode.</param>
    public override char[] GetChars(byte[] bytes, int index, int count) =>
        GetString(bytes, index, count).ToCharArray();

    /// <summary>Calculates the maximum number of bytes produced by encoding the specified number of characters.</summary>
    /// <returns>The maximum number of bytes produced by encoding the specified number of characters.</returns>
    /// <param name="charCount">The number of characters to encode.</param>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="charCount" /> is negative.</exception>
    public override int GetMaxByteCount(int charCount)
    {
        if(charCount < 0)
            throw new ArgumentOutOfRangeException(nameof(charCount));

        // Every rune consumes at least one UTF-16 code unit and yields exactly one byte.
        return charCount;
    }

    /// <summary>Calculates the maximum number of characters produced by decoding the specified number of bytes.</summary>
    /// <returns>The maximum number of UTF-16 code units produced by decoding the specified number of bytes.</returns>
    /// <param name="byteCount">The number of bytes to decode.</param>
    /// <exception cref="ArgumentOutOfRangeException">
    ///     <paramref name="byteCount" /> is negative, or the result does not fit in an <see cref="int" />.
    /// </exception>
    public override int GetMaxCharCount(int byteCount)
    {
        if(byteCount < 0 ||
           byteCount > int.MaxValue / 2)
            throw new ArgumentOutOfRangeException(nameof(byteCount));

        // Worst case: every byte decodes to a surrogate pair.
        return byteCount * 2;
    }

    /// <summary>Returns a sequence of bytes that specifies the encoding used.</summary>
    /// <returns>A byte array of length zero, as a preamble is not required.</returns>
    public override byte[] GetPreamble() => Array.Empty<byte>();

    /// <summary>Decodes all the bytes in the specified byte array into a string.</summary>
    /// <returns>A string that contains the results of decoding the specified sequence of bytes.</returns>
    /// <param name="bytes">The byte array containing the sequence of bytes to decode.</param>
    /// <exception cref="ArgumentNullException"><paramref name="bytes" /> is <c>null</c>.</exception>
    public override string GetString(byte[] bytes)
    {
        if(bytes == null)
            throw new ArgumentNullException(nameof(bytes));

        return GetString(bytes, 0, bytes.Length);
    }

    /// <summary>Decodes a sequence of bytes from the specified byte array into a string.</summary>
    /// <returns>A string that contains the results of decoding the specified sequence of bytes.</returns>
    /// <param name="bytes">The byte array containing the sequence of bytes to decode.</param>
    /// <param name="index">The index of the first byte to decode.</param>
    /// <param name="count">The number of bytes to decode.</param>
    /// <exception cref="ArgumentNullException"><paramref name="bytes" /> is <c>null</c>.</exception>
    /// <exception cref="ArgumentOutOfRangeException">
    ///     <paramref name="index" /> and <paramref name="count" /> do not denote a valid range in
    ///     <paramref name="bytes" />.
    /// </exception>
    public override string GetString(byte[] bytes, int index, int count)
    {
        if(bytes == null)
            throw new ArgumentNullException(nameof(bytes));

        ValidateRange(bytes.Length, index, count, nameof(index), nameof(count));

        // First pass: map every byte to its rune and add up how many UTF-16 code units the result needs.
        Rune[] runes      = new Rune[count];
        int    charLength = 0;

        for(int i = 0; i < count; i++)
        {
            runes[i]   =  GetChar(bytes[index + i]);
            charLength += runes[i].Utf16SequenceLength;
        }

        // Second pass: write each rune as one or two code units.
        char[] chars  = new char[charLength];
        int    outPos = 0;

        foreach(Rune rune in runes)
            outPos += rune.EncodeToUtf16(chars.AsSpan(outPos));

        return new string(chars);
    }

    /// <summary>Converts a codepage character to an Unicode character</summary>
    /// <returns>Unicode character.</returns>
    /// <param name="character">Codepage character.</param>
    Rune GetChar(byte character) => CharTable[character];

    /// <summary>Converts a Unicode scalar value to the codepage byte that represents it.</summary>
    /// <returns>Codepage character.</returns>
    /// <param name="character">Unicode character.</param>
    /// <remarks>NOTE(review): the behaviour for runes with no codepage equivalent is defined by the implementer.</remarks>
    private protected abstract byte GetByte(Rune character);

    /// <summary>Counts the runes (Unicode scalar values) contained in a run of UTF-16 code units.</summary>
    /// <returns>The number of runes; each one is encoded to exactly one byte.</returns>
    /// <param name="source">The UTF-16 code units to inspect.</param>
    static int CountRunes(ReadOnlySpan<char> source)
    {
        int               total      = 0;
        SpanRuneEnumerator enumerator = source.EnumerateRunes();

        while(enumerator.MoveNext())
            total++;

        return total;
    }

    /// <summary>Checks that <paramref name="index" /> and <paramref name="count" /> denote a range inside a buffer.</summary>
    /// <param name="length">The length of the buffer.</param>
    /// <param name="index">The start of the range.</param>
    /// <param name="count">The length of the range.</param>
    /// <param name="indexName">The parameter name to report when <paramref name="index" /> is invalid.</param>
    /// <param name="countName">The parameter name to report when <paramref name="count" /> is invalid.</param>
    /// <exception cref="ArgumentOutOfRangeException">The range does not fit inside the buffer.</exception>
    static void ValidateRange(int length, int index, int count, string indexName, string countName)
    {
        // index == length is accepted so that an empty range at the very end (or in an empty buffer) is valid.
        if(index < 0 ||
           index > length)
            throw new ArgumentOutOfRangeException(indexName);

        // Written as a subtraction so that index + count cannot overflow.
        if(count < 0 ||
           count > length - index)
            throw new ArgumentOutOfRangeException(countName);
    }
}
}