using System;
using System.Globalization;
using System.Linq;
using System.Text;

namespace Claunia.Encoding
{
    /// <summary>
    ///     Implements a class that converts to/from a single byte codepage and strings that contain elements that need
    ///     surrogates in UTF-16, using runes. Every <see cref="Rune" /> (Unicode scalar value) of the input is encoded
    ///     as exactly one byte, and every byte decodes to exactly one <see cref="Rune" />, which may occupy one or two
    ///     UTF-16 code units.
    /// </summary>
    public abstract class SingleByteEncodingWithRunes : Encoding
    {
        /// <summary>Gets the table that maps each codepage byte (used as the index) to its Unicode scalar value.</summary>
        protected abstract Rune[] CharTable { get; }

        /// <summary>Gets a value indicating whether the current encoding can be used by browser clients for displaying content.</summary>
        public abstract override bool IsBrowserDisplay { get; }

        /// <summary>Gets a value indicating whether the current encoding can be used by browser clients for saving content.</summary>
        public abstract override bool IsBrowserSave { get; }

        /// <summary>
        ///     Gets a value indicating whether the current encoding can be used by mail and news clients for displaying
        ///     content.
        /// </summary>
        public abstract override bool IsMailNewsDisplay { get; }

        /// <summary>Gets a value indicating whether the current encoding can be used by mail and news clients for saving content.</summary>
        public abstract override bool IsMailNewsSave { get; }

        /// <summary>Gets a value indicating whether the current encoding is read-only.</summary>
        public abstract override bool IsReadOnly { get; }

        /// <summary>Gets a value indicating whether the current encoding uses single-byte code points.</summary>
        public abstract override bool IsSingleByte { get; }

        /// <summary>Gets the code page identifier of the current Encoding.</summary>
        public abstract override int CodePage { get; }

        /// <summary>Gets a name for the current encoding that can be used with mail agent body tags.</summary>
        public abstract override string BodyName { get; }

        /// <summary>Gets a name for the current encoding that can be used with mail agent header tags.</summary>
        public abstract override string HeaderName { get; }

        /// <summary>Gets the name registered with the Internet Assigned Numbers Authority (IANA) for the current encoding.</summary>
        public abstract override string WebName { get; }

        /// <summary>Gets the human-readable description of the current encoding.</summary>
        public abstract override string EncodingName { get; }

        /// <summary>Gets the Windows operating system code page that most closely corresponds to the current encoding.</summary>
        public abstract override int WindowsCodePage { get; }

        /// <summary>Calculates the number of bytes produced by encoding the characters in the specified <see cref="string" />.</summary>
        /// <returns>The number of bytes produced by encoding the specified characters (one per rune).</returns>
        /// <param name="s">The <see cref="string" /> containing the set of characters to encode.</param>
        /// <exception cref="ArgumentNullException"><paramref name="s" /> is <c>null</c>.</exception>
        public override int GetByteCount(string s)
        {
            if(s == null)
                throw new ArgumentNullException(nameof(s));

            return CountRunes(s.AsSpan());
        }

        /// <summary>Calculates the number of bytes produced by encoding a set of characters from the specified character array.</summary>
        /// <returns>The number of bytes produced by encoding the specified characters (one per rune).</returns>
        /// <param name="chars">The character array containing the set of characters to encode.</param>
        /// <param name="index">The index of the first character to encode.</param>
        /// <param name="count">The number of characters to encode.</param>
        /// <exception cref="ArgumentNullException"><paramref name="chars" /> is <c>null</c>.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        ///     <paramref name="index" /> or <paramref name="count" /> do not denote a valid range in
        ///     <paramref name="chars" />.
        /// </exception>
        public override int GetByteCount(char[] chars, int index, int count)
        {
            if(chars == null)
                throw new ArgumentNullException(nameof(chars));

            CheckRange(chars.Length, index, nameof(index), count, nameof(count));

            return CountRunes(new ReadOnlySpan<char>(chars, index, count));
        }

        /// <summary>Calculates the number of bytes produced by encoding all the characters in the specified character array.</summary>
        /// <returns>The number of bytes produced by encoding all the characters in the specified character array.</returns>
        /// <param name="chars">The character array containing the characters to encode.</param>
        /// <exception cref="ArgumentNullException"><paramref name="chars" /> is <c>null</c>.</exception>
        public override int GetByteCount(char[] chars)
        {
            if(chars == null)
                throw new ArgumentNullException(nameof(chars));

            return CountRunes(new ReadOnlySpan<char>(chars));
        }

        /// <summary>Encodes a set of characters from the specified <see cref="string" /> into the specified byte array.</summary>
        /// <returns>The actual number of bytes written into <paramref name="bytes" />.</returns>
        /// <param name="s">The <see cref="string" /> containing the set of characters to encode.</param>
        /// <param name="charIndex">The index of the first character to encode.</param>
        /// <param name="charCount">The number of characters to encode.</param>
        /// <param name="bytes">The byte array to contain the resulting sequence of bytes.</param>
        /// <param name="byteIndex">The index at which to start writing the resulting sequence of bytes.</param>
        /// <exception cref="ArgumentNullException"><paramref name="s" /> or <paramref name="bytes" /> is <c>null</c>.</exception>
        /// <exception cref="ArgumentOutOfRangeException">An index or count is outside the bounds of its buffer.</exception>
        /// <exception cref="ArgumentException"><paramref name="bytes" /> is too small to hold the encoded bytes.</exception>
        public override int GetBytes(string s, int charIndex, int charCount, byte[] bytes, int byteIndex)
        {
            if(s == null)
                throw new ArgumentNullException(nameof(s));

            if(bytes == null)
                throw new ArgumentNullException(nameof(bytes));

            CheckRange(s.Length, charIndex, nameof(charIndex), charCount, nameof(charCount));

            return EncodeInto(s.AsSpan(charIndex, charCount), bytes, byteIndex);
        }

        /// <summary>Encodes all the characters in the specified string into a sequence of bytes.</summary>
        /// <returns>A byte array containing the results of encoding the specified set of characters.</returns>
        /// <param name="s">The string containing the characters to encode.</param>
        /// <exception cref="ArgumentNullException"><paramref name="s" /> is <c>null</c>.</exception>
        public override byte[] GetBytes(string s)
        {
            if(s == null)
                throw new ArgumentNullException(nameof(s));

            return Encode(s.AsSpan());
        }

        /// <summary>Encodes a set of characters from the specified character array into the specified byte array.</summary>
        /// <returns>The actual number of bytes written into <paramref name="bytes" />.</returns>
        /// <param name="chars">The character array containing the set of characters to encode.</param>
        /// <param name="charIndex">The index of the first character to encode.</param>
        /// <param name="charCount">The number of characters to encode.</param>
        /// <param name="bytes">The byte array to contain the resulting sequence of bytes.</param>
        /// <param name="byteIndex">The index at which to start writing the resulting sequence of bytes.</param>
        /// <exception cref="ArgumentNullException"><paramref name="chars" /> or <paramref name="bytes" /> is <c>null</c>.</exception>
        /// <exception cref="ArgumentOutOfRangeException">An index or count is outside the bounds of its buffer.</exception>
        /// <exception cref="ArgumentException"><paramref name="bytes" /> is too small to hold the encoded bytes.</exception>
        public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex)
        {
            if(chars == null)
                throw new ArgumentNullException(nameof(chars));

            if(bytes == null)
                throw new ArgumentNullException(nameof(bytes));

            CheckRange(chars.Length, charIndex, nameof(charIndex), charCount, nameof(charCount));

            return EncodeInto(new ReadOnlySpan<char>(chars, charIndex, charCount), bytes, byteIndex);
        }

        /// <summary>Encodes a set of characters from the specified character array into a sequence of bytes.</summary>
        /// <returns>A byte array containing the results of encoding the specified set of characters.</returns>
        /// <param name="chars">The character array containing the set of characters to encode.</param>
        /// <param name="index">The index (in UTF-16 code units) of the first character to encode.</param>
        /// <param name="count">The number of characters (UTF-16 code units) to encode.</param>
        /// <exception cref="ArgumentNullException"><paramref name="chars" /> is <c>null</c>.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        ///     <paramref name="index" /> or <paramref name="count" /> do not denote a valid range in
        ///     <paramref name="chars" />.
        /// </exception>
        public override byte[] GetBytes(char[] chars, int index, int count)
        {
            if(chars == null)
                throw new ArgumentNullException(nameof(chars));

            CheckRange(chars.Length, index, nameof(index), count, nameof(count));

            return Encode(new ReadOnlySpan<char>(chars, index, count));
        }

        /// <summary>Encodes all the characters in the specified character array into a sequence of bytes.</summary>
        /// <returns>A byte array containing the results of encoding the specified set of characters.</returns>
        /// <param name="chars">The character array containing the characters to encode.</param>
        /// <exception cref="ArgumentNullException"><paramref name="chars" /> is <c>null</c>.</exception>
        public override byte[] GetBytes(char[] chars)
        {
            if(chars == null)
                throw new ArgumentNullException(nameof(chars));

            return Encode(new ReadOnlySpan<char>(chars));
        }

        /// <summary>Calculates the number of characters produced by decoding all the bytes in the specified byte array.</summary>
        /// <returns>The number of characters produced by decoding the specified sequence of bytes.</returns>
        /// <param name="bytes">The byte array containing the sequence of bytes to decode.</param>
        /// <exception cref="ArgumentNullException"><paramref name="bytes" /> is <c>null</c>.</exception>
        public override int GetCharCount(byte[] bytes)
        {
            if(bytes == null)
                throw new ArgumentNullException(nameof(bytes));

            return CountDecodedChars(bytes, 0, bytes.Length);
        }

        /// <summary>Calculates the number of characters produced by decoding a sequence of bytes from the specified byte array.</summary>
        /// <returns>
        ///     The number of UTF-16 code units produced by decoding the specified sequence of bytes. This can exceed
        ///     <paramref name="count" /> when a byte maps to a rune that needs a surrogate pair.
        /// </returns>
        /// <param name="bytes">The byte array containing the sequence of bytes to decode.</param>
        /// <param name="index">The index of the first byte to decode.</param>
        /// <param name="count">The number of bytes to decode.</param>
        /// <exception cref="ArgumentNullException"><paramref name="bytes" /> is <c>null</c>.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        ///     <paramref name="index" /> or <paramref name="count" /> do not denote a valid range in
        ///     <paramref name="bytes" />.
        /// </exception>
        public override int GetCharCount(byte[] bytes, int index, int count)
        {
            if(bytes == null)
                throw new ArgumentNullException(nameof(bytes));

            CheckRange(bytes.Length, index, nameof(index), count, nameof(count));

            return CountDecodedChars(bytes, index, count);
        }

        /// <summary>Decodes a sequence of bytes from the specified byte array into the specified character array.</summary>
        /// <returns>The actual number of characters written into <paramref name="chars" />.</returns>
        /// <param name="bytes">The byte array containing the sequence of bytes to decode.</param>
        /// <param name="byteIndex">The index of the first byte to decode.</param>
        /// <param name="byteCount">The number of bytes to decode.</param>
        /// <param name="chars">The character array to contain the resulting set of characters.</param>
        /// <param name="charIndex">The index at which to start writing the resulting set of characters.</param>
        /// <exception cref="ArgumentNullException"><paramref name="bytes" /> or <paramref name="chars" /> is <c>null</c>.</exception>
        /// <exception cref="ArgumentOutOfRangeException">An index or count is outside the bounds of its buffer.</exception>
        /// <exception cref="ArgumentException"><paramref name="chars" /> is too small to hold the decoded characters.</exception>
        public override int GetChars(byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex)
        {
            if(bytes == null)
                throw new ArgumentNullException(nameof(bytes));

            if(chars == null)
                throw new ArgumentNullException(nameof(chars));

            CheckRange(bytes.Length, byteIndex, nameof(byteIndex), byteCount, nameof(byteCount));

            if(charIndex < 0 ||
               charIndex > chars.Length)
                throw new ArgumentOutOfRangeException(nameof(charIndex));

            // Size the destination by UTF-16 code units, not bytes: a table entry may need a surrogate pair.
            int required = CountDecodedChars(bytes, byteIndex, byteCount);

            if(required > chars.Length - charIndex)
                throw new ArgumentException("The output char buffer is too small to contain the decoded characters.",
                                            nameof(chars));

            return Decode(bytes, byteIndex, byteCount, new Span<char>(chars, charIndex, required));
        }

        /// <summary>Decodes all the bytes in the specified byte array into a set of characters.</summary>
        /// <returns>A character array containing the results of decoding the specified sequence of bytes.</returns>
        /// <param name="bytes">The byte array containing the sequence of bytes to decode.</param>
        /// <exception cref="ArgumentNullException"><paramref name="bytes" /> is <c>null</c>.</exception>
        public override char[] GetChars(byte[] bytes)
        {
            if(bytes == null)
                throw new ArgumentNullException(nameof(bytes));

            return DecodeToArray(bytes, 0, bytes.Length);
        }

        /// <summary>Decodes a sequence of bytes from the specified byte array into a set of characters.</summary>
        /// <returns>A character array containing the results of decoding the specified sequence of bytes.</returns>
        /// <param name="bytes">The byte array containing the sequence of bytes to decode.</param>
        /// <param name="index">The index of the first byte to decode.</param>
        /// <param name="count">The number of bytes to decode.</param>
        /// <exception cref="ArgumentNullException"><paramref name="bytes" /> is <c>null</c>.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        ///     <paramref name="index" /> or <paramref name="count" /> do not denote a valid range in
        ///     <paramref name="bytes" />.
        /// </exception>
        public override char[] GetChars(byte[] bytes, int index, int count)
        {
            if(bytes == null)
                throw new ArgumentNullException(nameof(bytes));

            CheckRange(bytes.Length, index, nameof(index), count, nameof(count));

            return DecodeToArray(bytes, index, count);
        }

        /// <summary>Calculates the maximum number of bytes produced by encoding the specified number of characters.</summary>
        /// <returns>The maximum number of bytes produced by encoding the specified number of characters.</returns>
        /// <param name="charCount">The number of characters to encode.</param>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="charCount" /> is negative.</exception>
        public override int GetMaxByteCount(int charCount)
        {
            if(charCount < 0)
                throw new ArgumentOutOfRangeException(nameof(charCount));

            // Each UTF-16 code unit yields at most one rune, and each rune yields exactly one byte.
            return charCount;
        }

        /// <summary>Calculates the maximum number of characters produced by decoding the specified number of bytes.</summary>
        /// <returns>The maximum number of characters produced by decoding the specified number of bytes.</returns>
        /// <param name="byteCount">The number of bytes to decode.</param>
        /// <exception cref="ArgumentOutOfRangeException">
        ///     <paramref name="byteCount" /> is negative, or the resulting character count does not fit in an
        ///     <see cref="int" />.
        /// </exception>
        public override int GetMaxCharCount(int byteCount)
        {
            if(byteCount < 0)
                throw new ArgumentOutOfRangeException(nameof(byteCount));

            // Worst case every byte decodes to a rune that needs a surrogate pair (two UTF-16 code units).
            long maxChars = (long)byteCount * 2;

            if(maxChars > int.MaxValue)
                throw new ArgumentOutOfRangeException(nameof(byteCount));

            return (int)maxChars;
        }

        /// <summary>Returns a sequence of bytes that specifies the encoding used.</summary>
        /// <returns>A byte array of length zero, as a preamble is not required.</returns>
        public override byte[] GetPreamble() => Array.Empty<byte>();

        /// <summary>Decodes all the bytes in the specified byte array into a string.</summary>
        /// <returns>A string that contains the results of decoding the specified sequence of bytes.</returns>
        /// <param name="bytes">The byte array containing the sequence of bytes to decode.</param>
        /// <exception cref="ArgumentNullException"><paramref name="bytes" /> is <c>null</c>.</exception>
        public override string GetString(byte[] bytes)
        {
            if(bytes == null)
                throw new ArgumentNullException(nameof(bytes));

            return new string(DecodeToArray(bytes, 0, bytes.Length));
        }

        /// <summary>Decodes a sequence of bytes from the specified byte array into a string.</summary>
        /// <returns>A string that contains the results of decoding the specified sequence of bytes.</returns>
        /// <param name="bytes">The byte array containing the sequence of bytes to decode.</param>
        /// <param name="index">The index of the first byte to decode.</param>
        /// <param name="count">The number of bytes to decode.</param>
        /// <exception cref="ArgumentNullException"><paramref name="bytes" /> is <c>null</c>.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        ///     <paramref name="index" /> or <paramref name="count" /> do not denote a valid range in
        ///     <paramref name="bytes" />.
        /// </exception>
        public override string GetString(byte[] bytes, int index, int count)
        {
            if(bytes == null)
                throw new ArgumentNullException(nameof(bytes));

            CheckRange(bytes.Length, index, nameof(index), count, nameof(count));

            return new string(DecodeToArray(bytes, index, count));
        }

        /// <summary>Converts a codepage character to an Unicode character</summary>
        /// <returns>Unicode character.</returns>
        /// <param name="character">Codepage character.</param>
        private Rune GetChar(byte character) => CharTable[character];

        /// <summary>Converts a Unicode character to a codepage character</summary>
        /// <returns>Codepage character.</returns>
        /// <param name="character">Unicode character.</param>
        private protected abstract byte GetByte(Rune character);

        /// <summary>Throws if <paramref name="index" />/<paramref name="count" /> do not describe a range inside a buffer of the given length.</summary>
        /// <param name="length">Length of the buffer being sliced.</param>
        /// <param name="index">Start of the range; may equal <paramref name="length" /> when the range is empty.</param>
        /// <param name="indexName">Parameter name reported when <paramref name="index" /> is invalid.</param>
        /// <param name="count">Number of elements in the range.</param>
        /// <param name="countName">Parameter name reported when <paramref name="count" /> is invalid.</param>
        private static void CheckRange(int length, int index, string indexName, int count, string countName)
        {
            if(index < 0 ||
               index > length)
                throw new ArgumentOutOfRangeException(indexName);

            // Compared as "count > length - index" so the sum cannot overflow.
            if(count < 0 ||
               count > length - index)
                throw new ArgumentOutOfRangeException(countName);
        }

        /// <summary>Counts the runes in a UTF-16 sequence, which equals the number of bytes it encodes to.</summary>
        private static int CountRunes(ReadOnlySpan<char> chars)
        {
            int               total      = 0;
            SpanRuneEnumerator enumerator = chars.EnumerateRunes();

            while(enumerator.MoveNext())
                total++;

            return total;
        }

        /// <summary>Encodes every rune of <paramref name="source" /> into a newly allocated byte array, one byte per rune.</summary>
        private byte[] Encode(ReadOnlySpan<char> source)
        {
            byte[] encoded = new byte[CountRunes(source)];
            int    written = 0;

            foreach(Rune rune in source.EnumerateRunes())
                encoded[written++] = GetByte(rune);

            return encoded;
        }

        /// <summary>Encodes <paramref name="source" /> and copies the result into <paramref name="bytes" /> starting at <paramref name="byteIndex" />.</summary>
        /// <returns>The number of bytes written.</returns>
        private int EncodeInto(ReadOnlySpan<char> source, byte[] bytes, int byteIndex)
        {
            if(byteIndex < 0 ||
               byteIndex > bytes.Length)
                throw new ArgumentOutOfRangeException(nameof(byteIndex));

            byte[] encoded = Encode(source);

            if(encoded.Length > bytes.Length - byteIndex)
                throw new ArgumentException("The output byte buffer is too small to contain the encoded data.",
                                            nameof(bytes));

            Array.Copy(encoded, 0, bytes, byteIndex, encoded.Length);

            return encoded.Length;
        }

        /// <summary>Computes how many UTF-16 code units the given (already validated) byte range decodes to.</summary>
        private int CountDecodedChars(byte[] bytes, int index, int count)
        {
            int total = 0;

            for(int i = 0; i < count; i++)
                total += GetChar(bytes[index + i]).Utf16SequenceLength;

            return total;
        }

        /// <summary>Decodes the given (already validated) byte range into <paramref name="destination" />.</summary>
        /// <returns>The number of UTF-16 code units written.</returns>
        private int Decode(byte[] bytes, int index, int count, Span<char> destination)
        {
            int written = 0;

            for(int i = 0; i < count; i++)
                written += GetChar(bytes[index + i]).EncodeToUtf16(destination.Slice(written));

            return written;
        }

        /// <summary>Decodes the given (already validated) byte range into a newly allocated, exactly sized char array.</summary>
        private char[] DecodeToArray(byte[] bytes, int index, int count)
        {
            char[] decoded = new char[CountDecodedChars(bytes, index, count)];

            Decode(bytes, index, count, decoded);

            return decoded;
        }
    }
}