Avoid allocations in EntityHelper

This commit is contained in:
MihaZupan
2020-04-04 10:28:45 +02:00
parent 09593ff3da
commit 3d005d6444
7 changed files with 37 additions and 254 deletions

View File

@@ -1,6 +1,7 @@
// Copyright (c) Alexandre Mutel. All rights reserved.
// This file is licensed under the BSD-Clause 2 license.
// See the license.txt file in the project root for more information.
using System;
using System.Collections.Generic;
using Markdig.Helpers;
using Markdig.Parsers;
@@ -115,7 +116,7 @@ namespace Markdig.Extensions.Abbreviations
ValidAbbreviationStart:;
if (prefixTree.TryMatchLongest(text, i, content.End - i + 1, out KeyValuePair<string, Abbreviation> abbreviationMatch))
if (prefixTree.TryMatchLongest(text.AsSpan(i, content.End - i + 1), out KeyValuePair<string, Abbreviation> abbreviationMatch))
{
var match = abbreviationMatch.Key;
if (!IsValidAbbreviationEnding(match, content, i))

View File

@@ -2,6 +2,7 @@
// This file is licensed under the BSD-Clause 2 license.
// See the license.txt file in the project root for more information.
using System;
using System.Collections.Generic;
using Markdig.Helpers;
using Markdig.Parsers;
@@ -34,7 +35,7 @@ namespace Markdig.Extensions.Emoji
}
// Try to match an emoji shortcode or smiley
if (!_emojiMapping.PrefixTree.TryMatchLongest(slice.Text, slice.Start, slice.Length, out KeyValuePair<string, string> match))
if (!_emojiMapping.PrefixTree.TryMatchLongest(slice.Text.AsSpan(slice.Start, slice.Length), out KeyValuePair<string, string> match))
{
return false;
}

View File

@@ -381,7 +381,11 @@ namespace Markdig.Helpers
// Inclusive range test done with a single comparison. When c is below min, (c - min) is
// negative and the cast to uint wraps it to a very large value, which then fails the
// "<= (max - min)" test. Assumes min <= max and the default unchecked arithmetic context.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static bool IsInInclusiveRange(char c, char min, char max)
=> (uint) (c - min) <= (uint) (max - min);
=> (uint)(c - min) <= (uint)(max - min);
// Same single-comparison test for an int value against uint bounds: a negative value, or
// one below min, wraps around in the unsigned subtraction and is rejected by the comparison.
// Assumes min <= max.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static bool IsInInclusiveRange(int value, uint min, uint max)
=> ((uint)value - min) <= (max - min);
public static IEnumerable<int> ToUtf32(StringSlice text)
{

View File

@@ -27,10 +27,7 @@ namespace Markdig.Helpers
/// </summary>
/// <typeparam name="TValue">The value associated with the key</typeparam>
[ExcludeFromCodeCoverage]
internal sealed class CompactPrefixTree<TValue>
//#if !LEGACY
// : IReadOnlyDictionary<string, TValue>, IReadOnlyList<KeyValuePair<string, TValue>>
//#endif
internal sealed class CompactPrefixTree<TValue> : IReadOnlyDictionary<string, TValue>, IReadOnlyList<KeyValuePair<string, TValue>>
{
/// <summary>
/// Used internally to control behavior of insertion
@@ -355,7 +352,7 @@ namespace Markdig.Helpers
{
get
{
if (TryMatchExact(key, out KeyValuePair<string, TValue> match))
if (TryMatchExact(key.AsSpan(), out KeyValuePair<string, TValue> match))
return match.Value;
throw new KeyNotFoundException(key);
}
@@ -367,7 +364,6 @@ namespace Markdig.Helpers
}
} // Get, Set
#if NETCORE
/// <summary>
/// Gets the value associated with the specified key
/// </summary>
@@ -382,7 +378,6 @@ namespace Markdig.Helpers
throw new KeyNotFoundException(key.ToString());
}
} // Get only
#endif
#endregion this[] accessors
@@ -713,50 +708,6 @@ namespace Markdig.Helpers
#region TryMatch longest
/// <summary>
/// Tries to find the longest prefix of text, starting at offset, that is contained in this <see cref="CompactPrefixTree{TValue}"/>
/// </summary>
/// <param name="text">The text in which to search for the prefix</param>
/// <param name="offset">Index of the character at which to start searching</param>
/// <param name="match">The found prefix and the corresponding value</param>
/// <returns>True if a match was found, false otherwise</returns>
public bool TryMatchLongest(string text, int offset, out KeyValuePair<string, TValue> match)
{
#if NETCORE
return TryMatchLongest(text.AsSpan(offset), out match);
#else
return TryMatchLongest(text, offset, text.Length - offset, out match);
#endif
}
/// <summary>
/// Tries to find the longest prefix of text, that is contained in this <see cref="CompactPrefixTree{TValue}"/>
/// </summary>
/// <param name="text">The text in which to search for the prefix</param>
/// <param name="match">The found prefix and the corresponding value</param>
/// <returns>True if a match was found, false otherwise</returns>
public bool TryMatchLongest(string text, out KeyValuePair<string, TValue> match)
{
if (text == null) ThrowHelper.ThrowArgumentNullException(ExceptionArgument.text);
#if NETCORE
return TryMatchLongest(text.AsSpan(), out match);
#else
return TryMatchLongest(text, 0, text.Length, out match);
#endif
}
/// <summary>
/// Tries to find the longest prefix of text, starting at offset, that is contained in this <see cref="CompactPrefixTree{TValue}"/>
/// </summary>
/// <param name="text">The text in which to search for the prefix</param>
/// <param name="offset">The offset in text at which to start looking for the prefix</param>
/// <param name="length">The longest prefix allowed to match</param>
/// <param name="match">The found prefix and the corresponding value</param>
/// <returns>True if a match was found, false otherwise</returns>
#if NETCORE
public bool TryMatchLongest(string text, int offset, int length, out KeyValuePair<string, TValue> match)
=> TryMatchLongest(text.AsSpan(offset, length), out match);
/// <summary>
/// Tries to find the longest prefix of text, that is contained in this <see cref="CompactPrefixTree{TValue}"/>
/// </summary>
@@ -768,20 +719,6 @@ namespace Markdig.Helpers
match = default;
if (text.Length == 0 || !TryGetRoot(text[0], out int nodeIndex))
return false;
#else
public bool TryMatchLongest(string text, int offset, int length, out KeyValuePair<string, TValue> match)
{
int limit = offset + length;
if (text == null)
ThrowHelper.ThrowArgumentNullException(ExceptionArgument.text);
if (offset < 0 || length < 0 || text.Length < limit)
ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.offsetLength, ExceptionReason.InvalidOffsetLength);
match = default;
if (length == 0 || !TryGetRoot(text[offset], out int nodeIndex))
return false;
#endif
int matchIndex = -1;
int depth = 1;
@@ -790,11 +727,7 @@ namespace Markdig.Helpers
if (node.ChildChar == 0) goto LeafNodeFound;
if (node.MatchIndex != -1) matchIndex = node.MatchIndex;
#if NETCORE
for (int i = 1; i < text.Length; i++)
#else
for (int i = offset + 1; i < limit; i++)
#endif
{
char c = text[i];
@@ -826,7 +759,6 @@ namespace Markdig.Helpers
LeafNodeFound:;
ref KeyValuePair<string, TValue> possibleMatch = ref _matches[node.MatchIndex];
#if NETCORE
if (possibleMatch.Key.Length <= text.Length)
{
// Check that the rest of the strings match
@@ -835,18 +767,6 @@ namespace Markdig.Helpers
matchIndex = node.MatchIndex;
}
}
#else
if (possibleMatch.Key.Length <= length)
{
// Check that the rest of the strings match
for (int i = offset + depth, j = depth; j < possibleMatch.Key.Length; i++, j++)
{
if (text[i] != possibleMatch.Key[j])
goto Return;
}
matchIndex = node.MatchIndex;
}
#endif
Return:;
if (matchIndex != -1)
@@ -861,50 +781,6 @@ namespace Markdig.Helpers
#region TryMatch exact
/// <summary>
/// Tries to find a suffix of text, starting at offset, that is contained in this <see cref="CompactPrefixTree{TValue}"/> and is exactly (text.Length - offset) characters long
/// </summary>
/// <param name="text">The text in which to search for the prefix</param>
/// <param name="offset">Index of the character at which to start searching</param>
/// <param name="match">The found prefix and the corresponding value</param>
/// <returns>True if a match was found, false otherwise</returns>
public bool TryMatchExact(string text, int offset, out KeyValuePair<string, TValue> match)
{
#if NETCORE
return TryMatchExact(text.AsSpan(offset), out match);
#else
return TryMatchExact(text, offset, text.Length - offset, out match);
#endif
}
/// <summary>
/// Tries to find a prefix of text, that is contained in this <see cref="CompactPrefixTree{TValue}"/> and is exactly text.Length characters long
/// </summary>
/// <param name="text">The text in which to search for the prefix</param>
/// <param name="match">The found prefix and the corresponding value</param>
/// <returns>True if a match was found, false otherwise</returns>
public bool TryMatchExact(string text, out KeyValuePair<string, TValue> match)
{
if (text == null) ThrowHelper.ThrowArgumentNullException(ExceptionArgument.text);
#if NETCORE
return TryMatchExact(text.AsSpan(), out match);
#else
return TryMatchExact(text, 0, text.Length, out match);
#endif
}
/// <summary>
/// Tries to find a prefix of text, starting at offset, that is contained in this <see cref="CompactPrefixTree{TValue}"/> and is exactly length characters long
/// </summary>
/// <param name="text">The text in which to search for the prefix</param>
/// <param name="offset">The offset in text at which to start looking for the prefix</param>
/// <param name="length">The longest prefix allowed to match</param>
/// <param name="match">The found prefix and the corresponding value</param>
/// <returns>True if a match was found, false otherwise</returns>
#if NETCORE
public bool TryMatchExact(string text, int offset, int length, out KeyValuePair<string, TValue> match)
=> TryMatchExact(text.AsSpan(offset, length), out match);
/// <summary>
/// Tries to find a prefix of text, that is contained in this <see cref="CompactPrefixTree{TValue}"/> and is exactly text.Length characters long
/// </summary>
@@ -916,39 +792,18 @@ namespace Markdig.Helpers
match = default;
if (text.Length == 0 || !TryGetRoot(text[0], out int nodeIndex))
return false;
#else
public bool TryMatchExact(string text, int offset, int length, out KeyValuePair<string, TValue> match)
{
int limit = offset + length;
if (text == null)
ThrowHelper.ThrowArgumentNullException(ExceptionArgument.text);
if (offset < 0 || length < 0 || text.Length < limit)
ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.offsetLength, ExceptionReason.InvalidOffsetLength);
match = default;
if (length == 0 || !TryGetRoot(text[offset], out int nodeIndex))
return false;
#endif
int depth = 1;
ref Node node = ref _tree[nodeIndex];
if (node.ChildChar == 0) goto LeafNodeFound;
#if NETCORE
if (node.MatchIndex != -1 && text.Length == 1)
#else
if (node.MatchIndex != -1 && length == 1)
#endif
{
match = _matches[node.MatchIndex];
return true;
}
#if NETCORE
for (int i = 1; i < text.Length; i++)
#else
for (int i = offset + 1; i < limit; i++)
#endif
{
char c = text[i];
@@ -977,81 +832,20 @@ namespace Markdig.Helpers
if (node.MatchIndex == -1) return false;
match = _matches[node.MatchIndex];
#if NETCORE
Debug.Assert(match.Key.Length == text.Length);
#else
Debug.Assert(match.Key.Length == length);
#endif
return true;
LeafNodeFound:;
match = _matches[node.MatchIndex];
#if NETCORE
return match.Key.Length == text.Length &&
text.Slice(depth).Equals(match.Key.AsSpan(depth), StringComparison.Ordinal);
#else
if (match.Key.Length == length)
{
// Check that the rest of the strings match
for (int i = offset + depth, j = depth; j < match.Key.Length; i++, j++)
{
if (text[i] != match.Key[j])
return false;
}
return true;
}
return false;
#endif
}
#endregion TryMatch exact
#region TryMatch shortest
/// <summary>
/// Tries to find the shortest prefix of text, starting at offset, that is contained in this <see cref="CompactPrefixTree{TValue}"/>
/// </summary>
/// <param name="text">The text in which to search for the prefix</param>
/// <param name="offset">Index of the character at which to start searching</param>
/// <param name="match">The found prefix and the corresponding value</param>
/// <returns>True if a match was found, false otherwise</returns>
public bool TryMatchShortest(string text, int offset, out KeyValuePair<string, TValue> match)
{
#if NETCORE
return TryMatchShortest(text.AsSpan(offset), out match);
#else
return TryMatchShortest(text, offset, text.Length - offset, out match);
#endif
}
/// <summary>
/// Tries to find the shortest prefix of text, that is contained in this <see cref="CompactPrefixTree{TValue}"/>
/// </summary>
/// <param name="text">The text in which to search for the prefix</param>
/// <param name="match">The found prefix and the corresponding value</param>
/// <returns>True if a match was found, false otherwise</returns>
public bool TryMatchShortest(string text, out KeyValuePair<string, TValue> match)
{
if (text == null) ThrowHelper.ThrowArgumentNullException(ExceptionArgument.text);
#if NETCORE
return TryMatchShortest(text.AsSpan(), out match);
#else
return TryMatchShortest(text, 0, text.Length, out match);
#endif
}
/// <summary>
/// Tries to find the shortest prefix of text, starting at offset, that is contained in this <see cref="CompactPrefixTree{TValue}"/>
/// </summary>
/// <param name="text">The text in which to search for the prefix</param>
/// <param name="offset">The offset in text at which to start looking for the prefix</param>
/// <param name="length">The longest prefix allowed to match</param>
/// <param name="match">The found prefix and the corresponding value</param>
/// <returns>True if a match was found, false otherwise</returns>
#if NETCORE
public bool TryMatchShortest(string text, int offset, int length, out KeyValuePair<string, TValue> match)
=> TryMatchShortest(text.AsSpan(offset, length), out match);
/// <summary>
/// Tries to find the shortest prefix of text, that is contained in this <see cref="CompactPrefixTree{TValue}"/>
/// </summary>
@@ -1063,20 +857,7 @@ namespace Markdig.Helpers
match = default;
if (text.Length == 0 || !TryGetRoot(text[0], out int nodeIndex))
return false;
#else
public bool TryMatchShortest(string text, int offset, int length, out KeyValuePair<string, TValue> match)
{
int limit = offset + length;
if (text == null)
ThrowHelper.ThrowArgumentNullException(ExceptionArgument.text);
if (offset < 0 || length < 0 || text.Length < limit)
ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.offsetLength, ExceptionReason.InvalidOffsetLength);
match = default;
if (length == 0 || !TryGetRoot(text[offset], out int nodeIndex))
return false;
#endif
ref Node node = ref _tree[nodeIndex];
if (node.MatchIndex != -1)
{
@@ -1084,11 +865,7 @@ namespace Markdig.Helpers
return true;
}
#if NETCORE
for (int i = 1; i < text.Length; i++)
#else
for (int i = offset + 1; i < limit; i++)
#endif
{
char c = text[i];
@@ -1131,7 +908,7 @@ namespace Markdig.Helpers
/// <param name="key">The key to locate in this <see cref="CompactPrefixTree{TValue}"/></param>
/// <returns>True if the key is contained in this PrefixTree, false otherwise.</returns>
// Delegates to TryMatchExact and discards the matched pair; only the success flag is used.
public bool ContainsKey(string key)
=> TryMatchExact(key, out _);
=> TryMatchExact(key.AsSpan(), out _);
/// <summary>
/// Gets the value associated with the specified key
@@ -1141,7 +918,7 @@ namespace Markdig.Helpers
/// <returns>True if the key is contained in this PrefixTree, false otherwise.</returns>
public bool TryGetValue(string key, out TValue value)
{
bool ret = TryMatchExact(key, out KeyValuePair<string, TValue> match);
bool ret = TryMatchExact(key.AsSpan(), out KeyValuePair<string, TValue> match);
// value is copied from match regardless of ret.
// NOTE(review): TryMatchExact's leaf-node path assigns match before its final length/content
// comparison, so on a miss value may hold another entry's value rather than default - callers
// should rely on the returned bool only. Confirm whether resetting to default is wanted.
value = match.Value;
return ret;
}
@@ -1175,9 +952,8 @@ namespace Markdig.Helpers
/// </summary>
/// <returns></returns>
public IEnumerator<KeyValuePair<string, TValue>> GetEnumerator() => new Enumerator(_matches);
//#if !LEGACY
//IEnumerator IEnumerable.GetEnumerator() => new Enumerator(_matches);
//#endif
IEnumerator IEnumerable.GetEnumerator() => new Enumerator(_matches);
/// <summary>
/// Enumerates the elements of a <see cref="CompactPrefixTree{TValue}"/>

View File

@@ -1,4 +1,4 @@
// Copyright (c) Alexandre Mutel. All rights reserved.
// Copyright (c) Alexandre Mutel. All rights reserved.
// This file is licensed under the BSD-Clause 2 license.
// See the license.txt file in the project root for more information.
@@ -46,11 +46,10 @@ namespace Markdig.Helpers
/// </summary>
/// <param name="entity">The entity without <c>&amp;</c> and <c>;</c> symbols, for example, <c>copy</c>.</param>
/// <returns>The unicode character set or <c>null</c> if the entity was not recognized.</returns>
public static string DecodeEntity(string entity)
public static string DecodeEntity(ReadOnlySpan<char> entity)
{
// Look the entity name up in EntityMap; an unrecognized name falls through to the
// null return at the end of the method.
string result;
if (EntityMap.TryGetValue(entity, out result))
return result;
if (EntityMap.TryMatchExact(entity, out KeyValuePair<string, string> result))
return result.Value;
return null;
}
@@ -61,25 +60,31 @@ namespace Markdig.Helpers
/// <returns>The unicode character set or <c>null</c> if the entity was not recognized.</returns>
public static string DecodeEntity(int utf32)
{
// Reject values outside 0..1114111 (0x10FFFF, the highest Unicode code point) and values
// inside 55296..57343 (0xD800..0xDFFF, the UTF-16 surrogate range).
if (utf32 < 0 || utf32 > 1114111 || (utf32 >= 55296 && utf32 <= 57343))
if (!CharHelper.IsInInclusiveRange(utf32, 0, 1114111) || CharHelper.IsInInclusiveRange(utf32, 55296, 57343))
return null;
// Values below 65536 (0x10000) fit in a single UTF-16 char.
if (utf32 < 65536)
return char.ToString((char)utf32);
// Otherwise encode as a surrogate pair: after subtracting 0x10000 the remaining value has at
// most 20 bits; the upper 10 bits (/ 1024) are added to 55296 (0xD800) to form the high
// surrogate and the lower 10 bits (% 1024) are added to 56320 (0xDC00) to form the low one.
utf32 -= 65536;
return new string(new char[]
return new string(
// Under NETCORE the two-char buffer is created with stackalloc instead of new.
#if NETCORE
stackalloc
#else
new
#endif
char[]
{
(char)(utf32 / 1024 + 55296),
(char)(utf32 % 1024 + 56320)
(char)((uint)utf32 / 1024 + 55296),
(char)((uint)utf32 % 1024 + 56320)
});
}
#region [ EntityMap ]
#region [ EntityMap ]
/// <summary>
/// Source: http://www.w3.org/html/wg/drafts/html/master/syntax.html#named-character-references
/// </summary>
private static readonly Dictionary<string, string> EntityMap = new Dictionary<string, string>(2125, StringComparer.Ordinal)
private static readonly CompactPrefixTree<string> EntityMap = new CompactPrefixTree<string>(2125, 3385, 3510)
{
{ "Aacute", "\u00C1" },
{ "aacute", "\u00E1" },
@@ -2207,6 +2212,6 @@ namespace Markdig.Helpers
{ "zwj", "\u200D" },
{ "zwnj", "\u200C" }
};
#endregion
#endregion
}
}

View File

@@ -486,8 +486,7 @@ namespace Markdig.Helpers
if (entityNameLength > 0)
{
var namedEntity = new StringSlice(text, entityNameStart, entityNameStart + entityNameLength - 1);
var decoded = EntityHelper.DecodeEntity(namedEntity.ToString());
var decoded = EntityHelper.DecodeEntity(text.AsSpan(entityNameStart, entityNameLength));
if (decoded != null)
{
sb.Append(text, lastPos, searchPos - match - lastPos);

View File

@@ -2,7 +2,7 @@
// This file is licensed under the BSD-Clause 2 license.
// See the license.txt file in the project root for more information.
using System.Text;
using System;
using Markdig.Helpers;
using Markdig.Syntax;
using Markdig.Syntax.Inlines;
@@ -27,10 +27,7 @@ namespace Markdig.Parsers.Inlines
public static bool TryParse(ref StringSlice slice, out string literal, out int match)
{
literal = null;
int entityNameStart;
int entityNameLength;
int entityValue;
match = HtmlHelper.ScanEntity(slice, out entityValue, out entityNameStart, out entityNameLength);
match = HtmlHelper.ScanEntity(slice, out int entityValue, out int entityNameStart, out int entityNameLength);
if (match == 0)
{
return false;
@@ -38,7 +35,7 @@ namespace Markdig.Parsers.Inlines
if (entityNameLength > 0)
{
literal = EntityHelper.DecodeEntity(new StringSlice(slice.Text, entityNameStart, entityNameStart + entityNameLength - 1).ToString());
literal = EntityHelper.DecodeEntity(slice.Text.AsSpan(entityNameStart, entityNameLength));
}
else if (entityValue >= 0)
{