mirror of
https://github.com/xoofx/markdig.git
synced 2026-02-04 05:44:50 +00:00
Add SearchValues polyfill and use it in CharacterMap
This commit is contained in:
@@ -2,14 +2,10 @@
|
||||
// This file is licensed under the BSD-Clause 2 license.
|
||||
// See the license.txt file in the project root for more information.
|
||||
|
||||
using System.Buffers;
|
||||
using System.Diagnostics;
|
||||
using System.Linq;
|
||||
using System.Runtime.CompilerServices;
|
||||
#if NETCOREAPP3_1_OR_GREATER
|
||||
using System.Numerics;
|
||||
using System.Runtime.Intrinsics;
|
||||
using System.Runtime.Intrinsics.X86;
|
||||
#endif
|
||||
|
||||
namespace Markdig.Helpers;
|
||||
|
||||
@@ -19,13 +15,9 @@ namespace Markdig.Helpers;
|
||||
/// <typeparam name="T"></typeparam>
|
||||
public sealed class CharacterMap<T> where T : class
|
||||
{
|
||||
#if NETCOREAPP3_1_OR_GREATER
|
||||
private readonly Vector128<byte> _asciiBitmap;
|
||||
#endif
|
||||
|
||||
private readonly T[] asciiMap;
|
||||
private readonly Dictionary<uint, T>? nonAsciiMap;
|
||||
private readonly BoolVector128 isOpeningCharacter;
|
||||
private readonly SearchValues<char> _values;
|
||||
private readonly T[] _asciiMap;
|
||||
private readonly Dictionary<uint, T>? _nonAsciiMap;
|
||||
|
||||
/// <summary>
|
||||
/// Initializes a new instance of the <see cref="CharacterMap{T}"/> class.
|
||||
@@ -35,64 +27,38 @@ public sealed class CharacterMap<T> where T : class
|
||||
public CharacterMap(IEnumerable<KeyValuePair<char, T>> maps)
|
||||
{
|
||||
if (maps is null) ThrowHelper.ArgumentNullException(nameof(maps));
|
||||
|
||||
var charSet = new HashSet<char>();
|
||||
int maxChar = 0;
|
||||
|
||||
foreach (var map in maps)
|
||||
{
|
||||
var openingChar = map.Key;
|
||||
charSet.Add(openingChar);
|
||||
|
||||
if (openingChar < 128)
|
||||
{
|
||||
maxChar = Math.Max(maxChar, openingChar);
|
||||
|
||||
if (openingChar == 0)
|
||||
{
|
||||
ThrowHelper.ArgumentOutOfRangeException("Null is not a valid opening character.", nameof(maps));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
nonAsciiMap ??= new Dictionary<uint, T>();
|
||||
}
|
||||
charSet.Add(map.Key);
|
||||
}
|
||||
|
||||
OpeningCharacters = charSet.ToArray();
|
||||
Array.Sort(OpeningCharacters);
|
||||
|
||||
asciiMap = new T[maxChar + 1];
|
||||
_asciiMap = new T[128];
|
||||
|
||||
foreach (var state in maps)
|
||||
{
|
||||
char openingChar = state.Key;
|
||||
if (openingChar < 128)
|
||||
{
|
||||
asciiMap[openingChar] ??= state.Value;
|
||||
isOpeningCharacter.Set(openingChar);
|
||||
_asciiMap[openingChar] ??= state.Value;
|
||||
}
|
||||
else if (!nonAsciiMap!.ContainsKey(openingChar))
|
||||
else
|
||||
{
|
||||
nonAsciiMap[openingChar] = state.Value;
|
||||
_nonAsciiMap ??= new Dictionary<uint, T>();
|
||||
|
||||
if (!_nonAsciiMap.ContainsKey(openingChar))
|
||||
{
|
||||
_nonAsciiMap[openingChar] = state.Value;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#if NETCOREAPP3_1_OR_GREATER
|
||||
if (nonAsciiMap is null)
|
||||
{
|
||||
long bitmap_0_3 = 0;
|
||||
long bitmap_4_7 = 0;
|
||||
|
||||
foreach (char openingChar in OpeningCharacters)
|
||||
{
|
||||
int position = (openingChar >> 4) | ((openingChar & 0x0F) << 3);
|
||||
if (position < 64) bitmap_0_3 |= 1L << position;
|
||||
else bitmap_4_7 |= 1L << (position - 64);
|
||||
}
|
||||
|
||||
_asciiBitmap = Vector128.Create(bitmap_0_3, bitmap_4_7).AsByte();
|
||||
}
|
||||
#endif
|
||||
_values = SearchValues.Create(OpeningCharacters);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
@@ -110,7 +76,7 @@ public sealed class CharacterMap<T> where T : class
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
get
|
||||
{
|
||||
T[] asciiMap = this.asciiMap;
|
||||
T[] asciiMap = _asciiMap;
|
||||
if (openingChar < (uint)asciiMap.Length)
|
||||
{
|
||||
return asciiMap[openingChar];
|
||||
@@ -118,13 +84,12 @@ public sealed class CharacterMap<T> where T : class
|
||||
else
|
||||
{
|
||||
T? map = null;
|
||||
nonAsciiMap?.TryGetValue(openingChar, out map);
|
||||
_nonAsciiMap?.TryGetValue(openingChar, out map);
|
||||
return map;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// <summary>
|
||||
/// Searches for an opening character from a registered parser in the specified string.
|
||||
/// </summary>
|
||||
@@ -132,167 +97,20 @@ public sealed class CharacterMap<T> where T : class
|
||||
/// <param name="start">The start.</param>
|
||||
/// <param name="end">The end.</param>
|
||||
/// <returns>Index position within the string of the first opening character found in the specified text; if not found, returns -1</returns>
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
public int IndexOfOpeningCharacter(string text, int start, int end)
|
||||
{
|
||||
Debug.Assert(text is not null);
|
||||
Debug.Assert(start >= 0 && end >= 0);
|
||||
Debug.Assert(end - start + 1 >= 0);
|
||||
Debug.Assert(end - start + 1 <= text.Length);
|
||||
|
||||
if (nonAsciiMap is null)
|
||||
ReadOnlySpan<char> span = text.AsSpan(start, end - start + 1);
|
||||
|
||||
int index = span.IndexOfAny(_values);
|
||||
|
||||
if (index >= 0)
|
||||
{
|
||||
#if NETCOREAPP3_1_OR_GREATER
|
||||
if (Ssse3.IsSupported && BitConverter.IsLittleEndian)
|
||||
{
|
||||
// Based on http://0x80.pl/articles/simd-byte-lookup.html#universal-algorithm
|
||||
// Optimized for sets in the [1, 127] range
|
||||
|
||||
int lengthMinusOne = end - start;
|
||||
int charsToProcessVectorized = lengthMinusOne & ~(2 * Vector128<short>.Count - 1);
|
||||
int finalStart = start + charsToProcessVectorized;
|
||||
|
||||
if (start < finalStart)
|
||||
{
|
||||
ref char textStartRef = ref Unsafe.Add(ref Unsafe.AsRef(in text.GetPinnableReference()), start);
|
||||
Vector128<byte> bitmap = _asciiBitmap;
|
||||
do
|
||||
{
|
||||
// Load 32 bytes (16 chars) into two Vector128<short>s (chars)
|
||||
// Drop the high byte of each char
|
||||
// Pack the remaining bytes into a single Vector128<byte>
|
||||
Vector128<byte> input = Sse2.PackUnsignedSaturate(
|
||||
Unsafe.ReadUnaligned<Vector128<short>>(ref Unsafe.As<char, byte>(ref textStartRef)),
|
||||
Unsafe.ReadUnaligned<Vector128<short>>(ref Unsafe.As<char, byte>(ref Unsafe.Add(ref textStartRef, Vector128<short>.Count))));
|
||||
|
||||
// Extract the higher nibble of each character ((input >> 4) & 0xF)
|
||||
Vector128<byte> higherNibbles = Sse2.And(Sse2.ShiftRightLogical(input.AsUInt16(), 4).AsByte(), Vector128.Create((byte)0xF));
|
||||
|
||||
// Lookup the matching higher nibble for each character based on the lower nibble
|
||||
// PSHUFB will set the result to 0 for any non-ASCII (> 127) character
|
||||
Vector128<byte> bitsets = Ssse3.Shuffle(bitmap, input);
|
||||
|
||||
// Calculate a bitmask (1 << (higherNibble % 8)) for each character
|
||||
Vector128<byte> bitmask = Ssse3.Shuffle(Vector128.Create(0x8040201008040201).AsByte(), higherNibbles);
|
||||
|
||||
// Check which characters are present in the set
|
||||
// We are relying on bitsets being zero for non-ASCII characters
|
||||
Vector128<byte> result = Sse2.And(bitsets, bitmask);
|
||||
|
||||
if (!result.Equals(Vector128<byte>.Zero))
|
||||
{
|
||||
int resultMask = ~Sse2.MoveMask(Sse2.CompareEqual(result, Vector128<byte>.Zero));
|
||||
return start + BitOperations.TrailingZeroCount((uint)resultMask);
|
||||
}
|
||||
|
||||
start += 2 * Vector128<short>.Count;
|
||||
textStartRef = ref Unsafe.Add(ref textStartRef, 2 * Vector128<short>.Count);
|
||||
}
|
||||
while (start != finalStart);
|
||||
}
|
||||
}
|
||||
|
||||
ref char textRef = ref Unsafe.AsRef(in text.GetPinnableReference());
|
||||
for (; start <= end; start++)
|
||||
{
|
||||
if (IntPtr.Size == 4)
|
||||
{
|
||||
uint c = Unsafe.Add(ref textRef, start);
|
||||
if (c < 128 && isOpeningCharacter[c])
|
||||
{
|
||||
return start;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
ulong c = Unsafe.Add(ref textRef, start);
|
||||
if (c < 128 && isOpeningCharacter[c])
|
||||
{
|
||||
return start;
|
||||
}
|
||||
}
|
||||
}
|
||||
#else
|
||||
unsafe
|
||||
{
|
||||
fixed (char* pText = text)
|
||||
{
|
||||
for (int i = start; i <= end; i++)
|
||||
{
|
||||
char c = pText[i];
|
||||
if (c < 128 && isOpeningCharacter[c])
|
||||
{
|
||||
return i;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
return -1;
|
||||
index += start;
|
||||
}
|
||||
else
|
||||
{
|
||||
return IndexOfOpeningCharacterNonAscii(text, start, end);
|
||||
}
|
||||
}
|
||||
|
||||
private int IndexOfOpeningCharacterNonAscii(string text, int start, int end)
|
||||
{
|
||||
#if NETCOREAPP3_1_OR_GREATER
|
||||
ref char textRef = ref Unsafe.AsRef(in text.GetPinnableReference());
|
||||
for (int i = start; i <= end; i++)
|
||||
{
|
||||
char c = Unsafe.Add(ref textRef, i);
|
||||
if (c < 128 ? isOpeningCharacter[c] : nonAsciiMap!.ContainsKey(c))
|
||||
{
|
||||
return i;
|
||||
}
|
||||
}
|
||||
#else
|
||||
unsafe
|
||||
{
|
||||
fixed (char* pText = text)
|
||||
{
|
||||
for (int i = start; i <= end; i++)
|
||||
{
|
||||
char c = pText[i];
|
||||
if (c < 128 ? isOpeningCharacter[c] : nonAsciiMap!.ContainsKey(c))
|
||||
{
|
||||
return i;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
return -1;
|
||||
return index;
|
||||
}
|
||||
}
|
||||
|
||||
internal unsafe struct BoolVector128
|
||||
{
|
||||
private fixed bool values[128];
|
||||
|
||||
public void Set(char c)
|
||||
{
|
||||
Debug.Assert(c < 128);
|
||||
values[c] = true;
|
||||
}
|
||||
|
||||
public readonly bool this[uint c]
|
||||
{
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
get
|
||||
{
|
||||
Debug.Assert(c < 128);
|
||||
return values[c];
|
||||
}
|
||||
}
|
||||
public readonly bool this[ulong c]
|
||||
{
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
get
|
||||
{
|
||||
Debug.Assert(c < 128 && IntPtr.Size == 8);
|
||||
return values[c];
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -36,9 +36,7 @@ public sealed class LiteralInlineParser : InlineParser
|
||||
|
||||
var startPosition = processor.GetSourcePosition(slice.Start, out int line, out int column);
|
||||
|
||||
// Slightly faster to perform our own search for opening characters
|
||||
var nextStart = processor.Parsers.IndexOfOpeningCharacter(text, slice.Start + 1, slice.End);
|
||||
//var nextStart = str.IndexOfAny(processor.SpecialCharacters, slice.Start + 1, slice.Length - 1);
|
||||
int length;
|
||||
|
||||
if (nextStart < 0)
|
||||
|
||||
108
src/Markdig/Polyfills/SearchValues.cs
Normal file
108
src/Markdig/Polyfills/SearchValues.cs
Normal file
@@ -0,0 +1,108 @@
|
||||
// Copyright (c) Alexandre Mutel. All rights reserved.
|
||||
// This file is licensed under the BSD-Clause 2 license.
|
||||
// See the license.txt file in the project root for more information.
|
||||
|
||||
#if !NET8_0_OR_GREATER
|
||||
|
||||
using System.Diagnostics;
|
||||
using System.Runtime.CompilerServices;
|
||||
|
||||
namespace System.Buffers;
|
||||
|
||||
/// <summary>
/// Polyfill entry points for creating and querying <see cref="SearchValues{T}"/> instances
/// on targets where <c>NET8_0_OR_GREATER</c> is not defined.
/// </summary>
internal static class SearchValues
{
    /// <summary>
    /// Builds a searchable set from the given characters.
    /// </summary>
    /// <param name="values">The characters that should be treated as matches.</param>
    /// <returns>A <see cref="SearchValues{T}"/> backed by <see cref="PreNet8CompatSearchValues"/>.</returns>
    public static SearchValues<char> Create(ReadOnlySpan<char> values)
        => new PreNet8CompatSearchValues(values);

    /// <summary>
    /// Finds the first position in <paramref name="span"/> holding any character of <paramref name="values"/>.
    /// </summary>
    /// <param name="span">The characters to scan.</param>
    /// <param name="values">The set of characters to look for.</param>
    /// <returns>The zero-based index of the first match, or -1 when there is none.</returns>
    public static int IndexOfAny(this ReadOnlySpan<char> span, SearchValues<char> values)
        => values.IndexOfAny(span);

    /// <summary>
    /// Finds the first position in <paramref name="span"/> holding any character of <paramref name="values"/>.
    /// </summary>
    /// <param name="span">The characters to scan.</param>
    /// <param name="values">The set of characters to look for.</param>
    /// <returns>The zero-based index of the first match, or -1 when there is none.</returns>
    public static int IndexOfAny(this Span<char> span, SearchValues<char> values)
    {
        // Same lookup as the read-only overload; the span is only read.
        ReadOnlySpan<char> readOnlyView = span;
        return values.IndexOfAny(readOnlyView);
    }
}
|
||||
|
||||
/// <summary>
/// Base type of the polyfill: an opaque, pre-built set of values that a span can be searched for.
/// </summary>
/// <typeparam name="T">The element type of the set. The only search member declared here operates on <see cref="char"/>.</typeparam>
internal abstract class SearchValues<T>
{
    /// <summary>
    /// Locates the first character of <paramref name="span"/> that is contained in this set.
    /// </summary>
    /// <param name="span">The characters to scan.</param>
    /// <returns>The zero-based index of the first match, or -1 when no character matches.</returns>
    public abstract int IndexOfAny(ReadOnlySpan<char> span);
}
|
||||
|
||||
/// <summary>
/// <see cref="SearchValues{T}"/> implementation used by the polyfill: characters below 128 are
/// tracked in a fixed 128-entry flag table, all others in a lazily created <see cref="HashSet{T}"/>.
/// </summary>
internal sealed class PreNet8CompatSearchValues : SearchValues<char>
{
    // One flag per character in the range [0, 127].
    private readonly BoolVector128 _ascii;

    // Stays null when every value passed to the constructor was below 128.
    private readonly HashSet<char>? _nonAscii;

    /// <summary>
    /// Initializes the set from the given characters.
    /// </summary>
    /// <param name="values">The characters that should be reported as matches.</param>
    public PreNet8CompatSearchValues(ReadOnlySpan<char> values)
    {
        for (int i = 0; i < values.Length; i++)
        {
            char value = values[i];

            if (value >= 128)
            {
                // Allocate the hash set only once a character outside the table shows up.
                _nonAscii ??= new HashSet<char>();
                _nonAscii.Add(value);
                continue;
            }

            _ascii.Set(value);
        }
    }

    /// <summary>
    /// Locates the first character of <paramref name="span"/> that belongs to this set.
    /// </summary>
    /// <param name="span">The characters to scan.</param>
    /// <returns>The zero-based index of the first match, or -1 when no character matches.</returns>
    public override int IndexOfAny(ReadOnlySpan<char> span)
    {
        HashSet<char>? extended = _nonAscii;

        if (extended is not null)
        {
            // Mixed set: consult the table for characters below 128 and the hash set for the rest.
            for (int position = 0; position < span.Length; position++)
            {
                char current = span[position];
                bool isMatch = current < 128 ? _ascii[current] : extended.Contains(current);

                if (isMatch)
                {
                    return position;
                }
            }

            return -1;
        }

        // Table-only set: characters at or above 128 can never match.
        for (int position = 0; position < span.Length; position++)
        {
            char current = span[position];

            if (current >= 128)
            {
                continue;
            }

            if (_ascii[current])
            {
                return position;
            }
        }

        return -1;
    }

    /// <summary>
    /// Inline table of 128 boolean flags indexed by character code.
    /// </summary>
    private unsafe struct BoolVector128
    {
        private fixed bool _values[128];

        /// <summary>
        /// Marks <paramref name="c"/> as present. The caller must pass a value below 128.
        /// </summary>
        public void Set(char c)
        {
            Debug.Assert(c < 128);
            _values[c] = true;
        }

        /// <summary>
        /// Gets the flag stored for <paramref name="c"/>. The caller must pass a value below 128;
        /// this is only checked by a debug assertion.
        /// </summary>
        public readonly bool this[uint c]
        {
            [MethodImpl(MethodImplOptions.AggressiveInlining)]
            get
            {
                Debug.Assert(c < 128);
                return _values[c];
            }
        }
    }
}
|
||||
|
||||
#endif
|
||||
Reference in New Issue
Block a user