Compare commits

..

40 Commits

Author SHA1 Message Date
Alexandre Mutel
f52ecee0b9 Update packages 2023-12-14 06:24:19 +01:00
Alexandre Mutel
a092ec23b3 Merge pull request #760 from zickb/fix_source_span_calculation_for_linebreak_inline
Fix source span calculation for LineBreakInline
2023-12-14 06:18:09 +01:00
Benni
6f1dce6306 Fix last LineBreakInline source span in multi block scenario 2023-12-14 02:53:02 +01:00
Alexandre Mutel
040a778d87 Merge pull request #759 from Akarinnnnn/fix-757
Make StringLineGroup return a count-limited Enumerator
2023-12-12 19:13:52 +01:00
Fa鸽
2ae2cf9263 Add tests for non-boxed enumerator of StringLineGroup. 2023-12-12 16:18:10 +08:00
Fa鸽
ba1e562d2f Fix compilation error of StringLineGroup. 2023-12-12 16:15:55 +08:00
Fa鸽
65a02e44ec Add Enumerator GetEnumerator() for StringLineGroup 2023-12-12 15:45:30 +08:00
Alexandre Mutel
e78833ae30 Update src/Markdig/Helpers/StringLineGroup.cs 2023-12-12 08:00:54 +01:00
Fa鸽
2ab716bec1 Make StringLineGroup return a count-limited Enumerator
Fixes #757. Previously we returned the array enumerator directly, so enumerating it yielded phantom empty lines.
2023-12-11 19:57:51 +08:00
Alexandre Mutel
feeb1867ce Merge pull request #753 from MihaZupan/perf-nov23-3
A few more perf improvements
2023-11-29 10:00:01 +01:00
Miha Zupan
f3aa7e73e3 Avoid Dictionary lookups in RendererBase.Write 2023-11-26 02:27:26 +01:00
Miha Zupan
dce5572356 Create inlining boundaries in MarkdownParser.Parse 2023-11-26 00:59:12 +01:00
Miha Zupan
dbbabd2221 Avoid redundant work in FencedBlockParserBase.TryContinue 2023-11-26 00:59:06 +01:00
Miha Zupan
22145c2fb0 Add UnicodeUtility helper 2023-11-26 00:58:57 +01:00
Miha Zupan
2517003edc Speed up code block arguments string parsing 2023-11-25 21:36:04 +01:00
Miha Zupan
50a3d02c2c Remove NoInlining from throw helper 2023-11-25 20:15:40 +01:00
Alexandre Mutel
40fb2b8249 Merge pull request #751 from MihaZupan/net8
.NET 8.0 and a few other perf improvements
2023-11-25 10:08:32 +01:00
Miha Zupan
5c54968807 Fix polyfill namespace 2023-11-25 03:25:37 +01:00
Miha Zupan
58ea46d58b Also install 6.0, 7.0 SDKs 2023-11-25 02:33:59 +01:00
Miha Zupan
f557e57ab1 Optimize WriteEscapeUrl 2023-11-24 03:34:45 +01:00
Miha Zupan
87aa32e1bd Optimize WriteEscape 2023-11-24 02:57:12 +01:00
Miha Zupan
4f1cb9da08 Avoid allocating strings for known emphasis character fallbacks 2023-11-24 02:44:47 +01:00
Miha Zupan
5cff880c90 Remove temporary string allocations in AutoIdentifierExtension 2023-11-24 02:44:08 +01:00
Miha Zupan
c7aec822b0 Speed up a few character checks 2023-11-24 02:41:08 +01:00
Miha Zupan
b0bde46cc1 Defer position calculations in LiteralInlineParser 2023-11-24 02:39:16 +01:00
Miha Zupan
7803417e5c Rewrite CodeInline matching to make use of vectorization 2023-11-24 02:37:45 +01:00
Miha Zupan
047c4cbcbb Skip _lineBits read on MarkdownObject creation 2023-11-24 02:28:25 +01:00
Miha Zupan
e4f57ca21e Fix build warnings 2023-11-24 02:26:51 +01:00
Miha Zupan
1f1364e69b Add SearchValues polyfill and use it in CharacterMap 2023-11-24 02:23:55 +01:00
Miha Zupan
4eea9db35c Add .NET 8.0 target 2023-11-24 02:17:47 +01:00
Alexandre Mutel
cce7284b84 Merge pull request #749 from michaelvolz/patch-1
Update Visual Studio editor link
2023-11-14 18:36:26 +01:00
Michael A. Volz (Flynn)
8e1e0b9bf3 Update Visual Studio editor link
Markdown Editor v2 (Visual Studio 2022)

This is a complete rewrite of the original Markdown Editor with tons of fixes, tweaks, and performance improvements.
2023-11-14 17:50:51 +01:00
Alexandre Mutel
7d40bc118b Merge pull request #736 from zickb/better_literal_delimiter_content_string_slice
Better literal delimiter content string slice
2023-08-30 07:29:44 +02:00
Benni
dba94a2371 Add documentation for the new constructor. 2023-08-30 00:55:35 +02:00
Benni
6d75eed3bb Don't break external users of the public constructor 2023-08-30 00:53:05 +02:00
Benni
ccb75fd5f0 Merge branch 'master' into better_literal_delimiter_content_string_slice 2023-08-30 00:50:18 +02:00
Benni
06eb6ba774 Better content string slice of delimiter literals:
instead of creating a new StringSlice only containing the delimiter chars, use the provided StringSlice from the match method with an appropriate start and end index
2023-08-30 00:40:10 +02:00
Alexandre Mutel
f15e9f020e Merge pull request #733 from zickb/fix_source_span_calculation
Fix source span calculation
2023-08-26 16:04:01 +02:00
Benni
a70ca6304f Fix source span of paragraphs in table cells and cleanup InlineProcessor 2023-08-25 08:54:30 +02:00
Benni
d26822be05 fix inline source spans calculation 2023-08-22 23:38:37 +02:00
46 changed files with 1005 additions and 699 deletions

View File

@@ -20,10 +20,13 @@ jobs:
submodules: true
fetch-depth: 0
- name: Install .NET 6.0
- name: Install .NET 6.0, 7.0, and 8.0
uses: actions/setup-dotnet@v1
with:
dotnet-version: '6.0.x'
dotnet-version: |
6.0.x
7.0.x
8.0.x
- name: Build, Test, Pack, Publish
shell: bash

View File

@@ -12,7 +12,7 @@ You can **try Markdig online** and compare it to other implementations on [babel
- **Very fast parser and html renderer** (no-regexp), very lightweight in terms of GC pressure. See benchmarks
- **Abstract Syntax Tree** with precise source code location for syntax tree, useful when building a Markdown editor.
- Checkout [MarkdownEditor for Visual Studio](https://visualstudiogallery.msdn.microsoft.com/eaab33c3-437b-4918-8354-872dfe5d1bfe) powered by Markdig!
- Checkout [Markdown Editor v2 for Visual Studio 2022](https://marketplace.visualstudio.com/items?itemName=MadsKristensen.MarkdownEditor2) powered by Markdig!
- Converter to **HTML**
- Passing more than **600+ tests** from the latest [CommonMark specs (0.30)](http://spec.commonmark.org/)
- Includes all the core elements of CommonMark:

View File

@@ -1,4 +1,4 @@
<Project Sdk="Microsoft.NET.Sdk">
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net6.0</TargetFramework>
<OutputType>Exe</OutputType>
@@ -19,12 +19,12 @@
</Content>
</ItemGroup>
<ItemGroup>
<PackageReference Include="BenchmarkDotNet" Version="0.13.1" />
<PackageReference Include="BenchmarkDotNet.Diagnostics.Windows" Version="0.13.1" />
<PackageReference Include="BenchmarkDotNet" Version="0.13.11" />
<PackageReference Include="BenchmarkDotNet.Diagnostics.Windows" Version="0.13.11" />
<PackageReference Include="CommonMark.NET" Version="0.15.1" />
<PackageReference Include="Markdown" Version="2.2.1" />
<PackageReference Include="MarkdownSharp" Version="2.0.5" />
<PackageReference Include="Microsoft.Diagnostics.Runtime" Version="2.0.226801" />
<PackageReference Include="Microsoft.Diagnostics.Runtime" Version="3.1.456101" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\Markdig\Markdig.csproj" />

View File

@@ -0,0 +1,5 @@
// Copyright (c) Alexandre Mutel. All rights reserved.
// This file is licensed under the BSD-Clause 2 license.
// See the license.txt file in the project root for more information.
global using Assert = NUnit.Framework.Legacy.ClassicAssert;

View File

@@ -1,7 +1,7 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net6.0</TargetFramework>
<TargetFrameworks>net6.0;net8.0</TargetFrameworks>
<OutputType>Exe</OutputType>
<IsPackable>false</IsPackable>
<ImplicitUsings>enable</ImplicitUsings>
@@ -11,9 +11,9 @@
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.1.0" />
<PackageReference Include="NUnit" Version="3.13.3" />
<PackageReference Include="NUnit3TestAdapter" Version="4.2.1" />
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.8.0" />
<PackageReference Include="NUnit" Version="4.0.1" />
<PackageReference Include="NUnit3TestAdapter" Version="4.5.0" />
</ItemGroup>
<ItemGroup>

View File

@@ -1,7 +1,8 @@
using System.Text.RegularExpressions;
using Markdig.Extensions.AutoLinks;
using Markdig.Extensions.Tables;
using Markdig.Syntax;
using NUnit.Framework;
namespace Markdig.Tests;
@@ -198,9 +199,9 @@ $$
<div class=""math"">
\begin{align}
\sqrt{37} & = \sqrt{\frac{73^2-1}{12^2}} \\
& = \sqrt{\frac{73^2}{12^2}\cdot\frac{73^2-1}{73^2}} \\
& = \sqrt{\frac{73^2}{12^2}\cdot\frac{73^2-1}{73^2}} \\
& = \sqrt{\frac{73^2}{12^2}}\sqrt{\frac{73^2-1}{73^2}} \\
& = \frac{73}{12}\sqrt{1 - \frac{1}{73^2}} \\
& = \frac{73}{12}\sqrt{1 - \frac{1}{73^2}} \\
& \approx \frac{73}{12}\left(1 - \frac{1}{2\cdot73^2}\right)
\end{align}
</div>
@@ -291,4 +292,29 @@ $$
TestParser.TestSpec("www.foo.bar", "<p><a href=\"http://www.foo.bar\">www.foo.bar</a></p>", pipeline);
TestParser.TestSpec("www.foo.bar", "<p><a href=\"https://www.foo.bar\">www.foo.bar</a></p>", httpsPipeline);
}
// Parses a single line terminated by '\n' with precise source locations and
// trivia tracking enabled, then checks that the root inline of the last block
// reports the span 0..10.
[Test]
public void RootInlineHasCorrectSourceSpan()
{
    var triviaPipeline = new MarkdownPipelineBuilder()
        .UsePreciseSourceLocation()
        .Build();
    triviaPipeline.TrackTrivia = true;

    var parsed = Markdown.Parse("0123456789\n", triviaPipeline);
    var lastLeaf = (LeafBlock)parsed.LastChild;

    Assert.That(lastLeaf.Inline.Span == new SourceSpan(0, 10));
}
// Parses a two-column pipe table whose body cells contain inline HTML and
// emphasis, then checks that the root inline of the first cell's paragraph
// (last row) starts where its first child starts and ends where its last
// child ends.
[Test]
public void RootInlineInTableCellHasCorrectSourceSpan()
{
    // The three pieces concatenate to the exact markdown text being parsed.
    const string markdown =
        "| a | b |\n" +
        "| --- | --- |\n" +
        "| <span id=\"dest\"></span><span id=\"DEST\"></span>*dest*<br/> | \\[in\\] The address of the result of the operation.<br/> |";

    var tablePipeline = new MarkdownPipelineBuilder()
        .UsePreciseSourceLocation()
        .UseAdvancedExtensions()
        .Build();
    tablePipeline.TrackTrivia = true;

    var parsed = Markdown.Parse(markdown, tablePipeline);

    // Walk down: last block -> last row -> first cell -> last block of the cell.
    var table = (Table)parsed.LastChild;
    var lastRow = (TableRow)table.LastChild;
    var firstCell = (TableCell)lastRow.First();
    var cellParagraph = (ParagraphBlock)firstCell.LastChild;

    Assert.That(cellParagraph.Inline.Span.Start == cellParagraph.Inline.FirstChild.Span.Start);
    Assert.That(cellParagraph.Inline.Span.End == cellParagraph.Inline.LastChild.Span.End);
}
}

View File

@@ -1,7 +1,10 @@
// Copyright (c) Alexandre Mutel. All rights reserved.
// This file is licensed under the BSD-Clause 2 license.
// This file is licensed under the BSD-Clause 2 license.
// See the license.txt file in the project root for more information.
using Markdig.Syntax;
using Markdig.Syntax.Inlines;
namespace Markdig.Tests;
[TestFixture]
@@ -18,4 +21,17 @@ public partial class TestEmphasisPlus
{
TestParser.TestSpec("normal ***Strong emphasis*** normal", "<p>normal <em><strong>Strong emphasis</strong></em> normal</p>", "");
}
// Parses "test*test" with a default pipeline and checks that the second inline
// of the paragraph (the lone '*') is a literal whose content slice refers to
// the full input text with Start and End both equal to 4.
[Test]
public void OpenEmphasisHasConvenientContentStringSlice()
{
    const string input = "test*test";

    var defaultPipeline = new MarkdownPipelineBuilder().Build();
    var parsed = Markdown.Parse(input, defaultPipeline);

    var paragraph = (ParagraphBlock)parsed.LastChild;
    var delimiterLiteral = (LiteralInline)paragraph.Inline.ElementAt(1);

    Assert.That(delimiterLiteral.Content.Text == input);
    Assert.That(delimiterLiteral.Content.Start == 4);
    Assert.That(delimiterLiteral.Content.End == 4);
}
}

View File

@@ -67,6 +67,60 @@ literal ( 2, 0) 12-21
");
}
// Checks the reported spans for a single paragraph whose only line ends with a
// line terminator, with trivia tracking enabled. Each expectation lists the
// paragraph, its literal (offsets 0-9) and the trailing line break.
[Test]
public void TestParagraphWithEndNewLine()
{
// "\n" terminator: the line break is expected at 10-10 and the paragraph at 0-10.
Check("0123456789\n", @"
paragraph ( 0, 0) 0-10
literal ( 0, 0) 0-9
linebreak ( 0,10) 10-10
", trackTrivia: true);
// "\r" terminator: same expected spans as the "\n" case.
Check("0123456789\r", @"
paragraph ( 0, 0) 0-10
literal ( 0, 0) 0-9
linebreak ( 0,10) 10-10
", trackTrivia: true);
// "\r\n" terminator: the line break is expected to cover both characters (10-11),
// and the paragraph is expected to end at 11.
Check("0123456789\r\n", @"
paragraph ( 0, 0) 0-11
literal ( 0, 0) 0-9
linebreak ( 0,10) 10-11
", trackTrivia: true);
}
// Checks the reported spans for two paragraphs, each followed by a blank line,
// with trivia tracking enabled. The second paragraph's trailing line break is
// the last one in the input.
[Test]
public void TestMultipleParagraphsWithEndNewLine()
{
// "\n" terminators: each line break is expected to span a single offset
// (10-10 and 22-22); the second paragraph starts at offset 12.
Check("0123456789\n\n0123456789\n\n", @"
paragraph ( 0, 0) 0-10
literal ( 0, 0) 0-9
linebreak ( 0,10) 10-10
paragraph ( 2, 0) 12-22
literal ( 2, 0) 12-21
linebreak ( 2,10) 22-22
", trackTrivia: true);
// "\r" terminators: same expected spans as the "\n" case.
Check("0123456789\r\r0123456789\r\r", @"
paragraph ( 0, 0) 0-10
literal ( 0, 0) 0-9
linebreak ( 0,10) 10-10
paragraph ( 2, 0) 12-22
literal ( 2, 0) 12-21
linebreak ( 2,10) 22-22
", trackTrivia: true);
// "\r\n" terminators: each line break is expected to span two offsets
// (10-11 and 24-25); the second paragraph starts at offset 14.
Check("0123456789\r\n\r\n0123456789\r\n\r\n", @"
paragraph ( 0, 0) 0-11
literal ( 0, 0) 0-9
linebreak ( 0,10) 10-11
paragraph ( 2, 0) 14-25
literal ( 2, 0) 14-23
linebreak ( 2,10) 24-25
", trackTrivia: true);
}
[Test]
public void TestEmphasis()
{
@@ -825,9 +879,10 @@ literal ( 8, 2) 77-92
");
}
private static void Check(string text, string expectedResult, string extensions = null)
private static void Check(string text, string expectedResult, string extensions = null, bool trackTrivia = false)
{
var pipelineBuilder = new MarkdownPipelineBuilder().UsePreciseSourceLocation();
pipelineBuilder.TrackTrivia = trackTrivia;
if (extensions != null)
{
pipelineBuilder.Configure(extensions);

View File

@@ -1,3 +1,4 @@
using System.Collections;
using System.Text;
using Markdig.Helpers;
@@ -215,4 +216,27 @@ public class TestStringSliceList
TextAssert.AreEqual("ABC\r\nD\r\n", chars.ToString());
TextAssert.AreEqual("ABC\r\nD", text.ToString());
}
}
// Builds a StringLineGroup with capacity 4 that holds a single line ("A") and
// checks that both the IEnumerable enumerator and the enumerator returned by
// GetEnumerator() yield exactly that one line and then stop.
[Test]
public void TestStringLineGroup_EnumeratorReturnsRealLines()
{
    const string source = "A\r\n";

    var onlyLine = new StringSlice(source, NewLine.CarriageReturnLineFeed) { Start = 0, End = 0 };
    var lines = new StringLineGroup(4);
    lines.Add(onlyLine);

    // Enumerate through the non-generic IEnumerable interface.
    var boxedEnumerator = ((IEnumerable)lines).GetEnumerator();
    Assert.True(boxedEnumerator.MoveNext());
    StringLine yielded = (StringLine)boxedEnumerator.Current;
    TextAssert.AreEqual("A", yielded.ToString());
    Assert.False(boxedEnumerator.MoveNext());

    // Enumerate through the GetEnumerator() method declared on the group itself.
    var directEnumerator = lines.GetEnumerator();
    Assert.True(directEnumerator.MoveNext());
    yielded = (StringLine)directEnumerator.Current;
    TextAssert.AreEqual("A", yielded.ToString());
    Assert.False(directEnumerator.MoveNext());
}
}

View File

@@ -75,8 +75,11 @@ public class TestYamlFrontMatterExtension
ObjectRenderers = new ObjectRendererCollection();
}
#pragma warning disable CS0067 // ObjectWriteBefore/ObjectWriteAfter is never used
public event Action<IMarkdownRenderer, MarkdownObject> ObjectWriteBefore;
public event Action<IMarkdownRenderer, MarkdownObject> ObjectWriteAfter;
#pragma warning restore CS0067
public ObjectRendererCollection ObjectRenderers { get; }
public object Render(MarkdownObject markdownObject)
{

View File

@@ -14,7 +14,7 @@
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Microsoft.ApplicationInsights.AspNetCore" Version="2.20.0" />
<PackageReference Include="Microsoft.ApplicationInsights.AspNetCore" Version="2.22.0" />
</ItemGroup>
<ItemGroup>

View File

@@ -20,8 +20,10 @@ namespace Markdig.Extensions.AutoIdentifiers;
public class AutoIdentifierExtension : IMarkdownExtension
{
private const string AutoIdentifierKey = "AutoIdentifier";
private readonly AutoIdentifierOptions options;
private readonly StripRendererCache rendererCache = new StripRendererCache();
private static readonly StripRendererCache _rendererCache = new();
private readonly AutoIdentifierOptions _options;
/// <summary>
/// Initializes a new instance of the <see cref="AutoIdentifierExtension"/> class.
@@ -29,7 +31,7 @@ public class AutoIdentifierExtension : IMarkdownExtension
/// <param name="options">The options.</param>
public AutoIdentifierExtension(AutoIdentifierOptions options)
{
this.options = options;
_options = options;
}
public void Setup(MarkdownPipelineBuilder pipeline)
@@ -68,7 +70,7 @@ public class AutoIdentifierExtension : IMarkdownExtension
}
// If the AutoLink options is set, we register a LinkReferenceDefinition at the document level
if ((options & AutoIdentifierOptions.AutoLink) != 0)
if ((_options & AutoIdentifierOptions.AutoLink) != 0)
{
var headingLine = headingBlock.Lines.Lines[0];
@@ -157,16 +159,17 @@ public class AutoIdentifierExtension : IMarkdownExtension
}
// Use internally a HtmlRenderer to strip links from a heading
var stripRenderer = rendererCache.Get();
var stripRenderer = _rendererCache.Get();
stripRenderer.Render(headingBlock.Inline);
var headingText = stripRenderer.Writer.ToString()!;
rendererCache.Release(stripRenderer);
ReadOnlySpan<char> rawHeadingText = ((FastStringWriter)stripRenderer.Writer).AsSpan();
// Urilize the link
headingText = (options & AutoIdentifierOptions.GitHub) != 0
? LinkHelper.UrilizeAsGfm(headingText)
: LinkHelper.Urilize(headingText, (options & AutoIdentifierOptions.AllowOnlyAscii) != 0);
string headingText = (_options & AutoIdentifierOptions.GitHub) != 0
? LinkHelper.UrilizeAsGfm(rawHeadingText)
: LinkHelper.Urilize(rawHeadingText, (_options & AutoIdentifierOptions.AllowOnlyAscii) != 0);
_rendererCache.Release(stripRenderer);
// If the heading is empty, use the word "section" instead
var baseHeadingId = string.IsNullOrEmpty(headingText) ? "section" : headingText;
@@ -197,7 +200,7 @@ public class AutoIdentifierExtension : IMarkdownExtension
{
protected override HtmlRenderer NewInstance()
{
var headingWriter = new StringWriter();
var headingWriter = new FastStringWriter();
var stripRenderer = new HtmlRenderer(headingWriter)
{
// Set to false both to avoid having any HTML tags in the output
@@ -209,7 +212,9 @@ public class AutoIdentifierExtension : IMarkdownExtension
protected override void Reset(HtmlRenderer instance)
{
instance.Reset();
instance.ResetInternal();
((FastStringWriter)instance.Writer).Reset();
}
}
}

View File

@@ -443,6 +443,11 @@ public class PipeTableParser : InlineParser, IPostInlineProcessor
{
var paragraph = (ParagraphBlock) cell[0];
state.PostProcessInlines(postInlineProcessorIndex + 1, paragraph.Inline, null, true);
if (paragraph.Inline?.LastChild is not null)
{
paragraph.Inline.Span.End = paragraph.Inline.LastChild.Span.End;
paragraph.UpdateSpanEnd(paragraph.Inline.LastChild.Span.End);
}
}
// Clear cells when we are done
@@ -520,7 +525,7 @@ public class PipeTableParser : InlineParser, IPostInlineProcessor
// Create aligns until we may have a header row
aligns ??= new List<TableColumnDefinition>();
aligns.Add(new TableColumnDefinition() { Alignment = align });
// If this is the last delimiter, we need to check the right side of the `|` delimiter

View File

@@ -22,7 +22,7 @@ internal readonly struct BlockWrapper : IEquatable<BlockWrapper>
public bool Equals(BlockWrapper other) => ReferenceEquals(Block, other.Block);
public override bool Equals(object obj) => Block.Equals(obj);
public override bool Equals(object? obj) => Block.Equals(obj);
public override int GetHashCode() => Block.GetHashCode();
}

View File

@@ -30,15 +30,17 @@ public static class CharHelper
{ 'I', 1 }, { 'V', 5 }, { 'X', 10 }
};
private static readonly char[] punctuationExceptions = { '', '-', '†', '‡' };
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static bool IsPunctuationException(char c) =>
c is '' or '-' or '†' or '‡';
public static void CheckOpenCloseDelimiter(char pc, char c, bool enableWithinWord, out bool canOpen, out bool canClose)
{
pc.CheckUnicodeCategory(out bool prevIsWhiteSpace, out bool prevIsPunctuation);
c.CheckUnicodeCategory(out bool nextIsWhiteSpace, out bool nextIsPunctuation);
var prevIsExcepted = prevIsPunctuation && punctuationExceptions.Contains(pc);
var nextIsExcepted = nextIsPunctuation && punctuationExceptions.Contains(c);
var prevIsExcepted = prevIsPunctuation && IsPunctuationException(pc);
var nextIsExcepted = nextIsPunctuation && IsPunctuationException(c);
// A left-flanking delimiter run is a delimiter run that is
// (1) not followed by Unicode whitespace, and either
@@ -126,19 +128,6 @@ public static class CharHelper
return (column & (TabSize - 1)) != 0;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool Contains(this char[] charList, char c)
{
foreach (char ch in charList)
{
if (ch == c)
{
return true;
}
}
return false;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool IsWhitespace(this char c)
{
@@ -178,7 +167,7 @@ public static class CharHelper
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool IsControl(this char c)
{
return c < ' ' || char.IsControl(c);
return char.IsControl(c);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
@@ -211,15 +200,17 @@ public static class CharHelper
{
// A Unicode punctuation character is an ASCII punctuation character
// or anything in the general Unicode categories Pc, Pd, Pe, Pf, Pi, Po, or Ps.
const int PunctuationCategoryMask =
1 << (int)UnicodeCategory.ConnectorPunctuation |
1 << (int)UnicodeCategory.DashPunctuation |
1 << (int)UnicodeCategory.OpenPunctuation |
1 << (int)UnicodeCategory.ClosePunctuation |
1 << (int)UnicodeCategory.InitialQuotePunctuation |
1 << (int)UnicodeCategory.FinalQuotePunctuation |
1 << (int)UnicodeCategory.OtherPunctuation;
space = false;
UnicodeCategory category = CharUnicodeInfo.GetUnicodeCategory(c);
punctuation = category == UnicodeCategory.ConnectorPunctuation
|| category == UnicodeCategory.DashPunctuation
|| category == UnicodeCategory.OpenPunctuation
|| category == UnicodeCategory.ClosePunctuation
|| category == UnicodeCategory.InitialQuotePunctuation
|| category == UnicodeCategory.FinalQuotePunctuation
|| category == UnicodeCategory.OtherPunctuation;
punctuation = (PunctuationCategoryMask & (1 << (int)CharUnicodeInfo.GetUnicodeCategory(c))) != 0;
}
}
@@ -236,14 +227,16 @@ public static class CharHelper
}
else
{
var category = CharUnicodeInfo.GetUnicodeCategory(c);
return category == UnicodeCategory.ConnectorPunctuation
|| category == UnicodeCategory.DashPunctuation
|| category == UnicodeCategory.OpenPunctuation
|| category == UnicodeCategory.ClosePunctuation
|| category == UnicodeCategory.InitialQuotePunctuation
|| category == UnicodeCategory.FinalQuotePunctuation
|| category == UnicodeCategory.OtherPunctuation;
const int PunctuationCategoryMask =
1 << (int)UnicodeCategory.ConnectorPunctuation |
1 << (int)UnicodeCategory.DashPunctuation |
1 << (int)UnicodeCategory.OpenPunctuation |
1 << (int)UnicodeCategory.ClosePunctuation |
1 << (int)UnicodeCategory.InitialQuotePunctuation |
1 << (int)UnicodeCategory.FinalQuotePunctuation |
1 << (int)UnicodeCategory.OtherPunctuation;
return (PunctuationCategoryMask & (1 << (int)CharUnicodeInfo.GetUnicodeCategory(c))) != 0;
}
}

View File

@@ -2,14 +2,10 @@
// This file is licensed under the BSD-Clause 2 license.
// See the license.txt file in the project root for more information.
using System.Buffers;
using System.Diagnostics;
using System.Linq;
using System.Runtime.CompilerServices;
#if NETCOREAPP3_1_OR_GREATER
using System.Numerics;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
#endif
namespace Markdig.Helpers;
@@ -19,13 +15,9 @@ namespace Markdig.Helpers;
/// <typeparam name="T"></typeparam>
public sealed class CharacterMap<T> where T : class
{
#if NETCOREAPP3_1_OR_GREATER
private readonly Vector128<byte> _asciiBitmap;
#endif
private readonly T[] asciiMap;
private readonly Dictionary<uint, T>? nonAsciiMap;
private readonly BoolVector128 isOpeningCharacter;
private readonly SearchValues<char> _values;
private readonly T[] _asciiMap;
private readonly Dictionary<uint, T>? _nonAsciiMap;
/// <summary>
/// Initializes a new instance of the <see cref="CharacterMap{T}"/> class.
@@ -35,64 +27,38 @@ public sealed class CharacterMap<T> where T : class
public CharacterMap(IEnumerable<KeyValuePair<char, T>> maps)
{
if (maps is null) ThrowHelper.ArgumentNullException(nameof(maps));
var charSet = new HashSet<char>();
int maxChar = 0;
foreach (var map in maps)
{
var openingChar = map.Key;
charSet.Add(openingChar);
if (openingChar < 128)
{
maxChar = Math.Max(maxChar, openingChar);
if (openingChar == 0)
{
ThrowHelper.ArgumentOutOfRangeException("Null is not a valid opening character.", nameof(maps));
}
}
else
{
nonAsciiMap ??= new Dictionary<uint, T>();
}
charSet.Add(map.Key);
}
OpeningCharacters = charSet.ToArray();
Array.Sort(OpeningCharacters);
asciiMap = new T[maxChar + 1];
_asciiMap = new T[128];
foreach (var state in maps)
{
char openingChar = state.Key;
if (openingChar < 128)
{
asciiMap[openingChar] ??= state.Value;
isOpeningCharacter.Set(openingChar);
_asciiMap[openingChar] ??= state.Value;
}
else if (!nonAsciiMap!.ContainsKey(openingChar))
else
{
nonAsciiMap[openingChar] = state.Value;
_nonAsciiMap ??= new Dictionary<uint, T>();
if (!_nonAsciiMap.ContainsKey(openingChar))
{
_nonAsciiMap[openingChar] = state.Value;
}
}
}
#if NETCOREAPP3_1_OR_GREATER
if (nonAsciiMap is null)
{
long bitmap_0_3 = 0;
long bitmap_4_7 = 0;
foreach (char openingChar in OpeningCharacters)
{
int position = (openingChar >> 4) | ((openingChar & 0x0F) << 3);
if (position < 64) bitmap_0_3 |= 1L << position;
else bitmap_4_7 |= 1L << (position - 64);
}
_asciiBitmap = Vector128.Create(bitmap_0_3, bitmap_4_7).AsByte();
}
#endif
_values = SearchValues.Create(OpeningCharacters);
}
/// <summary>
@@ -110,7 +76,7 @@ public sealed class CharacterMap<T> where T : class
[MethodImpl(MethodImplOptions.AggressiveInlining)]
get
{
T[] asciiMap = this.asciiMap;
T[] asciiMap = _asciiMap;
if (openingChar < (uint)asciiMap.Length)
{
return asciiMap[openingChar];
@@ -118,13 +84,12 @@ public sealed class CharacterMap<T> where T : class
else
{
T? map = null;
nonAsciiMap?.TryGetValue(openingChar, out map);
_nonAsciiMap?.TryGetValue(openingChar, out map);
return map;
}
}
}
/// <summary>
/// Searches for an opening character from a registered parser in the specified string.
/// </summary>
@@ -132,167 +97,20 @@ public sealed class CharacterMap<T> where T : class
/// <param name="start">The start.</param>
/// <param name="end">The end.</param>
/// <returns>Index position within the string of the first opening character found in the specified text; if not found, returns -1</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public int IndexOfOpeningCharacter(string text, int start, int end)
{
Debug.Assert(text is not null);
Debug.Assert(start >= 0 && end >= 0);
Debug.Assert(end - start + 1 >= 0);
Debug.Assert(end - start + 1 <= text.Length);
if (nonAsciiMap is null)
ReadOnlySpan<char> span = text.AsSpan(start, end - start + 1);
int index = span.IndexOfAny(_values);
if (index >= 0)
{
#if NETCOREAPP3_1_OR_GREATER
if (Ssse3.IsSupported && BitConverter.IsLittleEndian)
{
// Based on http://0x80.pl/articles/simd-byte-lookup.html#universal-algorithm
// Optimized for sets in the [1, 127] range
int lengthMinusOne = end - start;
int charsToProcessVectorized = lengthMinusOne & ~(2 * Vector128<short>.Count - 1);
int finalStart = start + charsToProcessVectorized;
if (start < finalStart)
{
ref char textStartRef = ref Unsafe.Add(ref Unsafe.AsRef(in text.GetPinnableReference()), start);
Vector128<byte> bitmap = _asciiBitmap;
do
{
// Load 32 bytes (16 chars) into two Vector128<short>s (chars)
// Drop the high byte of each char
// Pack the remaining bytes into a single Vector128<byte>
Vector128<byte> input = Sse2.PackUnsignedSaturate(
Unsafe.ReadUnaligned<Vector128<short>>(ref Unsafe.As<char, byte>(ref textStartRef)),
Unsafe.ReadUnaligned<Vector128<short>>(ref Unsafe.As<char, byte>(ref Unsafe.Add(ref textStartRef, Vector128<short>.Count))));
// Extract the higher nibble of each character ((input >> 4) & 0xF)
Vector128<byte> higherNibbles = Sse2.And(Sse2.ShiftRightLogical(input.AsUInt16(), 4).AsByte(), Vector128.Create((byte)0xF));
// Lookup the matching higher nibble for each character based on the lower nibble
// PSHUFB will set the result to 0 for any non-ASCII (> 127) character
Vector128<byte> bitsets = Ssse3.Shuffle(bitmap, input);
// Calculate a bitmask (1 << (higherNibble % 8)) for each character
Vector128<byte> bitmask = Ssse3.Shuffle(Vector128.Create(0x8040201008040201).AsByte(), higherNibbles);
// Check which characters are present in the set
// We are relying on bitsets being zero for non-ASCII characters
Vector128<byte> result = Sse2.And(bitsets, bitmask);
if (!result.Equals(Vector128<byte>.Zero))
{
int resultMask = ~Sse2.MoveMask(Sse2.CompareEqual(result, Vector128<byte>.Zero));
return start + BitOperations.TrailingZeroCount((uint)resultMask);
}
start += 2 * Vector128<short>.Count;
textStartRef = ref Unsafe.Add(ref textStartRef, 2 * Vector128<short>.Count);
}
while (start != finalStart);
}
}
ref char textRef = ref Unsafe.AsRef(in text.GetPinnableReference());
for (; start <= end; start++)
{
if (IntPtr.Size == 4)
{
uint c = Unsafe.Add(ref textRef, start);
if (c < 128 && isOpeningCharacter[c])
{
return start;
}
}
else
{
ulong c = Unsafe.Add(ref textRef, start);
if (c < 128 && isOpeningCharacter[c])
{
return start;
}
}
}
#else
unsafe
{
fixed (char* pText = text)
{
for (int i = start; i <= end; i++)
{
char c = pText[i];
if (c < 128 && isOpeningCharacter[c])
{
return i;
}
}
}
}
#endif
return -1;
index += start;
}
else
{
return IndexOfOpeningCharacterNonAscii(text, start, end);
}
}
private int IndexOfOpeningCharacterNonAscii(string text, int start, int end)
{
#if NETCOREAPP3_1_OR_GREATER
ref char textRef = ref Unsafe.AsRef(in text.GetPinnableReference());
for (int i = start; i <= end; i++)
{
char c = Unsafe.Add(ref textRef, i);
if (c < 128 ? isOpeningCharacter[c] : nonAsciiMap!.ContainsKey(c))
{
return i;
}
}
#else
unsafe
{
fixed (char* pText = text)
{
for (int i = start; i <= end; i++)
{
char c = pText[i];
if (c < 128 ? isOpeningCharacter[c] : nonAsciiMap!.ContainsKey(c))
{
return i;
}
}
}
}
#endif
return -1;
return index;
}
}
internal unsafe struct BoolVector128
{
private fixed bool values[128];
public void Set(char c)
{
Debug.Assert(c < 128);
values[c] = true;
}
public readonly bool this[uint c]
{
[MethodImpl(MethodImplOptions.AggressiveInlining)]
get
{
Debug.Assert(c < 128);
return values[c];
}
}
public readonly bool this[ulong c]
{
[MethodImpl(MethodImplOptions.AggressiveInlining)]
get
{
Debug.Assert(c < 128 && IntPtr.Size == 8);
return values[c];
}
}
}

View File

@@ -31,6 +31,8 @@
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
using System.Text;
namespace Markdig.Helpers;
/// <summary>
@@ -57,41 +59,31 @@ public static class EntityHelper
/// <returns>The unicode character set or <c>null</c> if the entity was not recognized.</returns>
public static string DecodeEntity(int utf32)
{
if (!CharHelper.IsInInclusiveRange(utf32, 1, 1114111) || CharHelper.IsInInclusiveRange(utf32, 55296, 57343))
if (utf32 == 0 || !UnicodeUtility.IsValidUnicodeScalar((uint)utf32))
return CharHelper.ReplacementCharString;
if (utf32 < 65536)
if (UnicodeUtility.IsBmpCodePoint((uint)utf32))
return char.ToString((char)utf32);
utf32 -= 65536;
return new string(
#if NETSTANDARD2_1_OR_GREATER || NETCOREAPP3_1_OR_GREATER
stackalloc
#else
new
#endif
char[]
{
(char)((uint)utf32 / 1024 + 55296),
(char)((uint)utf32 % 1024 + 56320)
});
UnicodeUtility.GetUtf16SurrogatesFromSupplementaryPlaneScalar((uint)utf32, out char high, out char low);
return new string([high, low]);
}
internal static void DecodeEntity(int utf32, ref ValueStringBuilder sb)
{
if (!CharHelper.IsInInclusiveRange(utf32, 1, 1114111) || CharHelper.IsInInclusiveRange(utf32, 55296, 57343))
if (utf32 == 0 || !UnicodeUtility.IsValidUnicodeScalar((uint)utf32))
{
sb.Append(CharHelper.ReplacementChar);
}
else if (utf32 < 65536)
else if (UnicodeUtility.IsBmpCodePoint((uint)utf32))
{
sb.Append((char)utf32);
}
else
{
utf32 -= 65536;
sb.Append((char)((uint)utf32 / 1024 + 55296));
sb.Append((char)((uint)utf32 % 1024 + 56320));
UnicodeUtility.GetUtf16SurrogatesFromSupplementaryPlaneScalar((uint)utf32, out char high, out char low);
sb.Append(high);
sb.Append(low);
}
}

View File

@@ -278,8 +278,7 @@ internal sealed class FastStringWriter : TextWriter
_pos = 0;
}
public override string ToString()
{
return _chars.AsSpan(0, _pos).ToString();
}
public override string ToString() => AsSpan().ToString();
public ReadOnlySpan<char> AsSpan() => _chars.AsSpan(0, _pos);
}

View File

@@ -0,0 +1,27 @@
// Copyright (c) Alexandre Mutel. All rights reserved.
// This file is licensed under the BSD-Clause 2 license.
// See the license.txt file in the project root for more information.
using System.Runtime.CompilerServices;
namespace Markdig.Helpers;
// Based on https://github.com/dotnet/runtime/blob/main/src/libraries/Common/src/System/HexConverter.cs
/// <summary>
/// Helper that formats a single byte as two hexadecimal characters.
/// </summary>
internal static class HexConverter
{
    /// <summary>
    /// Selects the letter casing of the produced digits. The numeric value is
    /// OR-ed into the packed pair of characters computed by <see cref="ToCharsBuffer"/>.
    /// </summary>
    public enum Casing : uint
    {
        /// <summary>Leaves the packed characters unchanged ('A'-'F' for letters).</summary>
        Upper = 0,

        /// <summary>Sets bit 0x20 in both packed characters ('a'-'f' for letters).</summary>
        Lower = 0x2020U,
    }

    /// <summary>
    /// Writes the two hexadecimal characters of <paramref name="value"/> into
    /// <paramref name="buffer"/>: the high nibble at <paramref name="startingIndex"/>
    /// and the low nibble at <paramref name="startingIndex"/> + 1.
    /// </summary>
    /// <param name="value">The byte to format.</param>
    /// <param name="buffer">Destination; must have room for two characters from <paramref name="startingIndex"/>.</param>
    /// <param name="startingIndex">Index of the first character to write.</param>
    /// <param name="casing">Casing applied to the letters.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static void ToCharsBuffer(byte value, Span<char> buffer, int startingIndex = 0, Casing casing = Casing.Upper)
    {
        // Place the high nibble in bits 8-11 and the low nibble in bits 0-3 so
        // both digits are converted by the same arithmetic on one uint.
        uint highNibble = (uint)value & 0xF0U;
        uint lowNibble = (uint)value & 0x0FU;
        uint difference = (highNibble << 4) + lowNibble - 0x8989U;

        uint adjustment = ((uint)(-(int)difference) & 0x7070U) >> 4;
        uint packedResult = (adjustment + difference + 0xB9B9U) | (uint)casing;

        // Low byte holds the low-nibble character, the next byte the high-nibble one.
        buffer[startingIndex + 1] = (char)(packedResult & 0xFF);
        buffer[startingIndex] = (char)(packedResult >> 8);
    }
}

View File

@@ -53,7 +53,7 @@ public struct LineReader
else
{
#if NETCOREAPP3_1_OR_GREATER
ReadOnlySpan<char> span = MemoryMarshal.CreateReadOnlySpan(ref Unsafe.Add(ref Unsafe.AsRef(text.GetPinnableReference()), sourcePosition), end - sourcePosition);
ReadOnlySpan<char> span = MemoryMarshal.CreateReadOnlySpan(ref Unsafe.Add(ref Unsafe.AsRef(in text.GetPinnableReference()), sourcePosition), end - sourcePosition);
#else
ReadOnlySpan<char> span = text.AsSpan(sourcePosition);
#endif
@@ -65,7 +65,7 @@ public struct LineReader
newSourcePosition = end + 1;
#if NETCOREAPP3_1_OR_GREATER
if (Unsafe.Add(ref Unsafe.AsRef(text.GetPinnableReference()), end) == '\r')
if (Unsafe.Add(ref Unsafe.AsRef(in text.GetPinnableReference()), end) == '\r')
#else
if ((uint)end < (uint)text.Length && text[end] == '\r')
#endif

View File

@@ -19,6 +19,11 @@ public static class LinkHelper
}
public static string Urilize(string headingText, bool allowOnlyAscii, bool keepOpeningDigits = false)
{
return Urilize(headingText.AsSpan(), allowOnlyAscii, keepOpeningDigits);
}
public static string Urilize(ReadOnlySpan<char> headingText, bool allowOnlyAscii, bool keepOpeningDigits = false)
{
var headingBuffer = new ValueStringBuilder(stackalloc char[ValueStringBuilder.StackallocThreshold]);
bool hasLetter = keepOpeningDigits && headingText.Length > 0 && char.IsLetterOrDigit(headingText[0]);
@@ -95,15 +100,24 @@ public static class LinkHelper
}
public static string UrilizeAsGfm(string headingText)
{
return UrilizeAsGfm(headingText.AsSpan());
}
public static string UrilizeAsGfm(ReadOnlySpan<char> headingText)
{
// Following https://github.com/jch/html-pipeline/blob/master/lib/html/pipeline/toc_filter.rb
var headingBuffer = new ValueStringBuilder(stackalloc char[ValueStringBuilder.StackallocThreshold]);
for (int i = 0; i < headingText.Length; i++)
{
var c = headingText[i];
if (char.IsLetterOrDigit(c) || c == ' ' || c == '-' || c == '_')
if (char.IsLetterOrDigit(c) || c == '-' || c == '_')
{
headingBuffer.Append(c == ' ' ? '-' : char.ToLowerInvariant(c));
headingBuffer.Append(char.ToLowerInvariant(c));
}
else if (c == ' ')
{
headingBuffer.Append('-');
}
}
return headingBuffer.ToString();

View File

@@ -36,7 +36,7 @@ public abstract class ObjectCache<T> where T : class
/// <returns></returns>
public T Get()
{
if (_builders.TryDequeue(out T instance))
if (_builders.TryDequeue(out T? instance))
{
return instance;
}

View File

@@ -187,9 +187,38 @@ public struct StringLineGroup : IEnumerable
}
}
IEnumerator IEnumerable.GetEnumerator()
public struct Enumerator : IEnumerator
{
return Lines.GetEnumerator();
private readonly StringLineGroup _parent;
private int _index;
public Enumerator(StringLineGroup parent)
{
_parent = parent;
_index = -1;
}
public object Current => _parent.Lines[_index];
public bool MoveNext()
{
return ++_index < _parent.Count;
}
public void Reset()
{
_index = -1;
}
}
public Enumerator GetEnumerator()
{
return new Enumerator(this);
}
IEnumerator IEnumerable.GetEnumerator()
{
return GetEnumerator();
}
private void IncreaseCapacity()
@@ -444,4 +473,4 @@ public struct StringLineGroup : IEnumerable
public readonly int End;
}
}
}

View File

@@ -475,7 +475,7 @@ public struct StringSlice : ICharIterator
}
#if NETCOREAPP3_1_OR_GREATER
return MemoryMarshal.CreateReadOnlySpan(ref Unsafe.Add(ref Unsafe.AsRef(text.GetPinnableReference()), start), length);
return MemoryMarshal.CreateReadOnlySpan(ref Unsafe.Add(ref Unsafe.AsRef(in text.GetPinnableReference()), start), length);
#else
return text.AsSpan(start, length);
#endif

View File

@@ -80,7 +80,7 @@ internal static class ThrowHelper
if (depth > limit)
DepthLimitExceeded();
[MethodImpl(MethodImplOptions.NoInlining)]
[DoesNotReturn]
static void DepthLimitExceeded() => throw new ArgumentException("Markdown elements in the input are too deeply nested - depth limit exceeded. Input is most likely not sensible or is a very large table.");
}

View File

@@ -0,0 +1,30 @@
// Copyright (c) Alexandre Mutel. All rights reserved.
// This file is licensed under the BSD-Clause 2 license.
// See the license.txt file in the project root for more information.
using System.Diagnostics;
using System.Runtime.CompilerServices;
namespace System.Text;
// Based on https://github.com/dotnet/runtime/blob/main/src/libraries/System.Private.CoreLib/src/System/Text/UnicodeUtility.cs
/// <summary>
/// Small set of helpers for classifying Unicode code points and converting them to UTF-16.
/// </summary>
internal static class UnicodeUtility
{
    /// <summary>
    /// Returns <c>true</c> if <paramref name="value"/> fits in a single UTF-16 code unit,
    /// i.e. it is in the range [ U+0000..U+FFFF ].
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static bool IsBmpCodePoint(uint value) => value <= 0xFFFFu;

    /// <summary>
    /// Returns <c>true</c> if <paramref name="value"/> is a Unicode scalar value, i.e. it is in
    /// [ U+0000..U+D7FF ] or [ U+E000..U+10FFFF ] (everything up to U+10FFFF except surrogates).
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static bool IsValidUnicodeScalar(uint value)
    {
        // Single-comparison form of "value < 0xD800 || (value >= 0xE000 && value <= 0x10FFFF)".
        return ((value - 0x110000u) ^ 0xD800u) >= 0xFFEF0800u;
    }

    /// <summary>
    /// Splits a supplementary-plane scalar value (U+10000..U+10FFFF) into its UTF-16 surrogate pair.
    /// </summary>
    /// <param name="value">The scalar value to convert. Must not be a BMP code point.</param>
    /// <param name="highSurrogateCodePoint">Receives the leading (high) surrogate.</param>
    /// <param name="lowSurrogateCodePoint">Receives the trailing (low) surrogate.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static void GetUtf16SurrogatesFromSupplementaryPlaneScalar(uint value, out char highSurrogateCodePoint, out char lowSurrogateCodePoint)
    {
        // Only code points above U+FFFF have a surrogate-pair representation, so BMP input is a caller bug.
        Debug.Assert(IsValidUnicodeScalar(value) && !IsBmpCodePoint(value));

        highSurrogateCodePoint = (char)((value + ((0xD800u - 0x40u) << 10)) >> 10);
        lowSurrogateCodePoint = (char)((value & 0x3FFu) + 0xDC00u);
    }
}

View File

@@ -5,7 +5,7 @@
<Copyright>Alexandre Mutel</Copyright>
<NeutralLanguage>en-US</NeutralLanguage>
<Authors>Alexandre Mutel</Authors>
<TargetFrameworks>net462;netstandard2.0;netstandard2.1;net6.0</TargetFrameworks>
<TargetFrameworks>net462;netstandard2.0;netstandard2.1;net6.0;net8.0</TargetFrameworks>
<CheckEolTargetFramework>false</CheckEolTargetFramework>
<PackageTags>Markdown CommonMark md html md2html</PackageTags>
<PackageReleaseNotes>https://github.com/lunet-io/markdig/blob/master/changelog.md</PackageReleaseNotes>
@@ -14,7 +14,7 @@
<PackageIcon>markdig.png</PackageIcon>
<PackageProjectUrl>https://github.com/lunet-io/markdig</PackageProjectUrl>
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
<LangVersion>10</LangVersion>
<LangVersion>12</LangVersion>
<Nullable>enable</Nullable>
<NoWarn>$(NoWarn);CS1591</NoWarn>
<GenerateDocumentationFile>true</GenerateDocumentationFile>
@@ -24,22 +24,18 @@
<SymbolPackageFormat>snupkg</SymbolPackageFormat>
</PropertyGroup>
<ItemGroup Condition=" '$(TargetFramework)' == 'net462' ">
<PackageReference Include="System.Memory" Version="4.5.4" />
<ItemGroup Condition=" '$(TargetFramework)' == 'net462' OR '$(TargetFramework)' == 'netstandard2.0'">
<PackageReference Include="System.Memory" Version="4.5.5" />
</ItemGroup>
<ItemGroup Condition=" '$(TargetFramework)' == 'netstandard2.0' ">
<PackageReference Include="System.Memory" Version="4.5.4" />
</ItemGroup>
<ItemGroup>
<None Include="../../img/markdig.png" Pack="true" PackagePath="" />
<None Include="../../readme.md" Pack="true" PackagePath="/"/>
<PackageReference Include="MinVer" Version="3.1.0">
<PackageReference Include="MinVer" Version="4.3.0">
<PrivateAssets>all</PrivateAssets>
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
</PackageReference>
<PackageReference Include="Microsoft.SourceLink.GitHub" Version="1.1.*" PrivateAssets="All"/>
<PackageReference Include="Microsoft.SourceLink.GitHub" Version="8.0.*" PrivateAssets="All"/>
</ItemGroup>
<Target Name="PatchVersion" AfterTargets="MinVer">

View File

@@ -19,16 +19,10 @@ namespace Markdig;
/// </summary>
public static class Markdown
{
public static string Version
{
get
{
if (_Version == null)
_Version = ((AssemblyFileVersionAttribute)typeof(Markdown).Assembly.GetCustomAttributes(typeof(AssemblyFileVersionAttribute), false).FirstOrDefault())?.Version ?? "Unknown";
return _Version;
}
}
private static string? _Version;
/// <summary>
/// Gets the <see cref="AssemblyFileVersionAttribute"/> version of the assembly that declares
/// <see cref="Markdown"/>, or <c>"Unknown"</c> when the attribute is missing. The value is
/// looked up on first access and cached afterwards.
/// </summary>
public static string Version
{
    get
    {
        string? cached = s_version;
        if (cached is null)
        {
            AssemblyFileVersionAttribute? fileVersion = typeof(Markdown).Assembly.GetCustomAttribute<AssemblyFileVersionAttribute>();
            cached = fileVersion?.Version ?? "Unknown";
            s_version = cached;
        }

        return cached;
    }
}

// Lazily populated backing store for Version.
private static string? s_version;
internal static readonly MarkdownPipeline DefaultPipeline = new MarkdownPipelineBuilder().Build();
private static readonly MarkdownPipeline _defaultTrackTriviaPipeline = new MarkdownPipelineBuilder().EnableTrackTrivia().Build();

View File

@@ -40,7 +40,8 @@ public abstract class FencedBlockParserBase : BlockParser, IAttributesParseable
/// <seealso cref="BlockParser" />
public abstract class FencedBlockParserBase<T> : FencedBlockParserBase where T : Block, IFencedBlock
{
private static readonly TransformedStringCache _infoStringCache = new(static infoString => HtmlHelper.Unescape(infoString));
private static readonly TransformedStringCache s_infoStringCache = new(static infoString => HtmlHelper.Unescape(infoString));
private static readonly TransformedStringCache s_argumentsStringCache = new(static argumentsString => HtmlHelper.Unescape(argumentsString));
private TransformedStringCache? _infoPrefixCache;
/// <summary>
@@ -176,7 +177,7 @@ public abstract class FencedBlockParserBase<T> : FencedBlockParserBase where T :
end:
fenced.TriviaAfterFencedChar = afterFence;
fenced.Info = _infoStringCache.Get(info.AsSpan());
fenced.Info = s_infoStringCache.Get(info.AsSpan());
fenced.UnescapedInfo = info;
fenced.TriviaAfterInfo = afterInfo;
fenced.Arguments = HtmlHelper.Unescape(arg.ToString());
@@ -197,71 +198,47 @@ public abstract class FencedBlockParserBase<T> : FencedBlockParserBase where T :
/// <returns><c>true</c> if parsing of the line is successful; <c>false</c> otherwise</returns>
public static bool DefaultInfoParser(BlockProcessor state, ref StringSlice line, IFencedBlock fenced, char openingCharacter)
{
// An info string cannot contain any backticks (unless it is a tilde block)
int firstSpace = -1;
if (openingCharacter == '`')
ReadOnlySpan<char> lineSpan = line.AsSpan();
if (!lineSpan.IsEmpty)
{
for (int i = line.Start; i <= line.End; i++)
if (openingCharacter == '`')
{
char c = line.Text[i];
if (c == '`')
firstSpace = lineSpan.IndexOfAny(' ', '\t', '`');
// An info string cannot contain any backticks (unless it is a tilde block)
if (firstSpace >= 0 && lineSpan.Slice(firstSpace).Contains('`'))
{
return false;
}
if (firstSpace < 0 && c.IsSpaceOrTab())
{
firstSpace = i;
}
}
}
else
{
for (int i = line.Start; i <= line.End; i++)
else
{
if (line.Text[i].IsSpaceOrTab())
{
firstSpace = i;
break;
}
firstSpace = lineSpan.IndexOfAny(' ', '\t');
}
}
StringSlice infoStringSlice;
string? argString = null;
if (firstSpace > 0)
if (firstSpace >= 0)
{
firstSpace += line.Start;
infoStringSlice = new StringSlice(line.Text, line.Start, firstSpace - 1);
// Skip any spaces after info string
firstSpace++;
while (firstSpace <= line.End)
{
char c = line[firstSpace];
if (c.IsSpaceOrTab())
{
firstSpace++;
}
else
{
break;
}
}
var argStringSlice = new StringSlice(line.Text, firstSpace, line.End);
argStringSlice.Trim();
argString = argStringSlice.ToString();
fenced.Arguments = s_argumentsStringCache.Get(argStringSlice.AsSpan());
}
else
{
infoStringSlice = line;
fenced.Arguments = string.Empty;
}
infoStringSlice.Trim();
fenced.Info = _infoStringCache.Get(infoStringSlice.AsSpan());
fenced.Arguments = HtmlHelper.Unescape(argString);
fenced.Info = s_infoStringCache.Get(infoStringSlice.AsSpan());
return true;
}
@@ -303,17 +280,19 @@ public abstract class FencedBlockParserBase<T> : FencedBlockParserBase where T :
// Try to parse any attached attributes
TryParseAttributes?.Invoke(processor, ref line, fenced);
// If the info parser was not successfull, early exit
// If the info parser was not successful, early exit
if (InfoParser != null && !InfoParser(processor, ref line, fenced, matchChar))
{
return BlockState.None;
}
// Add the language as an attribute by default
if (!string.IsNullOrEmpty(fenced.Info))
string? info = fenced.Info;
if (!string.IsNullOrEmpty(info))
{
Debug.Assert(_infoPrefixCache is not null || InfoPrefix is null);
string infoWithPrefix = _infoPrefixCache?.Get(fenced.Info!) ?? fenced.Info!;
string infoWithPrefix = _infoPrefixCache?.Get(info!) ?? info!;
fenced.GetAttributes().AddClass(infoWithPrefix);
}
@@ -329,34 +308,32 @@ public abstract class FencedBlockParserBase<T> : FencedBlockParserBase where T :
public override BlockState TryContinue(BlockProcessor processor, Block block)
{
var fence = (IFencedBlock)block;
var openingCount = fence.OpeningFencedCharCount;
// Match if we have a closing fence
var line = processor.Line;
var sourcePosition = processor.Start;
var closingCount = line.CountAndSkipChar(fence.FencedChar);
var diff = openingCount - closingCount;
char c = line.CurrentChar;
var lastFenceCharPosition = processor.Start + closingCount;
// If we have a closing fence, close it and discard the current line
// The line must contain only fence opening character followed only by whitespaces.
var startBeforeTrim = line.Start;
var endBeforeTrim = line.End;
var trimmed = line.TrimEnd();
if (diff <= 0 && !processor.IsCodeIndent && (c == '\0' || c.IsWhitespace()) && trimmed)
if (fence.OpeningFencedCharCount <= closingCount &&
!processor.IsCodeIndent &&
(c == '\0' || c.IsWhitespace()) &&
line.TrimEnd())
{
block.UpdateSpanEnd(startBeforeTrim - 1);
var fencedBlock = (IFencedBlock)block;
fencedBlock.ClosingFencedCharCount = closingCount;
fence.ClosingFencedCharCount = closingCount;
if (processor.TrackTrivia)
{
fencedBlock.NewLine = processor.Line.NewLine;
fencedBlock.TriviaBeforeClosingFence = processor.UseTrivia(sourcePosition - 1);
fencedBlock.TriviaAfter = new StringSlice(processor.Line.Text, lastFenceCharPosition, endBeforeTrim);
fence.NewLine = line.NewLine;
fence.TriviaBeforeClosingFence = processor.UseTrivia(sourcePosition - 1);
fence.TriviaAfter = new StringSlice(line.Text, processor.Start + closingCount, processor.Line.End);
}
// Don't keep the last line

View File

@@ -225,6 +225,7 @@ public class InlineProcessor
previousLineIndexForSliceOffset = 0;
lineOffsets.Clear();
var text = leafBlock.Lines.ToSlice(lineOffsets);
var textEnd = text.End;
leafBlock.Lines.Release();
int previousStart = -1;
@@ -319,7 +320,8 @@ public class InlineProcessor
var newLine = leafBlock.NewLine;
if (newLine != NewLine.None)
{
leafBlock.Inline.AppendChild(new LineBreakInline { NewLine = newLine });
var position = GetSourcePosition(textEnd + 1, out int line, out int column);
leafBlock.Inline.AppendChild(new LineBreakInline { NewLine = newLine, Line = line, Column = column, Span = { Start = position, End = position + (newLine == NewLine.CarriageReturnLineFeed ? 1 : 0) } });
}
}
}
@@ -342,6 +344,12 @@ public class InlineProcessor
// DebugLog.WriteLine("** Dump after Emphasis:");
// leafBlock.Inline.DumpTo(DebugLog);
//}
if (leafBlock.Inline.LastChild is not null)
{
leafBlock.Inline.Span.End = leafBlock.Inline.LastChild.Span.End;
leafBlock.UpdateSpanEnd(leafBlock.Inline.Span.End);
}
}
public void PostProcessInlines(int startingIndex, Inline? root, Inline? lastChild, bool isFinalProcessing)

View File

@@ -26,22 +26,16 @@ public class CodeInlineParser : InlineParser
public override bool Match(InlineProcessor processor, ref StringSlice slice)
{
var match = slice.CurrentChar;
char match = slice.CurrentChar;
if (slice.PeekCharExtra(-1) == match)
{
return false;
}
var startPosition = slice.Start;
Debug.Assert(match is not ('\r' or '\n'));
// Match the opened sticks
int openSticks = slice.CountAndSkipChar(match);
int contentStart = slice.Start;
int closeSticks = 0;
char c = slice.CurrentChar;
var builder = new ValueStringBuilder(stackalloc char[ValueStringBuilder.StackallocThreshold]);
// A backtick string is a string of one or more backtick characters (`) that is neither preceded nor followed by a backtick.
// A code span begins with a backtick string and ends with a backtick string of equal length.
@@ -54,91 +48,106 @@ public class CodeInlineParser : InlineParser
// This allows you to include code that begins or ends with backtick characters, which must be separated by
// whitespace from the opening or closing backtick strings.
bool allSpace = true;
bool containsNewLine = false;
var contentEnd = -1;
ReadOnlySpan<char> span = slice.AsSpan();
bool containsNewLines = false;
while (c != '\0')
while (true)
{
// Transform '\n' into a single space
if (c == '\n')
int i = span.IndexOfAny('\r', '\n', match);
if ((uint)i >= (uint)span.Length)
{
containsNewLine = true;
c = ' ';
}
else if (c == '\r')
{
containsNewLine = true;
slice.SkipChar();
c = slice.CurrentChar;
continue;
// We got to the end of the input before seeing the match character. CodeInline can't match here.
return false;
}
if (c == match)
int closeSticks = 0;
while ((uint)i < (uint)span.Length && span[i] == match)
{
contentEnd = slice.Start;
closeSticks = slice.CountAndSkipChar(match);
if (openSticks == closeSticks)
{
break;
}
allSpace = false;
builder.Append(match, closeSticks);
c = slice.CurrentChar;
closeSticks++;
i++;
}
else
span = span.Slice(i);
if (openSticks == closeSticks)
{
builder.Append(c);
if (c != ' ')
{
allSpace = false;
}
c = slice.NextChar();
break;
}
else if (closeSticks == 0)
{
containsNewLines = true;
span = span.Slice(1);
}
}
bool isMatching = false;
if (closeSticks == openSticks)
ReadOnlySpan<char> rawContent = slice.AsSpan().Slice(0, slice.Length - span.Length - openSticks);
var content = containsNewLines
? new LazySubstring(ReplaceNewLines(rawContent)) // Should be the rare path.
: new LazySubstring(slice.Text, slice.Start, rawContent.Length);
// Remove one space from front and back if the string is not all spaces
if (rawContent.Length > 2 &&
rawContent[0] is ' ' or '\n' &&
rawContent[rawContent.Length - 1] is ' ' or '\n' &&
rawContent.ContainsAnyExcept(' ', '\r', '\n'))
{
ReadOnlySpan<char> contentSpan = builder.AsSpan();
var content = containsNewLine
? new LazySubstring(contentSpan.ToString())
: new LazySubstring(slice.Text, contentStart, contentSpan.Length);
Debug.Assert(contentSpan.SequenceEqual(content.AsSpan()));
// Remove one space from front and back if the string is not all spaces
if (!allSpace && contentSpan.Length > 2 && contentSpan[0] == ' ' && contentSpan[contentSpan.Length - 1] == ' ')
{
content.Offset++;
content.Length -= 2;
}
int delimiterCount = Math.Min(openSticks, closeSticks);
var spanStart = processor.GetSourcePosition(startPosition, out int line, out int column);
var spanEnd = processor.GetSourcePosition(slice.Start - 1);
var codeInline = new CodeInline(content)
{
Delimiter = match,
Span = new SourceSpan(spanStart, spanEnd),
Line = line,
Column = column,
DelimiterCount = delimiterCount,
};
if (processor.TrackTrivia)
{
codeInline.ContentWithTrivia = new StringSlice(slice.Text, contentStart, contentEnd - 1);
}
processor.Inline = codeInline;
isMatching = true;
content.Offset++;
content.Length -= 2;
}
builder.Dispose();
return isMatching;
int startPosition = slice.Start;
slice.Start = startPosition + rawContent.Length + openSticks;
// We've already skipped the opening sticks. Account for that here.
startPosition -= openSticks;
var codeInline = new CodeInline(content)
{
Delimiter = slice.Text[startPosition],
Span = new SourceSpan(processor.GetSourcePosition(startPosition, out int line, out int column), processor.GetSourcePosition(slice.Start - 1)),
Line = line,
Column = column,
DelimiterCount = openSticks,
};
if (processor.TrackTrivia)
{
// startPosition and slice.Start include the opening/closing sticks.
codeInline.ContentWithTrivia = new StringSlice(slice.Text, startPosition + openSticks, slice.Start - openSticks - 1);
}
processor.Inline = codeInline;
return true;
}
/// <summary>
/// Builds a copy of <paramref name="content"/> in which every <c>'\n'</c> is replaced by a
/// single space and every <c>'\r'</c> is dropped.
/// </summary>
/// <param name="content">The raw text between the opening and closing delimiters.</param>
/// <returns>A new string with the line endings normalized as described.</returns>
private static string ReplaceNewLines(ReadOnlySpan<char> content)
{
    var result = new ValueStringBuilder(stackalloc char[ValueStringBuilder.StackallocThreshold]);

    int lineBreakIndex;
    while ((lineBreakIndex = content.IndexOfAny('\r', '\n')) >= 0)
    {
        // Copy everything in front of the line-ending character unchanged.
        result.Append(content.Slice(0, lineBreakIndex));

        // '\n' becomes a single space; '\r' contributes nothing to the output.
        if (content[lineBreakIndex] == '\n')
        {
            result.Append(' ');
        }

        content = content.Slice(lineBreakIndex + 1);
    }

    // No line-ending characters remain: the tail is copied as-is.
    result.Append(content);
    return result.ToString();
}
}

View File

@@ -1,5 +1,5 @@
// Copyright (c) Alexandre Mutel. All rights reserved.
// This file is licensed under the BSD-Clause 2 license.
// This file is licensed under the BSD-Clause 2 license.
// See the license.txt file in the project root for more information.
using System.Diagnostics;
@@ -109,7 +109,7 @@ public class EmphasisInlineParser : InlineParser, IPostInlineProcessor
var child = container.FirstChild;
while (child != null)
{
// Stop the search on the delimitation child
// Stop the search on the delimitation child
if (child == lastChild)
{
break;
@@ -197,7 +197,7 @@ public class EmphasisInlineParser : InlineParser, IPostInlineProcessor
if (canOpen) delimiterType |= DelimiterType.Open;
if (canClose) delimiterType |= DelimiterType.Close;
var delimiter = new EmphasisDelimiterInline(this, emphasisDesc)
var delimiter = new EmphasisDelimiterInline(this, emphasisDesc, new StringSlice(slice.Text, startPosition, slice.Start - 1))
{
DelimiterCount = delimiterCount,
Type = delimiterType,
@@ -221,7 +221,7 @@ public class EmphasisInlineParser : InlineParser, IPostInlineProcessor
// TODO: Benchmark difference between using List and LinkedList here since there could be a few Remove calls
// Move current_position forward in the delimiter stack (if needed) until
// Move current_position forward in the delimiter stack (if needed) until
// we find the first potential closer with delimiter * or _. (This will be the potential closer closest to the beginning of the input the first one in parse order.)
for (int i = 0; i < delimiters.Count; i++)
{
@@ -237,7 +237,7 @@ public class EmphasisInlineParser : InlineParser, IPostInlineProcessor
{
while (true)
{
// Now, look back in the stack (staying above stack_bottom and the openers_bottom for this delimiter type)
// Now, look back in the stack (staying above stack_bottom and the openers_bottom for this delimiter type)
// for the first matching potential opener (“matching” means same delimiter).
EmphasisDelimiterInline? openDelimiter = null;
int openDelimiterIndex = -1;
@@ -307,8 +307,10 @@ public class EmphasisInlineParser : InlineParser, IPostInlineProcessor
emphasis.Column = openDelimiter.Column;
emphasis.Span.End = closeDelimiter.Span.End - closeDelimitercount + delimiterDelta;
openDelimiter.Content.Start += delimiterDelta;
openDelimiter.Span.Start += delimiterDelta;
openDelimiter.Column += delimiterDelta;
closeDelimiter.Content.Start += delimiterDelta;
closeDelimiter.Span.Start += delimiterDelta;
closeDelimiter.Column += delimiterDelta;
@@ -331,7 +333,7 @@ public class EmphasisInlineParser : InlineParser, IPostInlineProcessor
for (int k = i - 1; k >= openDelimiterIndex + 1; k--)
{
var literalDelimiter = delimiters[k];
literalDelimiter.ReplaceBy(literalDelimiter.AsLiteralInline());
literalDelimiter.ReplaceBy(literalDelimiter.AsLiteralInline());
delimiters.RemoveAt(k);
i--;
}

View File

@@ -32,16 +32,12 @@ public sealed class LiteralInlineParser : InlineParser
public override bool Match(InlineProcessor processor, ref StringSlice slice)
{
var text = slice.Text;
string text = slice.Text;
var startPosition = processor.GetSourcePosition(slice.Start, out int line, out int column);
// Slightly faster to perform our own search for opening characters
var nextStart = processor.Parsers.IndexOfOpeningCharacter(text, slice.Start + 1, slice.End);
//var nextStart = str.IndexOfAny(processor.SpecialCharacters, slice.Start + 1, slice.Length - 1);
int nextStart = processor.Parsers.IndexOfOpeningCharacter(text, slice.Start + 1, slice.End);
int length;
if (nextStart < 0)
if ((uint)nextStart >= (uint)text.Length)
{
nextStart = slice.End + 1;
length = nextStart - slice.Start;
@@ -50,10 +46,10 @@ public sealed class LiteralInlineParser : InlineParser
{
// Remove line endings if the next char is a new line
length = nextStart - slice.Start;
if (!processor.TrackTrivia)
{
var nextText = text[nextStart];
if (nextText == '\n' || nextText == '\r')
if (text[nextStart] is '\n' or '\r')
{
int end = nextStart - 1;
while (length > 0 && text[end].IsSpace())
@@ -86,7 +82,7 @@ public sealed class LiteralInlineParser : InlineParser
processor.Inline = new LiteralInline
{
Content = length > 0 ? newSlice : StringSlice.Empty,
Span = new SourceSpan(startPosition, processor.GetSourcePosition(endPosition)),
Span = new SourceSpan(processor.GetSourcePosition(slice.Start, out int line, out int column), processor.GetSourcePosition(endPosition)),
Line = line,
Column = column,
};

View File

@@ -53,34 +53,11 @@ public static class MarkdownParser
{
blockProcessor.Open(document);
ProcessBlocks(blockProcessor, new LineReader(text));
ProcessBlocks(blockProcessor, text);
if (pipeline.TrackTrivia)
{
Block? lastBlock = blockProcessor.LastBlock;
if (lastBlock is null && document.Count == 0)
{
// this means we have unassigned characters
var noBlocksFoundBlock = new EmptyBlock(null);
List<StringSlice> linesBefore = blockProcessor.UseLinesBefore();
noBlocksFoundBlock.LinesAfter = new List<StringSlice>();
if (linesBefore != null)
{
noBlocksFoundBlock.LinesAfter.AddRange(linesBefore);
}
document.Add(noBlocksFoundBlock);
}
else if (lastBlock != null && blockProcessor.LinesBefore != null)
{
// this means we're out of lines, but still have unassigned empty lines.
// thus, we'll assign the empty unsassigned lines to the last block
// of the document.
var rootMostContainerBlock = Block.FindRootMostContainerParent(lastBlock);
rootMostContainerBlock.LinesAfter ??= new List<StringSlice>();
var linesBefore = blockProcessor.UseLinesBefore();
rootMostContainerBlock.LinesAfter.AddRange(linesBefore);
}
ProcessBlocksTrivia(blockProcessor, document);
}
// At this point the LineIndex is the same as the number of lines in the document
@@ -117,12 +94,15 @@ public static class MarkdownParser
return text.Replace('\0', CharHelper.ReplacementChar);
}
private static void ProcessBlocks(BlockProcessor blockProcessor, LineReader lineReader)
[MethodImpl(MethodImplOptions.NoInlining)]
private static void ProcessBlocks(BlockProcessor blockProcessor, string text)
{
var lineReader = new LineReader(text);
while (true)
{
// Get the precise position of the begining of the line
var lineText = lineReader.ReadLine();
// Get the precise position of the beginning of the line
StringSlice lineText = lineReader.ReadLine();
// If this is the end of file and the last line is empty
if (lineText.Text is null)
@@ -132,9 +112,39 @@ public static class MarkdownParser
blockProcessor.ProcessLine(lineText);
}
blockProcessor.CloseAll(true);
}
/// <summary>
/// Attaches the lines still pending in <paramref name="blockProcessor"/> (its "lines before")
/// to the document once block parsing is done, so that they are not lost.
/// </summary>
/// <param name="blockProcessor">The processor that parsed the blocks and holds the pending lines.</param>
/// <param name="document">The document that was produced.</param>
private static void ProcessBlocksTrivia(BlockProcessor blockProcessor, MarkdownDocument document)
{
    Block? lastBlock = blockProcessor.LastBlock;

    if (lastBlock is null)
    {
        if (document.Count == 0)
        {
            // No block was produced at all: keep the pending lines on a placeholder block.
            var placeholder = new EmptyBlock(null);
            List<StringSlice> pendingLines = blockProcessor.UseLinesBefore();
            placeholder.LinesAfter = [];
            if (pendingLines != null)
            {
                placeholder.LinesAfter.AddRange(pendingLines);
            }

            document.Add(placeholder);
        }

        return;
    }

    if (blockProcessor.LinesBefore is null)
    {
        return;
    }

    // The input is exhausted but some lines are still unassigned:
    // hand them to the root-most container of the last block.
    var rootContainer = Block.FindRootMostContainerParent(lastBlock);
    rootContainer.LinesAfter ??= [];
    var remainingLines = blockProcessor.UseLinesBefore();
    rootContainer.LinesAfter.AddRange(remainingLines);
}
[MethodImpl(MethodImplOptions.NoInlining)]
private static void ProcessInlines(InlineProcessor inlineProcessor, MarkdownDocument document)
{
// "stackless" processor

View File

@@ -0,0 +1,30 @@
// Copyright (c) Alexandre Mutel. All rights reserved.
// This file is licensed under the BSD-Clause 2 license.
// See the license.txt file in the project root for more information.
#if !NET8_0_OR_GREATER
namespace System.Text;
/// <summary>
/// ASCII validation helpers for target frameworks where this polyfill is compiled in.
/// </summary>
internal static class Ascii
{
    /// <summary>
    /// Returns <c>true</c> when every character of <paramref name="value"/> is in the range 0..127.
    /// </summary>
    public static bool IsValid(this string value) => IsValid(value.AsSpan());

    /// <summary>
    /// Returns <c>true</c> when every character of <paramref name="value"/> is in the range 0..127.
    /// An empty span is considered valid.
    /// </summary>
    public static bool IsValid(this ReadOnlySpan<char> value)
    {
        foreach (char c in value)
        {
            if (c > '\u007F')
            {
                return false;
            }
        }

        return true;
    }
}
#endif

View File

@@ -0,0 +1,24 @@
// Copyright (c) Alexandre Mutel. All rights reserved.
// This file is licensed under the BSD-Clause 2 license.
// See the license.txt file in the project root for more information.
#if !NETSTANDARD2_1_OR_GREATER
using System.Runtime.InteropServices;
namespace System.Text;
internal static class EncodingExtensions
{
public static unsafe int GetBytes(this Encoding encoding, ReadOnlySpan<char> chars, Span<byte> bytes)
{
fixed (char* charsPtr = &MemoryMarshal.GetReference(chars))
{
fixed (byte* bytesPtr = &MemoryMarshal.GetReference(bytes))
{
return encoding.GetBytes(charsPtr, chars.Length, bytesPtr, bytes.Length);
}
}
}
}
#endif

View File

@@ -0,0 +1,53 @@
// Copyright (c) Alexandre Mutel. All rights reserved.
// This file is licensed under the BSD-Clause 2 license.
// See the license.txt file in the project root for more information.
#if !NET8_0_OR_GREATER
namespace System;
/// <summary>
/// Span search helpers for target frameworks where these polyfills are compiled in.
/// </summary>
internal static class IndexOfHelpers
{
    /// <summary>
    /// Returns <c>true</c> if <paramref name="span"/> contains at least one character that is
    /// none of <paramref name="value0"/>, <paramref name="value1"/> and <paramref name="value2"/>.
    /// </summary>
    public static bool ContainsAnyExcept(this ReadOnlySpan<char> span, char value0, char value1, char value2)
    {
        foreach (char current in span)
        {
            bool isExcluded = current == value0 || current == value1 || current == value2;
            if (!isExcluded)
            {
                return true;
            }
        }

        return false;
    }

#if !NETSTANDARD2_1_OR_GREATER
    /// <summary>
    /// Returns the index of the first character of <paramref name="span"/> that also occurs in
    /// <paramref name="values"/>, or -1 if there is none.
    /// </summary>
    public static int IndexOfAny(this ReadOnlySpan<char> span, string values)
    {
        for (int position = 0; position < span.Length; position++)
        {
            if (values.IndexOf(span[position]) >= 0)
            {
                return position;
            }
        }

        return -1;
    }
#endif

#if !NET6_0_OR_GREATER
    /// <summary>
    /// Returns <c>true</c> if <paramref name="value"/> occurs anywhere in <paramref name="span"/>.
    /// </summary>
    public static bool Contains<T>(this ReadOnlySpan<T> span, T value) where T : IEquatable<T>
    {
        int index = span.IndexOf(value);
        return index >= 0;
    }
#endif
}
#endif

View File

@@ -0,0 +1,137 @@
// Copyright (c) Alexandre Mutel. All rights reserved.
// This file is licensed under the BSD-Clause 2 license.
// See the license.txt file in the project root for more information.
#if !NET8_0_OR_GREATER
using System.Diagnostics;
using System.Runtime.CompilerServices;
namespace System.Buffers;
/// <summary>
/// Factory and extension methods for <see cref="SearchValues{T}"/>, compiled only for target
/// frameworks older than .NET 8.
/// </summary>
internal static class SearchValues
{
    /// <summary>Creates a search set from the characters of <paramref name="values"/>.</summary>
    public static SearchValues<char> Create(string values)
    {
        return Create(values.AsSpan());
    }

    /// <summary>Creates a search set from the characters of <paramref name="values"/>.</summary>
    public static SearchValues<char> Create(ReadOnlySpan<char> values)
    {
        return new PreNet8CompatSearchValues(values);
    }

    /// <summary>Forwards to <paramref name="values"/>' <c>IndexOfAny</c> for <paramref name="span"/>.</summary>
    public static int IndexOfAny(this ReadOnlySpan<char> span, SearchValues<char> values)
    {
        return values.IndexOfAny(span);
    }

    /// <summary>Forwards to <paramref name="values"/>' <c>IndexOfAnyExcept</c> for <paramref name="span"/>.</summary>
    public static int IndexOfAnyExcept(this ReadOnlySpan<char> span, SearchValues<char> values)
    {
        return values.IndexOfAnyExcept(span);
    }
}
/// <summary>
/// Abstract base for a precomputed set of values to search for in a span. This declaration is
/// compiled only when NET8_0_OR_GREATER is not defined.
/// </summary>
/// <remarks>
/// NOTE(review): both members take a <c>ReadOnlySpan&lt;char&gt;</c> regardless of
/// <typeparamref name="T"/>; the only implementation in this file derives from
/// <c>SearchValues&lt;char&gt;</c>.
/// </remarks>
internal abstract class SearchValues<T>
{
/// <summary>
/// Searches <paramref name="span"/> for a character that belongs to this set.
/// The implementation in this file returns the index of the first match, or -1.
/// </summary>
public abstract int IndexOfAny(ReadOnlySpan<char> span);
/// <summary>
/// Searches <paramref name="span"/> for a character that does not belong to this set.
/// The implementation in this file returns the index of the first such character, or -1.
/// </summary>
public abstract int IndexOfAnyExcept(ReadOnlySpan<char> span);
}
/// <summary>
/// <see cref="SearchValues{T}"/> implementation for characters: ASCII members are kept in a
/// 128-entry lookup table, all other members in a <see cref="HashSet{T}"/> that is only
/// allocated when at least one non-ASCII value is supplied.
/// </summary>
internal sealed class PreNet8CompatSearchValues : SearchValues<char>
{
    private readonly BoolVector128 _ascii;
    private readonly HashSet<char>? _nonAscii;

    /// <summary>Builds the lookup structures from <paramref name="values"/>.</summary>
    public PreNet8CompatSearchValues(ReadOnlySpan<char> values)
    {
        for (int i = 0; i < values.Length; i++)
        {
            char value = values[i];
            if (value >= 128)
            {
                (_nonAscii ??= new HashSet<char>()).Add(value);
                continue;
            }

            _ascii.Set(value);
        }
    }

    /// <summary>
    /// Returns the index of the first character of <paramref name="span"/> that is in the set, or -1.
    /// </summary>
    public override int IndexOfAny(ReadOnlySpan<char> span)
    {
        HashSet<char>? nonAscii = _nonAscii;

        if (nonAscii is not null)
        {
            for (int i = 0; i < span.Length; i++)
            {
                char c = span[i];
                bool isMember = c < 128 ? _ascii[c] : nonAscii.Contains(c);
                if (isMember)
                {
                    return i;
                }
            }

            return -1;
        }

        // Only ASCII members: anything at or above 128 can never match.
        for (int i = 0; i < span.Length; i++)
        {
            char c = span[i];
            if (c < 128 && _ascii[c])
            {
                return i;
            }
        }

        return -1;
    }

    /// <summary>
    /// Returns the index of the first character of <paramref name="span"/> that is not in the set, or -1.
    /// </summary>
    public override int IndexOfAnyExcept(ReadOnlySpan<char> span)
    {
        HashSet<char>? nonAscii = _nonAscii;

        if (nonAscii is not null)
        {
            for (int i = 0; i < span.Length; i++)
            {
                char c = span[i];
                bool isMember = c < 128 ? _ascii[c] : nonAscii.Contains(c);
                if (!isMember)
                {
                    return i;
                }
            }

            return -1;
        }

        // Only ASCII members: anything at or above 128 is automatically outside the set.
        for (int i = 0; i < span.Length; i++)
        {
            char c = span[i];
            if (!(c < 128 && _ascii[c]))
            {
                return i;
            }
        }

        return -1;
    }

    /// <summary>Inline table of 128 flags, one per ASCII character.</summary>
    private unsafe struct BoolVector128
    {
        private fixed bool _values[128];

        /// <summary>Marks the ASCII character <paramref name="c"/> as a member.</summary>
        public void Set(char c)
        {
            Debug.Assert(c < 128);
            _values[c] = true;
        }

        /// <summary>Gets whether the ASCII character with code <paramref name="c"/> is a member.</summary>
        public readonly bool this[uint c]
        {
            [MethodImpl(MethodImplOptions.AggressiveInlining)]
            get
            {
                Debug.Assert(c < 128);
                return _values[c];
            }
        }
    }
}
#endif

View File

@@ -2,6 +2,7 @@
// This file is licensed under the BSD-Clause 2 license.
// See the license.txt file in the project root for more information.
using System.Buffers;
using System.Globalization;
using System.IO;
using System.Runtime.CompilerServices;
@@ -20,7 +21,10 @@ namespace Markdig.Renderers;
/// <seealso cref="TextRendererBase{HtmlRenderer}" />
public class HtmlRenderer : TextRendererBase<HtmlRenderer>
{
private static readonly char[] s_writeEscapeIndexOfAnyChars = new[] { '<', '>', '&', '"' };
private static readonly IdnMapping s_idnMapping = new();
private static readonly SearchValues<char> s_asciiNonEscapeChars =
SearchValues.Create("!#$%()*+,-./0123456789:;=?@ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz");
/// <summary>
/// Initializes a new instance of the <see cref="HtmlRenderer"/> class.
@@ -149,73 +153,38 @@ public class HtmlRenderer : TextRendererBase<HtmlRenderer>
{
if (!content.IsEmpty)
{
int nextIndex = content.IndexOfAny(s_writeEscapeIndexOfAnyChars);
if (nextIndex == -1)
WriteIndent();
while (true)
{
Write(content);
}
else
{
WriteEscapeSlow(content, softEscape);
int indexOfCharToEscape = softEscape
? content.IndexOfAny('<', '&')
: content.IndexOfAny("<>&\"");
if ((uint)indexOfCharToEscape >= (uint)content.Length)
{
WriteRaw(content);
return;
}
WriteRaw(content.Slice(0, indexOfCharToEscape));
if (EnableHtmlEscape)
{
WriteRaw(content[indexOfCharToEscape] switch
{
'<' => "&lt;",
'>' => "&gt;",
'&' => "&amp;",
_ => "&quot;",
});
}
content = content.Slice(indexOfCharToEscape + 1);
}
}
}
/// <summary>
/// Writes <paramref name="content"/> while replacing HTML-significant characters with entities.
/// <c>&lt;</c> and <c>&amp;</c> are always treated as special; <c>&gt;</c> and <c>"</c> only when
/// <paramref name="softEscape"/> is <c>false</c>.
/// </summary>
/// <param name="content">The text to write.</param>
/// <param name="softEscape">When <c>true</c>, <c>&gt;</c> and <c>"</c> are passed through untouched.</param>
private void WriteEscapeSlow(ReadOnlySpan<char> content, bool softEscape = false)
{
    WriteIndent();

    // Start of the pending run of ordinary characters that has not been written yet.
    int segmentStart = 0;

    for (int index = 0; index < content.Length; index++)
    {
        string? entity = content[index] switch
        {
            '<' => "&lt;",
            '&' => "&amp;",
            '>' when !softEscape => "&gt;",
            '"' when !softEscape => "&quot;",
            _ => null,
        };

        if (entity is null)
        {
            continue;
        }

        // Flush everything before the special character.
        WriteRaw(content.Slice(segmentStart, index - segmentStart));

        // With EnableHtmlEscape off the special character is skipped entirely (nothing is written for it).
        if (EnableHtmlEscape)
        {
            WriteRaw(entity);
        }

        segmentStart = index + 1;
    }

    WriteRaw(content.Slice(segmentStart));
}
private static readonly IdnMapping IdnMapping = new IdnMapping();
/// <summary>
/// Writes the URL escaped for HTML.
/// </summary>
@@ -239,120 +208,107 @@ public class HtmlRenderer : TextRendererBase<HtmlRenderer>
content = LinkRewriter(content);
}
// a://c.d = 7 chars
int schemeOffset = content.Length < 7 ? -1 : content.IndexOf("://", StringComparison.Ordinal);
if (schemeOffset != -1) // This is an absolute URL
if (!Ascii.IsValid(content))
{
schemeOffset += 3; // skip ://
WriteEscapeUrl(content, 0, schemeOffset);
bool idnaEncodeDomain = false;
int endOfDomain = schemeOffset;
for (; endOfDomain < content.Length; endOfDomain++)
int schemeOffset = content.IndexOf("://", StringComparison.Ordinal);
if (schemeOffset > 0) // This is an absolute URL
{
char c = content[endOfDomain];
if (c == '/' || c == '?' || c == '#' || c == ':') // End of domain part
{
break;
}
if (c > 127)
{
idnaEncodeDomain = true;
}
}
schemeOffset += 3; // skip ://
if (idnaEncodeDomain)
{
string domainName;
int domainLength = content.AsSpan(schemeOffset).IndexOfAny("/?#:");
if (domainLength < 0)
{
domainLength = content.Length - schemeOffset;
}
string? domainName = null;
try
{
domainName = IdnMapping.GetAscii(content, schemeOffset, endOfDomain - schemeOffset);
domainName = s_idnMapping.GetAscii(content, schemeOffset, domainLength);
}
catch
catch { }
if (domainName is not null)
{
// Not a valid IDN, fallback to non-punycode encoding
WriteEscapeUrl(content, schemeOffset, content.Length);
WriteEscapeUrlCore(content.AsSpan(0, schemeOffset));
WriteEscapeUrlCore(domainName.AsSpan());
WriteEscapeUrlCore(content.AsSpan(schemeOffset + domainLength));
return this;
}
// Escape the characters (see Commonmark example 327 and think of it with a non-ascii symbol)
int previousPosition = 0;
for (int i = 0; i < domainName.Length; i++)
{
var escape = HtmlHelper.EscapeUrlCharacter(domainName[i]);
if (escape != null)
{
Write(domainName, previousPosition, i - previousPosition);
previousPosition = i + 1;
Write(escape);
}
}
Write(domainName, previousPosition, domainName.Length - previousPosition);
WriteEscapeUrl(content, endOfDomain, content.Length);
// Not a valid IDN, fallback to non-punycode encoding
}
else
{
WriteEscapeUrl(content, schemeOffset, content.Length);
}
}
else // This is a relative URL
{
WriteEscapeUrl(content, 0, content.Length);
}
WriteEscapeUrlCore(content.AsSpan());
return this;
}
private void WriteEscapeUrl(string content, int start, int length)
private void WriteEscapeUrlCore(ReadOnlySpan<char> content)
{
int previousPosition = start;
for (var i = previousPosition; i < length; i++)
WriteIndent();
while (true)
{
var c = content[i];
int i = content.IndexOfAnyExcept(s_asciiNonEscapeChars);
if ((uint)i >= (uint)content.Length)
{
WriteRaw(content);
break;
}
WriteRaw(content.Slice(0, i));
char c = content[i];
if (c < 128)
{
var escape = HtmlHelper.EscapeUrlCharacter(c);
if (escape != null)
{
Write(content, previousPosition, i - previousPosition);
previousPosition = i + 1;
Write(escape);
}
WriteRaw(HtmlHelper.EscapeUrlCharacter(c));
}
else if (UseNonAsciiNoEscape)
{
// Special case for Edge/IE workaround for MarkdownEditor, don't escape non-ASCII chars to make image links working
WriteRaw(c);
}
else
{
Write(content, previousPosition, i - previousPosition);
previousPosition = i + 1;
// Special case for Edge/IE workaround for MarkdownEditor, don't escape non-ASCII chars to make image links working
if (UseNonAsciiNoEscape)
{
Write(c);
}
else
{
byte[] bytes;
if (c >= '\ud800' && c <= '\udfff' && previousPosition < length)
{
bytes = Encoding.UTF8.GetBytes(new[] { c, content[previousPosition] });
// Skip next char as it is decoded above
i++;
previousPosition = i + 1;
}
else
{
bytes = Encoding.UTF8.GetBytes(new[] { c });
}
for (var j = 0; j < bytes.Length; j++)
{
Write($"%{bytes[j]:X2}");
}
}
i = WriteEscapedUtf8Bytes(this, content, c, i);
}
content = content.Slice(i + 1);
}
// Percent-encodes the character at content[i] (passed as c) as its UTF-8 bytes, writing one "%XX" triple
// per byte to the renderer. Returns the index of the last char that was consumed: i itself, or i + 1 when
// c and the following char formed a surrogate pair. The caller continues from the returned index + 1.
static int WriteEscapedUtf8Bytes(HtmlRenderer renderer, ReadOnlySpan<char> content, char c, int i)
{
    scoped ReadOnlySpan<char> chars;

    // Only consume the next char when it really completes the surrogate pair. Pairing a high surrogate
    // with an arbitrary char would swallow that char and can yield more than 4 UTF-8 bytes
    // (3 replacement bytes + up to 3 for the next char), overflowing the buffer below.
    if (CharHelper.IsHighSurrogate(c)
        && (uint)(i + 1) < (uint)content.Length
        && char.IsLowSurrogate(content[i + 1]))
    {
        chars = stackalloc char[] { c, content[i + 1] };
        i++;
    }
    else
    {
        chars = stackalloc char[] { c };
    }

    // A single char needs at most 3 UTF-8 bytes and a valid surrogate pair exactly 4.
    Span<byte> utf8Buffer = stackalloc byte[4];
    int utf8Length = Encoding.UTF8.GetBytes(chars, utf8Buffer);
    utf8Buffer = utf8Buffer.Slice(0, utf8Length);

    // Reused "%XX" scratch buffer; the helper is asked to fill in the digits starting at index 1.
    Span<char> escapedBuffer = stackalloc char[3];
    escapedBuffer[0] = '%';

    foreach (byte b in utf8Buffer)
    {
        HexConverter.ToCharsBuffer(b, escapedBuffer, startingIndex: 1);
        renderer.WriteRaw(escapedBuffer);
    }

    return i;
}
Write(content, previousPosition, length - previousPosition);
}
/// <summary>

View File

@@ -3,7 +3,7 @@
// See the license.txt file in the project root for more information.
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using Markdig.Helpers;
using Markdig.Syntax;
using Markdig.Syntax.Inlines;
@@ -16,31 +16,64 @@ namespace Markdig.Renderers;
/// <seealso cref="IMarkdownRenderer" />
public abstract class RendererBase : IMarkdownRenderer
{
private readonly Dictionary<KeyWrapper, IMarkdownObjectRenderer?> _renderersPerType = new();
private const int SubTableCount = 32;
/// <summary>
/// One cached lookup result: the key of an object type together with the renderer selected for it.
/// </summary>
private readonly struct RendererEntry
{
/// <summary>The type key, as produced by <c>GetKeyForType</c>.</summary>
public readonly IntPtr Key;
/// <summary>The renderer chosen for the type, or <c>null</c> when no registered renderer accepted it.</summary>
public readonly IMarkdownObjectRenderer? Renderer;
public RendererEntry(IntPtr key, IMarkdownObjectRenderer? renderer)
{
Key = key;
Renderer = renderer;
}
}
private readonly RendererEntry[][] _renderersPerType;
internal int _childrenDepth = 0;
/// <summary>
/// Produces the cache key for <paramref name="obj"/>: the raw value of its runtime type handle.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static IntPtr GetKeyForType(MarkdownObject obj)
{
    RuntimeTypeHandle typeHandle = Type.GetTypeHandle(obj);
    return typeHandle.Value;
}
/// <summary>
/// Maps a type key to the index of the sub-table that stores its entry.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static int SubTableIndex(IntPtr key)
{
    // NOTE(review): the division by 64 presumably drops low bits that carry little information
    // (e.g. because of alignment) - confirm.
    ulong scaledKey = ((ulong)key) / 64;

    // SubTableCount is a power of two, so masking with (SubTableCount - 1) acts as a modulo.
    return (int)(scaledKey & (SubTableCount - 1));
}
/// <summary>
/// Initializes a new instance of the <see cref="RendererBase"/> class.
/// </summary>
protected RendererBase() { }
protected RendererBase()
{
    // Allocate the fixed set of sub-tables and start each one out empty, so lookups never
    // have to deal with a null sub-table.
    _renderersPerType = new RendererEntry[SubTableCount][];

    for (int bucket = 0; bucket < _renderersPerType.Length; bucket++)
    {
        _renderersPerType[bucket] = [];
    }
}
[MethodImpl(MethodImplOptions.NoInlining)]
private IMarkdownObjectRenderer? GetRendererInstance(MarkdownObject obj)
{
KeyWrapper key = GetKeyForType(obj);
Type objectType = obj.GetType();
IMarkdownObjectRenderer? renderer = null;
for (int i = 0; i < ObjectRenderers.Count; i++)
foreach (var potentialRenderer in ObjectRenderers)
{
var renderer = ObjectRenderers[i];
if (renderer.Accept(this, objectType))
if (potentialRenderer.Accept(this, objectType))
{
_renderersPerType[key] = renderer;
return renderer;
renderer = potentialRenderer;
break;
}
}
_renderersPerType[key] = null;
return null;
IntPtr key = GetKeyForType(obj);
ref RendererEntry[] entries = ref _renderersPerType[SubTableIndex(key)];
Array.Resize(ref entries, entries.Length + 1);
entries[entries.Length - 1] = new RendererEntry(key, renderer);
return renderer;
}
public ObjectRendererCollection ObjectRenderers { get; } = new();
@@ -77,12 +110,11 @@ public abstract class RendererBase : IMarkdownRenderer
bool saveIsFirstInContainer = IsFirstInContainer;
bool saveIsLastInContainer = IsLastInContainer;
var children = containerBlock;
for (int i = 0; i < children.Count; i++)
for (int i = 0; i < containerBlock.Count; i++)
{
IsFirstInContainer = i == 0;
IsLastInContainer = i + 1 == children.Count;
Write(children[i]);
IsLastInContainer = i + 1 == containerBlock.Count;
Write(containerBlock[i]);
}
IsFirstInContainer = saveIsFirstInContainer;
@@ -140,11 +172,27 @@ public abstract class RendererBase : IMarkdownRenderer
// Calls before writing an object
ObjectWriteBefore?.Invoke(this, obj);
if (!_renderersPerType.TryGetValue(GetKeyForType(obj), out IMarkdownObjectRenderer? renderer))
IMarkdownObjectRenderer? renderer = null;
IntPtr key = GetKeyForType(obj);
#if NETFRAMEWORK || NETSTANDARD
RendererEntry[] renderers = _renderersPerType[SubTableIndex(key)];
#else
RendererEntry[] renderers = Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(_renderersPerType), SubTableIndex(key));
#endif
foreach (RendererEntry entry in renderers)
{
renderer = GetRendererInstance(obj);
if (key == entry.Key)
{
renderer = entry.Renderer;
goto Render;
}
}
renderer = GetRendererInstance(obj);
Render:
if (renderer is not null)
{
renderer.Write(this, obj);
@@ -161,24 +209,4 @@ public abstract class RendererBase : IMarkdownRenderer
// Calls after writing an object
ObjectWriteAfter?.Invoke(this, obj);
}
/// <summary>
/// Produces the dictionary key for <paramref name="obj"/> by wrapping the raw value of its runtime type handle.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static KeyWrapper GetKeyForType(MarkdownObject obj) =>
    new KeyWrapper(Type.GetTypeHandle(obj).Value);
/// <summary>
/// Thin wrapper around a type-handle value that supplies strongly typed equality and hashing.
/// </summary>
private readonly struct KeyWrapper : IEquatable<KeyWrapper>
{
    public readonly IntPtr Key;

    public KeyWrapper(IntPtr key)
    {
        Key = key;
    }

    /// <summary>Two wrappers are equal when they hold the same handle value.</summary>
    public bool Equals(KeyWrapper other)
    {
        return Key == other.Key;
    }

    public override int GetHashCode()
    {
        return Key.GetHashCode();
    }

    // The object-based overload is deliberately unsupported; only the typed Equals is meant to be used.
    public override bool Equals(object? obj)
    {
        throw new NotImplementedException();
    }
}
}

View File

@@ -16,7 +16,7 @@ namespace Markdig.Syntax.Inlines;
/// <seealso cref="Inline" />
public class ContainerInline : Inline, IEnumerable<Inline>
{
public ContainerInline()
public ContainerInline() : base(dummySkipTypeKind: true)
{
SetTypeKind(isInline: true, isContainer: true);
}

View File

@@ -1,5 +1,5 @@
// Copyright (c) Alexandre Mutel. All rights reserved.
// This file is licensed under the BSD-Clause 2 license.
// This file is licensed under the BSD-Clause 2 license.
// See the license.txt file in the project root for more information.
using Markdig.Helpers;
@@ -27,6 +27,24 @@ public class EmphasisDelimiterInline : DelimiterInline
Descriptor = descriptor;
DelimiterChar = descriptor.Character;
Content = new StringSlice(ToLiteral());
}
/// <summary>
/// Initializes a new instance of the <see cref="EmphasisDelimiterInline" /> class from a caller-supplied
/// content slice, which is stored in <see cref="Content"/> as given.
/// </summary>
/// <param name="parser">The parser, forwarded to the base constructor.</param>
/// <param name="descriptor">The emphasis descriptor; its character becomes the delimiter character.</param>
/// <param name="content">The slice to store in <see cref="Content"/>.</param>
/// <exception cref="ArgumentNullException">Reported through <c>ThrowHelper</c> when <paramref name="descriptor"/> is <c>null</c>.</exception>
internal EmphasisDelimiterInline(InlineParser parser, EmphasisDescriptor descriptor, StringSlice content) : base(parser)
{
    if (descriptor is null)
    {
        ThrowHelper.ArgumentNullException(nameof(descriptor));
    }

    Descriptor = descriptor;
    DelimiterChar = descriptor.Character;
    Content = content;
}
/// <summary>
@@ -44,16 +62,35 @@ public class EmphasisDelimiterInline : DelimiterInline
/// </summary>
public int DelimiterCount { get; set; }
/// <summary>
/// The content as a <see cref="StringSlice"/>.
/// </summary>
public StringSlice Content;
public override string ToLiteral()
{
return DelimiterCount > 0 ? new string(DelimiterChar, DelimiterCount) : string.Empty;
if (DelimiterCount == 1)
{
return DelimiterChar switch
{
'*' => "*",
'_' => "_",
'~' => "~",
'^' => "^",
'+' => "+",
'=' => "=",
_ => DelimiterChar.ToString()
};
}
return new string(DelimiterChar, DelimiterCount);
}
public LiteralInline AsLiteralInline()
{
return new LiteralInline()
{
Content = new StringSlice(ToLiteral()),
Content = Content,
IsClosed = true,
Span = Span,
Line = Line,

View File

@@ -20,6 +20,8 @@ public abstract class Inline : MarkdownObject, IInline
SetTypeKind(isInline: true, isContainer: false);
}
// Constructor overload that intentionally does NOT call SetTypeKind, unlike the parameterless constructor.
// ContainerInline chains to it and then calls SetTypeKind itself with isContainer: true.
// The parameter's value is never read; it exists only to select this overload.
private protected Inline(bool dummySkipTypeKind) { }
/// <summary>
/// Gets the parent container of this inline.
/// </summary>

View File

@@ -2,6 +2,7 @@
// This file is licensed under the BSD-Clause 2 license.
// See the license.txt file in the project root for more information.
using System.Diagnostics;
using System.Runtime.CompilerServices;
using Markdig.Helpers;
@@ -36,7 +37,8 @@ public abstract class MarkdownObject : IMarkdownObject
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private protected void SetTypeKind(bool isInline, bool isContainer)
{
_lineBits |= (isInline ? IsInlineMask : 0) | (isContainer ? IsContainerMask : 0);
Debug.Assert(_lineBits == 0);
_lineBits = (isInline ? IsInlineMask : 0) | (isContainer ? IsContainerMask : 0);
}
private protected bool IsClosedInternal

View File

@@ -1,6 +1,6 @@
{
"sdk": {
"version": "6.0.100",
"version": "8.0.100",
"rollForward": "latestMajor",
"allowPrerelease": false
}

View File

@@ -3,6 +3,7 @@
This file is licensed under the BSD-Clause 2 license. &#xD;
See the license.txt file in the project root for more information.</s:String>
<s:String x:Key="/Default/CodeStyle/Naming/CSharpNaming/PredefinedNamingRules/=PrivateInstanceFields/@EntryIndexedValue">&lt;Policy Inspect="True" Prefix="" Suffix="" Style="aaBb" /&gt;</s:String>
<s:Boolean x:Key="/Default/Environment/SettingsMigration/IsMigratorApplied/=JetBrains_002EReSharper_002EFeature_002EServices_002ECodeCleanup_002EFileHeader_002EFileHeaderSettingsMigrate/@EntryIndexedValue">True</s:Boolean>
<s:String x:Key="/Default/Environment/UnitTesting/NUnitProvider/SetCurrentDirectoryTo/@EntryValue">TestFolder</s:String>
<s:Boolean x:Key="/Default/UserDictionary/Words/=Autolink/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=Inlines/@EntryIndexedValue">True</s:Boolean>