mirror of
https://github.com/xoofx/markdig.git
synced 2026-02-04 05:44:50 +00:00
Compare commits
4 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d47fbc757f | ||
|
|
3602433b84 | ||
|
|
1bac4afc9b | ||
|
|
a89056d961 |
2
.gitignore
vendored
2
.gitignore
vendored
@@ -8,6 +8,8 @@
|
||||
*.sln.docstates
|
||||
*.nuget.props
|
||||
*.nuget.targets
|
||||
src/.idea
|
||||
BenchmarkDotNet.Artifacts
|
||||
|
||||
# User-specific files (MonoDevelop/Xamarin Studio)
|
||||
*.userprefs
|
||||
|
||||
81
src/Markdig.Benchmarks/PipeTable/PipeTableBenchmark.cs
Normal file
81
src/Markdig.Benchmarks/PipeTable/PipeTableBenchmark.cs
Normal file
@@ -0,0 +1,81 @@
|
||||
// Copyright (c) Alexandre Mutel. All rights reserved.
|
||||
// This file is licensed under the BSD-Clause 2 license.
|
||||
// See the license.txt file in the project root for more information.
|
||||
|
||||
using BenchmarkDotNet.Attributes;
|
||||
using BenchmarkDotNet.Diagnosers;
|
||||
using Markdig;
|
||||
|
||||
namespace Testamina.Markdig.Benchmarks.PipeTable;
|
||||
|
||||
/// <summary>
/// Benchmark for pipe table parsing performance, especially for large tables.
/// Tests the performance of PipeTableParser with varying table sizes.
/// </summary>
/// <remarks>
/// Every benchmark converts a pre-generated markdown table to HTML through the same
/// pipeline instance. The inputs are built once in <see cref="Setup"/> so that table
/// generation is not part of the measured work.
/// </remarks>
[MemoryDiagnoser]
[GcServer(true)] // Use server GC to get more comprehensive GC stats
public class PipeTableBenchmark
{
    // Column count shared by every generated table; it also appears in each benchmark description.
    private const int ColumnCount = 5;

    // Markdown inputs, one per table size. Assigned in Setup(), hence the null-forgiving initializers.
    private string _100Rows = null!;
    private string _500Rows = null!;
    private string _1000Rows = null!;
    private string _1500Rows = null!;
    private string _5000Rows = null!;
    private string _10000Rows = null!;

    // Pipeline reused by all benchmarks. Assigned in Setup().
    private MarkdownPipeline _pipeline = null!;

    /// <summary>
    /// Builds the shared pipeline and generates the markdown input for each table size.
    /// </summary>
    [GlobalSetup]
    public void Setup()
    {
        // Pipeline with pipe tables enabled (part of advanced extensions)
        _pipeline = new MarkdownPipelineBuilder()
            .UseAdvancedExtensions()
            .Build();

        // Generate tables of various sizes
        // Note: Before optimization, 5000+ rows hit depth limit due to nested tree structure.
        // After optimization, these should work.
        _100Rows = PipeTableGenerator.Generate(rows: 100, columns: ColumnCount);
        _500Rows = PipeTableGenerator.Generate(rows: 500, columns: ColumnCount);
        _1000Rows = PipeTableGenerator.Generate(rows: 1000, columns: ColumnCount);
        _1500Rows = PipeTableGenerator.Generate(rows: 1500, columns: ColumnCount);
        _5000Rows = PipeTableGenerator.Generate(rows: 5000, columns: ColumnCount);
        _10000Rows = PipeTableGenerator.Generate(rows: 10000, columns: ColumnCount);
    }

    /// <summary>Converts the 100-row table to HTML.</summary>
    /// <returns>The rendered HTML, returned so the conversion result is consumed.</returns>
    [Benchmark(Description = "PipeTable 100 rows x 5 cols")]
    public string Parse100Rows() => Markdown.ToHtml(_100Rows, _pipeline);

    /// <summary>Converts the 500-row table to HTML.</summary>
    /// <returns>The rendered HTML, returned so the conversion result is consumed.</returns>
    [Benchmark(Description = "PipeTable 500 rows x 5 cols")]
    public string Parse500Rows() => Markdown.ToHtml(_500Rows, _pipeline);

    /// <summary>Converts the 1000-row table to HTML.</summary>
    /// <returns>The rendered HTML, returned so the conversion result is consumed.</returns>
    [Benchmark(Description = "PipeTable 1000 rows x 5 cols")]
    public string Parse1000Rows() => Markdown.ToHtml(_1000Rows, _pipeline);

    /// <summary>Converts the 1500-row table to HTML.</summary>
    /// <returns>The rendered HTML, returned so the conversion result is consumed.</returns>
    [Benchmark(Description = "PipeTable 1500 rows x 5 cols")]
    public string Parse1500Rows() => Markdown.ToHtml(_1500Rows, _pipeline);

    /// <summary>Converts the 5000-row table to HTML.</summary>
    /// <returns>The rendered HTML, returned so the conversion result is consumed.</returns>
    [Benchmark(Description = "PipeTable 5000 rows x 5 cols")]
    public string Parse5000Rows() => Markdown.ToHtml(_5000Rows, _pipeline);

    /// <summary>Converts the 10000-row table to HTML.</summary>
    /// <returns>The rendered HTML, returned so the conversion result is consumed.</returns>
    [Benchmark(Description = "PipeTable 10000 rows x 5 cols")]
    public string Parse10000Rows() => Markdown.ToHtml(_10000Rows, _pipeline);
}
|
||||
61
src/Markdig.Benchmarks/PipeTable/PipeTableGenerator.cs
Normal file
61
src/Markdig.Benchmarks/PipeTable/PipeTableGenerator.cs
Normal file
@@ -0,0 +1,61 @@
|
||||
// Copyright (c) Alexandre Mutel. All rights reserved.
|
||||
// This file is licensed under the BSD-Clause 2 license.
|
||||
// See the license.txt file in the project root for more information.
|
||||
|
||||
using System.Text;
|
||||
|
||||
namespace Testamina.Markdig.Benchmarks.PipeTable;
|
||||
|
||||
/// <summary>
/// Generates pipe table markdown content for benchmarking purposes.
/// </summary>
public static class PipeTableGenerator
{
    private const int DefaultCellWidth = 10;

    /// <summary>
    /// Generates a pipe table in markdown format.
    /// </summary>
    /// <remarks>
    /// The output consists of one header row (<c>Header 1</c>, <c>Header 2</c>, ...), one separator row
    /// of dashes, and <paramref name="rows"/> data rows whose cells read <c>R{row}C{col}</c> (1-based).
    /// Each line is terminated with <see cref="StringBuilder.AppendLine()"/>, i.e. the platform newline.
    /// Cell text longer than <paramref name="cellWidth"/> is not truncated; shorter text is padded
    /// with trailing spaces.
    /// </remarks>
    /// <param name="rows">Number of data rows (excluding header)</param>
    /// <param name="columns">Number of columns</param>
    /// <param name="cellWidth">Width of each cell content (default: 10)</param>
    /// <returns>Pipe table markdown string</returns>
    public static string Generate(int rows, int columns, int cellWidth = DefaultCellWidth)
    {
        var sb = new StringBuilder();

        // Header row
        sb.Append('|');
        for (int col = 0; col < columns; col++)
        {
            AppendCell(sb, $"Header {col + 1}", cellWidth);
        }
        sb.AppendLine();

        // Separator row (with dashes). The two extra dashes cover the single space of
        // padding that AppendCell writes on each side of the cell content.
        sb.Append('|');
        for (int col = 0; col < columns; col++)
        {
            // Append(char, count) writes the run directly instead of allocating a temporary string.
            sb.Append('-', cellWidth + 2);
            sb.Append('|');
        }
        sb.AppendLine();

        // Data rows
        for (int row = 0; row < rows; row++)
        {
            sb.Append('|');
            for (int col = 0; col < columns; col++)
            {
                AppendCell(sb, $"R{row + 1}C{col + 1}", cellWidth);
            }
            sb.AppendLine();
        }

        return sb.ToString();
    }

    // Writes one cell as " <text padded to cellWidth> |"; the caller emits the row's leading '|'.
    private static void AppendCell(StringBuilder sb, string text, int cellWidth)
    {
        sb.Append(' ');
        sb.Append(text.PadRight(cellWidth));
        sb.Append(" |");
    }
}
|
||||
@@ -7,6 +7,7 @@ using BenchmarkDotNet.Configs;
|
||||
using BenchmarkDotNet.Running;
|
||||
|
||||
using Markdig;
|
||||
using Testamina.Markdig.Benchmarks.PipeTable;
|
||||
|
||||
|
||||
namespace Testamina.Markdig.Benchmarks;
|
||||
@@ -68,7 +69,16 @@ public class Program
|
||||
//config.Add(gcDiagnoser);
|
||||
|
||||
//var config = DefaultConfig.Instance;
|
||||
BenchmarkRunner.Run<Program>(config);
|
||||
|
||||
// Run specific benchmarks based on command line arguments
|
||||
if (args.Length > 0 && args[0] == "--pipetable")
|
||||
{
|
||||
BenchmarkRunner.Run<PipeTableBenchmark>(config);
|
||||
}
|
||||
else
|
||||
{
|
||||
BenchmarkRunner.Run<Program>(config);
|
||||
}
|
||||
//BenchmarkRunner.Run<TestDictionary>(config);
|
||||
//BenchmarkRunner.Run<TestMatchPerf>();
|
||||
//BenchmarkRunner.Run<TestStringPerf>();
|
||||
|
||||
@@ -22,6 +22,18 @@ public partial class TestEmphasisPlus
|
||||
TestParser.TestSpec("normal ***Strong emphasis*** normal", "<p>normal <em><strong>Strong emphasis</strong></em> normal</p>", "");
|
||||
}
|
||||
|
||||
/// <summary>
/// Checks six paragraphs of the form <c>a*aX*a</c> / <c>a*Xa*a</c>, where X is a symbol character
/// (three of the inputs use characters outside the Basic Multilingual Plane).
/// The expected HTML keeps every <c>*</c> literal, i.e. no emphasis is produced.
/// </summary>
[Test]
public void SupplementaryPunctuation()
{
    TestParser.TestSpec("a*a∇*a\n\na*∇a*a\n\na*a𝜵*a\n\na*𝜵a*a\n\na*𐬼a*a\n\na*a𐬼*a", "<p>a*a∇*a</p>\n<p>a*∇a*a</p>\n<p>a*a𝜵*a</p>\n<p>a*𝜵a*a</p>\n<p>a*𐬼a*a</p>\n<p>a*a𐬼*a</p>", "");
}
|
||||
|
||||
/// <summary>
/// Checks that <c>**…**</c> runs placed directly between emoji are rendered as
/// <c>&lt;strong&gt;</c>, both for a supplementary-plane ideograph and for a BMP one.
/// </summary>
[Test]
public void RecognizeSupplementaryChars()
{
    TestParser.TestSpec("🌶️**𰻞**🍜**𰻞**🌶️**麺**🍜", "<p>🌶️<strong>𰻞</strong>🍜<strong>𰻞</strong>🌶️<strong>麺</strong>🍜</p>", "");
}
|
||||
|
||||
[Test]
|
||||
public void OpenEmphasisHasConvenientContentStringSlice()
|
||||
{
|
||||
|
||||
@@ -31,4 +31,14 @@ public class TestSmartyPants
|
||||
|
||||
TestParser.TestSpec("<<test>>", "<p>«test»</p>", pipeline);
|
||||
}
|
||||
|
||||
/// <summary>
/// Checks SmartyPants quote handling when the quotes are adjacent to characters
/// encoded as surrogate pairs.
/// </summary>
[Test]
public void RecognizesSupplementaryCharacters()
{
    var pipeline = new MarkdownPipelineBuilder()
        .UseSmartyPants()
        .Build();

    // The expected HTML spells the quotes as entities. Writing them as raw characters would
    // put an unescaped '"' inside the C# string literal and end it right after "<p>".
    // NOTE(review): entity names were reconstructed from the decoded characters
    // (" -> &quot;, “ -> &ldquo;, ” -> &rdquo;) — confirm against the renderer's actual output.
    TestParser.TestSpec("\"𝜵\"𠮷\"𝜵\"𩸽\"", "<p>&quot;𝜵&ldquo;𠮷&rdquo;𝜵&ldquo;𩸽&rdquo;</p>", pipeline);
}
|
||||
}
|
||||
|
||||
165
src/Markdig.Tests/TestStringSlice.cs
Normal file
165
src/Markdig.Tests/TestStringSlice.cs
Normal file
@@ -0,0 +1,165 @@
|
||||
// Copyright (c) Alexandre Mutel. All rights reserved.
|
||||
// This file is licensed under the BSD-Clause 2 license.
|
||||
// See the license.txt file in the project root for more information.
|
||||
|
||||
using Markdig.Helpers;
|
||||
|
||||
namespace Markdig.Tests;
|
||||
|
||||
/// <summary>
/// Tests for the Rune-based accessors of <see cref="StringSlice"/>:
/// <c>CurrentRune</c>, <c>NextRune()</c>, <c>PeekRuneExtra(int)</c> and <c>RuneAt(int)</c>.
/// </summary>
/// <remarks>
/// All offsets and indices used below are counted in UTF-16 <c>char</c>s, not in Runes.
/// Throughout these tests a Rune whose <c>Value</c> is 0 is the expected result for
/// "nothing valid here" (out of range, isolated surrogate, or an index that lands in the
/// middle of a surrogate pair).
/// </remarks>
[TestFixture]
public class TestStringSlice
{
#if NET
    /// <summary>
    /// BMP-only input: every Rune is one <c>char</c>, so Rune offsets and char offsets coincide.
    /// </summary>
    [Test]
    public void TestRuneBmp()
    {
        var slice = new StringSlice("01234");

        // Advancing moves Start by one char per Rune.
        Assert.AreEqual('0', slice.CurrentRune.Value);
        Assert.AreEqual(0, slice.Start);
        Assert.AreEqual('1', slice.NextRune().Value);
        Assert.AreEqual(1, slice.Start);
        Assert.AreEqual('2', slice.NextRune().Value);
        Assert.AreEqual(2, slice.Start);
        Assert.AreEqual('2', slice.CurrentRune.Value);
        Assert.AreEqual("234", slice.ToString());

        // Peeking is relative to Start (currently 2); out-of-range offsets yield 0.
        Assert.AreEqual('3', slice.PeekRuneExtra(1).Value);
        Assert.AreEqual('4', slice.PeekRuneExtra(2).Value);
        Assert.AreEqual(0, slice.PeekRuneExtra(3).Value);
        Assert.AreEqual('1', slice.PeekRuneExtra(-1).Value);
        Assert.AreEqual('0', slice.PeekRuneExtra(-2).Value);
        Assert.AreEqual(0, slice.PeekRuneExtra(-3).Value);

        // RuneAt takes an absolute index into the text.
        Assert.AreEqual('0', slice.RuneAt(0).Value);
        Assert.AreEqual('1', slice.RuneAt(1).Value);
        Assert.AreEqual('2', slice.RuneAt(2).Value);
        Assert.AreEqual('3', slice.RuneAt(3).Value);
        Assert.AreEqual('4', slice.RuneAt(4).Value);

        // Neither peeking nor RuneAt moved the slice.
        Assert.AreEqual(2, slice.Start);
    }

    /// <summary>
    /// Input made only of supplementary characters: each Rune occupies two <c>char</c>s.
    /// </summary>
    [Test]
    public void TestRuneSupplementaryOnly()
    {
        var slice = new StringSlice("𝟎𝟏𝟐𝟑𝟒");
        Assert.AreEqual(10, slice.Length);

        // 𝟎 = U+1D7CE, 𝟐 = U+1D7D0
        Assert.AreEqual(0x1D7CE, slice.CurrentRune.Value); // 𝟎
        Assert.AreEqual(0, slice.Start);
        Assert.AreEqual(0x1D7CF, slice.NextRune().Value); // 𝟏
        Assert.AreEqual(2, slice.Start);
        Assert.AreEqual(0x1D7D0, slice.NextRune().Value); // 𝟐
        Assert.AreEqual(4, slice.Start);
        Assert.AreEqual(0x1D7D0, slice.CurrentRune.Value); // 𝟐
        Assert.AreEqual("𝟐𝟑𝟒", slice.ToString());
        // CurrentRune occupies 2 `char`s, so next Rune starts at index 2
        Assert.AreEqual(0x1D7D1, slice.PeekRuneExtra(2).Value); // 𝟑
        Assert.AreEqual(0x1D7D2, slice.PeekRuneExtra(4).Value); // 𝟒
        Assert.AreEqual(0, slice.PeekRuneExtra(6).Value);
        // Backward peeks use odd offsets here: -1 is the last char of the previous pair.
        Assert.AreEqual(0x1D7CF, slice.PeekRuneExtra(-1).Value); // 𝟏
        Assert.AreEqual(0x1D7CE, slice.PeekRuneExtra(-3).Value); // 𝟎
        Assert.AreEqual(0, slice.PeekRuneExtra(-5).Value);
        Assert.AreEqual(0x1D7CE, slice.RuneAt(0).Value); // 𝟎
        Assert.AreEqual(0x1D7CF, slice.RuneAt(2).Value); // 𝟏
        Assert.AreEqual(0x1D7D0, slice.RuneAt(4).Value); // 𝟐
        Assert.AreEqual(0x1D7D1, slice.RuneAt(6).Value); // 𝟑
        Assert.AreEqual(0x1D7D2, slice.RuneAt(8).Value); // 𝟒
        // The following usages are not expected. You should take into consideration the `char`s that the Rune you just acquired occupies.
        Assert.AreEqual(0, slice.PeekRuneExtra(-4).Value);
        Assert.AreEqual(0, slice.PeekRuneExtra(-2).Value);
        Assert.AreEqual(0, slice.PeekRuneExtra(1).Value);
        Assert.AreEqual(0, slice.PeekRuneExtra(3).Value);
        Assert.AreEqual(0, slice.PeekRuneExtra(5).Value);
        Assert.AreEqual(0, slice.RuneAt(1).Value);
        Assert.AreEqual(0, slice.RuneAt(3).Value);
        Assert.AreEqual(0, slice.RuneAt(5).Value);
        Assert.AreEqual(0, slice.RuneAt(7).Value);
        Assert.AreEqual(0, slice.RuneAt(9).Value);

        // None of the peeks / lookups above moved the slice.
        Assert.AreEqual(4, slice.Start);
    }

    /// <summary>
    /// Input made only of unpaired high surrogates: every Rune read yields 0, while
    /// <c>NextRune()</c> still advances one <c>char</c> at a time.
    /// </summary>
    [Test]
    public void TestRuneIsolatedHighSurrogate()
    {
        var slice = new StringSlice("\ud800\ud801\ud802\ud803\ud804");
        Assert.AreEqual(0, slice.CurrentRune.Value);
        Assert.AreEqual(0, slice.Start);
        Assert.AreEqual(0, slice.NextRune().Value);
        Assert.AreEqual(0, slice.CurrentRune.Value);
        // The raw char is still reachable through CurrentChar.
        Assert.AreEqual('\ud801', slice.CurrentChar);
        Assert.AreEqual(1, slice.Start);
        Assert.AreEqual(0, slice.NextRune().Value);
        Assert.AreEqual(2, slice.Start);
        Assert.AreEqual('\ud802', slice.CurrentChar);
        Assert.AreEqual(0, slice.CurrentRune.Value);
        Assert.AreEqual(0, slice.PeekRuneExtra(-3).Value);
        Assert.AreEqual(0, slice.PeekRuneExtra(-2).Value);
        Assert.AreEqual(0, slice.PeekRuneExtra(-1).Value);
        Assert.AreEqual(0, slice.PeekRuneExtra(1).Value);
        Assert.AreEqual(0, slice.PeekRuneExtra(2).Value);
        Assert.AreEqual(0, slice.PeekRuneExtra(3).Value);
        Assert.AreEqual(0, slice.RuneAt(0).Value);
        Assert.AreEqual(0, slice.RuneAt(1).Value);
        Assert.AreEqual(0, slice.RuneAt(2).Value);
        Assert.AreEqual(0, slice.RuneAt(3).Value);
        Assert.AreEqual(0, slice.RuneAt(4).Value);
        Assert.AreEqual(2, slice.Start);
    }

    /// <summary>
    /// Input made only of unpaired low surrogates: every Rune read yields 0, while
    /// <c>NextRune()</c> still advances one <c>char</c> at a time.
    /// </summary>
    [Test]
    public void TestRuneIsolatedLowSurrogate()
    {
        var slice = new StringSlice("\udc00\udc01\udc02\udc03\udc04");
        Assert.AreEqual(0, slice.CurrentRune.Value);
        Assert.AreEqual(0, slice.NextRune().Value);
        Assert.AreEqual('\udc01', slice.CurrentChar);
        Assert.AreEqual(0, slice.NextRune().Value);
        Assert.AreEqual('\udc02', slice.CurrentChar);
        Assert.AreEqual(0, slice.CurrentRune.Value);
        Assert.AreEqual(0, slice.PeekRuneExtra(-3).Value);
        Assert.AreEqual(0, slice.PeekRuneExtra(-2).Value);
        Assert.AreEqual(0, slice.PeekRuneExtra(-1).Value);
        Assert.AreEqual(0, slice.PeekRuneExtra(1).Value);
        Assert.AreEqual(0, slice.PeekRuneExtra(2).Value);
        Assert.AreEqual(0, slice.PeekRuneExtra(3).Value);
        Assert.AreEqual(0, slice.RuneAt(0).Value);
        Assert.AreEqual(0, slice.RuneAt(1).Value);
        Assert.AreEqual(0, slice.RuneAt(2).Value);
        Assert.AreEqual(0, slice.RuneAt(3).Value);
        Assert.AreEqual(0, slice.RuneAt(4).Value);
    }

    /// <summary>
    /// Mixed input: BMP characters, valid surrogate pairs and isolated low surrogates.
    /// </summary>
    [Test]
    public void TestMixedInput()
    {
        // char layout: [0]'a' [1]\udc00 [2]'b' [3]'c' [4..5]𝟑 [6]'d' [7..8]𝟒 [9]\udc00
        var slice = new StringSlice("a\udc00bc𝟑d𝟒\udc00");
        Assert.AreEqual(10, slice.Length);
        Assert.AreEqual('a', slice.CurrentRune.Value);
        Assert.AreEqual(0, slice.Start);
        Assert.AreEqual(0, slice.NextRune().Value);
        Assert.AreEqual(1, slice.Start);
        Assert.AreEqual('b', slice.NextRune().Value);
        Assert.AreEqual(2, slice.Start);
        Assert.AreEqual('c', slice.NextRune().Value);
        Assert.AreEqual(3, slice.Start);
        Assert.AreEqual(0x1D7D1, slice.NextRune().Value);
        Assert.AreEqual(4, slice.Start);
        // Stepping over the pair at 4..5 advances Start by two.
        Assert.AreEqual('d', slice.NextRune().Value);
        Assert.AreEqual(6, slice.Start);
        Assert.AreEqual(0x1D7D2, slice.NextRune().Value);
        Assert.AreEqual(7, slice.Start);
        Assert.AreEqual(0, slice.NextRune().Value);
        Assert.AreEqual(9, slice.Start);
        Assert.False(slice.IsEmpty);
        // Stepping past the last char yields 0 and leaves the slice empty.
        Assert.AreEqual(0, slice.NextRune().Value);
        Assert.AreEqual(10, slice.Start);
        Assert.True(slice.IsEmpty);

        // Same text with an extra 'a' appended at index 10, sliced as [7, 10].
        slice = new StringSlice(slice.Text + 'a', 7, 10);
        Assert.AreEqual(0x1D7D2, slice.CurrentRune.Value);
        Assert.AreEqual(0, slice.NextRune().Value);
        Assert.AreEqual(9, slice.Start);
        Assert.AreEqual('a', slice.NextRune().Value);
    }
#endif
}
|
||||
@@ -8,7 +8,6 @@ namespace Markdig.Tests;
|
||||
[TestFixture]
|
||||
public class TestStringSliceList
|
||||
{
|
||||
// TODO: Add tests for StringSlice
|
||||
// TODO: Add more tests for StringLineGroup
|
||||
|
||||
[Test]
|
||||
|
||||
@@ -36,16 +36,15 @@ public class SmartyPantsInlineParser : InlineParser, IPostInlineProcessor
|
||||
// -- &ndash; – 'ndash'
|
||||
// --- &mdash; — 'mdash'
|
||||
|
||||
var pc = slice.PeekCharExtra(-1);
|
||||
var c = slice.CurrentChar;
|
||||
var openingChar = c;
|
||||
var pc = slice.PeekRuneExtra(-1);
|
||||
var openingChar = slice.CurrentChar;
|
||||
|
||||
var startingPosition = slice.Start;
|
||||
|
||||
// undefined first
|
||||
var type = (SmartyPantType) 0;
|
||||
|
||||
switch (c)
|
||||
switch (openingChar)
|
||||
{
|
||||
case '\'':
|
||||
type = SmartyPantType.Quote; // We will resolve them at the end of parsing all inlines
|
||||
@@ -93,9 +92,9 @@ public class SmartyPantsInlineParser : InlineParser, IPostInlineProcessor
|
||||
}
|
||||
|
||||
// Skip char
|
||||
c = slice.NextChar();
|
||||
var next = slice.NextRune();
|
||||
|
||||
CharHelper.CheckOpenCloseDelimiter(pc, c, false, out bool canOpen, out bool canClose);
|
||||
CharHelper.CheckOpenCloseDelimiter(pc, next, false, out bool canOpen, out bool canClose);
|
||||
|
||||
bool postProcess = false;
|
||||
|
||||
|
||||
@@ -38,7 +38,7 @@ public class PipeTableExtension : IMarkdownExtension
|
||||
var lineBreakParser = pipeline.InlineParsers.FindExact<LineBreakInlineParser>();
|
||||
if (!pipeline.InlineParsers.Contains<PipeTableParser>())
|
||||
{
|
||||
pipeline.InlineParsers.InsertBefore<EmphasisInlineParser>(new PipeTableParser(lineBreakParser!, Options));
|
||||
pipeline.InlineParsers.InsertAfter<EmphasisInlineParser>(new PipeTableParser(lineBreakParser!, Options));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -19,7 +19,7 @@ namespace Markdig.Extensions.Tables;
|
||||
/// <seealso cref="IPostInlineProcessor" />
|
||||
public class PipeTableParser : InlineParser, IPostInlineProcessor
|
||||
{
|
||||
private readonly LineBreakInlineParser lineBreakParser;
|
||||
private readonly LineBreakInlineParser _lineBreakParser;
|
||||
|
||||
/// <summary>
|
||||
/// Initializes a new instance of the <see cref="PipeTableParser" /> class.
|
||||
@@ -28,7 +28,7 @@ public class PipeTableParser : InlineParser, IPostInlineProcessor
|
||||
/// <param name="options">The options.</param>
|
||||
public PipeTableParser(LineBreakInlineParser lineBreakParser, PipeTableOptions? options = null)
|
||||
{
|
||||
this.lineBreakParser = lineBreakParser ?? throw new ArgumentNullException(nameof(lineBreakParser));
|
||||
_lineBreakParser = lineBreakParser ?? throw new ArgumentNullException(nameof(lineBreakParser));
|
||||
OpeningCharacters = ['|', '\n', '\r'];
|
||||
Options = options ?? new PipeTableOptions();
|
||||
}
|
||||
@@ -86,7 +86,7 @@ public class PipeTableParser : InlineParser, IPostInlineProcessor
|
||||
tableState.IsInvalidTable = true;
|
||||
}
|
||||
tableState.LineHasPipe = false;
|
||||
lineBreakParser.Match(processor, ref slice);
|
||||
_lineBreakParser.Match(processor, ref slice);
|
||||
if (!isFirstLineEmpty)
|
||||
{
|
||||
tableState.ColumnAndLineDelimiters.Add(processor.Inline!);
|
||||
@@ -100,7 +100,8 @@ public class PipeTableParser : InlineParser, IPostInlineProcessor
|
||||
Span = new SourceSpan(position, position),
|
||||
Line = globalLineIndex,
|
||||
Column = column,
|
||||
LocalLineIndex = localLineIndex
|
||||
LocalLineIndex = localLineIndex,
|
||||
IsClosed = true // Creates flat sibling structure for O(n) traversal
|
||||
};
|
||||
|
||||
tableState.LineHasPipe = true;
|
||||
@@ -125,6 +126,8 @@ public class PipeTableParser : InlineParser, IPostInlineProcessor
|
||||
return true;
|
||||
}
|
||||
|
||||
// With flat structure, pipes are siblings at root level
|
||||
// Walk backwards from the last child to find pipe delimiters
|
||||
var child = container.LastChild;
|
||||
List<PipeTableDelimiterInline>? delimitersToRemove = null;
|
||||
|
||||
@@ -142,8 +145,8 @@ public class PipeTableParser : InlineParser, IPostInlineProcessor
|
||||
break;
|
||||
}
|
||||
|
||||
var subContainer = child as ContainerInline;
|
||||
child = subContainer?.LastChild;
|
||||
// Walk siblings instead of descending into containers
|
||||
child = child.PreviousSibling;
|
||||
}
|
||||
|
||||
// If we have found any delimiters, transform them to literals
|
||||
@@ -186,8 +189,8 @@ public class PipeTableParser : InlineParser, IPostInlineProcessor
|
||||
// Remove previous state
|
||||
state.ParserStates[Index] = null!;
|
||||
|
||||
// Continue
|
||||
if (tableState is null || container is null || tableState.IsInvalidTable || !tableState.LineHasPipe ) //|| tableState.LineIndex != state.LocalLineIndex)
|
||||
// Abort if not a valid table
|
||||
if (tableState is null || container is null || tableState.IsInvalidTable || !tableState.LineHasPipe)
|
||||
{
|
||||
if (tableState is not null)
|
||||
{
|
||||
@@ -204,11 +207,18 @@ public class PipeTableParser : InlineParser, IPostInlineProcessor
|
||||
|
||||
// Detect the header row
|
||||
var delimiters = tableState.ColumnAndLineDelimiters;
|
||||
// TODO: we could optimize this by merging FindHeaderRow and the cell loop
|
||||
var aligns = FindHeaderRow(delimiters);
|
||||
|
||||
if (Options.RequireHeaderSeparator && aligns is null)
|
||||
{
|
||||
// No valid header separator found - convert all pipe delimiters to literals
|
||||
foreach (var inline in delimiters)
|
||||
{
|
||||
if (inline is PipeTableDelimiterInline pipeDelimiter)
|
||||
{
|
||||
pipeDelimiter.ReplaceByLiteral();
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -224,68 +234,40 @@ public class PipeTableParser : InlineParser, IPostInlineProcessor
|
||||
var cells = tableState.Cells;
|
||||
cells.Clear();
|
||||
|
||||
//delimiters[0].DumpTo(state.DebugLog);
|
||||
// Pipes may end up nested inside unmatched emphasis delimiters, e.g.:
|
||||
// *a | b*|
|
||||
// Promote them to root level so we have a flat sibling structure.
|
||||
PromoteNestedPipesToRootLevel(delimiters, container);
|
||||
|
||||
// delimiters contain a list of `|` and `\n` delimiters
|
||||
// The `|` delimiters are created as child containers.
|
||||
// So the following:
|
||||
// | a | b \n
|
||||
// | d | e \n
|
||||
// The inline tree is now flat: all pipes and line breaks are siblings at root level.
|
||||
// For example, `| a | b \n| c | d \n` produces:
|
||||
// [|] [a] [|] [b] [\n] [|] [c] [|] [d] [\n]
|
||||
//
|
||||
// Will generate a tree of the following node:
|
||||
// |
|
||||
// a
|
||||
// |
|
||||
// b
|
||||
// \n
|
||||
// |
|
||||
// d
|
||||
// |
|
||||
// e
|
||||
// \n
|
||||
// When parsing delimiters, we need to recover whether a row is of the following form:
|
||||
// 0) | a | b | \n
|
||||
// 1) | a | b \n
|
||||
// 2) a | b \n
|
||||
// 3) a | b | \n
|
||||
// Tables support four row formats:
|
||||
// | a | b | (leading and trailing pipes)
|
||||
// | a | b (leading pipe only)
|
||||
// a | b (no leading or trailing pipes)
|
||||
// a | b | (trailing pipe only)
|
||||
|
||||
// If the last element is not a line break, add a line break to homogenize parsing in the next loop
|
||||
// Ensure the table ends with a line break to simplify row detection
|
||||
var lastElement = delimiters[delimiters.Count - 1];
|
||||
if (!(lastElement is LineBreakInline))
|
||||
{
|
||||
while (true)
|
||||
// Find the actual last sibling (there may be content after the last delimiter)
|
||||
while (lastElement.NextSibling != null)
|
||||
{
|
||||
if (lastElement is ContainerInline lastElementContainer)
|
||||
{
|
||||
var nextElement = lastElementContainer.LastChild;
|
||||
if (nextElement != null)
|
||||
{
|
||||
lastElement = nextElement;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
break;
|
||||
lastElement = lastElement.NextSibling;
|
||||
}
|
||||
|
||||
var endOfTable = new LineBreakInline();
|
||||
// If the last element is a container, we have to add the EOL to its child
|
||||
// otherwise only next sibling
|
||||
if (lastElement is ContainerInline)
|
||||
{
|
||||
((ContainerInline)lastElement).AppendChild(endOfTable);
|
||||
}
|
||||
else
|
||||
{
|
||||
lastElement.InsertAfter(endOfTable);
|
||||
}
|
||||
lastElement.InsertAfter(endOfTable);
|
||||
delimiters.Add(endOfTable);
|
||||
tableState.EndOfLines.Add(endOfTable);
|
||||
}
|
||||
|
||||
int lastPipePos = 0;
|
||||
|
||||
// Cell loop
|
||||
// Reconstruct the table from the delimiters
|
||||
// Build table rows and cells by iterating through delimiters
|
||||
TableRow? row = null;
|
||||
TableRow? firstRow = null;
|
||||
for (int i = 0; i < delimiters.Count; i++)
|
||||
@@ -300,9 +282,7 @@ public class PipeTableParser : InlineParser, IPostInlineProcessor
|
||||
|
||||
firstRow ??= row;
|
||||
|
||||
// If the first delimiter is a pipe and doesn't have any parent or previous sibling, for cases like:
|
||||
// 0) | a | b | \n
|
||||
// 1) | a | b \n
|
||||
// Skip leading pipe at start of row (e.g., `| a | b` or `| a | b |`)
|
||||
if (pipeSeparator != null && (delimiter.PreviousSibling is null || delimiter.PreviousSibling is LineBreakInline))
|
||||
{
|
||||
delimiter.Remove();
|
||||
@@ -316,57 +296,37 @@ public class PipeTableParser : InlineParser, IPostInlineProcessor
|
||||
}
|
||||
}
|
||||
|
||||
// We need to find the beginning/ending of a cell from a right delimiter. From the delimiter 'x', we need to find a (without the delimiter start `|`)
|
||||
// So we iterate back to the first pipe or line break
|
||||
// x
|
||||
// 1) | a | b \n
|
||||
// 2) a | b \n
|
||||
// Find cell content by walking backwards from this delimiter to the previous pipe or line break.
|
||||
// For `| a | b \n` at delimiter 'x':
|
||||
// [|] [a] [x] [b] [\n]
|
||||
// ^--- current delimiter
|
||||
// Walk back: [a] is the cell content (stop at [|])
|
||||
Inline? endOfCell = null;
|
||||
Inline? beginOfCell = null;
|
||||
var cellContentIt = delimiter;
|
||||
while (true)
|
||||
var cellContentIt = delimiter.PreviousSibling;
|
||||
while (cellContentIt != null)
|
||||
{
|
||||
cellContentIt = cellContentIt.PreviousSibling ?? cellContentIt.Parent;
|
||||
|
||||
if (cellContentIt is null || cellContentIt is LineBreakInline)
|
||||
{
|
||||
if (cellContentIt is LineBreakInline || cellContentIt is PipeTableDelimiterInline)
|
||||
break;
|
||||
}
|
||||
|
||||
// The cell begins at the first effective child after a | or the top ContainerInline (which is not necessary to bring into the tree + it contains an invalid span calculation)
|
||||
if (cellContentIt is PipeTableDelimiterInline || (cellContentIt.GetType() == typeof(ContainerInline) && cellContentIt.Parent is null ))
|
||||
{
|
||||
beginOfCell = ((ContainerInline)cellContentIt).FirstChild;
|
||||
if (endOfCell is null)
|
||||
{
|
||||
endOfCell = beginOfCell;
|
||||
}
|
||||
// Stop at the root ContainerInline (which is not necessary to bring into the tree + it contains an invalid span calculation)
|
||||
if (cellContentIt.GetType() == typeof(ContainerInline) && cellContentIt.Parent is null)
|
||||
break;
|
||||
}
|
||||
|
||||
beginOfCell = cellContentIt;
|
||||
if (endOfCell is null)
|
||||
{
|
||||
endOfCell = beginOfCell;
|
||||
}
|
||||
endOfCell ??= beginOfCell;
|
||||
|
||||
cellContentIt = cellContentIt.PreviousSibling;
|
||||
}
|
||||
|
||||
// If the current deilimiter is a pipe `|` OR
|
||||
// If the current delimiter is a pipe `|` OR
|
||||
// the beginOfCell/endOfCell are not null and
|
||||
// either they are :
|
||||
// either they are:
|
||||
// - different
|
||||
// - they contain a single element, but it is not a line break (\n) or an empty/whitespace Literal.
|
||||
// Then we can add a cell to the current row
|
||||
if (!isLine || (beginOfCell != null && endOfCell != null && ( beginOfCell != endOfCell || !(beginOfCell is LineBreakInline || (beginOfCell is LiteralInline beingOfCellLiteral && beingOfCellLiteral.Content.IsEmptyOrWhitespace())))))
|
||||
{
|
||||
if (!isLine)
|
||||
{
|
||||
// If the delimiter is a pipe, we need to remove it from the tree
|
||||
// so that previous loop looking for a parent will not go further on subsequent cells
|
||||
delimiter.Remove();
|
||||
lastPipePos = delimiter.Span.End;
|
||||
}
|
||||
|
||||
// We trim whitespace at the beginning and ending of the cell
|
||||
TrimStart(beginOfCell);
|
||||
TrimEnd(endOfCell);
|
||||
@@ -374,10 +334,20 @@ public class PipeTableParser : InlineParser, IPostInlineProcessor
|
||||
var cellContainer = new ContainerInline();
|
||||
|
||||
// Copy elements from beginOfCell on the first level
|
||||
// The pipe delimiter serves as a boundary - stop when we hit it
|
||||
var cellIt = beginOfCell;
|
||||
while (cellIt != null && !IsLine(cellIt) && !(cellIt is PipeTableDelimiterInline))
|
||||
{
|
||||
var nextSibling = cellIt.NextSibling;
|
||||
|
||||
// Skip empty literals (can result from trimming)
|
||||
if (cellIt is LiteralInline { Content.IsEmpty: true })
|
||||
{
|
||||
cellIt.Remove();
|
||||
cellIt = nextSibling;
|
||||
continue;
|
||||
}
|
||||
|
||||
cellIt.Remove();
|
||||
if (cellContainer.Span.IsEmpty)
|
||||
{
|
||||
@@ -390,8 +360,16 @@ public class PipeTableParser : InlineParser, IPostInlineProcessor
|
||||
cellIt = nextSibling;
|
||||
}
|
||||
|
||||
if (!isLine)
|
||||
{
|
||||
// Remove the pipe delimiter AFTER copying cell content
|
||||
// This preserves the sibling chain during the copy loop
|
||||
delimiter.Remove();
|
||||
lastPipePos = delimiter.Span.End;
|
||||
}
|
||||
|
||||
// Create the cell and add it to the pending row
|
||||
var tableParagraph = new ParagraphBlock()
|
||||
var tableParagraph = new ParagraphBlock
|
||||
{
|
||||
Span = cellContainer.Span,
|
||||
Line = cellContainer.Line,
|
||||
@@ -443,8 +421,7 @@ public class PipeTableParser : InlineParser, IPostInlineProcessor
|
||||
endOfLine.Remove();
|
||||
}
|
||||
|
||||
// If we have a header row, we can remove it
|
||||
// TODO: we could optimize this by merging FindHeaderRow and the previous loop
|
||||
// Mark first row as header and remove the separator row if present
|
||||
var tableRow = (TableRow)table[0];
|
||||
tableRow.IsHeader = Options.RequireHeaderSeparator;
|
||||
if (aligns != null)
|
||||
@@ -454,11 +431,13 @@ public class PipeTableParser : InlineParser, IPostInlineProcessor
|
||||
table.ColumnDefinitions.AddRange(aligns);
|
||||
}
|
||||
|
||||
// Perform delimiter processor that are coming after this processor
|
||||
// Perform all post-processors on cell content
|
||||
// With InsertAfter, emphasis runs before pipe table, so we need to re-run from index 0
|
||||
// to ensure emphasis delimiters in cells are properly matched
|
||||
foreach (var cell in cells)
|
||||
{
|
||||
var paragraph = (ParagraphBlock) cell[0];
|
||||
state.PostProcessInlines(postInlineProcessorIndex + 1, paragraph.Inline, null, true);
|
||||
state.PostProcessInlines(0, paragraph.Inline, null, true);
|
||||
if (paragraph.Inline?.LastChild is not null)
|
||||
{
|
||||
paragraph.Inline.Span.End = paragraph.Inline.LastChild.Span.End;
|
||||
@@ -548,7 +527,7 @@ public class PipeTableParser : InlineParser, IPostInlineProcessor
|
||||
continue;
|
||||
}
|
||||
|
||||
// The last delimiter is always null,
|
||||
// Parse the separator row (second row) to extract column alignments
|
||||
for (int j = i + 1; j < delimiters.Count; j++)
|
||||
{
|
||||
var delimiter = delimiters[j];
|
||||
@@ -560,11 +539,13 @@ public class PipeTableParser : InlineParser, IPostInlineProcessor
|
||||
continue;
|
||||
}
|
||||
|
||||
// Check the left side of a `|` delimiter
|
||||
// Parse the content before this delimiter as a column definition (e.g., `:---`, `---:`, `:---:`)
|
||||
// Skip if previous sibling is a pipe (empty cell) or whitespace
|
||||
TableColumnAlign? align = null;
|
||||
int delimiterCount = 0;
|
||||
if (delimiter.PreviousSibling != null &&
|
||||
!(delimiter.PreviousSibling is LiteralInline li && li.Content.IsEmptyOrWhitespace()) && // ignore parsed whitespace
|
||||
!(delimiter.PreviousSibling is PipeTableDelimiterInline) &&
|
||||
!(delimiter.PreviousSibling is LiteralInline li && li.Content.IsEmptyOrWhitespace()) &&
|
||||
!ParseHeaderString(delimiter.PreviousSibling, out align, out delimiterCount))
|
||||
{
|
||||
break;
|
||||
@@ -576,14 +557,13 @@ public class PipeTableParser : InlineParser, IPostInlineProcessor
|
||||
totalDelimiterCount += delimiterCount;
|
||||
columnDefinitions.Add(new TableColumnDefinition() { Alignment = align, Width = delimiterCount});
|
||||
|
||||
// If this is the last delimiter, we need to check the right side of the `|` delimiter
|
||||
// If this is the last pipe, check for a trailing column definition (row without trailing pipe)
|
||||
// e.g., `| :--- | ---:` has content after the last pipe
|
||||
if (nextDelimiter is null)
|
||||
{
|
||||
var nextSibling = columnDelimiter != null
|
||||
? columnDelimiter.FirstChild
|
||||
: delimiter.NextSibling;
|
||||
var nextSibling = delimiter.NextSibling;
|
||||
|
||||
// If there is no content after
|
||||
// No trailing content means row ends with pipe: `| :--- |`
|
||||
if (IsNullOrSpace(nextSibling))
|
||||
{
|
||||
isValidRow = true;
|
||||
@@ -664,9 +644,9 @@ public class PipeTableParser : InlineParser, IPostInlineProcessor
|
||||
|
||||
private static void TrimStart(Inline? inline)
|
||||
{
|
||||
while (inline is ContainerInline && !(inline is DelimiterInline))
|
||||
while (inline is ContainerInline containerInline && !(containerInline is DelimiterInline))
|
||||
{
|
||||
inline = ((ContainerInline)inline).FirstChild;
|
||||
inline = containerInline.FirstChild;
|
||||
}
|
||||
|
||||
if (inline is LiteralInline literal)
|
||||
@@ -677,6 +657,13 @@ public class PipeTableParser : InlineParser, IPostInlineProcessor
|
||||
|
||||
private static void TrimEnd(Inline? inline)
|
||||
{
|
||||
// Walk into containers to find the last leaf to trim
|
||||
// Skip PipeTableDelimiterInline but walk into other containers (including emphasis)
|
||||
while (inline is ContainerInline container && !(inline is PipeTableDelimiterInline))
|
||||
{
|
||||
inline = container.LastChild;
|
||||
}
|
||||
|
||||
if (inline is LiteralInline literal)
|
||||
{
|
||||
literal.Content.TrimEnd();
|
||||
@@ -697,6 +684,106 @@ public class PipeTableParser : InlineParser, IPostInlineProcessor
|
||||
return false;
|
||||
}
|
||||
|
||||
/// <summary>
/// Lifts pipe delimiters and line breaks that sit inside nested containers up to the root level.
/// </summary>
/// <remarks>
/// Covers input such as `*a | b*|`, where a pipe ends up inside an unmatched emphasis container.
/// Each nested pipe or line break in <paramref name="delimiters"/> whose top-level ancestor is a direct
/// child of <paramref name="root"/> is split out of its parent so it becomes a sibling of that ancestor.
/// </remarks>
/// <param name="delimiters">The inlines to inspect; entries that are neither pipes nor line breaks are ignored.</param>
/// <param name="root">The container whose direct children the delimiters should become.</param>
private static void PromoteNestedPipesToRootLevel(List<Inline> delimiters, ContainerInline root)
{
    foreach (var candidate in delimiters)
    {
        // Only pipes and line breaks are promoted; anything already directly under root needs no work.
        if (candidate is not (PipeTableDelimiterInline or LineBreakInline) || candidate.Parent == root)
        {
            continue;
        }

        // Climb the parent chain until we hit the container that is a direct child of root
        // (or until the chain ends without ever reaching root).
        var topLevel = candidate.Parent;
        while (topLevel is not null && topLevel.Parent is not null && topLevel.Parent != root)
        {
            topLevel = topLevel.Parent;
        }

        // The delimiter is not attached beneath root: leave it untouched.
        if (topLevel is null || topLevel.Parent != root)
        {
            continue;
        }

        SplitContainerAtDelimiter(candidate, topLevel);
    }
}
|
||||
|
||||
/// <summary>
/// Detaches <paramref name="delimiter"/> from its parent container and re-inserts it directly after <paramref name="ancestor"/>.
/// </summary>
/// <remarks>
/// Siblings that followed the delimiter inside its parent are detached as well and re-attached,
/// wrapped in a new container, right after the delimiter.
/// For input `*a | b*`, where the pipe is inside the emphasis container:
///   EmphasisDelimiter { "a", Pipe, "b" }
/// the result is:
///   EmphasisDelimiter { "a" }, Pipe, Container { "b" }
/// </remarks>
/// <param name="delimiter">The inline to move; nothing happens when it has no parent.</param>
/// <param name="ancestor">The inline after which the delimiter is inserted.</param>
private static void SplitContainerAtDelimiter(Inline delimiter, Inline ancestor)
{
    var formerParent = delimiter.Parent;
    if (formerParent is null)
    {
        return;
    }

    // Snapshot the siblings that follow the delimiter before any of them is unlinked.
    var trailing = new List<Inline>();
    for (var node = delimiter.NextSibling; node != null; node = node.NextSibling)
    {
        trailing.Add(node);
    }

    // Unlink the trailing siblings first, then the delimiter itself.
    foreach (var node in trailing)
    {
        node.Remove();
    }

    delimiter.Remove();

    // The delimiter now follows the ancestor instead of living inside the former parent.
    ancestor.InsertAfter(delimiter);

    if (trailing.Count == 0)
    {
        return;
    }

    // Re-home the detached siblings in a new container placed right after the delimiter.
    var wrapper = CreateMatchingContainer(formerParent);
    foreach (var node in trailing)
    {
        wrapper.AppendChild(node);
    }

    delimiter.InsertAfter(wrapper);
}
|
||||
|
||||
/// <summary>
/// Builds the container that wraps content split off from <paramref name="source"/>.
/// </summary>
/// <param name="source">The container the content was split from; its span, line and column are copied.</param>
/// <returns>A new, empty <see cref="ContainerInline"/> carrying the source position of <paramref name="source"/>.</returns>
private static ContainerInline CreateMatchingContainer(ContainerInline source)
{
    // Emphasis processing runs before pipe table processing, so emphasis delimiters
    // are already resolved and a plain ContainerInline is enough here.
    var wrapper = new ContainerInline();
    wrapper.Span = source.Span;
    wrapper.Line = source.Line;
    wrapper.Column = source.Column;
    return wrapper;
}
|
||||
|
||||
private sealed class TableState
|
||||
{
|
||||
public bool IsInvalidTable { get; set; }
|
||||
|
||||
@@ -1,11 +1,12 @@
|
||||
// Copyright (c) Alexandre Mutel. All rights reserved.
|
||||
// This file is licensed under the BSD-Clause 2 license.
|
||||
// This file is licensed under the BSD-Clause 2 license.
|
||||
// See the license.txt file in the project root for more information.
|
||||
|
||||
using System.Buffers;
|
||||
using System.Diagnostics;
|
||||
using System.Globalization;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Text;
|
||||
|
||||
namespace Markdig.Helpers;
|
||||
|
||||
@@ -72,14 +73,51 @@ public static class CharHelper
|
||||
private static bool IsPunctuationException(char c) =>
|
||||
c is '−' or '-' or '†' or '‡';
|
||||
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static bool IsPunctuationException(Rune c)
{
    // Runes outside the BMP are never exceptions; BMP runes defer to the char overload.
    if (!c.IsBmp)
    {
        return false;
    }

    return IsPunctuationException((char)c.Value);
}
|
||||
|
||||
/// <summary>
/// Classifies the characters on both sides of a delimiter and forwards the result to the
/// flag-based overload, which computes <paramref name="canOpen"/> and <paramref name="canClose"/>.
/// </summary>
/// <param name="pc">The character whose classification is passed on as the "previous" flags.</param>
/// <param name="c">The character whose classification is passed on as the "next" flags.</param>
/// <param name="enableWithinWord">Passed through unchanged to the flag-based overload.</param>
/// <param name="canOpen">Set by the flag-based overload.</param>
/// <param name="canClose">Set by the flag-based overload.</param>
public static void CheckOpenCloseDelimiter(char pc, char c, bool enableWithinWord, out bool canOpen, out bool canClose)
{
    pc.CheckUnicodeCategory(out bool prevIsWhiteSpace, out bool prevIsPunctuation);
    c.CheckUnicodeCategory(out bool nextIsWhiteSpace, out bool nextIsPunctuation);

    // A character only counts as "excepted" when it is punctuation in the first place.
    bool prevIsExcepted = prevIsPunctuation && IsPunctuationException(pc);
    bool nextIsExcepted = nextIsPunctuation && IsPunctuationException(c);

    CheckOpenCloseDelimiter(
        prevIsWhiteSpace,
        prevIsPunctuation,
        prevIsExcepted,
        nextIsWhiteSpace,
        nextIsPunctuation,
        nextIsExcepted,
        enableWithinWord,
        out canOpen,
        out canClose);
}
|
||||
|
||||
var prevIsExcepted = prevIsPunctuation && IsPunctuationException(pc);
|
||||
var nextIsExcepted = nextIsPunctuation && IsPunctuationException(c);
|
||||
/// <summary>
/// <see cref="Rune"/> counterpart of the character-based check: classifies the runes on both sides of a
/// delimiter and forwards the result to the flag-based overload, which computes
/// <paramref name="canOpen"/> and <paramref name="canClose"/>.
/// </summary>
/// <param name="pc">The rune whose classification is passed on as the "previous" flags.</param>
/// <param name="c">The rune whose classification is passed on as the "next" flags.</param>
/// <param name="enableWithinWord">Passed through unchanged to the flag-based overload.</param>
/// <param name="canOpen">Set by the flag-based overload.</param>
/// <param name="canClose">Set by the flag-based overload.</param>
#if NET
public
#else
internal
#endif
static void CheckOpenCloseDelimiter(Rune pc, Rune c, bool enableWithinWord, out bool canOpen, out bool canClose)
{
    pc.CheckUnicodeCategory(out bool prevIsWhiteSpace, out bool prevIsPunctuation);
    c.CheckUnicodeCategory(out bool nextIsWhiteSpace, out bool nextIsPunctuation);

    // A rune only counts as "excepted" when it is punctuation in the first place.
    bool prevIsExcepted = prevIsPunctuation && IsPunctuationException(pc);
    bool nextIsExcepted = nextIsPunctuation && IsPunctuationException(c);

    CheckOpenCloseDelimiter(
        prevIsWhiteSpace,
        prevIsPunctuation,
        prevIsExcepted,
        nextIsWhiteSpace,
        nextIsPunctuation,
        nextIsExcepted,
        enableWithinWord,
        out canOpen,
        out canClose);
}
|
||||
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
private static void CheckOpenCloseDelimiter(bool prevIsWhiteSpace, bool prevIsPunctuation, bool prevIsExcepted, bool nextIsWhiteSpace, bool nextIsPunctuation, bool nextIsExcepted, bool enableWithinWord, out bool canOpen, out bool canClose)
|
||||
{
|
||||
// A left-flanking delimiter run is a delimiter run that is
|
||||
// (1) not followed by Unicode whitespace, and either
|
||||
// (2a) not followed by a punctuation character or
|
||||
@@ -100,13 +138,13 @@ public static class CharHelper
|
||||
if (!enableWithinWord)
|
||||
{
|
||||
var temp = canOpen;
|
||||
// A single _ character can open emphasis iff it is part of a left-flanking delimiter run and either
|
||||
// (a) not part of a right-flanking delimiter run or
|
||||
// A single _ character can open emphasis iff it is part of a left-flanking delimiter run and either
|
||||
// (a) not part of a right-flanking delimiter run or
|
||||
// (b) part of a right-flanking delimiter run preceded by punctuation.
|
||||
canOpen = canOpen && (!canClose || prevIsPunctuation);
|
||||
|
||||
// A single _ character can close emphasis iff it is part of a right-flanking delimiter run and either
|
||||
// (a) not part of a left-flanking delimiter run or
|
||||
// (a) not part of a left-flanking delimiter run or
|
||||
// (b) part of a left-flanking delimiter run followed by punctuation.
|
||||
canClose = canClose && (!temp || nextIsPunctuation);
|
||||
}
|
||||
@@ -180,6 +218,11 @@ public static class CharHelper
|
||||
return (column & (TabSize - 1)) != 0;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// <see langword="true"/> if the character is a <see href="https://spec.commonmark.org/0.31.2/#unicode-whitespace-character">Unicode whitespace character</see>.
|
||||
/// </summary>
|
||||
/// <param name="c">The character to evaluate.</param>
|
||||
/// <returns><see langword="true"/> if the character is a Unicode whitespace character</returns>
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
public static bool IsWhitespace(this char c)
|
||||
{
|
||||
@@ -199,6 +242,21 @@ public static class CharHelper
|
||||
return IsWhitespaceRare(c);
|
||||
}
|
||||
|
||||
/// <summary>
/// Tells whether a rune is a <see href="https://spec.commonmark.org/0.31.2/#unicode-whitespace-character">Unicode whitespace character</see>.
/// </summary>
/// <param name="r">The rune to evaluate. A supplementary character is also accepted.</param>
/// <returns><see langword="true"/> when <paramref name="r"/> lies in the BMP and the <see cref="char"/> overload reports it as whitespace; otherwise <see langword="false"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
#if NET
public
#else
internal
#endif
static bool IsWhitespace(this Rune r)
{
    // Note: there is no supplementary character whose Unicode category is Zs (at least as of Unicode 17),
    // https://www.compart.com/en/unicode/category/Zs
    // so anything outside the BMP is rejected without consulting the char overload.
    if (!r.IsBmp)
    {
        return false;
    }

    return IsWhitespace((char)r.Value);
}
|
||||
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
public static bool IsWhiteSpaceOrZero(this char c)
|
||||
{
|
||||
@@ -243,7 +301,12 @@ public static class CharHelper
|
||||
return s_escapableSymbolChars.Contains(c);
|
||||
}
|
||||
|
||||
// Check if a char is a space or a punctuation
|
||||
/// <summary>
|
||||
/// Checks the Unicode category of the given character and determines whether it is a whitespace or punctuation character.
|
||||
/// </summary>
|
||||
/// <param name="c">The character to check.</param>
|
||||
/// <param name="space">Output parameter indicating whether the character is a whitespace character.</param>
|
||||
/// <param name="punctuation">Output parameter indicating whether the character is a punctuation character.</param>
|
||||
public static void CheckUnicodeCategory(this char c, out bool space, out bool punctuation)
|
||||
{
|
||||
if (IsWhitespace(c))
|
||||
@@ -263,6 +326,36 @@ public static class CharHelper
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Reports whether a rune is a <see href="https://spec.commonmark.org/0.31.2/#unicode-whitespace-character">Unicode whitespace</see>
/// or a <see href="https://spec.commonmark.org/0.31.2/#unicode-punctuation-character">Unicode punctuation character</see>.
/// </summary>
/// <param name="r">The rune to evaluate. A supplementary character is also accepted.</param>
/// <param name="space">Set to <see langword="true"/> for whitespace runes and for U+0000; otherwise <see langword="false"/>.</param>
/// <param name="punctuation">Set to <see langword="true"/> when the rune is classified as punctuation; always <see langword="false"/> for whitespace.</param>
#if NET
public
#else
internal
#endif
static void CheckUnicodeCategory(this Rune r, out bool space, out bool punctuation)
{
    if (IsWhitespace(r))
    {
        space = true;
        punctuation = false;
        return;
    }

    int value = r.Value;
    if (value > 127)
    {
        // Non-ASCII: test the rune's Unicode category against the punctuation category bit mask.
        space = false;
        punctuation = (CommonMarkPunctuationCategoryMask & (1 << (int)Rune.GetUnicodeCategory(r))) != 0;
        return;
    }

    // ASCII: U+0000 is reported as a space; the punctuation flag comes from the ASCII helper.
    // A value of at most 127 always fits in a single char, so the cast is lossless.
    space = value == 0;
    punctuation = IsAsciiPunctuationOrZero((char)value);
}
|
||||
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
internal static bool IsSpaceOrPunctuationForGFMAutoLink(char c)
|
||||
{
|
||||
@@ -306,22 +399,37 @@ public static class CharHelper
|
||||
return c == '\0';
|
||||
}
|
||||
|
||||
/// <summary>
/// Returns <see langword="true"/> if the character is a <see href="https://spec.commonmark.org/0.31.2/#space">space</see> (U+0020).
/// </summary>
/// <param name="c">The character to evaluate.</param>
/// <returns><see langword="true"/> if the character is a space; otherwise <see langword="false"/>.</returns>
// CommonMark 2.1 "Characters and lines": a space is U+0020.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool IsSpace(this char c) => c is ' ';
|
||||
|
||||
/// <summary>
/// Returns <see langword="true"/> if the character is a <see href="https://spec.commonmark.org/0.31.2/#tab">tab</see> (U+0009).
/// </summary>
/// <param name="c">The character to evaluate.</param>
/// <returns><see langword="true"/> if the character is a tab; otherwise <see langword="false"/>.</returns>
// CommonMark 2.1 "Characters and lines": a tab is U+0009.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool IsTab(this char c) => c is '\t';
|
||||
|
||||
/// <summary>
|
||||
/// Returns <see langword="true"/> if the character is a <see href="https://spec.commonmark.org/0.31.2/#space">space</see> (U+0020) or <see href="https://spec.commonmark.org/0.31.2/#tab">tab</see> (U+0009).
|
||||
/// </summary>
|
||||
/// <param name="c">The character to evaluate.</param>
|
||||
/// <returns><see langword="true"/> if the character is a space or tab</returns>
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
public static bool IsSpaceOrTab(this char c)
|
||||
{
|
||||
|
||||
@@ -51,10 +51,7 @@ public sealed class CharacterMap<T> where T : class
|
||||
{
|
||||
nonAsciiMap ??= [];
|
||||
|
||||
if (!nonAsciiMap.ContainsKey(openingChar))
|
||||
{
|
||||
nonAsciiMap[openingChar] = state.Value;
|
||||
}
|
||||
nonAsciiMap.TryAdd(openingChar, state.Value);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Copyright (c) Alexandre Mutel. All rights reserved.
|
||||
// This file is licensed under the BSD-Clause 2 license.
|
||||
// This file is licensed under the BSD-Clause 2 license.
|
||||
// See the license.txt file in the project root for more information.
|
||||
|
||||
using Markdig.Syntax;
|
||||
@@ -60,12 +60,12 @@ public static class LinkHelper
|
||||
}
|
||||
else
|
||||
{
|
||||
normalized = allowOnlyAscii ? CharNormalizer.ConvertToAscii(c) : null;
|
||||
normalized = allowOnlyAscii ? CharNormalizer.ConvertToAscii(c) : ReadOnlySpan<char>.Empty;
|
||||
}
|
||||
|
||||
for (int j = 0; j < (normalized.Length < 1 ? 1 : normalized.Length); j++)
|
||||
{
|
||||
if (normalized != null)
|
||||
if (!normalized.IsEmpty)
|
||||
{
|
||||
c = normalized[j];
|
||||
}
|
||||
@@ -215,13 +215,13 @@ public static class LinkHelper
|
||||
return false;
|
||||
}
|
||||
|
||||
// An absolute URI, for these purposes, consists of a scheme followed by a colon (:)
|
||||
// followed by zero or more characters other than ASCII whitespace and control characters, <, and >.
|
||||
// An absolute URI, for these purposes, consists of a scheme followed by a colon (:)
|
||||
// followed by zero or more characters other than ASCII whitespace and control characters, <, and >.
|
||||
// If the URI includes these characters, they must be percent-encoded (e.g. %20 for a space).
|
||||
// A URI that would end with a full stop (.) is treated instead as ending immediately before the full stop.
|
||||
|
||||
// a scheme is any sequence of 2–32 characters
|
||||
// beginning with an ASCII letter
|
||||
// a scheme is any sequence of 2–32 characters
|
||||
// beginning with an ASCII letter
|
||||
// and followed by any combination of ASCII letters, digits, or the symbols plus (”+”), period (”.”), or hyphen (”-”).
|
||||
|
||||
// An email address, for these purposes, is anything that matches the non-normative regex from the HTML5 spec:
|
||||
@@ -276,7 +276,7 @@ public static class LinkHelper
|
||||
|
||||
if (isValidChar)
|
||||
{
|
||||
// a scheme is any sequence of 2–32 characters
|
||||
// a scheme is any sequence of 2–32 characters
|
||||
if (state > 0 && builder.Length >= 32)
|
||||
{
|
||||
goto ReturnFalse;
|
||||
@@ -307,7 +307,7 @@ public static class LinkHelper
|
||||
}
|
||||
}
|
||||
|
||||
// append ':' or '@'
|
||||
// append ':' or '@'
|
||||
builder.Append(c);
|
||||
|
||||
if (state < 0)
|
||||
@@ -415,10 +415,10 @@ public static class LinkHelper
|
||||
|
||||
public static bool TryParseInlineLink(ref StringSlice text, out string? link, out string? title, out SourceSpan linkSpan, out SourceSpan titleSpan)
|
||||
{
|
||||
// 1. An inline link consists of a link text followed immediately by a left parenthesis (,
|
||||
// 1. An inline link consists of a link text followed immediately by a left parenthesis (,
|
||||
// 2. optional whitespace, TODO: specs: is it whitespace or multiple whitespaces?
|
||||
// 3. an optional link destination,
|
||||
// 4. an optional link title separated from the link destination by whitespace,
|
||||
// 3. an optional link destination,
|
||||
// 4. an optional link title separated from the link destination by whitespace,
|
||||
// 5. optional whitespace, TODO: specs: is it whitespace or multiple whitespaces?
|
||||
// 6. and a right parenthesis )
|
||||
bool isValid = false;
|
||||
@@ -429,7 +429,7 @@ public static class LinkHelper
|
||||
linkSpan = SourceSpan.Empty;
|
||||
titleSpan = SourceSpan.Empty;
|
||||
|
||||
// 1. An inline link consists of a link text followed immediately by a left parenthesis (,
|
||||
// 1. An inline link consists of a link text followed immediately by a left parenthesis (,
|
||||
if (c == '(')
|
||||
{
|
||||
text.SkipChar();
|
||||
@@ -505,10 +505,10 @@ public static class LinkHelper
|
||||
out SourceSpan triviaAfterTitle,
|
||||
out bool urlHasPointyBrackets)
|
||||
{
|
||||
// 1. An inline link consists of a link text followed immediately by a left parenthesis (,
|
||||
// 1. An inline link consists of a link text followed immediately by a left parenthesis (,
|
||||
// 2. optional whitespace, TODO: specs: is it whitespace or multiple whitespaces?
|
||||
// 3. an optional link destination,
|
||||
// 4. an optional link title separated from the link destination by whitespace,
|
||||
// 3. an optional link destination,
|
||||
// 4. an optional link title separated from the link destination by whitespace,
|
||||
// 5. optional whitespace, TODO: specs: is it whitespace or multiple whitespaces?
|
||||
// 6. and a right parenthesis )
|
||||
bool isValid = false;
|
||||
@@ -526,7 +526,7 @@ public static class LinkHelper
|
||||
urlHasPointyBrackets = false;
|
||||
titleEnclosingCharacter = '\0';
|
||||
|
||||
// 1. An inline link consists of a link text followed immediately by a left parenthesis (,
|
||||
// 1. An inline link consists of a link text followed immediately by a left parenthesis (,
|
||||
if (c == '(')
|
||||
{
|
||||
text.SkipChar();
|
||||
@@ -773,7 +773,7 @@ public static class LinkHelper
|
||||
|
||||
var c = text.CurrentChar;
|
||||
|
||||
// a sequence of zero or more characters between an opening < and a closing >
|
||||
// a sequence of zero or more characters between an opening < and a closing >
|
||||
// that contains no line breaks, or unescaped < or > characters, or
|
||||
if (c == '<')
|
||||
{
|
||||
@@ -820,9 +820,9 @@ public static class LinkHelper
|
||||
else
|
||||
{
|
||||
// a nonempty sequence of characters that does not start with <, does not include ASCII space or control characters,
|
||||
// and includes parentheses only if (a) they are backslash-escaped or (b) they are part of a
|
||||
// balanced pair of unescaped parentheses that is not itself inside a balanced pair of unescaped
|
||||
// parentheses.
|
||||
// and includes parentheses only if (a) they are backslash-escaped or (b) they are part of a
|
||||
// balanced pair of unescaped parentheses that is not itself inside a balanced pair of unescaped
|
||||
// parentheses.
|
||||
bool hasEscape = false;
|
||||
int openedParent = 0;
|
||||
while (true)
|
||||
@@ -922,7 +922,7 @@ public static class LinkHelper
|
||||
|
||||
var c = text.CurrentChar;
|
||||
|
||||
// a sequence of zero or more characters between an opening < and a closing >
|
||||
// a sequence of zero or more characters between an opening < and a closing >
|
||||
// that contains no line breaks, or unescaped < or > characters, or
|
||||
if (c == '<')
|
||||
{
|
||||
@@ -969,9 +969,9 @@ public static class LinkHelper
|
||||
else
|
||||
{
|
||||
// a nonempty sequence of characters that does not start with <, does not include ASCII space or control characters,
|
||||
// and includes parentheses only if (a) they are backslash-escaped or (b) they are part of a
|
||||
// balanced pair of unescaped parentheses that is not itself inside a balanced pair of unescaped
|
||||
// parentheses.
|
||||
// and includes parentheses only if (a) they are backslash-escaped or (b) they are part of a
|
||||
// balanced pair of unescaped parentheses that is not itself inside a balanced pair of unescaped
|
||||
// parentheses.
|
||||
bool hasEscape = false;
|
||||
int openedParent = 0;
|
||||
while (true)
|
||||
@@ -1201,7 +1201,7 @@ public static class LinkHelper
|
||||
|
||||
if (c != '\0' && c != '\n' && c != '\r')
|
||||
{
|
||||
// If we were able to parse the url but the title doesn't end with space,
|
||||
// If we were able to parse the url but the title doesn't end with space,
|
||||
// we are still returning a valid definition
|
||||
if (newLineCount > 0 && title != null)
|
||||
{
|
||||
@@ -1341,7 +1341,7 @@ public static class LinkHelper
|
||||
|
||||
if (c != '\0' && c != '\n' && c != '\r')
|
||||
{
|
||||
// If we were able to parse the url but the title doesn't end with space,
|
||||
// If we were able to parse the url but the title doesn't end with space,
|
||||
// we are still returning a valid definition
|
||||
if (newLineCount > 0 && title != null)
|
||||
{
|
||||
|
||||
@@ -1,11 +1,12 @@
|
||||
// Copyright (c) Alexandre Mutel. All rights reserved.
|
||||
// This file is licensed under the BSD-Clause 2 license.
|
||||
// This file is licensed under the BSD-Clause 2 license.
|
||||
// See the license.txt file in the project root for more information.
|
||||
|
||||
#nullable disable
|
||||
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Runtime.InteropServices;
|
||||
using System.Text;
|
||||
|
||||
namespace Markdig.Helpers;
|
||||
|
||||
@@ -125,6 +126,34 @@ public struct StringSlice : ICharIterator
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Gets the <see cref="Rune"/> at <see cref="Start"/>. Recognizes supplementary code points that cannot be covered by a single <see cref="char"/>.
/// </summary>
/// <returns>
/// The current rune, or <see langword="default"/> if <see cref="Start"/> is past <see cref="End"/>
/// or the current position does not hold a complete surrogate pair inside the slice.
/// </returns>
#if NET
public
#else
internal
#endif
readonly Rune CurrentRune
{
    get
    {
        int position = Start;
        int last = End;
        if (position > last)
        {
            return default;
        }

        string text = Text;
        char lead = text[position];

        // A non-surrogate code unit is a complete rune on its own.
        if (Rune.TryCreate(lead, out Rune result))
        {
            return result;
        }

        // `lead` is a surrogate and `result` is '\0' at this point; try to pair it with
        // the following code unit, but only if that unit is still inside the slice.
        if (position + 1 <= last)
        {
            Rune.TryCreate(lead, text[position + 1], out result);
        }

        return result;
    }
}
|
||||
|
||||
/// <summary>
|
||||
/// Gets a value indicating whether this instance is empty.
|
||||
/// </summary>
|
||||
@@ -145,6 +174,32 @@ public struct StringSlice : ICharIterator
|
||||
get => Text[index];
|
||||
}
|
||||
|
||||
/// <summary>
/// Gets the <see cref="Rune"/> that starts at the specified index.
/// Recognizes supplementary code points that cannot be covered by a single <see cref="char"/>.
/// </summary>
/// <param name="index">The index into <see cref="Text"/> (not relative to <see cref="Start"/>).</param>
/// <returns>The rune at the specified index, or <see langword="default"/> if the location contains an incomplete surrogate pair.</returns>
/// <exception cref="IndexOutOfRangeException">Thrown when the given <paramref name="index"/> is out of range</exception>
#if NET
public
#else
internal
#endif
readonly Rune RuneAt(int index)
{
    string source = Text;
    char lead = source[index];

    // A non-surrogate code unit is a complete rune on its own.
    if (Rune.TryCreate(lead, out Rune result))
    {
        return result;
    }

    // `lead` is a surrogate: pair it with the next code unit when one exists in the text.
    // The unsigned comparison rejects an index past the end of the text.
    int followerIndex = index + 1;
    if ((uint)followerIndex < (uint)source.Length)
    {
        Rune.TryCreate(lead, source[followerIndex], out result);
    }

    return result;
}
|
||||
|
||||
|
||||
/// <summary>
|
||||
/// Goes to the next character, incrementing the <see cref="Start" /> position.
|
||||
@@ -166,6 +221,50 @@ public struct StringSlice : ICharIterator
|
||||
return Text[start];
|
||||
}
|
||||
|
||||
/// <summary>
/// Goes to the next <see cref="Rune"/>, incrementing the <see cref="Start"/> position.
/// If <see cref="CurrentRune"/> is a supplementary character, <see cref="Start"/> will be advanced by 2.
/// </summary>
/// <returns>
/// The rune at the new <see cref="Start"/> position, or <see langword="default"/> if the slice is exhausted
/// (<see cref="Start"/> is then <c>End + 1</c>) or the new position contains an incomplete surrogate pair.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
#if NET
public
#else
internal
#endif
Rune NextRune()
{
    int start = Start;
    int end = End;
    if (start >= end)
    {
        // Already on (or past) the last code unit: mark the slice as exhausted.
        Start = end + 1;
        return default;
    }

    string text = Text;

    // Start may be pointing at the start of a surrogate pair. Check if we have to advance by 2 chars.
    // `start` is only incremented a second time once the code unit that follows a high surrogate
    // has been confirmed to be a low surrogate that lies inside the slice.
    if (char.IsHighSurrogate(text[start++])
        && start <= end
        && char.IsLowSurrogate(text[start]))
    {
        // Valid surrogate pair representing a supplementary character
        start++;
    }

    Start = start;

    // Skipping a surrogate pair whose low surrogate sat exactly at End leaves start == End + 1.
    // Nothing is left to read then; indexing Text here would either throw (slice ends at the end
    // of the text) or return a rune from outside the slice.
    if (start > end)
    {
        return default;
    }

    char first = text[start];
    // '\0' is stored in `rune` if `TryCreate` returns false
    if (!Rune.TryCreate(first, out Rune rune) && start + 1 <= end)
    {
        // Supplementary character
        Rune.TryCreate(first, text[start + 1], out rune);
    }

    return rune;
}
|
||||
|
||||
/// <summary>
|
||||
/// Goes to the next character, incrementing the <see cref="Start" /> position.
|
||||
/// </summary>
|
||||
@@ -244,6 +343,60 @@ public struct StringSlice : ICharIterator
|
||||
return (uint)index < (uint)text.Length ? text[index] : '\0';
|
||||
}
|
||||
|
||||
/// <summary>
/// Peeks a <see cref="Rune"/> at the specified offset from the current beginning of the slice
/// without using the range <see cref="Start"/> or <see cref="End"/>, returns <see langword="default"/> if outside the <see cref="Text"/>.
/// Recognizes supplementary code points that cannot be covered by a single <see cref="char"/>.
/// A non-negative <paramref name="offset"/> value expects the <em>high</em> surrogate and a negative <paramref name="offset"/> expects the <em>low</em> surrogate of the surrogate pair of a supplementary character at that position.
/// </summary>
/// <param name="offset">The offset, relative to <see cref="Start"/>.</param>
/// <returns>The rune at the specified offset, or <see langword="default"/> if the position is outside the text or holds an incomplete surrogate pair.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
#if NET
public
#else
internal
#endif
readonly Rune PeekRuneExtra(int offset)
{
    string text = Text;
    int index = Start + offset;
    if ((uint)index >= (uint)text.Length)
    {
        return default;
    }

    char unit = text[index];
    if (Rune.TryCreate(unit, out Rune rune))
    {
        // BMP
        return rune;
    }

    // `unit` is a surrogate and `rune` holds '\0' after the failed TryCreate.
    // Looking backwards, `unit` should be the low surrogate and its partner sits at `index - 1`;
    // looking forwards, `unit` should be the high surrogate and its partner sits at `index + 1`.
    bool lookingBack = offset < 0;
    int partnerIndex = lookingBack ? index - 1 : index + 1;

    // The unsigned comparison rejects both a negative index and one past the end of the text.
    if ((uint)partnerIndex < (uint)text.Length)
    {
        char partner = text[partnerIndex];
        if (lookingBack)
        {
            // Stores '\0' in `rune` if the two units do not form a valid pair
            Rune.TryCreate(partner, unit, out rune);
        }
        else
        {
            Rune.TryCreate(unit, partner, out rune);
        }
    }

    return rune;
}
|
||||
|
||||
/// <summary>
|
||||
/// Matches the specified text.
|
||||
/// </summary>
|
||||
@@ -474,7 +627,7 @@ public struct StringSlice : ICharIterator
|
||||
return default;
|
||||
}
|
||||
|
||||
#if NETCOREAPP3_1_OR_GREATER
|
||||
#if NET
|
||||
return MemoryMarshal.CreateReadOnlySpan(ref Unsafe.Add(ref Unsafe.AsRef(in text.GetPinnableReference()), start), length);
|
||||
#else
|
||||
return text.AsSpan(start, length);
|
||||
|
||||
@@ -27,4 +27,142 @@ internal static class UnicodeUtility
|
||||
highSurrogateCodePoint = (char)((value + ((0xD800u - 0x40u) << 10)) >> 10);
|
||||
lowSurrogateCodePoint = (char)((value & 0x3FFu) + 0xDC00u);
|
||||
}
|
||||
|
||||
#if !NETCOREAPP3_0_OR_GREATER
|
||||
// The following section is used only for the implementation of Rune.
|
||||
|
||||
/// <summary>
|
||||
/// The Unicode replacement character U+FFFD.
|
||||
/// </summary>
|
||||
public const uint ReplacementChar = 0xFFFD;
|
||||
|
||||
/// <summary>
/// Returns the Unicode plane (0 through 16, inclusive) which contains this code point.
/// </summary>
/// <param name="codePoint">The code point; checked by <c>UnicodeDebug.AssertIsValidCodePoint</c>.</param>
/// <returns>The code point shifted right by 16 bits.</returns>
public static int GetPlane(uint codePoint)
{
    UnicodeDebug.AssertIsValidCodePoint(codePoint);

    // The plane number is whatever remains above the low 16 bits.
    uint plane = codePoint >> 16;
    return (int)plane;
}
|
||||
|
||||
/// <summary>
/// Returns a Unicode scalar value from two code points representing a UTF-16 surrogate pair.
/// </summary>
/// <param name="highSurrogateCodePoint">The high surrogate of the pair.</param>
/// <param name="lowSurrogateCodePoint">The low surrogate of the pair.</param>
/// <returns>The scalar value encoded by the pair.</returns>
public static uint GetScalarFromUtf16SurrogatePair(uint highSurrogateCodePoint, uint lowSurrogateCodePoint)
{
    UnicodeDebug.AssertIsHighSurrogateCodePoint(highSurrogateCodePoint);
    UnicodeDebug.AssertIsLowSurrogateCodePoint(lowSurrogateCodePoint);

    // This calculation comes from the Unicode specification, Table 3-5.
    // The D800 marker of the high surrogate and the DC00 marker of the low surrogate have to be
    // removed, and the "wwww = uuuuu - 1" section of the bit distribution fixed up. All of that is
    // folded into one compile-time constant so only a shift, an add and a subtract remain.
    const uint MarkerBias = (0xD800U << 10) + 0xDC00U - (1 << 16);

    uint combined = (highSurrogateCodePoint << 10) + lowSurrogateCodePoint;
    return combined - MarkerBias;
}
|
||||
|
||||
/// <summary>
/// Tells whether <paramref name="value"/> falls inside the ASCII range ([ U+0000..U+007F ]).
/// </summary>
/// <remarks>
/// Per http://www.unicode.org/glossary/#ASCII, ASCII covers U+0000..U+007F only.
/// </remarks>
/// <param name="value">The code point to test.</param>
/// <returns><see langword="true"/> when the value is below 0x80; <see langword="false"/> otherwise.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool IsAsciiCodePoint(uint value)
{
    return value < 0x80u;
}
|
||||
|
||||
/// <summary>
/// Tells whether <paramref name="value"/> is a UTF-16 high (leading) surrogate code point,
/// that is, a value within [ U+D800..U+DBFF ], bounds included.
/// </summary>
/// <param name="value">The code point to test.</param>
/// <returns><see langword="true"/> when the value is inside that range; <see langword="false"/> otherwise.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool IsHighSurrogateCodePoint(uint value)
{
    return IsInRangeInclusive(value, 0xD800U, 0xDBFFU);
}
|
||||
|
||||
/// <summary>
/// Tells whether <paramref name="value"/> lies between <paramref name="lowerBound"/> and
/// <paramref name="upperBound"/>, both bounds included.
/// </summary>
/// <param name="value">The value to test.</param>
/// <param name="lowerBound">The smallest accepted value.</param>
/// <param name="upperBound">The largest accepted value.</param>
/// <returns><see langword="true"/> when the value is inside the range; <see langword="false"/> otherwise.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool IsInRangeInclusive(uint value, uint lowerBound, uint upperBound)
{
    // Unsigned subtraction wraps around for values below lowerBound, producing a large
    // number that fails the comparison, so a single compare covers both ends of the range.
    uint offset = value - lowerBound;
    uint width = upperBound - lowerBound;
    return offset <= width;
}
|
||||
|
||||
/// <summary>
/// Tells whether <paramref name="value"/> is a UTF-16 low (trailing) surrogate code point,
/// that is, a value within [ U+DC00..U+DFFF ], bounds included.
/// </summary>
/// <param name="value">The code point to test.</param>
/// <returns><see langword="true"/> when the value is inside that range; <see langword="false"/> otherwise.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool IsLowSurrogateCodePoint(uint value)
{
    return IsInRangeInclusive(value, 0xDC00U, 0xDFFFU);
}
|
||||
|
||||
/// <summary>
/// Tells whether <paramref name="value"/> is any UTF-16 surrogate code point (high or low),
/// that is, a value within [ U+D800..U+DFFF ], bounds included.
/// </summary>
/// <param name="value">The code point to test.</param>
/// <returns><see langword="true"/> when the value is inside that range; <see langword="false"/> otherwise.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool IsSurrogateCodePoint(uint value)
{
    return IsInRangeInclusive(value, 0xD800U, 0xDFFFU);
}
|
||||
|
||||
/// <summary>
/// Tells whether <paramref name="codePoint"/> is a valid Unicode code point,
/// that is, a value within [ U+0000..U+10FFFF ], bounds included.
/// </summary>
/// <param name="codePoint">The value to test.</param>
/// <returns><see langword="true"/> when the value does not exceed U+10FFFF; <see langword="false"/> otherwise.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool IsValidCodePoint(uint codePoint)
{
    return codePoint < 0x110000U;
}
|
||||
|
||||
/// <summary>
/// Computes how many UTF-16 code units are needed to represent the Unicode scalar value <paramref name="value"/>.
/// </summary>
/// <param name="value">The scalar value; validated by a debug-only assertion.</param>
/// <returns>1 for values below 0x10000, 2 for valid scalar values at or above it.</returns>
public static int GetUtf16SequenceLength(uint value)
{
    UnicodeDebug.AssertIsValidScalar(value);

    // Branch-free computation, carried out in the high byte of a 32-bit value:
    //   value - 0x10000  => wraps around for value < 0x10000 (high byte 0xFF), otherwise high byte 0x00
    //   + (2 << 24)      => high byte becomes 0x01 in the wrapped case, 0x02 otherwise
    //   >> 24            => moves that high byte down so it becomes the result
    uint codeUnits = (value - 0x10000u + (2u << 24)) >> 24;
    return (int)codeUnits;
}
|
||||
|
||||
/// <summary>
/// Computes how many UTF-8 code units are needed to represent the Unicode scalar value <paramref name="value"/>.
/// </summary>
/// <param name="value">The scalar value; validated by a debug-only assertion.</param>
/// <returns>A count from 1 to 4, following the table in the method body.</returns>
public static int GetUtf8SequenceLength(uint value)
{
    UnicodeDebug.AssertIsValidScalar(value);

    // The computation is branch-free and covers every valid scalar value.
    //
    // Target mapping:
    //   U+0000..U+007F => 1 code unit
    //   U+0080..U+07FF => 2 code units
    //   U+0800..U+FFFF => 3 code units
    //   U+10000+       => 4 code units

    // Step 1: the arithmetic shift smears the sign bit, so 'shortAdjust' is -1 when the input
    // is below 0x800 (the 1- and 2-code-unit cases) and 0 for everything else.
    int shortAdjust = ((int)value - 0x0800) >> 31;

    // Step 2: XOR-ing the scalar with 0xF800 rearranges the mapping to:
    //   U+0000..U+F7FF => 3 code units
    //   U+F800..U+F87F => 1 code unit
    //   U+F880..U+FFFF => 2 code units
    //   U+10000+       => 4 code units
    // The 1- and 3-code-unit cases are now adjacent, so one subtraction separates them from the rest:
    //   - 0xF880     => high byte 0xFF (wrapped) for the 1/3 cases, 0x00 for the 2/4 cases
    //   + (4 << 24)  => high byte 0x03 for the 1/3 cases, 0x04 for the 2/4 cases
    //   >> 24        => moves that high byte down
    uint baseLength = ((value ^ 0xF800u) - 0xF880u + (4u << 24)) >> 24;

    // Step 3: combine both parts.
    //   U+0000..U+007F => 3 + (-1) * 2 = 1
    //   U+0080..U+07FF => 4 + (-1) * 2 = 2
    //   U+0800..U+FFFF => 3 + ( 0) * 2 = 3
    //   U+10000+       => 4 + ( 0) * 2 = 4
    return (int)baseLength + (shortAdjust * 2);
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
@@ -85,7 +85,8 @@ public class CodeInlineParser : InlineParser
|
||||
// We saw the start of a code inline, but the close sticks are not present on the same line.
|
||||
// If the next line starts with a pipe character, this is likely an incomplete CodeInline within a table.
|
||||
// Treat it as regular text to avoid breaking the overall table shape.
|
||||
if (processor.Inline != null && processor.Inline.ContainsParentOfType<PipeTableDelimiterInline>())
|
||||
// Use ContainsParentOrSiblingOfType to handle both nested and flat pipe table structures.
|
||||
if (processor.Inline != null && processor.Inline.ContainsParentOrSiblingOfType<PipeTableDelimiterInline>())
|
||||
{
|
||||
slice.Start = openingStart;
|
||||
return false;
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
|
||||
using System.Diagnostics;
|
||||
using System.Runtime.CompilerServices;
|
||||
|
||||
using System.Text;
|
||||
using Markdig.Helpers;
|
||||
using Markdig.Renderers.Html;
|
||||
using Markdig.Syntax;
|
||||
@@ -125,7 +125,10 @@ public class EmphasisInlineParser : InlineParser, IPostInlineProcessor
|
||||
}
|
||||
|
||||
// Follow DelimiterInline (EmphasisDelimiter, TableDelimiter...)
|
||||
child = delimiterInline.FirstChild;
|
||||
// If the delimiter has IsClosed=true (e.g., pipe table delimiter), it has no children
|
||||
// In that case, continue to next sibling instead of stopping
|
||||
var firstChild = delimiterInline.FirstChild;
|
||||
child = firstChild ?? delimiterInline.NextSibling;
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -150,18 +153,19 @@ public class EmphasisInlineParser : InlineParser, IPostInlineProcessor
|
||||
var delimiterChar = slice.CurrentChar;
|
||||
var emphasisDesc = emphasisMap![delimiterChar]!;
|
||||
|
||||
char pc = (char)0;
|
||||
Rune pc = (Rune)0;
|
||||
if (processor.Inline is HtmlEntityInline htmlEntityInline)
|
||||
{
|
||||
if (htmlEntityInline.Transcoded.Length > 0)
|
||||
{
|
||||
pc = htmlEntityInline.Transcoded[htmlEntityInline.Transcoded.End];
|
||||
pc = htmlEntityInline.Transcoded.RuneAt(htmlEntityInline.Transcoded.End);
|
||||
}
|
||||
}
|
||||
if (pc == 0)
|
||||
if (pc.Value == 0)
|
||||
{
|
||||
pc = slice.PeekCharExtra(-1);
|
||||
if (pc == delimiterChar && slice.PeekCharExtra(-2) != '\\')
|
||||
pc = slice.PeekRuneExtra(-1);
|
||||
// delimiterChar is BMP, so slice.PeekCharExtra(-2) is (a part of) the character two positions back.
|
||||
if (pc == (Rune)delimiterChar && slice.PeekCharExtra(-2) != '\\')
|
||||
{
|
||||
// If we get here, we determined that either:
|
||||
// a) there weren't enough delimiters in the delimiter run to satisfy the MinimumCount condition
|
||||
@@ -179,12 +183,13 @@ public class EmphasisInlineParser : InlineParser, IPostInlineProcessor
|
||||
return false;
|
||||
}
|
||||
|
||||
char c = slice.CurrentChar;
|
||||
Rune c = slice.CurrentRune;
|
||||
|
||||
// The following character is actually an entity, we need to decode it
|
||||
if (HtmlEntityParser.TryParse(ref slice, out string? htmlString, out int htmlLength))
|
||||
{
|
||||
c = htmlString[0];
|
||||
// Note: c is U+FFFD when decode error
|
||||
Rune.DecodeFromUtf16(htmlString, out c, out _);
|
||||
}
|
||||
|
||||
// Calculate Open-Close for current character
|
||||
|
||||
@@ -38,11 +38,10 @@ public abstract class ParserList<T, TState> : OrderedList<T> where T : notnull,
|
||||
{
|
||||
foreach (var openingChar in parser.OpeningCharacters)
|
||||
{
|
||||
if (!charCounter.ContainsKey(openingChar))
|
||||
if (!charCounter.TryAdd(openingChar, 1))
|
||||
{
|
||||
charCounter[openingChar] = 0;
|
||||
charCounter[openingChar]++;
|
||||
}
|
||||
charCounter[openingChar]++;
|
||||
}
|
||||
}
|
||||
else
|
||||
|
||||
23
src/Markdig/Polyfills/DictionaryExtensions.cs
Normal file
23
src/Markdig/Polyfills/DictionaryExtensions.cs
Normal file
@@ -0,0 +1,23 @@
|
||||
// Copyright (c) Alexandre Mutel. All rights reserved.
|
||||
// This file is licensed under the BSD-Clause 2 license.
|
||||
// See the license.txt file in the project root for more information.
|
||||
|
||||
#if !(NETSTANDARD2_1_OR_GREATER || NET)
|
||||
|
||||
namespace System.Collections.Generic;
|
||||
|
||||
/// <summary>
/// Supplies a <c>TryAdd</c> extension method for <see cref="Dictionary{TKey, TValue}"/>.
/// </summary>
internal static class DictionaryExtensions
{
    /// <summary>
    /// Adds <paramref name="key"/> with <paramref name="value"/> to <paramref name="dictionary"/>
    /// unless the key is already present.
    /// </summary>
    /// <param name="dictionary">The dictionary to add to.</param>
    /// <param name="key">The key to insert.</param>
    /// <param name="value">The value stored under <paramref name="key"/> when the key is new.</param>
    /// <returns>
    /// <c>true</c> if the pair was inserted; <c>false</c> if the key already existed, in which case
    /// the dictionary is left untouched.
    /// </returns>
    public static bool TryAdd<TKey, TValue>(this Dictionary<TKey, TValue> dictionary, TKey key, TValue value) where TKey : notnull
    {
        if (dictionary.ContainsKey(key))
        {
            return false;
        }

        // The key is known to be absent here, so Add cannot hit its duplicate-key failure.
        dictionary.Add(key, value);
        return true;
    }
}
|
||||
|
||||
#endif
|
||||
1638
src/Markdig/Polyfills/Rune.cs
Normal file
1638
src/Markdig/Polyfills/Rune.cs
Normal file
File diff suppressed because it is too large
Load Diff
74
src/Markdig/Polyfills/UnicodeDebug.cs
Normal file
74
src/Markdig/Polyfills/UnicodeDebug.cs
Normal file
@@ -0,0 +1,74 @@
|
||||
// Copyright (c) Alexandre Mutel. All rights reserved.
|
||||
// This file is licensed under the BSD-Clause 2 license.
|
||||
// See the license.txt file in the project root for more information.
|
||||
|
||||
// Based on https://github.com/dotnet/runtime/blob/main/src/libraries/System.Private.CoreLib/src/System/Text/UnicodeDebug.cs
|
||||
// Licensed to the .NET Foundation under one or more agreements.
|
||||
// The .NET Foundation licenses this file to you under the MIT license.
|
||||
|
||||
#if !NET
|
||||
// Currently used only by the Rune polyfill.
|
||||
|
||||
using System.Diagnostics;
|
||||
|
||||
namespace System.Text;
|
||||
|
||||
/// <summary>
/// Debug-only assertions about Unicode code points and scalar values.
/// Every assertion is marked <c>[Conditional("DEBUG")]</c>.
/// </summary>
internal static class UnicodeDebug
{
    /// <summary>
    /// Fails the debug assertion when <paramref name="codePoint"/> is rejected by
    /// <c>UnicodeUtility.IsValidCodePoint</c>.
    /// </summary>
    [Conditional("DEBUG")]
    internal static void AssertIsValidCodePoint(uint codePoint)
    {
        if (UnicodeUtility.IsValidCodePoint(codePoint))
        {
            return;
        }

        Debug.Fail($"The value {ToHexString(codePoint)} is not a valid Unicode code point.");
    }

    /// <summary>
    /// Fails the debug assertion when <paramref name="codePoint"/> is rejected by
    /// <c>UnicodeUtility.IsHighSurrogateCodePoint</c>.
    /// </summary>
    [Conditional("DEBUG")]
    internal static void AssertIsHighSurrogateCodePoint(uint codePoint)
    {
        if (UnicodeUtility.IsHighSurrogateCodePoint(codePoint))
        {
            return;
        }

        Debug.Fail($"The value {ToHexString(codePoint)} is not a valid UTF-16 high surrogate code point.");
    }

    /// <summary>
    /// Fails the debug assertion when <paramref name="codePoint"/> is rejected by
    /// <c>UnicodeUtility.IsLowSurrogateCodePoint</c>.
    /// </summary>
    [Conditional("DEBUG")]
    internal static void AssertIsLowSurrogateCodePoint(uint codePoint)
    {
        if (UnicodeUtility.IsLowSurrogateCodePoint(codePoint))
        {
            return;
        }

        Debug.Fail($"The value {ToHexString(codePoint)} is not a valid UTF-16 low surrogate code point.");
    }

    /// <summary>
    /// Fails the debug assertion when <paramref name="scalarValue"/> is rejected by
    /// <c>UnicodeUtility.IsValidUnicodeScalar</c>.
    /// </summary>
    [Conditional("DEBUG")]
    internal static void AssertIsValidScalar(uint scalarValue)
    {
        if (UnicodeUtility.IsValidUnicodeScalar(scalarValue))
        {
            return;
        }

        Debug.Fail($"The value {ToHexString(scalarValue)} is not a valid Unicode scalar value.");
    }

    /// <summary>
    /// Fails the debug assertion unless <paramref name="scalarValue"/> is accepted by
    /// <c>UnicodeUtility.IsValidUnicodeScalar</c> and is not a BMP code point according to
    /// <c>UnicodeUtility.IsBmpCodePoint</c>.
    /// </summary>
    [Conditional("DEBUG")]
    internal static void AssertIsValidSupplementaryPlaneScalar(uint scalarValue)
    {
        // Both conditions must hold; the BMP check is only evaluated for valid scalars.
        if (UnicodeUtility.IsValidUnicodeScalar(scalarValue) && !UnicodeUtility.IsBmpCodePoint(scalarValue))
        {
            return;
        }

        Debug.Fail($"The value {ToHexString(scalarValue)} is not a valid supplementary plane Unicode scalar value.");
    }

    /// <summary>
    /// Renders a value in the "U+XXXX" notation, using invariant-culture formatting and at least four hex digits.
    /// </summary>
    /// <remarks>
    /// Any integer is accepted; the input does not need to be a real code point of the Unicode codespace.
    /// </remarks>
    private static string ToHexString(uint codePoint) => FormattableString.Invariant($"U+{codePoint:X4}");
}
|
||||
#endif
|
||||
@@ -216,6 +216,47 @@ public abstract class Inline : MarkdownObject, IInline
|
||||
return false;
|
||||
}
|
||||
|
||||
/// <summary>
/// Determines whether this inline has a parent of the specified type or, failing that, whether
/// the topmost container above this inline has a direct child of the specified type.
/// </summary>
/// <remarks>
/// The parent check is delegated to <c>ContainsParentOfType</c> (nested structure). When it finds
/// nothing, the <c>Parent</c> chain is followed up to the container that has no parent and that
/// container's direct children are scanned from first to last (flat structure).
/// </remarks>
/// <typeparam name="T">Type of the parent or root-level sibling to look for</typeparam>
/// <returns>
/// <c>true</c> if a parent of type <typeparamref name="T"/> exists or a direct child of the topmost
/// container is of type <typeparamref name="T"/>; <c>false</c> otherwise, including when this inline has no parent
/// </returns>
public bool ContainsParentOrSiblingOfType<T>() where T : Inline
{
    // Nested case: an ancestor of the requested type settles the question immediately.
    if (ContainsParentOfType<T>())
    {
        return true;
    }

    // Flat case: climb to the topmost container (the one whose own Parent is null).
    var topmost = Parent;
    while (topmost?.Parent != null)
    {
        topmost = topmost.Parent;
    }

    // No parent at all means there is nothing to scan.
    if (topmost is ContainerInline rootContainer)
    {
        // Look through every direct child of the topmost container.
        for (var current = rootContainer.FirstChild; current != null; current = current.NextSibling)
        {
            if (current is T)
            {
                return true;
            }
        }
    }

    return false;
}
|
||||
|
||||
/// <summary>
|
||||
/// Iterates on parents of the specified type.
|
||||
/// </summary>
|
||||
|
||||
@@ -40,10 +40,7 @@ public class LinkReferenceDefinitionGroup : ContainerBlock
|
||||
if (!Contains(link))
|
||||
{
|
||||
Add(link);
|
||||
if (!Links.ContainsKey(label))
|
||||
{
|
||||
Links[label] = link;
|
||||
}
|
||||
Links.TryAdd(label, link);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user