Optimize PipeTable parsing: O(n²) → O(n) for 3.7x–85x speedup, enables 10K+ row tables (#922)

* Optimize PipeTable parsing: O(n²) → O(n) for large tables

Pipe tables were creating deeply nested tree structures where each pipe
delimiter contained all subsequent content as children, causing O(n²)
traversal complexity for n cells. This change restructures the parser to
use a flat sibling-based structure, treating tables as matrices rather
than nested trees.

Key changes:
- Set IsClosed=true on PipeTableDelimiterInline to prevent nesting
- Add PromoteNestedPipesToRootLevel() to flatten pipes nested in emphasis
- Update cell boundary detection to use sibling traversal
- Move EmphasisInlineParser before PipeTableParser in processing order
- Fix EmphasisInlineParser to continue past IsClosed delimiters
- Add ContainsParentOrSiblingOfType<T>() helper for flat structure detection

Performance improvements (measured on typical markdown content):

| Rows | Before    | After   | Speedup |
|------|-----------|---------|---------|
| 100  | 542 μs    | 150 μs  | 3.6x    |
| 500  | 23,018 μs | 763 μs  | 30x     |
| 1000 | 89,418 μs | 1,596 μs| 56x     |
| 1500 | 201,593 μs| 2,740 μs| 74x     |
| 5000 | CRASH     | 10,588 μs| ∞      |
| 10000| CRASH     | 18,551 μs| ∞      |

Tables with 5000+ rows previously crashed due to stack overflow from
recursive depth. They now parse successfully with linear time complexity.

* remove baseline results file

* Do not use System.Index and fix nullability checks for older platforms
This commit is contained in:
Martijn Laarman
2026-01-30 22:05:18 +01:00
committed by GitHub
parent 3602433b84
commit d47fbc757f
10 changed files with 423 additions and 137 deletions

2
.gitignore vendored
View File

@@ -8,6 +8,8 @@
*.sln.docstates *.sln.docstates
*.nuget.props *.nuget.props
*.nuget.targets *.nuget.targets
src/.idea
BenchmarkDotNet.Artifacts
# User-specific files (MonoDevelop/Xamarin Studio) # User-specific files (MonoDevelop/Xamarin Studio)
*.userprefs *.userprefs

View File

@@ -0,0 +1,81 @@
// Copyright (c) Alexandre Mutel. All rights reserved.
// This file is licensed under the BSD-Clause 2 license.
// See the license.txt file in the project root for more information.
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Diagnosers;
using Markdig;
namespace Testamina.Markdig.Benchmarks.PipeTable;
/// <summary>
/// BenchmarkDotNet suite that converts generated pipe tables of increasing row counts to HTML,
/// so that the cost of pipe table parsing can be compared across table sizes.
/// </summary>
[MemoryDiagnoser]
[GcServer(true)] // Server GC is requested to get more comprehensive GC stats
public class PipeTableBenchmark
{
    // Every generated table uses the same number of columns; only the row count varies.
    private const int ColumnCount = 5;

    private MarkdownPipeline _pipeline = null!;

    private string _table100 = null!;
    private string _table500 = null!;
    private string _table1000 = null!;
    private string _table1500 = null!;
    private string _table5000 = null!;
    private string _table10000 = null!;

    /// <summary>
    /// Builds the shared pipeline and pre-generates the markdown inputs once,
    /// so that table generation is not part of the measured work.
    /// </summary>
    [GlobalSetup]
    public void Setup()
    {
        // The advanced extensions are enabled because they include pipe tables.
        var builder = new MarkdownPipelineBuilder();
        _pipeline = builder.UseAdvancedExtensions().Build();

        // Inputs of various sizes.
        // Note: before the optimization, 5000+ rows hit the depth limit due to the nested tree structure.
        // After the optimization, these are expected to work.
        _table100 = PipeTableGenerator.Generate(100, ColumnCount);
        _table500 = PipeTableGenerator.Generate(500, ColumnCount);
        _table1000 = PipeTableGenerator.Generate(1000, ColumnCount);
        _table1500 = PipeTableGenerator.Generate(1500, ColumnCount);
        _table5000 = PipeTableGenerator.Generate(5000, ColumnCount);
        _table10000 = PipeTableGenerator.Generate(10000, ColumnCount);
    }

    /// <summary>Renders the 100-row table to HTML.</summary>
    [Benchmark(Description = "PipeTable 100 rows x 5 cols")]
    public string Parse100Rows() => Markdown.ToHtml(_table100, _pipeline);

    /// <summary>Renders the 500-row table to HTML.</summary>
    [Benchmark(Description = "PipeTable 500 rows x 5 cols")]
    public string Parse500Rows() => Markdown.ToHtml(_table500, _pipeline);

    /// <summary>Renders the 1000-row table to HTML.</summary>
    [Benchmark(Description = "PipeTable 1000 rows x 5 cols")]
    public string Parse1000Rows() => Markdown.ToHtml(_table1000, _pipeline);

    /// <summary>Renders the 1500-row table to HTML.</summary>
    [Benchmark(Description = "PipeTable 1500 rows x 5 cols")]
    public string Parse1500Rows() => Markdown.ToHtml(_table1500, _pipeline);

    /// <summary>Renders the 5000-row table to HTML.</summary>
    [Benchmark(Description = "PipeTable 5000 rows x 5 cols")]
    public string Parse5000Rows() => Markdown.ToHtml(_table5000, _pipeline);

    /// <summary>Renders the 10000-row table to HTML.</summary>
    [Benchmark(Description = "PipeTable 10000 rows x 5 cols")]
    public string Parse10000Rows() => Markdown.ToHtml(_table10000, _pipeline);
}

View File

@@ -0,0 +1,61 @@
// Copyright (c) Alexandre Mutel. All rights reserved.
// This file is licensed under the BSD-Clause 2 license.
// See the license.txt file in the project root for more information.
using System.Text;
namespace Testamina.Markdig.Benchmarks.PipeTable;
/// <summary>
/// Produces pipe table markdown text of a requested size, used as benchmark input.
/// </summary>
public static class PipeTableGenerator
{
    private const int DefaultCellWidth = 10;

    /// <summary>
    /// Builds a markdown pipe table made of one header line, one dash separator line
    /// and <paramref name="rows"/> data lines, each line terminated by the platform newline.
    /// </summary>
    /// <param name="rows">Number of data rows (the header and separator lines are not counted).</param>
    /// <param name="columns">Number of columns on every line.</param>
    /// <param name="cellWidth">Width each cell text is right-padded to (default: 10); longer text is not truncated.</param>
    /// <returns>The generated pipe table markdown.</returns>
    public static string Generate(int rows, int columns, int cellWidth = DefaultCellWidth)
    {
        var markdown = new StringBuilder();

        // Header line: "| Header 1 | Header 2 | ..."
        markdown.Append('|');
        for (int columnNumber = 1; columnNumber <= columns; columnNumber++)
        {
            AppendCell(markdown, $"Header {columnNumber}", cellWidth);
        }
        markdown.AppendLine();

        // Separator line: dashes cover the padded text plus the two surrounding spaces
        markdown.Append('|');
        for (int columnNumber = 1; columnNumber <= columns; columnNumber++)
        {
            markdown.Append('-', cellWidth + 2).Append('|');
        }
        markdown.AppendLine();

        // Data lines: "| R1C1 | R1C2 | ..."
        for (int rowNumber = 1; rowNumber <= rows; rowNumber++)
        {
            markdown.Append('|');
            for (int columnNumber = 1; columnNumber <= columns; columnNumber++)
            {
                AppendCell(markdown, $"R{rowNumber}C{columnNumber}", cellWidth);
            }
            markdown.AppendLine();
        }

        return markdown.ToString();
    }

    // Writes one cell: a leading space, the text padded to cellWidth, then " |".
    private static void AppendCell(StringBuilder target, string text, int cellWidth)
        => target.Append(' ').Append(text.PadRight(cellWidth)).Append(" |");
}

View File

@@ -7,6 +7,7 @@ using BenchmarkDotNet.Configs;
using BenchmarkDotNet.Running; using BenchmarkDotNet.Running;
using Markdig; using Markdig;
using Testamina.Markdig.Benchmarks.PipeTable;
namespace Testamina.Markdig.Benchmarks; namespace Testamina.Markdig.Benchmarks;
@@ -68,7 +69,16 @@ public class Program
//config.Add(gcDiagnoser); //config.Add(gcDiagnoser);
//var config = DefaultConfig.Instance; //var config = DefaultConfig.Instance;
BenchmarkRunner.Run<Program>(config);
// Run specific benchmarks based on command line arguments
if (args.Length > 0 && args[0] == "--pipetable")
{
BenchmarkRunner.Run<PipeTableBenchmark>(config);
}
else
{
BenchmarkRunner.Run<Program>(config);
}
//BenchmarkRunner.Run<TestDictionary>(config); //BenchmarkRunner.Run<TestDictionary>(config);
//BenchmarkRunner.Run<TestMatchPerf>(); //BenchmarkRunner.Run<TestMatchPerf>();
//BenchmarkRunner.Run<TestStringPerf>(); //BenchmarkRunner.Run<TestStringPerf>();

View File

@@ -38,7 +38,7 @@ public class PipeTableExtension : IMarkdownExtension
var lineBreakParser = pipeline.InlineParsers.FindExact<LineBreakInlineParser>(); var lineBreakParser = pipeline.InlineParsers.FindExact<LineBreakInlineParser>();
if (!pipeline.InlineParsers.Contains<PipeTableParser>()) if (!pipeline.InlineParsers.Contains<PipeTableParser>())
{ {
pipeline.InlineParsers.InsertBefore<EmphasisInlineParser>(new PipeTableParser(lineBreakParser!, Options)); pipeline.InlineParsers.InsertAfter<EmphasisInlineParser>(new PipeTableParser(lineBreakParser!, Options));
} }
} }

View File

@@ -19,7 +19,7 @@ namespace Markdig.Extensions.Tables;
/// <seealso cref="IPostInlineProcessor" /> /// <seealso cref="IPostInlineProcessor" />
public class PipeTableParser : InlineParser, IPostInlineProcessor public class PipeTableParser : InlineParser, IPostInlineProcessor
{ {
private readonly LineBreakInlineParser lineBreakParser; private readonly LineBreakInlineParser _lineBreakParser;
/// <summary> /// <summary>
/// Initializes a new instance of the <see cref="PipeTableParser" /> class. /// Initializes a new instance of the <see cref="PipeTableParser" /> class.
@@ -28,7 +28,7 @@ public class PipeTableParser : InlineParser, IPostInlineProcessor
/// <param name="options">The options.</param> /// <param name="options">The options.</param>
public PipeTableParser(LineBreakInlineParser lineBreakParser, PipeTableOptions? options = null) public PipeTableParser(LineBreakInlineParser lineBreakParser, PipeTableOptions? options = null)
{ {
this.lineBreakParser = lineBreakParser ?? throw new ArgumentNullException(nameof(lineBreakParser)); _lineBreakParser = lineBreakParser ?? throw new ArgumentNullException(nameof(lineBreakParser));
OpeningCharacters = ['|', '\n', '\r']; OpeningCharacters = ['|', '\n', '\r'];
Options = options ?? new PipeTableOptions(); Options = options ?? new PipeTableOptions();
} }
@@ -86,7 +86,7 @@ public class PipeTableParser : InlineParser, IPostInlineProcessor
tableState.IsInvalidTable = true; tableState.IsInvalidTable = true;
} }
tableState.LineHasPipe = false; tableState.LineHasPipe = false;
lineBreakParser.Match(processor, ref slice); _lineBreakParser.Match(processor, ref slice);
if (!isFirstLineEmpty) if (!isFirstLineEmpty)
{ {
tableState.ColumnAndLineDelimiters.Add(processor.Inline!); tableState.ColumnAndLineDelimiters.Add(processor.Inline!);
@@ -100,7 +100,8 @@ public class PipeTableParser : InlineParser, IPostInlineProcessor
Span = new SourceSpan(position, position), Span = new SourceSpan(position, position),
Line = globalLineIndex, Line = globalLineIndex,
Column = column, Column = column,
LocalLineIndex = localLineIndex LocalLineIndex = localLineIndex,
IsClosed = true // Creates flat sibling structure for O(n) traversal
}; };
tableState.LineHasPipe = true; tableState.LineHasPipe = true;
@@ -125,6 +126,8 @@ public class PipeTableParser : InlineParser, IPostInlineProcessor
return true; return true;
} }
// With flat structure, pipes are siblings at root level
// Walk backwards from the last child to find pipe delimiters
var child = container.LastChild; var child = container.LastChild;
List<PipeTableDelimiterInline>? delimitersToRemove = null; List<PipeTableDelimiterInline>? delimitersToRemove = null;
@@ -142,8 +145,8 @@ public class PipeTableParser : InlineParser, IPostInlineProcessor
break; break;
} }
var subContainer = child as ContainerInline; // Walk siblings instead of descending into containers
child = subContainer?.LastChild; child = child.PreviousSibling;
} }
// If we have found any delimiters, transform them to literals // If we have found any delimiters, transform them to literals
@@ -186,8 +189,8 @@ public class PipeTableParser : InlineParser, IPostInlineProcessor
// Remove previous state // Remove previous state
state.ParserStates[Index] = null!; state.ParserStates[Index] = null!;
// Continue // Abort if not a valid table
if (tableState is null || container is null || tableState.IsInvalidTable || !tableState.LineHasPipe ) //|| tableState.LineIndex != state.LocalLineIndex) if (tableState is null || container is null || tableState.IsInvalidTable || !tableState.LineHasPipe)
{ {
if (tableState is not null) if (tableState is not null)
{ {
@@ -204,11 +207,18 @@ public class PipeTableParser : InlineParser, IPostInlineProcessor
// Detect the header row // Detect the header row
var delimiters = tableState.ColumnAndLineDelimiters; var delimiters = tableState.ColumnAndLineDelimiters;
// TODO: we could optimize this by merging FindHeaderRow and the cell loop
var aligns = FindHeaderRow(delimiters); var aligns = FindHeaderRow(delimiters);
if (Options.RequireHeaderSeparator && aligns is null) if (Options.RequireHeaderSeparator && aligns is null)
{ {
// No valid header separator found - convert all pipe delimiters to literals
foreach (var inline in delimiters)
{
if (inline is PipeTableDelimiterInline pipeDelimiter)
{
pipeDelimiter.ReplaceByLiteral();
}
}
return true; return true;
} }
@@ -224,68 +234,40 @@ public class PipeTableParser : InlineParser, IPostInlineProcessor
var cells = tableState.Cells; var cells = tableState.Cells;
cells.Clear(); cells.Clear();
//delimiters[0].DumpTo(state.DebugLog); // Pipes may end up nested inside unmatched emphasis delimiters, e.g.:
// *a | b*|
// Promote them to root level so we have a flat sibling structure.
PromoteNestedPipesToRootLevel(delimiters, container);
// delimiters contain a list of `|` and `\n` delimiters // The inline tree is now flat: all pipes and line breaks are siblings at root level.
// The `|` delimiters are created as child containers. // For example, `| a | b \n| c | d \n` produces:
// So the following: // [|] [a] [|] [b] [\n] [|] [c] [|] [d] [\n]
// | a | b \n
// | d | e \n
// //
// Will generate a tree of the following node: // Tables support four row formats:
// | // | a | b | (leading and trailing pipes)
// a // | a | b (leading pipe only)
// | // a | b (no leading or trailing pipes)
// b // a | b | (trailing pipe only)
// \n
// |
// d
// |
// e
// \n
// When parsing delimiters, we need to recover whether a row is of the following form:
// 0) | a | b | \n
// 1) | a | b \n
// 2) a | b \n
// 3) a | b | \n
// If the last element is not a line break, add a line break to homogenize parsing in the next loop // Ensure the table ends with a line break to simplify row detection
var lastElement = delimiters[delimiters.Count - 1]; var lastElement = delimiters[delimiters.Count - 1];
if (!(lastElement is LineBreakInline)) if (!(lastElement is LineBreakInline))
{ {
while (true) // Find the actual last sibling (there may be content after the last delimiter)
while (lastElement.NextSibling != null)
{ {
if (lastElement is ContainerInline lastElementContainer) lastElement = lastElement.NextSibling;
{
var nextElement = lastElementContainer.LastChild;
if (nextElement != null)
{
lastElement = nextElement;
continue;
}
}
break;
} }
var endOfTable = new LineBreakInline(); var endOfTable = new LineBreakInline();
// If the last element is a container, we have to add the EOL to its child lastElement.InsertAfter(endOfTable);
// otherwise only next sibling
if (lastElement is ContainerInline)
{
((ContainerInline)lastElement).AppendChild(endOfTable);
}
else
{
lastElement.InsertAfter(endOfTable);
}
delimiters.Add(endOfTable); delimiters.Add(endOfTable);
tableState.EndOfLines.Add(endOfTable); tableState.EndOfLines.Add(endOfTable);
} }
int lastPipePos = 0; int lastPipePos = 0;
// Cell loop // Build table rows and cells by iterating through delimiters
// Reconstruct the table from the delimiters
TableRow? row = null; TableRow? row = null;
TableRow? firstRow = null; TableRow? firstRow = null;
for (int i = 0; i < delimiters.Count; i++) for (int i = 0; i < delimiters.Count; i++)
@@ -300,9 +282,7 @@ public class PipeTableParser : InlineParser, IPostInlineProcessor
firstRow ??= row; firstRow ??= row;
// If the first delimiter is a pipe and doesn't have any parent or previous sibling, for cases like: // Skip leading pipe at start of row (e.g., `| a | b` or `| a | b |`)
// 0) | a | b | \n
// 1) | a | b \n
if (pipeSeparator != null && (delimiter.PreviousSibling is null || delimiter.PreviousSibling is LineBreakInline)) if (pipeSeparator != null && (delimiter.PreviousSibling is null || delimiter.PreviousSibling is LineBreakInline))
{ {
delimiter.Remove(); delimiter.Remove();
@@ -316,57 +296,37 @@ public class PipeTableParser : InlineParser, IPostInlineProcessor
} }
} }
// We need to find the beginning/ending of a cell from a right delimiter. From the delimiter 'x', we need to find a (without the delimiter start `|`) // Find cell content by walking backwards from this delimiter to the previous pipe or line break.
// So we iterate back to the first pipe or line break // For `| a | b \n` at delimiter 'x':
// x // [|] [a] [x] [b] [\n]
// 1) | a | b \n // ^--- current delimiter
// 2) a | b \n // Walk back: [a] is the cell content (stop at [|])
Inline? endOfCell = null; Inline? endOfCell = null;
Inline? beginOfCell = null; Inline? beginOfCell = null;
var cellContentIt = delimiter; var cellContentIt = delimiter.PreviousSibling;
while (true) while (cellContentIt != null)
{ {
cellContentIt = cellContentIt.PreviousSibling ?? cellContentIt.Parent; if (cellContentIt is LineBreakInline || cellContentIt is PipeTableDelimiterInline)
if (cellContentIt is null || cellContentIt is LineBreakInline)
{
break; break;
}
// The cell begins at the first effective child after a | or the top ContainerInline (which is not necessary to bring into the tree + it contains an invalid span calculation) // Stop at the root ContainerInline (which is not necessary to bring into the tree + it contains an invalid span calculation)
if (cellContentIt is PipeTableDelimiterInline || (cellContentIt.GetType() == typeof(ContainerInline) && cellContentIt.Parent is null )) if (cellContentIt.GetType() == typeof(ContainerInline) && cellContentIt.Parent is null)
{
beginOfCell = ((ContainerInline)cellContentIt).FirstChild;
if (endOfCell is null)
{
endOfCell = beginOfCell;
}
break; break;
}
beginOfCell = cellContentIt; beginOfCell = cellContentIt;
if (endOfCell is null) endOfCell ??= beginOfCell;
{
endOfCell = beginOfCell; cellContentIt = cellContentIt.PreviousSibling;
}
} }
// If the current deilimiter is a pipe `|` OR // If the current delimiter is a pipe `|` OR
// the beginOfCell/endOfCell are not null and // the beginOfCell/endOfCell are not null and
// either they are : // either they are:
// - different // - different
// - they contain a single element, but it is not a line break (\n) or an empty/whitespace Literal. // - they contain a single element, but it is not a line break (\n) or an empty/whitespace Literal.
// Then we can add a cell to the current row // Then we can add a cell to the current row
if (!isLine || (beginOfCell != null && endOfCell != null && ( beginOfCell != endOfCell || !(beginOfCell is LineBreakInline || (beginOfCell is LiteralInline beingOfCellLiteral && beingOfCellLiteral.Content.IsEmptyOrWhitespace()))))) if (!isLine || (beginOfCell != null && endOfCell != null && ( beginOfCell != endOfCell || !(beginOfCell is LineBreakInline || (beginOfCell is LiteralInline beingOfCellLiteral && beingOfCellLiteral.Content.IsEmptyOrWhitespace())))))
{ {
if (!isLine)
{
// If the delimiter is a pipe, we need to remove it from the tree
// so that previous loop looking for a parent will not go further on subsequent cells
delimiter.Remove();
lastPipePos = delimiter.Span.End;
}
// We trim whitespace at the beginning and ending of the cell // We trim whitespace at the beginning and ending of the cell
TrimStart(beginOfCell); TrimStart(beginOfCell);
TrimEnd(endOfCell); TrimEnd(endOfCell);
@@ -374,10 +334,20 @@ public class PipeTableParser : InlineParser, IPostInlineProcessor
var cellContainer = new ContainerInline(); var cellContainer = new ContainerInline();
// Copy elements from beginOfCell on the first level // Copy elements from beginOfCell on the first level
// The pipe delimiter serves as a boundary - stop when we hit it
var cellIt = beginOfCell; var cellIt = beginOfCell;
while (cellIt != null && !IsLine(cellIt) && !(cellIt is PipeTableDelimiterInline)) while (cellIt != null && !IsLine(cellIt) && !(cellIt is PipeTableDelimiterInline))
{ {
var nextSibling = cellIt.NextSibling; var nextSibling = cellIt.NextSibling;
// Skip empty literals (can result from trimming)
if (cellIt is LiteralInline { Content.IsEmpty: true })
{
cellIt.Remove();
cellIt = nextSibling;
continue;
}
cellIt.Remove(); cellIt.Remove();
if (cellContainer.Span.IsEmpty) if (cellContainer.Span.IsEmpty)
{ {
@@ -390,8 +360,16 @@ public class PipeTableParser : InlineParser, IPostInlineProcessor
cellIt = nextSibling; cellIt = nextSibling;
} }
if (!isLine)
{
// Remove the pipe delimiter AFTER copying cell content
// This preserves the sibling chain during the copy loop
delimiter.Remove();
lastPipePos = delimiter.Span.End;
}
// Create the cell and add it to the pending row // Create the cell and add it to the pending row
var tableParagraph = new ParagraphBlock() var tableParagraph = new ParagraphBlock
{ {
Span = cellContainer.Span, Span = cellContainer.Span,
Line = cellContainer.Line, Line = cellContainer.Line,
@@ -443,8 +421,7 @@ public class PipeTableParser : InlineParser, IPostInlineProcessor
endOfLine.Remove(); endOfLine.Remove();
} }
// If we have a header row, we can remove it // Mark first row as header and remove the separator row if present
// TODO: we could optimize this by merging FindHeaderRow and the previous loop
var tableRow = (TableRow)table[0]; var tableRow = (TableRow)table[0];
tableRow.IsHeader = Options.RequireHeaderSeparator; tableRow.IsHeader = Options.RequireHeaderSeparator;
if (aligns != null) if (aligns != null)
@@ -454,11 +431,13 @@ public class PipeTableParser : InlineParser, IPostInlineProcessor
table.ColumnDefinitions.AddRange(aligns); table.ColumnDefinitions.AddRange(aligns);
} }
// Perform delimiter processor that are coming after this processor // Perform all post-processors on cell content
// With InsertAfter, emphasis runs before pipe table, so we need to re-run from index 0
// to ensure emphasis delimiters in cells are properly matched
foreach (var cell in cells) foreach (var cell in cells)
{ {
var paragraph = (ParagraphBlock) cell[0]; var paragraph = (ParagraphBlock) cell[0];
state.PostProcessInlines(postInlineProcessorIndex + 1, paragraph.Inline, null, true); state.PostProcessInlines(0, paragraph.Inline, null, true);
if (paragraph.Inline?.LastChild is not null) if (paragraph.Inline?.LastChild is not null)
{ {
paragraph.Inline.Span.End = paragraph.Inline.LastChild.Span.End; paragraph.Inline.Span.End = paragraph.Inline.LastChild.Span.End;
@@ -548,7 +527,7 @@ public class PipeTableParser : InlineParser, IPostInlineProcessor
continue; continue;
} }
// The last delimiter is always null, // Parse the separator row (second row) to extract column alignments
for (int j = i + 1; j < delimiters.Count; j++) for (int j = i + 1; j < delimiters.Count; j++)
{ {
var delimiter = delimiters[j]; var delimiter = delimiters[j];
@@ -560,11 +539,13 @@ public class PipeTableParser : InlineParser, IPostInlineProcessor
continue; continue;
} }
// Check the left side of a `|` delimiter // Parse the content before this delimiter as a column definition (e.g., `:---`, `---:`, `:---:`)
// Skip if previous sibling is a pipe (empty cell) or whitespace
TableColumnAlign? align = null; TableColumnAlign? align = null;
int delimiterCount = 0; int delimiterCount = 0;
if (delimiter.PreviousSibling != null && if (delimiter.PreviousSibling != null &&
!(delimiter.PreviousSibling is LiteralInline li && li.Content.IsEmptyOrWhitespace()) && // ignore parsed whitespace !(delimiter.PreviousSibling is PipeTableDelimiterInline) &&
!(delimiter.PreviousSibling is LiteralInline li && li.Content.IsEmptyOrWhitespace()) &&
!ParseHeaderString(delimiter.PreviousSibling, out align, out delimiterCount)) !ParseHeaderString(delimiter.PreviousSibling, out align, out delimiterCount))
{ {
break; break;
@@ -576,14 +557,13 @@ public class PipeTableParser : InlineParser, IPostInlineProcessor
totalDelimiterCount += delimiterCount; totalDelimiterCount += delimiterCount;
columnDefinitions.Add(new TableColumnDefinition() { Alignment = align, Width = delimiterCount}); columnDefinitions.Add(new TableColumnDefinition() { Alignment = align, Width = delimiterCount});
// If this is the last delimiter, we need to check the right side of the `|` delimiter // If this is the last pipe, check for a trailing column definition (row without trailing pipe)
// e.g., `| :--- | ---:` has content after the last pipe
if (nextDelimiter is null) if (nextDelimiter is null)
{ {
var nextSibling = columnDelimiter != null var nextSibling = delimiter.NextSibling;
? columnDelimiter.FirstChild
: delimiter.NextSibling;
// If there is no content after // No trailing content means row ends with pipe: `| :--- |`
if (IsNullOrSpace(nextSibling)) if (IsNullOrSpace(nextSibling))
{ {
isValidRow = true; isValidRow = true;
@@ -664,9 +644,9 @@ public class PipeTableParser : InlineParser, IPostInlineProcessor
private static void TrimStart(Inline? inline) private static void TrimStart(Inline? inline)
{ {
while (inline is ContainerInline && !(inline is DelimiterInline)) while (inline is ContainerInline containerInline && !(containerInline is DelimiterInline))
{ {
inline = ((ContainerInline)inline).FirstChild; inline = containerInline.FirstChild;
} }
if (inline is LiteralInline literal) if (inline is LiteralInline literal)
@@ -677,6 +657,13 @@ public class PipeTableParser : InlineParser, IPostInlineProcessor
private static void TrimEnd(Inline? inline) private static void TrimEnd(Inline? inline)
{ {
// Walk into containers to find the last leaf to trim
// Skip PipeTableDelimiterInline but walk into other containers (including emphasis)
while (inline is ContainerInline container && !(inline is PipeTableDelimiterInline))
{
inline = container.LastChild;
}
if (inline is LiteralInline literal) if (inline is LiteralInline literal)
{ {
literal.Content.TrimEnd(); literal.Content.TrimEnd();
@@ -697,6 +684,106 @@ public class PipeTableParser : InlineParser, IPostInlineProcessor
return false; return false;
} }
/// <summary>
/// Moves pipe delimiters and line breaks that are nested inside another inline up to the root container.
/// </summary>
/// <remarks>
/// Covers input such as `*a | b*|`, where the pipe ends up inside an unmatched emphasis container.
/// Each nested delimiter is handed to <see cref="SplitContainerAtDelimiter"/> together with the
/// direct child of <paramref name="root"/> that contains it.
/// </remarks>
/// <param name="delimiters">The recorded column and line delimiters to inspect; the list itself is not modified.</param>
/// <param name="root">The container whose direct children the delimiters should become.</param>
private static void PromoteNestedPipesToRootLevel(List<Inline> delimiters, ContainerInline root)
{
    foreach (var candidate in delimiters)
    {
        // Only pipe delimiters and line breaks are promoted
        if (!(candidate is PipeTableDelimiterInline || candidate is LineBreakInline))
        {
            continue;
        }

        // Nothing to do when the delimiter is already a direct child of root
        if (candidate.Parent == root)
        {
            continue;
        }

        // Climb until we reach the ancestor that sits directly under root (or run out of parents)
        var topLevel = candidate.Parent;
        while (topLevel?.Parent != null && topLevel.Parent != root)
        {
            topLevel = topLevel.Parent;
        }

        // Promote only when the delimiter really lives somewhere below root
        if (topLevel is not null && topLevel.Parent == root)
        {
            SplitContainerAtDelimiter(candidate, topLevel);
        }
    }
}
/// <summary>
/// Splits a container at the delimiter, promoting the delimiter to root level.
/// </summary>
/// <remarks>
/// For input `*a | b*`, the pipe is inside the emphasis container:
/// EmphasisDelimiter { "a", Pipe, "b" }
/// After splitting:
/// EmphasisDelimiter { "a" }, Pipe, Container { "b" }
/// Does nothing when the delimiter has no parent.
/// NOTE(review): only the siblings that follow the delimiter inside its immediate parent are moved.
/// When that parent is not <paramref name="ancestor"/> itself (deeper nesting), inlines that follow
/// the parent inside the ancestor appear to stay in place and would end up before the promoted
/// delimiter - confirm whether this case can occur.
/// </remarks>
/// <param name="delimiter">The nested pipe delimiter or line break to promote.</param>
/// <param name="ancestor">The inline after which the delimiter is re-inserted; the caller passes the direct child of the root that contains the delimiter.</param>
private static void SplitContainerAtDelimiter(Inline delimiter, Inline ancestor)
{
// A delimiter without a parent is not nested in anything: nothing to split
if (delimiter.Parent is not { } parent) return;
// Collect content after the delimiter
// (snapshot first, then remove, so the sibling chain is not modified while it is being walked)
var contentAfter = new List<Inline>();
var current = delimiter.NextSibling;
while (current != null)
{
contentAfter.Add(current);
current = current.NextSibling;
}
// Remove content after delimiter from parent
foreach (var inline in contentAfter)
{
inline.Remove();
}
// Remove delimiter from parent
delimiter.Remove();
// Insert delimiter after the ancestor (at root level)
ancestor.InsertAfter(delimiter);
// If there's content after, wrap in new container and insert after delimiter
if (contentAfter.Count > 0)
{
// Wrap the trailing content in the container returned by CreateMatchingContainer
// (a plain ContainerInline carrying the original parent's Span, Line and Column)
var newContainer = CreateMatchingContainer(parent);
foreach (var inline in contentAfter)
{
newContainer.AppendChild(inline);
}
delimiter.InsertAfter(newContainer);
}
}
/// <summary>
/// Builds the container that receives the content split off from <paramref name="source"/>.
/// </summary>
/// <remarks>
/// The result is always a plain <see cref="ContainerInline"/>, whatever the concrete type of the source.
/// Per the original author's note, emphasis processing runs before pipe table processing, so emphasis
/// delimiters are already resolved and a plain container suffices.
/// </remarks>
/// <param name="source">The container whose Span, Line and Column are copied.</param>
/// <returns>A new, empty container carrying the source's position information.</returns>
private static ContainerInline CreateMatchingContainer(ContainerInline source) =>
    new ContainerInline { Span = source.Span, Line = source.Line, Column = source.Column };
private sealed class TableState private sealed class TableState
{ {
public bool IsInvalidTable { get; set; } public bool IsInvalidTable { get; set; }

View File

@@ -1,5 +1,5 @@
// Copyright (c) Alexandre Mutel. All rights reserved. // Copyright (c) Alexandre Mutel. All rights reserved.
// This file is licensed under the BSD-Clause 2 license. // This file is licensed under the BSD-Clause 2 license.
// See the license.txt file in the project root for more information. // See the license.txt file in the project root for more information.
using Markdig.Syntax; using Markdig.Syntax;
@@ -215,13 +215,13 @@ public static class LinkHelper
return false; return false;
} }
// An absolute URI, for these purposes, consists of a scheme followed by a colon (:) // An absolute URI, for these purposes, consists of a scheme followed by a colon (:)
// followed by zero or more characters other than ASCII whitespace and control characters, <, and >. // followed by zero or more characters other than ASCII whitespace and control characters, <, and >.
// If the URI includes these characters, they must be percent-encoded (e.g. %20 for a space). // If the URI includes these characters, they must be percent-encoded (e.g. %20 for a space).
// A URI that would end with a full stop (.) is treated instead as ending immediately before the full stop. // A URI that would end with a full stop (.) is treated instead as ending immediately before the full stop.
// a scheme is any sequence of 2–32 characters // a scheme is any sequence of 2–32 characters
// beginning with an ASCII letter // beginning with an ASCII letter
// and followed by any combination of ASCII letters, digits, or the symbols plus (”+”), period (”.”), or hyphen (”-”). // and followed by any combination of ASCII letters, digits, or the symbols plus (”+”), period (”.”), or hyphen (”-”).
// An email address, for these purposes, is anything that matches the non-normative regex from the HTML5 spec: // An email address, for these purposes, is anything that matches the non-normative regex from the HTML5 spec:
@@ -276,7 +276,7 @@ public static class LinkHelper
if (isValidChar) if (isValidChar)
{ {
// a scheme is any sequence of 2–32 characters // a scheme is any sequence of 2–32 characters
if (state > 0 && builder.Length >= 32) if (state > 0 && builder.Length >= 32)
{ {
goto ReturnFalse; goto ReturnFalse;
@@ -307,7 +307,7 @@ public static class LinkHelper
} }
} }
// append ':' or '@' // append ':' or '@'
builder.Append(c); builder.Append(c);
if (state < 0) if (state < 0)
@@ -415,10 +415,10 @@ public static class LinkHelper
public static bool TryParseInlineLink(ref StringSlice text, out string? link, out string? title, out SourceSpan linkSpan, out SourceSpan titleSpan) public static bool TryParseInlineLink(ref StringSlice text, out string? link, out string? title, out SourceSpan linkSpan, out SourceSpan titleSpan)
{ {
// 1. An inline link consists of a link text followed immediately by a left parenthesis (, // 1. An inline link consists of a link text followed immediately by a left parenthesis (,
// 2. optional whitespace, TODO: specs: is it whitespace or multiple whitespaces? // 2. optional whitespace, TODO: specs: is it whitespace or multiple whitespaces?
// 3. an optional link destination, // 3. an optional link destination,
// 4. an optional link title separated from the link destination by whitespace, // 4. an optional link title separated from the link destination by whitespace,
// 5. optional whitespace, TODO: specs: is it whitespace or multiple whitespaces? // 5. optional whitespace, TODO: specs: is it whitespace or multiple whitespaces?
// 6. and a right parenthesis ) // 6. and a right parenthesis )
bool isValid = false; bool isValid = false;
@@ -429,7 +429,7 @@ public static class LinkHelper
linkSpan = SourceSpan.Empty; linkSpan = SourceSpan.Empty;
titleSpan = SourceSpan.Empty; titleSpan = SourceSpan.Empty;
// 1. An inline link consists of a link text followed immediately by a left parenthesis (, // 1. An inline link consists of a link text followed immediately by a left parenthesis (,
if (c == '(') if (c == '(')
{ {
text.SkipChar(); text.SkipChar();
@@ -505,10 +505,10 @@ public static class LinkHelper
out SourceSpan triviaAfterTitle, out SourceSpan triviaAfterTitle,
out bool urlHasPointyBrackets) out bool urlHasPointyBrackets)
{ {
// 1. An inline link consists of a link text followed immediately by a left parenthesis (, // 1. An inline link consists of a link text followed immediately by a left parenthesis (,
// 2. optional whitespace, TODO: specs: is it whitespace or multiple whitespaces? // 2. optional whitespace, TODO: specs: is it whitespace or multiple whitespaces?
// 3. an optional link destination, // 3. an optional link destination,
// 4. an optional link title separated from the link destination by whitespace, // 4. an optional link title separated from the link destination by whitespace,
// 5. optional whitespace, TODO: specs: is it whitespace or multiple whitespaces? // 5. optional whitespace, TODO: specs: is it whitespace or multiple whitespaces?
// 6. and a right parenthesis ) // 6. and a right parenthesis )
bool isValid = false; bool isValid = false;
@@ -526,7 +526,7 @@ public static class LinkHelper
urlHasPointyBrackets = false; urlHasPointyBrackets = false;
titleEnclosingCharacter = '\0'; titleEnclosingCharacter = '\0';
// 1. An inline link consists of a link text followed immediately by a left parenthesis (, // 1. An inline link consists of a link text followed immediately by a left parenthesis (,
if (c == '(') if (c == '(')
{ {
text.SkipChar(); text.SkipChar();
@@ -773,7 +773,7 @@ public static class LinkHelper
var c = text.CurrentChar; var c = text.CurrentChar;
// a sequence of zero or more characters between an opening < and a closing > // a sequence of zero or more characters between an opening < and a closing >
// that contains no line breaks, or unescaped < or > characters, or // that contains no line breaks, or unescaped < or > characters, or
if (c == '<') if (c == '<')
{ {
@@ -820,9 +820,9 @@ public static class LinkHelper
else else
{ {
// a nonempty sequence of characters that does not start with <, does not include ASCII space or control characters, // a nonempty sequence of characters that does not start with <, does not include ASCII space or control characters,
// and includes parentheses only if (a) they are backslash-escaped or (b) they are part of a // and includes parentheses only if (a) they are backslash-escaped or (b) they are part of a
// balanced pair of unescaped parentheses that is not itself inside a balanced pair of unescaped // balanced pair of unescaped parentheses that is not itself inside a balanced pair of unescaped
// parentheses. // parentheses.
bool hasEscape = false; bool hasEscape = false;
int openedParent = 0; int openedParent = 0;
while (true) while (true)
@@ -922,7 +922,7 @@ public static class LinkHelper
var c = text.CurrentChar; var c = text.CurrentChar;
// a sequence of zero or more characters between an opening < and a closing > // a sequence of zero or more characters between an opening < and a closing >
// that contains no line breaks, or unescaped < or > characters, or // that contains no line breaks, or unescaped < or > characters, or
if (c == '<') if (c == '<')
{ {
@@ -969,9 +969,9 @@ public static class LinkHelper
else else
{ {
// a nonempty sequence of characters that does not start with <, does not include ASCII space or control characters, // a nonempty sequence of characters that does not start with <, does not include ASCII space or control characters,
// and includes parentheses only if (a) they are backslash-escaped or (b) they are part of a // and includes parentheses only if (a) they are backslash-escaped or (b) they are part of a
// balanced pair of unescaped parentheses that is not itself inside a balanced pair of unescaped // balanced pair of unescaped parentheses that is not itself inside a balanced pair of unescaped
// parentheses. // parentheses.
bool hasEscape = false; bool hasEscape = false;
int openedParent = 0; int openedParent = 0;
while (true) while (true)
@@ -1201,7 +1201,7 @@ public static class LinkHelper
if (c != '\0' && c != '\n' && c != '\r') if (c != '\0' && c != '\n' && c != '\r')
{ {
// If we were able to parse the url but the title doesn't end with space, // If we were able to parse the url but the title doesn't end with space,
// we are still returning a valid definition // we are still returning a valid definition
if (newLineCount > 0 && title != null) if (newLineCount > 0 && title != null)
{ {
@@ -1341,7 +1341,7 @@ public static class LinkHelper
if (c != '\0' && c != '\n' && c != '\r') if (c != '\0' && c != '\n' && c != '\r')
{ {
// If we were able to parse the url but the title doesn't end with space, // If we were able to parse the url but the title doesn't end with space,
// we are still returning a valid definition // we are still returning a valid definition
if (newLineCount > 0 && title != null) if (newLineCount > 0 && title != null)
{ {

View File

@@ -85,7 +85,8 @@ public class CodeInlineParser : InlineParser
// We saw the start of a code inline, but the close sticks are not present on the same line. // We saw the start of a code inline, but the close sticks are not present on the same line.
// If the next line starts with a pipe character, this is likely an incomplete CodeInline within a table. // If the next line starts with a pipe character, this is likely an incomplete CodeInline within a table.
// Treat it as regular text to avoid breaking the overall table shape. // Treat it as regular text to avoid breaking the overall table shape.
if (processor.Inline != null && processor.Inline.ContainsParentOfType<PipeTableDelimiterInline>()) // Use ContainsParentOrSiblingOfType to handle both nested and flat pipe table structures.
if (processor.Inline != null && processor.Inline.ContainsParentOrSiblingOfType<PipeTableDelimiterInline>())
{ {
slice.Start = openingStart; slice.Start = openingStart;
return false; return false;

View File

@@ -125,7 +125,10 @@ public class EmphasisInlineParser : InlineParser, IPostInlineProcessor
} }
// Follow DelimiterInline (EmphasisDelimiter, TableDelimiter...) // Follow DelimiterInline (EmphasisDelimiter, TableDelimiter...)
child = delimiterInline.FirstChild; // If the delimiter has IsClosed=true (e.g., pipe table delimiter), it has no children
// In that case, continue to next sibling instead of stopping
var firstChild = delimiterInline.FirstChild;
child = firstChild ?? delimiterInline.NextSibling;
} }
else else
{ {

View File

@@ -216,6 +216,47 @@ public abstract class Inline : MarkdownObject, IInline
return false; return false;
} }
/// <summary>
/// Determines whether an inline of the specified type is found either through
/// <see cref="ContainsParentOfType{T}"/> (nested structure) or among the direct children of the
/// topmost container reached by following <see cref="Parent"/> links (flat structure).
/// </summary>
/// <remarks>
/// The flat scan visits every direct child of the topmost container and does not exclude any of them.
/// When this inline has no <see cref="Parent"/>, only the <see cref="ContainsParentOfType{T}"/> check applies.
/// </remarks>
/// <typeparam name="T">Type of the inline to look for</typeparam>
/// <returns><c>true</c> if an inline of the specified type is found by either check; <c>false</c> otherwise</returns>
public bool ContainsParentOrSiblingOfType<T>() where T : Inline
{
    // Nested case: defer entirely to the existing parent-chain lookup.
    if (ContainsParentOfType<T>())
    {
        return true;
    }

    // Flat case: climb the Parent links until reaching the container that has no parent of its own.
    var topmost = Parent;
    while (topmost?.Parent != null)
    {
        topmost = topmost.Parent;
    }

    // No parent at all means there is no container whose children could be scanned.
    if (topmost is ContainerInline rootContainer)
    {
        // Scan the direct children of the topmost container for a match.
        for (var node = rootContainer.FirstChild; node != null; node = node.NextSibling)
        {
            if (node is T)
            {
                return true;
            }
        }
    }

    return false;
}
/// <summary> /// <summary>
/// Iterates on parents of the specified type. /// Iterates on parents of the specified type.
/// </summary> /// </summary>