mirror of
https://github.com/xoofx/markdig.git
synced 2026-02-04 05:44:50 +00:00
Optimize PipeTable parsing: O(n²) → O(n) for 3.7x–85x speedup, enables 10K+ row tables (#922)
* Optimize PipeTable parsing: O(n²) → O(n) for large tables

Pipe tables were creating deeply nested tree structures where each pipe delimiter contained all subsequent content as children, causing O(n²) traversal complexity for n cells. This change restructures the parser to use a flat sibling-based structure, treating tables as matrices rather than nested trees.

Key changes:
- Set IsClosed=true on PipeTableDelimiterInline to prevent nesting
- Add PromoteNestedPipesToRootLevel() to flatten pipes nested in emphasis
- Update cell boundary detection to use sibling traversal
- Move EmphasisInlineParser before PipeTableParser in processing order
- Fix EmphasisInlineParser to continue past IsClosed delimiters
- Add ContainsParentOrSiblingOfType<T>() helper for flat structure detection

Performance improvements (measured on typical markdown content):

| Rows | Before | After | Speedup |
|------|-----------|---------|---------|
| 100 | 542 μs | 150 μs | 3.6x |
| 500 | 23,018 μs | 763 μs | 30x |
| 1000 | 89,418 μs | 1,596 μs| 56x |
| 1500 | 201,593 μs| 2,740 μs| 74x |
| 5000 | CRASH | 10,588 μs| ∞ |
| 10000| CRASH | 18,551 μs| ∞ |

Tables with 5000+ rows previously crashed due to stack overflow from recursive depth. They now parse successfully with linear time complexity.

* remove baseline results file
* Do not use System.Index and fix nullability checks for older platforms
This commit is contained in:
2
.gitignore
vendored
2
.gitignore
vendored
@@ -8,6 +8,8 @@
|
||||
*.sln.docstates
|
||||
*.nuget.props
|
||||
*.nuget.targets
|
||||
src/.idea
|
||||
BenchmarkDotNet.Artifacts
|
||||
|
||||
# User-specific files (MonoDevelop/Xamarin Studio)
|
||||
*.userprefs
|
||||
|
||||
81
src/Markdig.Benchmarks/PipeTable/PipeTableBenchmark.cs
Normal file
81
src/Markdig.Benchmarks/PipeTable/PipeTableBenchmark.cs
Normal file
@@ -0,0 +1,81 @@
|
||||
// Copyright (c) Alexandre Mutel. All rights reserved.
|
||||
// This file is licensed under the BSD-Clause 2 license.
|
||||
// See the license.txt file in the project root for more information.
|
||||
|
||||
using BenchmarkDotNet.Attributes;
|
||||
using BenchmarkDotNet.Diagnosers;
|
||||
using Markdig;
|
||||
|
||||
namespace Testamina.Markdig.Benchmarks.PipeTable;
|
||||
|
||||
/// <summary>
/// Benchmarks pipe table rendering for tables of increasing size.
/// Each benchmark converts one pre-generated five-column table to HTML through
/// <c>Markdown.ToHtml</c>, using the pipeline built in <see cref="Setup"/>.
/// </summary>
[MemoryDiagnoser]
[GcServer(true)] // Use server GC to get more comprehensive GC stats
public class PipeTableBenchmark
{
    // Every generated table has the same number of columns; only the row count varies.
    private const int ColumnCount = 5;

    private MarkdownPipeline _pipeline = null!;
    private string _table100 = null!;
    private string _table500 = null!;
    private string _table1000 = null!;
    private string _table1500 = null!;
    private string _table5000 = null!;
    private string _table10000 = null!;

    /// <summary>
    /// Builds the shared pipeline and generates the markdown inputs once,
    /// so that the benchmarks themselves only measure the conversion to HTML.
    /// </summary>
    [GlobalSetup]
    public void Setup()
    {
        static string BuildTable(int rowCount)
        {
            return PipeTableGenerator.Generate(rows: rowCount, columns: ColumnCount);
        }

        // Pipe tables are enabled as part of the advanced extensions.
        var builder = new MarkdownPipelineBuilder().UseAdvancedExtensions();
        _pipeline = builder.Build();

        // Before the optimization, tables with 5000+ rows hit the depth limit because of the
        // nested tree structure. After the optimization these sizes should work.
        _table100 = BuildTable(100);
        _table500 = BuildTable(500);
        _table1000 = BuildTable(1000);
        _table1500 = BuildTable(1500);
        _table5000 = BuildTable(5000);
        _table10000 = BuildTable(10000);
    }

    [Benchmark(Description = "PipeTable 100 rows x 5 cols")]
    public string Parse100Rows() => Markdown.ToHtml(_table100, _pipeline);

    [Benchmark(Description = "PipeTable 500 rows x 5 cols")]
    public string Parse500Rows() => Markdown.ToHtml(_table500, _pipeline);

    [Benchmark(Description = "PipeTable 1000 rows x 5 cols")]
    public string Parse1000Rows() => Markdown.ToHtml(_table1000, _pipeline);

    [Benchmark(Description = "PipeTable 1500 rows x 5 cols")]
    public string Parse1500Rows() => Markdown.ToHtml(_table1500, _pipeline);

    [Benchmark(Description = "PipeTable 5000 rows x 5 cols")]
    public string Parse5000Rows() => Markdown.ToHtml(_table5000, _pipeline);

    [Benchmark(Description = "PipeTable 10000 rows x 5 cols")]
    public string Parse10000Rows() => Markdown.ToHtml(_table10000, _pipeline);
}
|
||||
61
src/Markdig.Benchmarks/PipeTable/PipeTableGenerator.cs
Normal file
61
src/Markdig.Benchmarks/PipeTable/PipeTableGenerator.cs
Normal file
@@ -0,0 +1,61 @@
|
||||
// Copyright (c) Alexandre Mutel. All rights reserved.
|
||||
// This file is licensed under the BSD-Clause 2 license.
|
||||
// See the license.txt file in the project root for more information.
|
||||
|
||||
using System.Text;
|
||||
|
||||
namespace Testamina.Markdig.Benchmarks.PipeTable;
|
||||
|
||||
/// <summary>
/// Generates pipe table markdown content for benchmarking purposes.
/// </summary>
public static class PipeTableGenerator
{
    private const int DefaultCellWidth = 10;

    /// <summary>
    /// Generates a pipe table in markdown format: a header row, a separator row made of dashes,
    /// and then the requested number of data rows. Every row starts with a pipe and is terminated
    /// by the default line terminator of <see cref="StringBuilder.AppendLine()"/>.
    /// </summary>
    /// <param name="rows">Number of data rows (excluding header)</param>
    /// <param name="columns">Number of columns</param>
    /// <param name="cellWidth">
    /// Width each cell content is padded to (default: 10). Content longer than this is not truncated.
    /// </param>
    /// <returns>Pipe table markdown string</returns>
    public static string Generate(int rows, int columns, int cellWidth = DefaultCellWidth)
    {
        var sb = new StringBuilder();

        // Header row, e.g. "| Header 1   | Header 2   |"
        sb.Append('|');
        for (int col = 0; col < columns; col++)
        {
            AppendCell(sb, $"Header {col + 1}", cellWidth);
        }
        sb.AppendLine();

        // Separator row (with dashes). The two extra dashes cover the spaces around each cell.
        // Append(char, int) writes the run directly instead of allocating a temporary string per column.
        sb.Append('|');
        for (int col = 0; col < columns; col++)
        {
            sb.Append('-', cellWidth + 2);
            sb.Append('|');
        }
        sb.AppendLine();

        // Data rows, e.g. "| R1C1       | R1C2       |"
        for (int row = 0; row < rows; row++)
        {
            sb.Append('|');
            for (int col = 0; col < columns; col++)
            {
                AppendCell(sb, $"R{row + 1}C{col + 1}", cellWidth);
            }
            sb.AppendLine();
        }

        return sb.ToString();
    }

    /// <summary>
    /// Appends one cell: a leading space, the content right-padded to the cell width, and the closing " |".
    /// </summary>
    private static void AppendCell(StringBuilder sb, string content, int cellWidth)
    {
        sb.Append(' ');
        sb.Append(content.PadRight(cellWidth));
        sb.Append(" |");
    }
}
|
||||
@@ -7,6 +7,7 @@ using BenchmarkDotNet.Configs;
|
||||
using BenchmarkDotNet.Running;
|
||||
|
||||
using Markdig;
|
||||
using Testamina.Markdig.Benchmarks.PipeTable;
|
||||
|
||||
|
||||
namespace Testamina.Markdig.Benchmarks;
|
||||
@@ -68,7 +69,16 @@ public class Program
|
||||
//config.Add(gcDiagnoser);
|
||||
|
||||
//var config = DefaultConfig.Instance;
|
||||
BenchmarkRunner.Run<Program>(config);
|
||||
|
||||
// Run specific benchmarks based on command line arguments
|
||||
if (args.Length > 0 && args[0] == "--pipetable")
|
||||
{
|
||||
BenchmarkRunner.Run<PipeTableBenchmark>(config);
|
||||
}
|
||||
else
|
||||
{
|
||||
BenchmarkRunner.Run<Program>(config);
|
||||
}
|
||||
//BenchmarkRunner.Run<TestDictionary>(config);
|
||||
//BenchmarkRunner.Run<TestMatchPerf>();
|
||||
//BenchmarkRunner.Run<TestStringPerf>();
|
||||
|
||||
@@ -38,7 +38,7 @@ public class PipeTableExtension : IMarkdownExtension
|
||||
var lineBreakParser = pipeline.InlineParsers.FindExact<LineBreakInlineParser>();
|
||||
if (!pipeline.InlineParsers.Contains<PipeTableParser>())
|
||||
{
|
||||
pipeline.InlineParsers.InsertBefore<EmphasisInlineParser>(new PipeTableParser(lineBreakParser!, Options));
|
||||
pipeline.InlineParsers.InsertAfter<EmphasisInlineParser>(new PipeTableParser(lineBreakParser!, Options));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -19,7 +19,7 @@ namespace Markdig.Extensions.Tables;
|
||||
/// <seealso cref="IPostInlineProcessor" />
|
||||
public class PipeTableParser : InlineParser, IPostInlineProcessor
|
||||
{
|
||||
private readonly LineBreakInlineParser lineBreakParser;
|
||||
private readonly LineBreakInlineParser _lineBreakParser;
|
||||
|
||||
/// <summary>
|
||||
/// Initializes a new instance of the <see cref="PipeTableParser" /> class.
|
||||
@@ -28,7 +28,7 @@ public class PipeTableParser : InlineParser, IPostInlineProcessor
|
||||
/// <param name="options">The options.</param>
|
||||
public PipeTableParser(LineBreakInlineParser lineBreakParser, PipeTableOptions? options = null)
|
||||
{
|
||||
this.lineBreakParser = lineBreakParser ?? throw new ArgumentNullException(nameof(lineBreakParser));
|
||||
_lineBreakParser = lineBreakParser ?? throw new ArgumentNullException(nameof(lineBreakParser));
|
||||
OpeningCharacters = ['|', '\n', '\r'];
|
||||
Options = options ?? new PipeTableOptions();
|
||||
}
|
||||
@@ -86,7 +86,7 @@ public class PipeTableParser : InlineParser, IPostInlineProcessor
|
||||
tableState.IsInvalidTable = true;
|
||||
}
|
||||
tableState.LineHasPipe = false;
|
||||
lineBreakParser.Match(processor, ref slice);
|
||||
_lineBreakParser.Match(processor, ref slice);
|
||||
if (!isFirstLineEmpty)
|
||||
{
|
||||
tableState.ColumnAndLineDelimiters.Add(processor.Inline!);
|
||||
@@ -100,7 +100,8 @@ public class PipeTableParser : InlineParser, IPostInlineProcessor
|
||||
Span = new SourceSpan(position, position),
|
||||
Line = globalLineIndex,
|
||||
Column = column,
|
||||
LocalLineIndex = localLineIndex
|
||||
LocalLineIndex = localLineIndex,
|
||||
IsClosed = true // Creates flat sibling structure for O(n) traversal
|
||||
};
|
||||
|
||||
tableState.LineHasPipe = true;
|
||||
@@ -125,6 +126,8 @@ public class PipeTableParser : InlineParser, IPostInlineProcessor
|
||||
return true;
|
||||
}
|
||||
|
||||
// With flat structure, pipes are siblings at root level
|
||||
// Walk backwards from the last child to find pipe delimiters
|
||||
var child = container.LastChild;
|
||||
List<PipeTableDelimiterInline>? delimitersToRemove = null;
|
||||
|
||||
@@ -142,8 +145,8 @@ public class PipeTableParser : InlineParser, IPostInlineProcessor
|
||||
break;
|
||||
}
|
||||
|
||||
var subContainer = child as ContainerInline;
|
||||
child = subContainer?.LastChild;
|
||||
// Walk siblings instead of descending into containers
|
||||
child = child.PreviousSibling;
|
||||
}
|
||||
|
||||
// If we have found any delimiters, transform them to literals
|
||||
@@ -186,8 +189,8 @@ public class PipeTableParser : InlineParser, IPostInlineProcessor
|
||||
// Remove previous state
|
||||
state.ParserStates[Index] = null!;
|
||||
|
||||
// Continue
|
||||
if (tableState is null || container is null || tableState.IsInvalidTable || !tableState.LineHasPipe ) //|| tableState.LineIndex != state.LocalLineIndex)
|
||||
// Abort if not a valid table
|
||||
if (tableState is null || container is null || tableState.IsInvalidTable || !tableState.LineHasPipe)
|
||||
{
|
||||
if (tableState is not null)
|
||||
{
|
||||
@@ -204,11 +207,18 @@ public class PipeTableParser : InlineParser, IPostInlineProcessor
|
||||
|
||||
// Detect the header row
|
||||
var delimiters = tableState.ColumnAndLineDelimiters;
|
||||
// TODO: we could optimize this by merging FindHeaderRow and the cell loop
|
||||
var aligns = FindHeaderRow(delimiters);
|
||||
|
||||
if (Options.RequireHeaderSeparator && aligns is null)
|
||||
{
|
||||
// No valid header separator found - convert all pipe delimiters to literals
|
||||
foreach (var inline in delimiters)
|
||||
{
|
||||
if (inline is PipeTableDelimiterInline pipeDelimiter)
|
||||
{
|
||||
pipeDelimiter.ReplaceByLiteral();
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -224,68 +234,40 @@ public class PipeTableParser : InlineParser, IPostInlineProcessor
|
||||
var cells = tableState.Cells;
|
||||
cells.Clear();
|
||||
|
||||
//delimiters[0].DumpTo(state.DebugLog);
|
||||
// Pipes may end up nested inside unmatched emphasis delimiters, e.g.:
|
||||
// *a | b*|
|
||||
// Promote them to root level so we have a flat sibling structure.
|
||||
PromoteNestedPipesToRootLevel(delimiters, container);
|
||||
|
||||
// delimiters contain a list of `|` and `\n` delimiters
|
||||
// The `|` delimiters are created as child containers.
|
||||
// So the following:
|
||||
// | a | b \n
|
||||
// | d | e \n
|
||||
// The inline tree is now flat: all pipes and line breaks are siblings at root level.
|
||||
// For example, `| a | b \n| c | d \n` produces:
|
||||
// [|] [a] [|] [b] [\n] [|] [c] [|] [d] [\n]
|
||||
//
|
||||
// Will generate a tree of the following node:
|
||||
// |
|
||||
// a
|
||||
// |
|
||||
// b
|
||||
// \n
|
||||
// |
|
||||
// d
|
||||
// |
|
||||
// e
|
||||
// \n
|
||||
// When parsing delimiters, we need to recover whether a row is of the following form:
|
||||
// 0) | a | b | \n
|
||||
// 1) | a | b \n
|
||||
// 2) a | b \n
|
||||
// 3) a | b | \n
|
||||
// Tables support four row formats:
|
||||
// | a | b | (leading and trailing pipes)
|
||||
// | a | b (leading pipe only)
|
||||
// a | b (no leading or trailing pipes)
|
||||
// a | b | (trailing pipe only)
|
||||
|
||||
// If the last element is not a line break, add a line break to homogenize parsing in the next loop
|
||||
// Ensure the table ends with a line break to simplify row detection
|
||||
var lastElement = delimiters[delimiters.Count - 1];
|
||||
if (!(lastElement is LineBreakInline))
|
||||
{
|
||||
while (true)
|
||||
// Find the actual last sibling (there may be content after the last delimiter)
|
||||
while (lastElement.NextSibling != null)
|
||||
{
|
||||
if (lastElement is ContainerInline lastElementContainer)
|
||||
{
|
||||
var nextElement = lastElementContainer.LastChild;
|
||||
if (nextElement != null)
|
||||
{
|
||||
lastElement = nextElement;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
break;
|
||||
lastElement = lastElement.NextSibling;
|
||||
}
|
||||
|
||||
var endOfTable = new LineBreakInline();
|
||||
// If the last element is a container, we have to add the EOL to its child
|
||||
// otherwise only next sibling
|
||||
if (lastElement is ContainerInline)
|
||||
{
|
||||
((ContainerInline)lastElement).AppendChild(endOfTable);
|
||||
}
|
||||
else
|
||||
{
|
||||
lastElement.InsertAfter(endOfTable);
|
||||
}
|
||||
lastElement.InsertAfter(endOfTable);
|
||||
delimiters.Add(endOfTable);
|
||||
tableState.EndOfLines.Add(endOfTable);
|
||||
}
|
||||
|
||||
int lastPipePos = 0;
|
||||
|
||||
// Cell loop
|
||||
// Reconstruct the table from the delimiters
|
||||
// Build table rows and cells by iterating through delimiters
|
||||
TableRow? row = null;
|
||||
TableRow? firstRow = null;
|
||||
for (int i = 0; i < delimiters.Count; i++)
|
||||
@@ -300,9 +282,7 @@ public class PipeTableParser : InlineParser, IPostInlineProcessor
|
||||
|
||||
firstRow ??= row;
|
||||
|
||||
// If the first delimiter is a pipe and doesn't have any parent or previous sibling, for cases like:
|
||||
// 0) | a | b | \n
|
||||
// 1) | a | b \n
|
||||
// Skip leading pipe at start of row (e.g., `| a | b` or `| a | b |`)
|
||||
if (pipeSeparator != null && (delimiter.PreviousSibling is null || delimiter.PreviousSibling is LineBreakInline))
|
||||
{
|
||||
delimiter.Remove();
|
||||
@@ -316,57 +296,37 @@ public class PipeTableParser : InlineParser, IPostInlineProcessor
|
||||
}
|
||||
}
|
||||
|
||||
// We need to find the beginning/ending of a cell from a right delimiter. From the delimiter 'x', we need to find a (without the delimiter start `|`)
|
||||
// So we iterate back to the first pipe or line break
|
||||
// x
|
||||
// 1) | a | b \n
|
||||
// 2) a | b \n
|
||||
// Find cell content by walking backwards from this delimiter to the previous pipe or line break.
|
||||
// For `| a | b \n` at delimiter 'x':
|
||||
// [|] [a] [x] [b] [\n]
|
||||
// ^--- current delimiter
|
||||
// Walk back: [a] is the cell content (stop at [|])
|
||||
Inline? endOfCell = null;
|
||||
Inline? beginOfCell = null;
|
||||
var cellContentIt = delimiter;
|
||||
while (true)
|
||||
var cellContentIt = delimiter.PreviousSibling;
|
||||
while (cellContentIt != null)
|
||||
{
|
||||
cellContentIt = cellContentIt.PreviousSibling ?? cellContentIt.Parent;
|
||||
|
||||
if (cellContentIt is null || cellContentIt is LineBreakInline)
|
||||
{
|
||||
if (cellContentIt is LineBreakInline || cellContentIt is PipeTableDelimiterInline)
|
||||
break;
|
||||
}
|
||||
|
||||
// The cell begins at the first effective child after a | or the top ContainerInline (which is not necessary to bring into the tree + it contains an invalid span calculation)
|
||||
if (cellContentIt is PipeTableDelimiterInline || (cellContentIt.GetType() == typeof(ContainerInline) && cellContentIt.Parent is null ))
|
||||
{
|
||||
beginOfCell = ((ContainerInline)cellContentIt).FirstChild;
|
||||
if (endOfCell is null)
|
||||
{
|
||||
endOfCell = beginOfCell;
|
||||
}
|
||||
// Stop at the root ContainerInline (which is not necessary to bring into the tree + it contains an invalid span calculation)
|
||||
if (cellContentIt.GetType() == typeof(ContainerInline) && cellContentIt.Parent is null)
|
||||
break;
|
||||
}
|
||||
|
||||
beginOfCell = cellContentIt;
|
||||
if (endOfCell is null)
|
||||
{
|
||||
endOfCell = beginOfCell;
|
||||
}
|
||||
endOfCell ??= beginOfCell;
|
||||
|
||||
cellContentIt = cellContentIt.PreviousSibling;
|
||||
}
|
||||
|
||||
// If the current deilimiter is a pipe `|` OR
|
||||
// If the current delimiter is a pipe `|` OR
|
||||
// the beginOfCell/endOfCell are not null and
|
||||
// either they are :
|
||||
// either they are:
|
||||
// - different
|
||||
// - they contain a single element, but it is not a line break (\n) or an empty/whitespace Literal.
|
||||
// Then we can add a cell to the current row
|
||||
if (!isLine || (beginOfCell != null && endOfCell != null && ( beginOfCell != endOfCell || !(beginOfCell is LineBreakInline || (beginOfCell is LiteralInline beingOfCellLiteral && beingOfCellLiteral.Content.IsEmptyOrWhitespace())))))
|
||||
{
|
||||
if (!isLine)
|
||||
{
|
||||
// If the delimiter is a pipe, we need to remove it from the tree
|
||||
// so that previous loop looking for a parent will not go further on subsequent cells
|
||||
delimiter.Remove();
|
||||
lastPipePos = delimiter.Span.End;
|
||||
}
|
||||
|
||||
// We trim whitespace at the beginning and ending of the cell
|
||||
TrimStart(beginOfCell);
|
||||
TrimEnd(endOfCell);
|
||||
@@ -374,10 +334,20 @@ public class PipeTableParser : InlineParser, IPostInlineProcessor
|
||||
var cellContainer = new ContainerInline();
|
||||
|
||||
// Copy elements from beginOfCell on the first level
|
||||
// The pipe delimiter serves as a boundary - stop when we hit it
|
||||
var cellIt = beginOfCell;
|
||||
while (cellIt != null && !IsLine(cellIt) && !(cellIt is PipeTableDelimiterInline))
|
||||
{
|
||||
var nextSibling = cellIt.NextSibling;
|
||||
|
||||
// Skip empty literals (can result from trimming)
|
||||
if (cellIt is LiteralInline { Content.IsEmpty: true })
|
||||
{
|
||||
cellIt.Remove();
|
||||
cellIt = nextSibling;
|
||||
continue;
|
||||
}
|
||||
|
||||
cellIt.Remove();
|
||||
if (cellContainer.Span.IsEmpty)
|
||||
{
|
||||
@@ -390,8 +360,16 @@ public class PipeTableParser : InlineParser, IPostInlineProcessor
|
||||
cellIt = nextSibling;
|
||||
}
|
||||
|
||||
if (!isLine)
|
||||
{
|
||||
// Remove the pipe delimiter AFTER copying cell content
|
||||
// This preserves the sibling chain during the copy loop
|
||||
delimiter.Remove();
|
||||
lastPipePos = delimiter.Span.End;
|
||||
}
|
||||
|
||||
// Create the cell and add it to the pending row
|
||||
var tableParagraph = new ParagraphBlock()
|
||||
var tableParagraph = new ParagraphBlock
|
||||
{
|
||||
Span = cellContainer.Span,
|
||||
Line = cellContainer.Line,
|
||||
@@ -443,8 +421,7 @@ public class PipeTableParser : InlineParser, IPostInlineProcessor
|
||||
endOfLine.Remove();
|
||||
}
|
||||
|
||||
// If we have a header row, we can remove it
|
||||
// TODO: we could optimize this by merging FindHeaderRow and the previous loop
|
||||
// Mark first row as header and remove the separator row if present
|
||||
var tableRow = (TableRow)table[0];
|
||||
tableRow.IsHeader = Options.RequireHeaderSeparator;
|
||||
if (aligns != null)
|
||||
@@ -454,11 +431,13 @@ public class PipeTableParser : InlineParser, IPostInlineProcessor
|
||||
table.ColumnDefinitions.AddRange(aligns);
|
||||
}
|
||||
|
||||
// Perform delimiter processor that are coming after this processor
|
||||
// Perform all post-processors on cell content
|
||||
// With InsertAfter, emphasis runs before pipe table, so we need to re-run from index 0
|
||||
// to ensure emphasis delimiters in cells are properly matched
|
||||
foreach (var cell in cells)
|
||||
{
|
||||
var paragraph = (ParagraphBlock) cell[0];
|
||||
state.PostProcessInlines(postInlineProcessorIndex + 1, paragraph.Inline, null, true);
|
||||
state.PostProcessInlines(0, paragraph.Inline, null, true);
|
||||
if (paragraph.Inline?.LastChild is not null)
|
||||
{
|
||||
paragraph.Inline.Span.End = paragraph.Inline.LastChild.Span.End;
|
||||
@@ -548,7 +527,7 @@ public class PipeTableParser : InlineParser, IPostInlineProcessor
|
||||
continue;
|
||||
}
|
||||
|
||||
// The last delimiter is always null,
|
||||
// Parse the separator row (second row) to extract column alignments
|
||||
for (int j = i + 1; j < delimiters.Count; j++)
|
||||
{
|
||||
var delimiter = delimiters[j];
|
||||
@@ -560,11 +539,13 @@ public class PipeTableParser : InlineParser, IPostInlineProcessor
|
||||
continue;
|
||||
}
|
||||
|
||||
// Check the left side of a `|` delimiter
|
||||
// Parse the content before this delimiter as a column definition (e.g., `:---`, `---:`, `:---:`)
|
||||
// Skip if previous sibling is a pipe (empty cell) or whitespace
|
||||
TableColumnAlign? align = null;
|
||||
int delimiterCount = 0;
|
||||
if (delimiter.PreviousSibling != null &&
|
||||
!(delimiter.PreviousSibling is LiteralInline li && li.Content.IsEmptyOrWhitespace()) && // ignore parsed whitespace
|
||||
!(delimiter.PreviousSibling is PipeTableDelimiterInline) &&
|
||||
!(delimiter.PreviousSibling is LiteralInline li && li.Content.IsEmptyOrWhitespace()) &&
|
||||
!ParseHeaderString(delimiter.PreviousSibling, out align, out delimiterCount))
|
||||
{
|
||||
break;
|
||||
@@ -576,14 +557,13 @@ public class PipeTableParser : InlineParser, IPostInlineProcessor
|
||||
totalDelimiterCount += delimiterCount;
|
||||
columnDefinitions.Add(new TableColumnDefinition() { Alignment = align, Width = delimiterCount});
|
||||
|
||||
// If this is the last delimiter, we need to check the right side of the `|` delimiter
|
||||
// If this is the last pipe, check for a trailing column definition (row without trailing pipe)
|
||||
// e.g., `| :--- | ---:` has content after the last pipe
|
||||
if (nextDelimiter is null)
|
||||
{
|
||||
var nextSibling = columnDelimiter != null
|
||||
? columnDelimiter.FirstChild
|
||||
: delimiter.NextSibling;
|
||||
var nextSibling = delimiter.NextSibling;
|
||||
|
||||
// If there is no content after
|
||||
// No trailing content means row ends with pipe: `| :--- |`
|
||||
if (IsNullOrSpace(nextSibling))
|
||||
{
|
||||
isValidRow = true;
|
||||
@@ -664,9 +644,9 @@ public class PipeTableParser : InlineParser, IPostInlineProcessor
|
||||
|
||||
private static void TrimStart(Inline? inline)
|
||||
{
|
||||
while (inline is ContainerInline && !(inline is DelimiterInline))
|
||||
while (inline is ContainerInline containerInline && !(containerInline is DelimiterInline))
|
||||
{
|
||||
inline = ((ContainerInline)inline).FirstChild;
|
||||
inline = containerInline.FirstChild;
|
||||
}
|
||||
|
||||
if (inline is LiteralInline literal)
|
||||
@@ -677,6 +657,13 @@ public class PipeTableParser : InlineParser, IPostInlineProcessor
|
||||
|
||||
private static void TrimEnd(Inline? inline)
|
||||
{
|
||||
// Walk into containers to find the last leaf to trim
|
||||
// Skip PipeTableDelimiterInline but walk into other containers (including emphasis)
|
||||
while (inline is ContainerInline container && !(inline is PipeTableDelimiterInline))
|
||||
{
|
||||
inline = container.LastChild;
|
||||
}
|
||||
|
||||
if (inline is LiteralInline literal)
|
||||
{
|
||||
literal.Content.TrimEnd();
|
||||
@@ -697,6 +684,106 @@ public class PipeTableParser : InlineParser, IPostInlineProcessor
|
||||
return false;
|
||||
}
|
||||
|
||||
/// <summary>
/// Moves every pipe delimiter and line break that is nested below <paramref name="root"/>
/// up so that it becomes a direct child of <paramref name="root"/>.
/// </summary>
/// <remarks>
/// Covers input such as `*a | b*|`, where the pipe ends up inside an unmatched emphasis container.
/// Once promoted, all delimiters are siblings at root level, which is what the cell boundary
/// detection relies on. Delimiters that are already direct children of the root, or that cannot be
/// traced back to the root through their parents, are left untouched.
/// </remarks>
/// <param name="delimiters">The column and line delimiters collected for the table.</param>
/// <param name="root">The root inline container of the table paragraph.</param>
private static void PromoteNestedPipesToRootLevel(List<Inline> delimiters, ContainerInline root)
{
    foreach (var candidate in delimiters)
    {
        // Only pipe delimiters and line breaks are promoted.
        if (candidate is not PipeTableDelimiterInline && candidate is not LineBreakInline)
        {
            continue;
        }

        // Nothing to do when the delimiter already sits directly under the root.
        var topLevel = candidate.Parent;
        if (topLevel == root)
        {
            continue;
        }

        // Climb the parent chain until we reach the container that is a direct child of root.
        while (topLevel is not null && topLevel.Parent is not null && topLevel.Parent != root)
        {
            topLevel = topLevel.Parent;
        }

        // Split only when the climb ended directly under the root; the delimiter then becomes
        // the next sibling of that top-level container.
        if (topLevel is not null && topLevel.Parent == root)
        {
            SplitContainerAtDelimiter(candidate, topLevel);
        }
    }
}
|
||||
|
||||
/// <summary>
/// Cuts the delimiter's parent container in two at the delimiter and re-inserts the delimiter
/// directly after <paramref name="ancestor"/>.
/// </summary>
/// <remarks>
/// For input `*a | b*`, the pipe is inside the emphasis container:
///   EmphasisDelimiter { "a", Pipe, "b" }
/// After splitting:
///   EmphasisDelimiter { "a" }, Pipe, Container { "b" }
/// NOTE(review): only the siblings that follow the delimiter inside its immediate parent are moved.
/// When that parent is not <paramref name="ancestor"/> itself, any later content of the intermediate
/// containers stays inside <paramref name="ancestor"/> - confirm this is the intended behavior.
/// </remarks>
/// <param name="delimiter">The nested delimiter to promote.</param>
/// <param name="ancestor">The inline after which the delimiter is re-inserted.</param>
private static void SplitContainerAtDelimiter(Inline delimiter, Inline ancestor)
{
    var originalParent = delimiter.Parent;
    if (originalParent is null)
    {
        return;
    }

    // Detach everything that follows the delimiter inside its parent, keeping document order.
    var trailing = new List<Inline>();
    var node = delimiter.NextSibling;
    while (node is not null)
    {
        // Capture the next sibling before the node is removed from its parent.
        var following = node.NextSibling;
        node.Remove();
        trailing.Add(node);
        node = following;
    }

    // Move the delimiter itself so that it directly follows the ancestor.
    delimiter.Remove();
    ancestor.InsertAfter(delimiter);

    if (trailing.Count == 0)
    {
        return;
    }

    // Re-home the detached content in a fresh container placed right after the delimiter.
    var wrapper = CreateMatchingContainer(originalParent);
    foreach (var item in trailing)
    {
        wrapper.AppendChild(item);
    }
    delimiter.InsertAfter(wrapper);
}
|
||||
|
||||
/// <summary>
/// Creates the container that receives the content split off from <paramref name="source"/>.
/// </summary>
/// <param name="source">The container being split; its span, line and column are copied.</param>
/// <returns>A new, empty <see cref="ContainerInline"/> carrying the source's position information.</returns>
private static ContainerInline CreateMatchingContainer(ContainerInline source)
{
    // Emphasis processing runs before pipe table processing, so emphasis delimiters
    // are already resolved. A plain ContainerInline suffices.
    var wrapper = new ContainerInline();
    wrapper.Span = source.Span;
    wrapper.Line = source.Line;
    wrapper.Column = source.Column;
    return wrapper;
}
|
||||
|
||||
private sealed class TableState
|
||||
{
|
||||
public bool IsInvalidTable { get; set; }
|
||||
|
||||
@@ -85,7 +85,8 @@ public class CodeInlineParser : InlineParser
|
||||
// We saw the start of a code inline, but the close sticks are not present on the same line.
|
||||
// If the next line starts with a pipe character, this is likely an incomplete CodeInline within a table.
|
||||
// Treat it as regular text to avoid breaking the overall table shape.
|
||||
if (processor.Inline != null && processor.Inline.ContainsParentOfType<PipeTableDelimiterInline>())
|
||||
// Use ContainsParentOrSiblingOfType to handle both nested and flat pipe table structures.
|
||||
if (processor.Inline != null && processor.Inline.ContainsParentOrSiblingOfType<PipeTableDelimiterInline>())
|
||||
{
|
||||
slice.Start = openingStart;
|
||||
return false;
|
||||
|
||||
@@ -125,7 +125,10 @@ public class EmphasisInlineParser : InlineParser, IPostInlineProcessor
|
||||
}
|
||||
|
||||
// Follow DelimiterInline (EmphasisDelimiter, TableDelimiter...)
|
||||
child = delimiterInline.FirstChild;
|
||||
// If the delimiter has IsClosed=true (e.g., pipe table delimiter), it has no children
|
||||
// In that case, continue to next sibling instead of stopping
|
||||
var firstChild = delimiterInline.FirstChild;
|
||||
child = firstChild ?? delimiterInline.NextSibling;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
@@ -216,6 +216,47 @@ public abstract class Inline : MarkdownObject, IInline
|
||||
return false;
|
||||
}
|
||||
|
||||
/// <summary>
/// Determines whether this inline has a parent of the specified type or, failing that,
/// whether the topmost container above this inline has a direct child of the specified type.
/// </summary>
/// <remarks>
/// The parent check covers the nested structure; the scan of the topmost container's children
/// covers the flat structure. The scan starts at the container's first child and does not
/// exclude the current inline.
/// </remarks>
/// <typeparam name="T">Type of the parent or sibling to check</typeparam>
/// <returns><c>true</c> if a parent or root-level sibling of the specified type exists; <c>false</c> otherwise</returns>
public bool ContainsParentOrSiblingOfType<T>() where T : Inline
{
    // Nested case: one of the parents is a T.
    if (ContainsParentOfType<T>())
    {
        return true;
    }

    // Flat case: locate the topmost container above this inline.
    var top = Parent;
    if (top is null)
    {
        return false;
    }

    while (top.Parent is not null)
    {
        top = top.Parent;
    }

    // Scan the direct children of that container for a T.
    for (var node = top.FirstChild; node is not null; node = node.NextSibling)
    {
        if (node is T)
        {
            return true;
        }
    }

    return false;
}
|
||||
|
||||
/// <summary>
|
||||
/// Iterates on parents of the specified type.
|
||||
/// </summary>
|
||||
|
||||
Reference in New Issue
Block a user