mirror of
https://github.com/xoofx/markdig.git
synced 2026-02-06 21:36:15 +00:00
Compare commits
6 Commits
example-lu
...
0.30.4
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
98c687b4ed | ||
|
|
8e4a732efe | ||
|
|
bce4b70dc6 | ||
|
|
1f71520de9 | ||
|
|
bfd7b6460c | ||
|
|
0e26ec5382 |
92
src/Markdig.Tests/TestCharHelper.cs
Normal file
92
src/Markdig.Tests/TestCharHelper.cs
Normal file
@@ -0,0 +1,92 @@
|
||||
using System.Collections.Generic;
|
||||
using System.Globalization;
|
||||
using Markdig.Helpers;
|
||||
using NUnit.Framework;
|
||||
|
||||
namespace Markdig.Tests
|
||||
{
|
||||
/// <summary>
/// Exhaustively compares the <see cref="CharHelper"/> classification helpers against simple
/// reference implementations for every possible <see cref="char"/> value.
/// </summary>
public class TestCharHelper
{
    // An ASCII punctuation character is
    // !, ", #, $, %, &, ', (, ), *, +, ,, -, ., / (U+0021–2F),
    // :, ;, <, =, >, ?, @ (U+003A–0040),
    // [, \, ], ^, _, ` (U+005B–0060),
    // {, |, }, or ~ (U+007B–007E).
    private static readonly HashSet<char> s_asciiPunctuation = new()
    {
        '!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-', '.', '/',
        ':', ';', '<', '=', '>', '?', '@',
        '[', '\\', ']', '^', '_', '`',
        '{', '|', '}', '~'
    };

    // A Unicode punctuation character is an ASCII punctuation character or anything in the general Unicode categories
    // Pc, Pd, Pe, Pf, Pi, Po, or Ps.
    private static readonly HashSet<UnicodeCategory> s_punctuationCategories = new()
    {
        UnicodeCategory.ConnectorPunctuation,
        UnicodeCategory.DashPunctuation,
        UnicodeCategory.ClosePunctuation,
        UnicodeCategory.FinalQuotePunctuation,
        UnicodeCategory.InitialQuotePunctuation,
        UnicodeCategory.OtherPunctuation,
        UnicodeCategory.OpenPunctuation
    };

    // Reference punctuation check: the explicit ASCII set for c <= 127, the Unicode category otherwise.
    private static bool ExpectedIsPunctuation(char c)
    {
        return c <= 127
            ? s_asciiPunctuation.Contains(c)
            : s_punctuationCategories.Contains(CharUnicodeInfo.GetUnicodeCategory(c));
    }

    // Reference whitespace check.
    private static bool ExpectedIsWhitespace(char c)
    {
        // A Unicode whitespace character is any code point in the Unicode Zs general category,
        // or a tab (U+0009), line feed (U+000A), form feed (U+000C), or carriage return (U+000D).
        return c == '\t' || c == '\n' || c == '\u000C' || c == '\r' ||
            CharUnicodeInfo.GetUnicodeCategory(c) == UnicodeCategory.SpaceSeparator;
    }

    // Formats a char as U+XXXX. Each test loops over 65,536 values, so without this a failing
    // assertion would not reveal which character was misclassified.
    private static string Describe(char c) => $"U+{(int)c:X4}";

    [Test]
    public void IsWhitespace()
    {
        for (int i = char.MinValue; i <= char.MaxValue; i++)
        {
            char c = (char)i;

            Assert.AreEqual(ExpectedIsWhitespace(c), CharHelper.IsWhitespace(c), Describe(c));
        }
    }

    [Test]
    public void CheckUnicodeCategory()
    {
        for (int i = char.MinValue; i <= char.MaxValue; i++)
        {
            char c = (char)i;

            // U+0000 is expected to be reported as both space and punctuation.
            bool expectedSpace = c == 0 || ExpectedIsWhitespace(c);
            bool expectedPunctuation = c == 0 || ExpectedIsPunctuation(c);

            CharHelper.CheckUnicodeCategory(c, out bool spaceActual, out bool punctuationActual);

            Assert.AreEqual(expectedSpace, spaceActual, Describe(c));
            Assert.AreEqual(expectedPunctuation, punctuationActual, Describe(c));
        }
    }

    [Test]
    public void IsSpaceOrPunctuation()
    {
        for (int i = char.MinValue; i <= char.MaxValue; i++)
        {
            char c = (char)i;

            // U+0000 is expected to count as space-or-punctuation as well.
            bool expected = c == 0 || ExpectedIsWhitespace(c) || ExpectedIsPunctuation(c);

            Assert.AreEqual(expected, CharHelper.IsSpaceOrPunctuation(c), Describe(c));
        }
    }
}
|
||||
}
|
||||
@@ -8,6 +8,7 @@ using System.Linq;
|
||||
using System.Text;
|
||||
using System.Text.RegularExpressions;
|
||||
using Markdig.Extensions.JiraLinks;
|
||||
using Markdig.Renderers.Roundtrip;
|
||||
using Markdig.Syntax;
|
||||
using NUnit.Framework;
|
||||
|
||||
@@ -67,6 +68,15 @@ namespace Markdig.Tests
|
||||
TestDescendantsOrder.TestSchemas(specsSyntaxTrees);
|
||||
}
|
||||
|
||||
[Test]
public void ParseEmptyDocumentWithTrackTriviaEnabled()
{
    // An empty input parsed with trivia tracking must round-trip to an empty output.
    using var output = new StringWriter();
    var emptyDocument = Markdown.Parse("", trackTrivia: true);

    var renderer = new RoundtripRenderer(output);
    renderer.Render(emptyDocument);

    Assert.AreEqual("", output.ToString());
}
|
||||
|
||||
public static void TestSpec(string inputText, string expectedOutputText, string extensions = null, bool plainText = false, string context = null)
|
||||
{
|
||||
context ??= string.Empty;
|
||||
|
||||
@@ -10,9 +10,7 @@ namespace Markdig.Tests
|
||||
{
|
||||
[TestCase("| S | T |\r\n|---|---| \r\n| G | H |")]
|
||||
[TestCase("| S | T |\r\n|---|---|\t\r\n| G | H |")]
|
||||
[TestCase("| S | T |\r\n|---|---|\v\r\n| G | H |")]
|
||||
[TestCase("| S | T |\r\n|---|---|\f\r\n| G | H |")]
|
||||
[TestCase("| S | T |\r\n|---|---|\f\v\t \r\n| G | H |")]
|
||||
[TestCase("| S | \r\n|---|\r\n| G |\r\n\r\n| D | D |\r\n| ---| ---| \r\n| V | V |", 2)]
|
||||
public void TestTableBug(string markdown, int tableCount = 1)
|
||||
{
|
||||
|
||||
@@ -53,7 +53,7 @@ namespace Markdig.Helpers
|
||||
|
||||
// A right-flanking delimiter run is a delimiter run that is
|
||||
// (1) not preceded by Unicode whitespace, and either
|
||||
// (2a) not preceded by a punctuation character, or
|
||||
// (2b) preceded by a punctuation character and followed by Unicode whitespace or a punctuation character.
|
||||
// For purposes of this definition, the beginning and the end of the line count as Unicode whitespace.
|
||||
canClose = !prevIsWhiteSpace &&
|
||||
@@ -144,9 +144,37 @@ namespace Markdig.Helpers
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool IsWhitespace(this char c)
{
    // Returns true when c is a Unicode whitespace character as defined below.
    //
    // 2.1 Characters and lines
    // A Unicode whitespace character is any code point in the Unicode Zs general category,
    // or a tab (U+0009), line feed (U+000A), form feed (U+000C), or carriage return (U+000D).
    if (c <= ' ')
    {
        // Every candidate in this branch is <= 32, so membership is a single bit test
        // against a 64-bit mask. Line tabulation (U+000B) is deliberately not in the mask.
        const long Mask =
            (1L << ' ') |
            (1L << '\t') |
            (1L << '\n') |
            (1L << '\f') |
            (1L << '\r');

        return (Mask & (1L << c)) != 0;
    }

    // Above the space character, only values from U+00A0 upwards are examined further.
    return c >= '\u00A0' && IsWhitespaceRare(c);

    static bool IsWhitespaceRare(char c)
    {
        // Hand-expanded alternative to:
        // return CharUnicodeInfo.GetUnicodeCategory(c) == UnicodeCategory.SpaceSeparator;

        if (c < 5760)
        {
            // Below U+1680 the only accepted value is the no-break space.
            return c == '\u00A0';
        }
        else
        {
            // U+1680, U+2000..U+200A, U+202F, U+205F and U+3000.
            return c <= 12288 &&
                (c == 5760 || IsInInclusiveRange(c, 8192, 8202) || c == 8239 || c == 8287 || c == 12288);
        }
    }
}
|
||||
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
@@ -171,46 +199,47 @@ namespace Markdig.Helpers
|
||||
// Check if a char is a space or a punctuation.
// On return, 'space' and 'punctuation' hold the classification of 'c'.
// U+0000 is reported as both space and punctuation.
public static void CheckUnicodeCategory(this char c, out bool space, out bool punctuation)
{
    if (IsWhitespace(c))
    {
        space = true;
        punctuation = false;
    }
    else if (c <= 127)
    {
        // Non-whitespace ASCII: only U+0000 is still flagged as space here.
        space = c == '\0';
        punctuation = c == '\0' || IsAsciiPunctuation(c);
    }
    else
    {
        // A Unicode punctuation character is an ASCII punctuation character
        // or anything in the general Unicode categories Pc, Pd, Pe, Pf, Pi, Po, or Ps.
        space = false;
        UnicodeCategory category = CharUnicodeInfo.GetUnicodeCategory(c);
        punctuation = category == UnicodeCategory.ConnectorPunctuation
            || category == UnicodeCategory.DashPunctuation
            || category == UnicodeCategory.OpenPunctuation
            || category == UnicodeCategory.ClosePunctuation
            || category == UnicodeCategory.InitialQuotePunctuation
            || category == UnicodeCategory.FinalQuotePunctuation
            || category == UnicodeCategory.OtherPunctuation;
    }
}
|
||||
|
||||
// Same as CheckUnicodeCategory
|
||||
internal static bool IsSpaceOrPunctuation(this char c)
|
||||
{
|
||||
if (c <= 'ÿ')
|
||||
if (IsWhitespace(c))
|
||||
{
|
||||
return c == '\0' || c == ' ' || (c >= '\t' && c <= '\r') || c == '\u00a0' || c == '\u0085' ||
|
||||
(c >= 33 && c <= 47 && c != 38) || (c >= 58 && c <= 64) || (c >= 91 && c <= 96) || (c >= 123 && c <= 126);
|
||||
return true;
|
||||
}
|
||||
else if (c <= 127)
|
||||
{
|
||||
return c == '\0' || IsAsciiPunctuation(c);
|
||||
}
|
||||
else
|
||||
{
|
||||
var category = CharUnicodeInfo.GetUnicodeCategory(c);
|
||||
return category == UnicodeCategory.SpaceSeparator
|
||||
|| category == UnicodeCategory.LineSeparator
|
||||
|| category == UnicodeCategory.ParagraphSeparator
|
||||
|| category == UnicodeCategory.ConnectorPunctuation
|
||||
return category == UnicodeCategory.ConnectorPunctuation
|
||||
|| category == UnicodeCategory.DashPunctuation
|
||||
|| category == UnicodeCategory.OpenPunctuation
|
||||
|| category == UnicodeCategory.ClosePunctuation
|
||||
@@ -289,44 +318,16 @@ namespace Markdig.Helpers
|
||||
public static bool IsAsciiPunctuation(this char c)
{
    // Returns true when c is one of the ASCII punctuation characters listed below.
    //
    // 2.1 Characters and lines
    // An ASCII punctuation character is
    // !, ", #, $, %, &, ', (, ), *, +, ,, -, ., / (U+0021–2F),
    // :, ;, <, =, >, ?, @ (U+003A–0040),
    // [, \, ], ^, _, ` (U+005B–0060),
    // {, |, }, or ~ (U+007B–007E).
    // The four contiguous ranges are tested directly; the leading c <= 127 check
    // rejects every non-ASCII char before any range comparison runs.
    return c <= 127 && (
        IsInInclusiveRange(c, 33, 47) ||
        IsInInclusiveRange(c, 58, 64) ||
        IsInInclusiveRange(c, 91, 96) ||
        IsInInclusiveRange(c, 123, 126));
}
|
||||
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
|
||||
using System;
|
||||
using System.Diagnostics.CodeAnalysis;
|
||||
using System.Runtime.CompilerServices;
|
||||
|
||||
namespace Markdig.Helpers
|
||||
{
|
||||
@@ -193,7 +194,7 @@ namespace Markdig.Helpers
|
||||
{
|
||||
return false;
|
||||
}
|
||||
if (c == ' ' || c == '\n' || c == '"' || c == '\'' || c == '=' || c == '<' || c == '>' || c == '`')
|
||||
if (IsSpaceOrSpecialHtmlChar(c))
|
||||
{
|
||||
break;
|
||||
}
|
||||
@@ -202,6 +203,26 @@ namespace Markdig.Helpers
|
||||
c = text.NextChar();
|
||||
}
|
||||
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
static bool IsSpaceOrSpecialHtmlChar(char c)
{
    // Returns true for ' ', '\n', '"', '\'', '=', '<', '>' and '`'.
    // All of these except '`' are <= '>' (62), so they fit into one 64-bit lookup mask.
    if (c <= '>')
    {
        const long Terminators =
            (1L << ' ')
            | (1L << '\n')
            | (1L << '"')
            | (1L << '\'')
            | (1L << '=')
            | (1L << '<')
            | (1L << '>');

        return ((Terminators >> c) & 1L) != 0;
    }

    // The backtick is the only accepted character above '>'.
    return c == '`';
}
|
||||
|
||||
// We need at least one char after '='
|
||||
if (matchCount == 0)
|
||||
{
|
||||
@@ -227,7 +248,7 @@ namespace Markdig.Helpers
|
||||
while (true)
|
||||
{
|
||||
c = text.NextChar();
|
||||
if (c.IsAlphaNumeric() || c == '_' || c == ':' || c == '.' || c == '-')
|
||||
if (c.IsAlphaNumeric() || IsCharToAppend(c))
|
||||
{
|
||||
builder.Append(c);
|
||||
}
|
||||
@@ -235,6 +256,23 @@ namespace Markdig.Helpers
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
static bool IsCharToAppend(char c)
{
    // Returns true for '_', ':', '.' and '-'.
    // All four lie in ['-', '_'], so c is rebased to an offset from '-' and anything
    // outside that window is rejected with a single unsigned comparison.
    int offset = c - '-';
    if ((uint)offset > (uint)('_' - '-'))
    {
        return false;
    }

    // One bit per accepted character, indexed by its offset from '-'.
    const long Accepted =
        (1L << ('_' - '-'))
        | (1L << (':' - '-'))
        | (1L << ('.' - '-'))
        | (1L << ('-' - '-'));

    return ((Accepted >> offset) & 1L) != 0;
}
|
||||
}
|
||||
|
||||
hasAttribute = true;
|
||||
|
||||
@@ -55,7 +55,7 @@ namespace Markdig.Helpers
|
||||
public static void ArgumentOutOfRangeException(string paramName) => throw new ArgumentOutOfRangeException(paramName);
|
||||
|
||||
// Throws an ArgumentOutOfRangeException carrying both a message and the offending parameter name.
// The helper takes (message, paramName), but the exception constructor takes (paramName, message),
// so the arguments are swapped on the way in.
[DoesNotReturn]
public static void ArgumentOutOfRangeException(string message, string paramName) => throw new ArgumentOutOfRangeException(paramName, message);
|
||||
|
||||
// Throws an ArgumentOutOfRangeException whose parameter name is "index".
[DoesNotReturn]
public static void ArgumentOutOfRangeException_index()
{
    throw new ArgumentOutOfRangeException("index");
}
|
||||
|
||||
@@ -33,7 +33,7 @@ namespace Markdig.Parsers.Inlines
|
||||
/// <summary>
|
||||
/// The character of this emphasis.
|
||||
/// </summary>
|
||||
public char Character { get; }
|
||||
public char Character { get; }
|
||||
|
||||
/// <summary>
|
||||
/// The minimum number of character this emphasis is expected to have (must be >=1)
|
||||
|
||||
@@ -65,7 +65,11 @@ namespace Markdig.Parsers
|
||||
var noBlocksFoundBlock = new EmptyBlock(null);
|
||||
List<StringSlice> linesBefore = blockProcessor.UseLinesBefore();
|
||||
noBlocksFoundBlock.LinesAfter = new List<StringSlice>();
|
||||
noBlocksFoundBlock.LinesAfter.AddRange(linesBefore);
|
||||
if (linesBefore != null)
|
||||
{
|
||||
noBlocksFoundBlock.LinesAfter.AddRange(linesBefore);
|
||||
}
|
||||
|
||||
document.Add(noBlocksFoundBlock);
|
||||
}
|
||||
else if (lastBlock != null && blockProcessor.LinesBefore != null)
|
||||
|
||||
Reference in New Issue
Block a user