mirror of
https://github.com/xoofx/markdig.git
synced 2026-02-04 05:44:50 +00:00
fix escaped characters in LinkInline
This commit is contained in:
@@ -63,4 +63,4 @@ In order:
|
||||
- Newline struct itself
|
||||
- handling newlines
|
||||
- should newlines be supported?
|
||||
- Example 207, 209: Special-casing certain edgecases
|
||||
- Example 207, 209, 291: Special-casing certain edgecases
|
||||
@@ -418,7 +418,9 @@ namespace Markdig.Helpers
|
||||
public static bool TryParseInlineLinkWhitespace(
|
||||
ref StringSlice text,
|
||||
out string link,
|
||||
out string unescapedLink,
|
||||
out string title,
|
||||
out string unescapedTitle,
|
||||
out char titleEnclosingCharacter,
|
||||
out SourceSpan linkSpan,
|
||||
out SourceSpan titleSpan,
|
||||
@@ -436,7 +438,9 @@ namespace Markdig.Helpers
|
||||
bool isValid = false;
|
||||
var c = text.CurrentChar;
|
||||
link = null;
|
||||
unescapedLink = null;
|
||||
title = null;
|
||||
unescapedTitle = null;
|
||||
|
||||
linkSpan = SourceSpan.Empty;
|
||||
titleSpan = SourceSpan.Empty;
|
||||
@@ -454,7 +458,7 @@ namespace Markdig.Helpers
|
||||
text.TrimStart();
|
||||
whitespaceBeforeLink = new SourceSpan(sourcePosition, text.Start - 1);
|
||||
var pos = text.Start;
|
||||
if (TryParseUrl(ref text, out link, out urlHasPointyBrackets))
|
||||
if (TryParseUrlWhitespace(ref text, out link, out unescapedLink, out urlHasPointyBrackets))
|
||||
{
|
||||
linkSpan.Start = pos;
|
||||
linkSpan.End = text.Start - 1;
|
||||
@@ -482,7 +486,7 @@ namespace Markdig.Helpers
|
||||
{
|
||||
isValid = true;
|
||||
}
|
||||
else if (TryParseTitle(ref text, out title, out titleEnclosingCharacter))
|
||||
else if (TryParseTitleWhitespace(ref text, out title, out unescapedTitle, out titleEnclosingCharacter))
|
||||
{
|
||||
titleSpan.Start = pos;
|
||||
titleSpan.End = text.Start - 1;
|
||||
@@ -612,6 +616,108 @@ namespace Markdig.Helpers
|
||||
return isValid;
|
||||
}
|
||||
|
||||
public static bool TryParseTitleWhitespace<T>(ref T text, out string title, out string unescapedTitle, out char enclosingCharacter) where T : ICharIterator
|
||||
{
|
||||
bool isValid = false;
|
||||
var buffer = StringBuilderCache.Local();
|
||||
var unescaped = new StringBuilder();
|
||||
enclosingCharacter = '\0';
|
||||
|
||||
// a sequence of zero or more characters between straight double-quote characters ("), including a " character only if it is backslash-escaped, or
|
||||
// a sequence of zero or more characters between straight single-quote characters ('), including a ' character only if it is backslash-escaped, or
|
||||
var c = text.CurrentChar;
|
||||
if (c == '\'' || c == '"' || c == '(')
|
||||
{
|
||||
enclosingCharacter = c;
|
||||
var closingQuote = c == '(' ? ')' : c;
|
||||
bool hasEscape = false;
|
||||
// -1: undefined
|
||||
// 0: has only spaces
|
||||
// 1: has other characters
|
||||
int hasOnlyWhiteSpacesSinceLastLine = -1;
|
||||
while (true)
|
||||
{
|
||||
c = text.NextChar();
|
||||
|
||||
if (c == '\r' || c == '\n')
|
||||
{
|
||||
if (hasOnlyWhiteSpacesSinceLastLine >= 0)
|
||||
{
|
||||
if (hasOnlyWhiteSpacesSinceLastLine == 1)
|
||||
{
|
||||
break;
|
||||
}
|
||||
hasOnlyWhiteSpacesSinceLastLine = -1;
|
||||
}
|
||||
buffer.Append(c);
|
||||
unescaped.Append(c);
|
||||
if (c == '\r' && text.PeekChar() == '\n')
|
||||
{
|
||||
buffer.Append('\n');
|
||||
unescaped.Append(c);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (c == '\0')
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
if (c == closingQuote)
|
||||
{
|
||||
if (hasEscape)
|
||||
{
|
||||
buffer.Append(closingQuote);
|
||||
unescaped.Append(closingQuote);
|
||||
hasEscape = false;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Skip last quote
|
||||
text.NextChar();
|
||||
isValid = true;
|
||||
break;
|
||||
}
|
||||
|
||||
if (hasEscape && !c.IsAsciiPunctuation())
|
||||
{
|
||||
buffer.Append('\\');
|
||||
unescaped.Append('\\');
|
||||
}
|
||||
|
||||
if (c == '\\')
|
||||
{
|
||||
hasEscape = true;
|
||||
unescaped.Append('\\');
|
||||
continue;
|
||||
}
|
||||
|
||||
hasEscape = false;
|
||||
|
||||
if (c.IsSpaceOrTab())
|
||||
{
|
||||
if (hasOnlyWhiteSpacesSinceLastLine < 0)
|
||||
{
|
||||
hasOnlyWhiteSpacesSinceLastLine = 1;
|
||||
}
|
||||
}
|
||||
else if (c != '\n' && c != '\r' && (c != '\r' && text.PeekChar() != '\n'))
|
||||
{
|
||||
hasOnlyWhiteSpacesSinceLastLine = 0;
|
||||
}
|
||||
|
||||
buffer.Append(c);
|
||||
unescaped.Append(c);
|
||||
}
|
||||
}
|
||||
|
||||
title = isValid ? buffer.ToString() : null;
|
||||
unescapedTitle = isValid ? unescaped.ToString() : null;
|
||||
buffer.Length = 0;
|
||||
return isValid;
|
||||
}
|
||||
|
||||
public static bool TryParseUrl<T>(T text, out string link) where T : ICharIterator
|
||||
{
|
||||
return TryParseUrl(ref text, out link, out _);
|
||||
@@ -757,6 +863,155 @@ namespace Markdig.Helpers
|
||||
return isValid;
|
||||
}
|
||||
|
||||
public static bool TryParseUrlWhitespace<T>(ref T text, out string link, out string unescapedLink, out bool hasPointyBrackets, bool isAutoLink = false) where T : ICharIterator
|
||||
{
|
||||
bool isValid = false;
|
||||
hasPointyBrackets = false;
|
||||
var buffer = StringBuilderCache.Local();
|
||||
var unescaped = new StringBuilder();
|
||||
unescapedLink = null;
|
||||
|
||||
var c = text.CurrentChar;
|
||||
|
||||
// a sequence of zero or more characters between an opening < and a closing >
|
||||
// that contains no line breaks, or unescaped < or > characters, or
|
||||
if (c == '<')
|
||||
{
|
||||
bool hasEscape = false;
|
||||
do
|
||||
{
|
||||
c = text.NextChar();
|
||||
if (!hasEscape && c == '>')
|
||||
{
|
||||
text.NextChar();
|
||||
hasPointyBrackets = true;
|
||||
isValid = true;
|
||||
break;
|
||||
}
|
||||
|
||||
if (!hasEscape && c == '<')
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
if (hasEscape && !c.IsAsciiPunctuation())
|
||||
{
|
||||
buffer.Append('\\');
|
||||
unescaped.Append('\\');
|
||||
}
|
||||
|
||||
if (c == '\\')
|
||||
{
|
||||
hasEscape = true;
|
||||
unescaped.Append('\\');
|
||||
continue;
|
||||
}
|
||||
|
||||
if (c.IsNewLine())
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
hasEscape = false;
|
||||
|
||||
buffer.Append(c);
|
||||
unescaped.Append(c);
|
||||
|
||||
} while (c != '\0');
|
||||
}
|
||||
else
|
||||
{
|
||||
// a nonempty sequence of characters that does not start with <, does not include ASCII space or control characters,
|
||||
// and includes parentheses only if (a) they are backslash-escaped or (b) they are part of a
|
||||
// balanced pair of unescaped parentheses that is not itself inside a balanced pair of unescaped
|
||||
// parentheses.
|
||||
bool hasEscape = false;
|
||||
int openedParent = 0;
|
||||
while (true)
|
||||
{
|
||||
// Match opening and closing parenthesis
|
||||
if (c == '(')
|
||||
{
|
||||
if (!hasEscape)
|
||||
{
|
||||
openedParent++;
|
||||
}
|
||||
}
|
||||
|
||||
if (c == ')')
|
||||
{
|
||||
if (!hasEscape)
|
||||
{
|
||||
openedParent--;
|
||||
if (openedParent < 0)
|
||||
{
|
||||
isValid = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!isAutoLink)
|
||||
{
|
||||
if (hasEscape && !c.IsAsciiPunctuation())
|
||||
{
|
||||
buffer.Append('\\');
|
||||
unescaped.Append('\\');
|
||||
}
|
||||
|
||||
// If we have an escape
|
||||
if (c == '\\')
|
||||
{
|
||||
hasEscape = true;
|
||||
c = text.NextChar();
|
||||
unescaped.Append('\\');
|
||||
continue;
|
||||
}
|
||||
|
||||
hasEscape = false;
|
||||
}
|
||||
|
||||
if (IsEndOfUri(c, isAutoLink))
|
||||
{
|
||||
isValid = true;
|
||||
break;
|
||||
}
|
||||
|
||||
if (isAutoLink)
|
||||
{
|
||||
if (c == '&')
|
||||
{
|
||||
if (HtmlHelper.ScanEntity(text, out _, out _, out _) > 0)
|
||||
{
|
||||
isValid = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (IsTrailingUrlStopCharacter(c) && IsEndOfUri(text.PeekChar(), true))
|
||||
{
|
||||
isValid = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
buffer.Append(c);
|
||||
unescaped.Append(c);
|
||||
|
||||
c = text.NextChar();
|
||||
}
|
||||
|
||||
if (openedParent > 0)
|
||||
{
|
||||
isValid = false;
|
||||
}
|
||||
}
|
||||
|
||||
link = isValid ? buffer.ToString() : null;
|
||||
unescapedLink = isValid ? unescaped.ToString() : null;
|
||||
buffer.Length = 0;
|
||||
return isValid;
|
||||
}
|
||||
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
private static bool IsTrailingUrlStopCharacter(char c)
|
||||
{
|
||||
|
||||
@@ -223,7 +223,9 @@ namespace Markdig.Parsers.Inlines
|
||||
if (LinkHelper.TryParseInlineLinkWhitespace(
|
||||
ref text,
|
||||
out string url,
|
||||
out string unescapedUrl,
|
||||
out string title,
|
||||
out string unescapedTitle,
|
||||
out char titleEnclosingCharacter,
|
||||
out SourceSpan linkSpan,
|
||||
out SourceSpan titleSpan,
|
||||
@@ -240,9 +242,11 @@ namespace Markdig.Parsers.Inlines
|
||||
{
|
||||
WhitespaceBeforeUrl = wsBeforeLink,
|
||||
Url = HtmlHelper.Unescape(url), // TODO: RTP: unescape
|
||||
UnescapedUrl = unescapedUrl, // TODO: RTP: unescape
|
||||
UrlHasPointyBrackets = urlHasPointyBrackets,
|
||||
WhitespaceAfterUrl = wsAfterLink,
|
||||
Title = HtmlHelper.Unescape(title), // TODO: RTP: unescape
|
||||
UnescapedTitle = unescapedTitle,
|
||||
TitleEnclosingCharacter = titleEnclosingCharacter,
|
||||
WhitespaceAfterTitle = wsAfterTitle,
|
||||
IsImage = openParent.IsImage,
|
||||
|
||||
@@ -58,14 +58,14 @@ namespace Markdig.Renderers.Normalize.Inlines
|
||||
{
|
||||
renderer.Write('<');
|
||||
}
|
||||
renderer.Write(link.Url);
|
||||
renderer.Write(link.UnescapedUrl);
|
||||
if (link.UrlHasPointyBrackets)
|
||||
{
|
||||
renderer.Write('>');
|
||||
}
|
||||
renderer.Write(link.WhitespaceAfterUrl);
|
||||
|
||||
if (!string.IsNullOrEmpty(link.Title))
|
||||
if (!string.IsNullOrEmpty(link.UnescapedTitle))
|
||||
{
|
||||
var open = link.TitleEnclosingCharacter;
|
||||
var close = link.TitleEnclosingCharacter;
|
||||
@@ -74,7 +74,8 @@ namespace Markdig.Renderers.Normalize.Inlines
|
||||
close = ')';
|
||||
}
|
||||
renderer.Write(open);
|
||||
renderer.Write(link.Title.Replace(@"""", @"\""")); // TODO: RTP: should this always be done?
|
||||
//renderer.Write(link.Title.Replace(@"""", @"\""")); // TODO: RTP: should this always be done?
|
||||
renderer.Write(link.UnescapedTitle);
|
||||
renderer.Write(close);
|
||||
renderer.Write(link.WhitespaceAfterTitle);
|
||||
}
|
||||
|
||||
@@ -45,6 +45,7 @@ namespace Markdig.Syntax.Inlines
|
||||
/// Gets or sets the URL.
|
||||
/// </summary>
|
||||
public string Url { get; set; }
|
||||
public string UnescapedUrl { get; internal set; }
|
||||
|
||||
public bool UrlHasPointyBrackets { get; set; }
|
||||
|
||||
@@ -60,6 +61,7 @@ namespace Markdig.Syntax.Inlines
|
||||
/// Gets or sets the title.
|
||||
/// </summary>
|
||||
public string Title { get; set; }
|
||||
public string UnescapedTitle { get; internal set; }
|
||||
|
||||
public char TitleEnclosingCharacter { get; set; }
|
||||
|
||||
|
||||
Reference in New Issue
Block a user