Add support for GFM autolinks (#165, #169)

This commit is contained in:
Alexandre Mutel
2017-11-17 21:28:27 +01:00
parent 9a30883e2a
commit 964538ec79
10 changed files with 325 additions and 82 deletions

View File

@@ -76,3 +76,67 @@ Check **http://www.a.com** or __http://www.b.com__
.
<p>Check <strong><a href="http://www.a.com">http://www.a.com</a></strong> or <strong><a href="http://www.b.com">http://www.b.com</a></strong></p>
````````````````````````````````
### GFM Support
Extract from [GFM Autolinks extensions specs](https://github.github.com/gfm/#autolinks-extension-)
```````````````````````````````` example
www.commonmark.org
.
<p><a href="http://www.commonmark.org">www.commonmark.org</a></p>
````````````````````````````````
```````````````````````````````` example
Visit www.commonmark.org/help for more information.
.
<p>Visit <a href="http://www.commonmark.org/help">www.commonmark.org/help</a> for more information.</p>
````````````````````````````````
```````````````````````````````` example
Visit www.commonmark.org.
Visit www.commonmark.org/a.b.
.
<p>Visit <a href="http://www.commonmark.org">www.commonmark.org</a>.</p>
<p>Visit <a href="http://www.commonmark.org/a.b">www.commonmark.org/a.b</a>.</p>
````````````````````````````````
```````````````````````````````` example
www.google.com/search?q=Markup+(business)
(www.google.com/search?q=Markup+(business))
.
<p><a href="http://www.google.com/search?q=Markup+(business)">www.google.com/search?q=Markup+(business)</a></p>
<p>(<a href="http://www.google.com/search?q=Markup+(business)">www.google.com/search?q=Markup+(business)</a>)</p>
````````````````````````````````
```````````````````````````````` example
www.google.com/search?q=commonmark&hl=en
www.google.com/search?q=commonmark&hl;
.
<p><a href="http://www.google.com/search?q=commonmark&amp;hl=en">www.google.com/search?q=commonmark&amp;hl=en</a></p>
<p><a href="http://www.google.com/search?q=commonmark">www.google.com/search?q=commonmark</a>&amp;hl;</p>
````````````````````````````````
```````````````````````````````` example
www.commonmark.org/he<lp
.
<p><a href="http://www.commonmark.org/he">www.commonmark.org/he</a>&lt;lp</p>
````````````````````````````````
```````````````````````````````` example
http://commonmark.org
(Visit https://encrypted.google.com/search?q=Markup+(business))
Anonymous FTP is available at ftp://foo.bar.baz.
.
<p><a href="http://commonmark.org">http://commonmark.org</a></p>
<p>(Visit <a href="https://encrypted.google.com/search?q=Markup+(business)">https://encrypted.google.com/search?q=Markup+(business)</a>)</p>
<p>Anonymous FTP is available at <a href="ftp://foo.bar.baz">ftp://foo.bar.baz</a>.</p>
````````````````````````````````

View File

@@ -1,4 +1,4 @@
using System;
using System;
using NUnit.Framework;
namespace Markdig.Tests
@@ -20874,6 +20874,157 @@ namespace Markdig.Tests
Console.WriteLine("Example {0}" + Environment.NewLine + "Section: {0}" + Environment.NewLine, 7, "Extensions AutoLinks");
TestParser.TestSpec("Check **http://www.a.com** or __http://www.b.com__", "<p>Check <strong><a href=\"http://www.a.com\">http://www.a.com</a></strong> or <strong><a href=\"http://www.b.com\">http://www.b.com</a></strong></p>", "autolinks|advanced");
}
}
// ### GFM Support
//
// Extract from [GFM Autolinks extensions specs](https://github.github.com/gfm/#autolinks-extension-)
[TestFixture]
public partial class TestExtensionsAutoLinksGFMSupport
{
    /// <summary>
    /// Example 8: a bare "www." host name on its own line is rendered as a link
    /// whose href is prefixed with "http://".
    /// </summary>
    [Test]
    public void Example008()
    {
        // Example 8
        // Section: Extensions AutoLinks GFM Support
        //
        // The following CommonMark:
        //     www.commonmark.org
        //
        // Should be rendered as:
        //     <p><a href="http://www.commonmark.org">www.commonmark.org</a></p>

        // {0} is the example number and {1} the section name.
        Console.WriteLine("Example {0}" + Environment.NewLine + "Section: {1}" + Environment.NewLine, 8, "Extensions AutoLinks GFM Support");
        TestParser.TestSpec("www.commonmark.org", "<p><a href=\"http://www.commonmark.org\">www.commonmark.org</a></p>", "autolinks|advanced");
    }
}
[TestFixture]
public partial class TestExtensionsAutoLinksGFMSupport
{
    /// <summary>
    /// Example 9: a "www." link with a path is recognized in the middle of a sentence,
    /// and the surrounding words stay plain text.
    /// </summary>
    [Test]
    public void Example009()
    {
        // Example 9
        // Section: Extensions AutoLinks GFM Support
        //
        // The following CommonMark:
        //     Visit www.commonmark.org/help for more information.
        //
        // Should be rendered as:
        //     <p>Visit <a href="http://www.commonmark.org/help">www.commonmark.org/help</a> for more information.</p>

        // {0} is the example number and {1} the section name.
        Console.WriteLine("Example {0}" + Environment.NewLine + "Section: {1}" + Environment.NewLine, 9, "Extensions AutoLinks GFM Support");
        TestParser.TestSpec("Visit www.commonmark.org/help for more information.", "<p>Visit <a href=\"http://www.commonmark.org/help\">www.commonmark.org/help</a> for more information.</p>", "autolinks|advanced");
    }
}
[TestFixture]
public partial class TestExtensionsAutoLinksGFMSupport
{
    /// <summary>
    /// Example 10: a trailing period is left outside the link, while a period
    /// inside the path is kept as part of it.
    /// </summary>
    [Test]
    public void Example010()
    {
        // Example 10
        // Section: Extensions AutoLinks GFM Support
        //
        // The following CommonMark:
        //     Visit www.commonmark.org.
        //
        //     Visit www.commonmark.org/a.b.
        //
        // Should be rendered as:
        //     <p>Visit <a href="http://www.commonmark.org">www.commonmark.org</a>.</p>
        //     <p>Visit <a href="http://www.commonmark.org/a.b">www.commonmark.org/a.b</a>.</p>

        // {0} is the example number and {1} the section name.
        Console.WriteLine("Example {0}" + Environment.NewLine + "Section: {1}" + Environment.NewLine, 10, "Extensions AutoLinks GFM Support");
        TestParser.TestSpec("Visit www.commonmark.org.\n\nVisit www.commonmark.org/a.b.", "<p>Visit <a href=\"http://www.commonmark.org\">www.commonmark.org</a>.</p>\n<p>Visit <a href=\"http://www.commonmark.org/a.b\">www.commonmark.org/a.b</a>.</p>", "autolinks|advanced");
    }
}
[TestFixture]
public partial class TestExtensionsAutoLinksGFMSupport
{
    /// <summary>
    /// Example 11: parentheses inside the URL are kept in the link, while a pair of
    /// parentheses wrapping the whole URL stays outside of it.
    /// </summary>
    [Test]
    public void Example011()
    {
        // Example 11
        // Section: Extensions AutoLinks GFM Support
        //
        // The following CommonMark:
        //     www.google.com/search?q=Markup+(business)
        //
        //     (www.google.com/search?q=Markup+(business))
        //
        // Should be rendered as:
        //     <p><a href="http://www.google.com/search?q=Markup+(business)">www.google.com/search?q=Markup+(business)</a></p>
        //     <p>(<a href="http://www.google.com/search?q=Markup+(business)">www.google.com/search?q=Markup+(business)</a>)</p>

        // {0} is the example number and {1} the section name.
        Console.WriteLine("Example {0}" + Environment.NewLine + "Section: {1}" + Environment.NewLine, 11, "Extensions AutoLinks GFM Support");
        TestParser.TestSpec("www.google.com/search?q=Markup+(business)\n\n(www.google.com/search?q=Markup+(business))", "<p><a href=\"http://www.google.com/search?q=Markup+(business)\">www.google.com/search?q=Markup+(business)</a></p>\n<p>(<a href=\"http://www.google.com/search?q=Markup+(business)\">www.google.com/search?q=Markup+(business)</a>)</p>", "autolinks|advanced");
    }
}
[TestFixture]
public partial class TestExtensionsAutoLinksGFMSupport
{
    /// <summary>
    /// Example 12: an ampersand query parameter is kept inside the link, whereas a
    /// trailing sequence shaped like an entity reference (ampersand, name, semicolon)
    /// is excluded from it.
    /// </summary>
    [Test]
    public void Example012()
    {
        // Example 12
        // Section: Extensions AutoLinks GFM Support
        //
        // The following CommonMark:
        //     www.google.com/search?q=commonmark&hl=en
        //
        //     www.google.com/search?q=commonmark&hl;
        //
        // Should be rendered as:
        //     <p><a href="http://www.google.com/search?q=commonmark&amp;hl=en">www.google.com/search?q=commonmark&amp;hl=en</a></p>
        //     <p><a href="http://www.google.com/search?q=commonmark">www.google.com/search?q=commonmark</a>&amp;hl;</p>

        // {0} is the example number and {1} the section name.
        Console.WriteLine("Example {0}" + Environment.NewLine + "Section: {1}" + Environment.NewLine, 12, "Extensions AutoLinks GFM Support");
        TestParser.TestSpec("www.google.com/search?q=commonmark&hl=en\n\nwww.google.com/search?q=commonmark&hl;", "<p><a href=\"http://www.google.com/search?q=commonmark&amp;hl=en\">www.google.com/search?q=commonmark&amp;hl=en</a></p>\n<p><a href=\"http://www.google.com/search?q=commonmark\">www.google.com/search?q=commonmark</a>&amp;hl;</p>", "autolinks|advanced");
    }
}
[TestFixture]
public partial class TestExtensionsAutoLinksGFMSupport
{
    /// <summary>
    /// Example 13: a less-than sign immediately ends the autolink; the remaining
    /// text is rendered as escaped plain text.
    /// </summary>
    [Test]
    public void Example013()
    {
        // Example 13
        // Section: Extensions AutoLinks GFM Support
        //
        // The following CommonMark:
        //     www.commonmark.org/he<lp
        //
        // Should be rendered as:
        //     <p><a href="http://www.commonmark.org/he">www.commonmark.org/he</a>&lt;lp</p>

        // {0} is the example number and {1} the section name.
        Console.WriteLine("Example {0}" + Environment.NewLine + "Section: {1}" + Environment.NewLine, 13, "Extensions AutoLinks GFM Support");
        TestParser.TestSpec("www.commonmark.org/he<lp", "<p><a href=\"http://www.commonmark.org/he\">www.commonmark.org/he</a>&lt;lp</p>", "autolinks|advanced");
    }
}
[TestFixture]
public partial class TestExtensionsAutoLinksGFMSupport
{
    /// <summary>
    /// Example 14: URLs with the http, https and ftp schemes are autolinked, with
    /// the enclosing parenthesis and the trailing period left outside the link.
    /// </summary>
    [Test]
    public void Example014()
    {
        // Example 14
        // Section: Extensions AutoLinks GFM Support
        //
        // The following CommonMark:
        //     http://commonmark.org
        //
        //     (Visit https://encrypted.google.com/search?q=Markup+(business))
        //
        //     Anonymous FTP is available at ftp://foo.bar.baz.
        //
        // Should be rendered as:
        //     <p><a href="http://commonmark.org">http://commonmark.org</a></p>
        //     <p>(Visit <a href="https://encrypted.google.com/search?q=Markup+(business)">https://encrypted.google.com/search?q=Markup+(business)</a>)</p>
        //     <p>Anonymous FTP is available at <a href="ftp://foo.bar.baz">ftp://foo.bar.baz</a>.</p>

        // {0} is the example number and {1} the section name.
        Console.WriteLine("Example {0}" + Environment.NewLine + "Section: {1}" + Environment.NewLine, 14, "Extensions AutoLinks GFM Support");
        TestParser.TestSpec("http://commonmark.org\n\n(Visit https://encrypted.google.com/search?q=Markup+(business))\n\nAnonymous FTP is available at ftp://foo.bar.baz.", "<p><a href=\"http://commonmark.org\">http://commonmark.org</a></p>\n<p>(Visit <a href=\"https://encrypted.google.com/search?q=Markup+(business)\">https://encrypted.google.com/search?q=Markup+(business)</a>)</p>\n<p>Anonymous FTP is available at <a href=\"ftp://foo.bar.baz\">ftp://foo.bar.baz</a>.</p>", "autolinks|advanced");
    }
}
// ## Jira Links
//

View File

@@ -1,4 +1,4 @@
// Copyright (c) Alexandre Mutel. All rights reserved.
// Copyright (c) Alexandre Mutel. All rights reserved.
// This file is licensed under the BSD-Clause 2 license.
// See the license.txt file in the project root for more information.
@@ -39,7 +39,7 @@ namespace Markdig.Tests
{
var text = new StringSlice(uri);
string link;
Assert.True(LinkHelper.TryParseUrl(ref text, out link));
Assert.True(LinkHelper.TryParseUrl(ref text, out link, true));
Assert.AreEqual("http://google.com", link);
Assert.AreEqual('.', text.CurrentChar);
}

View File

@@ -1,4 +1,4 @@
// Copyright (c) Alexandre Mutel. All rights reserved.
// Copyright (c) Alexandre Mutel. All rights reserved.
// This file is licensed under the BSD-Clause 2 license.
// See the license.txt file in the project root for more information.
@@ -30,11 +30,18 @@ namespace Markdig.Extensions.AutoLinks
};
}
/// <summary>
/// Tells whether <paramref name="c"/> may immediately precede an autolink.
/// </summary>
/// <param name="c">The character located just before the candidate link.</param>
/// <returns>
/// <c>true</c> for one of the delimiters *, _, ~ and (, or when
/// <c>IsWhiteSpaceOrZero</c> accepts the character; otherwise <c>false</c>.
/// </returns>
private static bool IsValidPreviousCharacter(char c)
{
    // All such recognized autolinks can only come at the beginning of a line, after whitespace, or any of the delimiting characters *, _, ~, and (.
    switch (c)
    {
        case '*':
        case '_':
        case '~':
        case '(':
            return true;

        default:
            return c.IsWhiteSpaceOrZero();
    }
}
public override bool Match(InlineProcessor processor, ref StringSlice slice)
{
// Previous char must be a whitespace or a punctuation
var previousChar = slice.PeekCharExtra(-1);
if (!previousChar.IsAsciiPunctuation() && !previousChar.IsWhiteSpaceOrZero())
if (!IsValidPreviousCharacter(previousChar))
{
return false;
}
@@ -72,7 +79,7 @@ namespace Markdig.Extensions.AutoLinks
break;
case 'w':
if (!slice.MatchLowercase("ww.", 1) || previousChar == '/') // We won't match http:/www. or /www.xxx
if (!slice.MatchLowercase("ww.", 1)) // We won't match http:/www. or /www.xxx
{
return false;
}
@@ -81,7 +88,7 @@ namespace Markdig.Extensions.AutoLinks
// Parse URL
string link;
if (!LinkHelper.TryParseUrl(ref slice, out link))
if (!LinkHelper.TryParseUrl(ref slice, out link, true))
{
return false;
}

View File

@@ -1,4 +1,4 @@
// Copyright (c) Alexandre Mutel. All rights reserved.
// Copyright (c) Alexandre Mutel. All rights reserved.
// This file is licensed under the BSD-Clause 2 license.
// See the license.txt file in the project root for more information.
@@ -472,10 +472,10 @@ namespace Markdig.Helpers
}
else if (c == '&')
{
string namedEntity;
int entityNameStart;
int entityNameLength;
int numericEntity;
var match = ScanEntity(text, searchPos, text.Length - searchPos, out namedEntity,
out numericEntity);
var match = ScanEntity(new StringSlice(text, searchPos, text.Length - 1), out numericEntity, out entityNameStart, out entityNameLength);
if (match == 0)
{
searchPos++;
@@ -484,9 +484,10 @@ namespace Markdig.Helpers
{
searchPos += match;
if (namedEntity != null)
if (entityNameLength > 0)
{
var decoded = EntityHelper.DecodeEntity(namedEntity);
var namedEntity = new StringSlice(text, entityNameStart, entityNameStart + entityNameLength - 1);
var decoded = EntityHelper.DecodeEntity(namedEntity.ToString());
if (decoded != null)
{
sb.Append(text, lastPos, searchPos - match - lastPos);
@@ -533,7 +534,7 @@ namespace Markdig.Helpers
/// Scans an entity.
/// Returns number of chars matched.
/// </summary>
public static int ScanEntity(string s, int pos, int length, out string namedEntity, out int numericEntity)
public static int ScanEntity<T>(T slice, out int numericEntity, out int namedEntityStart, out int namedEntityLength) where T : ICharIterator
{
// Credits: code from CommonMark.NET
// Copyright (c) 2014, Kārlis Gaņģis All rights reserved.
@@ -545,29 +546,29 @@ namespace Markdig.Helpers
.? { return 0; }
*/
var lastPos = pos + length;
namedEntity = null;
numericEntity = 0;
namedEntityStart = 0;
namedEntityLength = 0;
if (pos + 3 >= lastPos)
return 0;
if (s[pos] != '&')
return 0;
char c;
int i;
int counter = 0;
if (s[pos + 1] == '#')
if (slice.CurrentChar != '&' || slice.PeekChar(3) == '\0')
{
c = s[pos + 2];
return 0;
}
var start = slice.Start;
char c = slice.NextChar();
int counter = 0;
if (c == '#')
{
c = slice.PeekChar();
if (c == 'x' || c == 'X')
{
c = slice.NextChar(); // skip #
// expect 1-8 hex digits starting from pos+3
for (i = pos + 3; i < lastPos; i++)
while (c != '\0')
{
c = s[i];
c = slice.NextChar();
if (c >= '0' && c <= '9')
{
if (++counter == 9) return 0;
@@ -588,7 +589,7 @@ namespace Markdig.Helpers
}
if (c == ';')
return counter == 0 ? 0 : i - pos + 1;
return counter == 0 ? 0 : slice.Start - start + 1;
return 0;
}
@@ -596,9 +597,10 @@ namespace Markdig.Helpers
else
{
// expect 1-8 digits starting from pos+2
for (i = pos + 2; i < lastPos; i++)
while (c != '\0')
{
c = s[i];
c = slice.NextChar();
if (c >= '0' && c <= '9')
{
if (++counter == 9) return 0;
@@ -607,7 +609,7 @@ namespace Markdig.Helpers
}
if (c == ';')
return counter == 0 ? 0 : i - pos + 1;
return counter == 0 ? 0 : slice.Start - start + 1;
return 0;
}
@@ -616,25 +618,26 @@ namespace Markdig.Helpers
else
{
// expect a letter and 1-31 letters or digits
c = s[pos + 1];
if (!((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')))
return 0;
for (i = pos + 2; i < lastPos; i++)
namedEntityStart = slice.Start;
namedEntityLength++;
while (c != '\0')
{
c = s[i];
c = slice.NextChar();
if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))
{
if (++counter == 32)
return 0;
namedEntityLength++;
continue;
}
if (c == ';')
{
namedEntity = s.Substring(pos + 1, counter + 1);
return counter == 0 ? 0 : i - pos + 1;
return counter == 0 ? 0 : slice.Start - start + 1;
}
return 0;

View File

@@ -1,4 +1,4 @@
// Copyright (c) Alexandre Mutel. All rights reserved.
// Copyright (c) Alexandre Mutel. All rights reserved.
// This file is licensed under the BSD-Clause 2 license.
// See the license.txt file in the project root for more information.
@@ -34,8 +34,9 @@ namespace Markdig.Helpers
/// <summary>
/// Peeks at the next character, without incrementing the <see cref="Start"/> position.
/// </summary>
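/// <param name="offset">The number of characters ahead of the current <see cref="Start"/> position to peek at. Defaults to 1, i.e. the next character.</param>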
/// <returns>The next character. `\0` is end of the iteration.</returns>
char PeekChar();
char PeekChar(int offset = 1);
/// <summary>
/// Gets a value indicating whether this instance is empty.

View File

@@ -4,6 +4,7 @@
using System;
using System.Runtime.CompilerServices;
using System.Text;
using Markdig.Parsers.Inlines;
using Markdig.Syntax;
namespace Markdig.Helpers
@@ -513,7 +514,7 @@ namespace Markdig.Helpers
return TryParseUrl(ref text, out link);
}
public static bool TryParseUrl<T>(ref T text, out string link) where T : ICharIterator
public static bool TryParseUrl<T>(ref T text, out string link, bool isAutoLink = false) where T : ICharIterator
{
bool isValid = false;
var buffer = StringBuilderCache.Local();
@@ -610,16 +611,30 @@ namespace Markdig.Helpers
hasEscape = false;
if (IsEndOfUri(c))
if (IsEndOfUri(c, isAutoLink))
{
isValid = true;
break;
}
if (c == '.' && IsEndOfUri(text.PeekChar()))
if (isAutoLink)
{
isValid = true;
break;
if (c == '&')
{
int entityNameStart;
int entityNameLength;
int entityValue;
if (HtmlHelper.ScanEntity(text, out entityValue, out entityNameStart, out entityNameLength) > 0)
{
isValid = true;
break;
}
}
if (IsTrailingUrlStopCharacter(c) && IsEndOfUri(text.PeekChar(), true))
{
isValid = true;
break;
}
}
buffer.Append(c);
@@ -638,9 +653,17 @@ namespace Markdig.Helpers
return isValid;
}
/// <summary>
/// Tells whether <paramref name="c"/> is one of the punctuation characters that is
/// not considered part of an autolink when it appears at its very end.
/// </summary>
/// <param name="c">The character to test.</param>
/// <returns><c>true</c> for ?, !, ., ,, :, *, _ and ~; otherwise <c>false</c>.</returns>
[MethodImpl(MethodImplOptionPortable.AggressiveInlining)]
private static bool IsTrailingUrlStopCharacter(char c)
{
    // Trailing punctuation (specifically, ?, !, ., ,, :, *, _, and ~) will not be considered part of the autolink, though they may be included in the interior of the link:
    return c == '?' || c == '!' || c == '.' || c == ',' || c == ':' || c == '*' || c == '_' || c == '~';
}

/// <summary>
/// Tells whether <paramref name="c"/> terminates a URI.
/// </summary>
/// <param name="c">The character to test.</param>
/// <param name="isAutoLink">
/// When <c>true</c>, a '&lt;' character also terminates the URI. Defaults to
/// <c>false</c> so that one-argument call sites keep compiling.
/// </param>
/// <returns>
/// <c>true</c> when the character is '\0', a space or tab, a control character,
/// or (for autolinks only) '&lt;'; otherwise <c>false</c>.
/// </returns>
[MethodImpl(MethodImplOptionPortable.AggressiveInlining)]
private static bool IsEndOfUri(char c, bool isAutoLink = false)
{
    return c == '\0' || c.IsSpaceOrTab() || c.IsControl() || (isAutoLink && c == '<'); // TODO: specs unclear. space is strict or relaxed? (includes tabs?)
}
public static bool TryParseLinkReferenceDefinition<T>(T text, out string label, out string url,

View File

@@ -1,4 +1,4 @@
// Copyright (c) Alexandre Mutel. All rights reserved.
// Copyright (c) Alexandre Mutel. All rights reserved.
// This file is licensed under the BSD-Clause 2 license.
// See the license.txt file in the project root for more information.
@@ -197,14 +197,14 @@ namespace Markdig.Helpers
/// <seealso cref="ICharIterator" />
public struct Iterator : ICharIterator
{
private readonly StringLineGroup lines;
private int offset;
private readonly StringLineGroup _lines;
private int _offset;
public Iterator(StringLineGroup lines)
{
this.lines = lines;
this._lines = lines;
Start = -1;
offset = -1;
_offset = -1;
SliceIndex = 0;
CurrentChar = '\0';
End = -2;
@@ -228,45 +228,47 @@ namespace Markdig.Helpers
/// <summary>
/// Advances the iterator by one character and returns the new current character.
/// </summary>
/// <returns>
/// The new <c>CurrentChar</c>: a character of the current line, '\n' once the
/// current line is exhausted, or '\0' when the position has moved past <c>End</c>.
/// </returns>
public char NextChar()
{
    Start++;
    _offset++;
    if (Start <= End)
    {
        var slice = (StringSlice)_lines.Lines[SliceIndex];
        if (_offset < slice.Length)
        {
            CurrentChar = slice[slice.Start + _offset];
        }
        else
        {
            // Past the last character of this line: emit a newline and reset the
            // in-line offset so that the next call lands on index 0 of the next line.
            CurrentChar = '\n';
            SliceIndex++;
            _offset = -1;
        }
    }
    else
    {
        // End of the iteration: pin the position just after End and undo the
        // offset increment made above so repeated calls do not drift.
        CurrentChar = '\0';
        Start = End + 1;
        SliceIndex = _lines.Count;
        _offset--;
    }
    return CurrentChar;
}
/// <summary>
/// Peeks at a character ahead of the current position without moving the iterator.
/// </summary>
/// <param name="offset">
/// The number of characters ahead of the current position to look at. Defaults to 1
/// (the next character). Must not be negative.
/// </param>
/// <returns>
/// '\0' when <c>Start + offset</c> is past <c>End</c>; '\n' when the offset reaches
/// or passes the end of the current line (including offsets that would land inside
/// a following line); otherwise the character of the current line at that offset.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException">
/// Thrown when <paramref name="offset"/> is negative.
/// </exception>
public char PeekChar(int offset = 1)
{
    if (offset < 0)
    {
        // ArgumentOutOfRangeException takes the parameter name first, then the message.
        throw new ArgumentOutOfRangeException(nameof(offset), "Negative offset are not supported for StringLineGroup");
    }

    if (Start + offset > End)
    {
        return '\0';
    }

    var slice = (StringSlice)_lines.Lines[SliceIndex];
    if (_offset + offset >= slice.Length)
    {
        return '\n';
    }

    return slice[slice.Start + _offset + offset];
}
public bool TrimStart()

View File

@@ -1,4 +1,4 @@
// Copyright (c) Alexandre Mutel. All rights reserved.
// Copyright (c) Alexandre Mutel. All rights reserved.
// This file is licensed under the BSD-Clause 2 license.
// See the license.txt file in the project root for more information.
@@ -112,17 +112,6 @@ namespace Markdig.Helpers
return index >= Start && index <= End ? Text[index] : (char) 0;
}
/// <summary>
/// Peeks the character immediately after the current <see cref="Start"/> position
/// or returns `\0` if after the <see cref="End"/> position.
/// </summary>
/// <returns>The next character, returns `\0` if none.</returns>
[MethodImpl(MethodImplOptionPortable.AggressiveInlining)]
public char PeekChar()
{
return PeekChar(1);
}
/// <summary>
/// Peeks a character at the specified offset from the current beginning of the string, without taking into account <see cref="Start"/> and <see cref="End"/>
/// </summary>

View File

@@ -1,6 +1,8 @@
// Copyright (c) Alexandre Mutel. All rights reserved.
// This file is licensed under the BSD-Clause 2 license.
// See the license.txt file in the project root for more information.
using System.Text;
using Markdig.Helpers;
using Markdig.Syntax;
using Markdig.Syntax.Inlines;
@@ -25,17 +27,18 @@ namespace Markdig.Parsers.Inlines
public static bool TryParse(ref StringSlice slice, out string literal, out int match)
{
literal = null;
string entityName;
int entityNameStart;
int entityNameLength;
int entityValue;
match = HtmlHelper.ScanEntity(slice.Text, slice.Start, slice.Length, out entityName, out entityValue);
match = HtmlHelper.ScanEntity(slice, out entityValue, out entityNameStart, out entityNameLength);
if (match == 0)
{
return false;
}
if (entityName != null)
if (entityNameLength > 0)
{
literal = EntityHelper.DecodeEntity(entityName);
literal = EntityHelper.DecodeEntity(new StringSlice(slice.Text, entityNameStart, entityNameStart + entityNameLength - 1).ToString());
}
else if (entityValue >= 0)
{