Fix emphasis with HTML entities (#97)

This commit is contained in:
Alexandre Mutel
2017-02-24 07:49:46 +01:00
parent a926e1a326
commit b47ae8d9ba
3 changed files with 41 additions and 1 deletions

View File

@@ -52,3 +52,16 @@ Marked text can be used to specify that a text has been marked in a document. T
.
<p><mark>Marked text</mark></p>
````````````````````````````````
## Emphasis on Html Entities
```````````````````````````````` example
This is text MyBrand ^&reg;^ and MyTrademark ^&trade;^
This is text MyBrand^&reg;^ and MyTrademark^&trade;^
This is text MyBrand~&reg;~ and MyCopyright^&copy;^
.
<p>This is text MyBrand <sup>®</sup> and MyTrademark <sup>TM</sup>
This is text MyBrand<sup>®</sup> and MyTrademark<sup>TM</sup>
This is text MyBrand<sub>®</sub> and MyCopyright<sup>©</sup></p>
````````````````````````````````

View File

@@ -17616,6 +17616,30 @@ namespace Markdig.Tests
Console.WriteLine("Example {0}" + Environment.NewLine + "Section: {0}" + Environment.NewLine, 5, "Extensions Marked");
TestParser.TestSpec("==Marked text==", "<p><mark>Marked text</mark></p>", "emphasisextras|advanced");
}
}
// ## Emphasis on Html Entities
[TestFixture]
public partial class TestExtensionsEmphasisonHtmlEntities
{
[Test]
public void Example006()
{
// Example 6
// Section: Extensions Emphasis on Html Entities
//
// The following CommonMark:
// This is text MyBrand ^&reg;^ and MyTrademark ^&trade;^
// This is text MyBrand^&reg;^ and MyTrademark^&trade;^
// This is text MyBrand~&reg;~ and MyCopyright^&copy;^
//
// Should be rendered as:
// <p>This is text MyBrand <sup>®</sup> and MyTrademark <sup>TM</sup>
// This is text MyBrand<sup>®</sup> and MyTrademark<sup>TM</sup>
// This is text MyBrand<sub>®</sub> and MyCopyright<sup>©</sup></p>
Console.WriteLine("Example {0}" + Environment.NewLine + "Section: {0}" + Environment.NewLine, 6, "Extensions Emphasis on Html Entities");
TestParser.TestSpec("This is text MyBrand ^&reg;^ and MyTrademark ^&trade;^\nThis is text MyBrand^&reg;^ and MyTrademark^&trade;^\nThis is text MyBrand~&reg;~ and MyCopyright^&copy;^", "<p>This is text MyBrand <sup>®</sup> and MyTrademark <sup>TM</sup>\nThis is text MyBrand<sup>®</sup> and MyTrademark<sup>TM</sup>\nThis is text MyBrand<sub>®</sub> and MyCopyright<sup>©</sup></p>", "emphasisextras|advanced");
}
}
// # Extensions
//

View File

@@ -162,6 +162,9 @@ namespace Markdig.Helpers
return IsWhitespace(c) || IsZero(c);
}
// Note that we are not considering the character & as a punctuation in HTML
// as it is used for HTML entities, print unicode, so we assume that when we have a `&`
// it is more likely followed by a valid HTML Entity that represents a non punctuation
public static void CheckUnicodeCategory(this char c, out bool space, out bool punctuation)
{
// Credits: code from CommonMark.NET
@@ -170,7 +173,7 @@ namespace Markdig.Helpers
if (c <= 'ÿ')
{
space = c == '\0' || c == ' ' || (c >= '\t' && c <= '\r') || c == '\u00a0' || c == '\u0085';
punctuation = c == '\0' || (c >= 33 && c <= 47) || (c >= 58 && c <= 64) || (c >= 91 && c <= 96) || (c >= 123 && c <= 126);
punctuation = c == '\0' || (c >= 33 && c <= 47 && c != 38) || (c >= 58 && c <= 64) || (c >= 91 && c <= 96) || (c >= 123 && c <= 126);
}
else
{