mirror of
https://github.com/xoofx/markdig.git
synced 2026-04-05 21:10:13 +00:00
Add CJK-friendly Emphasis Extension (#921)
* Add CJK-friendly Emphasis Extension * Add auto-generated test file * Add name for configuration * Remove useless default value assignments Co-authored-by: Miha Zupan <mihazupan.zupan1@gmail.com> * Make `CheckOpenCloseDelimiterCjkFriendly` internal only * Remove `CjkFriendlyEmphasisExtension` class * Add some comments including links * Add direct tests on `CharHelper.CheckOpenCloseDelimiterCjkFriendly` * Fix generated tests * Add `#if NET` * Skip Rune-dependent tests in .NET Standard tests * Add missing XML Documentation Comments * Fix URL * Change test condition * Add test in .NET Framework 4.8.1 * Add netstandard2.0 to SpecFileGen * Add fallback for netstandard2.0 * Fix * Revert "Fix" This reverts commit42e998b085. * Revert "Add fallback for netstandard2.0" This reverts commit7400a7bb0b. * Revert "Add netstandard2.0 to SpecFileGen" This reverts commitf9aa8e1e8d. * Revert "Add test in .NET Framework 4.8.1" This reverts commitd8d6d516ed. * Fix missing indent --------- Co-authored-by: Miha Zupan <mihazupan.zupan1@gmail.com>
This commit is contained in:
4
.github/workflows/test-netstandard.yml
vendored
4
.github/workflows/test-netstandard.yml
vendored
@@ -35,10 +35,10 @@ jobs:
|
||||
|
||||
- name: Test Debug
|
||||
run: |
|
||||
dotnet build src/Markdig.Tests/Markdig.Tests.csproj -c Debug --no-restore
|
||||
dotnet build src/Markdig.Tests/Markdig.Tests.csproj -c Debug --no-restore -p:MarkdigNoRuneTests=true
|
||||
dotnet test src/Markdig.Tests/Markdig.Tests.csproj -c Debug --no-build
|
||||
|
||||
- name: Test Release
|
||||
run: |
|
||||
dotnet build src/Markdig.Tests/Markdig.Tests.csproj -c Release --no-restore
|
||||
dotnet build src/Markdig.Tests/Markdig.Tests.csproj -c Release --no-restore -p:MarkdigNoRuneTests=true
|
||||
dotnet test src/Markdig.Tests/Markdig.Tests.csproj -c Release --no-build
|
||||
|
||||
@@ -51,6 +51,7 @@ You can **try Markdig online** and compare it to other implementations on [babel
|
||||
- [**Diagrams**](src/Markdig.Tests/Specs/DiagramsSpecs.md) extension whenever a fenced code block contains a special keyword, it will be converted to a div block with the content as-is (currently, supports [`mermaid`](https://mermaid.js.org) and [`nomnoml`](https://github.com/skanaar/nomnoml) diagrams)
|
||||
- [**YAML Front Matter**](src/Markdig.Tests/Specs/YamlSpecs.md) to parse without evaluating the front matter and to discard it from the HTML output (typically used for previewing without the front matter in MarkdownEditor)
|
||||
- [**JIRA links**](src/Markdig.Tests/Specs/JiraLinks.md) to automatically generate links for JIRA project references (Thanks to @clarkd: https://github.com/clarkd/MarkdigJiraLinker)
|
||||
- [**CJK-friendly Emphasis**](src/Markdig.Tests/Specs/CJKFriendlyEmphasis.md) to mitigate a CommonMark specification issue in CJK languages (Thanks to @tats-u: https://github.com/tats-u/markdown-cjk-friendly)
|
||||
- Starting with Markdig version `0.20.0+`, Markdig is compatible only with `NETStandard 2.0`, `NETStandard 2.1`, `NETCoreApp 2.1` and `NETCoreApp 3.1`.
|
||||
|
||||
If you are looking for support for an old .NET Framework 3.5 or 4.0, you can download Markdig `0.18.3`.
|
||||
|
||||
@@ -12,6 +12,10 @@
|
||||
<NoWarn>$(NoWarn);NETSDK1138</NoWarn>
|
||||
</PropertyGroup>
|
||||
|
||||
<PropertyGroup Condition="'$(MarkdigNoRuneTests)' == 'true'">
|
||||
<DefineConstants>$(DefineConstants);MARKDIG_NO_RUNE_TESTS</DefineConstants>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Microsoft.NET.Test.Sdk" />
|
||||
<PackageReference Include="NUnit" />
|
||||
|
||||
67
src/Markdig.Tests/Specs/CJKFriendlyEmphasis.generated.cs
Normal file
67
src/Markdig.Tests/Specs/CJKFriendlyEmphasis.generated.cs
Normal file
@@ -0,0 +1,67 @@
|
||||
|
||||
// --------------------------------
|
||||
// CJK-friendly Emphasis
|
||||
// --------------------------------
|
||||
|
||||
using System;
|
||||
using NUnit.Framework;
|
||||
|
||||
namespace Markdig.Tests.Specs.CJKFriendlyEmphasis
|
||||
{
|
||||
[TestFixture]
|
||||
public class TestCJKFriendlyEmphasisExtension
|
||||
{
|
||||
// ## CJK-friendly Emphasis Extension
|
||||
//
|
||||
// See https://github.com/tats-u/markdown-cjk-friendly/blob/main/specification.md for details about the spec of this extension.
|
||||
//
|
||||
// This extension drastically mitigates [the long-standing issue (specification flaw)](https://github.com/commonmark/commonmark-spec/issues/650) in CommonMark that emphasis in CJK languages is often not parsed as expected.
|
||||
//
|
||||
// The plain CommonMark cannot recognize even the following emphasis in CJK languages:
|
||||
[Test]
|
||||
public void CJKFriendlyEmphasisExtension_Example001()
|
||||
{
|
||||
// Example 1
|
||||
// Section: CJK-friendly Emphasis Extension
|
||||
//
|
||||
// The following Markdown:
|
||||
// **この文を強調できますか(Can I emphasize this sentence)?**残念ながらこの文のせいでできません(Unfortunately not possible due to this sentence)。
|
||||
//
|
||||
// Should be rendered as:
|
||||
// <p><strong>この文を強調できますか(Can I emphasize this sentence)?</strong>残念ながらこの文のせいでできません(Unfortunately not possible due to this sentence)。</p>
|
||||
|
||||
TestParser.TestSpec("**この文を強調できますか(Can I emphasize this sentence)?**残念ながらこの文のせいでできません(Unfortunately not possible due to this sentence)。", "<p><strong>この文を強調できますか(Can I emphasize this sentence)?</strong>残念ながらこの文のせいでできません(Unfortunately not possible due to this sentence)。</p>", "cjk-friendly-emphasis", context: "Example 1\nSection CJK-friendly Emphasis Extension\n");
|
||||
}
|
||||
|
||||
// ````````````````````````````````` example
|
||||
// 我可以强调**这个`code`**吗(Can I emphasize **this `code`**)?
|
||||
// .
|
||||
// <p>我可以强调<code>这个`code`</code>吗(Can I emphasize <strong>this <code>code</code></strong>)?</p>
|
||||
// `````````````````````````````````
|
||||
[Test]
|
||||
public void CJKFriendlyEmphasisExtension_Example002()
|
||||
{
|
||||
// Example 2
|
||||
// Section: CJK-friendly Emphasis Extension
|
||||
//
|
||||
// The following Markdown:
|
||||
// **이 용어(This term)**를 강조해 주세요. (Please emphasize **this term**.)
|
||||
//
|
||||
// Should be rendered as:
|
||||
// <p><strong>이 용어(This term)</strong>를 강조해 주세요. (Please emphasize <strong>this term</strong>.)</p>
|
||||
|
||||
TestParser.TestSpec("**이 용어(This term)**를 강조해 주세요. (Please emphasize **this term**.)", "<p><strong>이 용어(This term)</strong>를 강조해 주세요. (Please emphasize <strong>this term</strong>.)</p>", "cjk-friendly-emphasis", context: "Example 2\nSection CJK-friendly Emphasis Extension\n");
|
||||
}
|
||||
// You can compare the results with and without this extension: https://tats-u.github.io/markdown-cjk-friendly/?sc8=KirjgZPjga7mlofjgpLlvLfoqr_jgafjgY3jgb7jgZnjgYvvvIhDYW4gSSBlbXBoYXNpemUgdGhpcyBzZW50ZW5jZe-8ie-8nyoq5q6L5b-144Gq44GM44KJ44GT44Gu5paH44Gu44Gb44GE44Gn44Gn44GN44G-44Gb44KT77yIVW5mb3J0dW5hdGVseSBub3QgcG9zc2libGUgZHVlIHRvIHRoaXMgc2VudGVuY2XvvInjgIIKCuaIkeWPr-S7peW8uuiwgyoq6L-Z5LiqYGNvZGVgKirlkJfvvIhDYW4gSSBlbXBoYXNpemUgKip0aGlzIGBjb2RlYCoq77yJ77yfCgoqKuydtCDsmqnslrQoVGhpcyB0ZXJtKSoq66W8IOqwleyhsO2VtCDso7zshLjsmpQuIChQbGVhc2UgZW1waGFzaXplICoqdGhpcyB0ZXJtKiouKQo&gfm=1&engine=markdown-it
|
||||
//
|
||||
// You will find how poor the plain CommonMark is for CJK languages.
|
||||
//
|
||||
// To use this extension, configure the pipeline as follows:
|
||||
//
|
||||
// ```csharp
|
||||
// var pipeline = new MarkdownPipelineBuilder()
|
||||
// .UseCJKFriendlyEmphasis() // Add this
|
||||
// .Build();
|
||||
// ```
|
||||
}
|
||||
}
|
||||
37
src/Markdig.Tests/Specs/CJKFriendlyEmphasis.md
Normal file
37
src/Markdig.Tests/Specs/CJKFriendlyEmphasis.md
Normal file
@@ -0,0 +1,37 @@
|
||||
## CJK-friendly Emphasis Extension
|
||||
|
||||
See https://github.com/tats-u/markdown-cjk-friendly/blob/main/specification.md for details about the spec of this extension.
|
||||
|
||||
This extension drastically mitigates [the long-standing issue (specification flaw)](https://github.com/commonmark/commonmark-spec/issues/650) in CommonMark that emphasis in CJK languages is often not parsed as expected.
|
||||
|
||||
The plain CommonMark cannot recognize even the following emphasis in CJK languages:
|
||||
|
||||
```````````````````````````````` example
|
||||
**この文を強調できますか(Can I emphasize this sentence)?**残念ながらこの文のせいでできません(Unfortunately not possible due to this sentence)。
|
||||
.
|
||||
<p><strong>この文を強調できますか(Can I emphasize this sentence)?</strong>残念ながらこの文のせいでできません(Unfortunately not possible due to this sentence)。</p>
|
||||
````````````````````````````````
|
||||
|
||||
```````````````````````````````` example
|
||||
我可以强调**这个`code`**吗(Can I emphasize **this `code`**)?
|
||||
.
|
||||
<p>我可以强调<strong>这个<code>code</code></strong>吗(Can I emphasize <strong>this <code>code</code></strong>)?</p>
|
||||
````````````````````````````````
|
||||
|
||||
```````````````````````````````` example
|
||||
**이 용어(This term)**를 강조해 주세요. (Please emphasize **this term**.)
|
||||
.
|
||||
<p><strong>이 용어(This term)</strong>를 강조해 주세요. (Please emphasize <strong>this term</strong>.)</p>
|
||||
````````````````````````````````
|
||||
|
||||
You can compare the results with and without this extension: https://tats-u.github.io/markdown-cjk-friendly/?sc8=KirjgZPjga7mlofjgpLlvLfoqr_jgafjgY3jgb7jgZnjgYvvvIhDYW4gSSBlbXBoYXNpemUgdGhpcyBzZW50ZW5jZe-8ie-8nyoq5q6L5b-144Gq44GM44KJ44GT44Gu5paH44Gu44Gb44GE44Gn44Gn44GN44G-44Gb44KT77yIVW5mb3J0dW5hdGVseSBub3QgcG9zc2libGUgZHVlIHRvIHRoaXMgc2VudGVuY2XvvInjgIIKCuaIkeWPr-S7peW8uuiwgyoq6L-Z5LiqYGNvZGVgKirlkJfvvIhDYW4gSSBlbXBoYXNpemUgKip0aGlzIGBjb2RlYCoq77yJ77yfCgoqKuydtCDsmqnslrQoVGhpcyB0ZXJtKSoq66W8IOqwleyhsO2VtCDso7zshLjsmpQuIChQbGVhc2UgZW1waGFzaXplICoqdGhpcyB0ZXJtKiouKQo&gfm=1&engine=markdown-it
|
||||
|
||||
You will find how poor the plain CommonMark is for CJK languages.
|
||||
|
||||
To use this extension, configure the pipeline as follows:
|
||||
|
||||
```csharp
|
||||
var pipeline = new MarkdownPipelineBuilder()
|
||||
.UseCjkFriendlyEmphasis() // Add this
|
||||
.Build();
|
||||
```
|
||||
@@ -32,5 +32,6 @@ You will find from the following links the supported extensions in markdig and t
|
||||
- [**Diagrams**](DiagramsSpecs.md)
|
||||
- [**YAML frontmatter**](YamlSpecs.md)
|
||||
- [**JIRA links**](JiraLinks.md)
|
||||
- [**CJK-friendly Emphasis**](CJKFriendlyEmphasis.md)
|
||||
|
||||
> Notice that the links above are not yet the final documentation but are "specification" files used for testing the correctness of markdig for each extension
|
||||
204
src/Markdig.Tests/TestCjkFriendlyEmphasis.cs
Normal file
204
src/Markdig.Tests/TestCjkFriendlyEmphasis.cs
Normal file
@@ -0,0 +1,204 @@
|
||||
// Copyright (c) Alexandre Mutel. All rights reserved.
|
||||
// This file is licensed under the BSD-Clause 2 license.
|
||||
// See the license.txt file in the project root for more information.
|
||||
|
||||
using Markdig.Helpers;
|
||||
using System;
|
||||
using System.Buffers;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
#if !NET || !MARKDIG_NO_RUNE_TESTS
|
||||
using System.Text;
|
||||
#endif
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Markdig.Tests
|
||||
{
|
||||
[TestFixture]
|
||||
public class TestCjkFriendlyEmphasis
|
||||
{
|
||||
// Builds a fresh pipeline on every call, with CJK-friendly emphasis as the only option switched on.
private static MarkdownPipeline GetPipeline()
{
    var builder = new MarkdownPipelineBuilder().UseCjkFriendlyEmphasis();
    return builder.Build();
}
|
||||
|
||||
// Builds a fresh pipeline on every call: CJK-friendly emphasis first, then the emphasis extras
// (the strikethrough tests below rely on the extras to turn `~~` into <del>).
private static MarkdownPipeline GetPipelineWithStrikethrough()
{
    var builder = new MarkdownPipelineBuilder().UseCjkFriendlyEmphasis();
    builder = builder.UseEmphasisExtras();
    return builder.Build();
}
|
||||
|
||||
[Test]
|
||||
[TestCase("これは**私のやりたかったこと。**だからするの。", "<p>これは<strong>私のやりたかったこと。</strong>だからするの。</p>\n")]
|
||||
[TestCase("**[製品ほげ](./product-foo)**と**[製品ふが](./product-bar)**をお試しください", "<p><strong><a href=\"./product-foo\">製品ほげ</a></strong>と<strong><a href=\"./product-bar\">製品ふが</a></strong>をお試しください</p>\n")]
|
||||
[TestCase("先頭の**`コード`も注意。**", "<p>先頭の<strong><code>コード</code>も注意。</strong></p>\n")]
|
||||
[TestCase("**末尾の`コード`**も注意。", "<p><strong>末尾の<code>コード</code></strong>も注意。</p>\n")]
|
||||
[TestCase("税込**¥10,000**で入手できます。", "<p>税込<strong>¥10,000</strong>で入手できます。</p>\n")]
|
||||
[TestCase("""太郎は**"こんにちわ"**といった""", "<p>太郎は<strong>&quot;こんにちわ&quot;</strong>といった</p>\n")]
|
||||
[TestCase("**C#**や**F#**は**「.NET」**というプラットフォーム上で動作します。", "<p><strong>C#</strong>や<strong>F#</strong>は<strong>「.NET」</strong>というプラットフォーム上で動作します。</p>\n")]
|
||||
[TestCase(".NET**(.NET Frameworkは不可)**では、", "<p>.NET<strong>(.NET Frameworkは不可)</strong>では、</p>\n")]
|
||||
[TestCase("大塚︀**(U+585A U+FE00)** 大塚**(U+FA10)**", "<p>大塚︀<strong>(U+585A U+FE00)</strong> 大塚<strong>(U+FA10)</strong></p>\n")]
|
||||
[TestCase("〽︎**(庵点)**は、\n\n","<p>〽︎<strong>(庵点)</strong>は、</p>\n")]
|
||||
[TestCase("**true。︁**false\n\n", "<p><strong>true。︁</strong>false</p>\n")]
|
||||
[TestCase("禰󠄀**(ね)**豆子", "<p>禰󠄀<strong>(ね)</strong>豆子</p>\n")]
|
||||
public void TestCjkFriendlyEmphasisJapanese(string source, string expected)
{
    // Render the Japanese sample with the CJK-friendly pipeline and compare the HTML verbatim.
    string html = Markdown.ToHtml(source, GetPipeline());

    Assert.AreEqual(expected, html);
}
|
||||
|
||||
[Test]
|
||||
[TestCase("**이 [링크](https://example.kr/)**만을 강조하고 싶다.", "<p><strong>이 <a href=\"https://example.kr/\">링크</a></strong>만을 강조하고 싶다.</p>\n")]
|
||||
[TestCase("**스크립트(script)**라고", "<p><strong>스크립트(script)</strong>라고</p>\n")]
|
||||
[TestCase("패키지를 발행하려면 **`npm publish`**를 실행하십시오.", "<p>패키지를 발행하려면 <strong><code>npm publish</code></strong>를 실행하십시오.</p>\n")]
|
||||
[TestCase("**안녕(hello)**하세요.", "<p><strong>안녕(hello)</strong>하세요.</p>\n")]
|
||||
[TestCase("ᅡ**(a)**", "<p>ᅡ<strong>(a)</strong></p>\n")]
|
||||
[TestCase("**(k)**ᄏ", "<p><strong>(k)</strong>ᄏ</p>\n")]
|
||||
public void TestCjkFriendlyEmphasisKorean(string source, string expected)
{
    // Render the Korean sample with the CJK-friendly pipeline and compare the HTML verbatim.
    string html = Markdown.ToHtml(source, GetPipeline());

    Assert.AreEqual(expected, html);
}
|
||||
|
||||
[Test]
|
||||
[TestCase("__注意__:注意事項", "<p><strong>注意</strong>:注意事項</p>\n")]
|
||||
[TestCase("注意:__注意事項__", "<p>注意:<strong>注意事項</strong></p>\n")]
|
||||
[TestCase("正體字。︁_Traditional._", "<p>正體字。︁<em>Traditional.</em></p>\n")]
|
||||
[TestCase("正體字。︁__Hong Kong and Taiwan.__", "<p>正體字。︁<strong>Hong Kong and Taiwan.</strong></p>\n")]
|
||||
[TestCase("简体字 / 新字体。︀_Simplified._", "<p>简体字 / 新字体。︀<em>Simplified.</em></p>\n")]
|
||||
[TestCase("简体字 / 新字体。︀__Mainland China or Japan.__", "<p>简体字 / 新字体。︀<strong>Mainland China or Japan.</strong></p>\n")]
|
||||
[TestCase("“︁Git”︁__Hub__", "<p>“︁Git”︁<strong>Hub</strong></p>\n")]
|
||||
public void TestCjkFriendlyEmphasisUnderscore(string source, string expected)
{
    // Underscore-delimited samples, rendered with the CJK-friendly pipeline.
    string html = Markdown.ToHtml(source, GetPipeline());

    Assert.AreEqual(expected, html);
}
|
||||
|
||||
[Test]
|
||||
[TestCase("a~~a()~~あ", "<p>a<del>a()</del>あ</p>\n")]
|
||||
[TestCase("あ~~()a~~a", "<p>あ<del>()a</del>a</p>\n")]
|
||||
[TestCase("𩸽~~()a~~a", "<p>𩸽<del>()a</del>a</p>\n")]
|
||||
[TestCase("a~~a()~~𩸽", "<p>a<del>a()</del>𩸽</p>\n")]
|
||||
[TestCase("葛󠄀~~()a~~a", "<p>葛󠄀<del>()a</del>a</p>\n")]
|
||||
[TestCase("羽︀~~()a~~a", "<p>羽︀<del>()a</del>a</p>\n")]
|
||||
[TestCase("a~~「a~~」", "<p>a<del>「a</del>」</p>\n")]
|
||||
[TestCase("「~~a」~~a", "<p>「<del>a」</del>a</p>\n")]
|
||||
[TestCase("~~a~~:~~a~~", "<p><del>a</del>:<del>a</del></p>\n")]
|
||||
[TestCase("~~日本語。︀~~English.", "<p><del>日本語。︀</del>English.</p>\n")]
|
||||
[TestCase("~~“︁a”︁~~a", "<p><del>“︁a”︁</del>a</p>\n")]
|
||||
public void TestCjkFriendlyEmphasisGfmStrikethrough(string source, string expected)
{
    // `~~` samples need the pipeline that also has the emphasis extras enabled.
    string html = Markdown.ToHtml(source, GetPipelineWithStrikethrough());

    Assert.AreEqual(expected, html);
}
|
||||
|
||||
[Test]
|
||||
[TestCase("a**〰**a", "<p>a<strong>〰</strong>a</p>\n")]
|
||||
[TestCase("a**〽**a", "<p>a<strong>〽</strong>a</p>\n")]
|
||||
[TestCase("a**🈂**a", "<p>a<strong>🈂</strong>a</p>\n")]
|
||||
[TestCase("a**🈷**a", "<p>a<strong>🈷</strong>a</p>\n")]
|
||||
[TestCase("a**㊗**a", "<p>a<strong>㊗</strong>a</p>\n")]
|
||||
[TestCase("a**㊙**a", "<p>a<strong>㊙</strong>a</p>\n")]
|
||||
public void TestCjkFriendlyPseudoEmoji(string source, string expected)
{
    // Emoji-like CJK symbols wrapped in `**`, rendered with the CJK-friendly pipeline.
    string html = Markdown.ToHtml(source, GetPipeline());

    Assert.AreEqual(expected, html);
}
|
||||
|
||||
#if !NET || !MARKDIG_NO_RUNE_TESTS
|
||||
// delimiter: '*', '_' = each character, '?' = either
|
||||
// can open/close = whether the places can be in the range of emphasis
|
||||
// 2 before, previous, can close, delimiter, can open, next
|
||||
// *****Basic*****
|
||||
[TestCase("\0", " ", false, '?', false, " ")]
|
||||
[TestCase("\0", "𰻞", true, '?', false, " ")]
|
||||
[TestCase("\0", " ", false, '?', true, "𰻞")]
|
||||
[TestCase("\0", "𝜵", false, '?', true, "A")]
|
||||
[TestCase("\0", "A", true, '?', false, "𝜵")]
|
||||
[TestCase("\0", "𝜵", true, '*', true, "𰻞")]
|
||||
[TestCase("\0", "A", true, '*', true, "𰻞")]
|
||||
[TestCase("\0", "𰻞", true, '*', true, "𝜵")]
|
||||
[TestCase("\0", "𰻞", true, '*', true, "A")]
|
||||
[TestCase("\0", "𰻞", true, '*', true, "」")]
|
||||
[TestCase("\0", "「", true, '*', true, "𰻞")]
|
||||
[TestCase("\0", "A", true, '*', true, "」")]
|
||||
[TestCase("\0", "「", true, '*', true, "A")]
|
||||
[TestCase("\0", "𝜵", false, '_', true, "𰻞")]
|
||||
[TestCase("\0", "A", false, '_', false, "𰻞")]
|
||||
[TestCase("\0", "𰻞", true, '_', false, "𝜵")]
|
||||
[TestCase("\0", "𰻞", false, '_', false, "A")]
|
||||
[TestCase("\0", "𰻞", true, '_', false, "」")]
|
||||
[TestCase("\0", "「", false, '_', true, "𰻞")]
|
||||
[TestCase("\0", "A", true, '_', false, "」")]
|
||||
[TestCase("\0", "「", false, '_', true, "A")]
|
||||
// *****IVS*****
|
||||
[TestCase("𩸽", "\U000E0101", true, '*', true, "𝜵")]
|
||||
[TestCase("𩸽", "\U000E0101", true, '_', false, "𝜵")]
|
||||
[TestCase("𩸽", "\U000E0101", true, '*', true, "𝜵")]
|
||||
[TestCase("𩸽", "\U000E0101", true, '_', false, "𝜵")]
|
||||
// Non-Han + U+E01XX does not appear in the wild
|
||||
[TestCase("\0", "\U000E0101", true, '*', true, "𝜵")]
|
||||
[TestCase("\0", "\U000E0101", true, '_', false, "𝜵")]
|
||||
[TestCase("\0", "\U000E0101", true, '*', true, "𝜵")]
|
||||
[TestCase("\0", "\U000E0101", true, '_', false, "𝜵")]
|
||||
// *****SVS*****
|
||||
[TestCase("羽", "\uFE00", true, '*', true, "𝜵")]
|
||||
[TestCase("羽", "\uFE00", true, '_', false, "𝜵")]
|
||||
[TestCase("羽", "\uFE00", true, '*', true, "𝜵")]
|
||||
[TestCase("羽", "\uFE00", true, '_', false, "𝜵")]
|
||||
// Slashed zero
|
||||
[TestCase("0", "\uFE00", true, '?', false, "𝜵")]
|
||||
[TestCase("0", "\uFE00", true, '?', false, "𝜵")]
|
||||
[TestCase("“", "\uFE00", false, '?', true, "A")]
|
||||
[TestCase("“", "\uFE01", true, '*', true, "A")]
|
||||
[TestCase("“", "\uFE01", false, '_', true, "A")]
|
||||
[TestCase("\0", "“", false, '?', true, "A")]
|
||||
[TestCase("\0", "A", true, '?', false, "“")]
|
||||
// *****Emoji*****
|
||||
// Default text presentation
|
||||
[TestCase("\0", "㊙", true, '*', true, "A")]
|
||||
[TestCase("\0", "㊙", false, '_', true, "A")]
|
||||
[TestCase("\0", "A", true, '*', true, "㊙")]
|
||||
[TestCase("\0", "A", true, '_', false, "㊙")]
|
||||
// Default emoji presentation
|
||||
[TestCase("\0", "🈯", false, '?', true, "A")]
|
||||
[TestCase("\0", "A", true, '?', false, "🈯")]
|
||||
// EAW = Ambiguous (not CJK)
|
||||
[TestCase("\0", "☎", false, '?', true, "A")]
|
||||
// Text presentation sequences
|
||||
[TestCase("㊙", "\uFE0E", true, '*', true, "A")]
|
||||
[TestCase("㊙", "\uFE0E", false, '_', true, "A")]
|
||||
// Caution: default emoji presentation character + text presentation selector has not been supported yet
|
||||
[TestCase("🈯", "\uFE0E", false, '?', true, "A")]
|
||||
// Emoji presentation sequences
|
||||
[TestCase("㊙", "\uFE0F", true, '*', true, "A")]
|
||||
[TestCase("㊙", "\uFE0F", false, '_', false, "A")]
|
||||
[TestCase("🈯", "\uFE0F", true, '*', true, "A")]
|
||||
[TestCase("🈯", "\uFE0F", false, '_', false, "A")]
|
||||
// *****Korean*****
|
||||
[TestCase("\0", "한", true, '*', true, "𝜵")]
|
||||
[TestCase("\0", "𝜵", true, '*', true, "한")]
|
||||
// A part of NFD form
|
||||
[TestCase("\0", "ᆫ", true, '*', true, "𝜵")]
|
||||
[TestCase("\0", "𝜵", true, '*', true, "ᆫ")]
|
||||
[Test]
|
||||
public void TestCheckOpenCloseDelimiterCjkFriendly(string twoPrevStr, string prevStr, bool shouldBeClosable, char delim, bool shouldBeOpenable, string nextStr)
{
    // Each string argument carries one code point; decode its leading rune and require a clean decode.
    static Rune DecodeLeadingRune(string text)
    {
        OperationStatus status = Rune.DecodeFromUtf16(text, out Rune rune, out _);
        Assert.AreEqual(OperationStatus.Done, status);
        return rune;
    }

    Rune twoPrev = DecodeLeadingRune(twoPrevStr);
    Rune prev = DecodeLeadingRune(prevStr);
    Rune next = DecodeLeadingRune(nextStr);

    // First pass: within-word matching is on only for '*'; both '_' and '?' run with it off.
    bool enableWithinWord = delim == '*';
    CharHelper.CheckOpenCloseDelimiterCjkFriendly(prev, next, twoPrev, enableWithinWord, out bool canOpen, out bool canClose);
    Assert.AreEqual(shouldBeOpenable, canOpen, "isOpen");
    Assert.AreEqual(shouldBeClosable, canClose, "isClose");

    if (delim != '?')
    {
        return;
    }

    // '?' means the expectation holds for either delimiter, so repeat the check in '*' mode.
    CharHelper.CheckOpenCloseDelimiterCjkFriendly(prev, next, twoPrev, true, out canOpen, out canClose);
    Assert.AreEqual(shouldBeOpenable, canOpen, "isOpen (*)");
    Assert.AreEqual(shouldBeClosable, canClose, "isClose (*)");
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
@@ -9,7 +9,7 @@ namespace Markdig.Tests;
|
||||
[TestFixture]
|
||||
public class TestStringSlice
|
||||
{
|
||||
#if NET
|
||||
#if !NET || !MARKDIG_NO_RUNE_TESTS
|
||||
[Test]
|
||||
public void TestRuneBmp()
|
||||
{
|
||||
|
||||
@@ -165,6 +165,117 @@ public static class CharHelper
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Decides whether an emphasis delimiter run located between <paramref name="pc"/> and <paramref name="c"/>
/// can open and/or close emphasis under the CJK-friendly rules.
/// </summary>
/// <param name="pc">The rune immediately before the delimiter run.</param>
/// <param name="c">The rune immediately after the delimiter run.</param>
/// <param name="twoPreviousRune">
/// The rune before <paramref name="pc"/>. It is only consulted when <paramref name="pc"/> is a
/// variation selector in U+FE00..U+FE0E, in which case it replaces <paramref name="pc"/> for the
/// punctuation and CJK checks.
/// </param>
/// <param name="enableWithinWord">
/// When <c>false</c> (the <c>_</c> path), the result depends on whitespace and punctuation only and
/// CJK adjacency is ignored.
/// </param>
/// <param name="canOpen">Set to <c>true</c> when the delimiter run can open emphasis.</param>
/// <param name="canClose">Set to <c>true</c> when the delimiter run can close emphasis.</param>
/// <remarks>
/// The signature of this method is still unstable and can be changed in the future, so it stays
/// <c>internal</c> for now.
/// Simplification background: https://github.com/tats-u/markdown-cjk-friendly/commit/3c4217bea8248e9abc8be4e7c68748a88557662d
/// </remarks>
internal static void CheckOpenCloseDelimiterCjkFriendly(Rune pc, Rune c, Rune twoPreviousRune, bool enableWithinWord, out bool canOpen, out bool canClose)
{
    pc.CheckUnicodeCategory(out bool whiteSpaceBefore, out bool punctuationBefore);
    c.CheckUnicodeCategory(out bool whiteSpaceAfter, out bool punctuationAfter);

    // Fastest path: next to whitespace, neither punctuation nor CJK adjacency matters.
    if (whiteSpaceBefore || whiteSpaceAfter)
    {
        canClose = !whiteSpaceBefore;
        canOpen = !whiteSpaceAfter;
        return;
    }

    // A non-emoji variation selector right before the run is skipped: the rune two positions back
    // supplies the punctuation flag (and, below, the CJK classification).
    bool precededByVariationSelector = IsNonEmojiGeneralUseVariantSelector(pc);
    Rune baseRune = precededByVariationSelector ? twoPreviousRune : pc;
    if (precededByVariationSelector)
    {
        baseRune.CheckUnicodeCategory(out _, out punctuationBefore);
    }

    if (!enableWithinWord)
    {
        // Fast path for `_` (does not depend on the existence of a CJK character).
        canOpen = punctuationBefore;
        canClose = punctuationAfter;
        return;
    }

    // Without a skipped selector, baseRune is pc itself, so the second operand asks whether pc is an
    // ideographic variation selector. With one, it asks whether baseRune + pc form one of the
    // quotation-mark + U+FE01 sequences listed in IsCjkAmbiguousPunctuation.
    bool cjkBefore = IsCjk(baseRune)
        || (precededByVariationSelector
            ? IsCjkAmbiguousPunctuation(baseRune, pc)
            : IsIdeographicVariationSelector(baseRune));
    bool cjkAdjacent = cjkBefore || IsCjk(c);

    canOpen = punctuationBefore || cjkAdjacent || !punctuationAfter;
    canClose = punctuationAfter || cjkAdjacent || !punctuationBefore;

    // https://github.com/tats-u/markdown-cjk-friendly/blob/main/specification.md
    // https://github.com/tats-u/markdown-cjk-friendly/blob/main/ranges.md
    static bool IsNonEmojiGeneralUseVariantSelector(Rune r) => r.Value is >= 0xFE00 and <= 0xFE0E;

    static bool IsIdeographicVariationSelector(Rune r) => r.Value is >= 0xE0100 and <= 0xE01EF;

    static bool IsCjkAmbiguousPunctuation(Rune main, Rune vs) =>
        vs.Value is 0xFE01 && main.Value is (0x2018 or 0x2019 or 0x201C or 0x201D);

    // As of Unicode 17
    static bool IsCjk(Rune r) => r.Value is
        >= 0x1100 and ( // Fast path for most non-CJK characters
            <= 0x11ff
            or 0x20a9
            or >= 0x2329 and <= 0x232a
            or >= 0x2630 and <= 0x2637
            or >= 0x268a and <= 0x268f
            or >= 0x2e80 and <= 0x2e99
            or >= 0x2e9b and <= 0x2ef3
            or >= 0x2f00 and <= 0x2fd5
            or >= 0x2ff0 and <= 0x303e
            or >= 0x3041 and <= 0x3096
            or >= 0x3099 and <= 0x30ff
            or >= 0x3105 and <= 0x312f
            or >= 0x3131 and <= 0x318e
            or >= 0x3190 and <= 0x31e5
            or >= 0x31ef and <= 0x321e
            or >= 0x3220 and <= 0x3247
            or >= 0x3250 and <= 0xa48c
            or >= 0xa490 and <= 0xa4c6
            or >= 0xa960 and <= 0xa97c
            or >= 0xac00 and <= 0xd7a3
            or >= 0xd7b0 and <= 0xd7c6
            or >= 0xd7cb and <= 0xd7fb
            or >= 0xf900 and <= 0xfaff
            or >= 0xfe10 and <= 0xfe19
            or >= 0xfe30 and <= 0xfe52
            or >= 0xfe54 and <= 0xfe66
            or >= 0xfe68 and <= 0xfe6b
            or >= 0xff01 and <= 0xffbe
            or >= 0xffc2 and <= 0xffc7
            or >= 0xffca and <= 0xffcf
            or >= 0xffd2 and <= 0xffd7
            or >= 0xffda and <= 0xffdc
            or >= 0xffe0 and <= 0xffe6
            or >= 0xffe8 and <= 0xffee
            or >= 0x16fe0 and <= 0x16fe4
            or >= 0x16ff0 and <= 0x16ff6
            or >= 0x17000 and <= 0x18cd5
            or >= 0x18cff and <= 0x18d1e
            or >= 0x18d80 and <= 0x18df2
            or >= 0x1aff0 and <= 0x1aff3
            or >= 0x1aff5 and <= 0x1affb
            or >= 0x1affd and <= 0x1affe
            or >= 0x1b000 and <= 0x1b122
            or 0x1b132
            or >= 0x1b150 and <= 0x1b152
            or 0x1b155
            or >= 0x1b164 and <= 0x1b167
            or >= 0x1b170 and <= 0x1b2fb
            or >= 0x1d300 and <= 0x1d356
            or >= 0x1d360 and <= 0x1d376
            or 0x1f200
            or 0x1f202
            or >= 0x1f210 and <= 0x1f219
            or >= 0x1f21b and <= 0x1f22e
            or >= 0x1f230 and <= 0x1f231
            or 0x1f237
            or 0x1f23b
            or >= 0x1f240 and <= 0x1f248
            or >= 0x1f260 and <= 0x1f265
            or >= 0x20000 and <= 0x3fffd
        );
}
|
||||
|
||||
/// <summary>
|
||||
/// Determines whether roman letter partial.
|
||||
/// </summary>
|
||||
|
||||
@@ -108,7 +108,7 @@ public static class MarkdownExtensions
|
||||
pipeline.Extensions.ReplaceOrAdd<AlertExtension>(new AlertExtension() { RenderKind = renderKind });
|
||||
return pipeline;
|
||||
}
|
||||
|
||||
|
||||
/// <summary>
|
||||
/// Uses this extension to enable autolinks from text `http://`, `https://`, `ftp://`, `mailto:`, `www.xxx.yyy`
|
||||
/// </summary>
|
||||
@@ -515,6 +515,18 @@ public static class MarkdownExtensions
|
||||
return pipeline;
|
||||
}
|
||||
|
||||
/// <summary>
/// Enables CJK-friendly emphasis. <c>**</c> around punctuation in CJK text will be much more likely to be parsed as emphasis as intended.
/// The option is set on the pipeline's <see cref="EmphasisInlineParser"/>; when the lookup for that parser returns <c>null</c>, the pipeline is returned unchanged.
/// </summary>
/// <param name="pipeline">The pipeline</param>
/// <returns>The modified pipeline</returns>
/// <seealso href="https://github.com/tats-u/markdown-cjk-friendly/"/>
public static MarkdownPipelineBuilder UseCjkFriendlyEmphasis(this MarkdownPipelineBuilder pipeline)
{
    var emphasisParser = pipeline.InlineParsers.FindExact<EmphasisInlineParser>();
    if (emphasisParser is not null)
    {
        emphasisParser.CjkFriendlyEmphasis = true;
    }

    return pipeline;
}
|
||||
|
||||
/// <summary>
|
||||
/// This will disable the HTML support in the markdown processor (for constraint/safe parsing).
|
||||
/// </summary>
|
||||
@@ -653,6 +665,9 @@ public static class MarkdownExtensions
|
||||
case "globalization":
|
||||
pipeline.UseGlobalization();
|
||||
break;
|
||||
case "cjk-friendly-emphasis":
|
||||
pipeline.UseCjkFriendlyEmphasis();
|
||||
break;
|
||||
default:
|
||||
throw new ArgumentException($"Invalid extension `{extension}` from `{extensions}`", nameof(extensions));
|
||||
}
|
||||
|
||||
@@ -43,6 +43,12 @@ public class EmphasisInlineParser : InlineParser, IPostInlineProcessor
|
||||
/// </summary>
|
||||
public List<EmphasisDescriptor> EmphasisDescriptors { get; }
|
||||
|
||||
/// <summary>
/// Gets or sets a value indicating whether the emphasis parser applies the CJK-friendly delimiter rules.
/// Defaults to <c>false</c>.
/// </summary>
/// <seealso href="https://github.com/tats-u/markdown-cjk-friendly"/>
public bool CjkFriendlyEmphasis { get; set; }
|
||||
|
||||
/// <summary>
|
||||
/// Determines whether this parser is using the specified character as an emphasis delimiter.
|
||||
/// </summary>
|
||||
@@ -161,16 +167,28 @@ public class EmphasisInlineParser : InlineParser, IPostInlineProcessor
|
||||
var emphasisDesc = emphasisMap![delimiterChar]!;
|
||||
|
||||
Rune pc = (Rune)0;
|
||||
Rune twoPreviousChar = default;
|
||||
|
||||
if (processor.Inline is HtmlEntityInline htmlEntityInline)
|
||||
{
|
||||
if (htmlEntityInline.Transcoded.Length > 0)
|
||||
{
|
||||
pc = htmlEntityInline.Transcoded.RuneAt(htmlEntityInline.Transcoded.End);
|
||||
|
||||
if (CjkFriendlyEmphasis)
|
||||
{
|
||||
twoPreviousChar = htmlEntityInline.Transcoded.RuneAt(htmlEntityInline.Transcoded.End - pc.Utf16SequenceLength);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (pc.Value == 0)
|
||||
{
|
||||
pc = slice.PeekRuneExtra(-1);
|
||||
if (CjkFriendlyEmphasis)
|
||||
{
|
||||
// This cannot be a delegate (Func<Rune>?) because slice is a reference
|
||||
twoPreviousChar = slice.PeekRuneExtra(-1 - pc.Utf16SequenceLength);
|
||||
}
|
||||
// delimiterChar is BMP, so slice.PeekCharExtra(-2) is (a part of) the character two positions back.
|
||||
if (pc == (Rune)delimiterChar && slice.PeekCharExtra(-2) != '\\')
|
||||
{
|
||||
@@ -199,8 +217,17 @@ public class EmphasisInlineParser : InlineParser, IPostInlineProcessor
|
||||
Rune.DecodeFromUtf16(htmlString, out c, out _);
|
||||
}
|
||||
|
||||
bool canOpen;
|
||||
bool canClose;
|
||||
// Calculate Open-Close for current character
|
||||
CharHelper.CheckOpenCloseDelimiter(pc, c, emphasisDesc.EnableWithinWord, out bool canOpen, out bool canClose);
|
||||
if (CjkFriendlyEmphasis)
|
||||
{
|
||||
CharHelper.CheckOpenCloseDelimiterCjkFriendly(pc, c, twoPreviousChar, emphasisDesc.EnableWithinWord, out canOpen, out canClose);
|
||||
}
|
||||
else
|
||||
{
|
||||
CharHelper.CheckOpenCloseDelimiter(pc, c, emphasisDesc.EnableWithinWord, out canOpen, out canClose);
|
||||
}
|
||||
|
||||
// We have potentially an open or close emphasis
|
||||
if (canOpen || canClose)
|
||||
|
||||
@@ -87,6 +87,7 @@ class Program
|
||||
new Spec("Jira Links", "JiraLinks.md", "jiralinks"),
|
||||
new Spec("Globalization", "GlobalizationSpecs.md", "globalization+advanced+emojis"),
|
||||
new Spec("Figures, Footers and Cites", "FigureFooterAndCiteSpecs.md", "figures+footers+citations|advanced"),
|
||||
new Spec("CJK-friendly Emphasis", "CJKFriendlyEmphasis.md", "cjk-friendly-emphasis"),
|
||||
|
||||
new NormalizeSpec("Headings", "Headings.md", ""),
|
||||
|
||||
@@ -358,7 +359,7 @@ class Program
|
||||
static string CompressedName(string name)
|
||||
{
|
||||
string compressedName = "";
|
||||
foreach (var part in name.Replace(',', ' ').Split(' ', StringSplitOptions.RemoveEmptyEntries))
|
||||
foreach (var part in name.Replace(',' , ' ').Replace('-', ' ').Split(' ', StringSplitOptions.RemoveEmptyEntries))
|
||||
{
|
||||
compressedName += char.IsLower(part[0])
|
||||
? char.ToUpper(part[0]) + (part.Length > 1 ? part.Substring(1) : "")
|
||||
|
||||
Reference in New Issue
Block a user