Compare commits

...

2 Commits

9 changed files with 1549 additions and 43 deletions

View File

@@ -390,6 +390,7 @@ namespace Markdig.Tests
[TestCase("b>r", "br")]
[TestCase(@"b\r", "br")]
[TestCase(@"b""r", "br")]
[TestCase(@"Requirement 😀", "requirement")]
public void TestUrilizeNonAscii_NonValidCharactersForFragments(string input, string expectedResult)
{
Assert.AreEqual(expectedResult, LinkHelper.Urilize(input, false));

File diff suppressed because it is too large Load Diff

View File

@@ -20,60 +20,59 @@ namespace Markdig.Helpers
public static string Urilize(string headingText, bool allowOnlyAscii)
{
#if SUPPORT_NORMALIZE
// Normalize the string if we don't allow UTF8
if (allowOnlyAscii)
{
headingText = headingText.Normalize(NormalizationForm.FormD);
}
#endif
var headingBuffer = StringBuilderCache.Local();
bool hasLetter = false;
bool previousIsSpace = false;
for (int i = 0; i < headingText.Length; i++)
{
var c = headingText[i];
if (char.IsLetter(c))
var normalized = allowOnlyAscii ? CharNormalizer.ConvertToAscii(c) : null;
for (int j = 0; j < (normalized?.Length ?? 1); j++)
{
#if SUPPORT_NORMALIZE
if (allowOnlyAscii && (c < ' ' || c >= 127))
if (normalized != null)
{
continue;
c = normalized[j];
}
#endif
c = char.IsUpper(c) ? char.ToLowerInvariant(c) : c;
headingBuffer.Append(c);
hasLetter = true;
previousIsSpace = false;
}
else if (hasLetter)
{
if (IsReservedPunctuation(c))
if (char.IsLetter(c))
{
if (previousIsSpace)
if (allowOnlyAscii && (c < ' ' || c >= 127))
{
headingBuffer.Length--;
continue;
}
if (headingBuffer[headingBuffer.Length - 1] != c)
c = char.IsUpper(c) ? char.ToLowerInvariant(c) : c;
headingBuffer.Append(c);
hasLetter = true;
previousIsSpace = false;
}
else if (hasLetter)
{
if (IsReservedPunctuation(c))
{
if (previousIsSpace)
{
headingBuffer.Length--;
}
if (headingBuffer[headingBuffer.Length - 1] != c)
{
headingBuffer.Append(c);
}
previousIsSpace = false;
}
else if (c.IsDigit())
{
headingBuffer.Append(c);
previousIsSpace = false;
}
previousIsSpace = false;
}
else if (c.IsDigit())
{
headingBuffer.Append(c);
previousIsSpace = false;
}
else if (!previousIsSpace && c.IsWhitespace())
{
var pc = headingBuffer[headingBuffer.Length - 1];
if (!IsReservedPunctuation(pc))
else if (!previousIsSpace && c.IsWhitespace())
{
headingBuffer.Append('-');
var pc = headingBuffer[headingBuffer.Length - 1];
if (!IsReservedPunctuation(pc))
{
headingBuffer.Append('-');
}
previousIsSpace = true;
}
previousIsSpace = true;
}
}
}

View File

@@ -25,6 +25,6 @@ namespace Markdig
{
public static partial class Markdown
{
public const string Version = "0.10.1";
public const string Version = "0.10.2";
}
}

View File

@@ -1,6 +1,6 @@
{
"title": "Markdig",
"version": "0.10.1",
"version": "0.10.2",
"authors": [ "Alexandre Mutel" ],
"description": "A fast, powerful, CommonMark compliant, extensible Markdown processor for .NET with 20+ builtin extensions (pipetables, footnotes, definition lists... etc.)",
"copyright": "Alexandre Mutel",
@@ -11,7 +11,7 @@
"projectUrl": "https://github.com/lunet-io/markdig",
"iconUrl": "https://raw.githubusercontent.com/lunet-io/markdig/master/img/markdig.png",
"requireLicenseAcceptance": false,
"releaseNotes": "> 0.10.1\n- Update to latest CommonMark specs\n- Fix source span for LinkReferenceDefinition\n> 0.10.0\n- Breaking change of the IMarkdownExtension to allow to receive the MarkdownPipeline for the renderers setup\n",
"releaseNotes": "> 0.10.2\n- Fix exception when trying to urlize a URL with a Unicode character outside the range supported by NormD (issue #75)\n> 0.10.1\n- Update to latest CommonMark specs\n- Fix source span for LinkReferenceDefinition\n> 0.10.0\n- Breaking change of the IMarkdownExtension to allow to receive the MarkdownPipeline for the renderers setup\n",
"tags": [ "Markdown CommonMark md html md2html" ]
},
"configurations": {
@@ -34,7 +34,7 @@
"frameworks": {
"net35": {
"buildOptions": {
"define": [ "SUPPORT_NORMALIZE", "SUPPORT_FIXED_STRING" ]
"define": [ "SUPPORT_FIXED_STRING" ]
},
"frameworkAssemblies": {
"mscorlib": "",
@@ -44,7 +44,7 @@
},
"net40": {
"buildOptions": {
"define": [ "SUPPORT_NORMALIZE", "SUPPORT_FIXED_STRING" ]
"define": [ "SUPPORT_FIXED_STRING" ]
},
"frameworkAssemblies": {
"mscorlib": "",

View File

@@ -0,0 +1,104 @@
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
namespace UnicodeNormDApp
{
    /// <summary>
    /// One-off generator tool: downloads the Unicode <c>NormalizationTest.txt</c> data file and
    /// writes, through <see cref="Trace"/>, the C# source of a <c>Dictionary&lt;char, string&gt;</c>
    /// initializer mapping single UTF-16 characters to the printable-ASCII part of their decomposition.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Zero-based index of the semicolon-separated field that holds the decomposition to use.
        /// NOTE(review): per the header of NormalizationTest.txt the fifth field is the NFKD form
        /// (the NFD form is the third) - confirm that the compatibility decomposition is intended.
        /// </summary>
        private const int DecompositionColumn = 4;

        /// <summary>
        /// Downloads the normalization test data, builds the char-to-ASCII table and traces it
        /// as a C# collection initializer.
        /// </summary>
        /// <param name="args">Command line arguments (unused).</param>
        static void Main(string[] args)
        {
            string data;
            // WebClient is disposable: release it as soon as the download is complete.
            using (var httpClient = new WebClient())
            {
                data = httpClient.DownloadString("http://www.unicode.org/Public/UCD/latest/ucd/NormalizationTest.txt");
            }

            var sep = new[] {';'};
            var values = new Dictionary<char, string>();

            using (var stringReader = new StringReader(data))
            {
                string line;
                while ((line = stringReader.ReadLine()) != null)
                {
                    // Skip full-line comments ("#...") and part headers ("@Part...").
                    if (line.StartsWith("#", StringComparison.Ordinal) || line.StartsWith("@", StringComparison.Ordinal))
                    {
                        continue;
                    }

                    // Strip the trailing "# ..." comment before splitting the fields.
                    var commentIndex = line.IndexOf('#');
                    var dataLine = commentIndex > 0 ? line.Substring(0, commentIndex) : line;
                    var columns = dataLine.Split(sep, StringSplitOptions.RemoveEmptyEntries);

                    // The decomposition field must exist before it is indexed below; the previous
                    // "Length < 4" guard let a four-field row through and then read columns[4].
                    if (columns.Length <= DecompositionColumn)
                    {
                        continue;
                    }

                    // Skip sources made of several code points (they are space separated).
                    if (columns[0].IndexOf(' ') >= 0)
                    {
                        continue;
                    }

                    var source = Convert.ToInt32(columns[0], 16);
                    var sourceString = char.ConvertFromUtf32(source);
                    var ascii = ExtractPrintableAscii(columns[DecompositionColumn]);

                    // Keep only sources that fit in a single UTF-16 char and that decompose to at
                    // least one printable ASCII character; the first mapping seen for a char wins.
                    if (sourceString.Length == 1 && ascii.Length > 0 && !values.ContainsKey(sourceString[0]))
                    {
                        values.Add(sourceString[0], ascii);
                    }
                }
            }

            Trace.WriteLine($"CodeToAscii = new Dictionary<char, string>({values.Count})");
            Trace.WriteLine("{");
            foreach (var pair in values)
            {
                // Escape backslashes and double quotes so the value is a valid C# string literal.
                var escape = pair.Value.Replace("\\", @"\\").Replace("\"", "\\\"");
                Trace.WriteLine($" {{'{pair.Key}',\"{escape}\"}},");
            }
            Trace.WriteLine("};");
        }

        /// <summary>
        /// Converts a space-separated list of hexadecimal code points into a string that contains
        /// only the code points in the printable ASCII range 33..126 (space is deliberately dropped).
        /// </summary>
        /// <param name="codePoints">Space-separated hexadecimal code points, e.g. <c>"0044 0307"</c>.</param>
        /// <returns>The retained ASCII characters in their original order; possibly empty.</returns>
        private static string ExtractPrintableAscii(string codePoints)
        {
            var builder = new StringBuilder();
            foreach (var hex in codePoints.Split(new[] {' '}, StringSplitOptions.RemoveEmptyEntries))
            {
                var codePoint = Convert.ToInt32(hex, 16);
                if (codePoint > 32 && codePoint < 127)
                {
                    builder.Append((char)codePoint);
                }
            }
            return builder.ToString();
        }
    }
}

View File

@@ -0,0 +1,36 @@
using System.Reflection;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
// General Information about an assembly is controlled through the following
// set of attributes. Change these attribute values to modify the information
// associated with an assembly.
[assembly: AssemblyTitle("UnicodeNormDApp")]
[assembly: AssemblyDescription("")]
[assembly: AssemblyConfiguration("")]
[assembly: AssemblyCompany("")]
[assembly: AssemblyProduct("UnicodeNormDApp")]
[assembly: AssemblyCopyright("Copyright © 2016")]
[assembly: AssemblyTrademark("")]
[assembly: AssemblyCulture("")]
// Setting ComVisible to false makes the types in this assembly not visible
// to COM components. If you need to access a type in this assembly from
// COM, set the ComVisible attribute to true on that type.
[assembly: ComVisible(false)]
// The following GUID is for the ID of the typelib if this project is exposed to COM
[assembly: Guid("33ffc0b9-0187-44f9-9424-bb5af5b4fb84")]
// Version information for an assembly consists of the following four values:
//
// Major Version
// Minor Version
// Build Number
// Revision
//
// You can specify all the values or you can default the Build and Revision Numbers
// by using the '*' as shown below:
// [assembly: AssemblyVersion("1.0.*")]
[assembly: AssemblyVersion("1.0.0.0")]
[assembly: AssemblyFileVersion("1.0.0.0")]

View File

@@ -0,0 +1,56 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
<PropertyGroup>
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
<ProjectGuid>{33FFC0B9-0187-44F9-9424-BB5AF5B4FB84}</ProjectGuid>
<OutputType>Exe</OutputType>
<AppDesignerFolder>Properties</AppDesignerFolder>
<RootNamespace>UnicodeNormDApp</RootNamespace>
<AssemblyName>UnicodeNormDApp</AssemblyName>
<TargetFrameworkVersion>v4.0</TargetFrameworkVersion>
<FileAlignment>512</FileAlignment>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
<PlatformTarget>AnyCPU</PlatformTarget>
<DebugSymbols>true</DebugSymbols>
<DebugType>full</DebugType>
<Optimize>false</Optimize>
<OutputPath>bin\Debug\</OutputPath>
<DefineConstants>DEBUG;TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
<PlatformTarget>AnyCPU</PlatformTarget>
<DebugType>pdbonly</DebugType>
<Optimize>true</Optimize>
<OutputPath>bin\Release\</OutputPath>
<DefineConstants>TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
</PropertyGroup>
<ItemGroup>
<Reference Include="System" />
<Reference Include="System.Core" />
<Reference Include="System.Net" />
<Reference Include="System.Xml.Linq" />
<Reference Include="System.Data.DataSetExtensions" />
<Reference Include="Microsoft.CSharp" />
<Reference Include="System.Data" />
<Reference Include="System.Xml" />
</ItemGroup>
<ItemGroup>
<Compile Include="Program.cs" />
<Compile Include="Properties\AssemblyInfo.cs" />
</ItemGroup>
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
<!-- To modify your build process, add your task inside one of the targets below and uncomment it.
Other similar extension points exist, see Microsoft.Common.targets.
<Target Name="BeforeBuild">
</Target>
<Target Name="AfterBuild">
</Target>
-->
</Project>

View File

@@ -1,7 +1,7 @@
Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 14
VisualStudioVersion = 14.0.25123.0
VisualStudioVersion = 14.0.25420.1
MinimumVisualStudioVersion = 10.0.40219.1
Project("{8BB2217D-0F2D-49D1-97BC-3654ED321F3B}") = "Markdig", "Markdig\Markdig.xproj", "{8A58A7E2-627C-4F41-933F-5AC92ADFAB48}"
EndProject
@@ -23,6 +23,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Markdig.Benchmarks", "Markd
EndProject
Project("{8BB2217D-0F2D-49D1-97BC-3654ED321F3B}") = "Markdig.WebApp", "Markdig.WebApp\Markdig.WebApp.xproj", "{3CAD9801-9976-46BE-BACA-F6D0D21FDC00}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "UnicodeNormDApp", "UnicodeNormDApp\UnicodeNormDApp.csproj", "{33FFC0B9-0187-44F9-9424-BB5AF5B4FB84}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
@@ -45,6 +47,10 @@ Global
{3CAD9801-9976-46BE-BACA-F6D0D21FDC00}.Debug|Any CPU.Build.0 = Debug|Any CPU
{3CAD9801-9976-46BE-BACA-F6D0D21FDC00}.Release|Any CPU.ActiveCfg = Release|Any CPU
{3CAD9801-9976-46BE-BACA-F6D0D21FDC00}.Release|Any CPU.Build.0 = Release|Any CPU
{33FFC0B9-0187-44F9-9424-BB5AF5B4FB84}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{33FFC0B9-0187-44F9-9424-BB5AF5B4FB84}.Debug|Any CPU.Build.0 = Debug|Any CPU
{33FFC0B9-0187-44F9-9424-BB5AF5B4FB84}.Release|Any CPU.ActiveCfg = Release|Any CPU
{33FFC0B9-0187-44F9-9424-BB5AF5B4FB84}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE