[PR #650] [MERGED] Remove some branches in HtmlHelper.TryParseHtmlTagOpenTag by using bitmask #1165

Closed
opened 2026-01-29 14:50:45 +00:00 by claunia · 0 comments
Owner

📋 Pull Request Information

Original PR: https://github.com/xoofx/markdig/pull/650
Author: @gfoidl
Created: 7/21/2022
Status: Merged
Merged: 8/12/2022
Merged by: @xoofx

Base: masterHead: htmlhelper-TryParseHtmlTagOpenTag_remove_branches


📝 Commits (1)

  • bfd7b64 Remove some branches in HtmlHelper.TryParseHtmlTagOpenTag by using bitmasks

📊 Changes

1 file changed (+40 additions, -2 deletions)

View changed files

📝 src/Markdig/Helpers/HtmlHelper.cs (+40 -2)

📄 Description

Eliminate some branches in HtmlHelper.TryParseHtmlTagOpenTag (due to || comparisons) by using a bitmask approach instead.
Further the generated machine code has a test jump-combo, which allows optimizations at cpu-level (macro fusion, etc.).

Other places in that type seem not worthwile, as the count of cmps can't be lowered that the current count is.

Benchmarks

The benchmarks are done on a trimmed-down version of the code, to just test these parts of the code changed.

IsSpaceOrSpecialHtmlChar

|  Method |     Mean |   Error |  StdDev | Ratio | RatioSD | Code Size |
|-------- |---------:|--------:|--------:|------:|--------:|----------:|
| Default | 181.4 ns | 3.48 ns | 3.09 ns |  1.00 |    0.00 |      60 B |
|  Switch | 209.6 ns | 3.65 ns | 3.91 ns |  1.16 |    0.02 |      60 B |
|    Mask | 118.4 ns | 2.40 ns | 3.59 ns |  0.65 |    0.03 |      57 B |
benchmark code
using System.Diagnostics;
using System.Runtime.CompilerServices;
using BenchmarkDotNet.Attributes;

Bench bench = new();
Console.WriteLine(bench.Default());
Console.WriteLine(bench.Switch());
Console.WriteLine(bench.Mask());

#if !DEBUG
BenchmarkDotNet.Running.BenchmarkRunner.Run<Bench>();
#endif

[DisassemblyDiagnoser]
//[ShortRunJob]
public class Bench
{
    private static readonly char[] s_chars = { ' ', '\n', '"', '\'', '=', '<', '>', '`' };

    public Bench()
    {
        foreach (char c in s_chars)
        {
            Console.WriteLine($"{(int)c}");
        }

        Console.WriteLine();
    }

    [Benchmark(Baseline = true)]
    public int Default()
    {
        int count = 0;

        for (int cc = char.MinValue; cc <= char.MaxValue; ++cc)
        {
            char c = (char)cc;
            if (IsMatch0(c))
            {
                Debug.Assert(s_chars.Contains(c), $"{c} ({(int)c}) shouldn't be true");
                count++;
            }
        }

        Debug.Assert(count == s_chars.Length, $"wrong count, expected {s_chars.Length}, actual: {count}");
        return count;
    }

    //[Benchmark]
    public int Switch()
    {
        int count = 0;

        for (int cc = char.MinValue; cc <= char.MaxValue; ++cc)
        {
            char c = (char)cc;
            if (IsMatch00(c))
            {
                Debug.Assert(s_chars.Contains(c), $"{c} ({(int)c}) shouldn't be true");
                count++;
            }
        }

        Debug.Assert(count == s_chars.Length, $"wrong count, expected {s_chars.Length}, actual: {count}");
        return count;
    }

    [Benchmark]
    public int Mask()
    {
        int count = 0;

        for (int cc = char.MinValue; cc <= char.MaxValue; ++cc)
        {
            char c = (char)cc;
            if (IsMatch1(c))
            {
                Debug.Assert(s_chars.Contains(c), $"{c} ({(int)c}) shouldn't be true");
                count++;
            }
        }

        Debug.Assert(count == s_chars.Length, $"wrong count, expected {s_chars.Length}, actual: {count}");
        return count;
    }

    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static bool IsMatch0(char c)
    {
        return c == ' ' || c == '\n' || c == '"' || c == '\'' || c == '=' || c == '<' || c == '>' || c == '`';
    }

    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static bool IsMatch00(char c) => c switch
    {
        ' ' or '\n' or '"' or '\'' or '=' or '<' or '>' or '`' => true,
        _ => false,
    };

    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static bool IsMatch1(char c)
    {
        if (c > '>')
        {
            return c == '`';
        }

        const long BitMask =
              (1L << ' ')
            | (1L << '\n')
            | (1L << '"')
            | (1L << '\'')
            | (1L << '=')
            | (1L << '<')
            | (1L << '>');

        return (BitMask & (1L << c)) != 0;
    }
}

IsCharToAppend

|  Method |     Mean |   Error |  StdDev | Ratio | RatioSD | Code Size |
|-------- |---------:|--------:|--------:|------:|--------:|----------:|
| Default | 131.0 ns | 2.22 ns | 2.07 ns |  1.00 |    0.00 |      40 B |
|    Mask | 121.6 ns | 1.65 ns | 1.37 ns |  0.93 |    0.02 |      53 B |
benchmark code
using System.Diagnostics;
using System.Runtime.CompilerServices;
using BenchmarkDotNet.Attributes;

Bench bench = new();
Console.WriteLine(bench.Default());
Console.WriteLine(bench.Mask());

#if !DEBUG
BenchmarkDotNet.Running.BenchmarkRunner.Run<Bench>();
#endif

[DisassemblyDiagnoser]
//[ShortRunJob]
public class Bench
{
    private static readonly char[] s_chars = { '_', ':', '.', '-' };

    public Bench()
    {
        foreach (char c in s_chars)
        {
            Console.WriteLine($"{(int)c}");
        }

        Console.WriteLine();
    }

    [Benchmark(Baseline = true)]
    public int Default()
    {
        int count = 0;

        for (int cc = char.MinValue; cc < char.MaxValue; ++cc)
        {
            char c = (char)cc;
            if (IsMatch0(c))
            {
                Debug.Assert(s_chars.Contains(c), $"{c} ({(int)c}) shouldn't be true");
                count++;
            }
        }

        Debug.Assert(count == s_chars.Length, $"wrong count, expected {s_chars.Length}, actual: {count}");
        return count;
    }

    [Benchmark]
    public int Mask()
    {
        int count = 0;

        for (int cc = char.MinValue; cc < char.MaxValue; ++cc)
        {
            char c = (char)cc;
            if (IsMatch1(c))
            {
                Debug.Assert(s_chars.Contains(c), $"{c} ({(int)c}) shouldn't be true");
                count++;
            }
        }

        Debug.Assert(count == s_chars.Length, $"wrong count, expected {s_chars.Length}, actual: {count}");
        return count;
    }

    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static bool IsMatch0(char c)
    {
        return c == '_' || c == ':' || c == '.' || c == '-';
    }

    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static bool IsMatch1(char c)
    {
        if ((uint)(c - '-') > '_' - '-')
        {
            return false;
        }

        const long BitMask =
              (1L << '_')
            | (1L << ':')
            | (1L << '.')
            | (1L << '-');

        return (BitMask & (1L << c)) != 0;
    }
}

🔄 This issue represents a GitHub Pull Request. It cannot be merged through Gitea due to API limitations.

## 📋 Pull Request Information **Original PR:** https://github.com/xoofx/markdig/pull/650 **Author:** [@gfoidl](https://github.com/gfoidl) **Created:** 7/21/2022 **Status:** ✅ Merged **Merged:** 8/12/2022 **Merged by:** [@xoofx](https://github.com/xoofx) **Base:** `master` ← **Head:** `htmlhelper-TryParseHtmlTagOpenTag_remove_branches` --- ### 📝 Commits (1) - [`bfd7b64`](https://github.com/xoofx/markdig/commit/bfd7b6460c8c7fc431df0dc48a3e01672dc5c1b5) Remove some branches in HtmlHelper.TryParseHtmlTagOpenTag by using bitmasks ### 📊 Changes **1 file changed** (+40 additions, -2 deletions) <details> <summary>View changed files</summary> 📝 `src/Markdig/Helpers/HtmlHelper.cs` (+40 -2) </details> ### 📄 Description Eliminate some branches in `HtmlHelper.TryParseHtmlTagOpenTag` (due to `||` comparisons) by using a bitmask approach instead. Further the generated machine code has a `test jump`-combo, which allows optimizations at cpu-level (macro fusion, etc.). Other places in that type seem not worthwile, as the count of `cmp`s can't be lowered that the current count is. ### Benchmarks The benchmarks are done on a trimmed-down version of the code, to just test these parts of the code changed. #### IsSpaceOrSpecialHtmlChar ``` | Method | Mean | Error | StdDev | Ratio | RatioSD | Code Size | |-------- |---------:|--------:|--------:|------:|--------:|----------:| | Default | 181.4 ns | 3.48 ns | 3.09 ns | 1.00 | 0.00 | 60 B | | Switch | 209.6 ns | 3.65 ns | 3.91 ns | 1.16 | 0.02 | 60 B | | Mask | 118.4 ns | 2.40 ns | 3.59 ns | 0.65 | 0.03 | 57 B | ``` <details> <summary>benchmark code</summary> ```c# using System.Diagnostics; using System.Runtime.CompilerServices; using BenchmarkDotNet.Attributes; Bench bench = new(); Console.WriteLine(bench.Default()); Console.WriteLine(bench.Switch()); Console.WriteLine(bench.Mask()); #if !DEBUG BenchmarkDotNet.Running.BenchmarkRunner.Run<Bench>(); #endif [DisassemblyDiagnoser] //[ShortRunJob] public class Bench { private static readonly char[] s_chars = { ' ', '\n', '"', '\'', '=', '<', '>', '`' }; public Bench() { foreach (char c in s_chars) { Console.WriteLine($"{(int)c}"); } Console.WriteLine(); } [Benchmark(Baseline = true)] public int Default() { int count = 0; for (int cc = char.MinValue; cc <= char.MaxValue; ++cc) { char c = (char)cc; if (IsMatch0(c)) { Debug.Assert(s_chars.Contains(c), $"{c} ({(int)c}) shouldn't be true"); count++; } } Debug.Assert(count == s_chars.Length, $"wrong count, expected {s_chars.Length}, actual: {count}"); return count; } //[Benchmark] public int Switch() { int count = 0; for (int cc = char.MinValue; cc <= char.MaxValue; ++cc) { char c = (char)cc; if (IsMatch00(c)) { Debug.Assert(s_chars.Contains(c), $"{c} ({(int)c}) shouldn't be true"); count++; } } Debug.Assert(count == s_chars.Length, $"wrong count, expected {s_chars.Length}, actual: {count}"); return count; } [Benchmark] public int Mask() { int count = 0; for (int cc = char.MinValue; cc <= char.MaxValue; ++cc) { char c = (char)cc; if (IsMatch1(c)) { Debug.Assert(s_chars.Contains(c), $"{c} ({(int)c}) shouldn't be true"); count++; } } Debug.Assert(count == s_chars.Length, $"wrong count, expected {s_chars.Length}, actual: {count}"); return count; } [MethodImpl(MethodImplOptions.AggressiveInlining)] private static bool IsMatch0(char c) { return c == ' ' || c == '\n' || c == '"' || c == '\'' || c == '=' || c == '<' || c == '>' || c == '`'; } [MethodImpl(MethodImplOptions.AggressiveInlining)] private static bool IsMatch00(char c) => c switch { ' ' or '\n' or '"' or '\'' or '=' or '<' or '>' or '`' => true, _ => false, }; [MethodImpl(MethodImplOptions.AggressiveInlining)] private static bool IsMatch1(char c) { if (c > '>') { return c == '`'; } const long BitMask = (1L << ' ') | (1L << '\n') | (1L << '"') | (1L << '\'') | (1L << '=') | (1L << '<') | (1L << '>'); return (BitMask & (1L << c)) != 0; } } ``` </details> #### IsCharToAppend ``` | Method | Mean | Error | StdDev | Ratio | RatioSD | Code Size | |-------- |---------:|--------:|--------:|------:|--------:|----------:| | Default | 131.0 ns | 2.22 ns | 2.07 ns | 1.00 | 0.00 | 40 B | | Mask | 121.6 ns | 1.65 ns | 1.37 ns | 0.93 | 0.02 | 53 B | ``` <details> <summary>benchmark code</summary> ```c# using System.Diagnostics; using System.Runtime.CompilerServices; using BenchmarkDotNet.Attributes; Bench bench = new(); Console.WriteLine(bench.Default()); Console.WriteLine(bench.Mask()); #if !DEBUG BenchmarkDotNet.Running.BenchmarkRunner.Run<Bench>(); #endif [DisassemblyDiagnoser] //[ShortRunJob] public class Bench { private static readonly char[] s_chars = { '_', ':', '.', '-' }; public Bench() { foreach (char c in s_chars) { Console.WriteLine($"{(int)c}"); } Console.WriteLine(); } [Benchmark(Baseline = true)] public int Default() { int count = 0; for (int cc = char.MinValue; cc < char.MaxValue; ++cc) { char c = (char)cc; if (IsMatch0(c)) { Debug.Assert(s_chars.Contains(c), $"{c} ({(int)c}) shouldn't be true"); count++; } } Debug.Assert(count == s_chars.Length, $"wrong count, expected {s_chars.Length}, actual: {count}"); return count; } [Benchmark] public int Mask() { int count = 0; for (int cc = char.MinValue; cc < char.MaxValue; ++cc) { char c = (char)cc; if (IsMatch1(c)) { Debug.Assert(s_chars.Contains(c), $"{c} ({(int)c}) shouldn't be true"); count++; } } Debug.Assert(count == s_chars.Length, $"wrong count, expected {s_chars.Length}, actual: {count}"); return count; } [MethodImpl(MethodImplOptions.AggressiveInlining)] private static bool IsMatch0(char c) { return c == '_' || c == ':' || c == '.' || c == '-'; } [MethodImpl(MethodImplOptions.AggressiveInlining)] private static bool IsMatch1(char c) { if ((uint)(c - '-') > '_' - '-') { return false; } const long BitMask = (1L << '_') | (1L << ':') | (1L << '.') | (1L << '-'); return (BitMask & (1L << c)) != 0; } } ``` </details> --- <sub>🔄 This issue represents a GitHub Pull Request. It cannot be merged through Gitea due to API limitations.</sub>
claunia added the pull-request label 2026-01-29 14:50:45 +00:00
Sign in to join this conversation.
1 Participants
Notifications
Due Date
No due date set.
Dependencies

No dependencies set.

Reference: starred/markdig#1165