wip

2026-04-14 10:11:00 +00:00 · 2024-12-07 02:27:47 +01:00
7 changed files with 248 additions and 1025 deletions
--- a/src/renderer/atlas/AtlasEngine.r.cpp
+++ b/src/renderer/atlas/AtlasEngine.r.cpp
@@ -323,7 +323,7 @@ void AtlasEngine::_createSwapChain()
        .Height = _p.s->targetSize.y,
        .Format = DXGI_FORMAT_B8G8R8A8_UNORM,
        .SampleDesc = { .Count = 1 },
-        .BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT,
+        .BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT | DXGI_USAGE_UNORDERED_ACCESS,
        // Sometimes up to 2 buffers are locked, for instance during screen capture or when moving the window.
        // 3 buffers seems to guarantee a stable framerate at display frequency at all times.
        .BufferCount = 3,
--- a/src/renderer/atlas/BackendD3D.cpp
+++ b/src/renderer/atlas/BackendD3D.cpp
--- a/src/renderer/atlas/BackendD3D.h
+++ b/src/renderer/atlas/BackendD3D.h
@@ -19,22 +19,8 @@ namespace Microsoft::Console::Render::Atlas
        void Render(RenderingPayload& payload) override;
        bool RequiresContinuousRedraw() noexcept override;

-        // NOTE: D3D constant buffers sizes must be a multiple of 16 bytes.
-        struct alignas(16) VSConstBuffer
-        {
-            // WARNING: Modify this carefully after understanding how HLSL struct packing works. The gist is:
-            // * Minimum alignment is 4 bytes
-            // * Members cannot straddle 16 byte boundaries
-            //   This means a structure like {u32; u32; u32; u32x2} would require
-            //   padding so that it is {u32; u32; u32; <4 byte padding>; u32x2}.
-            // * bool will probably not work the way you want it to,
-            //   because HLSL uses 32-bit bools and C++ doesn't.
-            alignas(sizeof(f32x2)) f32x2 positionScale;
-#pragma warning(suppress : 4324) // 'VSConstBuffer': structure was padded due to alignment specifier
-        };
-
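-        // WARNING: Same rules as for VSConstBuffer above apply.
+        // WARNING: D3D constant buffer sizes must be a multiple of 16 bytes and HLSL struct packing rules apply: 4 byte minimum alignment, members cannot straddle 16 byte boundaries, and HLSL bools are 32-bit unlike C++ bools.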
-        struct alignas(16) PSConstBuffer
+        struct alignas(16) ConstBuffer
        {
            alignas(sizeof(f32x4)) f32x4 backgroundColor;
            alignas(sizeof(f32x2)) f32x2 backgroundCellSize;
@@ -98,6 +84,15 @@ namespace Microsoft::Console::Render::Atlas
            alignas(u32) u16x2 texcoord;
            alignas(u32) u32 color;
        };
+        
+        struct Sprite // Same fields, in the same order, as `struct Sprite` in shader_cs.hlsl. NOTE(review): looks like the CPU-side twin of that struct - keep both in sync.
+        {
+            u32x2 position; // shader_cs.hlsl subtracts this from SV_DispatchThreadID, so it shares the coordinate space of the shader output.
+            u32x2 size; // shader_cs.hlsl treats a pixel as covered when (pixel - position) < size on both axes.
+            u32x2 texcoord; // shader_cs.hlsl reads the glyph texture at texcoord + (pixel - position).
+            u32 color; // shader_cs.hlsl unpacks this with decodeRGBA().
+            u32 padding; // Not read by shader_cs.hlsl. NOTE(review): presumably only rounds the struct up to 8 x u32 - confirm.
+        };

        // NOTE: Don't initialize any members in this struct. This ensures that no
        // zero-initialization needs to occur when we allocate large buffers of this object.
@@ -240,11 +235,8 @@ namespace Microsoft::Console::Render::Atlas
        ATLAS_ATTR_COLD void _resetGlyphAtlas(const RenderingPayload& p, u32 minWidth, u32 minHeight);
        ATLAS_ATTR_COLD void _resizeGlyphAtlas(const RenderingPayload& p, u16 u, u16 v);
        static bool _checkMacTypeVersion(const RenderingPayload& p);
-        QuadInstance& _getLastQuad() noexcept;
-        QuadInstance& _appendQuad();
-        ATLAS_ATTR_COLD void _bumpInstancesSize();
+        void _appendQuad(Sprite sprite);
        void _flushQuads(const RenderingPayload& p);
-        ATLAS_ATTR_COLD void _recreateInstanceBuffers(const RenderingPayload& p);
        void _drawBackground(const RenderingPayload& p);
        void _uploadBackgroundBitmap(const RenderingPayload& p);
        void _drawText(RenderingPayload& p);
@@ -263,19 +255,16 @@ namespace Microsoft::Console::Render::Atlas
        void _drawSelection(const RenderingPayload& p);
        void _executeCustomShader(RenderingPayload& p);

-        wil::com_ptr<ID3D11RenderTargetView> _renderTargetView;
-        wil::com_ptr<ID3D11InputLayout> _inputLayout;
-        wil::com_ptr<ID3D11VertexShader> _vertexShader;
-        wil::com_ptr<ID3D11PixelShader> _pixelShader;
-        wil::com_ptr<ID3D11BlendState> _blendState;
-        wil::com_ptr<ID3D11Buffer> _vsConstantBuffer;
-        wil::com_ptr<ID3D11Buffer> _psConstantBuffer;
-        wil::com_ptr<ID3D11Buffer> _vertexBuffer;
-        wil::com_ptr<ID3D11Buffer> _indexBuffer;
-        wil::com_ptr<ID3D11Buffer> _instanceBuffer;
-        size_t _instanceBufferCapacity = 0;
-        Buffer<QuadInstance, 32> _instances;
-        size_t _instancesCount = 0;
+        wil::com_ptr<ID3D11UnorderedAccessView> _renderTargetView; // NOTE(review): an unordered access view despite the name - presumably bound as `output` (u0) of shader_cs.hlsl; confirm in BackendD3D.cpp.
+        wil::com_ptr<ID3D11ComputeShader> _computeShader;
+        wil::com_ptr<ID3D11Buffer> _constantBuffer;
+        wil::com_ptr<ID3D11Texture2D> _tileBuffer; // NOTE(review): presumably backs `tiles` (t1) of shader_cs.hlsl - confirm in BackendD3D.cpp.
+        wil::com_ptr<ID3D11ShaderResourceView> _tileBufferView;
+        wil::com_ptr<ID3D11Buffer> _spriteBuffer; // NOTE(review): presumably backs `sprites` (t2) of shader_cs.hlsl - confirm in BackendD3D.cpp.
+        wil::com_ptr<ID3D11ShaderResourceView> _spriteBufferView;
+        std::vector<std::vector<Sprite>> _tileQueues; // NOTE(review): looks like one pending sprite list per tile - confirm in BackendD3D.cpp.
+        std::vector<u32x2> _tiles; // NOTE(review): element type matches the uint2 texels of `tiles` in shader_cs.hlsl - presumably its CPU-side copy.
+        std::vector<Sprite> _sprites;

        wil::com_ptr<ID3D11RenderTargetView> _customRenderTargetView;
        wil::com_ptr<ID3D11Texture2D> _customOffscreenTexture;
@@ -321,6 +310,7 @@ namespace Microsoft::Console::Render::Atlas
        til::generation_t _miscGeneration;
        u16x2 _targetSize{};
        u16x2 _viewportCellCount{};
+        u16x2 _shaderCellCount{};
        ShadingType _textShadingType = ShadingType::Default;

        // An empty-box cursor spanning a wide glyph that has different
--- a/src/renderer/atlas/atlas.vcxproj
+++ b/src/renderer/atlas/atlas.vcxproj
@@ -51,11 +51,10 @@
    <FxCompile Include="shader_common.hlsl">
      <ExcludedFromBuild>true</ExcludedFromBuild>
    </FxCompile>
-    <FxCompile Include="shader_ps.hlsl">
-      <ShaderType>Pixel</ShaderType>
-    </FxCompile>
-    <FxCompile Include="shader_vs.hlsl">
-      <ShaderType>Vertex</ShaderType>
+    <FxCompile Include="shader_cs.hlsl">
+      <!-- Unconditional on purpose: the compute shader is built the same way for every configuration and platform. It writes to an RWTexture2D, which needs the cs_5_0 profile. -->
+      <ShaderType>Compute</ShaderType>
+      <ShaderModel>5.0</ShaderModel>
    </FxCompile>
  </ItemGroup>
  <ItemGroup>
@@ -79,4 +78,4 @@
      <AdditionalOptions Condition="'$(Configuration)'=='Release'">/O3 /Zsb /Fd $(OutDir)$(ProjectName)\ /Qstrip_debug /Qstrip_reflect %(AdditionalOptions)</AdditionalOptions>
    </FxCompile>
  </ItemDefinitionGroup>
-</Project>
+</Project>
--- a/src/renderer/atlas/shader_cs.hlsl
+++ b/src/renderer/atlas/shader_cs.hlsl
@@ -0,0 +1,58 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT license.
+
+#include "dwrite.hlsl"
+#include "shader_common.hlsl"
+
+cbuffer ConstBuffer : register(b0) // Same members as the cbuffer of the removed shader_ps.hlsl. NOTE(review): presumably mirrors `ConstBuffer` in BackendD3D.h - keep both layouts in sync.
+{
+    float4 backgroundColor; // The only member main() in this file reads: it is the start value of every output pixel.
+    float2 backgroundCellSize;
+    float2 backgroundCellCount;
+    float4 gammaRatios;
+    float enhancedContrast;
+    float underlineWidth;
+    float doubleUnderlineWidth;
+    float curlyLineHalfHeight;
+    float shadedGlyphDotSize;
+}
+
+struct Sprite // Element type of the `sprites` buffer. Same fields, in the same order, as `Sprite` in BackendD3D.h.
+{
+    uint2 position; // Subtracted from SV_DispatchThreadID in main(), so it shares the coordinate space of `output`.
+    uint2 size; // A pixel is covered by the sprite when (pixel - position) < size on both axes.
+    uint2 texcoord; // Texel of `glyphs` that corresponds to `position`.
+    uint color; // Packed color; main() unpacks it with decodeRGBA().
+    uint padding; // Not read by main(); brings the struct to 8 uints.
+};
+
+Texture2D<float4> glyphs : register(t0); // Read at sprite.texcoord + offset for every pixel that a sprite covers.
+Texture2D<uint2> tiles : register(t1); // Indexed by SV_GroupID: .x is the first and .y the one-past-last index into `sprites`.
+StructuredBuffer<Sprite> sprites : register(t2); // Each thread group walks the index range given by its `tiles` texel.
+RWTexture2D<float4> output : register(u0); // Written once per thread at SV_DispatchThreadID.
+
+[numthreads(8, 8, 1)] // One thread per output pixel; every 8x8 thread group shares a single `tiles` texel.
+void main(uint2 groupId : SV_GroupID, uint2 dispatchThreadId : SV_DispatchThreadID)
+{
+    uint2 tile = tiles[groupId]; // [tile.x, tile.y) is this group's index range into `sprites`.
+    float4 color = backgroundColor;
+    float4 alphas = float4(0,0,0,0); // Per-channel maximum over all glyph texels that cover this pixel.
+
+    for (uint i = tile.x; i < tile.y; i++)
+    {
+        Sprite sprite = sprites[i];
+        // Offset of this pixel from sprite.position. The math is unsigned: left of / above the sprite it wraps around to a huge value...
+        uint2 offset = dispatchThreadId - sprite.position;
+        // ...so this single comparison rejects pixels on all four sides. Only pixels inside the sprite sample the glyph texture.
+        if (all(offset < sprite.size))
+        {
+            uint2 src = sprite.texcoord + offset;
+            float4 glyph = glyphs[src];
+            color = alphaBlendPremultiplied(color, decodeRGBA(sprite.color)); // NOTE(review): `glyph` is not part of this blend; it only feeds `alphas` below - confirm.
+            alphas = max(alphas, glyph);
+        }
+    }
+
+    color *= alphas; // NOTE(review): pixels that no sprite covers still have alphas == 0 here, so backgroundColor is zeroed out as well - confirm that this is intended.
+    output[dispatchThreadId] = color;
+}
--- a/src/renderer/atlas/shader_ps.hlsl
+++ b/src/renderer/atlas/shader_ps.hlsl
@@ -1,224 +0,0 @@
-// Copyright (c) Microsoft Corporation.
-// Licensed under the MIT license.
-
-#include "dwrite.hlsl"
-#include "shader_common.hlsl"
-
-cbuffer ConstBuffer : register(b0)
-{
-    float4 backgroundColor;
-    float2 backgroundCellSize;
-    float2 backgroundCellCount;
-    float4 gammaRatios;
-    float enhancedContrast;
-    float underlineWidth;
-    float doubleUnderlineWidth;
-    float curlyLineHalfHeight;
-    float shadedGlyphDotSize;
-}
-
-Texture2D<float4> background : register(t0);
-Texture2D<float4> glyphAtlas : register(t1);
-
-struct Output
-{
-    float4 color;
-    float4 weights;
-};
-
-// clang-format off
-Output main(PSData data) : SV_Target
-// clang-format on
-{
-    float4 color;
-    float4 weights;
-
-    switch (data.shadingType)
-    {
-    case SHADING_TYPE_TEXT_BACKGROUND:
-    {
-        float2 cell = data.position.xy / backgroundCellSize;
-        color = all(cell < backgroundCellCount) ? background[cell] : backgroundColor;
-        weights = float4(1, 1, 1, 1);
-        break;
-    }
-    case SHADING_TYPE_TEXT_GRAYSCALE:
-    {
-        // These are independent of the glyph texture and could be moved to the vertex shader or CPU side of things.
-        float4 foreground = premultiplyColor(data.color);
-        float blendEnhancedContrast = DWrite_ApplyLightOnDarkContrastAdjustment(enhancedContrast, data.color.rgb);
-        float intensity = DWrite_CalcColorIntensity(data.color.rgb);
-        // These aren't.
-        float4 glyph = glyphAtlas[data.texcoord];
-        float contrasted = DWrite_EnhanceContrast(glyph.a, blendEnhancedContrast);
-        float alphaCorrected = DWrite_ApplyAlphaCorrection(contrasted, intensity, gammaRatios);
-        color = alphaCorrected * foreground;
-        weights = color.aaaa;
-        break;
-    }
-    case SHADING_TYPE_TEXT_CLEARTYPE:
-    {
-        // These are independent of the glyph texture and could be moved to the vertex shader or CPU side of things.
-        float blendEnhancedContrast = DWrite_ApplyLightOnDarkContrastAdjustment(enhancedContrast, data.color.rgb);
-        // These aren't.
-        float4 glyph = glyphAtlas[data.texcoord];
-        float3 contrasted = DWrite_EnhanceContrast3(glyph.rgb, blendEnhancedContrast);
-        float3 alphaCorrected = DWrite_ApplyAlphaCorrection3(contrasted, data.color.rgb, gammaRatios);
-        weights = float4(alphaCorrected * data.color.a, 1);
-        color = weights * data.color;
-        break;
-    }
-    case SHADING_TYPE_TEXT_BUILTIN_GLYPH:
-    {
-        // The RGB components of builtin glyphs are used to control the generation of pixel patterns in this shader.
-        // Below you can see their intended effects where # indicates lit pixels.
-        //
-        // .r = stretch
-        //      0: #_#_#_#_
-        //         _#_#_#_#
-        //         #_#_#_#_
-        //         _#_#_#_#
-        //
-        //      1: #___#___
-        //         __#___#_
-        //         #___#___
-        //         __#___#_
-        //
-        // .g = invert
-        //      0: #_#_#_#_
-        //         _#_#_#_#
-        //         #_#_#_#_
-        //         _#_#_#_#
-        //
-        //      1: _#_#_#_#
-        //         #_#_#_#_
-        //         _#_#_#_#
-        //         #_#_#_#_
-        //
-        // .r = fill
-        //      0: #_#_#_#_
-        //         _#_#_#_#
-        //         #_#_#_#_
-        //         _#_#_#_#
-        //
-        //      1: ########
-        //         ########
-        //         ########
-        //         ########
-        //
-        float4 glyph = glyphAtlas[data.texcoord];
-        float2 pos = floor(data.position.xy / (shadedGlyphDotSize * data.renditionScale));
-
-        // A series of on/off/on/off/on/off pixels can be generated with:
-        //   step(frac(x * 0.5f), 0)
-        // The inner frac(x * 0.5f) will generate a series of
-        //   0, 0.5, 0, 0.5, 0, 0.5
-        // and the step() will transform that to
-        //   1,   0, 1,   0, 1,   0
-        //
-        // We can now turn that into a checkerboard pattern quite easily,
-        // if we imagine the fields of the checkerboard like this:
-        //   +---+---+---+
-        //   | 0 | 1 | 2 |
-        //   +---+---+---+
-        //   | 1 | 2 | 3 |
-        //   +---+---+---+
-        //   | 2 | 3 | 4 |
-        //   +---+---+---+
-        //
-        // Because this means we just need to set
-        //   x = pos.x + pos.y
-        // and so we end up with
-        //   step(frac(dot(pos, 0.5f)), 0)
-        //
-        // Finally, we need to implement the "stretch" explained above, which can
-        // be easily achieved by simply replacing the factor 0.5 with 0.25 like so
-        //   step(frac(x * 0.25f), 0)
-        // as this gets us
-        //   0, 0.25, 0.5, 0.75, 0, 0.25, 0.5, 0.75
-        // = 1,    0,   0,    0, 1,    0,   0,    0
-        //
-        // Of course we only want to apply that to the X axis, which means
-        // below we end up having 2 different multipliers for the dot().
-        float stretched = step(frac(dot(pos, float2(glyph.r * -0.25f + 0.5f, 0.5f))), 0) * glyph.a;
-        // Thankfully the remaining 2 operations are a lot simpler.
-        float inverted = abs(glyph.g - stretched);
-        float filled = max(glyph.b, inverted);
-
-        color = premultiplyColor(data.color) * filled;
-        weights = color.aaaa;
-        break;
-    }
-    case SHADING_TYPE_TEXT_PASSTHROUGH:
-    {
-        color = glyphAtlas[data.texcoord];
-        weights = color.aaaa;
-        break;
-    }
-    case SHADING_TYPE_DOTTED_LINE:
-    {
-        bool on = frac(data.position.x / (3.0f * underlineWidth * data.renditionScale.x)) < (1.0f / 3.0f);
-        color = on * premultiplyColor(data.color);
-        weights = color.aaaa;
-        break;
-    }
-    case SHADING_TYPE_DASHED_LINE:
-    {
-        bool on = frac(data.position.x / (6.0f * underlineWidth * data.renditionScale.x)) < (4.0f / 6.0f);
-        color = on * premultiplyColor(data.color);
-        weights = color.aaaa;
-        break;
-    }
-    case SHADING_TYPE_CURLY_LINE:
-    {
-        // The curly line has the same thickness as a double underline.
-        // We halve it to make the math a bit easier.
-        float strokeWidthHalf = doubleUnderlineWidth * data.renditionScale.y * 0.5f;
-        float center = curlyLineHalfHeight * data.renditionScale.y;
-        float amplitude = center - strokeWidthHalf;
-        // We multiply the frequency by pi/2 to get a sine wave which has an integer period.
-        // This makes every period of the wave look exactly the same.
-        float frequency = 1.57079632679489661923f / (curlyLineHalfHeight * data.renditionScale.x);
-        // At very small sizes, like when the wave is just 3px tall and 1px wide, it'll look too fat and/or blurry.
-        // Because we multiplied our frequency with pi, the extrema of the curve and its intersections with the
-        // centerline always occur right between two pixels. This causes both to be lit with the same color.
-        // By adding a small phase shift, we can break this symmetry up. It'll make the wave look a lot more crispy.
-        float phase = 1.57079632679489661923f;
-        float sine = sin(data.position.x * frequency + phase);
-        // We use the distance to the sine curve as its alpha value - the closer the more opaque.
-        // To give it a smooth appearance we don't want to simply calculate the vertical distance to the curve:
-        //   abs(pixel.y - sin(pixel.x))
-        //
-        // ...because while a pixel may be vertically far away it may be horizontally close to the sine curve.
-        // We need a proper distance calculation. This makes a large difference at especially small font sizes.
-        //
-        // While calculating the distance to a sine curve is complex, calculating the distance to its tangent is easy,
-        // because tangents are straight lines and line-point distance are trivial. The tangent of sin(x) is cos(x).
-        // The line-point distance is the vertical distance multiplied by the cos(angle) of the line.
-        // To turn out tangent cos(x) into an angle we need to calculate atan(cos(x)). This nets us:
-        //   abs(pixel.y - sin(pixel.x)) * cos(atan(cos(pixel.x))
-        //
-        // The expanded sine form of cos(atan(cos(x))) is 1 / sqrt(2 - sin(x)^2), which results in:
-        //   abs(pixel.y - sin(pixel.x)) * rsqrt(2 - sin(pixel.x)^2)
-        float distance = abs(center - data.texcoord.y - sine * amplitude) * rsqrt(2 - sine * sine);
-        // Since pixel coordinates are always offset by half a pixel (i.e. data.texcoord is 1.5f, 2.5f, 3.5f, ...)
-        // the distance is also off by half a pixel. We undo that by adding half a pixel to the distance.
-        // This gives the line its proper thickness appearance.
-        float a = 1 - saturate(distance - strokeWidthHalf + 0.5f);
-        color = a * premultiplyColor(data.color);
-        weights = color.aaaa;
-        break;
-    }
-    default:
-    {
-        color = premultiplyColor(data.color);
-        weights = color.aaaa;
-        break;
-    }
-    }
-
-    Output output;
-    output.color = color;
-    output.weights = weights;
-    return output;
-}
--- a/src/renderer/atlas/shader_vs.hlsl
+++ b/src/renderer/atlas/shader_vs.hlsl
@@ -1,25 +0,0 @@
-// Copyright (c) Microsoft Corporation.
-// Licensed under the MIT license.
-
-#include "shader_common.hlsl"
-
-cbuffer ConstBuffer : register(b0)
-{
-    float2 positionScale;
-}
-
-// clang-format off
-PSData main(VSData data)
-// clang-format on
-{
-    PSData output;
-    output.color = data.color;
-    output.shadingType = data.shadingType;
-    output.renditionScale = data.renditionScale;
-    // positionScale is expected to be float2(2.0f / sizeInPixel.x, -2.0f / sizeInPixel.y). Together with the
-    // addition below this will transform our "position" from pixel into normalized device coordinate (NDC) space.
-    output.position.xy = (data.position + data.vertex.xy * data.size) * positionScale + float2(-1.0f, 1.0f);
-    output.position.zw = float2(0, 1);
-    output.texcoord = data.texcoord + data.vertex.xy * data.size;
-    return output;
-}