AtlasEngine: Improve glyph generation performance (#13477)

#13458 added the ability to reuse tiles from our glyph atlas texture
so that we stop running out of GPU memory for complex Unicode.
This however can result in our glyph generation being a performance issue in
edge cases, to the point that the application may feel outright unuseable.

CJK glyphs for instance can easily exceed the maximum atlas texture size
(twice the window size), but take a significant amount of CPU and GPU time to
rasterize and draw, which results in "jelly scrolling" down to ~1 FPS.
This PR improves the situation of the latter half by directly drawing
glyphs into the texture atlas without an intermediate scratchpad texture.

This reduces GPU usage by 96% on my system (33% -> 2%) which improves general
render performance by ~100% (15 -> 30 FPS). CPU usage remains the same however,
but that's not really something we can do anything about at this time.
The atlas texture is already our primary means to reduce the CPU cost after all.

## Validation Steps Performed
* Disable V-Sync for OpenConsole in NVIDIA Control Panel
* Enable `debugGlyphGenerationPerformance`
* Print the entire CJK block U+4E00..U+9FFF
* Measure the above GPU usage and FPS improvements 
  (Alternatively: Just scroll around and judge the "jellyness".)
This commit is contained in:
Leonard Hecker
2022-07-11 20:31:21 +02:00
committed by GitHub
parent 32379c29f0
commit d74b66a0a7
3 changed files with 24 additions and 78 deletions

View File

@@ -929,8 +929,7 @@ void AtlasEngine::_recreateFontDependentResources()
{
// We're likely resizing the atlas anyways and can
// thus also release any of these buffers prematurely.
_r.d2dRenderTarget.reset(); // depends on _r.atlasScratchpad
_r.atlasScratchpad.reset();
_r.d2dRenderTarget.reset(); // depends on _r.atlasBuffer
_r.atlasView.reset();
_r.atlasBuffer.reset();
}
@@ -970,8 +969,6 @@ void AtlasEngine::_recreateFontDependentResources()
_r.strikethroughPos = _api.fontMetrics.strikethroughPos;
_r.lineThickness = _api.fontMetrics.lineThickness;
_r.dpi = _api.dpi;
_r.maxEncounteredCellCount = 0;
_r.scratchpadCellWidth = 0;
}
{
// See AtlasEngine::UpdateFont.
@@ -1446,7 +1443,6 @@ void AtlasEngine::_emplaceGlyph(IDWriteFontFace* fontFace, size_t bufferPos1, si
const auto it = _r.glyphs.insert(std::move(key), std::move(value));
valueRef = &it->second;
_r.glyphQueue.emplace_back(&it->first, &it->second);
_r.maxEncounteredCellCount = std::max(_r.maxEncounteredCellCount, cellCount);
}
// For some reason MSVC doesn't understand that valueRef is overwritten in the branch above, resulting in:

View File

@@ -857,11 +857,9 @@ namespace Microsoft::Console::Render
void _setShaderResources() const;
void _updateConstantBuffer() const noexcept;
void _adjustAtlasSize();
void _reserveScratchpadSize(u16 minWidth);
void _processGlyphQueue();
void _drawGlyph(const AtlasQueueItem& item) const;
void _drawCursor();
void _copyScratchpadTile(uint32_t scratchpadIndex, u16x2 target, uint32_t copyFlags = 0) const noexcept;
static constexpr bool debugGlyphGenerationPerformance = false;
static constexpr bool debugGeneralPerformance = false || debugGlyphGenerationPerformance;
@@ -906,7 +904,6 @@ namespace Microsoft::Console::Render
// D2D resources
wil::com_ptr<ID3D11Texture2D> atlasBuffer;
wil::com_ptr<ID3D11ShaderResourceView> atlasView;
wil::com_ptr<ID3D11Texture2D> atlasScratchpad;
wil::com_ptr<ID2D1RenderTarget> d2dRenderTarget;
wil::com_ptr<ID2D1Brush> brush;
wil::com_ptr<IDWriteTextFormat> textFormats[2][2];
@@ -921,8 +918,6 @@ namespace Microsoft::Console::Render
u16 strikethroughPos = 0;
u16 lineThickness = 0;
u16 dpi = USER_DEFAULT_SCREEN_DPI; // invalidated by ApiInvalidations::Font, caches _api.dpi
u16 maxEncounteredCellCount = 0;
u16 scratchpadCellWidth = 0;
u16x2 atlasSizeInPixel; // invalidated by ApiInvalidations::Font
TileHashMap glyphs;
TileAllocator tileAllocator;

View File

@@ -31,7 +31,6 @@ using namespace Microsoft::Console::Render;
try
{
_adjustAtlasSize();
_reserveScratchpadSize(_r.maxEncounteredCellCount);
_processGlyphQueue();
if (WI_IsFlagSet(_r.invalidations, RenderInvalidations::Cursor))
@@ -86,6 +85,7 @@ try
}
catch (const wil::ResultException& exception)
{
// TODO: this writes to _api.
return _handleException(exception);
}
CATCH_RETURN()
@@ -159,7 +159,7 @@ void AtlasEngine::_adjustAtlasSize()
desc.ArraySize = 1;
desc.Format = DXGI_FORMAT_B8G8R8A8_UNORM;
desc.SampleDesc = { 1, 0 };
desc.BindFlags = D3D11_BIND_SHADER_RESOURCE;
desc.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET;
THROW_IF_FAILED(_r.device->CreateTexture2D(&desc, nullptr, atlasBuffer.addressof()));
THROW_IF_FAILED(_r.device->CreateShaderResourceView(atlasBuffer.get(), nullptr, atlasView.addressof()));
}
@@ -184,38 +184,8 @@ void AtlasEngine::_adjustAtlasSize()
_r.atlasView = std::move(atlasView);
_setShaderResources();
WI_SetFlagIf(_r.invalidations, RenderInvalidations::Cursor, !copyFromExisting);
}
void AtlasEngine::_reserveScratchpadSize(u16 minWidth)
{
if (minWidth <= _r.scratchpadCellWidth)
{
return;
}
// The new size is the greater of ... cells wide:
// * 2
// * minWidth
// * current size * 1.5
const auto newWidth = std::max<UINT>(std::max<UINT>(2, minWidth), _r.scratchpadCellWidth + (_r.scratchpadCellWidth >> 1));
_r.d2dRenderTarget.reset();
_r.atlasScratchpad.reset();
{
D3D11_TEXTURE2D_DESC desc{};
desc.Width = _r.cellSize.x * newWidth;
desc.Height = _r.cellSize.y;
desc.MipLevels = 1;
desc.ArraySize = 1;
desc.Format = DXGI_FORMAT_B8G8R8A8_UNORM;
desc.SampleDesc = { 1, 0 };
desc.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET;
THROW_IF_FAILED(_r.device->CreateTexture2D(&desc, nullptr, _r.atlasScratchpad.put()));
}
{
const auto surface = _r.atlasScratchpad.query<IDXGISurface>();
const auto surface = _r.atlasBuffer.query<IDXGISurface>();
wil::com_ptr<IDWriteRenderingParams1> renderingParams;
DWrite_GetRenderParams(_sr.dwriteFactory.get(), &_r.gamma, &_r.cleartypeEnhancedContrast, &_r.grayscaleEnhancedContrast, renderingParams.addressof());
@@ -243,8 +213,8 @@ void AtlasEngine::_reserveScratchpadSize(u16 minWidth)
_r.brush = brush.query<ID2D1Brush>();
}
_r.scratchpadCellWidth = _r.maxEncounteredCellCount;
WI_SetAllFlags(_r.invalidations, RenderInvalidations::ConstBuffer);
WI_SetFlagIf(_r.invalidations, RenderInvalidations::Cursor, !copyFromExisting);
}
void AtlasEngine::_processGlyphQueue()
@@ -254,10 +224,12 @@ void AtlasEngine::_processGlyphQueue()
return;
}
_r.d2dRenderTarget->BeginDraw();
for (const auto& pair : _r.glyphQueue)
{
_drawGlyph(pair);
}
THROW_IF_FAILED(_r.d2dRenderTarget->EndDraw());
_r.glyphQueue.clear();
}
@@ -280,7 +252,7 @@ void AtlasEngine::_drawGlyph(const AtlasQueueItem& item) const
textLayout->SetTypography(_r.typography.get(), { 0, charsLength });
}
auto options = D2D1_DRAW_TEXT_OPTIONS_NONE;
auto options = D2D1_DRAW_TEXT_OPTIONS_CLIP;
// D2D1_DRAW_TEXT_OPTIONS_ENABLE_COLOR_FONT enables a bunch of internal machinery
// which doesn't have to run if we know we can't use it anyways in the shader.
WI_SetFlagIf(options, D2D1_DRAW_TEXT_OPTIONS_ENABLE_COLOR_FONT, coloredGlyph);
@@ -294,31 +266,29 @@ void AtlasEngine::_drawGlyph(const AtlasQueueItem& item) const
_r.d2dRenderTarget->SetTextAntialiasMode(coloredGlyph ? D2D1_TEXT_ANTIALIAS_MODE_GRAYSCALE : D2D1_TEXT_ANTIALIAS_MODE_CLEARTYPE);
}
_r.d2dRenderTarget->BeginDraw();
// We could call
// _r.d2dRenderTarget->PushAxisAlignedClip(&rect, D2D1_ANTIALIAS_MODE_ALIASED);
// now to reduce the surface that needs to be cleared, but this decreases
// performance by 10% (tested using debugGlyphGenerationPerformance).
_r.d2dRenderTarget->Clear();
_r.d2dRenderTarget->DrawTextLayout({}, textLayout.get(), _r.brush.get(), options);
THROW_IF_FAILED(_r.d2dRenderTarget->EndDraw());
for (u32 i = 0; i < cells; ++i)
{
// Specifying NO_OVERWRITE means that the system can assume that existing references to the surface that
// may be in flight on the GPU will not be affected by the update, so the copy can proceed immediately
// (avoiding either a batch flush or the system maintaining multiple copies of the resource behind the scenes).
//
// Since our shader only draws whatever is in the atlas, and since we don't replace glyph tiles that are in use,
// we can safely (?) tell the GPU that we don't overwrite parts of our atlas that are in use.
_copyScratchpadTile(i, coords[i], D3D11_COPY_NO_OVERWRITE);
const auto coord = coords[i];
D2D1_RECT_F rect;
rect.left = static_cast<float>(coord.x) * static_cast<float>(USER_DEFAULT_SCREEN_DPI) / static_cast<float>(_r.dpi);
rect.top = static_cast<float>(coord.y) * static_cast<float>(USER_DEFAULT_SCREEN_DPI) / static_cast<float>(_r.dpi);
rect.right = rect.left + _r.cellSizeDIP.x;
rect.bottom = rect.top + _r.cellSizeDIP.y;
D2D1_POINT_2F origin;
origin.x = rect.left - i * _r.cellSizeDIP.x;
origin.y = rect.top;
_r.d2dRenderTarget->PushAxisAlignedClip(&rect, D2D1_ANTIALIAS_MODE_ALIASED);
_r.d2dRenderTarget->Clear();
_r.d2dRenderTarget->DrawTextLayout(origin, textLayout.get(), _r.brush.get(), options);
_r.d2dRenderTarget->PopAxisAlignedClip();
}
}
void AtlasEngine::_drawCursor()
{
_reserveScratchpadSize(1);
// lineWidth is in D2D's DIPs. For instance if we have a 150-200% zoom scale we want to draw a 2px wide line.
// At 150% scale lineWidth thus needs to be 1.33333... because at a zoom scale of 1.5 this results in a 2px wide line.
const auto lineWidth = std::max(1.0f, static_cast<float>((_r.dpi + USER_DEFAULT_SCREEN_DPI / 2) / USER_DEFAULT_SCREEN_DPI * USER_DEFAULT_SCREEN_DPI) / static_cast<float>(_r.dpi));
@@ -377,19 +347,4 @@ void AtlasEngine::_drawCursor()
}
THROW_IF_FAILED(_r.d2dRenderTarget->EndDraw());
_copyScratchpadTile(0, {});
}
void AtlasEngine::_copyScratchpadTile(uint32_t scratchpadIndex, u16x2 target, uint32_t copyFlags) const noexcept
{
D3D11_BOX box;
box.left = scratchpadIndex * _r.cellSize.x;
box.top = 0;
box.front = 0;
box.right = box.left + _r.cellSize.x;
box.bottom = _r.cellSize.y;
box.back = 1;
#pragma warning(suppress : 26447) // The function is declared 'noexcept' but calls function '...' which may throw exceptions (f.6).
_r.deviceContext->CopySubresourceRegion1(_r.atlasBuffer.get(), 0, target.x, target.y, 0, _r.atlasScratchpad.get(), 0, &box, copyFlags);
}