AtlasEngine: Implement LRU invalidation for glyph tiles (#13458)

Until now, AtlasEngine could only grow the backing texture atlas once it got
full; it had no way to reuse existing tiles. This commit adds LRU
capabilities to the glyph-to-tile hashmap, allowing us to reuse the least
recently used tiles for new ones once the atlas texture is full.
This commit uses a quadratic growth factor with power-of-2 textures,
resulting in a backing atlas of 1x to 2x the size of the window.
While AtlasEngine is still incapable of shrinking the texture, it'll now at
least not grow to 128MB or result in weird glitches under most circumstances.

## Validation Steps Performed
* Print `utf8_sequence_0-0x2ffff_assigned_printable_unseparated.txt`
  from https://github.com/bits/UTF-8-Unicode-Test-Documents
* Scroll back up to the top
* PowerShell input line is still there rendering as ASCII. 
This commit is contained in:
Leonard Hecker
2022-07-11 14:23:48 +02:00
committed by GitHub
parent bbc570d107
commit 66f4f9d9ea
3 changed files with 314 additions and 166 deletions

View File

@@ -25,20 +25,6 @@
using namespace Microsoft::Console::Render;
#pragma warning(push)
#pragma warning(disable : 26447) // The function is declared 'noexcept' but calls function 'operator()()' which may throw exceptions (f.6).
__declspec(noinline) static void showOOMWarning() noexcept
{
[[maybe_unused]] static const auto once = []() {
std::thread t{ []() noexcept {
MessageBoxW(nullptr, L"This application is using a highly experimental text rendering engine and has run out of memory. Text rendering will start to behave irrationally and you should restart this process.", L"Out Of Memory", MB_ICONERROR | MB_OK);
} };
t.detach();
return false;
}();
}
#pragma warning(pop)
struct TextAnalyzer final : IDWriteTextAnalysisSource, IDWriteTextAnalysisSink
{
constexpr TextAnalyzer(const std::vector<wchar_t>& text, std::vector<AtlasEngine::TextAnalyzerResult>& results) noexcept :
@@ -365,12 +351,14 @@ try
}
}
_api.dirtyRect = til::rect{
0,
_api.invalidatedRows.x,
_api.cellCount.x,
_api.invalidatedRows.y,
};
if constexpr (debugGlyphGenerationPerformance)
{
_api.dirtyRect = til::rect{ 0, 0, _api.cellCount.x, _api.cellCount.y };
}
else
{
_api.dirtyRect = til::rect{ 0, _api.invalidatedRows.x, _api.cellCount.x, _api.invalidatedRows.y };
}
return S_OK;
}
@@ -394,7 +382,7 @@ CATCH_RETURN()
[[nodiscard]] bool AtlasEngine::RequiresContinuousRedraw() noexcept
{
return continuousRedraw;
return debugGeneralPerformance;
}
void AtlasEngine::WaitUntilCanRender() noexcept
@@ -559,9 +547,10 @@ try
const auto point = options.coordCursor;
// TODO: options.coordCursor can contain invalid out of bounds coordinates when
// the window is being resized and the cursor is on the last line of the viewport.
const auto x = gsl::narrow_cast<uint16_t>(clamp<int>(point.X, 0, _r.cellCount.x - 1));
const auto y = gsl::narrow_cast<uint16_t>(clamp<int>(point.Y, 0, _r.cellCount.y - 1));
const auto right = gsl::narrow_cast<uint16_t>(x + 1 + (options.fIsDoubleWidth & (options.cursorType != CursorType::VerticalBar)));
const auto x = gsl::narrow_cast<uint16_t>(clamp(point.X, 0, _r.cellCount.x - 1));
const auto y = gsl::narrow_cast<uint16_t>(clamp(point.Y, 0, _r.cellCount.y - 1));
const auto cursorWidth = 1 + (options.fIsDoubleWidth & (options.cursorType != CursorType::VerticalBar));
const auto right = gsl::narrow_cast<uint16_t>(clamp(x + cursorWidth, 0, _r.cellCount.x - 0));
const auto bottom = gsl::narrow_cast<uint16_t>(y + 1);
_setCellFlags({ x, y, right, bottom }, CellFlags::Cursor, CellFlags::Cursor);
}
@@ -775,7 +764,7 @@ void AtlasEngine::_createSwapChain()
// D3D swap chain setup (the thing that allows us to present frames on the screen)
{
const auto supportsFrameLatencyWaitableObject = IsWindows8Point1OrGreater();
const auto supportsFrameLatencyWaitableObject = !debugGeneralPerformance && IsWindows8Point1OrGreater();
// With C++20 we'll finally have designated initializers.
DXGI_SWAP_CHAIN_DESC1 desc{};
@@ -899,6 +888,7 @@ void AtlasEngine::_recreateSizeDependentResources()
// (40x on AMD Zen1-3, which have a rep movsb performance issue. MSFT:33358259.)
_r.cells = Buffer<Cell, 32>{ totalCellCount };
_r.cellCount = _api.cellCount;
_r.tileAllocator.setMaxArea(_api.sizeInPixel);
// .clear() doesn't free the memory of these buffers.
// This code allows them to shrink again.
@@ -947,32 +937,14 @@ void AtlasEngine::_recreateFontDependentResources()
// D3D
{
// TODO: Consider using IDXGIAdapter3::QueryVideoMemoryInfo() and IDXGIAdapter3::RegisterVideoMemoryBudgetChangeNotificationEvent()
// That way we can make better to use of a user's available video memory.
static constexpr size_t sizePerPixel = 4;
static constexpr size_t sizeLimit = D3D10_REQ_RESOURCE_SIZE_IN_MEGABYTES * 1024 * 1024;
const size_t dimensionLimit = _r.device->GetFeatureLevel() >= D3D_FEATURE_LEVEL_11_0 ? D3D11_REQ_TEXTURE2D_U_OR_V_DIMENSION : D3D10_REQ_TEXTURE2D_U_OR_V_DIMENSION;
const size_t csx = _api.fontMetrics.cellSize.x;
const size_t csy = _api.fontMetrics.cellSize.y;
const auto xLimit = (dimensionLimit / csx) * csx;
const auto pixelsPerCellRow = xLimit * csy;
const auto yLimitDueToDimension = (dimensionLimit / csy) * csy;
const auto yLimitDueToSize = ((sizeLimit / sizePerPixel) / pixelsPerCellRow) * csy;
const auto yLimit = std::min(yLimitDueToDimension, yLimitDueToSize);
const auto scaling = GetScaling();
_r.cellSizeDIP.x = static_cast<float>(_api.fontMetrics.cellSize.x) / scaling;
_r.cellSizeDIP.y = static_cast<float>(_api.fontMetrics.cellSize.y) / scaling;
_r.cellSize = _api.fontMetrics.cellSize;
_r.cellCount = _api.cellCount;
// x/yLimit are strictly smaller than dimensionLimit, which is smaller than a u16.
_r.atlasSizeInPixelLimit = u16x2{ gsl::narrow_cast<u16>(xLimit), gsl::narrow_cast<u16>(yLimit) };
_r.atlasSizeInPixel = { 0, 0 };
// The first Cell at {0, 0} is always our cursor texture.
// --> The first glyph starts at {1, 0}.
_r.atlasPosition.x = _api.fontMetrics.cellSize.x;
_r.atlasPosition.y = 0;
_r.tileAllocator = TileAllocator{ _r.cellSize, _api.sizeInPixel };
_r.glyphs = {};
_r.glyphQueue = {};
@@ -1118,26 +1090,6 @@ void AtlasEngine::_setCellFlags(u16r coords, CellFlags mask, CellFlags bits) noe
}
}
AtlasEngine::u16x2 AtlasEngine::_allocateAtlasTile() noexcept
{
const auto ret = _r.atlasPosition;
_r.atlasPosition.x += _r.cellSize.x;
if (_r.atlasPosition.x >= _r.atlasSizeInPixelLimit.x)
{
_r.atlasPosition.x = 0;
_r.atlasPosition.y += _r.cellSize.y;
if (_r.atlasPosition.y >= _r.atlasSizeInPixelLimit.y)
{
_r.atlasPosition.x = _r.cellSize.x;
_r.atlasPosition.y = 0;
showOOMWarning();
}
}
return ret;
}
void AtlasEngine::_flushBufferLine()
{
if (_api.bufferLine.empty())
@@ -1449,11 +1401,10 @@ void AtlasEngine::_emplaceGlyph(IDWriteFontFace* fontFace, size_t bufferPos1, si
auto attributes = _api.attributes;
attributes.cellCount = cellCount;
const auto [it, inserted] = _r.glyphs.emplace(std::piecewise_construct, std::forward_as_tuple(attributes, gsl::narrow<u16>(charCount), chars), std::forward_as_tuple());
const auto& key = it->first;
auto& value = it->second;
AtlasKey key{ attributes, gsl::narrow<u16>(charCount), chars };
const AtlasValue* valueRef = _r.glyphs.find(key);
if (inserted)
if (!valueRef)
{
// Do fonts exist *in practice* which contain both colored and uncolored glyphs? I'm pretty sure...
// However doing it properly means using either of:
@@ -1481,17 +1432,28 @@ void AtlasEngine::_emplaceGlyph(IDWriteFontFace* fontFace, size_t bufferPos1, si
WI_SetFlagIf(flags, CellFlags::ColoredGlyph, fontFace2 && fontFace2->IsColorFont());
}
const auto coords = value.initialize(flags, cellCount);
// The AtlasValue constructor fills the `coords` variable with a pointer to an array
// of at least `cellCount` elements. I did this so that I don't have to type out
// `value.data()->coords` again, despite the constructor having all the data necessary.
u16x2* coords;
AtlasValue value{ flags, cellCount, &coords };
for (u16 i = 0; i < cellCount; ++i)
{
coords[i] = _allocateAtlasTile();
coords[i] = _r.tileAllocator.allocate(_r.glyphs);
}
_r.glyphQueue.push_back(AtlasQueueItem{ &key, &value });
const auto it = _r.glyphs.insert(std::move(key), std::move(value));
valueRef = &it->second;
_r.glyphQueue.emplace_back(&it->first, &it->second);
_r.maxEncounteredCellCount = std::max(_r.maxEncounteredCellCount, cellCount);
}
const auto valueData = value.data();
// For some reason MSVC doesn't understand that valueRef is overwritten in the branch above, resulting in:
// C26430: Symbol 'valueRef' is not tested for nullness on all paths (f.23).
__assume(valueRef != nullptr);
const auto valueData = valueRef->data();
const auto coords = &valueData->coords[0];
const auto data = _getCell(x1, _api.lastPaintBufferLineCoord.y);

View File

@@ -106,12 +106,6 @@ namespace Microsoft::Console::Render
T y{};
ATLAS_POD_OPS(vec2)
constexpr vec2 operator/(const vec2& rhs) noexcept
{
assert(rhs.x != 0 && rhs.y != 0);
return { gsl::narrow_cast<T>(x / rhs.x), gsl::narrow_cast<T>(y / rhs.y) };
}
};
template<typename T>
@@ -243,6 +237,26 @@ namespace Microsoft::Console::Render
return _size;
}
T* begin() noexcept
{
return _data;
}
T* begin() const noexcept
{
return _data;
}
T* end() noexcept
{
return _data + _size;
}
T* end() const noexcept
{
return _data + _size;
}
private:
// These two functions don't need to use scoped objects or standard allocators,
// since this class is in fact a scoped allocator object itself.
@@ -302,23 +316,8 @@ namespace Microsoft::Console::Render
constexpr SmallObjectOptimizer() = default;
SmallObjectOptimizer(const SmallObjectOptimizer& other)
{
const auto otherData = other.data();
const auto otherSize = other.size();
const auto data = initialize(otherSize);
memcpy(data, otherData, otherSize);
}
SmallObjectOptimizer& operator=(const SmallObjectOptimizer& other)
{
if (this != &other)
{
delete this;
new (this) SmallObjectOptimizer(other);
}
return &this;
}
SmallObjectOptimizer(const SmallObjectOptimizer& other) = delete;
SmallObjectOptimizer& operator=(const SmallObjectOptimizer& other) = delete;
SmallObjectOptimizer(SmallObjectOptimizer&& other) noexcept
{
@@ -484,14 +483,6 @@ namespace Microsoft::Console::Render
}
};
struct AtlasKeyHasher
{
size_t operator()(const AtlasKey& key) const noexcept
{
return key.hash();
}
};
struct AtlasValueData
{
CellFlags flags = CellFlags::None;
@@ -500,15 +491,14 @@ namespace Microsoft::Console::Render
struct AtlasValue
{
constexpr AtlasValue() = default;
u16x2* initialize(CellFlags flags, u16 cellCount)
AtlasValue(CellFlags flags, u16 cellCount, u16x2** coords)
{
__assume(coords != nullptr);
const auto size = dataSize(cellCount);
const auto data = _data.initialize(size);
WI_SetFlagIf(flags, CellFlags::Inlined, _data.would_inline(size));
data->flags = flags;
return &data->coords[0];
*coords = &data->coords[0];
}
const AtlasValueData* data() const noexcept
@@ -531,6 +521,248 @@ namespace Microsoft::Console::Render
const AtlasValue* value;
};
struct AtlasKeyHasher
{
using is_transparent = int;
size_t operator()(const AtlasKey& v) const noexcept
{
return v.hash();
}
size_t operator()(const std::list<std::pair<AtlasKey, AtlasValue>>::iterator& v) const noexcept
{
return operator()(v->first);
}
};
struct AtlasKeyEq
{
using is_transparent = int;
bool operator()(const AtlasKey& a, const std::list<std::pair<AtlasKey, AtlasValue>>::iterator& b) const noexcept
{
return a == b->first;
}
bool operator()(const std::list<std::pair<AtlasKey, AtlasValue>>::iterator& a, const std::list<std::pair<AtlasKey, AtlasValue>>::iterator& b) const noexcept
{
return operator()(a->first, b);
}
};
struct TileHashMap
{
TileHashMap() noexcept = default;
AtlasValue* find(const AtlasKey& key)
{
const auto it = _map.find(key);
if (it != _map.end())
{
// Move the key to the head of the LRU queue.
_lru.splice(_lru.begin(), _lru, *it);
return &(*it)->second;
}
return nullptr;
}
std::list<std::pair<AtlasKey, AtlasValue>>::iterator insert(AtlasKey&& key, AtlasValue&& value)
{
// Insert the key/value right at the head of the LRU queue, just like find().
//
// && decays to & if the argument is named, because C++ is a simple language
// and so you have to std::move it again, because C++ is a simple language.
_lru.emplace_front(std::move(key), std::move(value));
auto it = _lru.begin();
_map.emplace(it);
return it;
}
void popOldestTiles(std::vector<u16x2>& out) noexcept
{
Expects(!_lru.empty());
const auto it = --_lru.end();
const auto key = it->first.data();
const auto value = it->second.data();
const auto beg = &value->coords[0];
const auto cellCount = key->attributes.cellCount;
const auto offset = out.size();
out.resize(offset + cellCount);
std::copy_n(beg, cellCount, out.begin() + offset);
_map.erase(it);
_lru.pop_back();
}
private:
// Please don't copy this code. It's a proof-of-concept.
// If you need a LRU hash-map, write a custom one with an intrusive
// prev/next linked list (it's easier than you might think!).
std::list<std::pair<AtlasKey, AtlasValue>> _lru;
std::unordered_set<std::list<std::pair<AtlasKey, AtlasValue>>::iterator, AtlasKeyHasher, AtlasKeyEq> _map;
};
// TileAllocator yields `tileSize`-sized tiles for our texture atlas.
// While doing so it'll grow the atlas size() by a factor of 2 if needed.
// Once the area set via setMaxArea() would be exceeded it'll stop growing
// and instead snatch tiles back from the oldest TileHashMap entries.
//
// The quadratic growth works by alternating the size()
// between a 1:1 and 2:1 aspect ratio, like so:
// (64,64) -> (128,64) -> (128,128) -> (256,128) -> (256,256)
// The initial tile positions allocate() returns are in a Z
// pattern over the available space in the atlas texture.
// You can log the `return _pos;` in allocate() using "Tracepoint"s
// in Visual Studio if you'd like to understand the Z pattern better.
struct TileAllocator
{
TileAllocator() = default;
// Creates an allocator for tiles of `tileSize` pixels. The initial atlas size is a square whose
// side is the larger tile dimension rounded up to a power of 2, but at least _absoluteMinSize.
// `windowSize` is forwarded to setMaxArea(u16x2) to determine the growth limit.
explicit TileAllocator(u16x2 tileSize, u16x2 windowSize) noexcept :
_tileSize{ tileSize }
{
const auto initialSize = std::max(u16{ _absoluteMinSize }, std::bit_ceil(std::max(tileSize.x, tileSize.y)));
_size = { initialSize, initialSize };
// _limit is the largest position at which a whole tile still fits into _size.
_limit = { gsl::narrow_cast<u16>(initialSize - _tileSize.x), gsl::narrow_cast<u16>(initialSize - _tileSize.y) };
setMaxArea(windowSize);
}
// Returns the atlas size in pixels that is required to hold all tiles handed out so far.
u16x2 size() const noexcept
{
return _size;
}
// Sets the maximum atlas area to twice the area of the given window size.
void setMaxArea(u16x2 windowSize) noexcept
{
// _generate() uses a quadratic growth factor for _size's area.
// Once it exceeds the _maxArea, it'll start snatching tiles back from the
// TileHashMap using its LRU queue. Since _size will at least reach half
// of _maxArea (because otherwise it could still grow by a factor of 2)
// and by ensuring that _maxArea is at least twice the window size
// we make it impossible* for _generate() to return false before
// TileHashMap contains at least as many tiles as the window contains.
// If that wasn't the case we'd snatch and reuse tiles that are still in use.
// * lhecker's legal department:
// No responsibility is taken for the correctness of this information.
setMaxArea(static_cast<size_t>(windowSize.x) * static_cast<size_t>(windowSize.y) * 2);
}
// Sets the maximum atlas area in pixels. One extra tile area is added to `max` and the
// result is clamped to the range [_absoluteMinArea, _absoluteMaxArea].
void setMaxArea(size_t max) noexcept
{
// We need to reserve at least 1 extra `tileArea`, because the tile
// at position {0,0} is already reserved for the cursor texture.
const auto tileArea = static_cast<size_t>(_tileSize.x) * static_cast<size_t>(_tileSize.y);
_maxArea = clamp(max + tileArea, _absoluteMinArea, _absoluteMaxArea);
// A changed limit may allow _generate() to continue growing the atlas.
_updateCanGenerate();
}
// Returns the position of a tile the caller may draw into.
// As long as the atlas can be filled or grown this is a fresh position. Afterwards
// tiles are taken from `map`'s least recently used entry, which is removed from `map`.
u16x2 allocate(TileHashMap& map) noexcept
{
if (_generate())
{
return _pos;
}
// An evicted entry may consist of multiple tiles. The ones that
// aren't needed right away stay in _cache for subsequent calls.
if (_cache.empty())
{
map.popOldestTiles(_cache);
}
const auto pos = _cache.back();
_cache.pop_back();
return pos;
}
private:
// This method generates the Z pattern coordinates
// described above in the TileAllocator comment.
// On success it returns true and _pos holds the new tile position.
// It returns false, leaving _pos and _size as they were, if the
// atlas would have to grow beyond _maxArea to fit another tile.
bool _generate() noexcept
{
if (!_canGenerate)
{
return false;
}
// We need to backup _pos/_size in case our resize below exceeds _maxArea.
// In that case we have to restore _pos/_size so that if _maxArea is increased
// (window resize for instance), we can pick up where we previously left off.
const auto pos = _pos;
// First try the next tile to the right in the current row...
_pos.x += _tileSize.x;
if (_pos.x <= _limit.x)
{
return true;
}
// ...then the first tile of the next row...
_pos.y += _tileSize.y;
if (_pos.y <= _limit.y)
{
_pos.x = _originX;
return true;
}
// ...and if neither fits, the atlas has to grow.
// Same as for pos.
const auto size = _size;
// This implements a quadratic growth factor for _size, by
// alternating between a 1:1 and 2:1 aspect ratio, like so:
// (64,64) -> (128,64) -> (128,128) -> (256,128) -> (256,256)
// This behavior is strictly dependent on setMaxArea(u16x2)'s
// behavior. See its comment for an explanation.
if (_size.x == _size.y)
{
// Square -> double the width and continue at the top, right of the existing tiles.
_size.x *= 2;
_pos.y = 0;
}
else
{
// 2:1 -> double the height and continue at the left, below the existing tiles.
_size.y *= 2;
_pos.x = 0;
}
_updateCanGenerate();
if (_canGenerate)
{
_limit = { gsl::narrow_cast<u16>(_size.x - _tileSize.x), gsl::narrow_cast<u16>(_size.y - _tileSize.y) };
_originX = _pos.x;
}
else
{
// The grown size exceeds _maxArea: Undo everything we did above.
_size = size;
_pos = pos;
}
return _canGenerate;
}
// Recomputes _canGenerate: Tiles can be generated as long as the area of _size fits into _maxArea.
void _updateCanGenerate() noexcept
{
_canGenerate = static_cast<size_t>(_size.x) * static_cast<size_t>(_size.y) <= _maxArea;
}
static constexpr u16 _absoluteMinSize = 256;
static constexpr size_t _absoluteMinArea = _absoluteMinSize * _absoluteMinSize;
// TODO: Consider using IDXGIAdapter3::QueryVideoMemoryInfo() and IDXGIAdapter3::RegisterVideoMemoryBudgetChangeNotificationEvent()
// That way we can make better use of a user's available video memory.
static constexpr size_t _absoluteMaxArea = D3D10_REQ_TEXTURE2D_U_OR_V_DIMENSION * D3D10_REQ_TEXTURE2D_U_OR_V_DIMENSION;
// Tiles that were snatched back from TileHashMap but haven't been handed out by allocate() yet.
std::vector<u16x2> _cache;
// The area in pixels that _size may not exceed. See setMaxArea().
size_t _maxArea = _absoluteMaxArea;
u16x2 _tileSize;
// The current size of the atlas in pixels.
u16x2 _size;
// The largest tile position that still fits into _size (= _size - _tileSize).
u16x2 _limit;
// Since _pos starts at {0, 0}, it'll result in the first allocate()d tile to be at {_tileSize.x, 0}.
// Coincidentally that's exactly what we want as the cursor texture lives at {0, 0}.
u16x2 _pos;
// The x coordinate _pos is reset to when _generate() advances to the next row.
u16 _originX = 0;
// Indicates whether we've exhausted our Z pattern across the atlas texture.
// If this is false, we have to snatch tiles back from TileHashMap.
bool _canGenerate = true;
};
struct CachedCursorOptions
{
u32 cursorColor = INVALID_COLOR;
@@ -613,7 +845,6 @@ namespace Microsoft::Console::Render
const Buffer<DWRITE_FONT_AXIS_VALUE>& _getTextFormatAxis(bool bold, bool italic) const noexcept;
Cell* _getCell(u16 x, u16 y) noexcept;
void _setCellFlags(u16r coords, CellFlags mask, CellFlags bits) noexcept;
u16x2 _allocateAtlasTile() noexcept;
void _flushBufferLine();
void _emplaceGlyph(IDWriteFontFace* fontFace, size_t bufferPos1, size_t bufferPos2);
@@ -634,7 +865,6 @@ namespace Microsoft::Console::Render
static constexpr bool debugGlyphGenerationPerformance = false;
static constexpr bool debugGeneralPerformance = false || debugGlyphGenerationPerformance;
static constexpr bool continuousRedraw = false || debugGeneralPerformance;
static constexpr u16 u16min = 0x0000;
static constexpr u16 u16max = 0xffff;
@@ -693,10 +923,9 @@ namespace Microsoft::Console::Render
u16 dpi = USER_DEFAULT_SCREEN_DPI; // invalidated by ApiInvalidations::Font, caches _api.dpi
u16 maxEncounteredCellCount = 0;
u16 scratchpadCellWidth = 0;
u16x2 atlasSizeInPixelLimit; // invalidated by ApiInvalidations::Font
u16x2 atlasSizeInPixel; // invalidated by ApiInvalidations::Font
u16x2 atlasPosition;
std::unordered_map<AtlasKey, AtlasValue, AtlasKeyHasher> glyphs;
TileHashMap glyphs;
TileAllocator tileAllocator;
std::vector<AtlasQueueItem> glyphQueue;
f32 gamma = 0;

View File

@@ -64,7 +64,7 @@ try
// See documentation for IDXGISwapChain2::GetFrameLatencyWaitableObject method:
// > For every frame it renders, the app should wait on this handle before starting any rendering operations.
// > Note that this requirement includes the first frame the app renders with the swap chain.
assert(_r.frameLatencyWaitableObjectUsed);
assert(debugGeneralPerformance || _r.frameLatencyWaitableObjectUsed);
// > IDXGISwapChain::Present: Partial Presentation (using a dirty rects or scroll) is not supported
// > for SwapChains created with DXGI_SWAP_EFFECT_DISCARD or DXGI_SWAP_EFFECT_FLIP_DISCARD.
@@ -139,65 +139,22 @@ void AtlasEngine::_updateConstantBuffer() const noexcept
void AtlasEngine::_adjustAtlasSize()
{
if (_r.atlasPosition.y < _r.atlasSizeInPixel.y && _r.atlasPosition.x < _r.atlasSizeInPixel.x)
// Only grow the atlas texture if our tileAllocator needs it to be larger.
// We have no way of shrinking our tileAllocator at the moment,
// so technically a `requiredSize != _r.atlasSizeInPixel`
// comparison would be sufficient, but better safe than sorry.
const auto requiredSize = _r.tileAllocator.size();
if (requiredSize.y <= _r.atlasSizeInPixel.y && requiredSize.x <= _r.atlasSizeInPixel.x)
{
return;
}
const u32 limitX = _r.atlasSizeInPixelLimit.x;
const u32 limitY = _r.atlasSizeInPixelLimit.y;
const u32 posX = _r.atlasPosition.x;
const u32 posY = _r.atlasPosition.y;
const u32 cellX = _r.cellSize.x;
const u32 cellY = _r.cellSize.y;
const auto perCellArea = cellX * cellY;
// The texture atlas is filled like this:
// x →
// y +--------------+
// ↓ |XXXXXXXXXXXXXX|
// |XXXXXXXXXXXXXX|
// |XXXXX↖ |
// | | |
// +------|-------+
// This is where _r.atlasPosition points at.
//
// Each X is a glyph texture tile that's occupied.
// We can compute the area of pixels consumed by adding the first
// two lines of X (rectangular) together with the last line of X.
const auto currentArea = posY * limitX + posX * cellY;
// minArea reserves enough room for 64 cells in all cases (mainly during startup).
const auto minArea = 64 * perCellArea;
auto newArea = std::max(minArea, currentArea);
// I want the texture to grow exponentially similar to std::vector, as this
// ensures we don't need to resize the texture again right after having done.
// This rounds newArea up to the next power of 2.
unsigned long int index;
_BitScanReverse(&index, newArea); // newArea can't be 0
newArea = u32{ 1 } << (index + 1);
const auto pixelPerRow = limitX * cellY;
// newArea might be just large enough that it spans N full rows of cells and one additional row
// just barely. This algorithm rounds up newArea to the _next_ multiple of cellY.
const auto wantedHeight = (newArea + pixelPerRow - 1) / pixelPerRow * cellY;
// The atlas might either be a N rows of full width (xLimit) or just one
// row (where wantedHeight == cellY) that doesn't quite fill it's maximum width yet.
const auto wantedWidth = wantedHeight != cellY ? limitX : newArea / perCellArea * cellX;
// We know that limitX/limitY were u16 originally, and thus it's safe to narrow_cast it back.
const auto height = gsl::narrow_cast<u16>(std::min(limitY, wantedHeight));
const auto width = gsl::narrow_cast<u16>(std::min(limitX, wantedWidth));
assert(width != 0);
assert(height != 0);
wil::com_ptr<ID3D11Texture2D> atlasBuffer;
wil::com_ptr<ID3D11ShaderResourceView> atlasView;
{
D3D11_TEXTURE2D_DESC desc{};
desc.Width = width;
desc.Height = height;
desc.Width = requiredSize.x;
desc.Height = requiredSize.y;
desc.MipLevels = 1;
desc.ArraySize = 1;
desc.Format = DXGI_FORMAT_B8G8R8A8_UNORM;
@@ -222,7 +179,7 @@ void AtlasEngine::_adjustAtlasSize()
_r.deviceContext->CopySubresourceRegion1(atlasBuffer.get(), 0, 0, 0, 0, _r.atlasBuffer.get(), 0, &box, D3D11_COPY_NO_OVERWRITE);
}
_r.atlasSizeInPixel = u16x2{ width, height };
_r.atlasSizeInPixel = requiredSize;
_r.atlasBuffer = std::move(atlasBuffer);
_r.atlasView = std::move(atlasView);
_setShaderResources();
@@ -346,7 +303,7 @@ void AtlasEngine::_drawGlyph(const AtlasQueueItem& item) const
_r.d2dRenderTarget->DrawTextLayout({}, textLayout.get(), _r.brush.get(), options);
THROW_IF_FAILED(_r.d2dRenderTarget->EndDraw());
for (uint32_t i = 0; i < cells; ++i)
for (u32 i = 0; i < cells; ++i)
{
// Specifying NO_OVERWRITE means that the system can assume that existing references to the surface that
// may be in flight on the GPU will not be affected by the update, so the copy can proceed immediately