mirror of
https://github.com/microsoft/terminal.git
synced 2026-04-14 18:21:02 +00:00
## Summary of the Pull Request Fixes a bug where a partially visible URL would not be detected. This is fixed by expanding the search space by 1 viewport height in both directions. The `_patternIntervalTree` now operates in the absolute-buffer space as opposed to the viewport-relative space. It's a bit of an annoying change, but the alternative would be to keep track of the offset used by the method above, which I find more annoying, personally. As a part of this change, I made it a bit more clear when something is viewport-relative vs buffer-absolute. Regarding mark mode hyperlink navigation, now that everything is in the absolute-buffer space, I'm able to fix some of the issues in #13854. I removed `_selectionIsTargetingUrl` and fixed/validated navigating to hyperlinks that are partially visible. ## Validation Steps Performed Detects URL that is... ✅ fully visible ✅ partially cropped off the top ✅ partially cropped off the bottom ✅ Above scenarios work with mark mode hyperlink navigation ✅Tests added Closes #18177 Closes #13854
441 lines
16 KiB
C++
441 lines
16 KiB
C++
// Copyright (c) Microsoft Corporation.
|
|
// Licensed under the MIT license.
|
|
|
|
#include "precomp.h"
|
|
#include "UTextAdapter.h"
|
|
|
|
#include "textBuffer.hpp"
|
|
|
|
// All of these are somewhat annoying when trying to implement RefcountBuffer.
|
|
// You can't stuff a unique_ptr into ut->q (= void*) after all.
|
|
#pragma warning(disable : 26402) // Return a scoped object instead of a heap-allocated if it has a move constructor (r.3).
|
|
#pragma warning(disable : 26403) // Reset or explicitly delete an owner<T> pointer '...' (r.3).
|
|
#pragma warning(disable : 26409) // Avoid calling new and delete explicitly, use std::make_unique<T> instead (r.11).
|
|
|
|
struct RowRange
|
|
{
|
|
til::CoordType begin;
|
|
til::CoordType end;
|
|
};
|
|
|
|
// A manually reference-counted, heap-allocated block of UTF-16 text.
//
// `data` is declared with a single element, but EnsureCapacityForOverwrite() over-allocates
// the struct so that `data` actually has room for `capacity` characters.
// Instances must only be created via EnsureCapacityForOverwrite() and destroyed via Release().
struct RefcountBuffer
{
    // Number of owners. The buffer frees itself once this drops to 0.
    size_t references;
    // Number of wchar_t that fit into `data`.
    size_t capacity;
    // First element of the trailing character storage.
    wchar_t data[1];

    // Returns a buffer with room for at least `capacity` characters that is safe to overwrite.
    //
    // @param buffer   The buffer currently held by the caller. May be nullptr.
    // @param capacity The minimum number of characters the result must be able to hold.
    // @return `buffer` itself if it is exclusively owned and large enough. Otherwise a freshly
    //         allocated buffer with a reference count of 1, in which case the caller's reference
    //         to `buffer` (if any) has been released. The contents of a new buffer are uninitialized.
    // @throws std::bad_alloc if the allocation fails. `buffer` is left untouched in that case.
    static RefcountBuffer* EnsureCapacityForOverwrite(RefcountBuffer* buffer, size_t capacity)
    {
        // We must not just ensure that `buffer` has at least `capacity`, but also that its reference
        // count is <= 1, because otherwise we would scribble over a buffer someone else is reading.
        if (buffer != nullptr && buffer->references <= 1 && buffer->capacity >= capacity)
        {
            return buffer;
        }

        // Grow to at least twice the previous capacity and always leave some headroom.
        const auto oldCapacity = buffer ? buffer->capacity << 1 : 0;
        const auto newCapacity = std::max(capacity + 128, oldCapacity);

        // The throwing form of ::operator new never returns nullptr, it throws std::bad_alloc instead.
        // A null check would be dead code and would suggest a failure mode that callers cannot observe.
        const auto newBuffer = static_cast<RefcountBuffer*>(::operator new(sizeof(RefcountBuffer) - sizeof(data) + newCapacity * sizeof(wchar_t)));

        // Give up the caller's reference only after the allocation succeeded.
        if (buffer)
        {
            buffer->Release();
        }

        // Copying the old buffer's data is not necessary because utextAccess() will scribble right over it.
        newBuffer->references = 1;
        newBuffer->capacity = newCapacity;
        return newBuffer;
    }

    // Registers an additional owner.
    void AddRef() noexcept
    {
        // With our usage patterns, either of these two would indicate
        // an unbalanced AddRef/Release or a memory corruption.
        assert(references > 0 && references < 1000);
        references++;
    }

    // Drops one owner and frees the buffer when it was the last one.
    // The object must not be touched after the final Release().
    void Release() noexcept
    {
        // With our usage patterns, either of these two would indicate
        // an unbalanced AddRef/Release or a memory corruption.
        assert(references > 0 && references < 1000);
        if (--references == 0)
        {
            ::operator delete(this);
        }
    }
};
|
|
|
|
// Exposes the storage of the provider-private `p` pointer field as a size_t.
// utextNativeLength() uses it to cache the total text length (0 = not computed yet).
constexpr size_t& accessLength(UText* ut) noexcept
{
    // The reinterpretation below is only valid if the field is exactly as wide as a size_t.
    static_assert(sizeof(ut->p) == sizeof(size_t));
    const auto slot = std::bit_cast<size_t*>(&ut->p);
    return *slot;
}
|
|
|
|
// Exposes the storage of the provider-private `q` pointer field as a RefcountBuffer pointer.
// The returned reference may hold nullptr if no buffer has been attached to the UText.
constexpr RefcountBuffer*& accessBuffer(UText* ut) noexcept
{
    // The reinterpretation below is only valid if both pointer types have the same width.
    static_assert(sizeof(ut->q) == sizeof(RefcountBuffer*));
    const auto slot = std::bit_cast<RefcountBuffer**>(&ut->q);
    return *slot;
}
|
|
|
|
// Exposes the storage of the provider-private integer field `a` as a RowRange,
// i.e. the [begin, end) span of rows that the UText covers.
constexpr RowRange& accessRowRange(UText* ut) noexcept
{
    // Both halves of the RowRange must fit exactly into the field.
    static_assert(sizeof(ut->a) == sizeof(RowRange));
    const auto slot = std::bit_cast<RowRange*>(&ut->a);
    return *slot;
}
|
|
|
|
// Exposes the provider-private integer field `b`, which stores the index of
// the row whose text is currently loaded as the UText's chunk.
constexpr til::CoordType& accessCurrentRow(UText* ut) noexcept
{
    auto& currentRow = ut->b;
    return currentRow;
}
|
|
|
|
// An excerpt from the ICU documentation:
|
|
//
|
|
// Clone a UText. Much like opening a UText where the source text is itself another UText.
|
|
//
|
|
// A shallow clone replicates only the UText data structures; it does not make
|
|
// a copy of the underlying text. Shallow clones can be used as an efficient way to
|
|
// have multiple iterators active in a single text string that is not being modified.
|
|
//
|
|
// A shallow clone operation must not fail except for truly exceptional conditions such
|
|
// as memory allocation failures.
|
|
//
|
|
// @param dest A UText struct to be filled in with the result of the clone operation,
|
|
// or NULL if the clone function should heap-allocate a new UText struct.
|
|
// @param src The UText to be cloned.
|
|
// @param deep true to request a deep clone, false for a shallow clone.
|
|
// @param status Errors are returned here. For deep clones, U_UNSUPPORTED_ERROR should
|
|
// be returned if the text provider is unable to clone the original text.
|
|
// @return The newly created clone, or NULL if the clone operation failed.
|
|
static UText* U_CALLCONV utextClone(UText* dest, const UText* src, UBool deep, UErrorCode* status) noexcept
|
|
{
|
|
__assume(status != nullptr);
|
|
|
|
if (deep)
|
|
{
|
|
*status = U_UNSUPPORTED_ERROR;
|
|
return dest;
|
|
}
|
|
|
|
dest = utext_setup(dest, 0, status);
|
|
if (*status > U_ZERO_ERROR)
|
|
{
|
|
return dest;
|
|
}
|
|
|
|
memcpy(dest, src, sizeof(UText));
|
|
if (const auto buf = accessBuffer(dest))
|
|
{
|
|
buf->AddRef();
|
|
}
|
|
return dest;
|
|
}
|
|
|
|
// An excerpt from the ICU documentation:
|
|
//
|
|
// Gets the length of the text.
|
|
//
|
|
// @param ut the UText to get the length of.
|
|
// @return the length, in the native units of the original text string.
|
|
static int64_t U_CALLCONV utextNativeLength(UText* ut) noexcept
|
|
try
|
|
{
|
|
auto length = accessLength(ut);
|
|
|
|
if (!length)
|
|
{
|
|
const auto& textBuffer = *static_cast<const TextBuffer*>(ut->context);
|
|
const auto range = accessRowRange(ut);
|
|
|
|
for (til::CoordType y = range.begin; y < range.end; ++y)
|
|
{
|
|
const auto& row = textBuffer.GetRowByOffset(y);
|
|
// Later down below we'll add a newline to the text if !wasWrapForced, so we need to account for that here.
|
|
length += row.GetText().size() + !row.WasWrapForced();
|
|
}
|
|
|
|
accessLength(ut) = length;
|
|
}
|
|
|
|
return gsl::narrow_cast<int64_t>(length);
|
|
}
|
|
catch (...)
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
// An excerpt from the ICU documentation:
|
|
//
|
|
// Get the description of the text chunk containing the text at a requested native index.
|
|
// The UText's iteration position will be left at the requested index.
|
|
// If the index is out of bounds, the iteration position will be left
|
|
// at the start or end of the string, as appropriate.
|
|
//
|
|
// @param ut the UText being accessed.
|
|
// @param nativeIndex Requested index of the text to be accessed.
|
|
// @param forward If true, then the returned chunk must contain text starting from the index, so that start<=index<limit.
|
|
// If false, then the returned chunk must contain text before the index, so that start<index<=limit.
|
|
// @return True if the requested index could be accessed. The chunk will contain the requested text.
|
|
// False value if a chunk cannot be accessed (the requested index is out of bounds).
|
|
static UBool U_CALLCONV utextAccess(UText* ut, int64_t nativeIndex, UBool forward) noexcept
|
|
try
|
|
{
|
|
auto neededIndex = nativeIndex;
|
|
// This will make it simpler for us to search the row that contains the nativeIndex,
|
|
// because we'll now only need to check for `start<=index<limit` and nothing else.
|
|
if (!forward)
|
|
{
|
|
neededIndex--;
|
|
}
|
|
|
|
const auto& textBuffer = *static_cast<const TextBuffer*>(ut->context);
|
|
const auto range = accessRowRange(ut);
|
|
const auto startOld = ut->chunkNativeStart;
|
|
const auto limitOld = ut->chunkNativeLimit;
|
|
auto start = startOld;
|
|
auto limit = limitOld;
|
|
|
|
if (neededIndex < startOld || neededIndex >= limitOld)
|
|
{
|
|
auto y = accessCurrentRow(ut);
|
|
std::wstring_view text;
|
|
bool wasWrapForced = false;
|
|
|
|
if (neededIndex < start)
|
|
{
|
|
do
|
|
{
|
|
--y;
|
|
if (y < range.begin)
|
|
{
|
|
break;
|
|
}
|
|
|
|
const auto& row = textBuffer.GetRowByOffset(y);
|
|
text = row.GetText();
|
|
wasWrapForced = row.WasWrapForced();
|
|
|
|
limit = start;
|
|
// Later down below we'll add a newline to the text if !wasWrapForced, so we need to account for that here.
|
|
start -= text.size() + !wasWrapForced;
|
|
} while (neededIndex < start);
|
|
}
|
|
else
|
|
{
|
|
do
|
|
{
|
|
++y;
|
|
if (y >= range.end)
|
|
{
|
|
break;
|
|
}
|
|
|
|
const auto& row = textBuffer.GetRowByOffset(y);
|
|
text = row.GetText();
|
|
wasWrapForced = row.WasWrapForced();
|
|
|
|
start = limit;
|
|
// Later down below we'll add a newline to the text if !wasWrapForced, so we need to account for that here.
|
|
limit += text.size() + !wasWrapForced;
|
|
} while (neededIndex >= limit);
|
|
}
|
|
|
|
assert(start >= 0);
|
|
// If we have already calculated the total length we can also assert that the limit is in range.
|
|
assert(ut->p == nullptr || static_cast<size_t>(limit) <= accessLength(ut));
|
|
|
|
// Even if we went out-of-bounds, we still need to update the chunkContents to contain the first/last chunk.
|
|
if (limit != limitOld)
|
|
{
|
|
if (!wasWrapForced)
|
|
{
|
|
const auto newSize = text.size() + 1;
|
|
const auto buffer = RefcountBuffer::EnsureCapacityForOverwrite(accessBuffer(ut), newSize);
|
|
|
|
memcpy(&buffer->data[0], text.data(), text.size() * sizeof(wchar_t));
|
|
til::at(buffer->data, text.size()) = L'\n';
|
|
|
|
text = { &buffer->data[0], newSize };
|
|
accessBuffer(ut) = buffer;
|
|
}
|
|
|
|
accessCurrentRow(ut) = y;
|
|
ut->chunkNativeStart = start;
|
|
ut->chunkNativeLimit = limit;
|
|
ut->chunkLength = gsl::narrow_cast<int32_t>(text.size());
|
|
#pragma warning(suppress : 26490) // Don't use reinterpret_cast (type.1).
|
|
ut->chunkContents = reinterpret_cast<const char16_t*>(text.data());
|
|
ut->nativeIndexingLimit = ut->chunkLength;
|
|
}
|
|
}
|
|
|
|
// The ICU documentation is a little bit misleading. It states:
|
|
// > @param forward [...] If true, start<=index<limit. If false, [...] start<index<=limit.
|
|
// but that's just for finding the target chunk. The chunkOffset is not actually constrained to that!
|
|
// std::clamp will perform a<=b<=c, which is what we want.
|
|
const auto clampedIndex = std::clamp(nativeIndex, start, limit);
|
|
auto offset = gsl::narrow_cast<int32_t>(clampedIndex - start);
|
|
// Don't leave the offset on a trailing surrogate pair. See U16_SET_CP_START.
|
|
// This assumes that the TextBuffer contains valid UTF-16 which may theoretically not be the case.
|
|
if (offset > 0 && offset < ut->chunkLength && U16_IS_TRAIL(til::at(ut->chunkContents, offset)))
|
|
{
|
|
offset--;
|
|
}
|
|
ut->chunkOffset = offset;
|
|
|
|
return neededIndex >= start && neededIndex < limit;
|
|
}
|
|
catch (...)
|
|
{
|
|
return false;
|
|
}
|
|
|
|
// An excerpt from the ICU documentation:
|
|
//
|
|
// Extract text from a UText into a UChar buffer.
|
|
// The size (number of 16 bit UChars) in the data to be extracted is returned.
|
|
// The full amount is returned, even when the specified buffer size is smaller.
|
|
// The extracted string must be NUL-terminated if there is sufficient space in the destination buffer.
|
|
//
|
|
// @param ut the UText from which to extract data.
|
|
// @param nativeStart the native index of the first character to extract.
|
|
// @param nativeLimit the native string index of the position following the last character to extract.
|
|
// @param dest the UChar (UTF-16) buffer into which the extracted text is placed
|
|
// @param destCapacity The size, in UChars, of the destination buffer. May be zero for precomputing the required size.
|
|
// @param status receives any error status. If U_BUFFER_OVERFLOW_ERROR: Returns number of UChars for preflighting.
|
|
// @return Number of UChars in the data. Does not include a trailing NUL.
|
|
//
|
|
// NOTE: utextExtract's correctness hasn't been verified yet. The code remains, just incase its functionality is needed in the future.
|
|
#pragma warning(suppress : 4505) // 'utextExtract': unreferenced function with internal linkage has been removed
|
|
static int32_t U_CALLCONV utextExtract(UText* ut, int64_t nativeStart, int64_t nativeLimit, char16_t* dest, int32_t destCapacity, UErrorCode* status) noexcept
|
|
try
|
|
{
|
|
__assume(status != nullptr);
|
|
|
|
if (*status > U_ZERO_ERROR)
|
|
{
|
|
return 0;
|
|
}
|
|
if (destCapacity < 0 || (dest == nullptr && destCapacity > 0) || nativeStart > nativeLimit)
|
|
{
|
|
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
|
return 0;
|
|
}
|
|
|
|
if (!utextAccess(ut, nativeStart, true))
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
nativeLimit = std::min(ut->chunkNativeLimit, nativeLimit);
|
|
|
|
if (destCapacity <= 0)
|
|
{
|
|
return gsl::narrow_cast<int32_t>(nativeLimit - nativeStart);
|
|
}
|
|
|
|
const auto& textBuffer = *static_cast<const TextBuffer*>(ut->context);
|
|
const auto y = accessCurrentRow(ut);
|
|
const auto offset = ut->chunkNativeStart - nativeStart;
|
|
const auto text = textBuffer.GetRowByOffset(y).GetText().substr(gsl::narrow_cast<size_t>(std::max<int64_t>(0, offset)));
|
|
const auto destCapacitySizeT = gsl::narrow_cast<size_t>(destCapacity);
|
|
const auto length = std::min(destCapacitySizeT, text.size());
|
|
|
|
memcpy(dest, text.data(), length * sizeof(char16_t));
|
|
|
|
if (length < destCapacitySizeT)
|
|
{
|
|
#pragma warning(suppress : 26481) // Don't use pointer arithmetic. Use span instead (bounds.1).
|
|
dest[length] = 0;
|
|
}
|
|
|
|
return gsl::narrow_cast<int32_t>(length);
|
|
}
|
|
catch (...)
|
|
{
|
|
// The only thing that can fail is GetRowByOffset() which in turn can only fail when VirtualAlloc() fails.
|
|
*status = U_MEMORY_ALLOCATION_ERROR;
|
|
return 0;
|
|
}
|
|
|
|
// The UTextFuncs `close` callback.
// Gives up this UText's reference to its RefcountBuffer, if it holds one.
static void U_CALLCONV utextClose(UText* ut) noexcept
{
    const auto buffer = accessBuffer(ut);
    if (buffer == nullptr)
    {
        return;
    }

    buffer->Release();
}
|
|
|
|
// The function table that turns a UText into a view onto a TextBuffer.
// Callbacks that aren't listed here (including `extract`) are left unset.
static constexpr UTextFuncs utextFuncs{
    .tableSize = sizeof(UTextFuncs),
    .clone = &utextClone,
    .nativeLength = &utextNativeLength,
    .access = &utextAccess,
    .close = &utextClose,
};
|
|
|
|
// Creates a UText from the given TextBuffer that spans rows [rowBeg,RowEnd).
|
|
Microsoft::Console::ICU::unique_utext Microsoft::Console::ICU::UTextFromTextBuffer(const TextBuffer& textBuffer, til::CoordType rowBeg, til::CoordType rowEnd) noexcept
|
|
{
|
|
#pragma warning(suppress : 26477) // Use 'nullptr' rather than 0 or NULL (es.47).
|
|
unique_utext ut{ UTEXT_INITIALIZER };
|
|
|
|
UErrorCode status = U_ZERO_ERROR;
|
|
utext_setup(&ut, 0, &status);
|
|
FAIL_FAST_IF(status > U_ZERO_ERROR);
|
|
|
|
rowBeg = std::max<til::CoordType>(0, rowBeg);
|
|
rowEnd = std::min(textBuffer.GetSize().BottomExclusive(), rowEnd);
|
|
|
|
ut.providerProperties = (1 << UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE) | (1 << UTEXT_PROVIDER_STABLE_CHUNKS);
|
|
ut.pFuncs = &utextFuncs;
|
|
ut.context = &textBuffer;
|
|
accessCurrentRow(&ut) = rowBeg - 1; // the utextAccess() below will advance this by 1.
|
|
accessRowRange(&ut) = { rowBeg, rowEnd };
|
|
|
|
utextAccess(&ut, 0, true);
|
|
return ut;
|
|
}
|
|
|
|
// Returns a half-open [beg,end) range given a text start and end position.
|
|
// This function is designed to be used with uregex_start64/uregex_end64.
|
|
til::point_span Microsoft::Console::ICU::BufferRangeFromMatch(UText* ut, URegularExpression* re)
|
|
{
|
|
UErrorCode status = U_ZERO_ERROR;
|
|
const auto nativeIndexBeg = uregex_start64(re, 0, &status);
|
|
const auto nativeIndexEnd = uregex_end64(re, 0, &status);
|
|
|
|
const auto& textBuffer = *static_cast<const TextBuffer*>(ut->context);
|
|
til::point_span ret;
|
|
|
|
if (utextAccess(ut, nativeIndexBeg, true))
|
|
{
|
|
const auto y = accessCurrentRow(ut);
|
|
ret.start.x = textBuffer.GetRowByOffset(y).GetLeadingColumnAtCharOffset(ut->chunkOffset);
|
|
ret.start.y = y;
|
|
}
|
|
else
|
|
{
|
|
ret.start.y = accessRowRange(ut).begin;
|
|
}
|
|
|
|
if (utextAccess(ut, nativeIndexEnd, true))
|
|
{
|
|
const auto y = accessCurrentRow(ut);
|
|
ret.end.x = textBuffer.GetRowByOffset(y).GetLeadingColumnAtCharOffset(ut->chunkOffset);
|
|
ret.end.y = y;
|
|
}
|
|
else
|
|
{
|
|
ret.end = ret.start;
|
|
}
|
|
|
|
return ret;
|
|
}
|