2019-05-02 15:29:04 -07:00
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.
# include "precomp.h"
# include "Row.hpp"
2022-11-11 20:34:58 +01:00
2023-08-11 15:17:18 +02:00
# include <isa_availability.h>
Add an efficient text stream write function (#14821)
This PR adds a couple of foundational functions and classes to make
our TextBuffer more performant and allow us to improve our Unicode
correctness in the future, by getting rid of our dependence on
`OutputCellIterator`. In the future we can then replace the simple
UTF-16 code point iterator with a proper grapheme cluster iterator.
While my focus is technically on Unicode correctness, the ~4x VT
throughput increase in OpenConsole is pretty nice too.
This PR adds:
* A new, simpler ROW iterator (unused in this PR)
* Cursor movement functions (`NavigateToPrevious`, `NavigateToNext`)
They're based on functions that align the cursor to the start/end
of the _current_ cell, so such functions can be added as well.
* `ReplaceText` to write a raw string of text with the possibility to
specify a right margin.
* `CopyRangeFrom` will allow us to make reflow much faster, as it's able
to bulk-copy already measured strings without re-measuring them.
Related to #8000
## Validation Steps Performed
* enwik8.txt, zhwik8.txt, emoji-test.txt, all work with proper
wide glyph reflow at the end of a row ✅
* This produces "a 咪" where only "a" has a white background:
```sh
printf '\e7こん\e8\x1b[107ma\x1b[m\n'
```
* This produces "abん":
```sh
stdbuf -o0 printf '\x1b7こん\x1b8a'; printf 'b\n'
```
* This produces "xy" at the end of the line:
```sh
stdbuf -o0 printf '\e[999C\bこ\bx'; printf 'y\n'
```
* This produces red whitespace followed by "こ " in the default
background color at the end of the line, and "ん" on the next line:
```sh
printf '\e[41m\e[K\e[m\e[999C\e[2Dこん\n'
```
2023-03-24 23:20:53 +01:00
2024-06-26 20:40:27 +02:00
# include "../../types/inc/CodepointWidthDetector.hpp"
2022-11-11 20:34:58 +01:00
2023-08-25 00:56:40 +02:00
// It would be nice to add checked array access in the future, but it's a little annoying to do so without impacting
// performance (including Debug performance). Other languages are a little bit more ergonomic there than C++.
# pragma warning(disable : 26481) // Don't use pointer arithmetic. Use span instead (bounds.1).)
# pragma warning(disable : 26446) // Prefer to use gsl::at() instead of unchecked subscript operator (bounds.4).
# pragma warning(disable : 26472) // Don't use a static_cast for arithmetic conversions. Use brace initialization, gsl::narrow_cast or gsl::narrow (type.1).
2023-06-15 16:45:35 +02:00
extern " C " int __isa_available ;
2023-08-25 00:56:40 +02:00
// Restricts `value` to the range [lo, hi].
// The three arguments may have different arithmetic types; the result has their
// common type, which is why this is written as a single conditional expression.
// If `value` is below `lo`, `lo` wins (this check is performed first).
constexpr auto clamp(auto value, auto lo, auto hi)
{
    return value < lo ?
               lo :
               (value > hi ? hi : value);
}
2022-11-11 20:34:58 +01:00
// The STL is missing a std::iota_n analogue for std::iota, so this is a local one.
// Writes `count` successive values (val, val+1, val+2, ...) starting at `dest`.
// Arguments:
// - dest  - output iterator to the first element to write
// - count - number of elements to write; nothing is written if it is <= 0
// - val   - the first value to write (taken by value, the caller's copy is untouched)
// Return Value:
// - iterator one past the last element written
template<typename OutIt, typename Diff, typename T>
constexpr OutIt iota_n(OutIt dest, Diff count, T val)
{
    // `count > 0` (instead of a plain truthiness test) makes a negative
    // count a no-op instead of a runaway loop.
    for (; count > 0; --count, ++dest, ++val)
    {
        *dest = val;
    }
    return dest;
}
// ROW::ReplaceCharacters needs to calculate `val + count` after
// calling iota_n() and this function achieves both things at once.
// Arguments:
// - dest  - output iterator to the first element to write
// - count - number of elements to write; nothing is written if it is <= 0
// - val   - in/out: the first value to write. It is incremented once per
//           written element, so on return it holds the original value + count.
// Return Value:
// - iterator one past the last element written
template<typename OutIt, typename Diff, typename T>
constexpr OutIt iota_n_mut(OutIt dest, Diff count, T& val)
{
    // `count > 0` makes a negative count a no-op instead of a runaway loop.
    for (; count > 0; --count, ++dest, ++val)
    {
        *dest = val;
    }
    return dest;
}
// Same as std::fill, but purpose-built for very small `last - first`
// where a trivial loop outperforms vectorization.
// Arguments:
// - first, last - the half-open range [first, last) to overwrite
// - val         - the value assigned to every element in the range
// Return Value:
// - `last` (the iterator at which the loop stopped)
template<typename FwdIt, typename T>
constexpr FwdIt fill_small(FwdIt first, FwdIt last, const T val)
{
    while (first != last)
    {
        *first = val;
        ++first;
    }
    return first;
}
// Same as std::fill_n, but purpose-built for very small `count`
// where a trivial loop outperforms vectorization.
// Arguments:
// - dest  - output iterator to the first element to overwrite
// - count - number of elements to overwrite; like std::fill_n this is a no-op if it is <= 0
// - val   - the value assigned to every element
// Return Value:
// - iterator one past the last element written
template<typename OutIt, typename Diff, typename T>
constexpr OutIt fill_n_small(OutIt dest, Diff count, const T val)
{
    for (; count > 0; --count, ++dest)
    {
        *dest = val;
    }
    return dest;
}
// Same as std::copy_n, but purpose-built for very short `count`
// where a trivial loop outperforms vectorization.
// Arguments:
// - first - input iterator to the first element to copy
// - count - number of elements to copy; like std::copy_n this is a no-op if it is <= 0
// - dest  - output iterator to the first element to write
// Return Value:
// - iterator one past the last element written to the destination
template<typename InIt, typename Diff, typename OutIt>
constexpr OutIt copy_n_small(InIt first, Diff count, OutIt dest)
{
    for (; count > 0; --count, ++dest, ++first)
    {
        *dest = *first;
    }
    return dest;
}
2019-05-02 15:29:04 -07:00
2024-11-15 15:50:07 -08:00
CharToColumnMapper : : CharToColumnMapper ( const wchar_t * chars , const uint16_t * charOffsets , ptrdiff_t charsLength , til : : CoordType currentColumn , til : : CoordType columnCount ) noexcept :
2023-08-25 00:56:40 +02:00
_chars { chars } ,
_charOffsets { charOffsets } ,
2024-11-15 15:50:07 -08:00
_charsLength { charsLength } ,
_currentColumn { currentColumn } ,
_columnCount { columnCount }
2023-08-25 00:56:40 +02:00
{
}
// If given a position (`offset`) inside the ROW's text, this function will return the corresponding column.
// This function in particular returns the glyph's first column.
2024-02-29 22:59:15 +01:00
til : : CoordType CharToColumnMapper : : GetLeadingColumnAt ( ptrdiff_t targetOffset ) noexcept
2023-08-25 00:56:40 +02:00
{
2024-11-15 15:50:07 -08:00
targetOffset = clamp ( targetOffset , 0 , _charsLength ) ;
2024-02-29 22:59:15 +01:00
// This code needs to fulfill two conditions on top of the obvious (a forward/backward search):
// A: We never want to stop on a column that is marked with CharOffsetsTrailer (= "GetLeadingColumn").
// B: With these parameters we always want to stop at currentOffset=4:
// _charOffsets={4, 6}
// currentOffset=4 *OR* 6
// targetOffset=5
// This is because we're being asked for a "LeadingColumn", while the caller gave us the offset of a
// trailing surrogate pair or similar. Returning the column of the leading half is the correct choice.
2023-08-25 00:56:40 +02:00
auto col = _currentColumn ;
2024-02-29 22:59:15 +01:00
auto currentOffset = _charOffsets [ col ] ;
2023-08-25 00:56:40 +02:00
2024-02-29 22:59:15 +01:00
// A plain forward-search until we find our targetOffset.
// This loop may iterate too far and thus violate our example in condition B, however...
while ( targetOffset > ( currentOffset & CharOffsetsMask ) )
2023-08-25 00:56:40 +02:00
{
2024-02-29 22:59:15 +01:00
currentOffset = _charOffsets [ + + col ] ;
2023-08-25 00:56:40 +02:00
}
2024-02-29 22:59:15 +01:00
// This backward-search is not just a counter-part to the above, but simultaneously also handles conditions A and B.
// It abuses the fact that columns marked with CharOffsetsTrailer are >0x8000 and targetOffset is always <0x8000.
// This means we skip all "trailer" columns when iterating backwards, and only stop on a non-trailer (= condition A).
// Condition B is fixed simply because we iterate backwards after the forward-search (in that exact order).
while ( targetOffset < currentOffset )
2023-08-25 00:56:40 +02:00
{
2024-02-29 22:59:15 +01:00
currentOffset = _charOffsets [ - - col ] ;
2023-08-25 00:56:40 +02:00
}
_currentColumn = col ;
return col ;
}
// Translates a position inside the ROW's text (`offset`) into the column that contains it.
// For glyphs that span several columns this returns the glyph's last column (relevant for wide glyphs).
til::CoordType CharToColumnMapper::GetTrailingColumnAt(ptrdiff_t offset) noexcept
{
    auto column = GetLeadingColumnAt(offset);

    // At or past the column limit there is no following column to inspect.
    if (column >= _columnCount)
    {
        return column;
    }

    // Advance over every following column that is flagged as a trailer.
    // This is somewhat redundant with the forward search in GetLeadingColumnAt(),
    // but realistically not worth caring about. This code is not a bottleneck.
    while (WI_IsFlagSet(_charOffsets[column + 1], CharOffsetsTrailer))
    {
        ++column;
    }

    return column;
}
2023-09-26 02:28:51 +02:00
// If given a pointer inside the ROW's text buffer, this function will return the corresponding column.
// This function in particular returns the glyph's first column.
2023-08-25 00:56:40 +02:00
til : : CoordType CharToColumnMapper : : GetLeadingColumnAt ( const wchar_t * str ) noexcept
{
return GetLeadingColumnAt ( str - _chars ) ;
}
2023-09-26 02:28:51 +02:00
// If given a pointer inside the ROW's text buffer, this function will return the corresponding column.
// This function in particular returns the glyph's last column (this matters for wide glyphs).
2023-08-25 00:56:40 +02:00
til : : CoordType CharToColumnMapper : : GetTrailingColumnAt ( const wchar_t * str ) noexcept
{
return GetTrailingColumnAt ( str - _chars ) ;
}
2019-05-02 15:29:04 -07:00
// Routine Description:
// - constructor
// Arguments:
// - rowWidth - the width of the row, cell elements
// - fillAttribute - the default text attribute
// Return Value:
// - constructed object
2022-11-11 20:34:58 +01:00
ROW : : ROW ( wchar_t * charsBuffer , uint16_t * charOffsetsBuffer , uint16_t rowWidth , const TextAttribute & fillAttribute ) :
_charsBuffer { charsBuffer } ,
_chars { charsBuffer , rowWidth } ,
_charOffsets { charOffsetsBuffer , : : base : : strict_cast < size_t > ( rowWidth ) + 1u } ,
_attr { rowWidth , fillAttribute } ,
_columnCount { rowWidth }
{
2023-06-10 15:17:18 +02:00
_init ( ) ;
2022-11-11 20:34:58 +01:00
}
void ROW : : SetWrapForced ( const bool wrap ) noexcept
{
_wrapForced = wrap ;
}
bool ROW : : WasWrapForced ( ) const noexcept
2019-05-02 15:29:04 -07:00
{
2022-11-11 20:34:58 +01:00
return _wrapForced ;
}
void ROW : : SetDoubleBytePadded ( const bool doubleBytePadded ) noexcept
{
_doubleBytePadded = doubleBytePadded ;
}
bool ROW : : WasDoubleBytePadded ( ) const noexcept
{
return _doubleBytePadded ;
}
void ROW : : SetLineRendition ( const LineRendition lineRendition ) noexcept
{
_lineRendition = lineRendition ;
}
// Return Value:
// - the row's line rendition, as last stored by SetLineRendition() or reset to SingleWidth by Reset()
LineRendition ROW::GetLineRendition() const noexcept
{
    return _lineRendition;
}
2023-08-25 00:56:40 +02:00
// Returns the index 1 past the last (technically) valid column in the row.
// - Single-width rows: the column count, minus 1 if the row is double-byte padded.
// - All other line renditions: the column count, minus 2 if the row is
//   double-byte padded, and the result of that halved.
// The interplay between the old console and newer VT APIs which support line renditions is
// still unclear so it might be necessary to add two kinds of this function in the future.
// Console APIs treat the buffer as a large NxM matrix after all.
til::CoordType ROW::GetReadableColumnCount() const noexcept
{
    if (_lineRendition == LineRendition::SingleWidth) [[likely]]
    {
        return _columnCount - _doubleBytePadded;
    }
    else
    {
        return (_columnCount - (_doubleBytePadded << 1)) >> 1;
    }
}
2019-05-02 15:29:04 -07:00
// Routine Description:
// - Sets all properties of the ROW to default values
// Arguments:
// - Attr - The default attribute (color) to fill
// Return Value:
// - <none>
2023-06-15 17:34:29 +02:00
void ROW : : Reset ( const TextAttribute & attr ) noexcept
2019-05-02 15:29:04 -07:00
{
2022-11-11 20:34:58 +01:00
_charsHeap . reset ( ) ;
_chars = { _charsBuffer , _columnCount } ;
2023-06-15 17:34:29 +02:00
// Constructing and then moving objects into place isn't free.
// Modifying the existing object is _much_ faster.
* _attr . runs ( ) . unsafe_shrink_to_size ( 1 ) = til : : rle_pair { attr , _columnCount } ;
2024-07-01 11:57:49 +01:00
_imageSlice = nullptr ;
Add support for double-width/double-height lines in conhost (#8664)
This PR adds support for the VT line rendition attributes, which allow
for double-width and double-height line renditions. These renditions are
enabled with the `DECDWL` (double-width line) and `DECDHL`
(double-height line) escape sequences. Both reset to the default
rendition with the `DECSWL` (single-width line) escape sequence. For now
this functionality is only supported by the GDI renderer in conhost.
There are a lot of changes, so this is just a general overview of the
main areas affected.
Previously it was safe to assume that the screen had a fixed width, at
least for a given point in time. But now we need to deal with the
possibility of different lines have different widths, so all the
functions that are constrained by the right border (text wrapping,
cursor movement operations, and sequences like `EL` and `ICH`) now need
to lookup the width of the active line in order to behave correctly.
Similarly it used to be safe to assume that buffer and screen
coordinates were the same thing, but that is no longer true. Lots of
places now need to translate back and forth between coordinate systems
dependent on the line rendition. This includes clipboard handling, the
conhost color selection and search, accessibility location tracking and
screen reading, IME editor positioning, "snapping" the viewport, and of
course all the rendering calculations.
For the rendering itself, I've had to introduce a new
`PrepareLineTransform` method that the render engines can use to setup
the necessary transform matrix for a given line rendition. This is also
now used to handle the horizontal viewport offset, since that could no
longer be achieved just by changing the target coordinates (on a double
width line, the viewport offset may be halfway through a character).
I've also had to change the renderer's existing `InvalidateCursor`
method to take a `SMALL_RECT` rather than a `COORD`, to allow for the
cursor being a variable width. Technically this was already a problem,
because the cursor could occupy two screen cells when over a
double-width character, but now it can be anything between one and four
screen cells (e.g. a double-width character on the double-width line).
In terms of architectural changes, there is now a new `lineRendition`
field in the `ROW` class that keeps track of the line rendition for each
row, and several new methods in the `ROW` and `TextBuffer` classes for
manipulating that state. This includes a few helper methods for handling
the various issues discussed above, e.g. position clamping and
translating between coordinate systems.
## Validation Steps Performed
I've manually confirmed all the double-width and double-height tests in
_Vttest_ are now working as expected, and the _VT100 Torture Test_ now
renders correctly (at least the line rendition aspects). I've also got
my own test scripts that check many of the line rendition boundary cases
and have confirmed that those are now passing.
I've manually tested as many areas of the conhost UI that I could think
of, that might be affected by line rendition, including things like
searching, selection, copying, and color highlighting. For
accessibility, I've confirmed that the _Magnifier_ and _Narrator_
correctly handle double-width lines. And I've also tested the Japanese
IME, which while not perfect, is at least useable.
Closes #7865
2021-02-18 05:44:50 +00:00
_lineRendition = LineRendition : : SingleWidth ;
2021-01-20 13:16:56 -08:00
_wrapForced = false ;
_doubleBytePadded = false ;
Rewrite how marks are stored & add reflow (#16937)
This is pretty much a huge refactoring of how marks are stored in the
buffer.
Gone is the list of `ScrollMark`s in the buffer that store regions of
text as points marking the ends. Those would be nigh impossible to
reflow nicely.
Instead, we're going to use `TextAttribute`s to store the kind of output
we've got - `Prompt`, `Command`, `Output`, or, the default, `None`.
Those already reflow nicely!
But we also need to store things like, the exit code for the command.
That's why we've now added `ScrollbarData` to `ROW`s. There's really
only going to be one prompt->output on a single row. So, we only need to
store one ScrollbarData per-row. When a command ends, we can just go
update the mark on the row that started that command.
But iterating over the whole buffer to find the next/previous
prompt/command/output region sounds complicated. So, to avoid everyone
needing to do some variant of that, we've added `MarkExtents` (which is
literally just the same mark structure as before). TextBuffer can figure
out where all the mark regions are, and hand that back to callers. This
allows ControlCore to be basically unchanged.
_But collecting up all the regions for all the marks sounds expensive!
We need to update the scrollbar frequently, we can't just collect those
up every time!_ No we can't! But we also don't need to. The scrollbar
doesn't need to know where all the marks start and end and if they have
commands and this and that - no. We only need to know the rows that have
marks on them. So, we've now also got `ScrollMark` to represent just a
mark on a scrollbar at a specific row on the buffer. We can get those
quickly.
* [x] I added a bunch of tests for this.
* [x] I played with it and it feels good, even after a reflow (finally)
* See:
* #11000
* #15057 (I'm not marking this as closed. The stacked PR will close
this, when I move marks to Stable)
2024-04-05 13:16:10 -07:00
_promptData = std : : nullopt ;
2022-11-11 20:34:58 +01:00
_init ( ) ;
}
void ROW : : _init ( ) noexcept
{
2023-06-15 16:45:35 +02:00
# pragma warning(push)
# pragma warning(disable : 26462) // The value pointed to by '...' is assigned only once, mark it as a pointer to const (con.4).
# pragma warning(disable : 26481) // Don't use pointer arithmetic. Use span instead (bounds.1).
# pragma warning(disable : 26490) // Don't use reinterpret_cast (type.1).
// Fills _charsBuffer with whitespace and correspondingly _charOffsets
// with successive numbers from 0 to _columnCount+1.
# if defined(TIL_SSE_INTRINSICS)
alignas ( __m256i ) static constexpr uint16_t whitespaceData [ ] { 0x20 , 0x20 , 0x20 , 0x20 , 0x20 , 0x20 , 0x20 , 0x20 , 0x20 , 0x20 , 0x20 , 0x20 , 0x20 , 0x20 , 0x20 , 0x20 } ;
alignas ( __m256i ) static constexpr uint16_t offsetsData [ ] { 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 , 11 , 12 , 13 , 14 , 15 } ;
alignas ( __m256i ) static constexpr uint16_t increment16Data [ ] { 16 , 16 , 16 , 16 , 16 , 16 , 16 , 16 , 16 , 16 , 16 , 16 , 16 , 16 , 16 , 16 } ;
alignas ( __m128i ) static constexpr uint16_t increment8Data [ ] { 8 , 8 , 8 , 8 , 8 , 8 , 8 , 8 } ;
// The AVX loop operates on 32 bytes at a minimum. Since _charsBuffer/_charOffsets uses 2 byte large
// wchar_t/uint16_t respectively, this translates to 16-element writes, which equals a _columnCount of 15,
// because it doesn't include the past-the-end char-offset as described in the _charOffsets member comment.
if ( __isa_available > = __ISA_AVAILABLE_AVX2 & & _columnCount > = 15 )
{
auto chars = _charsBuffer ;
auto charOffsets = _charOffsets . data ( ) ;
// The backing buffer for both chars and charOffsets is guaranteed to be 16-byte aligned,
// but AVX operations are 32-byte large. As such, when we write out the last chunk, we
// have to align it to the ends of the 2 buffers. This results in a potential overlap of
// 16 bytes between the last write in the main loop below and the final write afterwards.
//
// An example:
// If you have a terminal between 16 and 23 columns the buffer has a size of 48 bytes.
// The main loop below will iterate once, as it writes out bytes 0-31 and then exits.
// The final write afterwards cannot write bytes 32-63 because that would write
// out of bounds. Instead it writes bytes 16-47, overwriting 16 overlapping bytes.
// This is better than branching and switching to SSE2, because both things are slow.
//
// Since we want to exit the main loop with at least 1 write left to do as the final write,
// we need to subtract 1 alignment from the buffer length (= 16 bytes). Since _columnCount is
// in wchar_t's we subtract -8. The same applies to the ~7 here vs ~15. If you squint slightly
// you'll see how this is effectively the inverse of what CalculateCharsBufferStride does.
const auto tailColumnOffset = gsl : : narrow_cast < uint16_t > ( ( _columnCount - 8u ) & ~ 7 ) ;
const auto charsEndLoop = chars + tailColumnOffset ;
const auto charOffsetsEndLoop = charOffsets + tailColumnOffset ;
const auto whitespace = _mm256_load_si256 ( reinterpret_cast < const __m256i * > ( & whitespaceData [ 0 ] ) ) ;
auto offsetsLoop = _mm256_load_si256 ( reinterpret_cast < const __m256i * > ( & offsetsData [ 0 ] ) ) ;
const auto offsets = _mm256_add_epi16 ( offsetsLoop , _mm256_set1_epi16 ( tailColumnOffset ) ) ;
if ( chars < charsEndLoop )
{
const auto increment = _mm256_load_si256 ( reinterpret_cast < const __m256i * > ( & increment16Data [ 0 ] ) ) ;
do
{
_mm256_storeu_si256 ( reinterpret_cast < __m256i * > ( chars ) , whitespace ) ;
_mm256_storeu_si256 ( reinterpret_cast < __m256i * > ( charOffsets ) , offsetsLoop ) ;
offsetsLoop = _mm256_add_epi16 ( offsetsLoop , increment ) ;
chars + = 16 ;
charOffsets + = 16 ;
} while ( chars < charsEndLoop ) ;
}
_mm256_storeu_si256 ( reinterpret_cast < __m256i * > ( charsEndLoop ) , whitespace ) ;
_mm256_storeu_si256 ( reinterpret_cast < __m256i * > ( charOffsetsEndLoop ) , offsets ) ;
}
else
{
auto chars = _charsBuffer ;
auto charOffsets = _charOffsets . data ( ) ;
const auto charsEnd = chars + _columnCount ;
const auto whitespace = _mm_load_si128 ( reinterpret_cast < const __m128i * > ( & whitespaceData [ 0 ] ) ) ;
const auto increment = _mm_load_si128 ( reinterpret_cast < const __m128i * > ( & increment8Data [ 0 ] ) ) ;
auto offsets = _mm_load_si128 ( reinterpret_cast < const __m128i * > ( & offsetsData [ 0 ] ) ) ;
do
{
_mm_storeu_si128 ( reinterpret_cast < __m128i * > ( chars ) , whitespace ) ;
_mm_storeu_si128 ( reinterpret_cast < __m128i * > ( charOffsets ) , offsets ) ;
offsets = _mm_add_epi16 ( offsets , increment ) ;
chars + = 8 ;
charOffsets + = 8 ;
// If _columnCount is something like 120, the actual backing buffer for charOffsets is 121 items large.
// --> The while loop uses <= to emit at least 1 more write.
} while ( chars < = charsEnd ) ;
}
# elif defined(TIL_ARM_NEON_INTRINSICS)
alignas ( uint16x8_t ) static constexpr uint16_t offsetsData [ ] { 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 } ;
auto chars = _charsBuffer ;
auto charOffsets = _charOffsets . data ( ) ;
const auto charsEnd = chars + _columnCount ;
const auto whitespace = vdupq_n_u16 ( L ' ' ) ;
const auto increment = vdupq_n_u16 ( 8 ) ;
auto offsets = vld1q_u16 ( & offsetsData [ 0 ] ) ;
do
{
vst1q_u16 ( chars , whitespace ) ;
vst1q_u16 ( charOffsets , offsets ) ;
offsets = vaddq_u16 ( offsets , increment ) ;
chars + = 8 ;
charOffsets + = 8 ;
// If _columnCount is something like 120, the actual backing buffer for charOffsets is 121 items large.
// --> The while loop uses <= to emit at least 1 more write.
} while ( chars < = charsEnd ) ;
# else
# error "Vectorizing this function improves overall performance by up to 40%. Don't remove this warning, just add the vectorized code."
std : : fill_n ( _charsBuffer , _columnCount , UNICODE_SPACE ) ;
2022-11-11 20:34:58 +01:00
std : : iota ( _charOffsets . begin ( ) , _charOffsets . end ( ) , uint16_t { 0 } ) ;
2023-06-15 16:45:35 +02:00
# endif
# pragma warning(push)
2019-05-02 15:29:04 -07:00
}
2023-06-10 15:17:18 +02:00
void ROW : : CopyFrom ( const ROW & source )
{
_lineRendition = source . _lineRendition ;
_wrapForced = source . _wrapForced ;
2023-09-26 02:28:51 +02:00
RowCopyTextFromState state {
. source = source ,
. sourceColumnLimit = source . GetReadableColumnCount ( ) ,
} ;
CopyTextFrom ( state ) ;
2024-04-10 17:12:40 +02:00
_attr = source . Attributes ( ) ;
_attr . resize_trailing_extent ( _columnCount ) ;
2023-06-10 15:17:18 +02:00
}
Add an efficient text stream write function (#14821)
This PR adds a couple of foundational functions and classes to make
our TextBuffer more performant and allow us to improve our Unicode
correctness in the future, by getting rid of our dependence on
`OutputCellIterator`. In the future we can then replace the simple
UTF-16 code point iterator with a proper grapheme cluster iterator.
While my focus is technically on Unicode correctness, the ~4x VT
throughput increase in OpenConsole is pretty nice too.
This PR adds:
* A new, simpler ROW iterator (unused in this PR)
* Cursor movement functions (`NavigateToPrevious`, `NavigateToNext`)
They're based on functions that align the cursor to the start/end
of the _current_ cell, so such functions can be added as well.
* `ReplaceText` to write a raw string of text with the possibility to
specify a right margin.
* `CopyRangeFrom` will allow us to make reflow much faster, as it's able
to bulk-copy already measured strings without re-measuring them.
Related to #8000
## Validation Steps Performed
* enwik8.txt, zhwik8.txt, emoji-test.txt, all work with proper
wide glyph reflow at the end of a row ✅
* This produces "a 咪" where only "a" has a white background:
```sh
printf '\e7こん\e8\x1b[107ma\x1b[m\n'
```
* This produces "abん":
```sh
stdbuf -o0 printf '\x1b7こん\x1b8a'; printf 'b\n'
```
* This produces "xy" at the end of the line:
```sh
stdbuf -o0 printf '\e[999C\bこ\bx'; printf 'y\n'
```
* This produces red whitespace followed by "こ " in the default
background color at the end of the line, and "ん" on the next line:
```sh
printf '\e[41m\e[K\e[m\e[999C\e[2Dこん\n'
```
2023-03-24 23:20:53 +01:00
// Returns the previous possible cursor position, preceding the given column.
// Returns 0 if column is less than or equal to 0.
// Implemented as: clamp `column - 1` into the row, then adjust the result backward.
til::CoordType ROW::NavigateToPrevious(til::CoordType column) const noexcept
{
    const auto previous = _clampedColumn(column - 1);
    return _adjustBackward(previous);
}
// Returns the next possible cursor position, following the given column.
// Returns the row width if column is beyond the width of the row.
// Implemented as: clamp `column + 1` (inclusive clamp), then adjust the result forward.
til::CoordType ROW::NavigateToNext(til::CoordType column) const noexcept
{
    const auto next = _clampedColumnInclusive(column + 1);
    return _adjustForward(next);
}
// Returns the starting column of the glyph at the given column.
// In other words, if you have 3 wide glyphs
//   AA BB CC
//   01 23 45  <-- column
// then `AdjustToGlyphStart(3)` returns 2.
// Implemented as: clamp `column` into the row, then adjust the result backward.
til::CoordType ROW::AdjustToGlyphStart(til::CoordType column) const noexcept
{
    const auto clamped = _clampedColumn(column);
    return _adjustBackward(clamped);
}
2024-01-30 03:50:33 +05:30
// Returns the (exclusive) ending column of the glyph at the given column.
// In other words, if you have 3 wide glyphs
//   AA BB CC
//   01 23 45  <-- column
// Examples:
// - `AdjustToGlyphEnd(4)` returns 6.
// - `AdjustToGlyphEnd(3)` returns 4.
// Implemented as: clamp `column` (inclusive clamp), then adjust the result forward.
til::CoordType ROW::AdjustToGlyphEnd(til::CoordType column) const noexcept
{
    const auto clamped = _clampedColumnInclusive(column);
    return _adjustForward(clamped);
}
2019-05-02 15:29:04 -07:00
// Routine Description:
// - clears char data in column in row
// Arguments:
// - column - 0-indexed column index
// Return Value:
// - <none>
2022-11-11 20:34:58 +01:00
void ROW : : ClearCell ( const til : : CoordType column )
2019-05-02 15:29:04 -07:00
{
2022-11-11 20:34:58 +01:00
static constexpr std : : wstring_view space { L " " } ;
ReplaceCharacters ( column , 1 , space ) ;
2019-05-02 15:29:04 -07:00
}
// Routine Description:
// - writes cell data to the row
// Arguments:
// - it - custom console iterator to use for seeking input data. bool() false when it becomes invalid while seeking.
// - index - column in row to start writing at
make filling chars (and, thus, erase line/char) unset wrap (#2831)
EraseInLine calls `FillConsoleOutputCharacterW()`. In filling the row with
chars, we were setting the wrap flag. We need to specifically not do this on
ANY _FILL_ operation. Now a fill operation UNSETS the wrap flag if we fill to
the end of the line.
Originally, we had a boolean `setWrap` that would mean...
- **true**: if writing to the end of the row, SET the wrap value to true
- **false**: if writing to the end of the row, DON'T CHANGE the wrap value
Now we're making this bool a std::optional to allow for a ternary state. This
allows for us to handle the following cases completely. Refer to the table
below:
,- current wrap value
| ,- are we filling the last cell in the row?
| | ,- new wrap value
| | | ,- comments
|-- |-- |-- |
| 0 | 0 | 0 |
| 0 | 1 | 0 |
| 0 | 1 | 1 | THIS CASE WAS HANDLED CORRECTLY
| 1 | 0 | 0 | THIS CASE WAS UNHANDLED
| 1 | 0 | 1 |
| 1 | 1 | 1 |
To handle that special case (1-0-0), we need to UNSET the wrap. So now, we have
~setWrap~ `wrap` mean the following:
- **true**: if writing to the end of the row, SET the wrap value to TRUE
- **false**: if writing to the end of the row, SET the wrap value to FALSE
- **nullopt**: leave the wrap value as it is
Closes #1126
2019-09-30 18:16:31 -07:00
// - wrap - change the wrap flag if we hit the end of the row while writing and there's still more data in the iterator.
2019-05-02 15:29:04 -07:00
// - limitRight - right inclusive column ID for the last write in this row. (optional, will just write to the end of row if nullopt)
// Return Value:
// - iterator to first cell that was not written to this row.
2022-11-11 20:34:58 +01:00
OutputCellIterator ROW::WriteCells(OutputCellIterator it, const til::CoordType columnBegin, const std::optional<bool> wrap, std::optional<til::CoordType> limitRight)
{
    // Both the starting column and the (optional) right limit have to lie inside of this row.
    THROW_HR_IF(E_INVALIDARG, columnBegin >= size());
    THROW_HR_IF(E_INVALIDARG, limitRight.value_or(0) >= size());

    // Inclusive index of the last column we may write to:
    // the caller's limit if one was given, otherwise the final column of the row.
    const auto lastColumn = limitRight.value_or(size() - 1);

    // Attributes are committed in runs instead of cell by cell:
    // - runAttr   is the attribute of the run that is currently being collected
    // - runBegin  is the column at which that run starts
    // - runLength counts the cells that joined the run (only its zero/non-zero state is consumed)
    auto runAttr = it->TextAttr();
    uint16_t runLength = 0;
    auto runBegin = gsl::narrow_cast<uint16_t>(columnBegin);
    auto column = runBegin;

    for (; it && column <= lastColumn; ++column)
    {
        const auto behavior = it->TextAttrBehavior();

        // Unless the cell asks us to keep the current color, account for its attribute.
        if (behavior != TextAttributeBehavior::Current)
        {
            if (runAttr == it->TextAttr())
            {
                // Same attribute as the pending run --> the run just grows.
                ++runLength;
            }
            else
            {
                // The attribute changed: flush the pending run and begin a new one at this column.
                _attr.replace(runBegin, column, runAttr);
                runAttr = it->TextAttr();
                runLength = 1;
                runBegin = column;
            }
        }

        // A cell that only stores a color carries no text. Consume it and move on to the next column.
        if (behavior == TextAttributeBehavior::StoredOnly)
        {
            ++it;
            continue;
        }

        const auto atFirstColumn = column == 0;
        const auto atLastColumn = column == lastColumn;
        const auto dbcs = it->DbcsAttr();
        const auto& text = it->Chars();

        if (dbcs == DbcsAttribute::Leading)
        {
            if (atLastColumn)
            {
                // The wide char doesn't fit anymore. Pad the cell with whitespace and do NOT advance the
                // iterator: we're about to leave the loop and the caller can call WriteCells() again
                // on the next row with the very same iterator position.
                ClearCell(column);
                SetDoubleBytePadded(true);
            }
            else
            {
                ReplaceCharacters(column, 2, text);
                ++it;
            }
        }
        else if (dbcs == DbcsAttribute::Trailing)
        {
            if (atFirstColumn)
            {
                // There's no column to the left that could hold the leading half.
                // The character is dropped and the cell is cleared instead.
                ClearCell(column);
            }
            else if (it.Position() == 0)
            {
                // Buffers are commonly backed up and restored via `ReadConsoleOutputW` and `WriteConsoleOutputW`.
                // The affected area may cut wide characters in half and so only contain either their leading or
                // trailing half. The rest of conhost throws away the trailing half of all `CHAR_INFO`s (during text
                // rendering, as well as during `ReadConsoleOutputW`). To behave the same way and prevent surprises,
                // the trailer is only honored if it's the very first `CHAR_INFO` the user is trying to write.
                ReplaceCharacters(column - 1, 2, text);
            }
            ++it;
        }
        else
        {
            ReplaceCharacters(column, 1, text);
            ++it;
        }

        // If we were asked to (un)set the wrap status and we just filled the last column with some text...
        // - wrap = std::nullopt --> leave the wrap flag alone
        // - wrap = true         --> cells are written as a stream, treat this as a wrap
        // - wrap = false        --> cells are written as a block, unwrap
        if (atLastColumn && wrap.has_value())
        {
            SetWrapForced(*wrap);
        }
    }

    // Flush whatever attribute run is still pending.
    if (runLength != 0)
    {
        _attr.replace(runBegin, column, runAttr);
    }

    return it;
}
2022-11-11 20:34:58 +01:00
2023-06-23 01:24:10 +02:00
void ROW : : SetAttrToEnd ( const til : : CoordType columnBegin , const TextAttribute attr )
2022-11-11 20:34:58 +01:00
{
_attr . replace ( _clampedColumnInclusive ( columnBegin ) , _attr . size ( ) , attr ) ;
}
void ROW : : ReplaceAttributes ( const til : : CoordType beginIndex , const til : : CoordType endIndex , const TextAttribute & newAttr )
{
_attr . replace ( _clampedColumnInclusive ( beginIndex ) , _clampedColumnInclusive ( endIndex ) , newAttr ) ;
}
Add an efficient text stream write function (#14821)
This PR adds a couple of foundational functions and classes to make
our TextBuffer more performant and allow us to improve our Unicode
correctness in the future, by getting rid of our dependence on
`OutputCellIterator`. In the future we can then replace the simple
UTF-16 code point iterator with a proper grapheme cluster iterator.
While my focus is technically on Unicode correctness, the ~4x VT
throughput increase in OpenConsole is pretty nice too.
This PR adds:
* A new, simpler ROW iterator (unused in this PR)
* Cursor movement functions (`NavigateToPrevious`, `NavigateToNext`)
They're based on functions that align the cursor to the start/end
of the _current_ cell, so such functions can be added as well.
* `ReplaceText` to write a raw string of text with the possibility to
specify a right margin.
* `CopyRangeFrom` will allow us to make reflow much faster, as it's able
to bulk-copy already measured strings without re-measuring them.
Related to #8000
## Validation Steps Performed
* enwik8.txt, zhwik8.txt, emoji-test.txt, all work with proper
wide glyph reflow at the end of a row ✅
* This produces "a 咪" where only "a" has a white background:
```sh
printf '\e7こん\e8\x1b[107ma\x1b[m\n'
```
* This produces "abん":
```sh
stdbuf -o0 printf '\x1b7こん\x1b8a'; printf 'b\n'
```
* This produces "xy" at the end of the line:
```sh
stdbuf -o0 printf '\e[999C\bこ\bx'; printf 'y\n'
```
* This produces red whitespace followed by "こ " in the default
background color at the end of the line, and "ん" on the next line:
```sh
printf '\e[41m\e[K\e[m\e[999C\e[2Dこん\n'
```
2023-03-24 23:20:53 +01:00
// Captures the target row and the text to write, and derives the column/character
// bookkeeping that the subsequent Replace*() calls operate on.
[[msvc::forceinline]] ROW::WriteHelper::WriteHelper(ROW& row, til::CoordType columnBegin, til::CoordType columnLimit, const std::wstring_view& chars) noexcept :
    row{ row },
    chars{ chars }
{
    // Nothing has been written yet.
    colEndDirty = 0;
    charsConsumed = 0;

    // The requested column range, after both ends went through _clampedColumnInclusive.
    colLimit = row._clampedColumnInclusive(columnLimit);
    colBeg = row._clampedColumnInclusive(columnBegin);
    colEnd = colBeg;

    // colBegDirty is colBeg after _adjustBackward(); chBegDirty is the char offset stored for colBeg.
    colBegDirty = row._adjustBackward(colBeg);
    chBegDirty = row._uncheckedCharOffset(colBeg);

    // leadingSpaces is the distance between the requested and the backward-adjusted start column.
    // The text we write begins that many characters after chBegDirty.
    leadingSpaces = colBeg - colBegDirty;
    chBeg = chBegDirty + leadingSpaces;
}
[[msvc::forceinline]] bool ROW : : WriteHelper : : IsValid ( ) const noexcept
{
return colBeg < colLimit & & ! chars . empty ( ) ;
}
2022-11-11 20:34:58 +01:00
void ROW : : ReplaceCharacters ( til : : CoordType columnBegin , til : : CoordType width , const std : : wstring_view & chars )
Add an efficient text stream write function (#14821)
This adds PR adds a couple foundational functions and classes to make
our TextBuffer more performant and allow us to improve our Unicode
correctness in the future, by getting rid of our dependence on
`OutputCellIterator`. In the future we can then replace the simple
UTF-16 code point iterator with a proper grapheme cluster iterator.
While my focus is technically on Unicode correctness, the ~4x VT
throughput increase in OpenConsole is pretty nice too.
This PR adds:
* A new, simpler ROW iterator (unused in this PR)
* Cursor movement functions (`NavigateToPrevious`, `NavigateToNext`)
They're based on functions that align the cursor to the start/end
of the _current_ cell, so such functions can be added as well.
* `ReplaceText` to write a raw string of text with the possibility to
specify a right margin.
* `CopyRangeFrom` will allow us to make reflow much faster, as it's able
to bulk-copy already measured strings without re-measuring them.
Related to #8000
## Validation Steps Performed
* enwik8.txt, zhwik8.txt, emoji-test.txt, all work with proper
wide glyph reflow at the end of a row ✅
* This produces "a 咪" where only "a" has a white background:
```sh
printf '\e7こん\e8\x1b[107ma\x1b[m\n'
```
* This produces "abん":
```sh
stdbuf -o0 printf '\x1b7こん\x1b8a'; printf 'b\n'
```
* This produces "xy" at the end of the line:
```sh
stdbuf -o0 printf '\e[999C\bこ\bx'; printf 'y\n'
```
* This produces red whitespace followed by "こ " in the default
background color at the end of the line, and "ん" on the next line:
```sh
printf '\e[41m\e[K\e[m\e[999C\e[2Dこん\n'
```
2023-03-24 23:20:53 +01:00
try
2022-11-11 20:34:58 +01:00
{
2024-06-26 20:40:27 +02:00
assert ( width > = 1 & & width < = 2 ) ;
Add an efficient text stream write function (#14821)
This adds PR adds a couple foundational functions and classes to make
our TextBuffer more performant and allow us to improve our Unicode
correctness in the future, by getting rid of our dependence on
`OutputCellIterator`. In the future we can then replace the simple
UTF-16 code point iterator with a proper grapheme cluster iterator.
While my focus is technically on Unicode correctness, the ~4x VT
throughput increase in OpenConsole is pretty nice too.
This PR adds:
* A new, simpler ROW iterator (unused in this PR)
* Cursor movement functions (`NavigateToPrevious`, `NavigateToNext`)
They're based on functions that align the cursor to the start/end
of the _current_ cell, so such functions can be added as well.
* `ReplaceText` to write a raw string of text with the possibility to
specify a right margin.
* `CopyRangeFrom` will allow us to make reflow much faster, as it's able
to bulk-copy already measured strings without re-measuring them.
Related to #8000
## Validation Steps Performed
* enwik8.txt, zhwik8.txt, emoji-test.txt, all work with proper
wide glyph reflow at the end of a row ✅
* This produces "a 咪" where only "a" has a white background:
```sh
printf '\e7こん\e8\x1b[107ma\x1b[m\n'
```
* This produces "abん":
```sh
stdbuf -o0 printf '\x1b7こん\x1b8a'; printf 'b\n'
```
* This produces "xy" at the end of the line:
```sh
stdbuf -o0 printf '\e[999C\bこ\bx'; printf 'y\n'
```
* This produces red whitespace followed by "こ " in the default
background color at the end of the line, and "ん" on the next line:
```sh
printf '\e[41m\e[K\e[m\e[999C\e[2Dこん\n'
```
2023-03-24 23:20:53 +01:00
WriteHelper h { * this , columnBegin , _columnCount , chars } ;
if ( ! h . IsValid ( ) )
{
return ;
}
h . ReplaceCharacters ( width ) ;
h . Finish ( ) ;
}
catch ( . . . )
{
// Due to this function writing _charOffsets first, then calling _resizeChars (which may throw) and only then finally
// filling in _chars, we might end up in a situation were _charOffsets contains offsets outside of the _chars array.
// --> Restore this row to a known "okay"-state.
Reset ( TextAttribute { } ) ;
throw ;
}
2022-11-11 20:34:58 +01:00
Add an efficient text stream write function (#14821)
This PR adds a couple of foundational functions and classes to make
our TextBuffer more performant and allow us to improve our Unicode
correctness in the future, by getting rid of our dependence on
`OutputCellIterator`. In the future we can then replace the simple
UTF-16 code point iterator with a proper grapheme cluster iterator.
While my focus is technically on Unicode correctness, the ~4x VT
throughput increase in OpenConsole is pretty nice too.
This PR adds:
* A new, simpler ROW iterator (unused in this PR)
* Cursor movement functions (`NavigateToPrevious`, `NavigateToNext`)
They're based on functions that align the cursor to the start/end
of the _current_ cell, so such functions can be added as well.
* `ReplaceText` to write a raw string of text with the possibility to
specify a right margin.
* `CopyRangeFrom` will allow us to make reflow much faster, as it's able
to bulk-copy already measured strings without re-measuring them.
Related to #8000
## Validation Steps Performed
* enwik8.txt, zhwik8.txt, emoji-test.txt, all work with proper
wide glyph reflow at the end of a row ✅
* This produces "a 咪" where only "a" has a white background:
```sh
printf '\e7こん\e8\x1b[107ma\x1b[m\n'
```
* This produces "abん":
```sh
stdbuf -o0 printf '\x1b7こん\x1b8a'; printf 'b\n'
```
* This produces "xy" at the end of the line:
```sh
stdbuf -o0 printf '\e[999C\bこ\bx'; printf 'y\n'
```
* This produces red whitespace followed by "こ " in the default
background color at the end of the line, and "ん" on the next line:
```sh
printf '\e[41m\e[K\e[m\e[999C\e[2Dこん\n'
```
2023-03-24 23:20:53 +01:00
[[msvc::forceinline]] void ROW : : WriteHelper : : ReplaceCharacters ( til : : CoordType width ) noexcept
{
const auto colEndNew = gsl : : narrow_cast < uint16_t > ( colEnd + width ) ;
if ( colEndNew > colLimit )
2022-11-11 20:34:58 +01:00
{
Add an efficient text stream write function (#14821)
This adds PR adds a couple foundational functions and classes to make
our TextBuffer more performant and allow us to improve our Unicode
correctness in the future, by getting rid of our dependence on
`OutputCellIterator`. In the future we can then replace the simple
UTF-16 code point iterator with a proper grapheme cluster iterator.
While my focus is technically on Unicode correctness, the ~4x VT
throughput increase in OpenConsole is pretty nice too.
This PR adds:
* A new, simpler ROW iterator (unused in this PR)
* Cursor movement functions (`NavigateToPrevious`, `NavigateToNext`)
They're based on functions that align the cursor to the start/end
of the _current_ cell, so such functions can be added as well.
* `ReplaceText` to write a raw string of text with the possibility to
specify a right margin.
* `CopyRangeFrom` will allow us to make reflow much faster, as it's able
to bulk-copy already measured strings without re-measuring them.
Related to #8000
## Validation Steps Performed
* enwik8.txt, zhwik8.txt, emoji-test.txt, all work with proper
wide glyph reflow at the end of a row ✅
* This produces "a 咪" where only "a" has a white background:
```sh
printf '\e7こん\e8\x1b[107ma\x1b[m\n'
```
* This produces "abん":
```sh
stdbuf -o0 printf '\x1b7こん\x1b8a'; printf 'b\n'
```
* This produces "xy" at the end of the line:
```sh
stdbuf -o0 printf '\e[999C\bこ\bx'; printf 'y\n'
```
* This produces red whitespace followed by "こ " in the default
background color at the end of the line, and "ん" on the next line:
```sh
printf '\e[41m\e[K\e[m\e[999C\e[2Dこん\n'
```
2023-03-24 23:20:53 +01:00
colEndDirty = colLimit ;
}
else
{
til : : at ( row . _charOffsets , colEnd + + ) = chBeg ;
for ( ; colEnd < colEndNew ; + + colEnd )
{
til : : at ( row . _charOffsets , colEnd ) = gsl : : narrow_cast < uint16_t > ( chBeg | CharOffsetsTrailer ) ;
}
colEndDirty = colEnd ;
charsConsumed = chars . size ( ) ;
}
}
void ROW : : ReplaceText ( RowWriteState & state )
try
{
WriteHelper h { * this , state . columnBegin , state . columnLimit , state . text } ;
if ( ! h . IsValid ( ) )
{
state . columnEnd = h . colBeg ;
state . columnBeginDirty = h . colBeg ;
state . columnEndDirty = h . colBeg ;
2022-11-11 20:34:58 +01:00
return ;
}
Add an efficient text stream write function (#14821)
This adds PR adds a couple foundational functions and classes to make
our TextBuffer more performant and allow us to improve our Unicode
correctness in the future, by getting rid of our dependence on
`OutputCellIterator`. In the future we can then replace the simple
UTF-16 code point iterator with a proper grapheme cluster iterator.
While my focus is technically on Unicode correctness, the ~4x VT
throughput increase in OpenConsole is pretty nice too.
This PR adds:
* A new, simpler ROW iterator (unused in this PR)
* Cursor movement functions (`NavigateToPrevious`, `NavigateToNext`)
They're based on functions that align the cursor to the start/end
of the _current_ cell, so such functions can be added as well.
* `ReplaceText` to write a raw string of text with the possibility to
specify a right margin.
* `CopyRangeFrom` will allow us to make reflow much faster, as it's able
to bulk-copy already measured strings without re-measuring them.
Related to #8000
## Validation Steps Performed
* enwik8.txt, zhwik8.txt, emoji-test.txt, all work with proper
wide glyph reflow at the end of a row ✅
* This produces "a 咪" where only "a" has a white background:
```sh
printf '\e7こん\e8\x1b[107ma\x1b[m\n'
```
* This produces "abん":
```sh
stdbuf -o0 printf '\x1b7こん\x1b8a'; printf 'b\n'
```
* This produces "xy" at the end of the line:
```sh
stdbuf -o0 printf '\e[999C\bこ\bx'; printf 'y\n'
```
* This produces red whitespace followed by "こ " in the default
background color at the end of the line, and "ん" on the next line:
```sh
printf '\e[41m\e[K\e[m\e[999C\e[2Dこん\n'
```
2023-03-24 23:20:53 +01:00
h . ReplaceText ( ) ;
h . Finish ( ) ;
2022-11-11 20:34:58 +01:00
Add an efficient text stream write function (#14821)
This adds PR adds a couple foundational functions and classes to make
our TextBuffer more performant and allow us to improve our Unicode
correctness in the future, by getting rid of our dependence on
`OutputCellIterator`. In the future we can then replace the simple
UTF-16 code point iterator with a proper grapheme cluster iterator.
While my focus is technically on Unicode correctness, the ~4x VT
throughput increase in OpenConsole is pretty nice too.
This PR adds:
* A new, simpler ROW iterator (unused in this PR)
* Cursor movement functions (`NavigateToPrevious`, `NavigateToNext`)
They're based on functions that align the cursor to the start/end
of the _current_ cell, so such functions can be added as well.
* `ReplaceText` to write a raw string of text with the possibility to
specify a right margin.
* `CopyRangeFrom` will allow us to make reflow much faster, as it's able
to bulk-copy already measured strings without re-measuring them.
Related to #8000
## Validation Steps Performed
* enwik8.txt, zhwik8.txt, emoji-test.txt, all work with proper
wide glyph reflow at the end of a row ✅
* This produces "a 咪" where only "a" has a white background:
```sh
printf '\e7こん\e8\x1b[107ma\x1b[m\n'
```
* This produces "abん":
```sh
stdbuf -o0 printf '\x1b7こん\x1b8a'; printf 'b\n'
```
* This produces "xy" at the end of the line:
```sh
stdbuf -o0 printf '\e[999C\bこ\bx'; printf 'y\n'
```
* This produces red whitespace followed by "こ " in the default
background color at the end of the line, and "ん" on the next line:
```sh
printf '\e[41m\e[K\e[m\e[999C\e[2Dこん\n'
```
2023-03-24 23:20:53 +01:00
state . text = state . text . substr ( h . charsConsumed ) ;
// Here's why we set `state.columnEnd` to `colLimit` if there's remaining text:
// Callers should be able to use `state.columnEnd` as the next cursor position, as well as the parameter for a
// follow-up call to ReplaceAttributes(). But if we fail to insert a wide glyph into the last column of a row,
// that last cell (which now contains padding whitespace) should get the same attributes as the rest of the
// string so that the row looks consistent. This requires us to return `colLimit` instead of `colLimit - 1`.
// Additionally, this has the benefit that callers can detect line wrapping by checking `columnEnd >= columnLimit`.
state . columnEnd = state . text . empty ( ) ? h . colEnd : h . colLimit ;
state . columnBeginDirty = h . colBegDirty ;
state . columnEndDirty = h . colEndDirty ;
}
catch ( . . . )
{
Reset ( TextAttribute { } ) ;
throw ;
}
2022-11-11 20:34:58 +01:00
Add an efficient text stream write function (#14821)
This PR adds a couple of foundational functions and classes to make
our TextBuffer more performant and allow us to improve our Unicode
correctness in the future, by getting rid of our dependence on
`OutputCellIterator`. In the future we can then replace the simple
UTF-16 code point iterator with a proper grapheme cluster iterator.
While my focus is technically on Unicode correctness, the ~4x VT
throughput increase in OpenConsole is pretty nice too.
This PR adds:
* A new, simpler ROW iterator (unused in this PR)
* Cursor movement functions (`NavigateToPrevious`, `NavigateToNext`)
They're based on functions that align the cursor to the start/end
of the _current_ cell, so such functions can be added as well.
* `ReplaceText` to write a raw string of text with the possibility to
specify a right margin.
* `CopyRangeFrom` will allow us to make reflow much faster, as it's able
to bulk-copy already measured strings without re-measuring them.
Related to #8000
## Validation Steps Performed
* enwik8.txt, zhwik8.txt, emoji-test.txt, all work with proper
wide glyph reflow at the end of a row ✅
* This produces "a 咪" where only "a" has a white background:
```sh
printf '\e7こん\e8\x1b[107ma\x1b[m\n'
```
* This produces "abん":
```sh
stdbuf -o0 printf '\x1b7こん\x1b8a'; printf 'b\n'
```
* This produces "xy" at the end of the line:
```sh
stdbuf -o0 printf '\e[999C\bこ\bx'; printf 'y\n'
```
* This produces red whitespace followed by "こ " in the default
background color at the end of the line, and "ん" on the next line:
```sh
printf '\e[41m\e[K\e[m\e[999C\e[2Dこん\n'
```
2023-03-24 23:20:53 +01:00
[[msvc::forceinline]] void ROW : : WriteHelper : : ReplaceText ( ) noexcept
{
2023-07-05 21:26:15 +02:00
// This function starts with a fast-pass for ASCII. ASCII is still predominant in technical areas.
//
// We can infer the "end" from the amount of columns we're given (colLimit - colBeg),
// because ASCII is always 1 column wide per character.
auto it = chars . begin ( ) ;
const auto end = it + std : : min < size_t > ( chars . size ( ) , colLimit - colBeg ) ;
Add an efficient text stream write function (#14821)
This adds PR adds a couple foundational functions and classes to make
our TextBuffer more performant and allow us to improve our Unicode
correctness in the future, by getting rid of our dependence on
`OutputCellIterator`. In the future we can then replace the simple
UTF-16 code point iterator with a proper grapheme cluster iterator.
While my focus is technically on Unicode correctness, the ~4x VT
throughput increase in OpenConsole is pretty nice too.
This PR adds:
* A new, simpler ROW iterator (unused in this PR)
* Cursor movement functions (`NavigateToPrevious`, `NavigateToNext`)
They're based on functions that align the cursor to the start/end
of the _current_ cell, so such functions can be added as well.
* `ReplaceText` to write a raw string of text with the possibility to
specify a right margin.
* `CopyRangeFrom` will allow us to make reflow much faster, as it's able
to bulk-copy already measured strings without re-measuring them.
Related to #8000
## Validation Steps Performed
* enwik8.txt, zhwik8.txt, emoji-test.txt, all work with proper
wide glyph reflow at the end of a row ✅
* This produces "a 咪" where only "a" has a white background:
```sh
printf '\e7こん\e8\x1b[107ma\x1b[m\n'
```
* This produces "abん":
```sh
stdbuf -o0 printf '\x1b7こん\x1b8a'; printf 'b\n'
```
* This produces "xy" at the end of the line:
```sh
stdbuf -o0 printf '\e[999C\bこ\bx'; printf 'y\n'
```
* This produces red whitespace followed by "こ " in the default
background color at the end of the line, and "ん" on the next line:
```sh
printf '\e[41m\e[K\e[m\e[999C\e[2Dこん\n'
```
2023-03-24 23:20:53 +01:00
size_t ch = chBeg ;
2022-11-11 20:34:58 +01:00
2023-07-05 21:26:15 +02:00
while ( it ! = end )
2022-11-11 20:34:58 +01:00
{
2023-07-05 21:26:15 +02:00
if ( * it > = 0x80 ) [[unlikely]]
{
_replaceTextUnicode ( ch , it ) ;
return ;
}
til : : at ( row . _charOffsets , colEnd ) = gsl : : narrow_cast < uint16_t > ( ch ) ;
+ + colEnd ;
+ + ch ;
+ + it ;
}
colEndDirty = colEnd ;
charsConsumed = ch - chBeg ;
}
[[msvc::forceinline]] void ROW : : WriteHelper : : _replaceTextUnicode ( size_t ch , std : : wstring_view : : const_iterator it ) noexcept
{
2024-06-26 20:40:27 +02:00
auto & cwd = CodepointWidthDetector : : Singleton ( ) ;
2023-07-05 21:26:15 +02:00
2024-06-26 20:40:27 +02:00
// Check if the new text joins with the existing contents of the row to form a single grapheme cluster.
if ( it = = chars . begin ( ) )
2023-07-05 21:26:15 +02:00
{
2024-06-26 20:40:27 +02:00
auto colPrev = colBeg ;
while ( colPrev > 0 & & row . _uncheckedIsTrailer ( - - colPrev ) )
{
}
2023-07-05 21:26:15 +02:00
2024-06-26 20:40:27 +02:00
const auto chPrev = row . _uncheckedCharOffset ( colPrev ) ;
const std : : wstring_view charsPrev { row . _chars . data ( ) + chPrev , ch - chPrev } ;
2023-07-05 21:26:15 +02:00
2024-06-26 20:40:27 +02:00
GraphemeState state ;
cwd . GraphemeNext ( state , charsPrev ) ;
cwd . GraphemeNext ( state , chars ) ;
if ( state . len > 0 )
2023-07-05 21:26:15 +02:00
{
2024-06-26 20:40:27 +02:00
colBegDirty = colPrev ;
colEnd = colPrev ;
const auto width = std : : max ( 1 , state . width ) ;
const auto colEndNew = gsl : : narrow_cast < uint16_t > ( colEnd + width ) ;
if ( colEndNew > colLimit )
2023-07-05 21:26:15 +02:00
{
2024-06-26 20:40:27 +02:00
colEndDirty = colLimit ;
charsConsumed = ch - chBeg ;
return ;
2023-07-05 21:26:15 +02:00
}
2024-06-26 20:40:27 +02:00
// Fill our char-offset buffer with 1 entry containing the mapping from the
// current column (colEnd) to the start of the glyph in the string (ch)...
til : : at ( row . _charOffsets , colEnd + + ) = gsl : : narrow_cast < uint16_t > ( chPrev ) ;
// ...followed by 0-N entries containing an indication that the
// columns are just a wide-glyph extension of the preceding one.
while ( colEnd < colEndNew )
{
til : : at ( row . _charOffsets , colEnd + + ) = gsl : : narrow_cast < uint16_t > ( chPrev | CharOffsetsTrailer ) ;
}
2023-07-05 21:26:15 +02:00
2024-06-26 20:40:27 +02:00
ch + = state . len ;
it + = state . len ;
Add an efficient text stream write function (#14821)
This adds PR adds a couple foundational functions and classes to make
our TextBuffer more performant and allow us to improve our Unicode
correctness in the future, by getting rid of our dependence on
`OutputCellIterator`. In the future we can then replace the simple
UTF-16 code point iterator with a proper grapheme cluster iterator.
While my focus is technically on Unicode correctness, the ~4x VT
throughput increase in OpenConsole is pretty nice too.
This PR adds:
* A new, simpler ROW iterator (unused in this PR)
* Cursor movement functions (`NavigateToPrevious`, `NavigateToNext`)
They're based on functions that align the cursor to the start/end
of the _current_ cell, so such functions can be added as well.
* `ReplaceText` to write a raw string of text with the possibility to
specify a right margin.
* `CopyRangeFrom` will allow us to make reflow much faster, as it's able
to bulk-copy already measured strings without re-measuring them.
Related to #8000
## Validation Steps Performed
* enwik8.txt, zhwik8.txt, emoji-test.txt, all work with proper
wide glyph reflow at the end of a row ✅
* This produces "a 咪" where only "a" has a white background:
```sh
printf '\e7こん\e8\x1b[107ma\x1b[m\n'
```
* This produces "abん":
```sh
stdbuf -o0 printf '\x1b7こん\x1b8a'; printf 'b\n'
```
* This produces "xy" at the end of the line:
```sh
stdbuf -o0 printf '\e[999C\bこ\bx'; printf 'y\n'
```
* This produces red whitespace followed by "こ " in the default
background color at the end of the line, and "ん" on the next line:
```sh
printf '\e[41m\e[K\e[m\e[999C\e[2Dこん\n'
```
2023-03-24 23:20:53 +01:00
}
2024-06-26 20:40:27 +02:00
}
else
{
// The non-ASCII character we have encountered may be a combining mark, like "a^" which is then displayed as "â".
// In order to recognize both characters as a single grapheme, we need to back up by 1 ASCII character
// and let MeasureNext() find the next proper grapheme boundary.
- - colEnd ;
- - ch ;
- - it ;
}
if ( const auto end = chars . end ( ) ; it ! = end )
{
GraphemeState state { . beg = & * it } ;
Add an efficient text stream write function (#14821)
This adds PR adds a couple foundational functions and classes to make
our TextBuffer more performant and allow us to improve our Unicode
correctness in the future, by getting rid of our dependence on
`OutputCellIterator`. In the future we can then replace the simple
UTF-16 code point iterator with a proper grapheme cluster iterator.
While my focus is technically on Unicode correctness, the ~4x VT
throughput increase in OpenConsole is pretty nice too.
This PR adds:
* A new, simpler ROW iterator (unused in this PR)
* Cursor movement functions (`NavigateToPrevious`, `NavigateToNext`)
They're based on functions that align the cursor to the start/end
of the _current_ cell, so such functions can be added as well.
* `ReplaceText` to write a raw string of text with the possibility to
specify a right margin.
* `CopyRangeFrom` will allow us to make reflow much faster, as it's able
to bulk-copy already measured strings without re-measuring them.
Related to #8000
## Validation Steps Performed
* enwik8.txt, zhwik8.txt, emoji-test.txt, all work with proper
wide glyph reflow at the end of a row ✅
* This produces "a 咪" where only "a" has a white background:
```sh
printf '\e7こん\e8\x1b[107ma\x1b[m\n'
```
* This produces "abん":
```sh
stdbuf -o0 printf '\x1b7こん\x1b8a'; printf 'b\n'
```
* This produces "xy" at the end of the line:
```sh
stdbuf -o0 printf '\e[999C\bこ\bx'; printf 'y\n'
```
* This produces red whitespace followed by "こ " in the default
background color at the end of the line, and "ん" on the next line:
```sh
printf '\e[41m\e[K\e[m\e[999C\e[2Dこん\n'
```
2023-03-24 23:20:53 +01:00
2024-06-26 20:40:27 +02:00
do
Add an efficient text stream write function (#14821)
This adds PR adds a couple foundational functions and classes to make
our TextBuffer more performant and allow us to improve our Unicode
correctness in the future, by getting rid of our dependence on
`OutputCellIterator`. In the future we can then replace the simple
UTF-16 code point iterator with a proper grapheme cluster iterator.
While my focus is technically on Unicode correctness, the ~4x VT
throughput increase in OpenConsole is pretty nice too.
This PR adds:
* A new, simpler ROW iterator (unused in this PR)
* Cursor movement functions (`NavigateToPrevious`, `NavigateToNext`)
They're based on functions that align the cursor to the start/end
of the _current_ cell, so such functions can be added as well.
* `ReplaceText` to write a raw string of text with the possibility to
specify a right margin.
* `CopyRangeFrom` will allow us to make reflow much faster, as it's able
to bulk-copy already measured strings without re-measuring them.
Related to #8000
## Validation Steps Performed
* enwik8.txt, zhwik8.txt, emoji-test.txt, all work with proper
wide glyph reflow at the end of a row ✅
* This produces "a 咪" where only "a" has a white background:
```sh
printf '\e7こん\e8\x1b[107ma\x1b[m\n'
```
* This produces "abん":
```sh
stdbuf -o0 printf '\x1b7こん\x1b8a'; printf 'b\n'
```
* This produces "xy" at the end of the line:
```sh
stdbuf -o0 printf '\e[999C\bこ\bx'; printf 'y\n'
```
* This produces red whitespace followed by "こ " in the default
background color at the end of the line, and "ん" on the next line:
```sh
printf '\e[41m\e[K\e[m\e[999C\e[2Dこん\n'
```
2023-03-24 23:20:53 +01:00
{
2024-06-26 20:40:27 +02:00
cwd . GraphemeNext ( state , chars ) ;
const auto width = std : : max ( 1 , state . width ) ;
const auto colEndNew = gsl : : narrow_cast < uint16_t > ( colEnd + width ) ;
if ( colEndNew > colLimit )
{
colEndDirty = colLimit ;
charsConsumed = ch - chBeg ;
return ;
}
// Fill our char-offset buffer with 1 entry containing the mapping from the
// current column (colEnd) to the start of the glyph in the string (ch)...
til : : at ( row . _charOffsets , colEnd + + ) = gsl : : narrow_cast < uint16_t > ( ch ) ;
// ...followed by 0-N entries containing an indication that the
// columns are just a wide-glyph extension of the preceding one.
while ( colEnd < colEndNew )
{
til : : at ( row . _charOffsets , colEnd + + ) = gsl : : narrow_cast < uint16_t > ( ch | CharOffsetsTrailer ) ;
}
Add an efficient text stream write function (#14821)
This PR adds a couple of foundational functions and classes to make
our TextBuffer more performant and allow us to improve our Unicode
correctness in the future, by getting rid of our dependence on
`OutputCellIterator`. In the future we can then replace the simple
UTF-16 code point iterator with a proper grapheme cluster iterator.
While my focus is technically on Unicode correctness, the ~4x VT
throughput increase in OpenConsole is pretty nice too.
This PR adds:
* A new, simpler ROW iterator (unused in this PR)
* Cursor movement functions (`NavigateToPrevious`, `NavigateToNext`)
They're based on functions that align the cursor to the start/end
of the _current_ cell, so such functions can be added as well.
* `ReplaceText` to write a raw string of text with the possibility to
specify a right margin.
* `CopyRangeFrom` will allow us to make reflow much faster, as it's able
to bulk-copy already measured strings without re-measuring them.
Related to #8000
## Validation Steps Performed
* enwik8.txt, zhwik8.txt, emoji-test.txt, all work with proper
wide glyph reflow at the end of a row ✅
* This produces "a 咪" where only "a" has a white background:
```sh
printf '\e7こん\e8\x1b[107ma\x1b[m\n'
```
* This produces "abん":
```sh
stdbuf -o0 printf '\x1b7こん\x1b8a'; printf 'b\n'
```
* This produces "xy" at the end of the line:
```sh
stdbuf -o0 printf '\e[999C\bこ\bx'; printf 'y\n'
```
* This produces red whitespace followed by "こ " in the default
background color at the end of the line, and "ん" on the next line:
```sh
printf '\e[41m\e[K\e[m\e[999C\e[2Dこん\n'
```
2023-03-24 23:20:53 +01:00
2024-06-26 20:40:27 +02:00
ch + = state . len ;
it + = state . len ;
} while ( it ! = end ) ;
2022-11-11 20:34:58 +01:00
}
2023-07-05 21:26:15 +02:00
colEndDirty = colEnd ;
Add an efficient text stream write function (#14821)
This PR adds a couple of foundational functions and classes to make
our TextBuffer more performant and allow us to improve our Unicode
correctness in the future, by getting rid of our dependence on
`OutputCellIterator`. In the future we can then replace the simple
UTF-16 code point iterator with a proper grapheme cluster iterator.
While my focus is technically on Unicode correctness, the ~4x VT
throughput increase in OpenConsole is pretty nice too.
This PR adds:
* A new, simpler ROW iterator (unused in this PR)
* Cursor movement functions (`NavigateToPrevious`, `NavigateToNext`)
They're based on functions that align the cursor to the start/end
of the _current_ cell, so such functions can be added as well.
* `ReplaceText` to write a raw string of text with the possibility to
specify a right margin.
* `CopyRangeFrom` will allow us to make reflow much faster, as it's able
to bulk-copy already measured strings without re-measuring them.
Related to #8000
## Validation Steps Performed
* enwik8.txt, zhwik8.txt, emoji-test.txt, all work with proper
wide glyph reflow at the end of a row ✅
* This produces "a 咪" where only "a" has a white background:
```sh
printf '\e7こん\e8\x1b[107ma\x1b[m\n'
```
* This produces "abん":
```sh
stdbuf -o0 printf '\x1b7こん\x1b8a'; printf 'b\n'
```
* This produces "xy" at the end of the line:
```sh
stdbuf -o0 printf '\e[999C\bこ\bx'; printf 'y\n'
```
* This produces red whitespace followed by "こ " in the default
background color at the end of the line, and "ん" on the next line:
```sh
printf '\e[41m\e[K\e[m\e[999C\e[2Dこん\n'
```
2023-03-24 23:20:53 +01:00
charsConsumed = ch - chBeg ;
}
2023-06-14 21:34:42 +02:00
void ROW : : CopyTextFrom ( RowCopyTextFromState & state )
Add an efficient text stream write function (#14821)
This adds PR adds a couple foundational functions and classes to make
our TextBuffer more performant and allow us to improve our Unicode
correctness in the future, by getting rid of our dependence on
`OutputCellIterator`. In the future we can then replace the simple
UTF-16 code point iterator with a proper grapheme cluster iterator.
While my focus is technically on Unicode correctness, the ~4x VT
throughput increase in OpenConsole is pretty nice too.
This PR adds:
* A new, simpler ROW iterator (unused in this PR)
* Cursor movement functions (`NavigateToPrevious`, `NavigateToNext`)
They're based on functions that align the cursor to the start/end
of the _current_ cell, so such functions can be added as well.
* `ReplaceText` to write a raw string of text with the possibility to
specify a right margin.
* `CopyRangeFrom` will allow us to make reflow much faster, as it's able
to bulk-copy already measured strings without re-measuring them.
Related to #8000
## Validation Steps Performed
* enwik8.txt, zhwik8.txt, emoji-test.txt, all work with proper
wide glyph reflow at the end of a row ✅
* This produces "a 咪" where only "a" has a white background:
```sh
printf '\e7こん\e8\x1b[107ma\x1b[m\n'
```
* This produces "abん":
```sh
stdbuf -o0 printf '\x1b7こん\x1b8a'; printf 'b\n'
```
* This produces "xy" at the end of the line:
```sh
stdbuf -o0 printf '\e[999C\bこ\bx'; printf 'y\n'
```
* This produces red whitespace followed by "こ " in the default
background color at the end of the line, and "ん" on the next line:
```sh
printf '\e[41m\e[K\e[m\e[999C\e[2Dこん\n'
```
2023-03-24 23:20:53 +01:00
try
{
2023-06-14 21:34:42 +02:00
auto & source = state . source ;
const auto sourceColBeg = source . _clampedColumnInclusive ( state . sourceColumnBegin ) ;
const auto sourceColLimit = source . _clampedColumnInclusive ( state . sourceColumnLimit ) ;
std : : span < const uint16_t > charOffsets ;
Add an efficient text stream write function (#14821)
This adds PR adds a couple foundational functions and classes to make
our TextBuffer more performant and allow us to improve our Unicode
correctness in the future, by getting rid of our dependence on
`OutputCellIterator`. In the future we can then replace the simple
UTF-16 code point iterator with a proper grapheme cluster iterator.
While my focus is technically on Unicode correctness, the ~4x VT
throughput increase in OpenConsole is pretty nice too.
This PR adds:
* A new, simpler ROW iterator (unused in this PR)
* Cursor movement functions (`NavigateToPrevious`, `NavigateToNext`)
They're based on functions that align the cursor to the start/end
of the _current_ cell, so such functions can be added as well.
* `ReplaceText` to write a raw string of text with the possibility to
specify a right margin.
* `CopyRangeFrom` will allow us to make reflow much faster, as it's able
to bulk-copy already measured strings without re-measuring them.
Related to #8000
## Validation Steps Performed
* enwik8.txt, zhwik8.txt, emoji-test.txt, all work with proper
wide glyph reflow at the end of a row ✅
* This produces "a 咪" where only "a" has a white background:
```sh
printf '\e7こん\e8\x1b[107ma\x1b[m\n'
```
* This produces "abん":
```sh
stdbuf -o0 printf '\x1b7こん\x1b8a'; printf 'b\n'
```
* This produces "xy" at the end of the line:
```sh
stdbuf -o0 printf '\e[999C\bこ\bx'; printf 'y\n'
```
* This produces red whitespace followed by "こ " in the default
background color at the end of the line, and "ん" on the next line:
```sh
printf '\e[41m\e[K\e[m\e[999C\e[2Dこん\n'
```
2023-03-24 23:20:53 +01:00
std : : wstring_view chars ;
2023-06-14 21:34:42 +02:00
if ( sourceColBeg < sourceColLimit )
Add an efficient text stream write function (#14821)
This adds PR adds a couple foundational functions and classes to make
our TextBuffer more performant and allow us to improve our Unicode
correctness in the future, by getting rid of our dependence on
`OutputCellIterator`. In the future we can then replace the simple
UTF-16 code point iterator with a proper grapheme cluster iterator.
While my focus is technically on Unicode correctness, the ~4x VT
throughput increase in OpenConsole is pretty nice too.
This PR adds:
* A new, simpler ROW iterator (unused in this PR)
* Cursor movement functions (`NavigateToPrevious`, `NavigateToNext`)
They're based on functions that align the cursor to the start/end
of the _current_ cell, so such functions can be added as well.
* `ReplaceText` to write a raw string of text with the possibility to
specify a right margin.
* `CopyRangeFrom` will allow us to make reflow much faster, as it's able
to bulk-copy already measured strings without re-measuring them.
Related to #8000
## Validation Steps Performed
* enwik8.txt, zhwik8.txt, emoji-test.txt, all work with proper
wide glyph reflow at the end of a row ✅
* This produces "a 咪" where only "a" has a white background:
```sh
printf '\e7こん\e8\x1b[107ma\x1b[m\n'
```
* This produces "abん":
```sh
stdbuf -o0 printf '\x1b7こん\x1b8a'; printf 'b\n'
```
* This produces "xy" at the end of the line:
```sh
stdbuf -o0 printf '\e[999C\bこ\bx'; printf 'y\n'
```
* This produces red whitespace followed by "こ " in the default
background color at the end of the line, and "ん" on the next line:
```sh
printf '\e[41m\e[K\e[m\e[999C\e[2Dこん\n'
```
2023-03-24 23:20:53 +01:00
{
2023-06-14 21:34:42 +02:00
charOffsets = source . _charOffsets . subspan ( sourceColBeg , static_cast < size_t > ( sourceColLimit ) - sourceColBeg + 1 ) ;
2023-09-26 02:28:51 +02:00
const auto beg = size_t { charOffsets . front ( ) } & CharOffsetsMask ;
const auto end = size_t { charOffsets . back ( ) } & CharOffsetsMask ;
Add an efficient text stream write function (#14821)
This adds PR adds a couple foundational functions and classes to make
our TextBuffer more performant and allow us to improve our Unicode
correctness in the future, by getting rid of our dependence on
`OutputCellIterator`. In the future we can then replace the simple
UTF-16 code point iterator with a proper grapheme cluster iterator.
While my focus is technically on Unicode correctness, the ~4x VT
throughput increase in OpenConsole is pretty nice too.
This PR adds:
* A new, simpler ROW iterator (unused in this PR)
* Cursor movement functions (`NavigateToPrevious`, `NavigateToNext`)
They're based on functions that align the cursor to the start/end
of the _current_ cell, so such functions can be added as well.
* `ReplaceText` to write a raw string of text with the possibility to
specify a right margin.
* `CopyRangeFrom` will allow us to make reflow much faster, as it's able
to bulk-copy already measured strings without re-measuring them.
Related to #8000
## Validation Steps Performed
* enwik8.txt, zhwik8.txt, emoji-test.txt, all work with proper
wide glyph reflow at the end of a row ✅
* This produces "a 咪" where only "a" has a white background:
```sh
printf '\e7こん\e8\x1b[107ma\x1b[m\n'
```
* This produces "abん":
```sh
stdbuf -o0 printf '\x1b7こん\x1b8a'; printf 'b\n'
```
* This produces "xy" at the end of the line:
```sh
stdbuf -o0 printf '\e[999C\bこ\bx'; printf 'y\n'
```
* This produces red whitespace followed by "こ " in the default
background color at the end of the line, and "ん" on the next line:
```sh
printf '\e[41m\e[K\e[m\e[999C\e[2Dこん\n'
```
2023-03-24 23:20:53 +01:00
// We _are_ using span. But C++ decided that string_view and span aren't convertible.
// _chars is a std::span for performance and because it refers to raw, shared memory.
# pragma warning(suppress : 26481) // Don't use pointer arithmetic. Use span instead (bounds.1).
2023-09-26 02:28:51 +02:00
chars = { source . _chars . data ( ) + beg , end - beg } ;
Add an efficient text stream write function (#14821)
This adds PR adds a couple foundational functions and classes to make
our TextBuffer more performant and allow us to improve our Unicode
correctness in the future, by getting rid of our dependence on
`OutputCellIterator`. In the future we can then replace the simple
UTF-16 code point iterator with a proper grapheme cluster iterator.
While my focus is technically on Unicode correctness, the ~4x VT
throughput increase in OpenConsole is pretty nice too.
This PR adds:
* A new, simpler ROW iterator (unused in this PR)
* Cursor movement functions (`NavigateToPrevious`, `NavigateToNext`)
They're based on functions that align the cursor to the start/end
of the _current_ cell, so such functions can be added as well.
* `ReplaceText` to write a raw string of text with the possibility to
specify a right margin.
* `CopyRangeFrom` will allow us to make reflow much faster, as it's able
to bulk-copy already measured strings without re-measuring them.
Related to #8000
## Validation Steps Performed
* enwik8.txt, zhwik8.txt, emoji-test.txt, all work with proper
wide glyph reflow at the end of a row ✅
* This produces "a 咪" where only "a" has a white background:
```sh
printf '\e7こん\e8\x1b[107ma\x1b[m\n'
```
* This produces "abん":
```sh
stdbuf -o0 printf '\x1b7こん\x1b8a'; printf 'b\n'
```
* This produces "xy" at the end of the line:
```sh
stdbuf -o0 printf '\e[999C\bこ\bx'; printf 'y\n'
```
* This produces red whitespace followed by "こ " in the default
background color at the end of the line, and "ん" on the next line:
```sh
printf '\e[41m\e[K\e[m\e[999C\e[2Dこん\n'
```
2023-03-24 23:20:53 +01:00
}
2023-06-14 21:34:42 +02:00
WriteHelper h { * this , state . columnBegin , state . columnLimit , chars } ;
if ( ! h . IsValid ( ) | |
// If we were to copy text from ourselves, we'd overwrite
// our _charOffsets and break Finish() which reads from it.
this = = & state . source | |
// Any valid charOffsets array is at least 2 elements long (the 1st element is the start offset and the 2nd
// element is the length of the first glyph) and begins/ends with a non-trailer offset. We don't really
// need to test for the end offset, since `WriteHelper::WriteWithOffsets` already takes care of that.
charOffsets . size ( ) < 2 | | WI_IsFlagSet ( charOffsets . front ( ) , CharOffsetsTrailer ) )
2022-11-11 20:34:58 +01:00
{
2023-06-14 21:34:42 +02:00
state . columnEnd = h . colBeg ;
state . columnBeginDirty = h . colBeg ;
state . columnEndDirty = h . colBeg ;
state . sourceColumnEnd = source . _columnCount ;
return ;
2022-11-11 20:34:58 +01:00
}
2023-06-14 21:34:42 +02:00
2023-06-10 15:17:18 +02:00
h . CopyTextFrom ( charOffsets ) ;
Add an efficient text stream write function (#14821)
This adds PR adds a couple foundational functions and classes to make
our TextBuffer more performant and allow us to improve our Unicode
correctness in the future, by getting rid of our dependence on
`OutputCellIterator`. In the future we can then replace the simple
UTF-16 code point iterator with a proper grapheme cluster iterator.
While my focus is technically on Unicode correctness, the ~4x VT
throughput increase in OpenConsole is pretty nice too.
This PR adds:
* A new, simpler ROW iterator (unused in this PR)
* Cursor movement functions (`NavigateToPrevious`, `NavigateToNext`)
They're based on functions that align the cursor to the start/end
of the _current_ cell, so such functions can be added as well.
* `ReplaceText` to write a raw string of text with the possibility to
specify a right margin.
* `CopyRangeFrom` will allow us to make reflow much faster, as it's able
to bulk-copy already measured strings without re-measuring them.
Related to #8000
## Validation Steps Performed
* enwik8.txt, zhwik8.txt, emoji-test.txt, all work with proper
wide glyph reflow at the end of a row ✅
* This produces "a 咪" where only "a" has a white background:
```sh
printf '\e7こん\e8\x1b[107ma\x1b[m\n'
```
* This produces "abん":
```sh
stdbuf -o0 printf '\x1b7こん\x1b8a'; printf 'b\n'
```
* This produces "xy" at the end of the line:
```sh
stdbuf -o0 printf '\e[999C\bこ\bx'; printf 'y\n'
```
* This produces red whitespace followed by "こ " in the default
background color at the end of the line, and "ん" on the next line:
```sh
printf '\e[41m\e[K\e[m\e[999C\e[2Dこん\n'
```
2023-03-24 23:20:53 +01:00
h . Finish ( ) ;
2022-11-11 20:34:58 +01:00
2023-06-14 21:34:42 +02:00
// state.columnEnd is computed identical to ROW::ReplaceText. Check it out for more information.
state . columnEnd = h . charsConsumed = = chars . size ( ) ? h . colEnd : h . colLimit ;
state . columnBeginDirty = h . colBegDirty ;
state . columnEndDirty = h . colEndDirty ;
state . sourceColumnEnd = sourceColBeg + h . colEnd - h . colBeg ;
Add an efficient text stream write function (#14821)
This adds PR adds a couple foundational functions and classes to make
our TextBuffer more performant and allow us to improve our Unicode
correctness in the future, by getting rid of our dependence on
`OutputCellIterator`. In the future we can then replace the simple
UTF-16 code point iterator with a proper grapheme cluster iterator.
While my focus is technically on Unicode correctness, the ~4x VT
throughput increase in OpenConsole is pretty nice too.
This PR adds:
* A new, simpler ROW iterator (unused in this PR)
* Cursor movement functions (`NavigateToPrevious`, `NavigateToNext`)
They're based on functions that align the cursor to the start/end
of the _current_ cell, so such functions can be added as well.
* `ReplaceText` to write a raw string of text with the possibility to
specify a right margin.
* `CopyRangeFrom` will allow us to make reflow much faster, as it's able
to bulk-copy already measured strings without re-measuring them.
Related to #8000
## Validation Steps Performed
* enwik8.txt, zhwik8.txt, emoji-test.txt, all work with proper
wide glyph reflow at the end of a row ✅
* This produces "a 咪" where only "a" has a white background:
```sh
printf '\e7こん\e8\x1b[107ma\x1b[m\n'
```
* This produces "abん":
```sh
stdbuf -o0 printf '\x1b7こん\x1b8a'; printf 'b\n'
```
* This produces "xy" at the end of the line:
```sh
stdbuf -o0 printf '\e[999C\bこ\bx'; printf 'y\n'
```
* This produces red whitespace followed by "こ " in the default
background color at the end of the line, and "ん" on the next line:
```sh
printf '\e[41m\e[K\e[m\e[999C\e[2Dこん\n'
```
2023-03-24 23:20:53 +01:00
}
catch ( . . . )
{
Reset ( TextAttribute { } ) ;
throw ;
}
2023-06-10 15:17:18 +02:00
[[msvc::forceinline]] void ROW : : WriteHelper : : CopyTextFrom ( const std : : span < const uint16_t > & charOffsets ) noexcept
Add an efficient text stream write function (#14821)
This adds PR adds a couple foundational functions and classes to make
our TextBuffer more performant and allow us to improve our Unicode
correctness in the future, by getting rid of our dependence on
`OutputCellIterator`. In the future we can then replace the simple
UTF-16 code point iterator with a proper grapheme cluster iterator.
While my focus is technically on Unicode correctness, the ~4x VT
throughput increase in OpenConsole is pretty nice too.
This PR adds:
* A new, simpler ROW iterator (unused in this PR)
* Cursor movement functions (`NavigateToPrevious`, `NavigateToNext`)
They're based on functions that align the cursor to the start/end
of the _current_ cell, so such functions can be added as well.
* `ReplaceText` to write a raw string of text with the possibility to
specify a right margin.
* `CopyRangeFrom` will allow us to make reflow much faster, as it's able
to bulk-copy already measured strings without re-measuring them.
Related to #8000
## Validation Steps Performed
* enwik8.txt, zhwik8.txt, emoji-test.txt, all work with proper
wide glyph reflow at the end of a row ✅
* This produces "a 咪" where only "a" has a white background:
```sh
printf '\e7こん\e8\x1b[107ma\x1b[m\n'
```
* This produces "abん":
```sh
stdbuf -o0 printf '\x1b7こん\x1b8a'; printf 'b\n'
```
* This produces "xy" at the end of the line:
```sh
stdbuf -o0 printf '\e[999C\bこ\bx'; printf 'y\n'
```
* This produces red whitespace followed by "こ " in the default
background color at the end of the line, and "ん" on the next line:
```sh
printf '\e[41m\e[K\e[m\e[999C\e[2Dこん\n'
```
2023-03-24 23:20:53 +01:00
{
// Since our `charOffsets` input is already in columns (just like the `ROW::_charOffsets`),
// we can directly look up the end char-offset, but...
const auto colEndDirtyInput = std : : min ( gsl : : narrow_cast < uint16_t > ( colLimit - colBeg ) , gsl : : narrow < uint16_t > ( charOffsets . size ( ) - 1 ) ) ;
2022-11-11 20:34:58 +01:00
Add an efficient text stream write function (#14821)
This adds PR adds a couple foundational functions and classes to make
our TextBuffer more performant and allow us to improve our Unicode
correctness in the future, by getting rid of our dependence on
`OutputCellIterator`. In the future we can then replace the simple
UTF-16 code point iterator with a proper grapheme cluster iterator.
While my focus is technically on Unicode correctness, the ~4x VT
throughput increase in OpenConsole is pretty nice too.
This PR adds:
* A new, simpler ROW iterator (unused in this PR)
* Cursor movement functions (`NavigateToPrevious`, `NavigateToNext`)
They're based on functions that align the cursor to the start/end
of the _current_ cell, so such functions can be added as well.
* `ReplaceText` to write a raw string of text with the possibility to
specify a right margin.
* `CopyRangeFrom` will allow us to make reflow much faster, as it's able
to bulk-copy already measured strings without re-measuring them.
Related to #8000
## Validation Steps Performed
* enwik8.txt, zhwik8.txt, emoji-test.txt, all work with proper
wide glyph reflow at the end of a row ✅
* This produces "a 咪" where only "a" has a white background:
```sh
printf '\e7こん\e8\x1b[107ma\x1b[m\n'
```
* This produces "abん":
```sh
stdbuf -o0 printf '\x1b7こん\x1b8a'; printf 'b\n'
```
* This produces "xy" at the end of the line:
```sh
stdbuf -o0 printf '\e[999C\bこ\bx'; printf 'y\n'
```
* This produces red whitespace followed by "こ " in the default
background color at the end of the line, and "ん" on the next line:
```sh
printf '\e[41m\e[K\e[m\e[999C\e[2Dこん\n'
```
2023-03-24 23:20:53 +01:00
// ...since the colLimit might intersect with a wide glyph in `charOffset`, we need to adjust our input-colEnd.
auto colEndInput = colEndDirtyInput ;
for ( ; WI_IsFlagSet ( til : : at ( charOffsets , colEndInput ) , CharOffsetsTrailer ) ; - - colEndInput )
2022-11-11 20:34:58 +01:00
{
}
Add an efficient text stream write function (#14821)
This adds PR adds a couple foundational functions and classes to make
our TextBuffer more performant and allow us to improve our Unicode
correctness in the future, by getting rid of our dependence on
`OutputCellIterator`. In the future we can then replace the simple
UTF-16 code point iterator with a proper grapheme cluster iterator.
While my focus is technically on Unicode correctness, the ~4x VT
throughput increase in OpenConsole is pretty nice too.
This PR adds:
* A new, simpler ROW iterator (unused in this PR)
* Cursor movement functions (`NavigateToPrevious`, `NavigateToNext`)
They're based on functions that align the cursor to the start/end
of the _current_ cell, so such functions can be added as well.
* `ReplaceText` to write a raw string of text with the possibility to
specify a right margin.
* `CopyRangeFrom` will allow us to make reflow much faster, as it's able
to bulk-copy already measured strings without re-measuring them.
Related to #8000
## Validation Steps Performed
* enwik8.txt, zhwik8.txt, emoji-test.txt, all work with proper
wide glyph reflow at the end of a row ✅
* This produces "a 咪" where only "a" has a white background:
```sh
printf '\e7こん\e8\x1b[107ma\x1b[m\n'
```
* This produces "abん":
```sh
stdbuf -o0 printf '\x1b7こん\x1b8a'; printf 'b\n'
```
* This produces "xy" at the end of the line:
```sh
stdbuf -o0 printf '\e[999C\bこ\bx'; printf 'y\n'
```
* This produces red whitespace followed by "こ " in the default
background color at the end of the line, and "ん" on the next line:
```sh
printf '\e[41m\e[K\e[m\e[999C\e[2Dこん\n'
```
2023-03-24 23:20:53 +01:00
const auto baseOffset = til : : at ( charOffsets , 0 ) ;
const auto endOffset = til : : at ( charOffsets , colEndInput ) ;
const auto inToOutOffset = gsl : : narrow_cast < uint16_t > ( chBeg - baseOffset ) ;
2023-06-23 00:17:46 +02:00
# pragma warning(suppress : 26481) // Don't use pointer arithmetic. Use span instead (bounds.1).
const auto dst = row . _charOffsets . data ( ) + colEnd ;
Add an efficient text stream write function (#14821)
This adds PR adds a couple foundational functions and classes to make
our TextBuffer more performant and allow us to improve our Unicode
correctness in the future, by getting rid of our dependence on
`OutputCellIterator`. In the future we can then replace the simple
UTF-16 code point iterator with a proper grapheme cluster iterator.
While my focus is technically on Unicode correctness, the ~4x VT
throughput increase in OpenConsole is pretty nice too.
This PR adds:
* A new, simpler ROW iterator (unused in this PR)
* Cursor movement functions (`NavigateToPrevious`, `NavigateToNext`)
They're based on functions that align the cursor to the start/end
of the _current_ cell, so such functions can be added as well.
* `ReplaceText` to write a raw string of text with the possibility to
specify a right margin.
* `CopyRangeFrom` will allow us to make reflow much faster, as it's able
to bulk-copy already measured strings without re-measuring them.
Related to #8000
## Validation Steps Performed
* enwik8.txt, zhwik8.txt, emoji-test.txt, all work with proper
wide glyph reflow at the end of a row ✅
* This produces "a 咪" where only "a" has a white background:
```sh
printf '\e7こん\e8\x1b[107ma\x1b[m\n'
```
* This produces "abん":
```sh
stdbuf -o0 printf '\x1b7こん\x1b8a'; printf 'b\n'
```
* This produces "xy" at the end of the line:
```sh
stdbuf -o0 printf '\e[999C\bこ\bx'; printf 'y\n'
```
* This produces red whitespace followed by "こ " in the default
background color at the end of the line, and "ん" on the next line:
```sh
printf '\e[41m\e[K\e[m\e[999C\e[2Dこん\n'
```
2023-03-24 23:20:53 +01:00
2023-06-23 00:17:46 +02:00
_copyOffsets ( dst , charOffsets . data ( ) , colEndInput , inToOutOffset ) ;
Add an efficient text stream write function (#14821)
This adds PR adds a couple foundational functions and classes to make
our TextBuffer more performant and allow us to improve our Unicode
correctness in the future, by getting rid of our dependence on
`OutputCellIterator`. In the future we can then replace the simple
UTF-16 code point iterator with a proper grapheme cluster iterator.
While my focus is technically on Unicode correctness, the ~4x VT
throughput increase in OpenConsole is pretty nice too.
This PR adds:
* A new, simpler ROW iterator (unused in this PR)
* Cursor movement functions (`NavigateToPrevious`, `NavigateToNext`)
They're based on functions that align the cursor to the start/end
of the _current_ cell, so such functions can be added as well.
* `ReplaceText` to write a raw string of text with the possibility to
specify a right margin.
* `CopyRangeFrom` will allow us to make reflow much faster, as it's able
to bulk-copy already measured strings without re-measuring them.
Related to #8000
## Validation Steps Performed
* enwik8.txt, zhwik8.txt, emoji-test.txt, all work with proper
wide glyph reflow at the end of a row ✅
* This produces "a 咪" where only "a" has a white background:
```sh
printf '\e7こん\e8\x1b[107ma\x1b[m\n'
```
* This produces "abん":
```sh
stdbuf -o0 printf '\x1b7こん\x1b8a'; printf 'b\n'
```
* This produces "xy" at the end of the line:
```sh
stdbuf -o0 printf '\e[999C\bこ\bx'; printf 'y\n'
```
* This produces red whitespace followed by "こ " in the default
background color at the end of the line, and "ん" on the next line:
```sh
printf '\e[41m\e[K\e[m\e[999C\e[2Dこん\n'
```
2023-03-24 23:20:53 +01:00
2023-06-23 00:17:46 +02:00
colEnd + = colEndInput ;
Add an efficient text stream write function (#14821)
This adds PR adds a couple foundational functions and classes to make
our TextBuffer more performant and allow us to improve our Unicode
correctness in the future, by getting rid of our dependence on
`OutputCellIterator`. In the future we can then replace the simple
UTF-16 code point iterator with a proper grapheme cluster iterator.
While my focus is technically on Unicode correctness, the ~4x VT
throughput increase in OpenConsole is pretty nice too.
This PR adds:
* A new, simpler ROW iterator (unused in this PR)
* Cursor movement functions (`NavigateToPrevious`, `NavigateToNext`)
They're based on functions that align the cursor to the start/end
of the _current_ cell, so such functions can be added as well.
* `ReplaceText` to write a raw string of text with the possibility to
specify a right margin.
* `CopyRangeFrom` will allow us to make reflow much faster, as it's able
to bulk-copy already measured strings without re-measuring them.
Related to #8000
## Validation Steps Performed
* enwik8.txt, zhwik8.txt, emoji-test.txt, all work with proper
wide glyph reflow at the end of a row ✅
* This produces "a 咪" where only "a" has a white background:
```sh
printf '\e7こん\e8\x1b[107ma\x1b[m\n'
```
* This produces "abん":
```sh
stdbuf -o0 printf '\x1b7こん\x1b8a'; printf 'b\n'
```
* This produces "xy" at the end of the line:
```sh
stdbuf -o0 printf '\e[999C\bこ\bx'; printf 'y\n'
```
* This produces red whitespace followed by "こ " in the default
background color at the end of the line, and "ん" on the next line:
```sh
printf '\e[41m\e[K\e[m\e[999C\e[2Dこん\n'
```
2023-03-24 23:20:53 +01:00
colEndDirty = gsl : : narrow_cast < uint16_t > ( colBeg + colEndDirtyInput ) ;
charsConsumed = endOffset - baseOffset ;
}
2023-06-23 00:17:46 +02:00
# pragma warning(push)
# pragma warning(disable : 26481) // Don't use pointer arithmetic. Use span instead (bounds.1).
// Copies `size` char-offset entries from `src` to `dst`, adding `offset` to the
// offset part of each entry while carrying its trailer flag over unchanged.
//
// dst    - destination entries; must not be null and, due to __restrict, must not overlap `src`.
// src    - source entries; must not be null.
// size   - number of entries to copy.
// offset - value added to each entry's masked offset.
[[msvc::forceinline]] void ROW : : WriteHelper : : _copyOffsets ( uint16_t * __restrict dst , const uint16_t * __restrict src , uint16_t size , uint16_t offset ) noexcept
{
// Optimizer hints only: passing a null pointer here is undefined behavior.
__assume ( src ! = nullptr ) ;
__assume ( dst ! = nullptr ) ;
// All tested compilers (including MSVC) will neatly unroll and vectorize
// this loop, which is why it's written in this particular way.
for ( const auto end = src + size ; src ! = end ; + + src , + + dst )
{
const uint16_t ch = * src ;
// Split the entry into its offset bits and its trailer flag...
const uint16_t off = ch & CharOffsetsMask ;
const uint16_t trailer = ch & CharOffsetsTrailer ;
// ...shift only the offset (the sum is truncated to 16 bits)...
const uint16_t newOff = off + offset ;
// ...and put the trailer flag back on.
* dst = newOff | trailer ;
}
}
# pragma warning(pop)
Add an efficient text stream write function (#14821)
This PR adds a couple of foundational functions and classes to make
our TextBuffer more performant and allow us to improve our Unicode
correctness in the future, by getting rid of our dependence on
`OutputCellIterator`. In the future we can then replace the simple
UTF-16 code point iterator with a proper grapheme cluster iterator.
While my focus is technically on Unicode correctness, the ~4x VT
throughput increase in OpenConsole is pretty nice too.
This PR adds:
* A new, simpler ROW iterator (unused in this PR)
* Cursor movement functions (`NavigateToPrevious`, `NavigateToNext`)
They're based on functions that align the cursor to the start/end
of the _current_ cell, so such functions can be added as well.
* `ReplaceText` to write a raw string of text with the possibility to
specify a right margin.
* `CopyRangeFrom` will allow us to make reflow much faster, as it's able
to bulk-copy already measured strings without re-measuring them.
Related to #8000
## Validation Steps Performed
* enwik8.txt, zhwik8.txt, emoji-test.txt, all work with proper
wide glyph reflow at the end of a row ✅
* This produces "a 咪" where only "a" has a white background:
```sh
printf '\e7こん\e8\x1b[107ma\x1b[m\n'
```
* This produces "abん":
```sh
stdbuf -o0 printf '\x1b7こん\x1b8a'; printf 'b\n'
```
* This produces "xy" at the end of the line:
```sh
stdbuf -o0 printf '\e[999C\bこ\bx'; printf 'y\n'
```
* This produces red whitespace followed by "こ " in the default
background color at the end of the line, and "ん" on the next line:
```sh
printf '\e[41m\e[K\e[m\e[999C\e[2Dこん\n'
```
2023-03-24 23:20:53 +01:00
[[msvc::forceinline]] void ROW : : WriteHelper : : Finish ( )
{
colEndDirty = row . _adjustForward ( colEndDirty ) ;
const uint16_t trailingSpaces = colEndDirty - colEnd ;
const auto chEndDirtyOld = row . _uncheckedCharOffset ( colEndDirty ) ;
const auto chEndDirty = chBegDirty + charsConsumed + leadingSpaces + trailingSpaces ;
if ( chEndDirty ! = chEndDirtyOld )
2022-11-11 20:34:58 +01:00
{
Add an efficient text stream write function (#14821)
This adds PR adds a couple foundational functions and classes to make
our TextBuffer more performant and allow us to improve our Unicode
correctness in the future, by getting rid of our dependence on
`OutputCellIterator`. In the future we can then replace the simple
UTF-16 code point iterator with a proper grapheme cluster iterator.
While my focus is technically on Unicode correctness, the ~4x VT
throughput increase in OpenConsole is pretty nice too.
This PR adds:
* A new, simpler ROW iterator (unused in this PR)
* Cursor movement functions (`NavigateToPrevious`, `NavigateToNext`)
They're based on functions that align the cursor to the start/end
of the _current_ cell, so such functions can be added as well.
* `ReplaceText` to write a raw string of text with the possibility to
specify a right margin.
* `CopyRangeFrom` will allow us to make reflow much faster, as it's able
to bulk-copy already measured strings without re-measuring them.
Related to #8000
## Validation Steps Performed
* enwik8.txt, zhwik8.txt, emoji-test.txt, all work with proper
wide glyph reflow at the end of a row ✅
* This produces "a 咪" where only "a" has a white background:
```sh
printf '\e7こん\e8\x1b[107ma\x1b[m\n'
```
* This produces "abん":
```sh
stdbuf -o0 printf '\x1b7こん\x1b8a'; printf 'b\n'
```
* This produces "xy" at the end of the line:
```sh
stdbuf -o0 printf '\e[999C\bこ\bx'; printf 'y\n'
```
* This produces red whitespace followed by "こ " in the default
background color at the end of the line, and "ん" on the next line:
```sh
printf '\e[41m\e[K\e[m\e[999C\e[2Dこん\n'
```
2023-03-24 23:20:53 +01:00
row . _resizeChars ( colEndDirty , chBegDirty , chEndDirty , chEndDirtyOld ) ;
}
2022-11-11 20:34:58 +01:00
Add an efficient text stream write function (#14821)
This adds PR adds a couple foundational functions and classes to make
our TextBuffer more performant and allow us to improve our Unicode
correctness in the future, by getting rid of our dependence on
`OutputCellIterator`. In the future we can then replace the simple
UTF-16 code point iterator with a proper grapheme cluster iterator.
While my focus is technically on Unicode correctness, the ~4x VT
throughput increase in OpenConsole is pretty nice too.
This PR adds:
* A new, simpler ROW iterator (unused in this PR)
* Cursor movement functions (`NavigateToPrevious`, `NavigateToNext`)
They're based on functions that align the cursor to the start/end
of the _current_ cell, so such functions can be added as well.
* `ReplaceText` to write a raw string of text with the possibility to
specify a right margin.
* `CopyRangeFrom` will allow us to make reflow much faster, as it's able
to bulk-copy already measured strings without re-measuring them.
Related to #8000
## Validation Steps Performed
* enwik8.txt, zhwik8.txt, emoji-test.txt, all work with proper
wide glyph reflow at the end of a row ✅
* This produces "a 咪" where only "a" has a white background:
```sh
printf '\e7こん\e8\x1b[107ma\x1b[m\n'
```
* This produces "abん":
```sh
stdbuf -o0 printf '\x1b7こん\x1b8a'; printf 'b\n'
```
* This produces "xy" at the end of the line:
```sh
stdbuf -o0 printf '\e[999C\bこ\bx'; printf 'y\n'
```
* This produces red whitespace followed by "こ " in the default
background color at the end of the line, and "ん" on the next line:
```sh
printf '\e[41m\e[K\e[m\e[999C\e[2Dこん\n'
```
2023-03-24 23:20:53 +01:00
{
// std::copy_n compiles to memmove. We can do better. It also gets rid of an extra branch,
// because std::copy_n avoids calling memmove if the count is 0. It's never 0 for us.
const auto itBeg = row . _chars . begin ( ) + chBeg ;
memcpy ( & * itBeg , chars . data ( ) , charsConsumed * sizeof ( wchar_t ) ) ;
2022-11-11 20:34:58 +01:00
Add an efficient text stream write function (#14821)
This adds PR adds a couple foundational functions and classes to make
our TextBuffer more performant and allow us to improve our Unicode
correctness in the future, by getting rid of our dependence on
`OutputCellIterator`. In the future we can then replace the simple
UTF-16 code point iterator with a proper grapheme cluster iterator.
While my focus is technically on Unicode correctness, the ~4x VT
throughput increase in OpenConsole is pretty nice too.
This PR adds:
* A new, simpler ROW iterator (unused in this PR)
* Cursor movement functions (`NavigateToPrevious`, `NavigateToNext`)
They're based on functions that align the cursor to the start/end
of the _current_ cell, so such functions can be added as well.
* `ReplaceText` to write a raw string of text with the possibility to
specify a right margin.
* `CopyRangeFrom` will allow us to make reflow much faster, as it's able
to bulk-copy already measured strings without re-measuring them.
Related to #8000
## Validation Steps Performed
* enwik8.txt, zhwik8.txt, emoji-test.txt, all work with proper
wide glyph reflow at the end of a row ✅
* This produces "a 咪" where only "a" has a white background:
```sh
printf '\e7こん\e8\x1b[107ma\x1b[m\n'
```
* This produces "abん":
```sh
stdbuf -o0 printf '\x1b7こん\x1b8a'; printf 'b\n'
```
* This produces "xy" at the end of the line:
```sh
stdbuf -o0 printf '\e[999C\bこ\bx'; printf 'y\n'
```
* This produces red whitespace followed by "こ " in the default
background color at the end of the line, and "ん" on the next line:
```sh
printf '\e[41m\e[K\e[m\e[999C\e[2Dこん\n'
```
2023-03-24 23:20:53 +01:00
if ( leadingSpaces )
{
fill_n_small ( row . _chars . begin ( ) + chBegDirty , leadingSpaces , L ' ' ) ;
iota_n ( row . _charOffsets . begin ( ) + colBegDirty , leadingSpaces , chBegDirty ) ;
}
if ( trailingSpaces )
{
fill_n_small ( itBeg + charsConsumed , trailingSpaces , L ' ' ) ;
iota_n ( row . _charOffsets . begin ( ) + colEnd , trailingSpaces , gsl : : narrow_cast < uint16_t > ( chBeg + charsConsumed ) ) ;
}
}
2022-11-11 20:34:58 +01:00
Add an efficient text stream write function (#14821)
This adds PR adds a couple foundational functions and classes to make
our TextBuffer more performant and allow us to improve our Unicode
correctness in the future, by getting rid of our dependence on
`OutputCellIterator`. In the future we can then replace the simple
UTF-16 code point iterator with a proper grapheme cluster iterator.
While my focus is technically on Unicode correctness, the ~4x VT
throughput increase in OpenConsole is pretty nice too.
This PR adds:
* A new, simpler ROW iterator (unused in this PR)
* Cursor movement functions (`NavigateToPrevious`, `NavigateToNext`)
They're based on functions that align the cursor to the start/end
of the _current_ cell, so such functions can be added as well.
* `ReplaceText` to write a raw string of text with the possibility to
specify a right margin.
* `CopyRangeFrom` will allow us to make reflow much faster, as it's able
to bulk-copy already measured strings without re-measuring them.
Related to #8000
## Validation Steps Performed
* enwik8.txt, zhwik8.txt, emoji-test.txt, all work with proper
wide glyph reflow at the end of a row ✅
* This produces "a 咪" where only "a" has a white background:
```sh
printf '\e7こん\e8\x1b[107ma\x1b[m\n'
```
* This produces "abん":
```sh
stdbuf -o0 printf '\x1b7こん\x1b8a'; printf 'b\n'
```
* This produces "xy" at the end of the line:
```sh
stdbuf -o0 printf '\e[999C\bこ\bx'; printf 'y\n'
```
* This produces red whitespace followed by "こ " in the default
background color at the end of the line, and "ん" on the next line:
```sh
printf '\e[41m\e[K\e[m\e[999C\e[2Dこん\n'
```
2023-03-24 23:20:53 +01:00
// This updates `_doubleBytePadded` whenever we write the last column in the row. `_doubleBytePadded` tells our text
// reflow algorithm whether it should ignore the last column. This is important when writing wide characters into
// the terminal: If the last wide character in a row only fits partially, we should render whitespace, but
// during text reflow pretend as if no whitespace exists. After all, the user didn't write any whitespace there.
//
// The way this is written, it'll set `_doubleBytePadded` to `true` no matter whether a wide character didn't fit,
// or if the last 2 columns contain a wide character and a narrow character got written into the left half of it.
// In both cases `trailingSpaces` is 1 and fills the last column and `_doubleBytePadded` will be `true`.
if ( colEndDirty = = row . _columnCount )
{
row . SetDoubleBytePadded ( colEnd < row . _columnCount ) ;
2022-11-11 20:34:58 +01:00
}
}
// This function represents the slow path of ReplaceCharacters(),
// as it reallocates the backing buffer and shifts the char offsets.
// The parameters are difficult to explain, but their names are identical to
// local variables in ReplaceCharacters() which I've attempted to document there.
Add an efficient text stream write function (#14821)
This adds PR adds a couple foundational functions and classes to make
our TextBuffer more performant and allow us to improve our Unicode
correctness in the future, by getting rid of our dependence on
`OutputCellIterator`. In the future we can then replace the simple
UTF-16 code point iterator with a proper grapheme cluster iterator.
While my focus is technically on Unicode correctness, the ~4x VT
throughput increase in OpenConsole is pretty nice too.
This PR adds:
* A new, simpler ROW iterator (unused in this PR)
* Cursor movement functions (`NavigateToPrevious`, `NavigateToNext`)
They're based on functions that align the cursor to the start/end
of the _current_ cell, so such functions can be added as well.
* `ReplaceText` to write a raw string of text with the possibility to
specify a right margin.
* `CopyRangeFrom` will allow us to make reflow much faster, as it's able
to bulk-copy already measured strings without re-measuring them.
Related to #8000
## Validation Steps Performed
* enwik8.txt, zhwik8.txt, emoji-test.txt, all work with proper
wide glyph reflow at the end of a row ✅
* This produces "a 咪" where only "a" has a white background:
```sh
printf '\e7こん\e8\x1b[107ma\x1b[m\n'
```
* This produces "abん":
```sh
stdbuf -o0 printf '\x1b7こん\x1b8a'; printf 'b\n'
```
* This produces "xy" at the end of the line:
```sh
stdbuf -o0 printf '\e[999C\bこ\bx'; printf 'y\n'
```
* This produces red whitespace followed by "こ " in the default
background color at the end of the line, and "ん" on the next line:
```sh
printf '\e[41m\e[K\e[m\e[999C\e[2Dこん\n'
```
2023-03-24 23:20:53 +01:00
void ROW : : _resizeChars ( uint16_t colEndDirty , uint16_t chBegDirty , size_t chEndDirty , uint16_t chEndDirtyOld )
2022-11-11 20:34:58 +01:00
{
Add an efficient text stream write function (#14821)
This adds PR adds a couple foundational functions and classes to make
our TextBuffer more performant and allow us to improve our Unicode
correctness in the future, by getting rid of our dependence on
`OutputCellIterator`. In the future we can then replace the simple
UTF-16 code point iterator with a proper grapheme cluster iterator.
While my focus is technically on Unicode correctness, the ~4x VT
throughput increase in OpenConsole is pretty nice too.
This PR adds:
* A new, simpler ROW iterator (unused in this PR)
* Cursor movement functions (`NavigateToPrevious`, `NavigateToNext`)
They're based on functions that align the cursor to the start/end
of the _current_ cell, so such functions can be added as well.
* `ReplaceText` to write a raw string of text with the possibility to
specify a right margin.
* `CopyRangeFrom` will allow us to make reflow much faster, as it's able
to bulk-copy already measured strings without re-measuring them.
Related to #8000
## Validation Steps Performed
* enwik8.txt, zhwik8.txt, emoji-test.txt, all work with proper
wide glyph reflow at the end of a row ✅
* This produces "a 咪" where only "a" has a white background:
```sh
printf '\e7こん\e8\x1b[107ma\x1b[m\n'
```
* This produces "abん":
```sh
stdbuf -o0 printf '\x1b7こん\x1b8a'; printf 'b\n'
```
* This produces "xy" at the end of the line:
```sh
stdbuf -o0 printf '\e[999C\bこ\bx'; printf 'y\n'
```
* This produces red whitespace followed by "こ " in the default
background color at the end of the line, and "ん" on the next line:
```sh
printf '\e[41m\e[K\e[m\e[999C\e[2Dこん\n'
```
2023-03-24 23:20:53 +01:00
const auto diff = chEndDirty - chEndDirtyOld ;
2022-11-11 20:34:58 +01:00
const auto currentLength = _charSize ( ) ;
const auto newLength = currentLength + diff ;
if ( newLength < = _chars . size ( ) )
{
Add an efficient text stream write function (#14821)
This adds PR adds a couple foundational functions and classes to make
our TextBuffer more performant and allow us to improve our Unicode
correctness in the future, by getting rid of our dependence on
`OutputCellIterator`. In the future we can then replace the simple
UTF-16 code point iterator with a proper grapheme cluster iterator.
While my focus is technically on Unicode correctness, the ~4x VT
throughput increase in OpenConsole is pretty nice too.
This PR adds:
* A new, simpler ROW iterator (unused in this PR)
* Cursor movement functions (`NavigateToPrevious`, `NavigateToNext`)
They're based on functions that align the cursor to the start/end
of the _current_ cell, so such functions can be added as well.
* `ReplaceText` to write a raw string of text with the possibility to
specify a right margin.
* `CopyRangeFrom` will allow us to make reflow much faster, as it's able
to bulk-copy already measured strings without re-measuring them.
Related to #8000
## Validation Steps Performed
* enwik8.txt, zhwik8.txt, emoji-test.txt, all work with proper
wide glyph reflow at the end of a row ✅
* This produces "a 咪" where only "a" has a white background:
```sh
printf '\e7こん\e8\x1b[107ma\x1b[m\n'
```
* This produces "abん":
```sh
stdbuf -o0 printf '\x1b7こん\x1b8a'; printf 'b\n'
```
* This produces "xy" at the end of the line:
```sh
stdbuf -o0 printf '\e[999C\bこ\bx'; printf 'y\n'
```
* This produces red whitespace followed by "こ " in the default
background color at the end of the line, and "ん" on the next line:
```sh
printf '\e[41m\e[K\e[m\e[999C\e[2Dこん\n'
```
2023-03-24 23:20:53 +01:00
std : : copy_n ( _chars . begin ( ) + chEndDirtyOld , currentLength - chEndDirtyOld , _chars . begin ( ) + chEndDirty ) ;
2022-11-11 20:34:58 +01:00
}
else
{
const auto minCapacity = std : : min < size_t > ( UINT16_MAX , _chars . size ( ) + ( _chars . size ( ) > > 1 ) ) ;
const auto newCapacity = gsl : : narrow < uint16_t > ( std : : max ( newLength , minCapacity ) ) ;
auto charsHeap = std : : make_unique_for_overwrite < wchar_t [ ] > ( newCapacity ) ;
const std : : span chars { charsHeap . get ( ) , newCapacity } ;
Add an efficient text stream write function (#14821)
This adds PR adds a couple foundational functions and classes to make
our TextBuffer more performant and allow us to improve our Unicode
correctness in the future, by getting rid of our dependence on
`OutputCellIterator`. In the future we can then replace the simple
UTF-16 code point iterator with a proper grapheme cluster iterator.
While my focus is technically on Unicode correctness, the ~4x VT
throughput increase in OpenConsole is pretty nice too.
This PR adds:
* A new, simpler ROW iterator (unused in this PR)
* Cursor movement functions (`NavigateToPrevious`, `NavigateToNext`)
They're based on functions that align the cursor to the start/end
of the _current_ cell, so such functions can be added as well.
* `ReplaceText` to write a raw string of text with the possibility to
specify a right margin.
* `CopyRangeFrom` will allow us to make reflow much faster, as it's able
to bulk-copy already measured strings without re-measuring them.
Related to #8000
## Validation Steps Performed
* enwik8.txt, zhwik8.txt, emoji-test.txt, all work with proper
wide glyph reflow at the end of a row ✅
* This produces "a 咪" where only "a" has a white background:
```sh
printf '\e7こん\e8\x1b[107ma\x1b[m\n'
```
* This produces "abん":
```sh
stdbuf -o0 printf '\x1b7こん\x1b8a'; printf 'b\n'
```
* This produces "xy" at the end of the line:
```sh
stdbuf -o0 printf '\e[999C\bこ\bx'; printf 'y\n'
```
* This produces red whitespace followed by "こ " in the default
background color at the end of the line, and "ん" on the next line:
```sh
printf '\e[41m\e[K\e[m\e[999C\e[2Dこん\n'
```
2023-03-24 23:20:53 +01:00
std : : copy_n ( _chars . begin ( ) , chBegDirty , chars . begin ( ) ) ;
std : : copy_n ( _chars . begin ( ) + chEndDirtyOld , currentLength - chEndDirtyOld , chars . begin ( ) + chEndDirty ) ;
2022-11-11 20:34:58 +01:00
_charsHeap = std : : move ( charsHeap ) ;
_chars = chars ;
}
Add an efficient text stream write function (#14821)
This adds PR adds a couple foundational functions and classes to make
our TextBuffer more performant and allow us to improve our Unicode
correctness in the future, by getting rid of our dependence on
`OutputCellIterator`. In the future we can then replace the simple
UTF-16 code point iterator with a proper grapheme cluster iterator.
While my focus is technically on Unicode correctness, the ~4x VT
throughput increase in OpenConsole is pretty nice too.
This PR adds:
* A new, simpler ROW iterator (unused in this PR)
* Cursor movement functions (`NavigateToPrevious`, `NavigateToNext`)
They're based on functions that align the cursor to the start/end
of the _current_ cell, so such functions can be added as well.
* `ReplaceText` to write a raw string of text with the possibility to
specify a right margin.
* `CopyRangeFrom` will allow us to make reflow much faster, as it's able
to bulk-copy already measured strings without re-measuring them.
Related to #8000
## Validation Steps Performed
* enwik8.txt, zhwik8.txt, emoji-test.txt, all work with proper
wide glyph reflow at the end of a row ✅
* This produces "a 咪" where only "a" has a white background:
```sh
printf '\e7こん\e8\x1b[107ma\x1b[m\n'
```
* This produces "abん":
```sh
stdbuf -o0 printf '\x1b7こん\x1b8a'; printf 'b\n'
```
* This produces "xy" at the end of the line:
```sh
stdbuf -o0 printf '\e[999C\bこ\bx'; printf 'y\n'
```
* This produces red whitespace followed by "こ " in the default
background color at the end of the line, and "ん" on the next line:
```sh
printf '\e[41m\e[K\e[m\e[999C\e[2Dこん\n'
```
2023-03-24 23:20:53 +01:00
auto it = _charOffsets . begin ( ) + colEndDirty ;
2022-11-11 20:34:58 +01:00
const auto end = _charOffsets . end ( ) ;
for ( ; it ! = end ; + + it )
{
* it = gsl : : narrow_cast < uint16_t > ( * it + diff ) ;
}
}
Add an efficient text stream write function (#14821)
This PR adds a couple of foundational functions and classes to make
our TextBuffer more performant and allow us to improve our Unicode
correctness in the future, by getting rid of our dependence on
`OutputCellIterator`. In the future we can then replace the simple
UTF-16 code point iterator with a proper grapheme cluster iterator.
While my focus is technically on Unicode correctness, the ~4x VT
throughput increase in OpenConsole is pretty nice too.
This PR adds:
* A new, simpler ROW iterator (unused in this PR)
* Cursor movement functions (`NavigateToPrevious`, `NavigateToNext`)
They're based on functions that align the cursor to the start/end
of the _current_ cell, so such functions can be added as well.
* `ReplaceText` to write a raw string of text with the possibility to
specify a right margin.
* `CopyRangeFrom` will allow us to make reflow much faster, as it's able
to bulk-copy already measured strings without re-measuring them.
Related to #8000
## Validation Steps Performed
* enwik8.txt, zhwik8.txt, emoji-test.txt, all work with proper
wide glyph reflow at the end of a row ✅
* This produces "a 咪" where only "a" has a white background:
```sh
printf '\e7こん\e8\x1b[107ma\x1b[m\n'
```
* This produces "abん":
```sh
stdbuf -o0 printf '\x1b7こん\x1b8a'; printf 'b\n'
```
* This produces "xy" at the end of the line:
```sh
stdbuf -o0 printf '\e[999C\bこ\bx'; printf 'y\n'
```
* This produces red whitespace followed by "こ " in the default
background color at the end of the line, and "ん" on the next line:
```sh
printf '\e[41m\e[K\e[m\e[999C\e[2Dこん\n'
```
2023-03-24 23:20:53 +01:00
// Grants mutable access to the run-length encoded text attributes of this row.
til::small_rle<TextAttribute, uint16_t, 1>& ROW::Attributes() noexcept
{
    return _attr;
}
2022-11-11 20:34:58 +01:00
// Grants read-only access to the run-length encoded text attributes of this row.
const til::small_rle<TextAttribute, uint16_t, 1>& ROW::Attributes() const noexcept
{
    return _attr;
}
// Returns the text attribute stored for the given column.
// Out-of-range columns are clamped into [0, _columnCount) first.
TextAttribute ROW::GetAttrByColumn(const til::CoordType column) const
{
    const auto col = _clampedColumn(column);
    return _attr.at(col);
}
std : : vector < uint16_t > ROW : : GetHyperlinks ( ) const
{
std : : vector < uint16_t > ids ;
for ( const auto & run : _attr . runs ( ) )
{
if ( run . value . IsHyperlink ( ) )
{
ids . emplace_back ( run . value . GetHyperlinkId ( ) ) ;
}
}
return ids ;
}
2024-07-23 21:39:12 +02:00
// Takes ownership of the given image slice, replacing the one currently held by this row.
// The result is whatever GetMutableImageSlice() returns for the newly stored slice,
// i.e. nullptr if an empty pointer was passed in.
ImageSlice* ROW::SetImageSlice(ImageSlice::Pointer imageSlice) noexcept
{
    _imageSlice = std::move(imageSlice);
    return GetMutableImageSlice();
}
2024-07-23 21:39:12 +02:00
// Returns the image slice held by this row for reading, or nullptr if there is none.
const ImageSlice* ROW::GetImageSlice() const noexcept
{
    return _imageSlice.get();
}
// Returns the image slice held by this row for writing, or nullptr if there is none.
// Handing out a writable slice bumps its revision before it is returned.
ImageSlice* ROW::GetMutableImageSlice() noexcept
{
    if (const auto slice = _imageSlice.get())
    {
        slice->BumpRevision();
        return slice;
    }
    return nullptr;
}
2022-11-11 20:34:58 +01:00
uint16_t ROW : : size ( ) const noexcept
{
return _columnCount ;
}
2024-01-30 03:50:33 +05:30
// Routine Description:
// - Retrieves the column that is one after the last non-space character in the row.
til::CoordType ROW::GetLastNonSpaceColumn() const noexcept
{
    const auto text = GetText();
    const auto beg = text.data();
    const auto end = beg + text.size();

    // Walk backwards from the end of the text until a non-space character precedes `it`.
#pragma warning(suppress : 26429) // Symbol 'it' is never tested for nullness, it can be marked as not_null (f.23).
    auto it = end;
    // it[-1] is only evaluated while `it` is greater than `beg`, so the access is safe.
    while (it != beg && it[-1] == L' ')
    {
        --it;
    }

    // The result is measured in cells, not in characters, which is why `it - beg` can't be used:
    // In a row of 10 cells that consists of 1 wide glyph and 8 spaces, `it` ends up pointing
    // at the second character and `it - beg` would yield 1.
    // Counting the trailing spaces (1 cell each) from the right edge avoids this.
    const auto trailingSpaces = end - it;
    return gsl::narrow_cast<til::CoordType>(GetReadableColumnCount() - trailingSpaces);
}
2022-11-11 20:34:58 +01:00
// Returns the number of space characters at the start of the row's text.
// If the text consists of nothing but spaces, its entire length is returned.
til::CoordType ROW::MeasureLeft() const noexcept
{
    const auto text = GetText();
    const auto firstNonSpace = text.find_first_not_of(L' ');
    const auto leadingSpaces = firstNonSpace == std::wstring_view::npos ? text.size() : firstNonSpace;
    return gsl::narrow_cast<til::CoordType>(leadingSpaces);
}
2024-01-30 03:50:33 +05:30
// Routine Description:
// - Retrieves the column that is one after the last valid character in the row.
2022-11-11 20:34:58 +01:00
til : : CoordType ROW : : MeasureRight ( ) const noexcept
{
2023-09-26 02:28:51 +02:00
if ( _wrapForced )
{
auto width = _columnCount ;
if ( _doubleBytePadded )
{
width - - ;
}
return width ;
}
2024-01-30 03:50:33 +05:30
return GetLastNonSpaceColumn ( ) ;
2022-11-11 20:34:58 +01:00
}
bool ROW : : ContainsText ( ) const noexcept
{
const auto text = GetText ( ) ;
const auto beg = text . begin ( ) ;
const auto end = text . end ( ) ;
auto it = beg ;
for ( ; it ! = end ; + + it )
{
if ( * it ! = L ' ' )
{
return true ;
}
}
return false ;
}
std : : wstring_view ROW : : GlyphAt ( til : : CoordType column ) const noexcept
{
auto col = _clampedColumn ( column ) ;
// Safety: col is [0, _columnCount).
const auto beg = _uncheckedCharOffset ( col ) ;
// Safety: col cannot be incremented past _columnCount, because the last
// _charOffset at index _columnCount will never get the CharOffsetsTrailer flag.
while ( _uncheckedIsTrailer ( + + col ) )
{
}
// Safety: col is now (0, _columnCount].
const auto end = _uncheckedCharOffset ( col ) ;
return { _chars . begin ( ) + beg , _chars . begin ( ) + end } ;
}
// Classifies the given (clamped) column:
// - Trailing if the column itself is flagged as a trailer
// - Leading if the column right after it is flagged as a trailer
// - Single otherwise
DbcsAttribute ROW::DbcsAttrAt(til::CoordType column) const noexcept
{
    const auto col = _clampedColumn(column);

    // Safety: col is [0, _columnCount).
    if (_uncheckedIsTrailer(col))
    {
        return { DbcsAttribute::Trailing };
    }

    // Safety: col+1 is [1, _columnCount].
    const auto following = ::base::strict_cast<size_t>(col) + 1u;
    if (_uncheckedIsTrailer(following))
    {
        return { DbcsAttribute::Leading };
    }

    return { DbcsAttribute::Single };
}
std : : wstring_view ROW : : GetText ( ) const noexcept
{
2023-08-25 00:56:40 +02:00
const auto width = size_t { til : : at ( _charOffsets , GetReadableColumnCount ( ) ) } & CharOffsetsMask ;
return { _chars . data ( ) , width } ;
2022-11-11 20:34:58 +01:00
}
2024-10-21 15:04:53 -07:00
// Arguments:
// - columnBegin: inclusive
// - columnEnd: exclusive
2023-06-30 21:51:07 +02:00
std : : wstring_view ROW : : GetText ( til : : CoordType columnBegin , til : : CoordType columnEnd ) const noexcept
{
2024-06-26 20:40:27 +02:00
const auto columns = GetReadableColumnCount ( ) ;
2023-08-25 00:56:40 +02:00
const auto colBeg = clamp ( columnBegin , 0 , columns ) ;
2024-10-21 15:04:53 -07:00
const auto colEnd = clamp ( columnEnd , colBeg , columns ) ;
2023-06-30 21:51:07 +02:00
const size_t chBeg = _uncheckedCharOffset ( gsl : : narrow_cast < size_t > ( colBeg ) ) ;
const size_t chEnd = _uncheckedCharOffset ( gsl : : narrow_cast < size_t > ( colEnd ) ) ;
# pragma warning(suppress : 26481) // Don't use pointer arithmetic. Use span instead (bounds.1).
return { _chars . data ( ) + chBeg , chEnd - chBeg } ;
}
2023-08-25 00:56:40 +02:00
// Creates a char-to-column mapper for the given character offset
// and asks it for the leading column at that offset.
til::CoordType ROW::GetLeadingColumnAtCharOffset(const ptrdiff_t offset) const noexcept
{
    auto mapper = _createCharToColumnMapper(offset);
    return mapper.GetLeadingColumnAt(offset);
}
// Creates a char-to-column mapper for the given character offset
// and asks it for the trailing column at that offset.
til::CoordType ROW::GetTrailingColumnAtCharOffset(const ptrdiff_t offset) const noexcept
{
    auto mapper = _createCharToColumnMapper(offset);
    return mapper.GetTrailingColumnAt(offset);
}
2022-11-11 20:34:58 +01:00
// Classifies the first character stored for the given (clamped) column:
// - ControlChar if it is a space or any character that sorts before it
// - DelimiterChar if it is contained in `wordDelimiters`
// - RegularChar otherwise
DelimiterClass ROW::DelimiterClassAt(til::CoordType column, const std::wstring_view& wordDelimiters) const noexcept
{
    const auto col = _clampedColumn(column);
    // Safety: col is [0, _columnCount).
    const auto ch = _uncheckedChar(_uncheckedCharOffset(col));

    if (ch <= L' ')
    {
        return DelimiterClass::ControlChar;
    }

    const auto isDelimiter = wordDelimiters.find(ch) != std::wstring_view::npos;
    return isDelimiter ? DelimiterClass::DelimiterChar : DelimiterClass::RegularChar;
}
template < typename T >
constexpr uint16_t ROW : : _clampedColumn ( T v ) const noexcept
{
2023-08-25 00:56:40 +02:00
return static_cast < uint16_t > ( clamp ( v , 0 , _columnCount - 1 ) ) ;
2022-11-11 20:34:58 +01:00
}
template < typename T >
constexpr uint16_t ROW : : _clampedColumnInclusive ( T v ) const noexcept
{
2023-08-25 00:56:40 +02:00
return static_cast < uint16_t > ( clamp ( v , 0 , _columnCount ) ) ;
2022-11-11 20:34:58 +01:00
}
2023-08-25 00:56:40 +02:00
uint16_t ROW : : _charSize ( ) const noexcept
2022-11-11 20:34:58 +01:00
{
2023-08-25 00:56:40 +02:00
// Safety: _charOffsets is an array of `_columnCount + 1` entries.
return _charOffsets [ _columnCount ] ;
2022-11-11 20:34:58 +01:00
}
2023-08-25 00:56:40 +02:00
// Safety: off must be [0, _charSize()].
template < typename T >
wchar_t ROW : : _uncheckedChar ( T off ) const noexcept
2022-11-11 20:34:58 +01:00
{
2023-08-25 00:56:40 +02:00
return _chars [ off ] ;
2022-11-11 20:34:58 +01:00
}
// Safety: col must be [0, _columnCount].
2023-08-25 00:56:40 +02:00
template < typename T >
uint16_t ROW : : _uncheckedCharOffset ( T col ) const noexcept
2022-11-11 20:34:58 +01:00
{
Add an efficient text stream write function (#14821)
This adds PR adds a couple foundational functions and classes to make
our TextBuffer more performant and allow us to improve our Unicode
correctness in the future, by getting rid of our dependence on
`OutputCellIterator`. In the future we can then replace the simple
UTF-16 code point iterator with a proper grapheme cluster iterator.
While my focus is technically on Unicode correctness, the ~4x VT
throughput increase in OpenConsole is pretty nice too.
This PR adds:
* A new, simpler ROW iterator (unused in this PR)
* Cursor movement functions (`NavigateToPrevious`, `NavigateToNext`)
They're based on functions that align the cursor to the start/end
of the _current_ cell, so such functions can be added as well.
* `ReplaceText` to write a raw string of text with the possibility to
specify a right margin.
* `CopyRangeFrom` will allow us to make reflow much faster, as it's able
to bulk-copy already measured strings without re-measuring them.
Related to #8000
## Validation Steps Performed
* enwik8.txt, zhwik8.txt, emoji-test.txt, all work with proper
wide glyph reflow at the end of a row ✅
* This produces "a 咪" where only "a" has a white background:
```sh
printf '\e7こん\e8\x1b[107ma\x1b[m\n'
```
* This produces "abん":
```sh
stdbuf -o0 printf '\x1b7こん\x1b8a'; printf 'b\n'
```
* This produces "xy" at the end of the line:
```sh
stdbuf -o0 printf '\e[999C\bこ\bx'; printf 'y\n'
```
* This produces red whitespace followed by "こ " in the default
background color at the end of the line, and "ん" on the next line:
```sh
printf '\e[41m\e[K\e[m\e[999C\e[2Dこん\n'
```
2023-03-24 23:20:53 +01:00
assert ( col < _charOffsets . size ( ) ) ;
2023-08-25 00:56:40 +02:00
return _charOffsets [ col ] & CharOffsetsMask ;
2022-11-11 20:34:58 +01:00
}
// Safety: col must be [0, _columnCount].
2023-08-25 00:56:40 +02:00
template < typename T >
bool ROW : : _uncheckedIsTrailer ( T col ) const noexcept
2022-11-11 20:34:58 +01:00
{
Add an efficient text stream write function (#14821)
This adds PR adds a couple foundational functions and classes to make
our TextBuffer more performant and allow us to improve our Unicode
correctness in the future, by getting rid of our dependence on
`OutputCellIterator`. In the future we can then replace the simple
UTF-16 code point iterator with a proper grapheme cluster iterator.
While my focus is technically on Unicode correctness, the ~4x VT
throughput increase in OpenConsole is pretty nice too.
This PR adds:
* A new, simpler ROW iterator (unused in this PR)
* Cursor movement functions (`NavigateToPrevious`, `NavigateToNext`)
They're based on functions that align the cursor to the start/end
of the _current_ cell, so such functions can be added as well.
* `ReplaceText` to write a raw string of text with the possibility to
specify a right margin.
* `CopyRangeFrom` will allow us to make reflow much faster, as it's able
to bulk-copy already measured strings without re-measuring them.
Related to #8000
## Validation Steps Performed
* enwik8.txt, zhwik8.txt, emoji-test.txt, all work with proper
wide glyph reflow at the end of a row ✅
* This produces "a 咪" where only "a" has a white background:
```sh
printf '\e7こん\e8\x1b[107ma\x1b[m\n'
```
* This produces "abん":
```sh
stdbuf -o0 printf '\x1b7こん\x1b8a'; printf 'b\n'
```
* This produces "xy" at the end of the line:
```sh
stdbuf -o0 printf '\e[999C\bこ\bx'; printf 'y\n'
```
* This produces red whitespace followed by "こ " in the default
background color at the end of the line, and "ん" on the next line:
```sh
printf '\e[41m\e[K\e[m\e[999C\e[2Dこん\n'
```
2023-03-24 23:20:53 +01:00
assert ( col < _charOffsets . size ( ) ) ;
2023-08-25 00:56:40 +02:00
return WI_IsFlagSet ( _charOffsets [ col ] , CharOffsetsTrailer ) ;
}
template < typename T >
T ROW : : _adjustBackward ( T column ) const noexcept
{
// Safety: This is a little bit more dangerous. The first column is supposed
// to never be a trailer and so this loop should exit if column == 0.
for ( ; _uncheckedIsTrailer ( column ) ; - - column )
{
}
return column ;
}
template < typename T >
T ROW : : _adjustForward ( T column ) const noexcept
{
// Safety: This is a little bit more dangerous. The last column is supposed
// to never be a trailer and so this loop should exit if column == _columnCount.
for ( ; _uncheckedIsTrailer ( column ) ; + + column )
{
}
return column ;
}
// Creates a CharToColumnMapper given an offset into _chars.data().
2024-11-15 15:50:07 -08:00
// In other words, for a 120 column ROW with just ASCII text, the offset should be [0,120].
2023-08-25 00:56:40 +02:00
CharToColumnMapper ROW : : _createCharToColumnMapper ( ptrdiff_t offset ) const noexcept
{
const auto charsSize = _charSize ( ) ;
2024-11-15 15:50:07 -08:00
const auto lastChar = gsl : : narrow_cast < ptrdiff_t > ( charsSize ) ;
2023-08-25 00:56:40 +02:00
// We can sort of guess what column belongs to what offset because BMP glyphs are very common and
// UTF-16 stores them in 1 char. In other words, usually a ROW will have N chars for N columns.
const auto guessedColumn = gsl : : narrow_cast < til : : CoordType > ( clamp ( offset , 0 , _columnCount ) ) ;
2024-11-15 15:50:07 -08:00
return CharToColumnMapper { _chars . data ( ) , _charOffsets . data ( ) , lastChar , guessedColumn , _columnCount } ;
2022-11-11 20:34:58 +01:00
}
Rewrite how marks are stored & add reflow (#16937)
This is pretty much a huge refactoring of how marks are stored in the
buffer.
Gone is the list of `ScrollMark`s in the buffer that store regions of
text as points marking the ends. Those would be nigh impossible to
reflow nicely.
Instead, we're going to use `TextAttribute`s to store the kind of output
we've got - `Prompt`, `Command`, `Output`, or, the default, `None`.
Those already reflow nicely!
But we also need to store things like, the exit code for the command.
That's why we've now added `ScrollbarData` to `ROW`s. There's really
only going to be one prompt->output on a single row. So, we only need to
store one ScrollbarData per-row. When a command ends, we can just go
update the mark on the row that started that command.
But iterating over the whole buffer to find the next/previous
prompt/command/output region sounds complicated. So, to avoid everyone
needing to do some variant of that, we've added `MarkExtents` (which is
literally just the same mark structure as before). TextBuffer can figure
out where all the mark regions are, and hand that back to callers. This
allows ControlCore to be basically unchanged.
_But collecting up all the regions for all the marks sounds expensive!
We need to update the scrollbar frequently, we can't just collect those
up every time!_ No we can't! But we also don't need to. The scrollbar
doesn't need to know where all the marks start and end and if they have
commands and this and that - no. We only need to know the rows that have
marks on them. So, we've now also got `ScrollMark` to represent just a
mark on a scrollbar at a specific row on the buffer. We can get those
quickly.
* [x] I added a bunch of tests for this.
* [x] I played with it and it feels good, even after a reflow (finally)
* See:
* #11000
* #15057 (I'm not marking this as closed. The stacked PR will close
this, when I move marks to Stable)
2024-04-05 13:16:10 -07:00
const std : : optional < ScrollbarData > & ROW : : GetScrollbarData ( ) const noexcept
{
return _promptData ;
}
void ROW : : SetScrollbarData ( std : : optional < ScrollbarData > data ) noexcept
{
_promptData = data ;
}
void ROW : : StartPrompt ( ) noexcept
{
if ( ! _promptData . has_value ( ) )
{
// You'd be tempted to write:
//
// _promptData = ScrollbarData{
// .category = MarkCategory::Prompt,
// .color = std::nullopt,
// .exitCode = std::nullopt,
// };
//
// But that's not very optimal! Read this thread for a breakdown of how
// weird std::optional can be some times:
//
// https://github.com/microsoft/terminal/pull/16937#discussion_r1553660833
_promptData . emplace ( MarkCategory : : Prompt ) ;
}
}
void ROW : : EndOutput ( std : : optional < unsigned int > error ) noexcept
{
if ( _promptData . has_value ( ) )
{
_promptData - > exitCode = error ;
if ( error . has_value ( ) )
{
_promptData - > category = * error = = 0 ? MarkCategory : : Success : MarkCategory : : Error ;
}
}
}