Virtual ISO: Move UTF-16 encoding to UTF-8 decoding
This commit is contained in:
@@ -153,12 +153,14 @@ viso_pwrite(const void *ptr, uint64_t offset, size_t size, size_t count, FILE *s
|
|||||||
}
|
}
|
||||||
|
|
||||||
static size_t
|
static size_t
|
||||||
viso_convert_utf8(wchar_t *dest, const char *src, int buf_size)
|
viso_convert_utf8(wchar_t *dest, const char *src, ssize_t buf_size)
|
||||||
{
|
{
|
||||||
wchar_t c, *p = dest;
|
uint32_t c;
|
||||||
int next;
|
wchar_t *p = dest;
|
||||||
|
size_t next;
|
||||||
|
|
||||||
while (buf_size-- > 0) {
|
while (buf_size-- > 0) {
|
||||||
|
/* Interpret source codepoint. */
|
||||||
c = *src;
|
c = *src;
|
||||||
if (!c) {
|
if (!c) {
|
||||||
/* Terminator. */
|
/* Terminator. */
|
||||||
@@ -178,6 +180,23 @@ viso_convert_utf8(wchar_t *dest, const char *src, int buf_size)
|
|||||||
/* Pass through sub-UTF-8 codepoints. */
|
/* Pass through sub-UTF-8 codepoints. */
|
||||||
src++;
|
src++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Convert codepoints >= U+10000 to UTF-16 surrogate pairs.
|
||||||
|
This has to be done here because wchar_t on some platforms
|
||||||
|
(Windows) is not wide enough to store such high codepoints. */
|
||||||
|
if (c >= 0x10000) {
|
||||||
|
if ((c <= 0x10ffff) && (buf_size-- > 0)) {
|
||||||
|
/* Encode surrogate pair. */
|
||||||
|
c -= 0x10000;
|
||||||
|
*p++ = 0xd800 | (c >> 10);
|
||||||
|
c = 0xdc00 | (c & 0x3ff);
|
||||||
|
} else {
|
||||||
|
/* Codepoint overflow or no room for a pair. */
|
||||||
|
c = '_';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Write destination codepoint. */
|
||||||
*p++ = c;
|
*p++ = c;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -190,6 +209,7 @@ viso_convert_utf8(wchar_t *dest, const char *src, int buf_size)
|
|||||||
{ \
|
{ \
|
||||||
st c; \
|
st c; \
|
||||||
while (buf_size-- > 0) { \
|
while (buf_size-- > 0) { \
|
||||||
|
/* Interpret source codepoint. */ \
|
||||||
c = *src++; \
|
c = *src++; \
|
||||||
switch (c) { \
|
switch (c) { \
|
||||||
case 0x00: \
|
case 0x00: \
|
||||||
@@ -249,22 +269,12 @@ viso_convert_utf8(wchar_t *dest, const char *src, int buf_size)
|
|||||||
\
|
\
|
||||||
default: \
|
default: \
|
||||||
/* Not valid for D or A, but valid for filenames. */ \
|
/* Not valid for D or A, but valid for filenames. */ \
|
||||||
if ((charset < VISO_CHARSET_FN) || (c > 0x10ffff)) { \
|
if ((charset < VISO_CHARSET_FN) || (c > 0xffff)) \
|
||||||
c = '_'; \
|
c = '_'; \
|
||||||
} else if (c >= 0x10000) { \
|
|
||||||
/* Outside 16-bit UCS-2 space, but within 20-bit UTF-16. */ \
|
|
||||||
if (buf_size-- > 0) { \
|
|
||||||
/* Encode UTF-16 surrogate pair. */ \
|
|
||||||
c -= 0x10000; \
|
|
||||||
*dest++ = cnv(0xd800 | (c >> 10)); \
|
|
||||||
c = 0xdc00 | (c & 0x3ff); \
|
|
||||||
} else { \
|
|
||||||
/* No room for an UTF-16 pair. */ \
|
|
||||||
c = '_'; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
break; \
|
break; \
|
||||||
} \
|
} \
|
||||||
|
\
|
||||||
|
/* Write destination codepoint with conversion function applied. */ \
|
||||||
*dest++ = cnv(c); \
|
*dest++ = cnv(c); \
|
||||||
} \
|
} \
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user