From 8201b75114923069ee193c761f17394e6f8e6b79 Mon Sep 17 00:00:00 2001 From: Rob Landley Date: Sat, 6 May 2023 01:02:51 -0500 Subject: [PATCH] Truncating demo_utf8towc's check range to 0x10ffff was wrong (that limit would suit a wctoutf8 test), so it wasn't showing all the results. Going through the full 32-bit range takes a long time, so advance based on the number of bytes consumed by the match. --- toys/example/demo_utf8towc.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/toys/example/demo_utf8towc.c b/toys/example/demo_utf8towc.c index 2c6050b1..26c4390d 100644 --- a/toys/example/demo_utf8towc.c +++ b/toys/example/demo_utf8towc.c @@ -18,23 +18,32 @@ config DEMO_UTF8TOWC void demo_utf8towc_main(void) { mbstate_t mb; - int len1, len2; - unsigned u, h, wc2; + int len1, len2, maxlen = 0; + unsigned h, u, wc2; wchar_t wc1; + char *str = (void *)&h; memset(&mb, 0, sizeof(mb)); - for (u = 1; u<=0x10ffff; u++) { - char *str = (void *)&h; - + // Although there are 0x10ffff unicode points, test all 4 byte combinations. + for (u = 1; u;) { wc1 = wc2 = 0; len2 = 4; - h = htonl(u); - while (!*str) str++, len2--; + h = SWAP_BE32(u); len1 = mbrtowc(&wc1, str, len2, &mb); if (len1<0) memset(&mb, 0, sizeof(mb)); len2 = utf8towc(&wc2, str, len2); + + if (wcwidth(wc2)>maxlen) maxlen = wcwidth(wc2); if (len1 != len2 || wc1 != wc2) - printf("%x %d %x %d %x\n", u, len1, wc1, len2, wc2); + printf("%x %d %x %d %x\n", u++, len1, wc1, len2, wc2); + else if (len2<1) u++; + else { + h = 1<<(8*(4-len2)); + u &= ~(h-1); + u += h; + } + } + dprintf(2, "maxlen=%d\n", maxlen); } -- 2.39.2