commit 68ad37c9bee5f12c0804f165a5176e4bbf4b4898
parent 29f51dd9ea6407ff5d6386d7dc82cd499185533e
Author: Roberto E. Vargas Caballero <k0ga@shike2.net>
Date: Fri, 28 Mar 2025 09:42:13 +0100
libc/wchar: Simplify mbsrtowcs() and mbstowcs()
We only support UTF-8, which is stateless, and since mbsrtowcs() cannot
return -2 we know that the internal state of mbsrtowcs() will always be
left in the initial state, so we can reuse it from mbstowcs().
Also, mbsrtowcs() had a bug: it incremented the number of wide characters
stored in the output buffer when mbrtowc() returned -2. This was not a big
issue because we try to read MB_LEN_MAX bytes, and longer sequences will
eventually return -1, in which case the value of n is not used.
Diffstat:
2 files changed, 17 insertions(+), 11 deletions(-)
diff --git a/src/libc/stdlib/mbstowcs.c b/src/libc/stdlib/mbstowcs.c
@@ -4,10 +4,14 @@
#undef mbstowcs
+/*
+ * We can reuse the internal state of mbsrtowcs() because
+ * it loops when mbrtowc() returns -2, which makes it
+ * impossible to leave the function in a non-initial
+ * state (or with an error, which cleans the state).
+ */
size_t
mbstowcs(wchar_t *restrict dest, const char *restrict src, size_t n)
{
- static mbstate_t st;
-
- return mbsrtowcs(dest, (void *) &src, n, &st);
+ return mbsrtowcs(dest, (void *) &src, n, NULL);
}
diff --git a/src/libc/wchar/mbsrtowcs.c b/src/libc/wchar/mbsrtowcs.c
@@ -16,7 +16,8 @@ mbsrtowcs(wchar_t *restrict dest, const char **restrict src, size_t len,
if (!ps)
ps = &st;
- for (n = 0; ; n++) {
+ n = 0;
+ for (;;) {
cnt = mbrtowc(&wc, s, MB_LEN_MAX, ps);
if (cnt == (size_t) -2) {
s += MB_LEN_MAX;
@@ -28,19 +29,20 @@ mbsrtowcs(wchar_t *restrict dest, const char **restrict src, size_t len,
if (dest) {
if (n == len) {
*src = s;
- return n;
+ break;
}
*dest++ = wc;
}
- s += cnt;
- if (wc == 0)
+ if (wc == 0) {
+ if (dest)
+ *src = NULL;
break;
- }
+ }
+
+ s += cnt;
+ n++;
- if (dest) {
- memset(ps, 0, sizeof(mbstate_t));
- *src = NULL;
}
return n;