commit 29f51dd9ea6407ff5d6386d7dc82cd499185533e
parent 115f7694ce1502f57bb72b9e34d8816c5224e843
Author: Roberto E. Vargas Caballero <k0ga@shike2.net>
Date: Thu, 27 Mar 2025 22:11:35 +0100
libc/wchar: Remove state from mbtowc()
As we only support UTF-8, it does not make sense to keep state in
mbtowc(): since we cannot differentiate between an incorrect sequence
and an incomplete sequence, there is no actual use case for that
state. We still keep the static variable because we don't want to use
the internal state of mbrtowc(), and as this function is not thread
safe anyway it is not a problem to have that variable, whose value
will always be correct except when mbrtowc() returns -2, in which
case we have to clear it.
The C99 specification does not specify any errno value when mbtowc()
fails, but it allows library functions to modify errno even when the
standard does not document it for that function, so it is not a
problem to implement mbtowc() using mbrtowc(), which sets errno when
it cannot translate a full multibyte sequence.
Diffstat:
5 files changed, 46 insertions(+), 60 deletions(-)
diff --git a/src/libc/stdlib/mblen.c b/src/libc/stdlib/mblen.c
@@ -1,16 +1,9 @@
#include <stdlib.h>
-#include <wchar.h>
#undef mblen
int
mblen(const char *s, size_t n)
{
- int ret;
- static mbstate_t st;
-
- ret = mbrtowc(NULL, s, n, &st);
- if (ret < 0)
- ret = -1;
- return ret;
+ return mbtowc(NULL, s, n);
}
diff --git a/src/libc/stdlib/mbtowc.c b/src/libc/stdlib/mbtowc.c
@@ -7,12 +7,14 @@
int
mbtowc(wchar_t *restrict pwc, const char *restrict s, size_t n)
{
- static mbstate_t st;
int ret;
+ static mbstate_t st;
ret = mbrtowc(pwc, s, n, &st);
- if (ret < 0)
+ if (ret == -2) {
+ memset(&st, 0, sizeof(st));
ret = -1;
+ }
return ret;
}
diff --git a/tests/libc/execute/0039-mbrtowc.c b/tests/libc/execute/0039-mbrtowc.c
@@ -31,9 +31,9 @@ tests_mbrtowc(void)
wc = -1;
errno = 0;
r = mbrtowc(tp->pwc, tp->s, tp->l, NULL);
- assert(tp->r == r);
+ assert(tp->rmbr == r);
assert(tp->syserr == errno);
- if (tp->r != -1)
+ if (tp->rmbr != -1)
assert(tp->wc == wc);
}
@@ -43,9 +43,9 @@ tests_mbrtowc(void)
wc = -1;
errno = 0;
r = mbrtowc(tp->pwc, tp->s, tp->l, &s);
- assert(tp->r == r);
+ assert(tp->rmbr == r);
assert(tp->syserr == errno);
- if (tp->r != -1)
+ if (tp->rmbr != -1)
assert(tp->wc == wc);
assert(mbsinit(&s) != 0 == tp->mbstate);
}
@@ -60,16 +60,10 @@ tests_mbtowc(void)
puts("testing mbtowc");
for (tp = tests; tp < &tests[NELEM(tests)]; ++tp) {
wc = -1;
- errno = 0;
r = mbtowc(tp->pwc, tp->s, tp->l);
- assert(tp->syserr == errno);
- if (tp->r >= 0) {
- rt = tp->r;
+ assert(tp->rmb == r);
+ if (tp->rmb >= 0)
assert(tp->wc == wc);
- } else {
- rt = -1;
- }
- assert(rt == r);
}
}
diff --git a/tests/libc/execute/0041-mbrlen.c b/tests/libc/execute/0041-mbrlen.c
@@ -31,7 +31,7 @@ tests_mbrlen(void)
wc = -1;
errno = 0;
r = mbrlen(tp->s, tp->l, NULL);
- assert(tp->r == r);
+ assert(tp->rmbr == r);
assert(tp->syserr == errno);
}
@@ -41,7 +41,7 @@ tests_mbrlen(void)
wc = -1;
errno = 0;
r = mbrlen(tp->s, tp->l, &s);
- assert(tp->r == r);
+ assert(tp->rmbr == r);
assert(tp->syserr == errno);
assert(mbsinit(&s) != 0 == tp->mbstate);
}
@@ -55,12 +55,8 @@ tests_mblen(void)
puts("testing mblen");
for (tp = tests; tp < &tests[NELEM(tests)]; ++tp) {
- wc = -1;
- errno = 0;
r = mblen(tp->s, tp->l);
- assert(tp->syserr == errno);
- rt = (tp->r >= 0) ? tp->r : -1;
- assert(rt == r);
+ assert(tp->rmb == r);
}
}
diff --git a/tests/libc/execute/mbtest.h b/tests/libc/execute/mbtest.h
@@ -2,43 +2,44 @@ static wchar_t wc;
static struct mbtest {
char *s;
int l;
- int r;
+ int rmbr;
+ int rmb;
int mbstate;
int syserr;
wchar_t *pwc;
wchar_t wc;
} tests[] = {
- {"\0", 2, 0, 1, 0, &wc, 0},
- {"\x21", 2, 1, 1, 0, &wc, 0x21},
- {"\xc2\xa1", 3, 2, 1, 0, &wc, 0x00A1},
- {"\xc2\xa1", 2, 2, 1, 0, &wc, 0x00A1},
- {"\xe2\x80\x94", 4, 3, 1, 0, &wc, 0x2014},
- {"\xf0\x9f\x92\xa9", 5, 4, 1, 0, &wc, 0x01F4A9},
- {"\xf0\x9f\x92\xa9", 5, 4, 1, 0, NULL, -1},
- {"\xf0\x9f\x92\xa9", -1, 4, 1, 0, &wc, 0x01F4A9},
+ {"\0", 2, 0, 0, 1, 0, &wc, 0},
+ {"\x21", 2, 1, 1, 1, 0, &wc, 0x21},
+ {"\xc2\xa1", 3, 2, 2, 1, 0, &wc, 0x00A1},
+ {"\xc2\xa1", 2, 2, 2, 1, 0, &wc, 0x00A1},
+ {"\xe2\x80\x94", 4, 3, 3, 1, 0, &wc, 0x2014},
+ {"\xf0\x9f\x92\xa9", 5, 4, 4, 1, 0, &wc, 0x01F4A9},
+ {"\xf0\x9f\x92\xa9", 5, 4, 4, 1, 0, NULL, -1},
+ {"\xf0\x9f\x92\xa9", -1, 4, 4, 1, 0, &wc, 0x01F4A9},
- {NULL, 4, 0, 1, 0, NULL, -1},
- {"\xed\x9f\xbf", 4, 3, 1, 0, &wc, 0xd7ff},
- {"\xed\xa0\x80", 4, -1, 1, EILSEQ, &wc, -1},
- {"\xed\xb3\xbf", 4, -1, 1, EILSEQ, &wc, -1},
- {"\xed\xb4\x80", 4, 3, 1, 0, &wc, 0xdd00},
+ {NULL, 4, 0, 0, 1, 0, NULL, -1},
+ {"\xed\x9f\xbf", 4, 3, 3, 1, 0, &wc, 0xd7ff},
+ {"\xed\xa0\x80", 4, -1, -1, 1, EILSEQ, &wc, -1},
+ {"\xed\xb3\xbf", 4, -1, -1, 1, EILSEQ, &wc, -1},
+ {"\xed\xb4\x80", 4, 3, 3, 1, 0, &wc, 0xdd00},
- {"\xf0\x9f\x92\xa9", 3, -2, 0, 0, &wc, -1},
- {"\xa9", 2, 1, 1, 0, &wc, 0x01F4A9},
- {"\xf0\x9f\x92\xa9", 3, -2, 0, 0, &wc, -1},
- {NULL, 4, -1, 1, EILSEQ, &wc, -1},
- {"\xa9", 2, -1, 1, EILSEQ, &wc, -1},
- {"\xf0\x9f\x92\xa9", 3, -2, 0, 0, &wc, -1},
- {NULL, 4, -1, 1, EILSEQ, &wc, -1},
- {"\x21", 2, 1, 1, 0, &wc, 0x21},
- {"\xf0\x9f\x92\xa9", 2, -2, 0, 0, &wc, -1},
- {"\xf0\x9f\x92\xa9", 0, -2, 0, 0, &wc, -1},
- {"\x92\xa9", 2, 2, 1, 0, &wc, 0x01F4A9},
+ {"\xf0\x9f\x92\xa9", 3, -2, -1, 0, 0, &wc, -1},
+ {"\xa9", 2, 1, -1, 1, 0, &wc, 0x01F4A9},
+ {"\xf0\x9f\x92\xa9", 3, -2, -1, 0, 0, &wc, -1},
+ {NULL, 4, -1, 0, 1, EILSEQ, &wc, -1},
+ {"\xa9", 2, -1, -1, 1, EILSEQ, &wc, -1},
+ {"\xf0\x9f\x92\xa9", 3, -2, -1, 0, 0, &wc, -1},
+ {NULL, 4, -1, 0, 1, EILSEQ, &wc, -1},
+ {"\x21", 2, 1, 1, 1, 0, &wc, 0x21},
+ {"\xf0\x9f\x92\xa9", 2, -2, -1, 0, 0, &wc, -1},
+ {"\xf0\x9f\x92\xa9", 0, -2, -1, 0, 0, &wc, -1},
+ {"\x92\xa9", 2, 2, -1, 1, 0, &wc, 0x01F4A9},
- {"\x80", 2, -1, 1, EILSEQ, &wc, -1},
- {"\xc0\x80", 2, -1, 1, EILSEQ, &wc, -1},
- {"\xc0\x00", 2, -1, 1, EILSEQ, &wc, -1},
- {"\xc1\x81", 2, -1, 1, EILSEQ, &wc, -1},
- {"\xf8\x81\x82\x83\x84\x85", -1, -1, 1, EILSEQ, &wc, -1},
- {"\xfe\x81\x82\x83\x84\x85\x86", 8, -1, 1, EILSEQ, &wc, -1},
+ {"\x80", 2, -1, -1, 1, EILSEQ, &wc, -1},
+ {"\xc0\x80", 2, -1, -1, 1, EILSEQ, &wc, -1},
+ {"\xc0\x00", 2, -1, -1, 1, EILSEQ, &wc, -1},
+ {"\xc1\x81", 2, -1, -1, 1, EILSEQ, &wc, -1},
+ {"\xf8\x81\x82\x83\x84\x85", -1, -1, -1, 1, EILSEQ, &wc, -1},
+ {"\xfe\x81\x82\x83\x84\x85\x86", 8, -1, -1, 1, EILSEQ, &wc, -1},
};