scc

simple c99 compiler
git clone git://git.simple-cc.org/scc
Log | Files | Refs | Submodules | README | LICENSE

commit 3cb21279e81fde0e4dc9b10061b9aa2ecd7225b5
parent 2fa9d9d8e2580b0d8081184db9da3339a291b912
Author: Roberto E. Vargas Caballero <k0ga@shike2.com>
Date:   Sun,  6 Nov 2022 20:07:46 +0100

libc: Discard invalid utf8 ranges

RFC 2279 defines UTF-8 as having a maximum of 6
bytes but RFC 3629 marks as invalid several ranges
to match UTF-16 restrictions and it limits the encoded
code points to U+10FFFF.

Diffstat:
Minclude/limits.h | 2+-
Minclude/stdlib.h | 2+-
Msrc/libc/libc.h | 4++++
Msrc/libc/objs/common-objs.mk | 1+
Msrc/libc/wchar/Makefile | 1+
Msrc/libc/wchar/mbrtowc.c | 5+++++
Msrc/libc/wchar/wcrtomb.c | 8++++++++
7 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/include/limits.h b/include/limits.h @@ -3,6 +3,6 @@ #include <arch/limits.h> -#define MB_LEN_MAX 1 +#define MB_LEN_MAX 4 #endif diff --git a/include/stdlib.h b/include/stdlib.h @@ -11,7 +11,7 @@ #define _ATEXIT_MAX 32 -#define MB_CUR_MAX 6 +#define MB_CUR_MAX 4 #define RAND_MAX 32767 typedef struct { diff --git a/src/libc/libc.h b/src/libc/libc.h @@ -58,3 +58,7 @@ extern void (*_exitf[])(void); extern unsigned _exitn; extern void (*_flushall)(void); extern void (*_atexithdl)(void); + +#ifdef _WCHAR_H +extern int _validutf8(wchar_t); +#endif diff --git a/src/libc/objs/common-objs.mk b/src/libc/objs/common-objs.mk @@ -121,3 +121,4 @@ COMMON_OBJS =\ wchar/mbrtowc.$O\ wchar/wcrtomb.$O\ wchar/wcwidth.$O\ + wchar/_validutf8.$O\ diff --git a/src/libc/wchar/Makefile b/src/libc/wchar/Makefile @@ -8,6 +8,7 @@ OBJS =\ mbrtowc.$O\ wcrtomb.$O\ wcwidth.$O\ + _validutf8.$O\ all: $(OBJS) diff --git a/src/libc/wchar/mbrtowc.c b/src/libc/wchar/mbrtowc.c @@ -1,5 +1,7 @@ #include <wchar.h> +#include "../libc.h" + #undef mbrtowc size_t @@ -30,6 +32,9 @@ mbrtowc(wchar_t *restrict pwc, const char *restrict s, size_t n, wc |= c & 0x3F; } + if (!_validutf8(wc)) + return -1; + return_code: if (pwc) *pwc = wc; diff --git a/src/libc/wchar/wcrtomb.c b/src/libc/wchar/wcrtomb.c @@ -1,5 +1,8 @@ +#include <errno.h> #include <wchar.h> +#include "../libc.h" + #undef wcrtomb size_t @@ -14,6 +17,11 @@ wcrtomb(char *restrict s, wchar_t wc, mbstate_t *restrict ps) if (!s) return 1; + if (_validutf8(wc)) { + errno = EILSEQ; + return -1; + } + for (n = 0; n < 5 && c >= limits[n]; ++n) ;