scc

simple c99 compiler
git clone git://git.simple-cc.org/scc
Log | Files | Refs | Submodules | README | LICENSE

commit 67eb502477c70b9d84d0615c1a4ea169fb1c6b84
parent 43e7e49b2686b39cac4695343ae4ee8452645f93
Author: Roberto E. Vargas Caballero <k0ga@shike2.com>
Date:   Sat,  5 Nov 2022 10:47:23 +0100

cc1: Add support for long character constants

Diffstat:
M src/cmd/cc/cc1/cc1.h | 3 +++
M src/cmd/cc/cc1/lex.c | 89 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------
A tests/cc/error/0033-character.c | 32 ++++++++++++++++++++++++++++++++
M tests/cc/error/scc-tests.lst | 1 +
A tests/cc/execute/0217-lchar.c | 13 +++++++++++++
M tests/cc/execute/scc-tests.lst | 1 +
6 files changed, 123 insertions(+), 16 deletions(-)

diff --git a/src/cmd/cc/cc1/cc1.h b/src/cmd/cc/cc1/cc1.h @@ -1,3 +1,5 @@ +#include <stdint.h> + #define INPUTSIZ LINESIZ #define GLOBALCTX 0 @@ -290,6 +292,7 @@ typedef struct node Node; typedef struct macro Macro; typedef struct input Input; typedef struct arch Arch; +typedef uint32_t Rune; struct limits { union { diff --git a/src/cmd/cc/cc1/lex.c b/src/cmd/cc/cc1/lex.c @@ -196,10 +196,6 @@ repeat: break; case EOF: break; - default: - if (!isprint(c) && !ispunct(c) && !isspace(c)) - warn("invalid input character. The shame of UB is yours"); - break; } return c; @@ -545,25 +541,83 @@ escape(void) return c; } +static Rune +utf8rune(void) +{ + Rune wc; + unsigned c; + size_t i, len; + + c = *input->p; + for (len = 0; c & 0x80; len++) + c <<= 1; + if (len == 0) + return c; + if (len == 1 || len == 8) + goto invalid; + + wc = (c & 0xFF) >> len; + for (i = 0; i < len-1; i++) { + c = input->p[1]; + if ((c & 0xC0) != 0x80) + goto invalid; + input->p++; + wc <<= 6; + wc |= c & 0x3F; + } + return wc; + +invalid: + errorp("invalid multibyte sequence"); + return 0xFFFD; +} + +static Rune +decode(int multi) +{ + Rune r; + + if (*input->p == '\\') { + r = escape(); + return r; + } + + return multi ? 
utf8rune() : *input->p; +} + static int character(void) { - int c; + int i, multi = 0; + Rune r, d; + Type *tp = inttype; Symbol *sym; - if ((c = *++input->p) == '\\') - c = escape(); - else - c = *input->p; - ++input->p; - if (*input->p != '\'') - errorp("invalid character constant"); - else - ++input->p; + if (*input->p == 'L') { + multi = 1; + tp = wchartype; + input->p++; + } + + d = 0; + input->p++; + for (i = 0; *input->p != '\''; i++) { + r = decode(multi); + if (r > getlimits(tp)->max.i) + warn("character too large for enclosing character literal type"); + d |= r; + input->p++; + } + input->p++; + + if (i == 0) + errorp("empty character constant"); + if (i > 1) + warn("multi-character character constant"); sym = newsym(NS_IDEN, NULL); - sym->u.i = c; - sym->type = inttype; + sym->u.i = d; + sym->type = tp; yylval.sym = sym; tok2str(); return CONSTANT; @@ -635,6 +689,9 @@ iden(void) Symbol *sym; char *p, *begin; + if (input->p[0] == 'L' && input->p[1] == '\'') + return character(); + begin = input->p; for (p = begin; isalnum(*p) || *p == '_'; ++p) ; diff --git a/tests/cc/error/0033-character.c b/tests/cc/error/0033-character.c @@ -0,0 +1,32 @@ +/* +PATTERN: +0033-character.c:23: error: empty character constant +0033-character.c:24: warning: multi-character character constant +0033-character.c:25: error: empty character constant +0033-character.c:26: warning: multi-character character constant +0033-character.c:27: warning: multi-character character constant +0033-character.c:28: error: invalid multibyte sequence +0033-character.c:28: warning: multi-character character constant +0033-character.c:29: error: invalid multibyte sequence +0033-character.c:29: error: invalid multibyte sequence +0033-character.c:29: warning: multi-character character constant +. 
+*/ +#include <wchar.h> + +int +main() +{ + int i; + wchar_t w; + + i = ''; + i = 'ab'; + w = L''; + w = L'ab'; + w = L'áá'; + w = L'€ '; + w = L'€À'; + + return 0; +} diff --git a/tests/cc/error/scc-tests.lst b/tests/cc/error/scc-tests.lst @@ -30,3 +30,4 @@ 0030-krtypes.c 0031-krtypes.c 0032-krtypes.c +0033-character.c diff --git a/tests/cc/execute/0217-lchar.c b/tests/cc/execute/0217-lchar.c @@ -0,0 +1,13 @@ +#include <wchar.h> + +int +main() +{ + wchar_t c; + + c = L'á'; + + if (c != 225) + return 1; + return 0; +} diff --git a/tests/cc/execute/scc-tests.lst b/tests/cc/execute/scc-tests.lst @@ -207,3 +207,4 @@ 0214-va_copy.c 0215-ret_struct.c 0216-initialize.c [TODO] +0217-lchar.c