scc

simple c99 compiler
git clone git://git.simple-cc.org/scc
Log | Files | Refs | Submodules | README | LICENSE

commit f53a0d9b613467cdfaf87c868f25c6f79f742f56
parent 6d702ec165be91c82ccd3940c94625967957ae8b
Author: Roberto E. Vargas Caballero <k0ga@shike2.net>
Date:   Wed,  8 Apr 2026 15:04:29 +0200

cc1: Add support for wchar_t strings

Diffstat:
M src/cmd/scc-cc/cc1/cc1.h | 3 +++
M src/cmd/scc-cc/cc1/code.c | 21 +++++++++++++++++++++
M src/cmd/scc-cc/cc1/expr.c | 33 +++++++++++++++++----------------
M src/cmd/scc-cc/cc1/lex.c | 77 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------
M src/cmd/scc-cc/cc1/symbol.c | 16 ++++++++++++++++
5 files changed, 124 insertions(+), 26 deletions(-)

diff --git a/src/cmd/scc-cc/cc1/cc1.h b/src/cmd/scc-cc/cc1/cc1.h @@ -116,6 +116,7 @@ enum { SLOCAL = 1 << 9, SEMITTED = 1 << 10, SDEFINED = 1 << 11, + SRSTRING = 1 << 11, SSTRING = 1 << 12, STYPEDEF = 1 << 13, SINITLST = 1 << 14, @@ -356,6 +357,7 @@ struct symbol { double d; long double ld; char *s; + Rune *rs; unsigned char token; Node **init; Symbol **pars; @@ -466,6 +468,7 @@ void killsym(Symbol *sym); Symbol *newlabel(void); void builtins(struct builtin *builts); Symbol *newstring(char *s, size_t len); +Symbol *newrstring(Rune *, size_t); unsigned newid(void); void isyms(void); diff --git a/src/cmd/scc-cc/cc1/code.c b/src/cmd/scc-cc/cc1/code.c @@ -1,4 +1,5 @@ #include <assert.h> +#include <inttypes.h> #include <ctype.h> #include <stdio.h> #include <stdlib.h> @@ -322,6 +323,20 @@ emittype(int op, void *arg) } static void +emitrstring(Symbol *sym, Type *tp) +{ + Rune *bp, *lim; + + bp = sym->u.rs; + lim = &sym->u.rs[tp->n.elem]; + while (bp < lim) { + fprintf(outfp, + "\t#%c%" PRIX32 "\n", + wchartype->letter, *bp++); + } +} + +static void emitstring(Symbol *sym, Type *tp) { char *bp, *s, *lim; @@ -389,6 +404,12 @@ emitdesig(Node *np, Type *tp, unsigned long long *addr) *addr += tp->n.elem; return; } + if (sym->flags & SRSTRING) { + emitrstring(sym, tp); + *addr += tp->n.elem * wchartype->size; + return; + } + if ((sym->flags & SINITLST) == 0) goto emit_expression; } diff --git a/src/cmd/scc-cc/cc1/expr.c b/src/cmd/scc-cc/cc1/expr.c @@ -652,28 +652,29 @@ negation(int op, Node *np) static Symbol * adjstrings(Symbol *sym) { - char *s, *t; - size_t len, n; - Type *tp; + char *d, *s; + Type *tp, *base; + size_t len, n, osiz, nsiz; tp = sym->type; - s = sym->u.s; - for (len = tp->n.elem;; len += n) { - next(); - if (yytoken != STRING) + base = tp->type; + for (len = tp->n.elem; next() == STRING; len += n - 1) { + if (yylval.sym->type->type != base) break; - t = yylval.sym->u.s; - n = yylval.sym->type->n.elem - 1; - s = xrealloc(s, len + n); - memcpy(s + len - 1, t, 
n); - s[len + n - 1] = '\0'; - } + d = sym->u.s; + s = yylval.sym->u.s; + n = yylval.sym->type->n.elem; + osiz = (len-1) * base->size; + nsiz = n * base->size; - if (tp->n.elem != len) { - sym->type = mktype(chartype, ARY, len, NULL); - sym->u.s = s; + sym->u.s = d = xrealloc(d, osiz + nsiz); + memcpy(d + osiz, s, nsiz); } + + if (tp->n.elem != len) + sym->type = mktype(base, ARY, len, NULL); + return sym; } diff --git a/src/cmd/scc-cc/cc1/lex.c b/src/cmd/scc-cc/cc1/lex.c @@ -12,6 +12,8 @@ #include "cc1.h" #define REPLACECHAR 0xFFFD +#define NOMULTICHAR 0 +#define MULTICHAR 1 int yytoken; struct yystype yylval; @@ -712,24 +714,66 @@ character(void) } static int -string(void) +rstring(void) { - int c, multi = 0; - char buff[STRINGSIZ+1], *bp = buff, *beg = input->p, *end; + char *beg = input->p++; + Rune c, buff[STRINGSIZ+1], *bp = buff; for (++input->p; ; ++input->p) { - c = *input->p; - - if (c == '"') - break; + c = *(unsigned char *) input->p; - if (c == '\0') { + switch (c) { + case '\0': errorp("missing terminating '\"' character"); + case '"': + goto end_loop; + case '\\': + c = escape(MULTICHAR); + } + + if (input->p - beg == STRINGSIZ + 1) { + /* too long, ignore everything until next quote */ + for (++input->p; *input->p != '"'; ++input->p) { + if (*input->p == '\\') + ++input->p; + if (*input->p == '\0') + break; + } + --bp; + errorp("string too long"); break; } + *bp++ = c; + } - if (c == '\\') - c = escape(multi); +end_loop: + *bp++ = '\0'; + input->p++; + + yylval.sym = newrstring(buff, bp - buff); + tok2str(); + + return STRING; +} + +static int +sstring(void) +{ + int c; + struct limits *lim; + char buff[STRINGSIZ+1], *bp = buff, *beg = input->p; + + for (++input->p; ; ++input->p) { + c = *input->p; + + switch (c) { + case '\0': + errorp("missing terminating '\"' character"); + case '"': + goto end_loop; + case '\\': + c = escape(NOMULTICHAR); + } if (input->p - beg == STRINGSIZ + 1) { /* too long, ignore everything until next quote */ @@ -745,6 
+789,8 @@ string(void) } *bp++ = c; } + +end_loop: *bp++ = '\0'; input->p++; @@ -755,6 +801,14 @@ string(void) } static int +string(void) +{ + if (*input->p == 'L') + return rstring(); + return sstring(); +} + +static int iden(void) { Symbol *sym; @@ -763,6 +817,9 @@ iden(void) if (input->p[0] == 'L' && input->p[1] == '\'') return character(); + if (input->p[0] == 'L' && input->p[1] == '"') + return string(); + begin = input->p; for (p = begin; isalnum(*p) || *p == '_'; ++p) ; diff --git a/src/cmd/scc-cc/cc1/symbol.c b/src/cmd/scc-cc/cc1/symbol.c @@ -252,6 +252,22 @@ linkhash(Symbol *sym) } Symbol * +newrstring(Rune *rs, size_t len) +{ + Symbol *sym = newsym(NS_IDEN, NULL); + + if (lexmode != CPPMODE) + sym->type = mktype(wchartype, ARY, len, NULL); + sym->id = newid(); + sym->flags |= SRSTRING | SCONSTANT | SPRIVATE; + sym->u.rs = xcalloc(len, sizeof(Rune)); + if (rs) + memcpy(sym->u.rs, rs, len * sizeof(Rune)); + + return sym; +} + +Symbol * newstring(char *s, size_t len) { Symbol *sym = newsym(NS_IDEN, NULL);