commit f53a0d9b613467cdfaf87c868f25c6f79f742f56
parent 6d702ec165be91c82ccd3940c94625967957ae8b
Author: Roberto E. Vargas Caballero <k0ga@shike2.net>
Date: Wed, 8 Apr 2026 15:04:29 +0200
cc1: Add support for wchar_t strings
Diffstat:
5 files changed, 124 insertions(+), 26 deletions(-)
diff --git a/src/cmd/scc-cc/cc1/cc1.h b/src/cmd/scc-cc/cc1/cc1.h
@@ -116,6 +116,7 @@ enum {
SLOCAL = 1 << 9,
SEMITTED = 1 << 10,
SDEFINED = 1 << 11,
+ SRSTRING = 1 << 11,
SSTRING = 1 << 12,
STYPEDEF = 1 << 13,
SINITLST = 1 << 14,
@@ -356,6 +357,7 @@ struct symbol {
double d;
long double ld;
char *s;
+ Rune *rs;
unsigned char token;
Node **init;
Symbol **pars;
@@ -466,6 +468,7 @@ void killsym(Symbol *sym);
Symbol *newlabel(void);
void builtins(struct builtin *builts);
Symbol *newstring(char *s, size_t len);
+Symbol *newrstring(Rune *, size_t);
unsigned newid(void);
void isyms(void);
diff --git a/src/cmd/scc-cc/cc1/code.c b/src/cmd/scc-cc/cc1/code.c
@@ -1,4 +1,5 @@
#include <assert.h>
+#include <inttypes.h>
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
@@ -322,6 +323,20 @@ emittype(int op, void *arg)
}
static void
+emitrstring(Symbol *sym, Type *tp)
+{
+ Rune *bp, *lim;
+
+ bp = sym->u.rs;
+ lim = &sym->u.rs[tp->n.elem];
+ while (bp < lim) {
+ fprintf(outfp,
+ "\t#%c%" PRIX32 "\n",
+ wchartype->letter, *bp++);
+ }
+}
+
+static void
emitstring(Symbol *sym, Type *tp)
{
char *bp, *s, *lim;
@@ -389,6 +404,12 @@ emitdesig(Node *np, Type *tp, unsigned long long *addr)
*addr += tp->n.elem;
return;
}
+ if (sym->flags & SRSTRING) {
+ emitrstring(sym, tp);
+ *addr += tp->n.elem * wchartype->size;
+ return;
+ }
+
if ((sym->flags & SINITLST) == 0)
goto emit_expression;
}
diff --git a/src/cmd/scc-cc/cc1/expr.c b/src/cmd/scc-cc/cc1/expr.c
@@ -652,28 +652,29 @@ negation(int op, Node *np)
static Symbol *
adjstrings(Symbol *sym)
{
- char *s, *t;
- size_t len, n;
- Type *tp;
+ char *d, *s;
+ Type *tp, *base;
+ size_t len, n, osiz, nsiz;
tp = sym->type;
- s = sym->u.s;
- for (len = tp->n.elem;; len += n) {
- next();
- if (yytoken != STRING)
+ base = tp->type;
+ for (len = tp->n.elem; next() == STRING; len += n - 1) {
+ if (yylval.sym->type->type != base)
break;
- t = yylval.sym->u.s;
- n = yylval.sym->type->n.elem - 1;
- s = xrealloc(s, len + n);
- memcpy(s + len - 1, t, n);
- s[len + n - 1] = '\0';
- }
+ d = sym->u.s;
+ s = yylval.sym->u.s;
+ n = yylval.sym->type->n.elem;
+ osiz = (len-1) * base->size;
+ nsiz = n * base->size;
- if (tp->n.elem != len) {
- sym->type = mktype(chartype, ARY, len, NULL);
- sym->u.s = s;
+ sym->u.s = d = xrealloc(d, osiz + nsiz);
+ memcpy(d + osiz, s, nsiz);
}
+
+ if (tp->n.elem != len)
+ sym->type = mktype(base, ARY, len, NULL);
+
return sym;
}
diff --git a/src/cmd/scc-cc/cc1/lex.c b/src/cmd/scc-cc/cc1/lex.c
@@ -12,6 +12,8 @@
#include "cc1.h"
#define REPLACECHAR 0xFFFD
+#define NOMULTICHAR 0
+#define MULTICHAR 1
int yytoken;
struct yystype yylval;
@@ -712,24 +714,66 @@ character(void)
}
static int
-string(void)
+rstring(void)
{
- int c, multi = 0;
- char buff[STRINGSIZ+1], *bp = buff, *beg = input->p, *end;
+ char *beg = input->p++;
+ Rune c, buff[STRINGSIZ+1], *bp = buff;
for (++input->p; ; ++input->p) {
- c = *input->p;
-
- if (c == '"')
- break;
+ c = *(unsigned char *) input->p;
- if (c == '\0') {
+ switch (c) {
+ case '\0':
errorp("missing terminating '\"' character");
+ case '"':
+ goto end_loop;
+ case '\\':
+ c = escape(MULTICHAR);
+ }
+
+ if (input->p - beg == STRINGSIZ + 1) {
+ /* too long, ignore everything until next quote */
+ for (++input->p; *input->p != '"'; ++input->p) {
+ if (*input->p == '\\')
+ ++input->p;
+ if (*input->p == '\0')
+ break;
+ }
+ --bp;
+ errorp("string too long");
break;
}
+ *bp++ = c;
+ }
- if (c == '\\')
- c = escape(multi);
+end_loop:
+ *bp++ = '\0';
+ input->p++;
+
+ yylval.sym = newrstring(buff, bp - buff);
+ tok2str();
+
+ return STRING;
+}
+
+static int
+sstring(void)
+{
+ int c;
+ struct limits *lim;
+ char buff[STRINGSIZ+1], *bp = buff, *beg = input->p;
+
+ for (++input->p; ; ++input->p) {
+ c = *input->p;
+
+ switch (c) {
+ case '\0':
+ errorp("missing terminating '\"' character");
+ case '"':
+ goto end_loop;
+ case '\\':
+ c = escape(NOMULTICHAR);
+ }
if (input->p - beg == STRINGSIZ + 1) {
/* too long, ignore everything until next quote */
@@ -745,6 +789,8 @@ string(void)
}
*bp++ = c;
}
+
+end_loop:
*bp++ = '\0';
input->p++;
@@ -755,6 +801,14 @@ string(void)
}
static int
+string(void)
+{
+ if (*input->p == 'L')
+ return rstring();
+ return sstring();
+}
+
+static int
iden(void)
{
Symbol *sym;
@@ -763,6 +817,9 @@ iden(void)
if (input->p[0] == 'L' && input->p[1] == '\'')
return character();
+ if (input->p[0] == 'L' && input->p[1] == '"')
+ return string();
+
begin = input->p;
for (p = begin; isalnum(*p) || *p == '_'; ++p)
;
diff --git a/src/cmd/scc-cc/cc1/symbol.c b/src/cmd/scc-cc/cc1/symbol.c
@@ -252,6 +252,22 @@ linkhash(Symbol *sym)
}
Symbol *
+newrstring(Rune *rs, size_t len)
+{
+ Symbol *sym = newsym(NS_IDEN, NULL);
+
+ if (lexmode != CPPMODE)
+ sym->type = mktype(wchartype, ARY, len, NULL);
+ sym->id = newid();
+ sym->flags |= SRSTRING | SCONSTANT | SPRIVATE;
+ sym->u.rs = xcalloc(len, sizeof(Rune));
+ if (rs)
+ memcpy(sym->u.rs, rs, len * sizeof(Rune));
+
+ return sym;
+}
+
+Symbol *
newstring(char *s, size_t len)
{
Symbol *sym = newsym(NS_IDEN, NULL);