scc

simple c99 compiler
git clone git://git.simple-cc.org/scc
Log | Files | Refs | README | LICENSE

commit f89df166f2425304ff0ab221940e41a1b8ea079f
parent c9d3ab93258058932a722957c30d16c41a48615b
Author: Roberto E. Vargas Caballero <k0ga@shike2.net>
Date:   Wed, 17 Jun 2026 09:52:15 +0200

cc1: Mark hidden tokens

The input hideset approximation works very well because we are able
to update the hideset on every change made to it, but it fails
when we fully expand a macro, because then the rescan loses the hideset
information. To solve this, we have to introduce annotation tokens in
the lexer that tell it to disable expansions between a NOEXPAND and
an EXPAND. These tokens are always consumed by the lexer or the macro
expansion, and they are never shown in the -E output or to the grammar
parser, because they always appear in a buffer associated with an
IMACRO input that is consumed by the lexer to generate the tokens seen
by the grammar parser.

Diffstat:
Msrc/cmd/scc-cc/cc1/cc1.h | 3+++
Msrc/cmd/scc-cc/cc1/cpp.c | 63+++++++++++++++++++++++++++++++++++++++++++--------------------
Msrc/cmd/scc-cc/cc1/lex.c | 38++++++++++++++++++++++++++++----------
3 files changed, 74 insertions(+), 30 deletions(-)

diff --git a/src/cmd/scc-cc/cc1/cc1.h b/src/cmd/scc-cc/cc1/cc1.h @@ -148,6 +148,8 @@ enum tokens { MACROPAR = 17, CONCAT = 18, STRINGIZE = 19, + NOEXPAND = 20, + EXPAND = 21, TYPE = 129, IDEN, SCLASS, @@ -410,6 +412,7 @@ struct arch { struct yystype { Symbol *sym; + Symbol *cppsym; unsigned char token; }; diff --git a/src/cmd/scc-cc/cc1/cpp.c b/src/cmd/scc-cc/cc1/cpp.c @@ -353,38 +353,61 @@ expandarg(char *arg, char *def, char *curdef, char *buf, int bufsiz) /* gives priority to concatenation operators */ if (concatoper(def, curdef)) { - siz = strlen(arg); - if (siz >= bufsiz) { - siz = -1; - } else { - memcpy(buf, arg, siz); - buf += siz; + char *p; + int c; + + for (siz = 0, p = arg; (c = *p) != '\0'; p++) { + if (c == NOEXPAND || c == EXPAND) + continue; + if (siz >= bufsiz) { + siz = -1; + break; + } + *buf++ = c; + siz++; } } else { - disexpand++; + int n; + char noex[2] = {0, 0}, ex[2] = {0, 0}; + + /* + * Here we have to deal with a special case, + * because the recursive input expansion deals + * correctly with the hideset manipulation, + * but once the macro expansion is flattened in + * a buffer associated to the macro expansion + * all the hideset information is lost. To solve + * this problem we have to add annotation tokens + * to maintain this information and avoid expanding + * macros hidden due to the recursive hidesets + */ addinput(IPARAM, arg, FAIL); - for (siz = 0; ; siz += yylen) { - yyspace = '\0'; + for (siz = 0; ; siz += n) { + yyspace = noex[0] = ex[0] = '\0'; if (next() == EOFTOK) break; - if (yylen > bufsiz-2) { - siz = -1; - break; + + if (yytoken == IDEN) { + Symbol *sym = yylval.cppsym; + if (disexpand || sym && sym->hide) { + noex[0] = NOEXPAND; + ex[0] = EXPAND; + } } - if (yyspace) { - *buf++ = ' '; - bufsiz--; - siz++; + n = snprintf(buf, bufsiz, "%s%s%s%s", + yyspace ? 
" " : "", noex, yytext, ex); + + if (n < 0 || n == bufsiz) { + siz = -1; + break; } - memcpy(buf, yytext, yylen); - bufsiz -= yylen; - buf += yylen; + buf += n; + bufsiz -= n; } delinput(); - disexpand--; } *buf = '\0'; diff --git a/src/cmd/scc-cc/cc1/lex.c b/src/cmd/scc-cc/cc1/lex.c @@ -884,10 +884,13 @@ iden(void) ; input->p = p; tok2str(); - if ((sym = lookup(NS_CPP, yytext, NOALLOC)) != NULL) { + + sym = lookup(NS_CPP, yytext, NOALLOC); + if ((yylval.cppsym = sym) != NULL) { if (expand(sym)) return next(); } + sym = lookup(namespace, yytext, ALLOC); yylval.sym = sym; if (sym->flags & SCONSTANT) @@ -1068,23 +1071,38 @@ return_byte: return c; } -int -next(void) +static int +lex(void) { int c; if ((c = skipspaces()) == EOF) - yytoken = EOFTOK; - else if (isalpha(c) || c == '_') - yytoken = iden(); + return EOFTOK; + + if (c == NOEXPAND) { + disexpand++; + input->begin = ++input->p; + } else if (c == EXPAND) { + disexpand--; + input->begin = ++input->p; + } + + if (isalpha(c) || c == '_') + return iden(); else if (isdigit(c)) - yytoken = number(); + return number(); else if (c == '"') - yytoken = string(); + return string(); else if (c == '\'') - yytoken = character(); + return character(); else - yytoken = operator(); + return operator(); +} + +int +next(void) +{ + yytoken = lex(); if (yytoken == EOFTOK) { strcpy(yytext, "<EOF>");