scc

simple c99 compiler
git clone git://git.simple-cc.org/scc
Log | Files | Refs | Submodules | README | LICENSE

commit d3b8c4d1db7ed84845f1115765d143ce7ffc4d44
parent 2342614311a01f5fdd45e2cd01d4e6c6cac210b7
Author: Roberto E. Vargas Caballero <k0ga@shike2.com>
Date:   Tue,  5 Apr 2022 14:04:12 +0200

cc1: Use control characters for #, ## and macro parameters

We were using the ASCII characters $, @ and # for them, which had the drawback
that the preprocessor could not work with text containing these characters
in unexpected places. Using control characters removes that problem and
enables the use of these new tokens in the lexer itself.

Diffstat:
Msrc/cmd/cc/cc1/cc1.h | 7+++++--
Msrc/cmd/cc/cc1/cpp.c | 22++++++++++++----------
Msrc/cmd/cc/cc1/lex.c | 2+-
3 files changed, 18 insertions(+), 13 deletions(-)

diff --git a/src/cmd/cc/cc1/cc1.h b/src/cmd/cc/cc1/cc1.h @@ -129,8 +129,11 @@ enum tokens { RESTRICT = 1 << 1, VOLATILE = 1 << 2, INLINE = 1 << 3, - TQUALIFIER = 1 << 7, /* this value is picked outside of ASCII range */ - TYPE, + TQUALIFIER = 1 << 7, + MACROPAR = 17, + CONCAT = 18, + STRINGIZE = 19, + TYPE = 129, IDEN, SCLASS, CONSTANT, diff --git a/src/cmd/cc/cc1/cpp.c b/src/cmd/cc/cc1/cpp.c @@ -242,14 +242,14 @@ copymacro(struct macroctx *mp) bufsiz -= size; bp += size; break; - case '$': + case CONCAT: /* token concatenation operator */ while (bp[-1] == ' ') --bp, ++bufsiz; while (s[1] == ' ') ++s; break; - case '#': + case STRINGIZE: /* stringfier operator */ arg = mp->arglist[atoi(s += 2)]; s += 2; @@ -275,7 +275,7 @@ copymacro(struct macroctx *mp) *bp++ = '"'; break; - case '@': + case MACROPAR: /* parameter substitution */ arg = mp->arglist[atoi(++s)]; size = expandarg(arg, bp, bufsiz); @@ -393,10 +393,11 @@ static int getdefs(Symbol *args[NR_MACROARG], int nargs, char *bp, size_t bufsiz) { Symbol **argp; + int siz; size_t len; int prevc = 0, ispar; - if (yytoken == '$') { + if (yytoken == CONCAT) { cpperror("'##' cannot appear at either ends of a macro expansion"); return 0; } @@ -409,11 +410,13 @@ getdefs(Symbol *args[NR_MACROARG], int nargs, char *bp, size_t bufsiz) break; } if (argp != &args[nargs]) { - sprintf(yytext, "@%02d@", (int) (argp - args)); + siz = argp - args; + sprintf(yytext, + "%c%02d%c", MACROPAR, siz, MACROPAR); ispar = 1; } } - if (prevc == '#' && !ispar) { + if (prevc == STRINGIZE && !ispar) { cpperror("'#' is not followed by a macro parameter"); return 0; } @@ -424,16 +427,15 @@ getdefs(Symbol *args[NR_MACROARG], int nargs, char *bp, size_t bufsiz) cpperror("macro too long"); return 0; } - /* $ token is generated by ## */ - if (yytoken == '$') { - *bp++ = '$'; + if (yytoken == CONCAT || yytoken == STRINGIZE) { + *bp++ = yytoken; --bufsiz; } else { memcpy(bp, yytext, len); bp += len; bufsiz -= len; } - if ((prevc = yytoken) != '#') { + 
if ((prevc = yytoken) != STRINGIZE) { *bp++ = ' '; --bufsiz; } diff --git a/src/cmd/cc/cc1/lex.c b/src/cmd/cc/cc1/lex.c @@ -755,7 +755,7 @@ operator(void) t = follow('=', NE, '!'); break; case '#': - t = follow('#', '$', '#'); + t = follow('#', CONCAT, STRINGIZE); break; case '-': t = minus();