scc

simple c99 compiler
git clone git://git.simple-cc.org/scc
Log | Files | Refs | Submodules | README | LICENSE

commit afec1bc0e0ba8999c706adf6438cd74d18715a88
parent f826b55f340d6d1806060e40c898ec746a96da02
Author: Roberto E. Vargas Caballero <k0ga@shike2.com>
Date:   Tue,  5 Apr 2022 14:04:12 +0200

cc1: Use control characters for #, ## and macro parameters

We were using the ASCII characters $, @ and # for them, which had the drawback
that the preprocessor could not work with text containing these characters
in unexpected places. Using control characters removes that problem and
enables the use of these new tokens in the lexer itself.

Diffstat:
Msrc/cmd/cc/cc1/cc1.h | 7+++++--
Msrc/cmd/cc/cc1/cpp.c | 22++++++++++++----------
Msrc/cmd/cc/cc1/lex.c | 2+-
3 files changed, 18 insertions(+), 13 deletions(-)

diff --git a/src/cmd/cc/cc1/cc1.h b/src/cmd/cc/cc1/cc1.h @@ -129,8 +129,11 @@ enum tokens { RESTRICT = 1 << 1, VOLATILE = 1 << 2, INLINE = 1 << 3, - TQUALIFIER = 1 << 7, /* this value is picked outside of ASCII range */ - TYPE, + TQUALIFIER = 1 << 7, + MACROPAR = 17, + CONCAT = 18, + STRINGIZE = 19, + TYPE = 129, IDEN, SCLASS, CONSTANT, diff --git a/src/cmd/cc/cc1/cpp.c b/src/cmd/cc/cc1/cpp.c @@ -242,14 +242,14 @@ copymacro(struct macroctx *mp) bufsiz -= size; bp += size; break; - case '$': + case CONCAT: /* token concatenation operator */ while (bp[-1] == ' ') --bp, ++bufsiz; while (s[1] == ' ') ++s; break; - case '#': + case STRINGIZE: /* stringfier operator */ arg = mp->arglist[atoi(s += 2)]; s += 2; @@ -275,7 +275,7 @@ copymacro(struct macroctx *mp) *bp++ = '"'; break; - case '@': + case MACROPAR: /* parameter substitution */ arg = mp->arglist[atoi(++s)]; size = expandarg(arg, bp, bufsiz); @@ -393,10 +393,11 @@ static int getdefs(Symbol *args[NR_MACROARG], int nargs, char *bp, size_t bufsiz) { Symbol **argp; + int siz; size_t len; int prevc = 0, ispar; - if (yytoken == '$') { + if (yytoken == CONCAT) { cpperror("'##' cannot appear at either ends of a macro expansion"); return 0; } @@ -409,11 +410,13 @@ getdefs(Symbol *args[NR_MACROARG], int nargs, char *bp, size_t bufsiz) break; } if (argp != &args[nargs]) { - sprintf(yytext, "@%02d@", (int) (argp - args)); + siz = argp - args; + sprintf(yytext, + "%c%02d%c", MACROPAR, siz, MACROPAR); ispar = 1; } } - if (prevc == '#' && !ispar) { + if (prevc == STRINGIZE && !ispar) { cpperror("'#' is not followed by a macro parameter"); return 0; } @@ -424,16 +427,15 @@ getdefs(Symbol *args[NR_MACROARG], int nargs, char *bp, size_t bufsiz) cpperror("macro too long"); return 0; } - /* $ token is generated by ## */ - if (yytoken == '$') { - *bp++ = '$'; + if (yytoken == CONCAT || yytoken == STRINGIZE) { + *bp++ = yytoken; --bufsiz; } else { memcpy(bp, yytext, len); bp += len; bufsiz -= len; } - if ((prevc = yytoken) != '#') { + 
if ((prevc = yytoken) != STRINGIZE) { *bp++ = ' '; --bufsiz; } diff --git a/src/cmd/cc/cc1/lex.c b/src/cmd/cc/cc1/lex.c @@ -755,7 +755,7 @@ operator(void) t = follow('=', NE, '!'); break; case '#': - t = follow('#', '$', '#'); + t = follow('#', CONCAT, STRINGIZE); break; case '-': t = minus();