scc

simple c99 compiler
git clone git://git.simple-cc.org/scc
Log | Files | Refs | Submodules | README | LICENSE

commit dcde10bc7be69fa1c7e497f67754f5323649f5cf
parent fad9e0fb2e77d701992ea2e740f8c3bb67e9a049
Author: Roberto E. Vargas Caballero <k0ga@shike2.net>
Date:   Wed, 28 Jan 2026 14:58:35 +0100

cc1: Rewrite macro definition parser

The macro definition parser was based on the tokenizer used by
cc1, which meant that the input string was modified by adding
spaces around the tokens. In normal use cases this does not
cause any problems, but some people do things like:

	#define STDIO <stdio.h>
	#include STDIO

that is not conformant because the preprocessor is allowed to
work on C tokens, which would split the previous macro into:

	< stdio . h >

This commit modifies the parser of macro definitions so that it
does not use the tokenizer (except in the case of strings) and
preserves the input string as much as possible.

Diffstat:
Msrc/cmd/scc-cc/cc1/cpp.c | 112++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------------------
Atests/cc/execute/0232-cppmacro.c | 9+++++++++
Mtests/cc/execute/scc-tests.lst | 1+
3 files changed, 89 insertions(+), 33 deletions(-)

diff --git a/src/cmd/scc-cc/cc1/cpp.c b/src/cmd/scc-cc/cc1/cpp.c @@ -1,3 +1,4 @@ +#include <assert.h> #include <ctype.h> #include <limits.h> #include <stdio.h> @@ -485,14 +486,13 @@ getpars(Symbol *args[NR_MACROARG]) int n, c; Symbol *sym; - c = *input->p; - next(); - if (c != '(') + if (*input->p != '(') return -1; /* skip the '(' */ next(); - if (accept(')')) + next(); + if (yytoken == ')') return 0; n = 0; @@ -517,65 +517,111 @@ getpars(Symbol *args[NR_MACROARG]) } next(); } while (accept(',')); - expect(')'); + + if (yytoken != ')') { + cpperror("expected ')' at the end of macro argument list"); + return NR_MACROARG; + } return n; } static int -getdefs(Symbol *args[NR_MACROARG], int nargs, char *bp, size_t bufsiz) +getdefs(Symbol *args[NR_MACROARG], int nargs, char *buffer, size_t bufsiz) { - Symbol **argp; - int siz; size_t len; - int prevc = 0, ispar; + Symbol **argp, *sym; + int c, id, token, prevc, ispar; + char *bp, *p, iden[INTIDENTSIZ + 1]; - if (yytoken == CONCAT) - goto wrong_concat; + while (isspace(*input->p)) + ++input->p; - for (;;) { + bp = buffer; + for (prevc = 0; (c = *input->p) != '\n' && c != '\0'; ++input->p) { + len = 1; ispar = 0; - if (yytoken == IDEN && nargs >= 0) { + token = c; + sym = NULL; + + if (c == '#') { + if (input->p[1] == '#') { + token = CONCAT; + ++input->p; + } else { + token = STRINGIZE; + } + } else if (c == '_' || isalpha(c)) { + token = IDEN; + for (p = input->p; isalpha(*p) || *p == '_'; ++p) + ; + len = p - input->p; + if (len > INTIDENTSIZ) { + cpperror("identifier too long in macro definition"); + return 0; + } + memcpy(iden, input->p, len); + iden[len] = '\0'; + input->p = p - 1; + sym = lookup(NS_IDEN, iden, NOALLOC); + } else if (c == '"') { + next(); + assert(yytoken == STRING); + token = STRING; + len = yylen; + } + + if (sym && nargs > 0) { for (argp = args; argp < &args[nargs]; ++argp) { - if (*argp == yylval.sym) + if (*argp == sym) break; } if (argp != &args[nargs]) { - siz = argp - args; - sprintf(yytext, 
- "%c%02d%c", MACROPAR, siz, MACROPAR); + id = argp - args; + sprintf(iden, + "%c%02d%c", MACROPAR, id, MACROPAR); ispar = 1; + len = 4; } } + + if (prevc == 0 && token == CONCAT) + goto wrong_concat; + if (prevc == STRINGIZE && !ispar) { cpperror("'#' is not followed by a macro parameter"); return 0; } - if (yytoken == '\n') - break; - if ((len = strlen(yytext)) >= bufsiz) { + if (len >= bufsiz) { cpperror("macro too long"); return 0; } - if (yytoken == CONCAT || yytoken == STRINGIZE) { - *bp++ = yytoken; - --bufsiz; - } else { - memcpy(bp, yytext, len); - bp += len; - bufsiz -= len; - } - if ((prevc = yytoken) != STRINGIZE) { - *bp++ = ' '; - --bufsiz; + + switch (token) { + case IDEN: + memcpy(bp, iden, len); + break; + case STRING: + memcpy(bp, yytext, yylen); + break; + default: + *bp = token; + break; } - next(); + + bp += len; + bufsiz -= len; + prevc = token; } +end_loop: + if ((yytoken = c) == '\0') + yytoken = EOFTOK; if (prevc == CONCAT) goto wrong_concat; - + for ( ; bp > buffer && isspace(bp[-1]); --bp); + ; *bp = '\0'; return 1; diff --git a/tests/cc/execute/0232-cppmacro.c b/tests/cc/execute/0232-cppmacro.c @@ -0,0 +1,9 @@ +#define HEADER <stdio.h> + +#include HEADER + +int +main(void) +{ + return 0; +} diff --git a/tests/cc/execute/scc-tests.lst b/tests/cc/execute/scc-tests.lst @@ -222,3 +222,4 @@ 0229-commalog.c 0230-init.c 0231-init.c +0232-cppmacro.c