commit 76a7e8501aac08ca649752263c991b03a3a73d7b
parent 59e3faab23c5f9d281394c3e88172736d5ec847c
Author: Roberto E. Vargas Caballero <k0ga@shike2.net>
Date: Wed, 25 Mar 2026 20:09:20 +0100
cc1: Do not use the tokenizer for macro arguments
Using the tokenizer to get macro parameters has the side effect that it
destroys the whitespace within the parameter. To avoid this problem
we fall back to char-by-char parsing, except in the case of strings
and character constants, because parsing them by hand can lead to more
problems and we should not consider duplicating the code from lex.c
into cpp.c.
Diffstat:
5 files changed, 87 insertions(+), 67 deletions(-)
diff --git a/src/cmd/scc-cc/cc1/cc1.h b/src/cmd/scc-cc/cc1/cc1.h
@@ -473,6 +473,7 @@ int ahead(void);
int next(void);
void expect(int tok);
void discard(void);
+int moreinput(void);
int addinput(int, void *, int);
void delinput(void);
void setsafe(int type);
diff --git a/src/cmd/scc-cc/cc1/cpp.c b/src/cmd/scc-cc/cc1/cpp.c
@@ -93,53 +93,54 @@ icpp(void)
ncmdlines = 0;
}
-static void
-nextcpp(Macro *mp)
+static char *
+appendpar(Macro *mp, char *s, int len)
{
- int len, siz;
char *arg;
+ int siz = mp->argsiz;
- next();
- if (yytoken == EOFTOK) {
- error("unterminated argument list invoking macro \"%s\"",
- mp->sym->name);
- }
-
- if (yytoken == IDEN)
- yylval.sym->flags |= SUSED;
-
- len = strlen(yytext);
- siz = mp->argsiz;
if (len+1 > INT_MAX - siz) {
error("too long argument invoking macro \"%s\"",
mp->sym->name);
}
- arg = xrealloc(mp->arg, siz + len + 1);
- if (siz > 0) {
- arg[siz-1] = ' ';
- memcpy(arg + siz, yytext, len+1);
- } else {
- memcpy(arg, yytext, len+1);
- }
-
- mp->arg = arg;
- mp->argsiz = siz + len + 1;
+ mp->arg = xrealloc(mp->arg, siz + len + 1);
+ memcpy(mp->arg + siz, s, len);
+ mp->argsiz = siz += len;
+ mp->arg[siz] = '\0';
}
static void
paren(Macro *mp)
{
for (;;) {
- nextcpp(mp);
- switch (yytoken) {
+ switch (*input->p) {
case ')':
+ appendpar(mp, input->p++, 1);
return;
case '(':
+ appendpar(mp, input->p++, 1);
paren(mp);
break;
+ case '"':
+ case '\'':
+ next();
+ assert(yytoken == STRING || yytoken == CONSTANT);
+ appendpar(mp, yytext, yylen);
+ break;
+ case '\0':
+ if (!moreinput())
+ goto unterminated;
+ continue;
+ default:
+ appendpar(mp, input->p++, 1);
+ break;
}
}
+
+unterminated:
+ error("unterminated argument list invoking macro \"%s\"",
+ mp->sym->name);
}
static char *
@@ -147,73 +148,90 @@ parameter(Macro *mp)
{
int siz;
char *s, *begin, *end;
- Input *ip = input;
mp->arg = NULL;
mp->argsiz = 0;
+
for (;;) {
- nextcpp(mp);
- switch (yytoken) {
+ switch (*input->p) {
+ case '"':
+ case '\'':
+ next();
+ assert(yytoken == STRING || yytoken == CONSTANT);
+ appendpar(mp, yytext, yylen);
+ break;
+ case '(':
+ appendpar(mp, input->p++, 1);
+ paren(mp);
+ break;
case ')':
case ',':
- /* remove "," or ")"*/
begin = mp->arg;
- end = mp->arg + mp->argsiz - 2;
+ end = begin + mp->argsiz;
- while (end > begin && isspace(end[-1]))
- --end;
- while (begin < end && isspace(begin[0]))
- ++begin;
+ while (begin < end && isspace(*begin))
+ begin++;
+ while (end > begin && isspace(*end))
+ end--;
siz = end - begin;
- s = memcpy(xmalloc(siz+1), begin, siz);
+ s = memcpy(xmalloc(siz + 1), begin, siz);
s[siz] = '\0';
- free(mp->arg);
return s;
- case '(':
- paren(mp);
+ case '\0':
+ if (!moreinput())
+ goto unterminated;
+ continue;
+ default:
+ appendpar(mp, input->p++, 1);
break;
}
}
+
+unterminated:
+ error("unterminated argument list invoking macro \"%s\"",
+ mp->sym->name);
}
static int
parsepars(Macro *mp)
{
- int n;
+ int c, n;
+ char *name;
if (mp->npars == -1)
return 1;
if (ahead() != '(')
return 0;
- disexpand = 1;
- next();
- n = 0;
+ input->p++;
+ name = mp->sym->name;
if (mp->npars == 0 && ahead() == ')') {
- next();
- } else {
- do {
- mp->arglist = xrealloc(mp->arglist, (n+1)*sizeof(char *));
- mp->arglist[n] = parameter(mp);
- DBG("MACRO fetched arg '%s'", mp->arglist[n]);
- } while (++n < NR_MACROARG && yytoken == ',');
+ input->p++;
+ return 1;
}
- if (yytoken != ')')
- error("incorrect macro function-alike invocation");
- disexpand = 0;
+ disexpand = 1;
+ for (n = 0; n < NR_MACROARG; ++n) {
+ mp->arglist = xrealloc(mp->arglist, (n+1)*sizeof(char *));
+ mp->arglist[n] = parameter(mp);
+ DBG("MACRO fetched arg '%s'", mp->arglist[n]);
- if (n == NR_MACROARG)
- error("too many parameters in macro \"%s\"", mp->sym->name);
- if (n != mp->npars) {
- error("macro \"%s\" received %d arguments, but it takes %d",
- mp->sym->name, n, mp->npars);
+ c = *input->p++;
+ if (c == ')')
+ break;
+ if (c != ',')
+ error("incorrect macro function-alike invocation of \"%s\"", name);
}
+ disexpand = 0;
- return 1;
+checknpars:
+ if (n+1 == mp->npars)
+ return 1;
+ error("macro \"%s\" received %d arguments, but it takes %d",
+ mp->sym->name, n, mp->npars);
}
static int
@@ -567,10 +585,10 @@ getdefs(Symbol *args[NR_MACROARG], int nargs, char *buffer, size_t bufsiz)
yylen = len;
input->p = p - 1;
sym = lookup(NS_IDEN, yytext, NOALLOC);
- } else if (c == '"') {
+ } else if (c == '"' || c == '\'') {
next();
- assert(yytoken == STRING);
- token = STRING;
+ assert(yytoken == STRING || yytoken == CONSTANT);
+ token = yytoken;
len = yylen;
}
@@ -601,7 +619,7 @@ getdefs(Symbol *args[NR_MACROARG], int nargs, char *buffer, size_t bufsiz)
return 0;
}
- if (token == IDEN || token == STRING)
+ if (token == IDEN || token == STRING || token == CONSTANT)
memcpy(bp, yytext, yylen);
else
*bp = token;
diff --git a/src/cmd/scc-cc/cc1/lex.c b/src/cmd/scc-cc/cc1/lex.c
@@ -301,7 +301,7 @@ readline(void)
* is processed the line is discarded because it must not
* be passed to the lexer
*/
-static int
+int
moreinput(void)
{
int wasexpand = 0;
diff --git a/tests/cc/execute/0224-macro.c b/tests/cc/execute/0224-macro.c
@@ -1,9 +1,10 @@
#define _PROTOTYPE(x, y) x y
-_PROTOTYPE(int fun, (char *s, int n, const char *format, char *arg) );
-
_PROTOTYPE(int fun, (char *s, int n, const char *format,
char *arg) );
+_PROTOTYPE(int fun, (char *s, int n, const char *format, char *arg) );
+_PROTOTYPE(int fun, (char *s, int n, const char *format, char
+ *arg) );
int
fun(char *s, int n, const char *format, char *arg)
diff --git a/tests/cc/execute/scc-tests.lst b/tests/cc/execute/scc-tests.lst
@@ -178,7 +178,7 @@
0185-esc_macro2.c
0186-dec_ary.c
0187-zero_struct.c
-0188-multi_string.c [TODO]
+0188-multi_string.c
0189-cpp.c [TODO]
0190-enum_ary.c
0191-ary_addr.c