scc

simple c99 compiler
git clone git://git.simple-cc.org/scc
Log | Files | Refs | Submodules | README | LICENSE

parser.c (6917B)


      1 #include <assert.h>
      2 #include <ctype.h>
      3 #include <errno.h>
      4 #include <limits.h>
      5 #include <setjmp.h>
      6 #include <stdarg.h>
      7 #include <stdio.h>
      8 #include <stdlib.h>
      9 #include <string.h>
     10 
     11 #include <scc/cstd.h>
     12 #include <scc/mach.h>
     13 #include <scc/scc.h>
     14 
     15 #include "as.h"
     16 
     17 #define NARGS 20
     18 #define NR_INPUTS 10
     19 #define MAXLINE 100
     20 
     21 struct input {
     22 	char *fname;
     23 	unsigned lineno;
     24 	FILE *fp;
     25 };
     26 
     27 int nerrors;
     28 jmp_buf recover;
     29 char yytext[INTIDENTSIZ+1];
     30 int yytoken;
     31 size_t yylen;
     32 union yylval yylval;
     33 
     34 static char *textp, *endp;
     35 static int regmode;
     36 static struct input inputs[NR_INPUTS], *isp = inputs;
     37 
     38 static int
     39 follow(int expect1, int expect2, int ifyes1, int ifyes2, int ifno)
     40 {
     41 	int c;
     42 
     43 	if ((c = *++textp) == expect1)
     44 		return ifyes1;
     45 	if (c == expect2)
     46 		return ifyes2;
     47 	--textp;
     48 	return ifno;
     49 }
     50 
     51 static void
     52 tok2str(void)
     53 {
     54 	if ((yylen = endp - textp) > INTIDENTSIZ) {
     55 		error("token too big");
     56 		yylen = INTIDENTSIZ;
     57 	}
     58 	memcpy(yytext, textp, yylen);
     59 	yytext[yylen] = '\0';
     60 	textp = endp;
     61 }
     62 
     63 static int
     64 iden(void)
     65 {
     66 	int c;
     67 	char *p;
     68 
     69 	for ( ; c = *endp; ++endp) {
     70 		if (isalnum(c))
     71 			continue;
     72 		switch (c) {
     73 		case '\'':
     74 		case '_':
     75 		case '.':
     76 		case '$':
     77 			continue;
     78 		default:
     79 			goto out_loop;
     80 		}
     81 	}
     82 
     83 out_loop:
     84 	tok2str();
     85 	yylval.sym = lookup(yytext);
     86 
     87 	return (yylval.sym->flags & FREG) ? REG : IDEN;
     88 }
     89 
     90 static int
     91 number(void)
     92 {
     93 	int c, base = 10;
     94 	char *p;
     95 	TUINT n;
     96 	static char digits[] = "0123456789ABCDEF";
     97 
     98 	if (*endp == '0') {
     99 		base = 8;
    100 		++endp;
    101 		if (*endp == 'x') {
    102 			base = 16;
    103 			++endp;
    104 		}
    105 	}
    106 	for (n = 0; (c = *endp) && isxdigit(c); n += c) {
    107 		p = strchr(digits, toupper(c));
    108 		c = p - digits;
    109 		if (c > base)
    110 			error("invalid digit in number");
    111 		if (n >= TUINT_MAX/base - c)
    112 			error("overflow in number");
    113 		n *= base;
    114 		endp++;
    115 	}
    116 	tok2str();
    117 	yylval.sym = tmpsym(n);
    118 
    119 	return NUMBER;
    120 }
    121 
    122 static int
    123 character(void)
    124 {
    125 	int c;
    126 	char *p;
    127 
    128 	while (*endp != '\'')
    129 		++endp;
    130 	return NUMBER;
    131 }
    132 
    133 static int
    134 string(void)
    135 {
    136 	int c;
    137 	size_t l;
    138 	char *s;
    139 	Symbol *sym = tmpsym(0);
    140 
    141 	for (++endp; *endp != '"'; ++endp)
    142 		;
    143 	++endp;
    144 	tok2str();
    145 	yylval.sym = sym;
    146 	/* FIXME: this memory is not freed ever */
    147 	l = yylen-2;
    148 	s = memcpy(xmalloc(l+1), yytext+1, l);
    149 	s[l] = '\0';
    150 	sym->name = s;
    151 
    152 	return STRING;
    153 }
    154 
    155 static int
    156 operator(void)
    157 {
    158 	int c;
    159 
    160 	++endp;
    161 	if ((c = *textp) == '>')
    162 		c = follow('=', '>', LE, SHL, '>');
    163 	else if (c == '<')
    164 		c = follow('=', '<', GE, SHR, '>');
    165 	tok2str();
    166 
    167 	return c;
    168 }
    169 
    170 int
    171 ahead(void)
    172 {
    173 	while (isspace(*textp))
    174 		++textp;
    175 
    176 	if (*textp != '\0')
    177 		return *textp;
    178 	return EOS;
    179 }
    180 
    181 int
    182 next(void)
    183 {
    184 	int c;
    185 
    186 	while (isspace(*textp))
    187 		++textp;
    188 
    189 	endp = textp;
    190 
    191 	switch (c = *textp) {
    192 	case '\0':
    193 		strcpy(yytext, "EOS");
    194 		yylen = 3;
    195 		c = EOS;
    196 		break;
    197 	case '"':
    198 		c = string();
    199 		break;
    200 	case '\'':
    201 		c = character();
    202 		break;
    203 	case '%':
    204 		c = (regmode ? iden : operator)();
    205 		break;
    206 	case '.':
    207 	case '_':
    208 		c = iden();
    209 		break;
    210 	default:
    211 		if (isdigit(c))
    212 			c = number();
    213 		else if (isalpha(c))
    214 			c = iden();
    215 		else
    216 			c = operator();
    217 		break;
    218 	}
    219 	return yytoken = c;
    220 }
    221 
    222 void
    223 expect(int token)
    224 {
    225 	if (yytoken != token)
    226 		unexpected();
    227 	next();
    228 }
    229 
    230 void
    231 unexpected(void)
    232 {
    233 	error("unexpected '%s'", yytext);
    234 }
    235 
    236 void
    237 error(char *msg, ...)
    238 {
    239 	va_list va;
    240 	struct input *ip;
    241 
    242 	assert(isp > inputs);
    243 	ip = &isp[-1];
    244 
    245 	va_start(va, msg);
    246 	fprintf(stderr, "as:%s:%u: ", ip->fname, ip->lineno);
    247 	vfprintf(stderr, msg, va);
    248 	putc('\n', stderr);
    249 	nerrors++;
    250 	va_end(va);
    251 
    252 	if (nerrors == 10)
    253 		die("as: too many errors");
    254 	longjmp(recover, 1);
    255 }
    256 
    257 Node *
    258 getreg(void)
    259 {
    260 	Node *np;
    261 
    262 	np = node(REG, NULL, NULL);
    263 	np->sym = yylval.sym;
    264 	np->addr = AREG;
    265 	expect(REG);
    266 	return np;
    267 }
    268 
    269 void
    270 regctx(int mode)
    271 {
    272 	regmode = mode;
    273 }
    274 
    275 Node *
    276 operand(char **strp)
    277 {
    278 	int c, imm = 0;
    279 	Node *np;
    280 
    281 	textp = *strp;
    282 	regctx(1);
    283 	switch (next()) {
    284 	case EOS:
    285 		np = NULL;
    286 		break;
    287 	case REG:
    288 		np = getreg();
    289 		break;
    290 	case STRING:
    291 		np = node(yytoken, NULL, NULL);
    292 		np->sym = yylval.sym;
    293 		np->addr = ASTR;
    294 		next();
    295 		break;
    296 	case IDEN:
    297 		c = ahead();
    298 		if (c != EOS && c != ',')
    299 			goto expression;
    300 		np = node(IDEN, NULL, NULL);
    301 		np->sym = yylval.sym;
    302 		np->addr = ANUMBER;
    303 		next();
    304 		break;
    305 	case '$':
    306 		next();
    307 		imm = 1;
    308 	default:
    309 	expression:
    310 		if (!imm) {
    311 			np = moperand();
    312 		} else {
    313 			np = expr();
    314 			np->addr = AIMM;
    315 		}
    316 	}
    317 	if (yytoken != ',' && yytoken != EOS)
    318 		error("trailing characters in expression '%s'", textp);
    319 	*strp = endp;
    320 
    321 	return np;
    322 }
    323 
    324 Node **
    325 getargs(char *s)
    326 {
    327 	Node **ap;
    328 	static Node *args[NARGS];
    329 
    330 	if (!s)
    331 		return NULL;
    332 
    333 	for (ap = args; ap < &args[NARGS-1]; ++ap) {
    334 		if ((*ap = operand(&s)) == NULL)
    335 			return args;
    336 	}
    337 	error("too many arguments in one instruction");
    338 }
    339 
    340 static char *
    341 field(char **oldp, size_t *siz)
    342 {
    343 	char *s, *t, *begin;
    344 	size_t n;
    345 
    346 	if ((begin = *oldp) == NULL)
    347 		return NULL;
    348 
    349 	for (s = begin; isspace(*s) && *s != '\t'; ++s)
    350 		;
    351 	if (*s == '\0' || *s == '#') {
    352 		*s = '\0';
    353 		return *oldp = NULL;
    354 	}
    355 
    356 	for (t = s; *t && *t != '\t'; ++t)
    357 		;
    358 	if (*t == '\t')
    359 		*t++ = '\0';
    360 	*siz -= begin - t;
    361 	*oldp = t;
    362 
    363 	while (t >= s && isspace(*t))
    364 		*t-- = '\0';
    365 	return (*s != '\0') ? s : NULL;
    366 }
    367 
    368 static int
    369 validlabel(char *name)
    370 {
    371 	int c;
    372 
    373 	while ((c = *name++) != '\0') {
    374 		if (isalnum(c))
    375 			continue;
    376 		switch (c) {
    377 		case '_':
    378 		case '.':
    379 		case '$':
    380 			continue;
    381 		case ':':
    382 			if (*name != '\0')
    383 				return 0;
    384 			*--name = '\0';
    385 			continue;
    386 		default:
    387 			return 0;
    388 		}
    389 	}
    390 	return 1;
    391 }
    392 
    393 static int
    394 extract(char *s, size_t len, struct line *lp)
    395 {
    396 	int r = 0;
    397 
    398 	if (lp->label = field(&s, &len))
    399 		r++;
    400 	if (lp->op = field(&s, &len))
    401 		r++;
    402 	if (lp->args = field(&s, &len))
    403 		r++;
    404 
    405 	if (s && *s && *s != '#')
    406 		error("trailing characters at the end of the line");
    407 	if (lp->label && !validlabel(lp->label))
    408 		error("incorrect label name '%s'", lp->label);
    409 
    410 	return r;
    411 }
    412 
    413 static size_t
    414 getln(FILE *fp, char buff[MAXLINE])
    415 {
    416 	int c;
    417 	char *bp;
    418 
    419 	for (bp = buff; (c = getc(fp)) != EOF; *bp++ = c) {
    420 		if (c == '\n')
    421 			break;
    422 
    423 		if (c > UCHAR_MAX)
    424 			error("invalid character '%x'", c);
    425 
    426 		if (bp == &buff[MAXLINE-1])
    427 			error("line too long");
    428 	}
    429 	*bp = '\0';
    430 
    431 	return bp - buff;
    432 }
    433 
    434 int
    435 nextline(struct line *lp)
    436 {
    437 	struct input *ip;
    438 	size_t n;
    439 	static char buff[MAXLINE];
    440 
    441 	assert(isp > inputs);
    442 repeat:
    443 	if (isp == inputs)
    444 		return 0;
    445 	ip = &isp[-1];
    446 	if (feof(ip->fp)) {
    447 		delinput();
    448 		goto repeat;
    449 	}
    450 	n = getln(ip->fp, buff);
    451 	if (++ip->lineno == 0)
    452 		die("as: %s: file too long", infile);
    453 	if (n == 0)
    454 		goto repeat;
    455 	if (extract(buff, n, lp) == 0)
    456 		goto repeat;
    457 	return 1;
    458 }
    459 
    460 void
    461 addinput(char *fname)
    462 {
    463 	FILE *fp;
    464 
    465 	if (isp == &inputs[NR_INPUTS])
    466 		die("as: too many included files");
    467 	if ((fp = fopen(fname, "r")) == NULL)
    468 		die("as: %s: %s", fname, strerror(errno));
    469 	isp->fname = xstrdup(fname);
    470 	isp->fp = fp;
    471 	isp->lineno = 0;
    472 	++isp;
    473 }
    474 
    475 int
    476 delinput(void)
    477 {
    478 	if (isp == inputs)
    479 		return EOF;
    480 	--isp;
    481 	if (fclose(isp->fp) == EOF)
    482 		die("as: %s: %s", isp->fname, strerror(errno));
    483 	free(isp->fname);
    484 	return 0;
    485 }