scc

simple c99 compiler
git clone git://git.simple-cc.org/scc
Log | Files | Refs | README | LICENSE

parser.c (6880B)


      1 static char sccsid[] = "@(#) ./as/parser.c";
      2 #include <assert.h>
      3 #include <ctype.h>
      4 #include <errno.h>
      5 #include <limits.h>
      6 #include <setjmp.h>
      7 #include <stdarg.h>
      8 #include <stdio.h>
      9 #include <stdlib.h>
     10 #include <string.h>
     11 
     12 #include <scc/cstd.h>
     13 #include <scc/scc.h>
     14 #include "as.h"
     15 
     16 #define NARGS 20
     17 #define NR_INPUTS 10
     18 #define MAXLINE 100
     19 
     20 struct input {
     21 	char *fname;
     22 	unsigned lineno;
     23 	FILE *fp;
     24 };
     25 
     26 int nerrors;
     27 jmp_buf recover;
     28 char yytext[INTIDENTSIZ+1];
     29 int yytoken;
     30 size_t yylen;
     31 union yylval yylval;
     32 
     33 static char *textp, *endp;
     34 static int regmode;
     35 static unsigned lineno;
     36 static struct input inputs[NR_INPUTS], *isp = inputs;
     37 
     38 static int
     39 follow(int expect1, int expect2, int ifyes1, int ifyes2, int ifno)
     40 {
     41 	int c;
     42 
     43 	if ((c = *++textp) == expect1)
     44 		return ifyes1;
     45 	if (c == expect2)
     46 		return ifyes2;
     47 	--textp;
     48 	return ifno;
     49 }
     50 
     51 static void
     52 tok2str(void)
     53 {
     54 	if ((yylen = endp - textp) > INTIDENTSIZ) {
     55 		error("token too big");
     56 		yylen = INTIDENTSIZ;
     57 	}
     58 	memcpy(yytext, textp, yylen);
     59 	yytext[yylen] = '\0';
     60 	textp = endp;
     61 }
     62 
     63 static int
     64 iden(void)
     65 {
     66 	int c;
     67 	char *p;
     68 
     69 	for ( ; c = *endp; ++endp) {
     70 		if (isalnum(c))
     71 			continue;
     72 		switch (c) {
     73 		case '\'':
     74 		case '_':
     75 		case '-':
     76 		case '.':
     77 		case '$':
     78 			continue;
     79 		default:
     80 			goto out_loop;
     81 		}
     82 	}
     83 
     84 out_loop:
     85 	tok2str();
     86 	yylval.sym = lookup(yytext);
     87 
     88 	return (yylval.sym->flags & FREG) ? REG : IDEN;
     89 }
     90 
     91 static int
     92 number(void)
     93 {
     94 	int c, base = 10;
     95 	char *p;
     96 	TUINT n;
     97 
     98 	if (*endp == '0') {
     99 		base = 8;
    100 		++endp;
    101 		if (*endp == 'x') {
    102 			base = 16;
    103 			++endp;
    104 		}
    105 	}
    106 	for (n = 0; (c = *endp) && isxdigit(c); n += c) {
    107 		n *= base;
    108 		c -= '0';
    109 		if (n >= TUINT_MAX - c*base)
    110 			error("overflow in number");
    111 		endp++;
    112 	}
    113 	tok2str();
    114 	yylval.sym = tmpsym(n);
    115 
    116 	return NUMBER;
    117 }
    118 
    119 static int
    120 character(void)
    121 {
    122 	int c;
    123 	char *p;
    124 
    125 	while (*endp != '\'')
    126 		++endp;
    127 	return NUMBER;
    128 }
    129 
    130 static int
    131 string(void)
    132 {
    133 	int c;
    134 	size_t l;
    135 	char *s;
    136 	Symbol *sym = tmpsym(0);
    137 
    138 	for (++endp; *endp != '"'; ++endp)
    139 		;
    140 	++endp;
    141 	tok2str();
    142 	yylval.sym = sym;
    143 	/* FIXME: this memory is not freed ever */
    144 	l = yylen-2;
    145 	s = memcpy(xmalloc(l+1), yytext+1, l);
    146 	s[l] = '\0';
    147 	sym->name.buf = s;
    148 
    149 	return STRING;
    150 }
    151 
    152 static int
    153 operator(void)
    154 {
    155 	int c;
    156 
    157 	++endp;
    158 	if ((c = *textp) == '>')
    159 		c = follow('=', '>', LE, SHL, '>');
    160 	else if (c == '<')
    161 		c = follow('=', '<', GE, SHR, '>');
    162 	tok2str();
    163 
    164 	return c;
    165 }
    166 
    167 int
    168 next(void)
    169 {
    170 	int c;
    171 
    172 	while (isspace(*textp))
    173 		++textp;
    174 
    175 	endp = textp;
    176 
    177 	switch (c = *textp) {
    178 	case '\0':
    179 		strcpy(yytext, "EOS");
    180 		yylen = 3;
    181 		c = EOS;
    182 		break;
    183 	case '"':
    184 		c = string();
    185 		break;
    186 	case '\'':
    187 		c = character();
    188 		break;
    189 	case '%':
    190 		c = (regmode ? iden : operator)();
    191 		break;
    192 	case '_':
    193 		c = iden();
    194 		break;
    195 	default:
    196 		if (isdigit(c))
    197 			c = number();
    198 		else if (isalpha(c))
    199 			c = iden();
    200 		else
    201 			c = operator();
    202 		break;
    203 	}
    204 	return yytoken = c;
    205 }
    206 
    207 void
    208 expect(int token)
    209 {
    210 	if (yytoken != token)
    211 		unexpected();
    212 	next();
    213 }
    214 
    215 void
    216 unexpected(void)
    217 {
    218 	error("unexpected '%s'", yytext);
    219 }
    220 
    221 void
    222 error(char *msg, ...)
    223 {
    224 	va_list va;
    225 	struct input *ip;
    226 
    227 	assert(isp > inputs);
    228 	ip = &isp[-1];
    229 
    230 	va_start(va, msg);
    231 	fprintf(stderr, "as:%s:%u: ", ip->fname, ip->lineno);
    232 	vfprintf(stderr, msg, va);
    233 	putc('\n', stderr);
    234 	nerrors++;
    235 	va_end(va);
    236 
    237 	if (nerrors == 10)
    238 		die("as: too many errors");
    239 	longjmp(recover, 1);
    240 }
    241 
    242 Node *
    243 getreg(void)
    244 {
    245 	Node *np;
    246 
    247 	np = node(REG, NULL, NULL);
    248 	np->sym = yylval.sym;
    249 	np->addr = AREG;
    250 	expect(REG);
    251 	return np;
    252 }
    253 
    254 void
    255 regctx(int mode)
    256 {
    257 	regmode = mode;
    258 }
    259 
    260 Node *
    261 operand(char **strp)
    262 {
    263 	int imm = 0;
    264 	Node *np;
    265 
    266 	textp = *strp;
    267 	regctx(1);
    268 	switch (next()) {
    269 	case EOS:
    270 		np = NULL;
    271 		break;
    272 	case REG:
    273 		np = getreg();
    274 		break;
    275 	case STRING:
    276 		np = node(yytoken, NULL, NULL);
    277 		np->sym = yylval.sym;
    278 		np->addr = ASTR;
    279 		next();
    280 		break;
    281 	case '$':
    282 		next();
    283 		imm = 1;
    284 	default:
    285 		if (!imm) {
    286 			np = moperand();
    287 		} else {
    288 			np = expr();
    289 			np->addr = AIMM;
    290 		}
    291 	}
    292 	if (yytoken != ',' && yytoken != EOS)
    293 		error("trailing characters in expression '%s'", textp);
    294 	*strp = endp;
    295 
    296 	return np;
    297 }
    298 
    299 Node **
    300 getargs(char *s)
    301 {
    302 	Node **ap;
    303 	static Node *args[NARGS];
    304 
    305 	if (!s)
    306 		return NULL;
    307 
    308 	for (ap = args; ap < &args[NARGS-1]; ++ap) {
    309 		if ((*ap = operand(&s)) == NULL)
    310 			return args;
    311 	}
    312 	error("too many arguments in one instruction");
    313 }
    314 
    315 static char *
    316 field(char **oldp, size_t *siz)
    317 {
    318 	char *s, *t, *begin;
    319 	size_t n;
    320 
    321 	if ((begin = *oldp) == NULL)
    322 		return NULL;
    323 
    324 	for (s = begin; isspace(*s) && *s != '\t'; ++s)
    325 		;
    326 	if (*s == '\0' || *s == '/' || *s == ';') {
    327 		*s = '\0';
    328 		return *oldp = NULL;
    329 	}
    330 
    331 	for (t = s; *t && *t != '\t'; ++t)
    332 		;
    333 	if (*t == '\t')
    334 		*t++ = '\0';
    335 	*siz -= begin - t;
    336 	*oldp = t;
    337 
    338 	while (t >= s && isspace(*t))
    339 		*t-- = '\0';
    340 	return (*s != '\0') ? s : NULL;
    341 }
    342 
    343 static int
    344 validlabel(char *name)
    345 {
    346 	int c;
    347 
    348 	while ((c = *name++) != '\0') {
    349 		if (isalnum(c))
    350 			continue;
    351 		switch (c) {
    352 		case '_':
    353 		case '-':
    354 		case '.':
    355 		case '$':
    356 			continue;
    357 		case ':':
    358 			if (*name != '\0')
    359 				return 0;
    360 			*--name = '\0';
    361 			continue;
    362 		default:
    363 			return 0;
    364 		}
    365 	}
    366 	return 1;
    367 }
    368 
    369 static int
    370 extract(char *s, size_t len, struct line *lp)
    371 {
    372 	int r = 0;
    373 
    374 	if (lp->label = field(&s, &len))
    375 		r++;
    376 	if (lp->op = field(&s, &len))
    377 		r++;
    378 	if (lp->args = field(&s, &len))
    379 		r++;
    380 
    381 	if (s && *s && *s != '/')
    382 		error("trailing characters at the end of the line");
    383 	if (lp->label && !validlabel(lp->label))
    384 		error("incorrect label name '%s'", lp->label);
    385 
    386 	return r;
    387 }
    388 
    389 static void
    390 comment(FILE *fp)
    391 {
    392 	int c;
    393 
    394 	while ((c = getc(fp)) != EOF) {
    395 		if (c != '*')
    396 			continue;
    397 		if ((c = getc(fp)) == '/')
    398 			return;
    399 		ungetc(c, fp);
    400 	}
    401 }
    402 
    403 static size_t
    404 getline(FILE *fp, char buff[MAXLINE])
    405 {
    406 	int c;
    407 	char *bp;
    408 
    409 	for (bp = buff; (c = getc(fp)) != EOF; *bp++ = c) {
    410 		if (c == '\n')
    411 			break;
    412 		if (c == '/') {
    413 			if ((c = getc(fp)) != '*') {
    414 				ungetc(c, fp);
    415 				c = '/';
    416 			} else {
    417 				comment(fp);
    418 				c = ' ';
    419 			}
    420 		} else if (c > UCHAR_MAX) {
    421 			error("invalid character '%x'", c);
    422 		}
    423 		if (bp == &buff[MAXLINE-1])
    424 			error("line too long");
    425 	}
    426 	*bp = '\0';
    427 
    428 	return bp - buff;
    429 }
    430 
    431 int
    432 nextline(FILE *fp, struct line *lp)
    433 {
    434 	struct input *ip;
    435 	size_t n;
    436 	static char buff[MAXLINE];
    437 
    438 	assert(isp > inputs);
    439 repeat:
    440 	if (isp == inputs)
    441 		return 0;
    442 	ip = &isp[-1];
    443 	if (feof(ip->fp)) {
    444 		delinput();
    445 		goto repeat;
    446 	}
    447 	n = getline(ip->fp, buff);
    448 	if (++ip->lineno == 0)
    449 		die("as: %s: file too long", infile);
    450 	if (n == 0)
    451 		goto repeat;
    452 	if (extract(buff, n, lp) == 0)
    453 		goto repeat;
    454 	return 1;
    455 }
    456 
    457 void
    458 addinput(char *fname)
    459 {
    460 	FILE *fp;
    461 
    462 	if (isp == &inputs[NR_INPUTS])
    463 		die("as: too many included files");
    464 	if ((fp = fopen(fname, "r")) == NULL)
    465 		die("as: %s: %s", fname, strerror(errno));
    466 	isp->fname = xstrdup(fname);
    467 	isp->fp = fp;
    468 	isp->lineno = 0;
    469 	++isp;
    470 }
    471 
    472 int
    473 delinput(void)
    474 {
    475 	if (isp == inputs)
    476 		return EOF;
    477 	--isp;
    478 	if (fclose(isp->fp) == EOF)
    479 		die("as: %s: %s", isp->fname, strerror(errno));
    480 	free(isp->fname);
    481 	return 0;
    482 }