scc

simple c99 compiler
git clone git://git.simple-cc.org/scc
Log | Files | Refs | README | LICENSE

parser.c (6825B)


      1 #include <assert.h>
      2 #include <ctype.h>
      3 #include <errno.h>
      4 #include <limits.h>
      5 #include <setjmp.h>
      6 #include <stdarg.h>
      7 #include <stdio.h>
      8 #include <stdlib.h>
      9 #include <string.h>
     10 
     11 #include <scc/cstd.h>
     12 #include <scc/scc.h>
     13 #include "as.h"
     14 
/* Capacity of the static argument vector returned by getargs() */
#define NARGS 20
/* Number of slots in the input stack (inputs[]) */
#define NR_INPUTS 10
/* Size of the line buffer shared by nextline() and getline() */
#define MAXLINE 100

/* One entry of the input stack; filled in by addinput() */
struct input {
	char *fname;		/* xstrdup'ed copy of the file name, freed by delinput() */
	unsigned lineno;	/* incremented by nextline() for every line it reads */
	FILE *fp;		/* stream opened for reading by addinput() */
};

int nerrors;			/* errors reported so far; error() gives up at 10 */
jmp_buf recover;		/* error() longjmps here after printing its message */
char yytext[INTIDENTSIZ+1];	/* NUL-terminated text of the current token */
int yytoken;			/* last value returned by next() */
size_t yylen;			/* number of characters stored in yytext */
union yylval yylval;		/* value attached to the current token (its symbol) */

static char *textp, *endp;	/* start and one-past-end of the token being scanned */
static int regmode;		/* set by regctx(); when nonzero next() scans '%' as an identifier */
static unsigned lineno;		/* not referenced by the code visible in this file */
static struct input inputs[NR_INPUTS], *isp = inputs;	/* input stack; isp points at the next free slot */
     36 
     37 static int
     38 follow(int expect1, int expect2, int ifyes1, int ifyes2, int ifno)
     39 {
     40 	int c;
     41 
     42 	if ((c = *++textp) == expect1)
     43 		return ifyes1;
     44 	if (c == expect2)
     45 		return ifyes2;
     46 	--textp;
     47 	return ifno;
     48 }
     49 
     50 static void
     51 tok2str(void)
     52 {
     53 	if ((yylen = endp - textp) > INTIDENTSIZ) {
     54 		error("token too big");
     55 		yylen = INTIDENTSIZ;
     56 	}
     57 	memcpy(yytext, textp, yylen);
     58 	yytext[yylen] = '\0';
     59 	textp = endp;
     60 }
     61 
     62 static int
     63 iden(void)
     64 {
     65 	int c;
     66 	char *p;
     67 
     68 	for ( ; c = *endp; ++endp) {
     69 		if (isalnum(c))
     70 			continue;
     71 		switch (c) {
     72 		case '\'':
     73 		case '_':
     74 		case '-':
     75 		case '.':
     76 		case '$':
     77 			continue;
     78 		default:
     79 			goto out_loop;
     80 		}
     81 	}
     82 
     83 out_loop:
     84 	tok2str();
     85 	yylval.sym = lookup(yytext);
     86 
     87 	return (yylval.sym->flags & FREG) ? REG : IDEN;
     88 }
     89 
     90 static int
     91 number(void)
     92 {
     93 	int c, base = 10;
     94 	char *p;
     95 	TUINT n;
     96 
     97 	if (*endp == '0') {
     98 		base = 8;
     99 		++endp;
    100 		if (*endp == 'x') {
    101 			base = 16;
    102 			++endp;
    103 		}
    104 	}
    105 	for (n = 0; (c = *endp) && isxdigit(c); n += c) {
    106 		n *= base;
    107 		c -= '0';
    108 		if (n >= TUINT_MAX - c*base)
    109 			error("overflow in number");
    110 		endp++;
    111 	}
    112 	tok2str();
    113 	yylval.sym = tmpsym(n);
    114 
    115 	return NUMBER;
    116 }
    117 
    118 static int
    119 character(void)
    120 {
    121 	int c;
    122 	char *p;
    123 
    124 	while (*endp != '\'')
    125 		++endp;
    126 	return NUMBER;
    127 }
    128 
    129 static int
    130 string(void)
    131 {
    132 	int c;
    133 	size_t l;
    134 	char *s;
    135 	Symbol *sym = tmpsym(0);
    136 
    137 	for (++endp; *endp != '"'; ++endp)
    138 		;
    139 	++endp;
    140 	tok2str();
    141 	yylval.sym = sym;
    142 	/* FIXME: this memory is not freed ever */
    143 	l = yylen-2;
    144 	s = memcpy(xmalloc(l+1), yytext+1, l);
    145 	s[l] = '\0';
    146 	sym->name.buf = s;
    147 
    148 	return STRING;
    149 }
    150 
    151 static int
    152 operator(void)
    153 {
    154 	int c;
    155 
    156 	++endp;
    157 	if ((c = *textp) == '>')
    158 		c = follow('=', '>', LE, SHL, '>');
    159 	else if (c == '<')
    160 		c = follow('=', '<', GE, SHR, '>');
    161 	tok2str();
    162 
    163 	return c;
    164 }
    165 
    166 int
    167 next(void)
    168 {
    169 	int c;
    170 
    171 	while (isspace(*textp))
    172 		++textp;
    173 
    174 	endp = textp;
    175 
    176 	switch (c = *textp) {
    177 	case '\0':
    178 		strcpy(yytext, "EOS");
    179 		yylen = 3;
    180 		c = EOS;
    181 		break;
    182 	case '"':
    183 		c = string();
    184 		break;
    185 	case '\'':
    186 		c = character();
    187 		break;
    188 	case '%':
    189 		c = (regmode ? iden : operator)();
    190 		break;
    191 	case '_':
    192 		c = iden();
    193 		break;
    194 	default:
    195 		if (isdigit(c))
    196 			c = number();
    197 		else if (isalpha(c))
    198 			c = iden();
    199 		else
    200 			c = operator();
    201 		break;
    202 	}
    203 	return yytoken = c;
    204 }
    205 
    206 void
    207 expect(int token)
    208 {
    209 	if (yytoken != token)
    210 		unexpected();
    211 	next();
    212 }
    213 
    214 void
    215 unexpected(void)
    216 {
    217 	error("unexpected '%s'", yytext);
    218 }
    219 
    220 void
    221 error(char *msg, ...)
    222 {
    223 	va_list va;
    224 	struct input *ip;
    225 
    226 	assert(isp > inputs);
    227 	ip = &isp[-1];
    228 
    229 	va_start(va, msg);
    230 	fprintf(stderr, "as:%s:%u: ", ip->fname, ip->lineno);
    231 	vfprintf(stderr, msg, va);
    232 	putc('\n', stderr);
    233 	nerrors++;
    234 	va_end(va);
    235 
    236 	if (nerrors == 10)
    237 		die("as: too many errors");
    238 	longjmp(recover, 1);
    239 }
    240 
    241 Node *
    242 getreg(void)
    243 {
    244 	Node *np;
    245 
    246 	np = node(REG, NULL, NULL);
    247 	np->sym = yylval.sym;
    248 	np->addr = AREG;
    249 	expect(REG);
    250 	return np;
    251 }
    252 
    253 void
    254 regctx(int mode)
    255 {
    256 	regmode = mode;
    257 }
    258 
    259 Node *
    260 operand(char **strp)
    261 {
    262 	int imm = 0;
    263 	Node *np;
    264 
    265 	textp = *strp;
    266 	regctx(1);
    267 	switch (next()) {
    268 	case EOS:
    269 		np = NULL;
    270 		break;
    271 	case REG:
    272 		np = getreg();
    273 		break;
    274 	case STRING:
    275 		np = node(yytoken, NULL, NULL);
    276 		np->sym = yylval.sym;
    277 		np->addr = ASTR;
    278 		next();
    279 		break;
    280 	case '$':
    281 		next();
    282 		imm = 1;
    283 	default:
    284 		if (!imm) {
    285 			np = moperand();
    286 		} else {
    287 			np = expr();
    288 			np->addr = AIMM;
    289 		}
    290 	}
    291 	if (yytoken != ',' && yytoken != EOS)
    292 		error("trailing characters in expression '%s'", textp);
    293 	*strp = endp;
    294 
    295 	return np;
    296 }
    297 
    298 Node **
    299 getargs(char *s)
    300 {
    301 	Node **ap;
    302 	static Node *args[NARGS];
    303 
    304 	if (!s)
    305 		return NULL;
    306 
    307 	for (ap = args; ap < &args[NARGS-1]; ++ap) {
    308 		if ((*ap = operand(&s)) == NULL)
    309 			return args;
    310 	}
    311 	error("too many arguments in one instruction");
    312 }
    313 
    314 static char *
    315 field(char **oldp, size_t *siz)
    316 {
    317 	char *s, *t, *begin;
    318 	size_t n;
    319 
    320 	if ((begin = *oldp) == NULL)
    321 		return NULL;
    322 
    323 	for (s = begin; isspace(*s) && *s != '\t'; ++s)
    324 		;
    325 	if (*s == '\0' || *s == '/' || *s == ';') {
    326 		*s = '\0';
    327 		return *oldp = NULL;
    328 	}
    329 
    330 	for (t = s; *t && *t != '\t'; ++t)
    331 		;
    332 	if (*t == '\t')
    333 		*t++ = '\0';
    334 	*siz -= begin - t;
    335 	*oldp = t;
    336 
    337 	while (t >= s && isspace(*t))
    338 		*t-- = '\0';
    339 	return (*s != '\0') ? s : NULL;
    340 }
    341 
    342 static int
    343 validlabel(char *name)
    344 {
    345 	int c;
    346 
    347 	while ((c = *name++) != '\0') {
    348 		if (isalnum(c))
    349 			continue;
    350 		switch (c) {
    351 		case '_':
    352 		case '-':
    353 		case '.':
    354 		case '$':
    355 			continue;
    356 		case ':':
    357 			if (*name != '\0')
    358 				return 0;
    359 			*--name = '\0';
    360 			continue;
    361 		default:
    362 			return 0;
    363 		}
    364 	}
    365 	return 1;
    366 }
    367 
    368 static int
    369 extract(char *s, size_t len, struct line *lp)
    370 {
    371 	int r = 0;
    372 
    373 	if (lp->label = field(&s, &len))
    374 		r++;
    375 	if (lp->op = field(&s, &len))
    376 		r++;
    377 	if (lp->args = field(&s, &len))
    378 		r++;
    379 
    380 	if (s && *s && *s != '/')
    381 		error("trailing characters at the end of the line");
    382 	if (lp->label && !validlabel(lp->label))
    383 		error("incorrect label name '%s'", lp->label);
    384 
    385 	return r;
    386 }
    387 
    388 static void
    389 comment(FILE *fp)
    390 {
    391 	int c;
    392 
    393 	while ((c = getc(fp)) != EOF) {
    394 		if (c != '*')
    395 			continue;
    396 		if ((c = getc(fp)) == '/')
    397 			return;
    398 		ungetc(c, fp);
    399 	}
    400 }
    401 
    402 static size_t
    403 getline(FILE *fp, char buff[MAXLINE])
    404 {
    405 	int c;
    406 	char *bp;
    407 
    408 	for (bp = buff; (c = getc(fp)) != EOF; *bp++ = c) {
    409 		if (c == '\n')
    410 			break;
    411 		if (c == '/') {
    412 			if ((c = getc(fp)) != '*') {
    413 				ungetc(c, fp);
    414 				c = '/';
    415 			} else {
    416 				comment(fp);
    417 				c = ' ';
    418 			}
    419 		} else if (c > UCHAR_MAX) {
    420 			error("invalid character '%x'", c);
    421 		}
    422 		if (bp == &buff[MAXLINE-1])
    423 			error("line too long");
    424 	}
    425 	*bp = '\0';
    426 
    427 	return bp - buff;
    428 }
    429 
    430 int
    431 nextline(struct line *lp)
    432 {
    433 	struct input *ip;
    434 	size_t n;
    435 	static char buff[MAXLINE];
    436 
    437 	assert(isp > inputs);
    438 repeat:
    439 	if (isp == inputs)
    440 		return 0;
    441 	ip = &isp[-1];
    442 	if (feof(ip->fp)) {
    443 		delinput();
    444 		goto repeat;
    445 	}
    446 	n = getline(ip->fp, buff);
    447 	if (++ip->lineno == 0)
    448 		die("as: %s: file too long", infile);
    449 	if (n == 0)
    450 		goto repeat;
    451 	if (extract(buff, n, lp) == 0)
    452 		goto repeat;
    453 	return 1;
    454 }
    455 
    456 void
    457 addinput(char *fname)
    458 {
    459 	FILE *fp;
    460 
    461 	if (isp == &inputs[NR_INPUTS])
    462 		die("as: too many included files");
    463 	if ((fp = fopen(fname, "r")) == NULL)
    464 		die("as: %s: %s", fname, strerror(errno));
    465 	isp->fname = xstrdup(fname);
    466 	isp->fp = fp;
    467 	isp->lineno = 0;
    468 	++isp;
    469 }
    470 
    471 int
    472 delinput(void)
    473 {
    474 	if (isp == inputs)
    475 		return EOF;
    476 	--isp;
    477 	if (fclose(isp->fp) == EOF)
    478 		die("as: %s: %s", isp->fname, strerror(errno));
    479 	free(isp->fname);
    480 	return 0;
    481 }