scc

simple c99 compiler
git clone git://git.simple-cc.org/scc
Log | Files | Refs | Submodules | README | LICENSE

parser.c (14397B)


      1 #include <errno.h>
      2 #include <limits.h>
      3 #include <stdio.h>
      4 #include <stdlib.h>
      5 #include <string.h>
      6 
      7 #include <scc/cstd.h>
      8 #include <scc/scc.h>
      9 
     10 #include "cc2.h"
     11 
     12 #define STACKSIZ     50
     13 
     14 Type funetype = {
     15 	.flags = FUNF | ELLIPS
     16 };
     17 
     18 Type funtype = {
     19 	.flags = FUNF
     20 };
     21 
     22 union tokenop {
     23 	void *arg;
     24 	unsigned op;
     25 };
     26 
     27 Node *laststmt;
     28 static Swtch swtbl[NR_BLOCK], *swp = swtbl;
     29 static Symbol *lastfun;
     30 
     31 typedef void parsefun(char *, union tokenop);
     32 static parsefun type, symbol, getname, unary, binary, ternary, call,
     33                 constant, composed, binit, einit,
     34                 jump, oreturn, loop, assign,
     35                 ocase, bswitch, eswitch, builtin;
     36 
     37 typedef void evalfun(void);
     38 static evalfun vardecl, beginfun, endfun, endpars, stmt,
     39                array, aggregate, flddecl, labeldcl;
     40 
     41 static struct decoc {
     42 	void (*eval)(void);
     43 	void (*parse)(char *token, union tokenop);
     44 	union tokenop u;
     45 } optbl[] = {      /*  eval     parse           args */
     46 	['A']   = {  vardecl,  symbol, .u.op  =  SAUTO<<8 | OAUTO},
     47 	['R']   = {  vardecl,  symbol, .u.op  =   SREG<<8 |  OREG},
     48 	['G']   = {  vardecl,  symbol, .u.op  =  SGLOB<<8 |  OMEM},
     49 	['X']   = {  vardecl,  symbol, .u.op  = SEXTRN<<8 |  OMEM},
     50 	['Y']   = {  vardecl,  symbol, .u.op  =  SPRIV<<8 |  OMEM},
     51 	['T']   = {  vardecl,  symbol, .u.op  = SLOCAL<<8 |  OMEM},
     52 	['M']   = {  flddecl,  symbol, .u.op  =  SMEMB<<8 |  OMEM},
     53 	['L']   = { labeldcl,  symbol, .u.op  = SLABEL<<8 | OLABEL},
     54 
     55 	['C']   = {     NULL,    type, .u.arg =    &int8type},
     56 	['I']   = {     NULL,    type, .u.arg =   &int16type},
     57 	['W']   = {     NULL,    type, .u.arg =   &int32type},
     58 	['Q']   = {     NULL,    type, .u.arg =   &int64type},
     59 	['K']   = {     NULL,    type, .u.arg =   &uint8type},
     60 	['N']   = {     NULL,    type, .u.arg =  &uint16type},
     61 	['Z']   = {     NULL,    type, .u.arg =  &uint32type},
     62 	['O']   = {     NULL,    type, .u.arg =  &uint64type},
     63 	['J']   = {     NULL,    type, .u.arg = &float32type},
     64 	['D']   = {     NULL,    type, .u.arg = &float64type},
     65 	['H']   = {     NULL,    type, .u.arg = &float80type},
     66 	['0']   = {     NULL,    type, .u.arg =    &voidtype},
     67 	['B']   = {     NULL,    type, .u.arg =    &booltype},
     68 	['P']   = {     NULL,    type, .u.arg =     &ptrtype},
     69 	['E']   = {     NULL,    type, .u.arg =    &funetype},
     70 	['1']	= {     NULL,    type, .u.arg =    &arg_type},
     71 
     72 	['F']   = {     NULL,    type, .u.arg =     &funtype},
     73 	['V']   = {    array,composed,                     0},
     74 	['U']   = {aggregate,composed,                     0},
     75 	['S']   = {aggregate,composed,                     0},
     76 
     77 	['"']   = {     NULL, getname,                     0},
     78 	['{']   = { beginfun,    NULL,                     0},
     79 	['}']   = {   endfun,    NULL,                     0},
     80 	['(']   = {     NULL,   binit,                     0},
     81 	[')']   = {     NULL,   einit,                     0},
     82 	['\\']  = {  endpars,    NULL,                     0},
     83 	['\t']  = {     stmt,    NULL,                     0},
     84 
     85 	['~']   = {     NULL,   unary, .u.op =          OCPL},
     86 	['_']   = {     NULL,   unary, .u.op =         OSNEG},
     87 	['\'']  = {     NULL,   unary, .u.op =         OADDR},
     88 	['@']   = {     NULL,   unary, .u.op =          OPTR},
     89 	['g']   = {     NULL,   unary, .u.op =         OCAST},
     90 	['p']   = {     NULL,   unary, .u.op =          OPAR},
     91 	['n']   = {     NULL,   unary, .u.op =          ONEG},
     92 
     93 	['a']   = {     NULL,  binary, .u.op =          OAND},
     94 	['o']   = {     NULL,  binary, .u.op =           OOR},
     95 	['.']   = {     NULL,  binary, .u.op =        OFIELD},
     96 	['+']   = {     NULL,  binary, .u.op =          OADD},
     97 	['-']   = {     NULL,  binary, .u.op =          OSUB},
     98 	['*']   = {     NULL,  binary, .u.op =          OMUL},
     99 	['%']   = {     NULL,  binary, .u.op =          OMOD},
    100 	['/']   = {     NULL,  binary, .u.op =          ODIV},
    101 	['l']   = {     NULL,  binary, .u.op =          OSHL},
    102 	['r']   = {     NULL,  binary, .u.op =          OSHR},
    103 	['<']   = {     NULL,  binary, .u.op =           OLT},
    104 	['>']   = {     NULL,  binary, .u.op =           OGT},
    105 	['[']   = {     NULL,  binary, .u.op =           OLE},
    106 	[']']   = {     NULL,  binary, .u.op =           OGE},
    107 	['=']   = {     NULL,  binary, .u.op =           OEQ},
    108 	['!']   = {     NULL,  binary, .u.op =           ONE},
    109 	['&']   = {     NULL,  binary, .u.op =         OBAND},
    110 	['|']   = {     NULL,  binary, .u.op =          OBOR},
    111 	['^']   = {     NULL,  binary, .u.op =         OBXOR},
    112 	[',']   = {     NULL,  binary, .u.op =        OCOMMA},
    113 	['m']   = {     NULL,  builtin,.u.op =      OBUILTIN},
    114 
    115 	[':']   = {     NULL,  assign, .u.op =        OASSIG},
    116 	['?']   = {     NULL, ternary, .u.op =          OASK},
    117 	['c']   = {     NULL,    call, .u.op =         OCALL},
    118 	['z']   = {     NULL,    call, .u.op =        OCALLE},
    119 
    120 	['#']   = {     NULL,constant, .u.op =        OCONST},
    121 
    122 	['j']   = {     NULL,    jump, .u.op =          OJMP},
    123 	['y']   = {     NULL,    jump, .u.op =       OBRANCH},
    124 	['h']   = {     NULL, oreturn, .u.op =          ORET},
    125 	['i']   = {     NULL,    NULL, .u.op =          OINC},
    126 	['d']   = {     NULL,    NULL, .u.op =          ODEC},
    127 
    128 	['b']   = {     NULL,    loop, .u.op =        OBLOOP},
    129 	['e']   = {     NULL,    loop, .u.op =        OELOOP},
    130 
    131 	['v']   = {     NULL,   ocase, .u.op =         OCASE},
    132 	['f']   = {     NULL,   ocase, .u.op =      ODEFAULT},
    133 	['t']   = {     NULL, eswitch, .u.op =      OESWITCH},
    134 	['s']   = {     NULL, bswitch, .u.op =      OBSWITCH},
    135 };
    136 
    137 static int sclass, inpars, ininit, beginf, endf, lineno;
    138 static void *stack[STACKSIZ], **sp = stack;
    139 
    140 static Node *
    141 push(void *elem)
    142 {
    143 	if (sp == &stack[STACKSIZ])
    144 		error(ESTACKO);
    145 	return *sp++ = elem;
    146 }
    147 
    148 static void *
    149 pop(void)
    150 {
    151 	if (sp == stack)
    152 		error(ESTACKU);
    153 	return *--sp;
    154 }
    155 
    156 static int
    157 empty(void)
    158 {
    159 	return sp == stack;
    160 }
    161 
    162 static void
    163 type(char *token, union tokenop u)
    164 {
    165 	push(u.arg);
    166 }
    167 
    168 static void
    169 composed(char *token, union tokenop u)
    170 {
    171 	Symbol *sym;
    172 
    173 	sym = getsym(atoi(token+1));
    174 	sym->type.id = sym->id;
    175 	push(&sym->type);
    176 }
    177 
    178 static void
    179 getname(char *t, union tokenop u)
    180 {
    181 	push((*++t) ? xstrdup(t) : NULL);
    182 }
    183 
    184 static void
    185 symbol(char *token, union tokenop u)
    186 {
    187 	Node *np = node(u.op & 0xFF);
    188 	Symbol *sym = getsym(atoi(token+1));
    189 
    190 	sclass = u.op >> 8;
    191 	np->u.sym = sym;
    192 	np->type = sym->type;
    193 	push(np);
    194 }
    195 
    196 static Type *
    197 gettype(char *token)
    198 {
    199 	struct decoc *dp;
    200 
    201 	dp = &optbl[*token];
    202 	if (!dp->parse)
    203 		error(ESYNTAX);
    204 	(*dp->parse)(token, dp->u);
    205 	return pop();
    206 }
    207 
    208 static void
    209 constant(char *token, union tokenop u)
    210 {
    211 	static char letters[] = "0123456789ABCDEF";
    212 	Node *np;
    213 	TUINT v;
    214 	unsigned c;
    215 
    216 	++token;
    217 	if (*token == '"') {
    218 		++token;
    219 		np = node(OSTRING);
    220 		np->type.flags = STRF;
    221 		np->type.size = strlen(token);
    222 		np->type.align = int8type.align;
    223 		np->u.s = xstrdup(token);
    224 	} else {
    225 		np = node(OCONST);
    226 		np->type = *gettype(token++);
    227 		for (v = 0; c = *token++; v += c) {
    228 			v <<= 4;
    229 			c = strchr(letters, c) - letters;
    230 		}
    231 		np->u.i = v;
    232 	}
    233 	push(np);
    234 }
    235 
    236 static void
    237 assign(char *token, union tokenop u)
    238 {
    239 	int subop;
    240 	Node *np = node(u.op);
    241 
    242 	switch (subop = *++token) {
    243 	case '+':
    244 	case '-':
    245 	case '*':
    246 	case '%':
    247 	case '/':
    248 	case 'l':
    249 	case 'r':
    250 	case '&':
    251 	case '|':
    252 	case '^':
    253 	case 'i':
    254 	case 'd':
    255 		++token;
    256 		subop = optbl[subop].u.op;
    257 		break;
    258 	default:
    259 		subop = 0;
    260 		break;
    261 	}
    262 
    263 	np->u.subop = subop;
    264 	np->type = *gettype(token);
    265 	np->right = pop();
    266 	np->left = pop();
    267 	push(np);
    268 }
    269 
    270 static void
    271 ternary(char *token, union tokenop u)
    272 {
    273 	Node *ask = node(OASK), *colon = node(OCOLON);
    274 	Type *tp = gettype(token+1);
    275 
    276 	colon->right = pop();
    277 	colon->left = pop();
    278 
    279 	ask->type = *tp;
    280 	ask->left = pop();
    281 	ask->right = colon;
    282 	push(ask);
    283 }
    284 
    285 static void
    286 eval(char *tok)
    287 {
    288 	struct decoc *dp;
    289 
    290 	do {
    291 		dp = &optbl[*tok];
    292 		if (!dp->parse)
    293 			break;
    294 		(*dp->parse)(tok, dp->u);
    295 	} while (tok = strtok(NULL, "\t\n"));
    296 }
    297 
    298 static int
    299 nextline(void)
    300 {
    301 	static char line[LINESIZ];
    302 	size_t len;
    303 	int c;
    304 	void (*fun)(void);
    305 
    306 repeat:
    307 	++lineno;
    308 	if (!fgets(line, sizeof(line), stdin))
    309 		return 0;
    310 	if ((len = strlen(line)) == 0 || line[0] == '\n')
    311 		goto repeat;
    312 	if (line[len-1] != '\n')
    313 		error(len < sizeof(line)-1 ? ELNBLNE : ELNLINE);
    314 	line[len-1] = '\0';
    315 
    316 	c = *line;
    317 	eval(strtok(line, "\t\n"));
    318 	if ((fun = *optbl[c].eval) != NULL)
    319 		(*fun)();
    320 	if (sp != stack)
    321 		error(ESTACKA);
    322 	return 1;
    323 }
    324 
    325 static void
    326 oreturn(char *token, union tokenop u)
    327 {
    328 	Node *np = node(u.op);
    329 
    330 	if (token = strtok(NULL, "\t\n"))
    331 		eval(token);
    332 	if (!empty())
    333 		np->left = pop();
    334 	push(np);
    335 }
    336 
    337 static void
    338 addcase(Node *np)
    339 {
    340 	TINT val;
    341 	Node **bp;
    342 	Swtch *cur;
    343 
    344 	if (swp == swtbl)
    345 		error(EWTACKU);
    346 
    347 	cur = swp - 1;
    348 	if (np->op == ODEFAULT) {
    349 		cur->defnode = np;
    350 	} else {
    351 		cur->nr++;
    352 		bp = cur->cases;
    353 		bp = xrealloc(bp, cur->nr * sizeof(Node *));
    354 		bp[cur->nr-1] = np;
    355 		cur->cases = bp;
    356 
    357 		val = np->left->u.i;
    358 		if (val < cur->min)
    359 			cur->min = val;
    360 		if (val > cur->max)
    361 			cur->max = val;
    362 	}
    363 }
    364 
    365 static void
    366 bswitch(char *token, union tokenop u)
    367 {
    368 	Swtch *cur;
    369 	Node *np = node(u.op);
    370 
    371 	if (swp == &swtbl[NR_BLOCK])
    372 		error(EWTACKO);
    373 	cur = swp++;
    374 	cur->nr = 0;
    375 	cur->min = TINT_MAX;
    376 	cur->max = TINT_MIN;
    377 	cur->defnode = NULL;
    378 	cur->cases = NULL;
    379 	eval(strtok(NULL, "\t\n"));
    380 	np->left = pop();
    381 
    382 	push(cur->bswtch = np);
    383 }
    384 
    385 static int
    386 cmpcase(const void *p1, const void *p2)
    387 {
    388 	TINT d;
    389 	Node *np1, *np2;
    390 
    391 	np1 = *(Node **) p1;
    392 	np2 = *(Node **) p2;
    393 	d = np1->left->u.i - np2->left->u.i;
    394 	if (d < 0)
    395 		return -1;
    396 	if (d > 0)
    397 		return 1;
    398 	return 0;
    399 }
    400 
    401 static void
    402 eswitch(char *token, union tokenop u)
    403 {
    404 	Node *p;
    405 	Swtch *cur;
    406 
    407 	if (swp == swtbl)
    408 		error(EWTACKU);
    409 	jump(token, u);
    410 
    411 	cur = --swp;
    412 	cur->eswtch = pop();
    413 	cur->bswtch->u.swtch = newswtch(cur);
    414 	qsort(cur->cases, cur->nr, sizeof(Node *), cmpcase);
    415 }
    416 
    417 static void
    418 ocase(char *token, union tokenop u)
    419 {
    420 	jump(token, u);
    421 	addcase(pop());
    422 }
    423 
    424 static void
    425 jump(char *token, union tokenop u)
    426 {
    427 	Symbol *label;
    428 	Node *aux, *np = node(u.op);
    429 
    430 	eval(strtok(NULL, "\t\n"));
    431 
    432 	if (u.op == OBRANCH || u.op == OCASE)
    433 		np->left = pop();
    434 	aux = pop();
    435 	label = aux->u.sym;
    436 	label->refcnt++;
    437 	np->u.sym = label;
    438 	delnode(aux);
    439 	push(np);
    440 }
    441 
    442 static void
    443 loop(char *token, union tokenop u)
    444 {
    445 	push(node(u.op));
    446 }
    447 
    448 static void
    449 unary(char *token, union tokenop u)
    450 {
    451 	Node *np = node(u.op);
    452 
    453 	np->type = *gettype(token+1);
    454 	np->left = pop();
    455 	np->right = NULL;
    456 	push(np);
    457 }
    458 
    459 static void
    460 call(char *token, union tokenop u)
    461 {
    462 	Node *np, *par, *fun = node(u.op);
    463 
    464 	for (par = NULL;; par = np) {
    465 		np = pop();
    466 		if (np->op != OPAR)
    467 			break;
    468 		np->right = par;
    469 	}
    470 
    471 	fun->type = *gettype(token+1);
    472 	fun->left = np;
    473 	fun->right = par;
    474 	push(fun);
    475 }
    476 
    477 static void
    478 builtin(char *token, union tokenop u)
    479 {
    480 	Node *np = node(u.op);
    481 	char *name;
    482 	unsigned subop, nchilds;
    483 
    484 	np->type = *gettype(token+1);
    485 	name = pop();
    486 
    487 	if (!strcmp("__builtin_va_arg", name)) {
    488 		nchilds = 1;
    489 		subop = BVA_ARG;
    490 	} else if (!strcmp("__builtin_va_start", name)) {
    491 		nchilds = 2;
    492 		subop = BVA_START;
    493 	} else if (!strcmp("__builtin_va_end", name)) {
    494 		nchilds = 1;
    495 		subop = BVA_END;
    496 	} else if (!strcmp("__builtin_va_copy", name)) {
    497 		nchilds = 2;
    498 		subop = BVA_COPY;
    499 	} else {
    500 		error(EBBUILT);
    501 	}
    502 
    503 	np->u.subop = subop;
    504 	np->right = (nchilds == 2) ? pop() : NULL;
    505 	np->left = (nchilds != 0) ? pop() : NULL;
    506 
    507 	free(name);
    508 	push(np);
    509 }
    510 
    511 static void
    512 binary(char *token, union tokenop u)
    513 {
    514 	Node *np = node(u.op);
    515 
    516 	np->type = *gettype(token+1);
    517 	np->right = pop();
    518 	np->left = pop();
    519 	push(np);
    520 }
    521 
    522 static void
    523 binit(char *token, union tokenop u)
    524 {
    525 	ininit = 1;
    526 }
    527 
    528 static void
    529 einit(char *token, union tokenop u)
    530 {
    531 	ininit = 0;
    532 	endinit();
    533 }
    534 
    535 static void
    536 endpars(void)
    537 {
    538 	if (!inpars)
    539 		error(ESYNTAX);
    540 	inpars = 0;
    541 }
    542 
    543 static void
    544 aggregate(void)
    545 {
    546 	Node *align, *size;
    547 	char *name;
    548 	Type *tp;
    549 	Symbol *sym;
    550 
    551 	align = pop();
    552 	size = pop();
    553 	name = pop();
    554 	tp = pop();
    555 
    556 	tp->size = size->u.i;
    557 	tp->align = align->u.i;
    558 	tp->flags = AGGRF;
    559 	/*
    560 	 * type is the first field of Symbol so we can obtain the
    561 	 * address of the symbol from the address of the type.
    562 	 * We have to do this because composed returns the pointer
    563 	 * to the type, but in this function we also need the
    564 	 * symbol to store the name.
    565 	 */
    566 	sym = (Symbol *) tp;
    567 	sym->name = name;
    568 	deftype(tp);
    569 
    570 	delnode(align);
    571 	delnode(size);
    572 }
    573 
    574 static void
    575 array(void)
    576 {
    577 	Type *tp, *base;
    578 	Node *size;
    579 
    580 	size = pop();
    581 	base = pop();
    582 	tp = pop();
    583 	tp->flags = ARRF;
    584 
    585 	if (size->u.i > LONG_MAX/base->size)
    586 		error(EOVERFL);
    587 
    588 	tp->size = size->u.i * base->size;
    589 	tp->align = base->align;
    590 
    591 	delnode(size);
    592 }
    593 
    594 static void
    595 decl(Symbol *sym)
    596 {
    597 	Type *tp = &sym->type;
    598 
    599 	if (tp->flags & FUNF) {
    600 		lastfun = sym;
    601 	} else {
    602 		switch (sym->kind) {
    603 		case SEXTRN:
    604 		case SGLOB:
    605 		case SPRIV:
    606 		case SLOCAL:
    607 			defglobal(sym);
    608 			break;
    609 		case SAUTO:
    610 		case SREG:
    611 			if (!beginf)
    612 				error(ESYNTAX);
    613 			((inpars) ? defpar : defvar)(sym);
    614 			break;
    615 		default:
    616 			abort();
    617 		}
    618 	}
    619 }
    620 
    621 static void
    622 vardecl(void)
    623 {
    624 	Type *tp, *rp;
    625 	Node *np;
    626 	Symbol *sym;
    627 	char *name;
    628 
    629 	name = pop();
    630 	tp = pop();
    631 	if (tp->flags & FUNF)
    632 		rp = pop();
    633 	np = pop();
    634 
    635 	sym = np->u.sym;
    636 	/*
    637 	 * We have to free sym->name because in tentative declarations
    638 	 * we can have multiple declarations of the same symbol, and in
    639 	 * this case our parser will allocate twice the memory
    640 	 */
    641 	free(sym->name);
    642 	sym->name = name;
    643 	sym->type = *tp;
    644 	if (tp->flags & FUNF)
    645 		sym->rtype = *rp;
    646 	sym->kind = sclass;
    647 
    648 	if (ininit)
    649 		sym->type.flags |= INITF;
    650 	decl(sym);
    651 	delnode(np);
    652 }
    653 
    654 static void
    655 flddecl(void)
    656 {
    657 	Node *off, *np;
    658 	char *name;
    659 	Type *tp;
    660 	Symbol *sym;
    661 
    662 	off = pop();
    663 	name = pop();
    664 	tp = pop();
    665 	np = pop();
    666 
    667 	sym = np->u.sym;
    668 	sym->u.off = off->u.i;
    669 	sym->name = name;
    670 	sym->type = *tp;
    671 
    672 	delnode(np);
    673 	delnode(off);
    674 }
    675 
    676 static void
    677 labeldcl(void)
    678 {
    679 	Node *np;
    680 	Symbol *sym;
    681 
    682 	np = pop();
    683 	np->op = ONOP;
    684 	sym = np->u.sym;
    685 	sym->kind = SLABEL;
    686 	labelstmt(np, sym);
    687 	addstmt(np);
    688 }
    689 
    690 static void
    691 stmt(void)
    692 {
    693 	Node *np;
    694 
    695 	if (empty())
    696 		return;
    697 	np = pop();
    698 	if (ininit) {
    699 		data(np);
    700 		deltree(np);
    701 		return;
    702 	}
    703 	addstmt(np);
    704 }
    705 
    706 static void
    707 beginfun(void)
    708 {
    709 	newfun(lastfun, node(OBFUN));
    710 	beginf = inpars = 1;
    711 	pushctx();
    712 }
    713 
    714 static void
    715 endfun(void)
    716 {
    717 	endf = 1;
    718 	laststmt = addstmt(node(OEFUN));
    719 }
    720 
    721 void
    722 parse(void)
    723 {
    724 	/* clean from previous function */
    725 	cleanswtch();
    726 	cleancfg();
    727 	cleannodes();
    728 	popctx();
    729 
    730 	inpars = ininit = beginf = endf = 0;
    731 
    732 	while (!endf && nextline())
    733 		;
    734 
    735 	if (ferror(stdin))
    736 		error(EFERROR, strerror(errno));
    737 
    738 	if (beginf && !endf)
    739 		error(EBAFFUN);
    740 
    741 	PRTREE("after parsing");
    742 }