scc

simple c99 compiler
git clone git://git.simple-cc.org/scc
Log | Files | Refs | Submodules | README | LICENSE

parser.c (14254B)


      1 #include <errno.h>
      2 #include <limits.h>
      3 #include <stdio.h>
      4 #include <stdlib.h>
      5 #include <string.h>
      6 
      7 #include <scc/cstd.h>
      8 #include <scc/scc.h>
      9 
     10 #include "cc2.h"
     11 
     12 #define STACKSIZ     50
     13 
     14 Type funetype = {
     15 	.flags = FUNF | ELLIPS
     16 };
     17 
     18 Type funtype = {
     19 	.flags = FUNF
     20 };
     21 
     22 union tokenop {
     23 	void *arg;
     24 	unsigned op;
     25 };
     26 
     27 struct swtch {
     28 	int nr;
     29 	Node *first;
     30 	Node *last;
     31 };
     32 
     33 static struct swtch swtbl[NR_BLOCK], *swp = swtbl;
     34 static Symbol *lastfun;
     35 
     36 typedef void parsefun(char *, union tokenop);
     37 static parsefun type, symbol, getname, unary, binary, ternary, call,
     38                 constant, composed, binit, einit,
     39                 jump, oreturn, loop, assign,
     40                 ocase, bswitch, eswitch, builtin;
     41 
     42 typedef void evalfun(void);
     43 static evalfun vardecl, beginfun, endfun, endpars, stmt,
     44                array, aggregate, flddecl, labeldcl;
     45 
     46 static struct decoc {
     47 	void (*eval)(void);
     48 	void (*parse)(char *token, union tokenop);
     49 	union tokenop u;
     50 } optbl[] = {      /*  eval     parse           args */
     51 	['A']   = {  vardecl,  symbol, .u.op  =  SAUTO<<8 | OAUTO},
     52 	['R']   = {  vardecl,  symbol, .u.op  =   SREG<<8 |  OREG},
     53 	['G']   = {  vardecl,  symbol, .u.op  =  SGLOB<<8 |  OMEM},
     54 	['X']   = {  vardecl,  symbol, .u.op  = SEXTRN<<8 |  OMEM},
     55 	['Y']   = {  vardecl,  symbol, .u.op  =  SPRIV<<8 |  OMEM},
     56 	['T']   = {  vardecl,  symbol, .u.op  = SLOCAL<<8 |  OMEM},
     57 	['M']   = {  flddecl,  symbol, .u.op  =  SMEMB<<8 |  OMEM},
     58 	['L']   = { labeldcl,  symbol, .u.op  = SLABEL<<8 | OLABEL},
     59 
     60 	['C']   = {     NULL,    type, .u.arg =    &int8type},
     61 	['I']   = {     NULL,    type, .u.arg =   &int16type},
     62 	['W']   = {     NULL,    type, .u.arg =   &int32type},
     63 	['Q']   = {     NULL,    type, .u.arg =   &int64type},
     64 	['K']   = {     NULL,    type, .u.arg =   &uint8type},
     65 	['N']   = {     NULL,    type, .u.arg =  &uint16type},
     66 	['Z']   = {     NULL,    type, .u.arg =  &uint32type},
     67 	['O']   = {     NULL,    type, .u.arg =  &uint64type},
     68 	['J']   = {     NULL,    type, .u.arg = &float32type},
     69 	['D']   = {     NULL,    type, .u.arg = &float64type},
     70 	['H']   = {     NULL,    type, .u.arg = &float80type},
     71 	['0']   = {     NULL,    type, .u.arg =    &voidtype},
     72 	['B']   = {     NULL,    type, .u.arg =    &booltype},
     73 	['P']   = {     NULL,    type, .u.arg =     &ptrtype},
     74 	['E']   = {     NULL,    type, .u.arg =    &funetype},
     75 	['1']	= {     NULL,    type, .u.arg =    &arg_type},
     76 
     77 	['F']   = {     NULL,    type, .u.arg =     &funtype},
     78 	['V']   = {    array,composed,                     0},
     79 	['U']   = {aggregate,composed,                     0},
     80 	['S']   = {aggregate,composed,                     0},
     81 
     82 	['"']   = {     NULL, getname,                     0},
     83 	['{']   = { beginfun,    NULL,                     0},
     84 	['}']   = {   endfun,    NULL,                     0},
     85 	['(']   = {     NULL,   binit,                     0},
     86 	[')']   = {     NULL,   einit,                     0},
     87 	['\\']  = {  endpars,    NULL,                     0},
     88 	['\t']  = {     stmt,    NULL,                     0},
     89 
     90 	['~']   = {     NULL,   unary, .u.op =          OCPL},
     91 	['_']   = {     NULL,   unary, .u.op =         OSNEG},
     92 	['\'']  = {     NULL,   unary, .u.op =         OADDR},
     93 	['@']   = {     NULL,   unary, .u.op =          OPTR},
     94 	['g']   = {     NULL,   unary, .u.op =         OCAST},
     95 	['p']   = {     NULL,   unary, .u.op =          OPAR},
     96 	['n']   = {     NULL,   unary, .u.op =          ONEG},
     97 
     98 	['a']   = {     NULL,  binary, .u.op =          OAND},
     99 	['o']   = {     NULL,  binary, .u.op =           OOR},
    100 	['.']   = {     NULL,  binary, .u.op =        OFIELD},
    101 	['+']   = {     NULL,  binary, .u.op =          OADD},
    102 	['-']   = {     NULL,  binary, .u.op =          OSUB},
    103 	['*']   = {     NULL,  binary, .u.op =          OMUL},
    104 	['%']   = {     NULL,  binary, .u.op =          OMOD},
    105 	['/']   = {     NULL,  binary, .u.op =          ODIV},
    106 	['l']   = {     NULL,  binary, .u.op =          OSHL},
    107 	['r']   = {     NULL,  binary, .u.op =          OSHR},
    108 	['<']   = {     NULL,  binary, .u.op =           OLT},
    109 	['>']   = {     NULL,  binary, .u.op =           OGT},
    110 	['[']   = {     NULL,  binary, .u.op =           OLE},
    111 	[']']   = {     NULL,  binary, .u.op =           OGE},
    112 	['=']   = {     NULL,  binary, .u.op =           OEQ},
    113 	['!']   = {     NULL,  binary, .u.op =           ONE},
    114 	['&']   = {     NULL,  binary, .u.op =         OBAND},
    115 	['|']   = {     NULL,  binary, .u.op =          OBOR},
    116 	['^']   = {     NULL,  binary, .u.op =         OBXOR},
    117 	[',']   = {     NULL,  binary, .u.op =        OCOMMA},
    118 	['m']   = {     NULL,  builtin,.u.op =      OBUILTIN},
    119 
    120 	[':']   = {     NULL,  assign, .u.op =        OASSIG},
    121 	['?']   = {     NULL, ternary, .u.op =          OASK},
    122 	['c']   = {     NULL,    call, .u.op =         OCALL},
    123 	['z']   = {     NULL,    call, .u.op =        OCALLE},
    124 
    125 	['#']   = {     NULL,constant, .u.op =        OCONST},
    126 
    127 	['j']   = {     NULL,    jump, .u.op =          OJMP},
    128 	['y']   = {     NULL,    jump, .u.op =       OBRANCH},
    129 	['h']   = {     NULL, oreturn, .u.op =          ORET},
    130 	['i']   = {     NULL,    NULL, .u.op =          OINC},
    131 	['d']   = {     NULL,    NULL, .u.op =          ODEC},
    132 
    133 	['b']   = {     NULL,    loop, .u.op =        OBLOOP},
    134 	['e']   = {     NULL,    loop, .u.op =        OELOOP},
    135 
    136 	['v']   = {     NULL,   ocase, .u.op =         OCASE},
    137 	['f']   = {     NULL,   ocase, .u.op =      ODEFAULT},
    138 	['t']   = {     NULL, eswitch, .u.op =      OESWITCH},
    139 	['s']   = {     NULL, bswitch, .u.op =      OBSWITCH},
    140 };
    141 
    142 static int sclass, inpars, ininit, endf, lineno;
    143 static void *stack[STACKSIZ], **sp = stack;
    144 
    145 static Node *
    146 push(void *elem)
    147 {
    148 	if (sp == &stack[STACKSIZ])
    149 		error(ESTACKO);
    150 	return *sp++ = elem;
    151 }
    152 
    153 static void *
    154 pop(void)
    155 {
    156 	if (sp == stack)
    157 		error(ESTACKU);
    158 	return *--sp;
    159 }
    160 
    161 static int
    162 empty(void)
    163 {
    164 	return sp == stack;
    165 }
    166 
    167 static void
    168 type(char *token, union tokenop u)
    169 {
    170 	push(u.arg);
    171 }
    172 
    173 static void
    174 composed(char *token, union tokenop u)
    175 {
    176 	Symbol *sym;
    177 
    178 	sym = getsym(atoi(token+1));
    179 	sym->type.id = sym->id;
    180 	push(&sym->type);
    181 }
    182 
    183 static void
    184 getname(char *t, union tokenop u)
    185 {
    186 	push((*++t) ? xstrdup(t) : NULL);
    187 }
    188 
    189 static void
    190 symbol(char *token, union tokenop u)
    191 {
    192 	Node *np = node(u.op & 0xFF);
    193 	Symbol *sym = getsym(atoi(token+1));
    194 
    195 	sclass = u.op >> 8;
    196 	np->u.sym = sym;
    197 	np->type = sym->type;
    198 	push(np);
    199 }
    200 
    201 static Type *
    202 gettype(char *token)
    203 {
    204 	struct decoc *dp;
    205 
    206 	dp = &optbl[*token];
    207 	if (!dp->parse)
    208 		error(ESYNTAX);
    209 	(*dp->parse)(token, dp->u);
    210 	return pop();
    211 }
    212 
    213 static void
    214 constant(char *token, union tokenop u)
    215 {
    216 	static char letters[] = "0123456789ABCDEF";
    217 	Node *np;
    218 	TUINT v;
    219 	unsigned c;
    220 
    221 	++token;
    222 	if (*token == '"') {
    223 		++token;
    224 		np = node(OSTRING);
    225 		np->type.flags = STRF;
    226 		np->type.size = strlen(token);
    227 		np->type.align = int8type.align;
    228 		np->u.s = xstrdup(token);
    229 	} else {
    230 		np = node(OCONST);
    231 		np->type = *gettype(token++);
    232 		for (v = 0; c = *token++; v += c) {
    233 			v <<= 4;
    234 			c = strchr(letters, c) - letters;
    235 		}
    236 		np->u.i = v;
    237 	}
    238 	push(np);
    239 }
    240 
    241 static void
    242 assign(char *token, union tokenop u)
    243 {
    244 	int subop;
    245 	Node *np = node(u.op);
    246 
    247 	switch (subop = *++token) {
    248 	case '+':
    249 	case '-':
    250 	case '*':
    251 	case '%':
    252 	case '/':
    253 	case 'l':
    254 	case 'r':
    255 	case '&':
    256 	case '|':
    257 	case '^':
    258 	case 'i':
    259 	case 'd':
    260 		++token;
    261 		subop = optbl[subop].u.op;
    262 		break;
    263 	default:
    264 		subop = 0;
    265 		break;
    266 	}
    267 
    268 	np->u.subop = subop;
    269 	np->type = *gettype(token);
    270 	np->right = pop();
    271 	np->left = pop();
    272 	push(np);
    273 }
    274 
    275 static void
    276 ternary(char *token, union tokenop u)
    277 {
    278 	Node *ask = node(OASK), *colon = node(OCOLON);
    279 	Type *tp = gettype(token+1);
    280 
    281 	colon->right = pop();
    282 	colon->left = pop();
    283 
    284 	ask->type = *tp;
    285 	ask->left = pop();
    286 	ask->right = colon;
    287 	push(ask);
    288 }
    289 
    290 static void
    291 eval(char *tok)
    292 {
    293 	struct decoc *dp;
    294 
    295 	do {
    296 		dp = &optbl[*tok];
    297 		if (!dp->parse)
    298 			break;
    299 		(*dp->parse)(tok, dp->u);
    300 	} while (tok = strtok(NULL, "\t\n"));
    301 }
    302 
    303 static int
    304 nextline(void)
    305 {
    306 	static char line[LINESIZ];
    307 	size_t len;
    308 	int c;
    309 	void (*fun)(void);
    310 
    311 repeat:
    312 	++lineno;
    313 	if (!fgets(line, sizeof(line), stdin))
    314 		return 0;
    315 	if ((len = strlen(line)) == 0 || line[0] == '\n')
    316 		goto repeat;
    317 	if (line[len-1] != '\n')
    318 		error(len < sizeof(line)-1 ? ELNBLNE : ELNLINE);
    319 	line[len-1] = '\0';
    320 
    321 	c = *line;
    322 	eval(strtok(line, "\t\n"));
    323 	if ((fun = *optbl[c].eval) != NULL)
    324 		(*fun)();
    325 	if (sp != stack)
    326 		error(ESTACKA);
    327 	return 1;
    328 }
    329 
    330 static void
    331 oreturn(char *token, union tokenop u)
    332 {
    333 	Node *np = node(u.op);
    334 
    335 	if (token = strtok(NULL, "\t\n"))
    336 		eval(token);
    337 	if (!empty())
    338 		np->left = pop();
    339 	push(np);
    340 }
    341 
    342 /*
    343  * Move np (which is a OCASE/ODEFAULT/OESWITCH) to be contigous with
    344  * the last switch table. It is a bit ugly to touch directly curstmt
    345  * here, but moving this function to node.c is worse, because we are
    346  * putting knowledge of how the text is parsed into the node
    347  * represtation module.
    348  */
    349 static void
    350 waft(Node *np)
    351 {
    352 	Node *lastcase, *next;
    353 	struct swtch *cur;
    354 	extern Node *curstmt;
    355 
    356 	if (swp == swtbl)
    357 		error(EWTACKU);
    358 
    359 	cur = swp - 1;
    360 	lastcase = cur->last;
    361 	next = lastcase->next;
    362 
    363 	np->next = next;
    364 	np->prev = lastcase;
    365 
    366 	if (next)
    367 		next->prev = np;
    368 	lastcase->next = np;
    369 
    370 	if (curstmt == cur->last)
    371 		curstmt = np;
    372 	cur->last = np;
    373 	cur->nr++;
    374 }
    375 
    376 static void
    377 bswitch(char *token, union tokenop u)
    378 {
    379 	struct swtch *cur;
    380 	Node *np = node(u.op);
    381 
    382 	if (swp == &swtbl[NR_BLOCK])
    383 		error(EWTACKO);
    384 	cur = swp++;
    385 	cur->nr = 0;
    386 
    387 	eval(strtok(NULL, "\t\n"));
    388 	np->left = pop();
    389 
    390 	push(cur->first = cur->last = np);
    391 }
    392 
    393 static void
    394 eswitch(char *token, union tokenop u)
    395 {
    396 	struct swtch *cur;
    397 
    398 	if (swp == swtbl)
    399 		error(EWTACKU);
    400 	jump(token, u);
    401 	waft(pop());
    402 	cur = --swp;
    403 	cur->first->u.i = cur->nr;
    404 }
    405 
    406 static void
    407 ocase(char *token, union tokenop u)
    408 {
    409 	jump(token, u);
    410 	waft(pop());
    411 }
    412 
    413 static void
    414 jump(char *token, union tokenop u)
    415 {
    416 	Node *aux, *np = node(u.op);
    417 
    418 	eval(strtok(NULL, "\t\n"));
    419 
    420 	if (u.op == OBRANCH || u.op == OCASE)
    421 		np->left = pop();
    422 	aux = pop();
    423 	np->u.sym = aux->u.sym;
    424 	delnode(aux);
    425 	push(np);
    426 }
    427 
    428 static void
    429 loop(char *token, union tokenop u)
    430 {
    431 	push(node(u.op));
    432 }
    433 
    434 static void
    435 unary(char *token, union tokenop u)
    436 {
    437 	Node *np = node(u.op);
    438 
    439 	np->type = *gettype(token+1);
    440 	np->left = pop();
    441 	np->right = NULL;
    442 	push(np);
    443 }
    444 
    445 static void
    446 call(char *token, union tokenop u)
    447 {
    448 	Node *np, *par, *fun = node(u.op);
    449 
    450 	for (par = NULL;; par = np) {
    451 		np = pop();
    452 		if (np->op != OPAR)
    453 			break;
    454 		np->right = par;
    455 	}
    456 
    457 	fun->type = *gettype(token+1);
    458 	fun->left = np;
    459 	fun->right = par;
    460 	push(fun);
    461 }
    462 
    463 static void
    464 builtin(char *token, union tokenop u)
    465 {
    466 	Node *np = node(u.op);
    467 	char *name;
    468 	unsigned subop, nchilds;
    469 
    470 	np->type = *gettype(token+1);
    471 	name = pop();
    472 
    473 	if (!strcmp("__builtin_va_arg", name)) {
    474 		nchilds = 1;
    475 		subop = BVA_ARG;
    476 	} else if (!strcmp("__builtin_va_start", name)) {
    477 		nchilds = 2;
    478 		subop = BVA_START;
    479 	} else if (!strcmp("__builtin_va_end", name)) {
    480 		nchilds = 1;
    481 		subop = BVA_END;
    482 	} else if (!strcmp("__builtin_va_copy", name)) {
    483 		nchilds = 2;
    484 		subop = BVA_COPY;
    485 	} else {
    486 		error(EBBUILT);
    487 	}
    488 
    489 	np->u.subop = subop;
    490 	np->right = (nchilds == 2) ? pop() : NULL;
    491 	np->left = (nchilds != 0) ? pop() : NULL;
    492 
    493 	free(name);
    494 	push(np);
    495 }
    496 
    497 static void
    498 binary(char *token, union tokenop u)
    499 {
    500 	Node *np = node(u.op);
    501 
    502 	np->type = *gettype(token+1);
    503 	np->right = pop();
    504 	np->left = pop();
    505 	push(np);
    506 }
    507 
    508 static void
    509 binit(char *token, union tokenop u)
    510 {
    511 	ininit = 1;
    512 }
    513 
    514 static void
    515 einit(char *token, union tokenop u)
    516 {
    517 	ininit = 0;
    518 	endinit();
    519 }
    520 
    521 static void
    522 endpars(void)
    523 {
    524 	if (!curfun || !inpars)
    525 		error(ESYNTAX);
    526 	inpars = 0;
    527 }
    528 
    529 static void
    530 aggregate(void)
    531 {
    532 	Node *align, *size;
    533 	char *name;
    534 	Type *tp;
    535 	Symbol *sym;
    536 
    537 	align = pop();
    538 	size = pop();
    539 	name = pop();
    540 	tp = pop();
    541 
    542 	tp->size = size->u.i;
    543 	tp->align = align->u.i;
    544 	tp->flags = AGGRF;
    545 	/*
    546 	 * type is the first field of Symbol so we can obtain the
    547 	 * address of the symbol from the address of the type.
    548 	 * We have to do this because composed returns the pointer
    549 	 * to the type, but in this function we also need the
    550 	 * symbol to store the name.
    551 	 */
    552 	sym = (Symbol *) tp;
    553 	sym->name = name;
    554 	deftype(tp);
    555 
    556 	delnode(align);
    557 	delnode(size);
    558 }
    559 
    560 static void
    561 array(void)
    562 {
    563 	Type *tp, *base;
    564 	Node *size;
    565 
    566 	size = pop();
    567 	base = pop();
    568 	tp = pop();
    569 	tp->flags = ARRF;
    570 
    571 	if (size->u.i > LONG_MAX/base->size)
    572 		error(EOVERFL);
    573 
    574 	tp->size = size->u.i * base->size;
    575 	tp->align = base->align;
    576 
    577 	delnode(size);
    578 }
    579 
    580 static void
    581 decl(Symbol *sym)
    582 {
    583 	Type *tp = &sym->type;
    584 
    585 	if (tp->flags & FUNF) {
    586 		lastfun = sym;
    587 	} else {
    588 		switch (sym->kind) {
    589 		case SEXTRN:
    590 		case SGLOB:
    591 		case SPRIV:
    592 		case SLOCAL:
    593 			defglobal(sym);
    594 			break;
    595 		case SAUTO:
    596 		case SREG:
    597 			if (!curfun)
    598 				error(ESYNTAX);
    599 			((inpars) ? defpar : defvar)(sym);
    600 			break;
    601 		default:
    602 			abort();
    603 		}
    604 	}
    605 }
    606 
    607 static void
    608 vardecl(void)
    609 {
    610 	Type *tp, *rp;
    611 	Node *np;
    612 	Symbol *sym;
    613 	char *name;
    614 
    615 	name = pop();
    616 	tp = pop();
    617 	if (tp->flags & FUNF)
    618 		rp = pop();
    619 	np = pop();
    620 
    621 	sym = np->u.sym;
    622 	/*
    623 	 * We have to free sym->name because in tentative declarations
    624 	 * we can have multiple declarations of the same symbol, and in
    625 	 * this case our parser will allocate twice the memory
    626 	 */
    627 	free(sym->name);
    628 	sym->name = name;
    629 	sym->type = *tp;
    630 	if (tp->flags & FUNF)
    631 		sym->rtype = *rp;
    632 	sym->kind = sclass;
    633 
    634 	if (ininit)
    635 		sym->type.flags |= INITF;
    636 	decl(sym);
    637 	delnode(np);
    638 }
    639 
    640 static void
    641 flddecl(void)
    642 {
    643 	Node *off, *np;
    644 	char *name;
    645 	Type *tp;
    646 	Symbol *sym;
    647 
    648 	off = pop();
    649 	name = pop();
    650 	tp = pop();
    651 	np = pop();
    652 
    653 	sym = np->u.sym;
    654 	sym->u.off = off->u.i;
    655 	sym->name = name;
    656 	sym->type = *tp;
    657 
    658 	delnode(np);
    659 	delnode(off);
    660 }
    661 
    662 static void
    663 labeldcl(void)
    664 {
    665 	Node *np;
    666 	Symbol *sym;
    667 
    668 	np = pop();
    669 	np->op = ONOP;
    670 	sym = np->u.sym;
    671 	sym->kind = SLABEL;
    672 	sym->u.stmt = np;
    673 	np->label = sym;
    674 	addstmt(np, SETCUR);
    675 }
    676 
    677 static void
    678 stmt(void)
    679 {
    680 	Node *np;
    681 
    682 	if (empty())
    683 		return;
    684 	np = pop();
    685 	if (ininit) {
    686 		data(np);
    687 		deltree(np);
    688 		return;
    689 	}
    690 	addstmt(np, SETCUR);
    691 }
    692 
    693 static void
    694 beginfun(void)
    695 {
    696 	curfun = lastfun;
    697 	inpars = 1;
    698 	pushctx();
    699 	addstmt(node(OBFUN), SETCUR);
    700 }
    701 
    702 static void
    703 endfun(void)
    704 {
    705 	endf = 1;
    706 	addstmt(node(OEFUN), SETCUR);
    707 }
    708 
    709 void
    710 parse(void)
    711 {
    712 	cleannodes();  /* remove code of previous function */
    713 	popctx();  /* remove context of previous function */
    714 	curfun = NULL;
    715 	endf = 0;
    716 
    717 	while (!endf && nextline())
    718 		;
    719 	if (ferror(stdin))
    720 		error(EFERROR, strerror(errno));
    721 }