scc

simple c99 compiler
git clone git://git.simple-cc.org/scc
Log | Files | Refs | README | LICENSE

parser.c (14404B)


      1 #include <errno.h>
      2 #include <stdio.h>
      3 #include <stdlib.h>
      4 #include <string.h>
      5 
      6 #include <scc/cstd.h>
      7 #include <scc/scc.h>
      8 
      9 #include "cc2.h"
     10 
     11 #define STACKSIZ     50
     12 
     13 extern Type int8type, int16type, int32type, int64type,
     14             uint8type, uint16type, uint32type, uint64type,
     15             float32type, float64type, float80type,
     16             booltype,
     17             ptrtype,
     18             voidtype,
     19             arg_type;
     20 
     21 Type funetype = {
     22 	.flags = FUNF | ELLIPS
     23 };
     24 
     25 Type funtype = {
     26 	.flags = FUNF
     27 };
     28 
     29 union tokenop {
     30 	void *arg;
     31 	unsigned op;
     32 };
     33 
     34 struct swtch {
     35 	int nr;
     36 	Node *first;
     37 	Node *last;
     38 };
     39 
     40 static struct swtch swtbl[NR_BLOCK], *swp = swtbl;
     41 static Symbol *lastfun;
     42 
     43 typedef void parsefun(char *, union tokenop);
     44 static parsefun type, symbol, getname, unary, binary, ternary, call,
     45                 constant, composed, binit, einit,
     46                 jump, oreturn, loop, assign,
     47                 ocase, bswitch, eswitch, builtin;
     48 
     49 typedef void evalfun(void);
     50 static evalfun vardecl, beginfun, endfun, endpars, stmt,
     51                array, aggregate, flddecl, labeldcl;
     52 
     53 static struct decoc {
     54 	void (*eval)(void);
     55 	void (*parse)(char *token, union tokenop);
     56 	union tokenop u;
     57 } optbl[] = {      /*  eval     parse           args */
     58 	['A']   = {  vardecl,  symbol, .u.op  =  SAUTO<<8 | OAUTO},
     59 	['R']   = {  vardecl,  symbol, .u.op  =   SREG<<8 |  OREG},
     60 	['G']   = {  vardecl,  symbol, .u.op  =  SGLOB<<8 |  OMEM},
     61 	['X']   = {  vardecl,  symbol, .u.op  = SEXTRN<<8 |  OMEM},
     62 	['Y']   = {  vardecl,  symbol, .u.op  =  SPRIV<<8 |  OMEM},
     63 	['T']   = {  vardecl,  symbol, .u.op  = SLOCAL<<8 |  OMEM},
     64 	['M']   = {  flddecl,  symbol, .u.op  =  SMEMB<<8 |  OMEM},
     65 	['L']   = { labeldcl,  symbol, .u.op  = SLABEL<<8 | OLABEL},
     66 
     67 	['C']   = {     NULL,    type, .u.arg =    &int8type},
     68 	['I']   = {     NULL,    type, .u.arg =   &int16type},
     69 	['W']   = {     NULL,    type, .u.arg =   &int32type},
     70 	['Q']   = {     NULL,    type, .u.arg =   &int64type},
     71 	['K']   = {     NULL,    type, .u.arg =   &uint8type},
     72 	['N']   = {     NULL,    type, .u.arg =  &uint16type},
     73 	['Z']   = {     NULL,    type, .u.arg =  &uint32type},
     74 	['O']   = {     NULL,    type, .u.arg =  &uint64type},
     75 	['J']   = {     NULL,    type, .u.arg = &float32type},
     76 	['D']   = {     NULL,    type, .u.arg = &float64type},
     77 	['H']   = {     NULL,    type, .u.arg = &float80type},
     78 	['0']   = {     NULL,    type, .u.arg =    &voidtype},
     79 	['B']   = {     NULL,    type, .u.arg =    &booltype},
     80 	['P']   = {     NULL,    type, .u.arg =     &ptrtype},
     81 	['E']   = {     NULL,    type, .u.arg =    &funetype},
     82 	['1']	= {     NULL,    type, .u.arg =    &arg_type},
     83 
     84 	['F']   = {     NULL,    type, .u.arg =     &funtype},
     85 	['V']   = {    array,composed,                     0},
     86 	['U']   = {aggregate,composed,                     0},
     87 	['S']   = {aggregate,composed,                     0},
     88 
     89 	['"']   = {     NULL, getname,                     0},
     90 	['{']   = { beginfun,    NULL,                     0},
     91 	['}']   = {   endfun,    NULL,                     0},
     92 	['(']   = {     NULL,   binit,                     0},
     93 	[')']   = {     NULL,   einit,                     0},
     94 	['\\']  = {  endpars,    NULL,                     0},
     95 	['\t']  = {     stmt,    NULL,                     0},
     96 
     97 	['~']   = {     NULL,   unary, .u.op =          OCPL},
     98 	['_']   = {     NULL,   unary, .u.op =         OSNEG},
     99 	['\'']  = {     NULL,   unary, .u.op =         OADDR},
    100 	['@']   = {     NULL,   unary, .u.op =          OPTR},
    101 	['g']   = {     NULL,   unary, .u.op =         OCAST},
    102 	['p']   = {     NULL,   unary, .u.op =          OPAR},
    103 	['n']   = {     NULL,   unary, .u.op =          ONEG},
    104 
    105 	['a']   = {     NULL,  binary, .u.op =          OAND},
    106 	['o']   = {     NULL,  binary, .u.op =           OOR},
    107 	['.']   = {     NULL,  binary, .u.op =        OFIELD},
    108 	['+']   = {     NULL,  binary, .u.op =          OADD},
    109 	['-']   = {     NULL,  binary, .u.op =          OSUB},
    110 	['*']   = {     NULL,  binary, .u.op =          OMUL},
    111 	['%']   = {     NULL,  binary, .u.op =          OMOD},
    112 	['/']   = {     NULL,  binary, .u.op =          ODIV},
    113 	['l']   = {     NULL,  binary, .u.op =          OSHL},
    114 	['r']   = {     NULL,  binary, .u.op =          OSHR},
    115 	['<']   = {     NULL,  binary, .u.op =           OLT},
    116 	['>']   = {     NULL,  binary, .u.op =           OGT},
    117 	['[']   = {     NULL,  binary, .u.op =           OLE},
    118 	[']']   = {     NULL,  binary, .u.op =           OGE},
    119 	['=']   = {     NULL,  binary, .u.op =           OEQ},
    120 	['!']   = {     NULL,  binary, .u.op =           ONE},
    121 	['&']   = {     NULL,  binary, .u.op =         OBAND},
    122 	['|']   = {     NULL,  binary, .u.op =          OBOR},
    123 	['^']   = {     NULL,  binary, .u.op =         OBXOR},
    124 	[',']   = {     NULL,  binary, .u.op =        OCOMMA},
    125 	['m']   = {     NULL,  builtin,.u.op =      OBUILTIN},
    126 
    127 	[':']   = {     NULL,  assign, .u.op =        OASSIG},
    128 	['?']   = {     NULL, ternary, .u.op =          OASK},
    129 	['c']   = {     NULL,    call, .u.op =         OCALL},
    130 	['z']   = {     NULL,    call, .u.op =        OCALLE},
    131 
    132 	['#']   = {     NULL,constant, .u.op =        OCONST},
    133 
    134 	['j']   = {     NULL,    jump, .u.op =          OJMP},
    135 	['y']   = {     NULL,    jump, .u.op =       OBRANCH},
    136 	['h']   = {     NULL, oreturn, .u.op =          ORET},
    137 	['i']   = {     NULL,    NULL, .u.op =          OINC},
    138 	['d']   = {     NULL,    NULL, .u.op =          ODEC},
    139 
    140 	['b']   = {     NULL,    loop, .u.op =        OBLOOP},
    141 	['e']   = {     NULL,    loop, .u.op =        OELOOP},
    142 
    143 	['v']   = {     NULL,   ocase, .u.op =         OCASE},
    144 	['f']   = {     NULL,   ocase, .u.op =      ODEFAULT},
    145 	['t']   = {     NULL, eswitch, .u.op =      OESWITCH},
    146 	['s']   = {     NULL, bswitch, .u.op =      OBSWITCH},
    147 };
    148 
    149 static int sclass, inpars, ininit, endf, lineno;
    150 static void *stack[STACKSIZ], **sp = stack;
    151 
    152 static Node *
    153 push(void *elem)
    154 {
    155 	if (sp == &stack[STACKSIZ])
    156 		error(ESTACKO);
    157 	return *sp++ = elem;
    158 }
    159 
    160 static void *
    161 pop(void)
    162 {
    163 	if (sp == stack)
    164 		error(ESTACKU);
    165 	return *--sp;
    166 }
    167 
    168 static int
    169 empty(void)
    170 {
    171 	return sp == stack;
    172 }
    173 
    174 static void
    175 type(char *token, union tokenop u)
    176 {
    177 	push(u.arg);
    178 }
    179 
    180 static void
    181 composed(char *token, union tokenop u)
    182 {
    183 	Symbol *sym;
    184 
    185 	sym = getsym(atoi(token+1));
    186 	push(&sym->type);
    187 }
    188 
    189 static void
    190 getname(char *t, union tokenop u)
    191 {
    192 	push((*++t) ? xstrdup(t) : NULL);
    193 }
    194 
    195 static void
    196 symbol(char *token, union tokenop u)
    197 {
    198 	Node *np = node(u.op & 0xFF);
    199 	Symbol *sym = getsym(atoi(token+1));
    200 
    201 	sclass = u.op >> 8;
    202 	np->u.sym = sym;
    203 	np->type = sym->type;
    204 	push(np);
    205 }
    206 
    207 static Type *
    208 gettype(char *token)
    209 {
    210 	struct decoc *dp;
    211 
    212 	dp = &optbl[*token];
    213 	if (!dp->parse)
    214 		error(ESYNTAX);
    215 	(*dp->parse)(token, dp->u);
    216 	return pop();
    217 }
    218 
    219 static void
    220 constant(char *token, union tokenop u)
    221 {
    222 	static char letters[] = "0123456789ABCDEF";
    223 	Node *np;
    224 	TUINT v;
    225 	unsigned c;
    226 
    227 	++token;
    228 	if (*token == '"') {
    229 		++token;
    230 		np = node(OSTRING);
    231 		np->type.flags = STRF;
    232 		np->type.size = strlen(token);
    233 		np->type.align = int8type.align;
    234 		np->u.s = xstrdup(token);
    235 	} else {
    236 		np = node(OCONST);
    237 		np->type = *gettype(token++);
    238 		for (v = 0; c = *token++; v += c) {
    239 			v <<= 4;
    240 			c = strchr(letters, c) - letters;
    241 		}
    242 		np->u.i = v;
    243 	}
    244 	push(np);
    245 }
    246 
    247 static void
    248 assign(char *token, union tokenop u)
    249 {
    250 	int subop;
    251 	Node *np = node(u.op);
    252 
    253 	switch (subop = *++token) {
    254 	case '+':
    255 	case '-':
    256 	case '*':
    257 	case '%':
    258 	case '/':
    259 	case 'l':
    260 	case 'r':
    261 	case '&':
    262 	case '|':
    263 	case '^':
    264 	case 'i':
    265 	case 'd':
    266 		++token;
    267 		subop = optbl[subop].u.op;
    268 		break;
    269 	default:
    270 		subop = 0;
    271 		break;
    272 	}
    273 
    274 	np->u.subop = subop;
    275 	np->type = *gettype(token);
    276 	np->right = pop();
    277 	np->left = pop();
    278 	push(np);
    279 }
    280 
    281 static void
    282 ternary(char *token, union tokenop u)
    283 {
    284 	Node *ask = node(OASK), *colon = node(OCOLON);
    285 	Type *tp = gettype(token+1);
    286 
    287 	colon->right = pop();
    288 	colon->left = pop();
    289 
    290 	ask->type = *tp;
    291 	ask->left = pop();
    292 	ask->right = colon;
    293 	push(ask);
    294 }
    295 
    296 static void
    297 eval(char *tok)
    298 {
    299 	struct decoc *dp;
    300 
    301 	do {
    302 		dp = &optbl[*tok];
    303 		if (!dp->parse)
    304 			break;
    305 		(*dp->parse)(tok, dp->u);
    306 	} while (tok = strtok(NULL, "\t\n"));
    307 }
    308 
    309 static int
    310 nextline(void)
    311 {
    312 	static char line[LINESIZ];
    313 	size_t len;
    314 	int c;
    315 	void (*fun)(void);
    316 
    317 repeat:
    318 	++lineno;
    319 	if (!fgets(line, sizeof(line), stdin))
    320 		return 0;
    321 	if ((len = strlen(line)) == 0 || line[0] == '\n')
    322 		goto repeat;
    323 	if (line[len-1] != '\n')
    324 		error(len < sizeof(line)-1 ? ELNBLNE : ELNLINE);
    325 	line[len-1] = '\0';
    326 
    327 	c = *line;
    328 	eval(strtok(line, "\t\n"));
    329 	if ((fun = *optbl[c].eval) != NULL)
    330 		(*fun)();
    331 	if (sp != stack)
    332 		error(ESTACKA);
    333 	return 1;
    334 }
    335 
    336 static void
    337 oreturn(char *token, union tokenop u)
    338 {
    339 	Node *np = node(u.op);
    340 
    341 	if (token = strtok(NULL, "\t\n"))
    342 		eval(token);
    343 	if (!empty())
    344 		np->left = pop();
    345 	push(np);
    346 }
    347 
    348 /*
    349  * Move np (which is a OCASE/ODEFAULT/OESWITCH) to be contigous with
    350  * the last switch table. It is a bit ugly to touch directly curstmt
    351  * here, but moving this function to node.c is worse, because we are
    352  * putting knowledge of how the text is parsed into the node
    353  * represtation module.
    354  */
    355 static void
    356 waft(Node *np)
    357 {
    358 	Node *lastcase, *next;;
    359 	struct swtch *cur;
    360 	extern Node *curstmt;
    361 
    362 	if (swp == swtbl)
    363 		error(EWTACKU);
    364 
    365 	cur = swp - 1;
    366 	lastcase = cur->last;
    367 	next = lastcase->next;
    368 
    369 	np->next = next;
    370 	np->prev = lastcase;
    371 
    372 	if (next)
    373 		next->prev = np;
    374 	lastcase->next = np;
    375 
    376 	if (curstmt == cur->last)
    377 		curstmt = np;
    378 	cur->last = np;
    379 	cur->nr++;
    380 }
    381 
    382 static void
    383 bswitch(char *token, union tokenop u)
    384 {
    385 	struct swtch *cur;
    386 	Node *np = node(u.op);
    387 
    388 	if (swp == &swtbl[NR_BLOCK])
    389 		error(EWTACKO);
    390 	cur = swp++;
    391 	cur->nr = 0;
    392 
    393 	eval(strtok(NULL, "\t\n"));
    394 	np->left = pop();
    395 
    396 	push(cur->first = cur->last = np);
    397 }
    398 
    399 static void
    400 eswitch(char *token, union tokenop u)
    401 {
    402 	struct swtch *cur;
    403 
    404 	if (swp == swtbl)
    405 		error(EWTACKU);
    406 	jump(token, u);
    407 	waft(pop());
    408 	cur = --swp;
    409 	cur->first->u.i = cur->nr;
    410 }
    411 
    412 static void
    413 ocase(char *token, union tokenop u)
    414 {
    415 	jump(token, u);
    416 	waft(pop());
    417 }
    418 
    419 static void
    420 jump(char *token, union tokenop u)
    421 {
    422 	Node *aux, *np = node(u.op);
    423 
    424 	eval(strtok(NULL, "\t\n"));
    425 
    426 	if (u.op == OBRANCH || u.op == OCASE)
    427 		np->left = pop();
    428 	aux = pop();
    429 	np->u.sym = aux->u.sym;
    430 	delnode(aux);
    431 	push(np);
    432 }
    433 
    434 static void
    435 loop(char *token, union tokenop u)
    436 {
    437 	push(node(u.op));
    438 }
    439 
    440 static void
    441 unary(char *token, union tokenop u)
    442 {
    443 	Node *np = node(u.op);
    444 
    445 	np->type = *gettype(token+1);
    446 	np->left = pop();
    447 	np->right = NULL;
    448 	push(np);
    449 }
    450 
    451 static void
    452 call(char *token, union tokenop u)
    453 {
    454 	Node *np, *par, *fun = node(u.op);
    455 
    456 	for (par = NULL;; par = np) {
    457 		np = pop();
    458 		if (np->op != OPAR)
    459 			break;
    460 		np->right = par;
    461 	}
    462 
    463 	fun->type = *gettype(token+1);
    464 	fun->left = np;
    465 	fun->right = par;
    466 	push(fun);
    467 }
    468 
    469 static void
    470 builtin(char *token, union tokenop u)
    471 {
    472 	Node *np = node(u.op);
    473 	char *name;
    474 	unsigned subop, nchilds;
    475 
    476 	np->type = *gettype(token+1);
    477 	name = pop();
    478 
    479 	if (!strcmp("__builtin_va_arg", name)) {
    480 		nchilds = 1;
    481 		subop = BVA_ARG;
    482 	} else if (!strcmp("__builtin_va_start", name)) {
    483 		nchilds = 2;
    484 		subop = BVA_START;
    485 	} else if (!strcmp("__builtin_va_end", name)) {
    486 		nchilds = 1;
    487 		subop = BVA_END;
    488 	} else if (!strcmp("__builtin_va_copy", name)) {
    489 		nchilds = 2;
    490 		subop = BVA_COPY;
    491 	} else {
    492 		error(EBBUILT);;
    493 	}
    494 
    495 	np->u.subop = subop;
    496 	np->right = (nchilds == 2) ? pop() : NULL;
    497 	np->left = (nchilds != 0) ? pop() : NULL;
    498 
    499 	free(name);
    500 	push(np);
    501 }
    502 
    503 static void
    504 binary(char *token, union tokenop u)
    505 {
    506 	Node *np = node(u.op);
    507 
    508 	np->type = *gettype(token+1);
    509 	np->right = pop();
    510 	np->left = pop();
    511 	push(np);
    512 }
    513 
    514 static void
    515 binit(char *token, union tokenop u)
    516 {
    517 	ininit = 1;
    518 }
    519 
    520 static void
    521 einit(char *token, union tokenop u)
    522 {
    523 	ininit = 0;
    524 	endinit();
    525 }
    526 
    527 static void
    528 endpars(void)
    529 {
    530 	if (!curfun || !inpars)
    531 		error(ESYNTAX);
    532 	inpars = 0;
    533 }
    534 
    535 static void
    536 aggregate(void)
    537 {
    538 	Node *align, *size;
    539 	char *name;
    540 	Type *tp;
    541 	Symbol *sym;
    542 
    543 	align = pop();
    544 	size = pop();
    545 	name = pop();
    546 	tp = pop();
    547 
    548 	tp->size = size->u.i;
    549 	tp->align = align->u.i;
    550 	tp->flags = AGGRF;
    551 	/*
    552 	 * type is the first field of Symbol so we can obtain the
    553 	 * address of the symbol from the address of the type.
    554 	 * We have to do this because composed returns the pointer
    555 	 * to the type, but in this function we also need the
    556 	 * symbol to store the name.
    557 	 */
    558 	sym = (Symbol *) tp;
    559 	sym->name = name;
    560 
    561 	delnode(align);
    562 	delnode(size);
    563 }
    564 
    565 static void
    566 array(void)
    567 {
    568 	Type *tp, *base;
    569 	Node *size;
    570 
    571 	size = pop();
    572 	base = pop();
    573 	tp = pop();
    574 	tp->size = size->u.i * base->size; /* FIXME check for overflow */
    575 	tp->align = base->align;
    576 
    577 	delnode(size);
    578 }
    579 
    580 static void
    581 decl(Symbol *sym)
    582 {
    583 	Type *tp = &sym->type;
    584 
    585 	if (tp->flags & FUNF) {
    586 		lastfun = sym;
    587 	} else {
    588 		switch (sym->kind) {
    589 		case SEXTRN:
    590 		case SGLOB:
    591 		case SPRIV:
    592 		case SLOCAL:
    593 			defglobal(sym);
    594 			break;
    595 		case SAUTO:
    596 		case SREG:
    597 			if (!curfun)
    598 				error(ESYNTAX);
    599 			((inpars) ? defpar : defvar)(sym);
    600 			break;
    601 		default:
    602 			abort();
    603 		}
    604 	}
    605 }
    606 
    607 static void
    608 vardecl(void)
    609 {
    610 	Type *tp, *rp;
    611 	Node *np;
    612 	Symbol *sym;
    613 	char *name;
    614 
    615 	name = pop();
    616 	tp = pop();
    617 	if (tp->flags & FUNF)
    618 		rp = pop();
    619 	np = pop();
    620 
    621 	sym = np->u.sym;
    622 	/*
    623 	 * We have to free sym->name because in tentative declarations
    624 	 * we can have multiple declarations of the same symbol, and in
    625 	 * this case our parser will allocate twice the memory
    626 	 */
    627 	free(sym->name);
    628 	sym->name = name;
    629 	sym->type = *tp;
    630 	if (tp->flags & FUNF)
    631 		sym->rtype = *rp;
    632 	sym->kind = sclass;
    633 
    634 	if (ininit)
    635 		sym->type.flags |= INITF;
    636 	decl(sym);
    637 	delnode(np);
    638 }
    639 
    640 static void
    641 flddecl(void)
    642 {
    643 	Node *off, *np;
    644 	char *name;
    645 	Type *tp;
    646 	Symbol *sym;
    647 
    648 	off = pop();
    649 	name = pop();
    650 	tp = pop();
    651 	np = pop();
    652 
    653 	sym = np->u.sym;
    654 	sym->u.off = off->u.i;
    655 	sym->name = name;
    656 	sym->type = *tp;
    657 
    658 	delnode(np);
    659 	delnode(off);
    660 }
    661 
    662 static void
    663 labeldcl(void)
    664 {
    665 	Node *np;
    666 	Symbol *sym;
    667 
    668 	np = pop();
    669 	np->op = ONOP;
    670 	sym = np->u.sym;
    671 	sym->kind = SLABEL;
    672 	sym->u.stmt = np;
    673 	np->label = sym;
    674 	addstmt(np, SETCUR);
    675 }
    676 
    677 static void
    678 stmt(void)
    679 {
    680 	Node *np;
    681 
    682 	if (empty())
    683 		return;
    684 	np = pop();
    685 	if (ininit) {
    686 		data(np);
    687 		deltree(np);
    688 		return;
    689 	}
    690 	addstmt(np, SETCUR);
    691 }
    692 
    693 static void
    694 beginfun(void)
    695 {
    696 	curfun = lastfun;
    697 	inpars = 1;
    698 	pushctx();
    699 	addstmt(node(OBFUN), SETCUR);
    700 }
    701 
    702 static void
    703 endfun(void)
    704 {
    705 	endf = 1;
    706 	addstmt(node(OEFUN), SETCUR);
    707 }
    708 
    709 void
    710 parse(void)
    711 {
    712 	cleannodes();  /* remove code of previous function */
    713 	popctx();  /* remove context of previous function */
    714 	curfun = NULL;
    715 	endf = 0;
    716 
    717 	while (!endf && nextline())
    718 		;
    719 	if (ferror(stdin))
    720 		error(EFERROR, strerror(errno));
    721 }