scc

simple c99 compiler
git clone git://git.simple-cc.org/scc
Log | Files | Refs | Submodules | README | LICENSE

lex.c (18724B)


      1 #include <assert.h>
      2 #include <ctype.h>
      3 #include <errno.h>
      4 #include <limits.h>
      5 #include <setjmp.h>
      6 #include <stdio.h>
      7 #include <stdlib.h>
      8 #include <string.h>
      9 
     10 #include <scc/cstd.h>
     11 #include <scc/scc.h>
     12 #include "cc1.h"
     13 
/* value returned by escape() and utf8rune() for characters that cannot be decoded */
#define REPLACECHAR 0xFFFD
/* values for the "multi" argument of escape() and decode() */
#define NOMULTICHAR 0
#define MULTICHAR   1
/* maximum number of continuation bytes scanned by utf8rune() */
#define UTF8_MAX    4

/* last token returned by next() */
int yytoken;
/* value attached to the last token (symbol or token code) */
struct yystype yylval;
/* text of the last token, filled by tok2str() */
char yytext[STRINGSIZ+3];
/* length of the text stored in yytext */
unsigned short yylen;
/* skipspaces() stops at '\n' when this is CPPMODE */
int lexmode = CCMODE;
/* current line number, advanced by newline() and reset by setloc() */
unsigned lineno;
/* current file name, updated by setloc() */
char filenam[FILENAME_MAX];

/* namespace used by iden() to look up identifiers */
int namespace = NS_IDEN;
/* recovery point selected with setsafe() and used by discard() */
static int safe;
/* top of the stack of inputs, see addinput() and delinput() */
Input *input;
     30 
     31 void
     32 setloc(char *fname, unsigned line)
     33 {
     34 	size_t len;
     35 
     36 	if (fname) {
     37 		if ((len = strlen(fname)) >= FILENAME_MAX)
     38 			die("cc1: %s: file name too long", fname);
     39 		memmove(filenam, fname, len);
     40 		filenam[len] = '\0';
     41 
     42 		/*
     43 		 * There are cases where we want to call setloc()
     44 		 * with the data in input, and then we have t be
     45 		 * careful about freeing input->filenam
     46 		 */
     47 		if (fname != input->filenam) {
     48 			free(input->filenam);
     49 			input->filenam = xstrdup(fname);
     50 		}
     51 	}
     52 
     53 	lineno = input->lineno = line;
     54 }
     55 
     56 int
     57 addinput(int type, void *arg, int fail)
     58 {
     59 	FILE *fp;
     60 	char *extp, *fname, *buffer, *infile;
     61 	int infileln;
     62 	Macro *mp;
     63 	Symbol *sym;
     64 	Input *newip, *curip = input;
     65 
     66 	if (curip)
     67 		curip->lineno = lineno;
     68 
     69 	switch (type) {
     70 	case IMACRO:
     71 		fp = NULL;
     72 		mp = arg;
     73 		sym = mp->sym;
     74 		fname = mp->fname;
     75 		buffer = mp->buffer;
     76 		DBG("INPUT: expanding macro %s", sym->name);
     77 		break;
     78 	case IPARAM:
     79 		fp = NULL;
     80 		mp = NULL;
     81 		buffer = arg;
     82 		fname = filenam;
     83 		DBG("INPUT: macro parameter '%s'", buffer);
     84 		break;
     85 	case IFILE:
     86 		fname = arg;
     87 		mp = NULL;
     88 		buffer = NULL;
     89 
     90 		if ((fp = fopen(fname, "r")) == NULL) {
     91 			if (!fail)
     92 				return 0;
     93 			die("cc1: %s: %s", fname, strerror(errno));
     94 		}
     95 		if (curip && onlyheader) {
     96 			infile = curip->filenam;
     97 			infileln = strlen(infile);
     98 			if (extp = strrchr(infile, '.'))
     99 				infileln -= strlen(extp);
    100 			printf("%.*s.o: %s %s\n",
    101 			       infileln, infile, infile, fname);
    102 		}
    103 		lineno = 0;
    104 		DBG("INPUT: file input '%s'", fname);
    105 		break;
    106 	case ISTDIN:
    107 		fp = stdin;
    108 		mp = NULL;
    109 		fname = "<stdin>";
    110 		buffer = NULL;
    111 		lineno = 0;
    112 		DBG("INPUT: file input 'stdin'");
    113 		break;
    114 	default:
    115 		abort();
    116 	}
    117 
    118 	if (!buffer) {
    119 		buffer = xmalloc(INPUTSIZ);
    120 		buffer[0] = '\0';
    121 	} else {
    122 		buffer = xstrdup(buffer);
    123 	}
    124 
    125 	newip = xmalloc(sizeof(*newip));
    126 	newip->next = curip;
    127 	newip->macro = mp;
    128 	newip->p = newip->begin = newip->line = buffer;
    129 	newip->filenam = NULL;
    130 	newip->lineno = 0;
    131 	newip->fp = fp;
    132 	newip->flags = type;
    133 	input = newip;
    134 
    135 	setloc(fname, lineno);
    136 	return 1;
    137 }
    138 
    139 void
    140 delinput(void)
    141 {
    142 	Input *ip = input;
    143 
    144 	switch (ip->flags & ITYPE) {
    145 	case IFILE:
    146 		DBG("INPUT: file finished '%s'", ip->filenam);
    147 		if (fclose(ip->fp))
    148 			die("cc1: %s: %s", ip->filenam, strerror(errno));
    149 		break;
    150 	case IMACRO:
    151 		DBG("INPUT: macro %s finished", ip->macro->sym->name);
    152 		delmacro(ip->macro);
    153 		break;
    154 	case IPARAM:
    155 		DBG("INPUT: macro param finished");
    156 		break;
    157 	case ISTDIN:
    158 		DBG("INPUT: stdin finished");
    159 		break;
    160 	default:
    161 		abort();
    162 	}
    163 
    164 	input = ip->next;
    165 	free(ip->filenam);
    166 	free(ip->line);
    167 	free(ip);
    168 	if (input)
    169 		setloc(input->filenam, input->lineno);
    170 }
    171 
    172 static void
    173 newline(void)
    174 {
    175 	if (++lineno == 0)
    176 		die("cc1: %s: file too long", filenam);
    177 }
    178 
    179 /*
    180  * Read the next character from the input file, counting number of lines
    181  * and joining lines escaped with \
    182  */
    183 static int
    184 readchar(void)
    185 {
    186 	FILE *fp = input->fp;
    187 	int c;
    188 
    189 repeat:
    190 	switch (c = getc(fp)) {
    191 	case '\\':
    192 		if ((c = getc(fp)) == '\n') {
    193 			newline();
    194 			goto repeat;
    195 		}
    196 		ungetc(c, fp);
    197 		c = '\\';
    198 		break;
    199 	case '\n':
    200 		newline();
    201 		break;
    202 	case EOF:
    203 		break;
    204 	}
    205 
    206 	return c;
    207 }
    208 
    209 /*
    210  * discard a C comment. This function is only called from readline
    211  * because it is impossible to have a comment in a macro, because
    212  * comments are always discarded before processing any cpp directive
    213  */
    214 static void
    215 comment(int type)
    216 {
    217 	int c;
    218 
    219 	c = readchar();
    220 repeat:
    221 	for ( ; c != EOF && c != type; c = readchar())
    222 		;
    223 
    224 	if (c == EOF) {
    225 		errorp("unterminated comment");
    226 		return;
    227 	}
    228 
    229 	if (type == '*' && (c = readchar()) != '/')
    230 		goto repeat;
    231 }
    232 
    233 /*
    234  * readline is used to read a full logic line from a file.
    235  * It discards comments and check that the line fits in
    236  * the input buffer
    237  */
    238 static int
    239 readline(void)
    240 {
    241 	char *bp, *lim;
    242 	int c, peekc = 0, delim = 0;
    243 
    244 	if (feof(input->fp)) {
    245 		input->flags |= IEOF;
    246 		*input->p = '\0';
    247 		return 0;
    248 	}
    249 
    250 	*input->line = '\0';
    251 	lim = &input->line[INPUTSIZ-1];
    252 	for (bp = input->line; bp < lim-1; *bp++ = c) {
    253 		c = (peekc) ? peekc : readchar();
    254 		peekc = 0;
    255 		if (c == '\n' || c == EOF)
    256 			break;
    257 		if (c == '\\') {
    258 			peekc = readchar();
    259 			if (peekc == '\n' || peekc == EOF)
    260 				continue;
    261 			if (bp == lim-2)
    262 				break;
    263 			*bp++ = c;
    264 			c = peekc;
    265 			peekc = 0;
    266 			continue;
    267 		}
    268 
    269 		if (delim && c == delim)
    270 			delim = 0;
    271 		else if (!delim && (c == '"' || c == '\''))
    272 			delim = c;
    273 		if (c != '/' || delim)
    274 			continue;
    275 
    276 		/* check for /* or // */
    277 		peekc = readchar();
    278 		if (peekc != '*' && peekc != '/')
    279 			continue;
    280 
    281 		if (peekc == '/') {
    282 			comment('\n');
    283 			break;
    284 		} else {
    285 			comment('*');
    286 			c = ' ';
    287 		}
    288 		peekc = 0;
    289 	}
    290 
    291 	input->begin = input->p = input->line;
    292 	if (bp == lim-1) {
    293 		errorp("line too long");
    294 		--bp;
    295 	}
    296 	*bp++ = '\n';
    297 	*bp = '\0';
    298 
    299 	return 1;
    300 }
    301 
    302 /*
    303  * moreinput gets more bytes to be passed to the lexer.
    304  * It can take more bytes from macro expansions or
    305  * directly reading from files. When a cpp directive
    306  * is processed the line is discarded because it must not
    307  * be passed to the lexer
    308  */
    309 int
    310 moreinput(void)
    311 {
    312 	int wasexpand = 0;
    313 
    314 repeat:
    315 	if (!input)
    316 		return 0;
    317 
    318 	if (*input->p == '\0') {
    319 		int t = input->flags & ITYPE;
    320 		if (t == IPARAM) {
    321 			input->flags |= IEOF;
    322 			return 0;
    323 		}
    324 		if (t == IMACRO) {
    325 			wasexpand = 1;
    326 			input->flags |= IEOF;
    327 		}
    328 		if (input->flags & IEOF) {
    329 			delinput();
    330 			goto repeat;
    331 		}
    332 		if (!readline()) {
    333 			*input->p = '\0';
    334 			goto repeat;
    335 		}
    336 		if (cpp())
    337 			goto repeat;
    338 	}
    339 
    340 	if (onlycpp && !wasexpand)
    341 		ppragmaln();
    342 	return 1;
    343 }
    344 
    345 static void
    346 tok2str(void)
    347 {
    348 	if ((yylen = input->p - input->begin) >= sizeof(yytext))
    349 		error("token too big");
    350 	memcpy(yytext, input->begin, yylen);
    351 	yytext[yylen] = '\0';
    352 	input->begin = input->p;
    353 }
    354 
    355 static Symbol *
    356 readint(int base, int sign, Symbol *sym)
    357 {
    358 	char *s = yytext;
    359 	Type *tp = sym->type;
    360 	struct limits *lim;
    361 	unsigned long long u, val, max;
    362 	int c;
    363 
    364 	lim = getlimits(tp);
    365 	max = lim->max.i;
    366 	if (*s == '0')
    367 		++s;
    368 	if (toupper(*s) == 'X')
    369 		++s;
    370 
    371 	for (u = 0; isxdigit(c = *s++); u = u*base + val) {
    372 		static char letters[] = "0123456789ABCDEF";
    373 		val = strchr(letters, toupper(c)) - letters;
    374 	repeat:
    375 		if (u <= max/base && u*base <= max - val)
    376 			continue;
    377 		if (tp->prop & TSIGNED) {
    378 			if (tp == inttype)
    379 				tp = (base==10) ? longtype : uinttype;
    380 			else if (tp == longtype)
    381 				tp = (base==10) ? llongtype : ulongtype;
    382 			else if (tp == llongtype && base != 10)
    383 				tp = ullongtype;
    384 			else
    385 				goto overflow;
    386 		} else {
    387 			if (tp == uinttype)
    388 				tp = (sign==UNSIGNED) ? ulongtype : longtype;
    389 			else if (tp == ulongtype)
    390 				tp = (sign==UNSIGNED) ? ullongtype : llongtype;
    391 			else
    392 				goto overflow;
    393 		}
    394 		sym->type = tp;
    395 		lim = getlimits(tp);
    396 		max = lim->max.i;
    397 		goto repeat;
    398 	}
    399 
    400 	if (tp->prop & TSIGNED)
    401 		sym->u.i = u;
    402 	else
    403 		sym->u.u = u;
    404 
    405 	return sym;
    406 
    407 overflow:
    408 	errorp("overflow in integer constant");
    409 	return sym;
    410 }
    411 
    412 static int
    413 integer(int base)
    414 {
    415 	Type *tp;
    416 	Symbol *sym;
    417 	unsigned size, sign;
    418 
    419 	for (size = sign = 0; ; ++input->p) {
    420 		switch (toupper(*input->p)) {
    421 		case 'L':
    422 			if (size == LLONG)
    423 				goto wrong_type;
    424 			size = (size == LONG) ? LLONG : LONG;
    425 			continue;
    426 		case 'U':
    427 			if (sign == UNSIGNED)
    428 				goto wrong_type;
    429 			sign = UNSIGNED;
    430 			continue;
    431 		default:
    432 			goto convert;
    433 		wrong_type:
    434 			error("invalid suffix in integer constant");
    435 		}
    436 	}
    437 
    438 convert:
    439 	tok2str();
    440 	tp = ctype(INT, sign, size);
    441 	sym = newsym(NS_IDEN, NULL);
    442 	sym->type = tp;
    443 	sym->flags |= SCONSTANT;
    444 	yylval.sym = readint(base, sign, sym);
    445 	return CONSTANT;
    446 }
    447 
    448 static void
    449 digits(int base)
    450 {
    451 	char *p;
    452 	int c;
    453 
    454 	for (p = input->p; c = *p; ++p) {
    455 		switch (base) {
    456 		case 8:
    457 			if (!strchr("01234567", c))
    458 				goto end;
    459 			break;
    460 		case 10:
    461 			if (!isdigit(c))
    462 				goto end;
    463 			break;
    464 		case 16:
    465 			if (!isxdigit(c))
    466 				goto end;
    467 			break;
    468 		}
    469 	}
    470 end:
    471 	input->p = p;
    472 }
    473 
    474 static int
    475 number(void)
    476 {
    477 	Type *tp;
    478 	Symbol *sym;
    479 	int ch, base;
    480 	long double ld;
    481 
    482 	if (*input->p != '0') {
    483 		base = 10;
    484 	} else {
    485 		if (toupper(*++input->p) == 'X') {
    486 			++input->p;
    487 			base = 16;
    488 		} else {
    489 			base = 8;
    490 		}
    491 	}
    492 	digits(base);
    493 
    494 	if (*input->p != '.')
    495 		return integer(base);
    496 
    497 	sym = newsym(NS_IDEN, NULL);
    498 
    499 	ld = strtold(input->begin, &input->p);
    500 	switch (toupper(*input->p)) {
    501 	case 'F':
    502 		++input->p;
    503 		tp = floattype;
    504 		sym->u.f = ld;
    505 		break;
    506 	case 'L':
    507 		++input->p;
    508 		tp = ldoubletype;
    509 		sym->u.ld = ld;
    510 		break;
    511 	default:
    512 		tp = doubletype;
    513 		sym->u.d = ld;
    514 		break;
    515 	}
    516 
    517 	tok2str();
    518 	sym->type = tp;
    519 	sym->flags |= SCONSTANT;
    520 	yylval.sym = sym;
    521 	return CONSTANT;
    522 }
    523 
    524 static Rune
    525 escape(int multi)
    526 {
    527 	Rune c;
    528 	int uni, d, i, cnt, base;
    529 
    530 	switch (*++input->p) {
    531 	case 'a':
    532 		return '\a';
    533 	case 'b':
    534 		return '\b';
    535 	case 'f':
    536 		return '\f';
    537 	case 'n':
    538 		return '\n';
    539 	case 'r':
    540 		return '\r';
    541 	case 't':
    542 		return '\t';
    543 	case 'v':
    544 		return '\v';
    545 	case '"':
    546 		return '"';
    547 	case '\'':
    548 		return '\'';
    549 	case '\\':
    550 		return '\\';
    551 	case '\?':
    552 		return '\?';
    553 	case 'U':
    554 		cnt = 8;
    555 		goto check_universal;
    556 	case 'u':
    557 		cnt = 4;
    558 	check_universal:
    559 		if (!multi)
    560 			warn("multi-character character constant");
    561 		++input->p;
    562 		uni = 1;
    563 		base = 16;
    564 		break;
    565 	case 'x':
    566 		++input->p;
    567 		uni = 0;
    568 		cnt = 2;
    569 		base = 16;
    570 		break;
    571 	case '0':
    572 	case '1':
    573 	case '2':
    574 	case '3':
    575 	case '4':
    576 	case '5':
    577 	case '6':
    578 	case '7':
    579 		uni = 0;
    580 		cnt = 3;
    581 		base = 8;
    582 		break;
    583 	default:
    584 		warn("unknown escape sequence");
    585 		return ' ';
    586 	}
    587 
    588 	for (c = i = 0; i < cnt && isxdigit(*input->p); ++i) {
    589 		static char digits[] = "0123456789ABCDEF";
    590 		char *p = strchr(digits, toupper(*input->p));
    591 
    592 		if (!p || (d = p - digits) > base)
    593 			break;
    594 		c *= base;
    595 		c += d;
    596 		++input->p;
    597 	}
    598 	--input->p;
    599 
    600 	if (base == 16 && i != cnt) {
    601 		if (uni) {
    602 			errorp("incorrect digit for universal character constant");
    603 			c = REPLACECHAR;
    604 		} else {
    605 			errorp("\\x used with no following hex digits");
    606 			c = ' ';
    607 		}
    608 	}
    609 
    610 	if (!uni)
    611 		return c;
    612 
    613 	if (c < 0xa0 && c != 0x24 && c != 0x40 && c != 0x60
    614 	|| c >= 0xD800 && c <= 0xDFFF
    615 	|| c >= 0x110000) {
    616 		errorp("invalid universal character constant");
    617 		c = REPLACECHAR;
    618 	}
    619 
    620 	return c;
    621 }
    622 
    623 static int
    624 validutf8(Rune wc, int *nbytes)
    625 {
    626 	static struct range {
    627 		unsigned long begin, end;
    628 		int valid;
    629 		int nbytes;
    630 	} ranges[] = {
    631 		{0,         0x80,       1, 1},
    632 		{0x80,      0x800,      1, 2},
    633 		{0x800,     0xD800,     1, 3},
    634 		{0xD800,    0xDD00,     0, 3},
    635 		{0xDD00,    0x10000,    1, 3},
    636 		{0x10000,   0x110000,   1, 4},
    637 		{0x110000,  -1ul,       0, 0},
    638 	};
    639 	struct range *bp;
    640 
    641 	for (bp = ranges; bp->begin > wc || bp->end <= wc; ++
    642 bp)
    643 		;
    644 	*nbytes = bp->nbytes;
    645 
    646 	return bp->valid;
    647 }
    648 
    649 static Rune
    650 utf8rune(void)
    651 {
    652 	Rune wc;
    653 	int i, sh, n;
    654 	unsigned oc, c;
    655 	unsigned char *s = (unsigned char *) input->p;
    656 
    657 	/* fast track for ascii */
    658 	if ((c = *s) < 0x80)
    659 		return c;
    660 
    661 	/* out of sequence multibyte? */
    662 	if ((c & 0xc0) != 0xc0)
    663 		goto invalid;
    664 
    665 	sh = 1;
    666 	wc = 0;
    667 	oc = c << 1;
    668 
    669 	for (i = 0; i < UTF8_MAX; ++i) {
    670 		c = s[1];
    671 		if ((c & 0xc0) != 0x80)
    672 			goto invalid;
    673 		++s;
    674 
    675 		wc <<= 6;
    676 		wc |= c & 0x3f;
    677 		oc <<= 1;
    678 		sh++;
    679 
    680 		if ((oc & 0x80) == 0) {
    681 			oc = (oc & 0xff) >> sh;
    682 			wc |= oc << (sh-1) * 6;
    683 
    684 			if (!validutf8(wc, &n) || sh != n)
    685 				goto invalid;
    686 			goto return_code;
    687 		}
    688 	}
    689 
    690 invalid:
    691 	errorp("invalid multibyte sequence");
    692 	wc = REPLACECHAR;
    693 
    694 return_code:
    695 	input->p = s;
    696 	return wc;
    697 }
    698 
    699 static Rune
    700 decode(int multi)
    701 {
    702 	Rune r;
    703 
    704 	if (*input->p == '\\') {
    705 		r = escape(multi);
    706 		return r;
    707 	}
    708 
    709 	return multi ? utf8rune() : *input->p;
    710 }
    711 
    712 static int
    713 character(void)
    714 {
    715 	int i, multi = NOMULTICHAR;
    716 	Rune r;
    717 	Type *tp;
    718 	Symbol *sym;
    719 	long long d;
    720 	struct limits *lim;
    721 
    722 	tp = chartype;
    723 	if (*input->p == 'L') {
    724 		multi = MULTICHAR;
    725 		input->p++;
    726 		tp = wchartype;
    727 	}
    728 
    729 	lim = getlimits(tp);
    730 
    731 	d = 0;
    732 	input->p++;
    733 	for (i = 0; *input->p != '\''; i++) {
    734 		r = decode(multi);
    735 		if (r > lim->max.i)
    736 			warn("character too large for enclosing character literal type");
    737 		d |= r;
    738 		input->p++;
    739 	}
    740 	input->p++;
    741 
    742 	if (i == 0)
    743 		errorp("empty character constant");
    744 	if (i > 1)
    745 		warn("multi-character character constant");
    746 
    747 	if (tp == chartype) {
    748 		if (tp->prop & TSIGNED && d > lim->max.i)
    749 			d -= getlimits(uchartype)->max.i + 1;
    750 		tp = inttype;
    751 	}
    752 
    753 	sym = newsym(NS_IDEN, NULL);
    754 	sym->u.i = d;
    755 	sym->type = tp;
    756 	yylval.sym = sym;
    757 	tok2str();
    758 
    759 	return CONSTANT;
    760 }
    761 
    762 static int
    763 rstring(void)
    764 {
    765 	char *beg = input->p++;
    766 	Rune c, buff[STRINGSIZ+1], *bp = buff;
    767 
    768 	for (++input->p; ; ++input->p) {
    769 		switch (*input->p) {
    770 		case '\0':
    771 			errorp("missing terminating '\"' character");
    772 		case '"':
    773 			goto end_loop;
    774 		}
    775 
    776 		c = decode(MULTICHAR);
    777 		if (input->p - beg == STRINGSIZ + 1) {
    778 			/* too long, ignore everything until next quote */
    779 			for (++input->p; *input->p != '"'; ++input->p) {
    780 				if (*input->p == '\\')
    781 					++input->p;
    782 				if (*input->p == '\0')
    783 					break;
    784 			}
    785 			--bp;
    786 			errorp("string too long");
    787 			break;
    788 		}
    789 		*bp++ = c;
    790 	}
    791 
    792 end_loop:
    793 	*bp++ = '\0';
    794 	input->p++;
    795 
    796 	yylval.sym = newrstring(buff, bp - buff);
    797 	tok2str();
    798 
    799 	return STRING;
    800 }
    801 
    802 static int
    803 sstring(void)
    804 {
    805 	int c;
    806 	struct limits *lim;
    807 	char buff[STRINGSIZ+1], *bp = buff, *beg = input->p;
    808 
    809 	for (++input->p; ; ++input->p) {
    810 		switch (*input->p) {
    811 		case '\0':
    812 			errorp("missing terminating '\"' character");
    813 		case '"':
    814 			goto end_loop;
    815 		}
    816 
    817 		c = decode(NOMULTICHAR);
    818 		if (input->p - beg == STRINGSIZ + 1) {
    819 			/* too long, ignore everything until next quote */
    820 			for (++input->p; *input->p != '"'; ++input->p) {
    821 				if (*input->p == '\\')
    822 					++input->p;
    823 				if (*input->p == '\0')
    824 					break;
    825 			}
    826 			--bp;
    827 			errorp("string too long");
    828 			break;
    829 		}
    830 		*bp++ = c;
    831 	}
    832 
    833 end_loop:
    834 	*bp++ = '\0';
    835 	input->p++;
    836 
    837 	yylval.sym = newstring(buff, bp - buff);
    838 	tok2str();
    839 
    840 	return STRING;
    841 }
    842 
    843 static int
    844 string(void)
    845 {
    846 	if (*input->p == 'L')
    847 		return rstring();
    848 	return sstring();
    849 }
    850 
    851 static int
    852 iden(void)
    853 {
    854 	Symbol *sym;
    855 	char *p, *begin;
    856 
    857 	if (input->p[0] == 'L' && input->p[1] == '\'')
    858 		return character();
    859 
    860 	if (input->p[0] == 'L' && input->p[1] == '"')
    861 		return string();
    862 
    863 	begin = input->p;
    864 	for (p = begin; isalnum(*p) || *p == '_'; ++p)
    865 		;
    866 	input->p = p;
    867 	tok2str();
    868 	if ((sym = lookup(NS_CPP, yytext, NOALLOC)) != NULL) {
    869 		if (expand(sym))
    870 			return next();
    871 	}
    872 	sym = lookup(namespace, yytext, ALLOC);
    873 	yylval.sym = sym;
    874 	if (sym->flags & SCONSTANT)
    875 		return CONSTANT;
    876 	if (sym->token != IDEN)
    877 		yylval.token = sym->u.token;
    878 	return sym->token;
    879 }
    880 
    881 static int
    882 follow(int expect, int ifyes, int ifno)
    883 {
    884 	if (*input->p++ == expect)
    885 		return ifyes;
    886 	--input->p;
    887 	return ifno;
    888 }
    889 
    890 static int
    891 minus(void)
    892 {
    893 	switch (*input->p++) {
    894 	case '-':
    895 		return DEC;
    896 	case '>':
    897 		return INDIR;
    898 	case '=':
    899 		return SUB_EQ;
    900 	default:
    901 		--input->p;
    902 		return '-';
    903 	}
    904 }
    905 
    906 static int
    907 plus(void)
    908 {
    909 	switch (*input->p++) {
    910 	case '+':
    911 		return INC;
    912 	case '=':
    913 		return ADD_EQ;
    914 	default:
    915 		--input->p;
    916 		return '+';
    917 	}
    918 }
    919 
    920 static int
    921 relational(int op, int equal, int shift, int assig)
    922 {
    923 	int c;
    924 
    925 	if ((c = *input->p++) == '=')
    926 		return equal;
    927 	if (c == op)
    928 		return follow('=', assig, shift);
    929 	--input->p;
    930 	return op;
    931 }
    932 
    933 static int
    934 logic(int op, int equal, int logic)
    935 {
    936 	int c;
    937 
    938 	if ((c = *input->p++) == '=')
    939 		return equal;
    940 	if (c == op)
    941 		return logic;
    942 	--input->p;
    943 	return op;
    944 }
    945 
    946 static int
    947 dot(void)
    948 {
    949 	int c;
    950 
    951 	if (isdigit(*input->p))
    952 		return number();
    953 	if ((c = *input->p) != '.')
    954 		return '.';
    955 	if ((c = *++input->p) != '.')
    956 		error("incorrect token '..'");
    957 	++input->p;
    958 	return ELLIPSIS;
    959 }
    960 
    961 static int
    962 operator(void)
    963 {
    964 	int t;
    965 
    966 	switch (t = *input->p++) {
    967 	case '<':
    968 		t = relational('<', LE, SHL, SHL_EQ);
    969 		break;
    970 	case '>':
    971 		t = relational('>', GE, SHR, SHR_EQ);
    972 		break;
    973 	case '&':
    974 		t = logic('&', AND_EQ, AND);
    975 		break;
    976 	case '|':
    977 		t = logic('|', OR_EQ, OR);
    978 		break;
    979 	case '=':
    980 		t = follow('=', EQ, '=');
    981 		break;
    982 	case '^':
    983 		t = follow('=', XOR_EQ, '^');
    984 		break;
    985 	case '*':
    986 		t = follow('=', MUL_EQ, '*');
    987 		break;
    988 	case '/':
    989 		t = follow('=', DIV_EQ, '/');
    990 		break;
    991 	case '%':
    992 		t = follow('=', MOD_EQ, '%');
    993 		break;
    994 	case '!':
    995 		t = follow('=', NE, '!');
    996 		break;
    997 	case '-':
    998 		t = minus();
    999 		break;
   1000 	case '+':
   1001 		t = plus();
   1002 		break;
   1003 	case '.':
   1004 		t = dot();
   1005 		break;
   1006 	}
   1007 	tok2str();
   1008 	return t;
   1009 }
   1010 
   1011 /* TODO: Ensure that namespace is NS_IDEN after a recovery */
   1012 
   1013 /*
   1014  * skip all the spaces until the next token. When we are in
   1015  * CPPMODE \n is not considered a whitespace
   1016  */
   1017 static int
   1018 skipspaces(void)
   1019 {
   1020 	int c;
   1021 
   1022 	if (!input)
   1023 		return EOF;
   1024 
   1025 	for (;;) {
   1026 		switch (c = *input->p) {
   1027 		case '\n':
   1028 			if (lexmode == CPPMODE)
   1029 				goto return_byte;
   1030 			++input->p;
   1031 		case '\0':
   1032 			if (!moreinput())
   1033 				return EOF;
   1034 			break;
   1035 		case ' ':
   1036 		case '\t':
   1037 		case '\v':
   1038 		case '\r':
   1039 		case '\f':
   1040 			++input->p;
   1041 			break;
   1042 		default:
   1043 			goto return_byte;
   1044 		}
   1045 	}
   1046 
   1047 return_byte:
   1048 	input->begin = input->p;
   1049 	return c;
   1050 }
   1051 
   1052 int
   1053 next(void)
   1054 {
   1055 	int c;
   1056 
   1057 	if ((c = skipspaces()) == EOF)
   1058 		yytoken = EOFTOK;
   1059 	else if (isalpha(c) || c == '_')
   1060 		yytoken = iden();
   1061 	else if (isdigit(c))
   1062 		yytoken = number();
   1063 	else if (c == '"')
   1064 		yytoken = string();
   1065 	else if (c == '\'')
   1066 		yytoken = character();
   1067 	else
   1068 		yytoken = operator();
   1069 
   1070 	if (yytoken == EOFTOK) {
   1071 		strcpy(yytext, "<EOF>");
   1072 		if (cppctx && !input)
   1073 			errorp("#endif expected");
   1074 	}
   1075 
   1076 	DBG("TOKEN %s", yytext);
   1077 	return yytoken;
   1078 }
   1079 
   1080 void
   1081 expect(int tok)
   1082 {
   1083 	if (yytoken != tok) {
   1084 		if (isgraph(tok))
   1085 			errorp("expected '%c' before '%s'", tok, yytext);
   1086 		else
   1087 			errorp("unexpected '%s'", yytext);
   1088 	} else {
   1089 		next();
   1090 	}
   1091 }
   1092 
   1093 int
   1094 ahead(void)
   1095 {
   1096 	skipspaces();
   1097 	return *input->begin;
   1098 }
   1099 
   1100 void
   1101 setsafe(int type)
   1102 {
   1103 	safe = type;
   1104 }
   1105 
   1106 void
   1107 discard(void)
   1108 {
   1109 	extern jmp_buf recover;
   1110 	int c;
   1111 
   1112 	if (!input)
   1113 		exit(EXIT_FAILURE);
   1114 	for (c = yytoken; ; c = *input->p++) {
   1115 		switch (safe) {
   1116 		case END_COMP:
   1117 			if (c == '}')
   1118 				goto jump;
   1119 			goto semicolon;
   1120 		case END_COND:
   1121 			if (c == ')')
   1122 				goto jump;
   1123 			break;
   1124 		case END_LDECL:
   1125 			if (c == ',')
   1126 				goto jump;
   1127 		case END_DECL:
   1128 		semicolon:
   1129 			if (c == ';')
   1130 				goto jump;
   1131 			break;
   1132 		}
   1133 		if ((c == '\0' || c == EOFTOK) && !moreinput())
   1134 			exit(EXIT_FAILURE);
   1135 	}
   1136 jump:
   1137 	input->begin = input->p;
   1138 	yytoken = c;
   1139 	yytext[0] = c;
   1140 	yytext[1] = '\0';
   1141 	exit(EXIT_FAILURE);
   1142 
   1143 	/*
   1144 	 * FIXME: We don't have a proper recover mechanism at this moment
   1145 	 * and we don't set the recover point ever, so executing this
   1146 	 * longjmp will generate surely a segmentation fault, so it does
   1147 	 * not make sense to do it. We just exit until we can find time
   1148 	 * to solve this problem.
   1149 	 */
   1150 	longjmp(recover, 1);
   1151 }