scc

simple c99 compiler
git clone git://git.simple-cc.org/scc
Log | Files | Refs | Submodules | README | LICENSE

lex.c (16273B)


      1 #include <assert.h>
      2 #include <ctype.h>
      3 #include <errno.h>
      4 #include <limits.h>
      5 #include <setjmp.h>
      6 #include <stdio.h>
      7 #include <stdlib.h>
      8 #include <string.h>
      9 
     10 #include <scc/cstd.h>
     11 #include <scc/scc.h>
     12 #include "cc1.h"
     13 
     14 int yytoken;                 /* kind of the token produced by the last call to next() */
     15 struct yystype yylval;       /* semantic value attached to the current token */
     16 char yytext[STRINGSIZ+3];    /* spelling of the current token, NUL terminated */
     17 unsigned short yylen;        /* length recorded for the spelling stored in yytext */
     18 int lexmode = CCMODE;        /* in CPPMODE skipspaces() stops at newlines */
     19 unsigned lineno;             /* current line number, advanced by newline() */
     20 char filenam[FILENAME_MAX];  /* current file name, set by setloc() */
     21 
     22 int namespace = NS_IDEN;     /* namespace used by iden() to look identifiers up */
     23 static int safe;             /* synchronization point used by discard(), set by setsafe() */
     24 Input *input;                /* innermost active input; NULL when no input is left */
     25 
     26 void
     27 setloc(char *fname, unsigned line)
     28 {
     29 	size_t len;
     30 
     31 	if (fname) {
     32 		if ((len = strlen(fname)) >= FILENAME_MAX)
     33 			die("cc1: %s: file name too long", fname);
     34 		memmove(filenam, fname, len);
     35 		filenam[len] = '\0';
     36 
     37 		/*
     38 		 * There are cases where we want to call setloc()
     39 		 * with the data in input, and then we have t be
     40 		 * careful about freeing input->filenam
     41 		 */
     42 		if (fname != input->filenam) {
     43 			free(input->filenam);
     44 			input->filenam = xstrdup(fname);
     45 		}
     46 	}
     47 
     48 	lineno = input->lineno = line;
     49 }
     50 
     51 int
     52 addinput(int type, void *arg, int fail)
     53 {
     54 	FILE *fp;
     55 	char *extp, *fname, *buffer, *infile;
     56 	int infileln;
     57 	Macro *mp;
     58 	Symbol *sym;
     59 	Input *newip, *curip = input;
     60 
     61 	if (curip)
     62 		curip->lineno = lineno;
     63 
     64 	switch (type) {
     65 	case IMACRO:
     66 		fp = NULL;
     67 		mp = arg;
     68 		sym = mp->sym;
     69 		fname = mp->fname;
     70 		buffer = mp->buffer;
     71 		DBG("INPUT: expanding macro %s", sym->name);
     72 		break;
     73 	case IPARAM:
     74 		fp = NULL;
     75 		mp = NULL;
     76 		buffer = arg;
     77 		fname = filenam;
     78 		DBG("INPUT: macro parameter '%s'", buffer);
     79 		break;
     80 	case IFILE:
     81 		fname = arg;
     82 		mp = NULL;
     83 		buffer = NULL;
     84 
     85 		if ((fp = fopen(fname, "r")) == NULL) {
     86 			if (!fail)
     87 				return 0;
     88 			die("cc1: %s: %s", fname, strerror(errno));
     89 		}
     90 		if (curip && onlyheader) {
     91 			infile = curip->filenam;
     92 			infileln = strlen(infile);
     93 			if (extp = strrchr(infile, '.'))
     94 				infileln -= strlen(extp);
     95 			printf("%.*s.o: %s %s\n",
     96 			       infileln, infile, infile, fname);
     97 		}
     98 		lineno = 0;
     99 		DBG("INPUT: file input '%s'", fname);
    100 		break;
    101 	case ISTDIN:
    102 		fp = stdin;
    103 		mp = NULL;
    104 		fname = "<stdin>";
    105 		buffer = NULL;
    106 		lineno = 0;
    107 		DBG("INPUT: file input 'stdin'");
    108 		break;
    109 	default:
    110 		abort();
    111 	}
    112 
    113 	if (!buffer) {
    114 		buffer = xmalloc(INPUTSIZ);
    115 		buffer[0] = '\0';
    116 	} else {
    117 		buffer = xstrdup(buffer);
    118 	}
    119 
    120 	newip = xmalloc(sizeof(*newip));
    121 	newip->next = curip;
    122 	newip->macro = mp;
    123 	newip->p = newip->begin = newip->line = buffer;
    124 	newip->filenam = NULL;
    125 	newip->lineno = 0;
    126 	newip->fp = fp;
    127 	newip->flags = type;
    128 	input = newip;
    129 
    130 	setloc(fname, lineno);
    131 	return 1;
    132 }
    133 
    134 void
    135 delinput(void)
    136 {
    137 	Input *ip = input;
    138 
    139 	switch (ip->flags & ITYPE) {
    140 	case IFILE:
    141 		DBG("INPUT: file finished '%s'", ip->filenam);
    142 		if (fclose(ip->fp))
    143 			die("cc1: %s: %s", ip->filenam, strerror(errno));
    144 		break;
    145 	case IMACRO:
    146 		DBG("INPUT: macro %s finished", ip->macro->sym->name);
    147 		delmacro(ip->macro);
    148 		break;
    149 	case IPARAM:
    150 		DBG("INPUT: macro param finished");
    151 		break;
    152 	case ISTDIN:
    153 		DBG("INPUT: stdin finished");
    154 		break;
    155 	default:
    156 		abort();
    157 	}
    158 
    159 	input = ip->next;
    160 	free(ip->filenam);
    161 	free(ip->line);
    162 	free(ip);
    163 	if (input)
    164 		setloc(input->filenam, input->lineno);
    165 }
    166 
    167 static void
    168 newline(void)
    169 {
    170 	if (++lineno == 0)
    171 		die("cc1: %s: file too long", filenam);
    172 }
    173 
    174 /*
    175  * Read the next character from the input file, counting number of lines
    176  * and joining lines escaped with \
    177  */
    178 static int
    179 readchar(void)
    180 {
    181 	FILE *fp = input->fp;
    182 	int c;
    183 
    184 repeat:
    185 	switch (c = getc(fp)) {
    186 	case '\\':
    187 		if ((c = getc(fp)) == '\n') {
    188 			newline();
    189 			goto repeat;
    190 		}
    191 		ungetc(c, fp);
    192 		c = '\\';
    193 		break;
    194 	case '\n':
    195 		newline();
    196 		break;
    197 	case EOF:
    198 		break;
    199 	}
    200 
    201 	return c;
    202 }
    203 
    /*
     * Discard a C comment whose opening delimiter was already consumed.
     * type is '\n' for a line comment, which finishes at the end of the
     * line, or '*' for a block comment, which finishes at the first
     * star followed by a slash. Reaching the end of the file before the
     * end of the comment is reported with errorp().
     *
     * This function is only called from readline because it is
     * impossible to have a comment in a macro, because comments are
     * always discarded before processing any cpp directive.
     */
    static void
    comment(int type)
    {
    	int c;

    	c = readchar();
    	for (;;) {
    		if (c == EOF) {
    			errorp("unterminated comment");
    			return;
    		}
    		if (c != type) {
    			c = readchar();
    			continue;
    		}
    		if (type != '*')
    			return;

    		/*
    		 * A '*' closes the comment only when it is followed by
    		 * '/'. Otherwise the character just read is examined
    		 * again by the loop, because it can be another '*'
    		 * (a comment finished with a run of stars) or EOF.
    		 */
    		c = readchar();
    		if (c == '/')
    			return;
    	}
    }
    226 
    227 /*
    228  * readline is used to read a full logic line from a file.
    229  * It discards comments and check that the line fits in
    230  * the input buffer
    231  */
    232 static int
    233 readline(void)
    234 {
    235 	char *bp, *lim;
    236 	int c, peekc = 0, delim = 0;
    237 
    238 	if (feof(input->fp)) {
    239 		input->flags |= IEOF;
    240 		*input->p = '\0';
    241 		return 0;
    242 	}
    243 
    244 	*input->line = '\0';
    245 	lim = &input->line[INPUTSIZ-1];
    246 	for (bp = input->line; bp < lim-1; *bp++ = c) {
    247 		c = (peekc) ? peekc : readchar();
    248 		peekc = 0;
    249 		if (c == '\n' || c == EOF)
    250 			break;
    251 		if (c == '\\') {
    252 			peekc = readchar();
    253 			if (peekc == '\n' || peekc == EOF)
    254 				continue;
    255 			if (bp == lim-2)
    256 				break;
    257 			*bp++ = c;
    258 			c = peekc;
    259 			peekc = 0;
    260 			continue;
    261 		}
    262 
    263 		if (delim && c == delim)
    264 			delim = 0;
    265 		else if (!delim && (c == '"' || c == '\''))
    266 			delim = c;
    267 		if (c != '/' || delim)
    268 			continue;
    269 
    270 		/* check for /* or // */
    271 		peekc = readchar();
    272 		if (peekc != '*' && peekc != '/')
    273 			continue;
    274 
    275 		if (peekc == '/') {
    276 			comment('\n');
    277 			break;
    278 		} else {
    279 			comment('*');
    280 			c = ' ';
    281 		}
    282 		peekc = 0;
    283 	}
    284 
    285 	input->begin = input->p = input->line;
    286 	if (bp == lim-1) {
    287 		errorp("line too long");
    288 		--bp;
    289 	}
    290 	*bp++ = '\n';
    291 	*bp = '\0';
    292 
    293 	return 1;
    294 }
    295 
    296 /*
    297  * moreinput gets more bytes to be passed to the lexer.
    298  * It can take more bytes from macro expansions or
    299  * directly reading from files. When a cpp directive
    300  * is processed the line is discarded because it must not
    301  * be passed to the lexer
    302  */
    303 static int
    304 moreinput(void)
    305 {
    306 	int wasexpand = 0;
    307 
    308 repeat:
    309 	if (!input)
    310 		return 0;
    311 
    312 	if (*input->p == '\0') {
    313 		int t = input->flags & ITYPE;
    314 		if (t == IPARAM) {
    315 			input->flags |= IEOF;
    316 			return 0;
    317 		}
    318 		if (t == IMACRO) {
    319 			wasexpand = 1;
    320 			input->flags |= IEOF;
    321 		}
    322 		if (input->flags & IEOF) {
    323 			delinput();
    324 			goto repeat;
    325 		}
    326 		if (!readline()) {
    327 			*input->p = '\0';
    328 			goto repeat;
    329 		}
    330 		if (cpp())
    331 			goto repeat;
    332 	}
    333 
    334 	if (onlycpp && !wasexpand)
    335 		ppragmaln();
    336 	return 1;
    337 }
    338 
    339 static void
    340 tok2str(void)
    341 {
    342 	if ((yylen = input->p - input->begin) > INTIDENTSIZ)
    343 		error("token too big");
    344 	memcpy(yytext, input->begin, yylen);
    345 	yytext[yylen] = '\0';
    346 	input->begin = input->p;
    347 }
    348 
    349 static Symbol *
    350 readint(char *s, int base, int sign, Symbol *sym)
    351 {
    352 	Type *tp = sym->type;
    353 	struct limits *lim;
    354 	TUINT u, val, max;
    355 	int c;
    356 
    357 	lim = getlimits(tp);
    358 	max = lim->max.i;
    359 	if (*s == '0')
    360 		++s;
    361 	if (toupper(*s) == 'X')
    362 		++s;
    363 
    364 	for (u = 0; isxdigit(c = *s++); u = u*base + val) {
    365 		static char letters[] = "0123456789ABCDEF";
    366 		val = strchr(letters, toupper(c)) - letters;
    367 	repeat:
    368 		if (u <= max/base && u*base <= max - val)
    369 			continue;
    370 		if (tp->prop & TSIGNED) {
    371 			if (tp == inttype)
    372 				tp = (base==10) ? longtype : uinttype;
    373 			else if (tp == longtype)
    374 				tp = (base==10) ? llongtype : ulongtype;
    375 			else
    376 				goto overflow;
    377 		} else {
    378 			if (tp == uinttype)
    379 				tp = (sign==UNSIGNED) ? ulongtype : longtype;
    380 			else if (tp == ulongtype)
    381 				tp = (sign==UNSIGNED) ? ullongtype : llongtype;
    382 			else
    383 				goto overflow;
    384 		}
    385 		sym->type = tp;
    386 		lim = getlimits(tp);
    387 		max = lim->max.i;
    388 		goto repeat;
    389 	}
    390 
    391 	if (tp->prop & TSIGNED)
    392 		sym->u.i = u;
    393 	else
    394 		sym->u.u = u;
    395 
    396 	return sym;
    397 
    398 overflow:
    399 	errorp("overflow in integer constant");
    400 	return sym;
    401 }
    402 
    403 static int
    404 integer(char *s, int base)
    405 {
    406 	Type *tp;
    407 	Symbol *sym;
    408 	unsigned size, sign;
    409 
    410 	for (size = sign = 0; ; ++input->p) {
    411 		switch (toupper(*input->p)) {
    412 		case 'L':
    413 			if (size == LLONG)
    414 				goto wrong_type;
    415 			size = (size == LONG) ? LLONG : LONG;
    416 			continue;
    417 		case 'U':
    418 			if (sign == UNSIGNED)
    419 				goto wrong_type;
    420 			sign = UNSIGNED;
    421 			continue;
    422 		default:
    423 			goto convert;
    424 		wrong_type:
    425 			error("invalid suffix in integer constant");
    426 		}
    427 	}
    428 
    429 convert:
    430 	tok2str();
    431 	tp = ctype(INT, sign, size);
    432 	sym = newsym(NS_IDEN, NULL);
    433 	sym->type = tp;
    434 	sym->flags |= SCONSTANT;
    435 	yylval.sym = readint(s, base, sign, sym);
    436 	return CONSTANT;
    437 }
    438 
    439 static char *
    440 digits(int base)
    441 {
    442 	char *p;
    443 	int c;
    444 
    445 	for (p = input->p; c = *p; ++p) {
    446 		switch (base) {
    447 		case 8:
    448 			if (!strchr("01234567", c))
    449 				goto end;
    450 			break;
    451 		case 10:
    452 			if (!isdigit(c))
    453 				goto end;
    454 			break;
    455 		case 16:
    456 			if (!isxdigit(c))
    457 				goto end;
    458 			break;
    459 		}
    460 	}
    461 end:
    462 	input->p = p;
    463 	return yytext;
    464 }
    465 
    466 static int
    467 number(void)
    468 {
    469 	int base;
    470 
    471 	if (*input->p != '0') {
    472 		base = 10;
    473 	} else {
    474 		if (toupper(*++input->p) == 'X') {
    475 			++input->p;
    476 			base = 16;
    477 		} else {
    478 			base = 8;
    479 		}
    480 	}
    481 
    482 	return integer(digits(base), base);
    483 }
    484 
    485 static int
    486 escape(void)
    487 {
    488 	int c, d, i, cnt, base;
    489 
    490 	switch (*++input->p) {
    491 	case 'a':
    492 		return '\a';
    493 	case 'b':
    494 		return '\b';
    495 	case 'f':
    496 		return '\f';
    497 	case 'n':
    498 		return '\n';
    499 	case 'r':
    500 		return '\r';
    501 	case 't':
    502 		return '\t';
    503 	case 'v':
    504 		return '\v';
    505 	case '"':
    506 		return '"';
    507 	case '\'':
    508 		return '\'';
    509 	case '\\':
    510 		return '\\';
    511 	case '\?':
    512 		return '\?';
    513 	case 'u':
    514 		/*
    515 		 * FIXME: universal constants are not correctly handled
    516 		 */
    517 		if (!isdigit(*++input->p))
    518 			warn("incorrect digit for numerical character constant");
    519 		base = 10;
    520 		break;
    521 	case 'x':
    522 		if (!isxdigit(*++input->p))
    523 			warn("\\x used with no following hex digits");
    524 		cnt = 2;
    525 		base = 16;
    526 		break;
    527 	case '0':
    528 	case '1':
    529 	case '2':
    530 	case '3':
    531 	case '4':
    532 	case '5':
    533 	case '6':
    534 	case '7':
    535 		cnt = 3;
    536 		base = 8;
    537 		break;
    538 	default:
    539 		warn("unknown escape sequence");
    540 		return ' ';
    541 	}
    542 
    543 	for (c = i = 0; i < cnt; ++i) {
    544 		static char digits[] = "0123456789ABCDEF";
    545 		char *p = strchr(digits, toupper(*input->p));
    546 
    547 		if (!p || (d = p - digits) > base)
    548 			break;
    549 		c *= base;
    550 		c += d;
    551 		++input->p;
    552 	}
    553 	--input->p;
    554 
    555 	return c;
    556 }
    557 
    558 static Rune
    559 utf8rune(void)
    560 {
    561 	Rune wc;
    562 	unsigned c;
    563 	size_t i, len;
    564 
    565 	c = *input->p;
    566 	for (len = 0; c & 0x80; len++)
    567 		c <<= 1;
    568 	if (len == 0)
    569 		return c;
    570 	if (len == 1 || len == 8)
    571 		goto invalid;
    572 
    573 	wc = (c & 0xFF) >> len;
    574 	for (i = 0; i < len-1; i++) {
    575 		c = input->p[1];
    576 		if ((c & 0xC0) != 0x80)
    577 			goto invalid;
    578 		input->p++;
    579 		wc <<= 6;
    580 		wc |= c & 0x3F;
    581 	}
    582 	return wc;
    583 
    584 invalid:
    585 	errorp("invalid multibyte sequence");
    586 	return 0xFFFD;
    587 }
    588 
    589 static Rune
    590 decode(int multi)
    591 {
    592 	Rune r;
    593 
    594 	if (*input->p == '\\') {
    595 		r = escape();
    596 		return r;
    597 	}
    598 
    599 	return multi ? utf8rune() : *input->p;
    600 }
    601 
    602 static int
    603 character(void)
    604 {
    605 	int i, multi = 0;
    606 	Rune r, d;
    607 	Type *tp = inttype;
    608 	Symbol *sym;
    609 
    610 	if (*input->p == 'L') {
    611 		multi = 1;
    612 		tp = wchartype;
    613 		input->p++;
    614 	}
    615 
    616 	d = 0;
    617 	input->p++;
    618 	for (i = 0; *input->p != '\''; i++) {
    619 		r = decode(multi);
    620 		if (r > getlimits(tp)->max.i)
    621 			warn("character too large for enclosing character literal type");
    622 		d |= r;
    623 		input->p++;
    624 	}
    625 	input->p++;
    626 
    627 	if (i == 0)
    628 		errorp("empty character constant");
    629 	if (i > 1)
    630 		warn("multi-character character constant");
    631 
    632 	sym = newsym(NS_IDEN, NULL);
    633 	sym->u.i = d;
    634 	sym->type = tp;
    635 	yylval.sym = sym;
    636 	tok2str();
    637 	return CONSTANT;
    638 }
    639 
    640 /*
    641  * string() parses a constant string, and convert all the
    642  * escape sequences into single characters. This behaviour
    643  * is correct except when we parse a #define, where we want
    644  * to preserve the literal content of the string. In that
    645  * case cpp.c:/^define( sets the variable disescape to
    646  * disable converting escape sequences into characters.
    647  */
    648 static int
    649 string(void)
    650 {
    651 	char *bp = yytext;
    652 	int c, esc;
    653 
    654 	*bp++ = '"';
    655 	esc = 0;
    656 	for (++input->p; ; ++input->p) {
    657 		c = *input->p;
    658 
    659 		if (c == '"' && !esc)
    660 			break;
    661 
    662 		if (c == '\0') {
    663 			errorp("missing terminating '\"' character");
    664 			break;
    665 		}
    666 
    667 		esc = (c == '\\' && !esc && disescape);
    668 
    669 		if (c == '\\' && !esc)
    670 			c = escape();
    671 
    672 		if (bp == &yytext[STRINGSIZ+1]) {
    673 			/* too long, ignore everything until next quote */
    674 			for (++input->p; *input->p != '"'; ++input->p) {
    675 				if (*input->p == '\\')
    676 					++input->p;
    677 				if (*input->p == '\0')
    678 					break;
    679 			}
    680 			--bp;
    681 			errorp("string too long");
    682 			break;
    683 		}
    684 		*bp++ = c;
    685 	}
    686 
    687 	input->begin = ++input->p;
    688 	*bp = '\0';
    689 
    690 	yylen = bp - yytext + 1;
    691 	yylval.sym = newstring(yytext+1, yylen-1);
    692 	*bp++ = '"';
    693 	*bp = '\0';
    694 	return STRING;
    695 }
    696 
    697 static int
    698 iden(void)
    699 {
    700 	Symbol *sym;
    701 	char *p, *begin;
    702 
    703 	if (input->p[0] == 'L' && input->p[1] == '\'')
    704 		return character();
    705 
    706 	begin = input->p;
    707 	for (p = begin; isalnum(*p) || *p == '_'; ++p)
    708 		;
    709 	input->p = p;
    710 	tok2str();
    711 	if ((sym = lookup(NS_CPP, yytext, NOALLOC)) != NULL) {
    712 		if (expand(sym))
    713 			return next();
    714 	}
    715 	sym = lookup(namespace, yytext, ALLOC);
    716 	yylval.sym = sym;
    717 	if (sym->flags & SCONSTANT)
    718 		return CONSTANT;
    719 	if (sym->token != IDEN)
    720 		yylval.token = sym->u.token;
    721 	return sym->token;
    722 }
    723 
    724 static int
    725 follow(int expect, int ifyes, int ifno)
    726 {
    727 	if (*input->p++ == expect)
    728 		return ifyes;
    729 	--input->p;
    730 	return ifno;
    731 }
    732 
    733 static int
    734 minus(void)
    735 {
    736 	switch (*input->p++) {
    737 	case '-':
    738 		return DEC;
    739 	case '>':
    740 		return INDIR;
    741 	case '=':
    742 		return SUB_EQ;
    743 	default:
    744 		--input->p;
    745 		return '-';
    746 	}
    747 }
    748 
    749 static int
    750 plus(void)
    751 {
    752 	switch (*input->p++) {
    753 	case '+':
    754 		return INC;
    755 	case '=':
    756 		return ADD_EQ;
    757 	default:
    758 		--input->p;
    759 		return '+';
    760 	}
    761 }
    762 
    763 static int
    764 relational(int op, int equal, int shift, int assig)
    765 {
    766 	int c;
    767 
    768 	if ((c = *input->p++) == '=')
    769 		return equal;
    770 	if (c == op)
    771 		return follow('=', assig, shift);
    772 	--input->p;
    773 	return op;
    774 }
    775 
    776 static int
    777 logic(int op, int equal, int logic)
    778 {
    779 	int c;
    780 
    781 	if ((c = *input->p++) == '=')
    782 		return equal;
    783 	if (c == op)
    784 		return logic;
    785 	--input->p;
    786 	return op;
    787 }
    788 
    789 static int
    790 dot(void)
    791 {
    792 	int c;
    793 
    794 	if ((c = *input->p) != '.')
    795 		return '.';
    796 	if ((c = *++input->p) != '.')
    797 		error("incorrect token '..'");
    798 	++input->p;
    799 	return ELLIPSIS;
    800 }
    801 
    802 static int
    803 operator(void)
    804 {
    805 	int t;
    806 
    807 	switch (t = *input->p++) {
    808 	case '<':
    809 		t = relational('<', LE, SHL, SHL_EQ);
    810 		break;
    811 	case '>':
    812 		t = relational('>', GE, SHR, SHR_EQ);
    813 		break;
    814 	case '&':
    815 		t = logic('&', AND_EQ, AND);
    816 		break;
    817 	case '|':
    818 		t = logic('|', OR_EQ, OR);
    819 		break;
    820 	case '=':
    821 		t = follow('=', EQ, '=');
    822 		break;
    823 	case '^':
    824 		t = follow('=', XOR_EQ, '^');
    825 		break;
    826 	case '*':
    827 		t = follow('=', MUL_EQ, '*');
    828 		break;
    829 	case '/':
    830 		t = follow('=', DIV_EQ, '/');
    831 		break;
    832 	case '%':
    833 		t = follow('=', MOD_EQ, '%');
    834 		break;
    835 	case '!':
    836 		t = follow('=', NE, '!');
    837 		break;
    838 	case '#':
    839 		t = follow('#', CONCAT, STRINGIZE);
    840 		break;
    841 	case '-':
    842 		t = minus();
    843 		break;
    844 	case '+':
    845 		t = plus();
    846 		break;
    847 	case '.':
    848 		t = dot();
    849 		break;
    850 	}
    851 	tok2str();
    852 	return t;
    853 }
    854 
    855 /* TODO: Ensure that namespace is NS_IDEN after a recovery */
    856 
    857 /*
    858  * skip all the spaces until the next token. When we are in
    859  * CPPMODE \n is not considered a whitespace
    860  */
    861 static int
    862 skipspaces(void)
    863 {
    864 	int c;
    865 
    866 	if (!input)
    867 		return EOF;
    868 
    869 	for (;;) {
    870 		switch (c = *input->p) {
    871 		case '\n':
    872 			if (lexmode == CPPMODE)
    873 				goto return_byte;
    874 			++input->p;
    875 		case '\0':
    876 			if (!moreinput())
    877 				return EOF;
    878 			break;
    879 		case ' ':
    880 		case '\t':
    881 		case '\v':
    882 		case '\r':
    883 		case '\f':
    884 			++input->p;
    885 			break;
    886 		default:
    887 			goto return_byte;
    888 		}
    889 	}
    890 
    891 return_byte:
    892 	input->begin = input->p;
    893 	return c;
    894 }
    895 
    896 int
    897 next(void)
    898 {
    899 	int c;
    900 
    901 	if ((c = skipspaces()) == EOF)
    902 		yytoken = EOFTOK;
    903 	else if (isalpha(c) || c == '_')
    904 		yytoken = iden();
    905 	else if (isdigit(c))
    906 		yytoken = number();
    907 	else if (c == '"')
    908 		yytoken = string();
    909 	else if (c == '\'')
    910 		yytoken = character();
    911 	else
    912 		yytoken = operator();
    913 
    914 	if (yytoken == EOFTOK) {
    915 		strcpy(yytext, "<EOF>");
    916 		if (cppctx && !input)
    917 			errorp("#endif expected");
    918 	}
    919 
    920 	DBG("TOKEN %s", yytext);
    921 	return yytoken;
    922 }
    923 
    924 void
    925 expect(int tok)
    926 {
    927 	if (yytoken != tok) {
    928 		if (isgraph(tok))
    929 			errorp("expected '%c' before '%s'", tok, yytext);
    930 		else
    931 			errorp("unexpected '%s'", yytext);
    932 	} else {
    933 		next();
    934 	}
    935 }
    936 
    937 int
    938 ahead(void)
    939 {
    940 	skipspaces();
    941 	return *input->begin;
    942 }
    943 
    944 void
    945 setsafe(int type)
    946 {
    947 	safe = type;
    948 }
    949 
    950 void
    951 discard(void)
    952 {
    953 	extern jmp_buf recover;
    954 	int c;
    955 
    956 	for (c = yytoken; ; c = *input->p++) {
    957 		switch (safe) {
    958 		case END_COMP:
    959 			if (c == '}')
    960 				goto jump;
    961 			goto semicolon;
    962 		case END_COND:
    963 			if (c == ')')
    964 				goto jump;
    965 			break;
    966 		case END_LDECL:
    967 			if (c == ',')
    968 				goto jump;
    969 		case END_DECL:
    970 		semicolon:
    971 			if (c == ';')
    972 				goto jump;
    973 			break;
    974 		}
    975 		if ((c == '\0' || c == EOFTOK) && !moreinput())
    976 			exit(EXIT_FAILURE);
    977 	}
    978 jump:
    979 	input->begin = input->p;
    980 	yytoken = c;
    981 	yytext[0] = c;
    982 	yytext[1] = '\0';
    983 	exit(EXIT_FAILURE);
    984 
    985 	/*
    986 	 * FIXME: We don't have a proper recover mechanism at this moment
    987 	 * and we don't set the recover point ever, so executing this
    988 	 * longjmp will generate surely a segmentation fault, so it does
    989 	 * not make sense to do it. We just exit until we can find time
    990 	 * to solve this problem.
    991 	 */
    992 	longjmp(recover, 1);
    993 }