scc

simple c99 compiler
git clone git://git.simple-cc.org/scc
Log | Files | Refs | Submodules | README | LICENSE

lex.c (16737B)


      1 #include <assert.h>
      2 #include <ctype.h>
      3 #include <errno.h>
      4 #include <limits.h>
      5 #include <setjmp.h>
      6 #include <stdio.h>
      7 #include <stdlib.h>
      8 #include <string.h>
      9 
     10 #include <scc/cstd.h>
     11 #include <scc/scc.h>
     12 #include "cc1.h"
     13 
/* token returned by the last call to next() */
int yytoken;
/* value attached to the current token (symbol or token code) */
struct yystype yylval;
/* text of the current token; string() also stores both quotes */
char yytext[STRINGSIZ+3];
/* length of the current token as computed by tok2str() and string() */
unsigned short yylen;
/* CCMODE or CPPMODE; in CPPMODE skipspaces() stops on '\n' */
int lexmode = CCMODE;
/* current line number, advanced by newline() */
unsigned lineno;
/* copy of the current file name, maintained by setloc() */
char filenam[FILENAME_MAX];

/* namespace used by iden() to look up identifiers */
int namespace = NS_IDEN;
/* synchronization point selected by setsafe() and used by discard() */
static int safe;
/* top of the stack of inputs (see addinput() and delinput()) */
Input *input;
     25 
     26 void
     27 setloc(char *fname, unsigned line)
     28 {
     29 	size_t len;
     30 
     31 	if (fname) {
     32 		if ((len = strlen(fname)) >= FILENAME_MAX)
     33 			die("cc1: %s: file name too long", fname);
     34 		memmove(filenam, fname, len);
     35 		filenam[len] = '\0';
     36 
     37 		/*
     38 		 * There are cases where we want to call setloc()
     39 		 * with the data in input, and then we have t be
     40 		 * careful about freeing input->filenam
     41 		 */
     42 		if (fname != input->filenam) {
     43 			free(input->filenam);
     44 			input->filenam = xstrdup(fname);
     45 		}
     46 	}
     47 
     48 	lineno = input->lineno = line;
     49 }
     50 
     51 int
     52 addinput(int type, void *arg, int fail)
     53 {
     54 	FILE *fp;
     55 	char *extp, *fname, *buffer, *infile;
     56 	int infileln;
     57 	Macro *mp;
     58 	Symbol *sym;
     59 	Input *newip, *curip = input;
     60 
     61 	if (curip)
     62 		curip->lineno = lineno;
     63 
     64 	switch (type) {
     65 	case IMACRO:
     66 		fp = NULL;
     67 		mp = arg;
     68 		sym = mp->sym;
     69 		fname = mp->fname;
     70 		buffer = mp->buffer;
     71 		DBG("INPUT: expanding macro %s", sym->name);
     72 		break;
     73 	case IPARAM:
     74 		fp = NULL;
     75 		mp = NULL;
     76 		buffer = arg;
     77 		fname = filenam;
     78 		DBG("INPUT: macro parameter '%s'", buffer);
     79 		break;
     80 	case IFILE:
     81 		fname = arg;
     82 		mp = NULL;
     83 		buffer = NULL;
     84 
     85 		if ((fp = fopen(fname, "r")) == NULL) {
     86 			if (!fail)
     87 				return 0;
     88 			die("cc1: %s: %s", fname, strerror(errno));
     89 		}
     90 		if (curip && onlyheader) {
     91 			infile = curip->filenam;
     92 			infileln = strlen(infile);
     93 			if (extp = strrchr(infile, '.'))
     94 				infileln -= strlen(extp);
     95 			printf("%.*s.o: %s %s\n",
     96 			       infileln, infile, infile, fname);
     97 		}
     98 		lineno = 0;
     99 		DBG("INPUT: file input '%s'", fname);
    100 		break;
    101 	case ISTDIN:
    102 		fp = stdin;
    103 		mp = NULL;
    104 		fname = "<stdin>";
    105 		buffer = NULL;
    106 		lineno = 0;
    107 		DBG("INPUT: file input 'stdin'");
    108 		break;
    109 	default:
    110 		abort();
    111 	}
    112 
    113 	if (!buffer) {
    114 		buffer = xmalloc(INPUTSIZ);
    115 		buffer[0] = '\0';
    116 	} else {
    117 		buffer = xstrdup(buffer);
    118 	}
    119 
    120 	newip = xmalloc(sizeof(*newip));
    121 	newip->next = curip;
    122 	newip->macro = mp;
    123 	newip->p = newip->begin = newip->line = buffer;
    124 	newip->filenam = NULL;
    125 	newip->lineno = 0;
    126 	newip->fp = fp;
    127 	newip->flags = type;
    128 	input = newip;
    129 
    130 	setloc(fname, lineno);
    131 	return 1;
    132 }
    133 
    134 void
    135 delinput(void)
    136 {
    137 	Input *ip = input;
    138 
    139 	switch (ip->flags & ITYPE) {
    140 	case IFILE:
    141 		DBG("INPUT: file finished '%s'", ip->filenam);
    142 		if (fclose(ip->fp))
    143 			die("cc1: %s: %s", ip->filenam, strerror(errno));
    144 		break;
    145 	case IMACRO:
    146 		DBG("INPUT: macro %s finished", ip->macro->sym->name);
    147 		delmacro(ip->macro);
    148 		break;
    149 	case IPARAM:
    150 		DBG("INPUT: macro param finished");
    151 		break;
    152 	case ISTDIN:
    153 		DBG("INPUT: stdin finished");
    154 		break;
    155 	default:
    156 		abort();
    157 	}
    158 
    159 	input = ip->next;
    160 	free(ip->filenam);
    161 	free(ip->line);
    162 	free(ip);
    163 	if (input)
    164 		setloc(input->filenam, input->lineno);
    165 }
    166 
    167 static void
    168 newline(void)
    169 {
    170 	if (++lineno == 0)
    171 		die("cc1: %s: file too long", filenam);
    172 }
    173 
    174 /*
    175  * Read the next character from the input file, counting number of lines
    176  * and joining lines escaped with \
    177  */
    178 static int
    179 readchar(void)
    180 {
    181 	FILE *fp = input->fp;
    182 	int c;
    183 
    184 repeat:
    185 	switch (c = getc(fp)) {
    186 	case '\\':
    187 		if ((c = getc(fp)) == '\n') {
    188 			newline();
    189 			goto repeat;
    190 		}
    191 		ungetc(c, fp);
    192 		c = '\\';
    193 		break;
    194 	case '\n':
    195 		newline();
    196 		break;
    197 	case EOF:
    198 		break;
    199 	}
    200 
    201 	return c;
    202 }
    203 
    204 /*
    205  * discard a C comment. This function is only called from readline
    206  * because it is impossible to have a comment in a macro, because
    207  * comments are always discarded before processing any cpp directive
    208  */
    209 static void
    210 comment(int type)
    211 {
    212 	int c;
    213 
    214 	c = readchar();
    215 repeat:
    216 	for ( ; c != EOF && c != type; c = readchar())
    217 		;
    218 
    219 	if (c == EOF) {
    220 		errorp("unterminated comment");
    221 		return;
    222 	}
    223 
    224 	if (type == '*' && (c = readchar()) != '/')
    225 		goto repeat;
    226 }
    227 
/*
 * readline is used to read a full logical line from a file into
 * input->line. It discards comments and checks that the line fits
 * in the input buffer. It returns 0 (after setting IEOF in the input)
 * when the file was already at end of file, and 1 otherwise. The
 * stored line always finishes with "\n".
 */
static int
readline(void)
{
	char *bp, *lim;
	int c, peekc = 0, delim = 0;

	if (feof(input->fp)) {
		input->flags |= IEOF;
		*input->p = '\0';
		return 0;
	}

	*input->line = '\0';
	lim = &input->line[INPUTSIZ-1];
	/* c is stored by the loop increment, so break skips the store */
	for (bp = input->line; bp < lim-1; *bp++ = c) {
		/* peekc holds a character that was read ahead */
		c = (peekc) ? peekc : readchar();
		peekc = 0;
		if (c == '\n' || c == EOF)
			break;
		if (c == '\\') {
			/*
			 * copy the backslash and the next character together,
			 * so an escaped quote does not change delim
			 */
			peekc = readchar();
			if (peekc == '\n' || peekc == EOF)
				continue;
			/* no room for both characters */
			if (bp == lim-2)
				break;
			*bp++ = c;
			c = peekc;
			peekc = 0;
			continue;
		}

		/* delim is the quote of the literal being read, or 0 */
		if (delim && c == delim)
			delim = 0;
		else if (!delim && (c == '"' || c == '\''))
			delim = c;
		if (c != '/' || delim)
			continue;

		/* check for the beginning of a comment */
		peekc = readchar();
		if (peekc != '*' && peekc != '/')
			continue;

		if (peekc == '/') {
			/* a line comment finishes the line */
			comment('\n');
			break;
		} else {
			/* a block comment is replaced by one space */
			comment('*');
			c = ' ';
		}
		peekc = 0;
	}

	input->begin = input->p = input->line;
	/* the loop ran out of room before finding the end of the line */
	if (bp == lim-1) {
		errorp("line too long");
		--bp;
	}
	*bp++ = '\n';
	*bp = '\0';

	return 1;
}
    296 
    297 /*
    298  * moreinput gets more bytes to be passed to the lexer.
    299  * It can take more bytes from macro expansions or
    300  * directly reading from files. When a cpp directive
    301  * is processed the line is discarded because it must not
    302  * be passed to the lexer
    303  */
    304 static int
    305 moreinput(void)
    306 {
    307 	int wasexpand = 0;
    308 
    309 repeat:
    310 	if (!input)
    311 		return 0;
    312 
    313 	if (*input->p == '\0') {
    314 		int t = input->flags & ITYPE;
    315 		if (t == IPARAM) {
    316 			input->flags |= IEOF;
    317 			return 0;
    318 		}
    319 		if (t == IMACRO) {
    320 			wasexpand = 1;
    321 			input->flags |= IEOF;
    322 		}
    323 		if (input->flags & IEOF) {
    324 			delinput();
    325 			goto repeat;
    326 		}
    327 		if (!readline()) {
    328 			*input->p = '\0';
    329 			goto repeat;
    330 		}
    331 		if (cpp())
    332 			goto repeat;
    333 	}
    334 
    335 	if (onlycpp && !wasexpand)
    336 		ppragmaln();
    337 	return 1;
    338 }
    339 
    340 static void
    341 tok2str(void)
    342 {
    343 	if ((yylen = input->p - input->begin) > INTIDENTSIZ)
    344 		error("token too big");
    345 	memcpy(yytext, input->begin, yylen);
    346 	yytext[yylen] = '\0';
    347 	input->begin = input->p;
    348 }
    349 
/*
 * Convert the integer constant stored in yytext, written in the
 * given base, and store its value in sym. The conversion begins with
 * the type already set in sym, and every time the value does not fit
 * the type is widened:
 *
 *	signed:   int -> long -> long long for decimal constants, or
 *	          the unsigned type of the same size for other bases
 *	unsigned: unsigned -> unsigned long -> unsigned long long when
 *	          sign is UNSIGNED, long and long long otherwise
 *
 * When there is no wider type an error is reported and the value of
 * sym is not set. It always returns sym.
 */
static Symbol *
readint(int base, int sign, Symbol *sym)
{
	char *s = yytext;
	Type *tp = sym->type;
	struct limits *lim;
	unsigned long long u, val, max;
	int c;

	lim = getlimits(tp);
	max = lim->max.i;
	/* skip the 0 or 0x prefix */
	if (*s == '0')
		++s;
	if (toupper(*s) == 'X')
		++s;

	/* the loop finishes in the first non digit, like a suffix */
	for (u = 0; isxdigit(c = *s++); u = u*base + val) {
		static char letters[] = "0123456789ABCDEF";
		val = strchr(letters, toupper(c)) - letters;
	repeat:
		/* u*base + val fits in the current type */
		if (u <= max/base && u*base <= max - val)
			continue;
		if (tp->prop & TSIGNED) {
			if (tp == inttype)
				tp = (base==10) ? longtype : uinttype;
			else if (tp == longtype)
				tp = (base==10) ? llongtype : ulongtype;
			else
				goto overflow;
		} else {
			if (tp == uinttype)
				tp = (sign==UNSIGNED) ? ulongtype : longtype;
			else if (tp == ulongtype)
				tp = (sign==UNSIGNED) ? ullongtype : llongtype;
			else
				goto overflow;
		}
		/* check the same digit again with the new limit */
		sym->type = tp;
		lim = getlimits(tp);
		max = lim->max.i;
		goto repeat;
	}

	if (tp->prop & TSIGNED)
		sym->u.i = u;
	else
		sym->u.u = u;

	return sym;

overflow:
	errorp("overflow in integer constant");
	return sym;
}
    404 
    405 static int
    406 integer(int base)
    407 {
    408 	Type *tp;
    409 	Symbol *sym;
    410 	unsigned size, sign;
    411 
    412 	for (size = sign = 0; ; ++input->p) {
    413 		switch (toupper(*input->p)) {
    414 		case 'L':
    415 			if (size == LLONG)
    416 				goto wrong_type;
    417 			size = (size == LONG) ? LLONG : LONG;
    418 			continue;
    419 		case 'U':
    420 			if (sign == UNSIGNED)
    421 				goto wrong_type;
    422 			sign = UNSIGNED;
    423 			continue;
    424 		default:
    425 			goto convert;
    426 		wrong_type:
    427 			error("invalid suffix in integer constant");
    428 		}
    429 	}
    430 
    431 convert:
    432 	tok2str();
    433 	tp = ctype(INT, sign, size);
    434 	sym = newsym(NS_IDEN, NULL);
    435 	sym->type = tp;
    436 	sym->flags |= SCONSTANT;
    437 	yylval.sym = readint(base, sign, sym);
    438 	return CONSTANT;
    439 }
    440 
    441 static void
    442 digits(int base)
    443 {
    444 	char *p;
    445 	int c;
    446 
    447 	for (p = input->p; c = *p; ++p) {
    448 		switch (base) {
    449 		case 8:
    450 			if (!strchr("01234567", c))
    451 				goto end;
    452 			break;
    453 		case 10:
    454 			if (!isdigit(c))
    455 				goto end;
    456 			break;
    457 		case 16:
    458 			if (!isxdigit(c))
    459 				goto end;
    460 			break;
    461 		}
    462 	}
    463 end:
    464 	input->p = p;
    465 }
    466 
    467 static int
    468 number(void)
    469 {
    470 	Type *tp;
    471 	Symbol *sym;
    472 	int ch, base;
    473 	long double ld;
    474 
    475 	if (*input->p != '0') {
    476 		base = 10;
    477 	} else {
    478 		if (toupper(*++input->p) == 'X') {
    479 			++input->p;
    480 			base = 16;
    481 		} else {
    482 			base = 8;
    483 		}
    484 	}
    485 	digits(base);
    486 
    487 	if (*input->p != '.')
    488 		return integer(base);
    489 
    490 	sym = newsym(NS_IDEN, NULL);
    491 
    492 	ld = strtold(input->begin, &input->p);
    493 	switch (toupper(*input->p)) {
    494 	case 'F':
    495 		++input->p;
    496 		tp = floattype;
    497 		sym->u.f = ld;
    498 		break;
    499 	case 'L':
    500 		++input->p;
    501 		tp = ldoubletype;
    502 		sym->u.ld = ld;
    503 		break;
    504 	default:
    505 		tp = doubletype;
    506 		sym->u.d = ld;
    507 		break;
    508 	}
    509 
    510 	tok2str();
    511 	sym->type = tp;
    512 	sym->flags |= SCONSTANT;
    513 	yylval.sym = sym;
    514 	return CONSTANT;
    515 }
    516 
    517 static int
    518 escape(void)
    519 {
    520 	int c, d, i, cnt, base;
    521 
    522 	switch (*++input->p) {
    523 	case 'a':
    524 		return '\a';
    525 	case 'b':
    526 		return '\b';
    527 	case 'f':
    528 		return '\f';
    529 	case 'n':
    530 		return '\n';
    531 	case 'r':
    532 		return '\r';
    533 	case 't':
    534 		return '\t';
    535 	case 'v':
    536 		return '\v';
    537 	case '"':
    538 		return '"';
    539 	case '\'':
    540 		return '\'';
    541 	case '\\':
    542 		return '\\';
    543 	case '\?':
    544 		return '\?';
    545 	case 'u':
    546 		/*
    547 		 * FIXME: universal constants are not correctly handled
    548 		 */
    549 		if (!isdigit(*++input->p))
    550 			warn("incorrect digit for numerical character constant");
    551 		base = 10;
    552 		break;
    553 	case 'x':
    554 		if (!isxdigit(*++input->p))
    555 			warn("\\x used with no following hex digits");
    556 		cnt = 2;
    557 		base = 16;
    558 		break;
    559 	case '0':
    560 	case '1':
    561 	case '2':
    562 	case '3':
    563 	case '4':
    564 	case '5':
    565 	case '6':
    566 	case '7':
    567 		cnt = 3;
    568 		base = 8;
    569 		break;
    570 	default:
    571 		warn("unknown escape sequence");
    572 		return ' ';
    573 	}
    574 
    575 	for (c = i = 0; i < cnt; ++i) {
    576 		static char digits[] = "0123456789ABCDEF";
    577 		char *p = strchr(digits, toupper(*input->p));
    578 
    579 		if (!p || (d = p - digits) > base)
    580 			break;
    581 		c *= base;
    582 		c += d;
    583 		++input->p;
    584 	}
    585 	--input->p;
    586 
    587 	return c;
    588 }
    589 
    590 static Rune
    591 utf8rune(void)
    592 {
    593 	Rune wc;
    594 	unsigned c;
    595 	size_t i, len;
    596 
    597 	c = *input->p;
    598 	for (len = 0; c & 0x80; len++)
    599 		c <<= 1;
    600 	if (len == 0)
    601 		return c;
    602 	if (len == 1 || len == 8)
    603 		goto invalid;
    604 
    605 	wc = (c & 0xFF) >> len;
    606 	for (i = 0; i < len-1; i++) {
    607 		c = input->p[1];
    608 		if ((c & 0xC0) != 0x80)
    609 			goto invalid;
    610 		input->p++;
    611 		wc <<= 6;
    612 		wc |= c & 0x3F;
    613 	}
    614 	return wc;
    615 
    616 invalid:
    617 	errorp("invalid multibyte sequence");
    618 	return 0xFFFD;
    619 }
    620 
    621 static Rune
    622 decode(int multi)
    623 {
    624 	Rune r;
    625 
    626 	if (*input->p == '\\') {
    627 		r = escape();
    628 		return r;
    629 	}
    630 
    631 	return multi ? utf8rune() : *input->p;
    632 }
    633 
    634 static int
    635 character(void)
    636 {
    637 	int i, multi = 0;
    638 	Rune r, d;
    639 	Type *tp = inttype;
    640 	Symbol *sym;
    641 
    642 	if (*input->p == 'L') {
    643 		multi = 1;
    644 		tp = wchartype;
    645 		input->p++;
    646 	}
    647 
    648 	d = 0;
    649 	input->p++;
    650 	for (i = 0; *input->p != '\''; i++) {
    651 		r = decode(multi);
    652 		if (r > getlimits(tp)->max.i)
    653 			warn("character too large for enclosing character literal type");
    654 		d |= r;
    655 		input->p++;
    656 	}
    657 	input->p++;
    658 
    659 	if (i == 0)
    660 		errorp("empty character constant");
    661 	if (i > 1)
    662 		warn("multi-character character constant");
    663 
    664 	sym = newsym(NS_IDEN, NULL);
    665 	sym->u.i = d;
    666 	sym->type = tp;
    667 	yylval.sym = sym;
    668 	tok2str();
    669 	return CONSTANT;
    670 }
    671 
    672 /*
    673  * string() parses a constant string, and convert all the
    674  * escape sequences into single characters. This behaviour
    675  * is correct except when we parse a #define, where we want
    676  * to preserve the literal content of the string. In that
    677  * case cpp.c:/^define( sets the variable disescape to
    678  * disable converting escape sequences into characters.
    679  */
    680 static int
    681 string(void)
    682 {
    683 	char *bp = yytext;
    684 	int c, esc;
    685 
    686 	*bp++ = '"';
    687 	esc = 0;
    688 	for (++input->p; ; ++input->p) {
    689 		c = *input->p;
    690 
    691 		if (c == '"' && !esc)
    692 			break;
    693 
    694 		if (c == '\0') {
    695 			errorp("missing terminating '\"' character");
    696 			break;
    697 		}
    698 
    699 		esc = (c == '\\' && !esc && disescape);
    700 
    701 		if (c == '\\' && !esc)
    702 			c = escape();
    703 
    704 		if (bp == &yytext[STRINGSIZ+1]) {
    705 			/* too long, ignore everything until next quote */
    706 			for (++input->p; *input->p != '"'; ++input->p) {
    707 				if (*input->p == '\\')
    708 					++input->p;
    709 				if (*input->p == '\0')
    710 					break;
    711 			}
    712 			--bp;
    713 			errorp("string too long");
    714 			break;
    715 		}
    716 		*bp++ = c;
    717 	}
    718 
    719 	input->begin = ++input->p;
    720 	*bp = '\0';
    721 
    722 	yylen = bp - yytext + 1;
    723 	yylval.sym = newstring(yytext+1, yylen-1);
    724 	*bp++ = '"';
    725 	*bp = '\0';
    726 	return STRING;
    727 }
    728 
    729 static int
    730 iden(void)
    731 {
    732 	Symbol *sym;
    733 	char *p, *begin;
    734 
    735 	if (input->p[0] == 'L' && input->p[1] == '\'')
    736 		return character();
    737 
    738 	begin = input->p;
    739 	for (p = begin; isalnum(*p) || *p == '_'; ++p)
    740 		;
    741 	input->p = p;
    742 	tok2str();
    743 	if ((sym = lookup(NS_CPP, yytext, NOALLOC)) != NULL) {
    744 		if (expand(sym))
    745 			return next();
    746 	}
    747 	sym = lookup(namespace, yytext, ALLOC);
    748 	yylval.sym = sym;
    749 	if (sym->flags & SCONSTANT)
    750 		return CONSTANT;
    751 	if (sym->token != IDEN)
    752 		yylval.token = sym->u.token;
    753 	return sym->token;
    754 }
    755 
    756 static int
    757 follow(int expect, int ifyes, int ifno)
    758 {
    759 	if (*input->p++ == expect)
    760 		return ifyes;
    761 	--input->p;
    762 	return ifno;
    763 }
    764 
    765 static int
    766 minus(void)
    767 {
    768 	switch (*input->p++) {
    769 	case '-':
    770 		return DEC;
    771 	case '>':
    772 		return INDIR;
    773 	case '=':
    774 		return SUB_EQ;
    775 	default:
    776 		--input->p;
    777 		return '-';
    778 	}
    779 }
    780 
    781 static int
    782 plus(void)
    783 {
    784 	switch (*input->p++) {
    785 	case '+':
    786 		return INC;
    787 	case '=':
    788 		return ADD_EQ;
    789 	default:
    790 		--input->p;
    791 		return '+';
    792 	}
    793 }
    794 
    795 static int
    796 relational(int op, int equal, int shift, int assig)
    797 {
    798 	int c;
    799 
    800 	if ((c = *input->p++) == '=')
    801 		return equal;
    802 	if (c == op)
    803 		return follow('=', assig, shift);
    804 	--input->p;
    805 	return op;
    806 }
    807 
    808 static int
    809 logic(int op, int equal, int logic)
    810 {
    811 	int c;
    812 
    813 	if ((c = *input->p++) == '=')
    814 		return equal;
    815 	if (c == op)
    816 		return logic;
    817 	--input->p;
    818 	return op;
    819 }
    820 
    821 static int
    822 dot(void)
    823 {
    824 	int c;
    825 
    826 	if (isdigit(*input->p))
    827 		return number();
    828 	if ((c = *input->p) != '.')
    829 		return '.';
    830 	if ((c = *++input->p) != '.')
    831 		error("incorrect token '..'");
    832 	++input->p;
    833 	return ELLIPSIS;
    834 }
    835 
    836 static int
    837 operator(void)
    838 {
    839 	int t;
    840 
    841 	switch (t = *input->p++) {
    842 	case '<':
    843 		t = relational('<', LE, SHL, SHL_EQ);
    844 		break;
    845 	case '>':
    846 		t = relational('>', GE, SHR, SHR_EQ);
    847 		break;
    848 	case '&':
    849 		t = logic('&', AND_EQ, AND);
    850 		break;
    851 	case '|':
    852 		t = logic('|', OR_EQ, OR);
    853 		break;
    854 	case '=':
    855 		t = follow('=', EQ, '=');
    856 		break;
    857 	case '^':
    858 		t = follow('=', XOR_EQ, '^');
    859 		break;
    860 	case '*':
    861 		t = follow('=', MUL_EQ, '*');
    862 		break;
    863 	case '/':
    864 		t = follow('=', DIV_EQ, '/');
    865 		break;
    866 	case '%':
    867 		t = follow('=', MOD_EQ, '%');
    868 		break;
    869 	case '!':
    870 		t = follow('=', NE, '!');
    871 		break;
    872 	case '-':
    873 		t = minus();
    874 		break;
    875 	case '+':
    876 		t = plus();
    877 		break;
    878 	case '.':
    879 		t = dot();
    880 		break;
    881 	}
    882 	tok2str();
    883 	return t;
    884 }
    885 
    886 /* TODO: Ensure that namespace is NS_IDEN after a recovery */
    887 
    888 /*
    889  * skip all the spaces until the next token. When we are in
    890  * CPPMODE \n is not considered a whitespace
    891  */
    892 static int
    893 skipspaces(void)
    894 {
    895 	int c;
    896 
    897 	if (!input)
    898 		return EOF;
    899 
    900 	for (;;) {
    901 		switch (c = *input->p) {
    902 		case '\n':
    903 			if (lexmode == CPPMODE)
    904 				goto return_byte;
    905 			++input->p;
    906 		case '\0':
    907 			if (!moreinput())
    908 				return EOF;
    909 			break;
    910 		case ' ':
    911 		case '\t':
    912 		case '\v':
    913 		case '\r':
    914 		case '\f':
    915 			++input->p;
    916 			break;
    917 		default:
    918 			goto return_byte;
    919 		}
    920 	}
    921 
    922 return_byte:
    923 	input->begin = input->p;
    924 	return c;
    925 }
    926 
    927 int
    928 next(void)
    929 {
    930 	int c;
    931 
    932 	if ((c = skipspaces()) == EOF)
    933 		yytoken = EOFTOK;
    934 	else if (isalpha(c) || c == '_')
    935 		yytoken = iden();
    936 	else if (isdigit(c))
    937 		yytoken = number();
    938 	else if (c == '"')
    939 		yytoken = string();
    940 	else if (c == '\'')
    941 		yytoken = character();
    942 	else
    943 		yytoken = operator();
    944 
    945 	if (yytoken == EOFTOK) {
    946 		strcpy(yytext, "<EOF>");
    947 		if (cppctx && !input)
    948 			errorp("#endif expected");
    949 	}
    950 
    951 	DBG("TOKEN %s", yytext);
    952 	return yytoken;
    953 }
    954 
    955 void
    956 expect(int tok)
    957 {
    958 	if (yytoken != tok) {
    959 		if (isgraph(tok))
    960 			errorp("expected '%c' before '%s'", tok, yytext);
    961 		else
    962 			errorp("unexpected '%s'", yytext);
    963 	} else {
    964 		next();
    965 	}
    966 }
    967 
    968 int
    969 ahead(void)
    970 {
    971 	skipspaces();
    972 	return *input->begin;
    973 }
    974 
    975 void
    976 setsafe(int type)
    977 {
    978 	safe = type;
    979 }
    980 
    981 void
    982 discard(void)
    983 {
    984 	extern jmp_buf recover;
    985 	int c;
    986 
    987 	for (c = yytoken; ; c = *input->p++) {
    988 		switch (safe) {
    989 		case END_COMP:
    990 			if (c == '}')
    991 				goto jump;
    992 			goto semicolon;
    993 		case END_COND:
    994 			if (c == ')')
    995 				goto jump;
    996 			break;
    997 		case END_LDECL:
    998 			if (c == ',')
    999 				goto jump;
   1000 		case END_DECL:
   1001 		semicolon:
   1002 			if (c == ';')
   1003 				goto jump;
   1004 			break;
   1005 		}
   1006 		if ((c == '\0' || c == EOFTOK) && !moreinput())
   1007 			exit(EXIT_FAILURE);
   1008 	}
   1009 jump:
   1010 	input->begin = input->p;
   1011 	yytoken = c;
   1012 	yytext[0] = c;
   1013 	yytext[1] = '\0';
   1014 	exit(EXIT_FAILURE);
   1015 
   1016 	/*
   1017 	 * FIXME: We don't have a proper recover mechanism at this moment
   1018 	 * and we don't set the recover point ever, so executing this
   1019 	 * longjmp will generate surely a segmentation fault, so it does
   1020 	 * not make sense to do it. We just exit until we can find time
   1021 	 * to solve this problem.
   1022 	 */
   1023 	longjmp(recover, 1);
   1024 }