scc

simple c99 compiler
git clone git://git.simple-cc.org/scc
Log | Files | Refs | README | LICENSE

lex.c (19285B)


      1 #include <assert.h>
      2 #include <ctype.h>
      3 #include <errno.h>
      4 #include <limits.h>
      5 #include <setjmp.h>
      6 #include <stdio.h>
      7 #include <stdlib.h>
      8 #include <string.h>
      9 
     10 #include <scc/cstd.h>
     11 #include <scc/scc.h>
     12 #include "cc1.h"
     13 
     14 #define REPLACECHAR 0xFFFD
     15 #define NOMULTICHAR 0
     16 #define MULTICHAR   1
     17 #define UTF8_MAX    4
     18 
     19 int yytoken;
     20 struct yystype yylval;
     21 char yytext[STRINGSIZ+3];
     22 unsigned short yylen;
     23 int yyspace, lexmode = CCMODE;
     24 unsigned lineno;
     25 char filenam[FILENAME_MAX];
     26 
     27 int namespace = NS_IDEN;
     28 static int safe;
     29 Input *input;
     30 
     31 void
     32 setloc(char *fname, unsigned line)
     33 {
     34 	size_t len;
     35 
     36 	if (fname) {
     37 		if ((len = strlen(fname)) >= FILENAME_MAX)
     38 			die("cc1: %s: file name too long", fname);
     39 		memmove(filenam, fname, len);
     40 		filenam[len] = '\0';
     41 
     42 		/*
     43 		 * There are cases where we want to call setloc()
     44 		 * with the data in input, and then we have t be
     45 		 * careful about freeing input->filenam
     46 		 */
     47 		if (fname != input->filenam) {
     48 			free(input->filenam);
     49 			input->filenam = xstrdup(fname);
     50 		}
     51 	}
     52 
     53 	lineno = input->lineno = line;
     54 }
     55 
     56 int
     57 addinput(int type, void *arg, int fail)
     58 {
     59 	FILE *fp;
     60 	char *extp, *fname, *buffer, *infile;
     61 	int infileln;
     62 	Macro *mp;
     63 	Symbol *sym, **hs;
     64 	Input *newip, *curip = input;
     65 
     66 	if (curip)
     67 		curip->lineno = lineno;
     68 
     69 	switch (type) {
     70 	case IMACRO:
     71 		fp = NULL;
     72 		mp = arg;
     73 		sym = mp->sym;
     74 		fname = mp->fname;
     75 		buffer = mp->buffer;
     76 		DBG("INPUT expanding macro %s", sym->name);
     77 		break;
     78 	case IPARAM:
     79 		fp = NULL;
     80 		mp = NULL;
     81 		buffer = arg;
     82 		fname = filenam;
     83 		DBG("INPUT macro parameter '%s'", buffer);
     84 		break;
     85 	case IFILE:
     86 		fname = arg;
     87 		mp = NULL;
     88 		buffer = NULL;
     89 
     90 		if ((fp = fopen(fname, "r")) == NULL) {
     91 			if (!fail)
     92 				return 0;
     93 			die("cc1: %s: %s", fname, strerror(errno));
     94 		}
     95 		if (curip && onlyheader) {
     96 			infile = curip->filenam;
     97 			infileln = strlen(infile);
     98 			if (extp = strrchr(infile, '.'))
     99 				infileln -= strlen(extp);
    100 			printf("%.*s.o: %s %s\n",
    101 			       infileln, infile, infile, fname);
    102 		}
    103 		lineno = 0;
    104 		DBG("INPUT file input '%s'", fname);
    105 		break;
    106 	case ISTDIN:
    107 		fp = stdin;
    108 		mp = NULL;
    109 		fname = "<stdin>";
    110 		buffer = NULL;
    111 		lineno = 0;
    112 		DBG("INPUT file input 'stdin'");
    113 		break;
    114 	default:
    115 		abort();
    116 	}
    117 
    118 	if (!buffer) {
    119 		buffer = xmalloc(INPUTSIZ);
    120 		buffer[0] = '\0';
    121 	} else {
    122 		buffer = xstrdup(buffer);
    123 	}
    124 
    125 	if (input)
    126 		unsethideset(input);
    127 
    128 	newip = xmalloc(sizeof(*newip));
    129 	newip->next = curip;
    130 	newip->macro = mp;
    131 	newip->p = newip->begin = newip->line = buffer;
    132 	newip->filenam = NULL;
    133 	newip->lineno = 0;
    134 	newip->fp = fp;
    135 	newip->flags = type;
    136 	memset(newip->hideset, 0, sizeof(newip->hideset));
    137 	input = newip;
    138 
    139 	hs = NULL;
    140 	if (curip)
    141 		hs = curip->hideset;
    142 	if (type == IMACRO)
    143 		hs = mp->hideset;
    144 	if (type == IPARAM && curip)
    145 		hs = curip->macro->hideset;
    146 	if (hs) {
    147 		memcpy(input->hideset, hs, sizeof(input->hideset));
    148 		sethideset(input);
    149 	}
    150 
    151 	setloc(fname, lineno);
    152 	return 1;
    153 }
    154 
    155 void
    156 delinput(void)
    157 {
    158 	Input *ip = input;
    159 
    160 	switch (ip->flags & ITYPE) {
    161 	case IFILE:
    162 		DBG("INPUT file finished '%s'", ip->filenam);
    163 		if (fclose(ip->fp))
    164 			die("cc1: %s: %s", ip->filenam, strerror(errno));
    165 		break;
    166 	case IMACRO:
    167 		DBG("INPUT macro %s finished", ip->macro->sym->name);
    168 		delmacro(ip->macro);
    169 		break;
    170 	case IPARAM:
    171 		DBG("INPUT macro param finished");
    172 		break;
    173 	case ISTDIN:
    174 		DBG("INPUT stdin finished");
    175 		break;
    176 	default:
    177 		abort();
    178 	}
    179 	unsethideset(input);
    180 
    181 	input = ip->next;
    182 	free(ip->filenam);
    183 	free(ip->line);
    184 	free(ip);
    185 	if (input) {
    186 		sethideset(input);
    187 		setloc(input->filenam, input->lineno);
    188 	}
    189 }
    190 
    191 static void
    192 newline(void)
    193 {
    194 	if (++lineno == 0)
    195 		die("cc1: %s: file too long", filenam);
    196 }
    197 
    198 /*
    199  * Read the next character from the input file, counting number of lines
    200  * and joining lines escaped with \
    201  */
    202 static int
    203 readchar(void)
    204 {
    205 	FILE *fp = input->fp;
    206 	int c;
    207 
    208 repeat:
    209 	switch (c = getc(fp)) {
    210 	case '\\':
    211 		if ((c = getc(fp)) == '\n') {
    212 			newline();
    213 			goto repeat;
    214 		}
    215 		ungetc(c, fp);
    216 		c = '\\';
    217 		break;
    218 	case '\n':
    219 		newline();
    220 		break;
    221 	case EOF:
    222 		break;
    223 	}
    224 
    225 	return c;
    226 }
    227 
    228 /*
    229  * discard a C comment. This function is only called from readline
    230  * because it is impossible to have a comment in a macro, because
    231  * comments are always discarded before processing any cpp directive
    232  */
    233 static void
    234 comment(int type)
    235 {
    236 	int c;
    237 
    238 	c = readchar();
    239 repeat:
    240 	for ( ; c != EOF && c != type; c = readchar())
    241 		;
    242 
    243 	if (c == EOF) {
    244 		errorp("unterminated comment");
    245 		return;
    246 	}
    247 
    248 	if (type == '*' && (c = readchar()) != '/')
    249 		goto repeat;
    250 }
    251 
    252 /*
    253  * readline is used to read a full logic line from a file.
    254  * It discards comments and check that the line fits in
    255  * the input buffer
    256  */
    257 static int
    258 readline(void)
    259 {
    260 	char *bp, *lim;
    261 	int c, peekc = 0, delim = 0;
    262 
    263 	if (feof(input->fp)) {
    264 		input->flags |= IEOF;
    265 		*input->p = '\0';
    266 		return 0;
    267 	}
    268 
    269 	*input->line = '\0';
    270 	lim = &input->line[INPUTSIZ-1];
    271 	for (bp = input->line; bp < lim-1; *bp++ = c) {
    272 		c = (peekc) ? peekc : readchar();
    273 		peekc = 0;
    274 		if (c == '\n' || c == EOF)
    275 			break;
    276 		if (c == '\\') {
    277 			peekc = readchar();
    278 			if (peekc == '\n' || peekc == EOF)
    279 				continue;
    280 			if (bp == lim-2)
    281 				break;
    282 			*bp++ = c;
    283 			c = peekc;
    284 			peekc = 0;
    285 			continue;
    286 		}
    287 
    288 		if (delim && c == delim)
    289 			delim = 0;
    290 		else if (!delim && (c == '"' || c == '\''))
    291 			delim = c;
    292 		if (c != '/' || delim)
    293 			continue;
    294 
    295 		/* check for /* or // */
    296 		peekc = readchar();
    297 		if (peekc != '*' && peekc != '/')
    298 			continue;
    299 
    300 		if (peekc == '/') {
    301 			comment('\n');
    302 			break;
    303 		} else {
    304 			comment('*');
    305 			c = ' ';
    306 		}
    307 		peekc = 0;
    308 	}
    309 
    310 	input->begin = input->p = input->line;
    311 	if (bp == lim-1) {
    312 		errorp("line too long");
    313 		--bp;
    314 	}
    315 	*bp++ = '\n';
    316 	*bp = '\0';
    317 
    318 	return 1;
    319 }
    320 
    321 /*
    322  * moreinput gets more bytes to be passed to the lexer.
    323  * It can take more bytes from macro expansions or
    324  * directly reading from files. When a cpp directive
    325  * is processed the line is discarded because it must not
    326  * be passed to the lexer
    327  */
    328 int
    329 moreinput(void)
    330 {
    331 	int wasexpand = 0;
    332 
    333 repeat:
    334 	if (!input)
    335 		return 0;
    336 
    337 	if (*input->p == '\0') {
    338 		int t = input->flags & ITYPE;
    339 		if (t == IPARAM) {
    340 			input->flags |= IEOF;
    341 			return 0;
    342 		}
    343 		if (t == IMACRO) {
    344 			wasexpand = 1;
    345 			input->flags |= IEOF;
    346 		}
    347 		if (input->flags & IEOF) {
    348 			delinput();
    349 			goto repeat;
    350 		}
    351 		if (!readline()) {
    352 			*input->p = '\0';
    353 			goto repeat;
    354 		}
    355 		if (cpp())
    356 			goto repeat;
    357 	}
    358 
    359 	if (onlycpp && !wasexpand)
    360 		ppragmaln();
    361 	return 1;
    362 }
    363 
    364 static void
    365 tok2str(void)
    366 {
    367 	if ((yylen = input->p - input->begin) >= sizeof(yytext))
    368 		error("token too big");
    369 	memcpy(yytext, input->begin, yylen);
    370 	yytext[yylen] = '\0';
    371 	input->begin = input->p;
    372 }
    373 
    374 static Symbol *
    375 readint(int base, int sign, Symbol *sym)
    376 {
    377 	char *s = yytext;
    378 	Type *tp = sym->type;
    379 	struct limits *lim;
    380 	unsigned long long u, val, max;
    381 	int c;
    382 
    383 	lim = getlimits(tp);
    384 	max = lim->max.i;
    385 	if (*s == '0')
    386 		++s;
    387 	if (toupper(*s) == 'X')
    388 		++s;
    389 
    390 	for (u = 0; isxdigit(c = *s++); u = u*base + val) {
    391 		static char letters[] = "0123456789ABCDEF";
    392 		val = strchr(letters, toupper(c)) - letters;
    393 	repeat:
    394 		if (u <= max/base && u*base <= max - val)
    395 			continue;
    396 		if (tp->prop & TSIGNED) {
    397 			if (tp == inttype)
    398 				tp = (base==10) ? longtype : uinttype;
    399 			else if (tp == longtype)
    400 				tp = (base==10) ? llongtype : ulongtype;
    401 			else if (tp == llongtype && base != 10)
    402 				tp = ullongtype;
    403 			else
    404 				goto overflow;
    405 		} else {
    406 			if (tp == uinttype)
    407 				tp = (sign==UNSIGNED) ? ulongtype : longtype;
    408 			else if (tp == ulongtype)
    409 				tp = (sign==UNSIGNED) ? ullongtype : llongtype;
    410 			else
    411 				goto overflow;
    412 		}
    413 		sym->type = tp;
    414 		lim = getlimits(tp);
    415 		max = lim->max.i;
    416 		goto repeat;
    417 	}
    418 
    419 	if (tp->prop & TSIGNED)
    420 		sym->u.i = u;
    421 	else
    422 		sym->u.u = u;
    423 
    424 	return sym;
    425 
    426 overflow:
    427 	errorp("overflow in integer constant");
    428 	return sym;
    429 }
    430 
    431 static int
    432 integer(int base)
    433 {
    434 	Type *tp;
    435 	Symbol *sym;
    436 	unsigned size, sign;
    437 
    438 	for (size = sign = 0; ; ++input->p) {
    439 		switch (toupper(*input->p)) {
    440 		case 'L':
    441 			if (size == LLONG)
    442 				goto wrong_type;
    443 			size = (size == LONG) ? LLONG : LONG;
    444 			continue;
    445 		case 'U':
    446 			if (sign == UNSIGNED)
    447 				goto wrong_type;
    448 			sign = UNSIGNED;
    449 			continue;
    450 		default:
    451 			goto convert;
    452 		wrong_type:
    453 			error("invalid suffix in integer constant");
    454 		}
    455 	}
    456 
    457 convert:
    458 	tok2str();
    459 	tp = ctype(INT, sign, size);
    460 	sym = newsym(NS_IDEN, NULL);
    461 	sym->type = tp;
    462 	sym->flags |= SCONSTANT;
    463 	yylval.sym = readint(base, sign, sym);
    464 	return CONSTANT;
    465 }
    466 
    467 static void
    468 digits(int base)
    469 {
    470 	char *p;
    471 	int c;
    472 
    473 	for (p = input->p; c = *p; ++p) {
    474 		switch (base) {
    475 		case 8:
    476 			if (!strchr("01234567", c))
    477 				goto end;
    478 			break;
    479 		case 10:
    480 			if (!isdigit(c))
    481 				goto end;
    482 			break;
    483 		case 16:
    484 			if (!isxdigit(c))
    485 				goto end;
    486 			break;
    487 		}
    488 	}
    489 end:
    490 	input->p = p;
    491 }
    492 
    493 static int
    494 number(void)
    495 {
    496 	Type *tp;
    497 	Symbol *sym;
    498 	int ch, base;
    499 	long double ld;
    500 
    501 	if (*input->p != '0') {
    502 		base = 10;
    503 	} else {
    504 		if (toupper(*++input->p) == 'X') {
    505 			++input->p;
    506 			base = 16;
    507 		} else {
    508 			base = 8;
    509 		}
    510 	}
    511 	digits(base);
    512 
    513 	if (*input->p != '.')
    514 		return integer(base);
    515 
    516 	sym = newsym(NS_IDEN, NULL);
    517 
    518 	ld = strtold(input->begin, &input->p);
    519 	switch (toupper(*input->p)) {
    520 	case 'F':
    521 		++input->p;
    522 		tp = floattype;
    523 		sym->u.f = ld;
    524 		break;
    525 	case 'L':
    526 		++input->p;
    527 		tp = ldoubletype;
    528 		sym->u.ld = ld;
    529 		break;
    530 	default:
    531 		tp = doubletype;
    532 		sym->u.d = ld;
    533 		break;
    534 	}
    535 
    536 	tok2str();
    537 	sym->type = tp;
    538 	sym->flags |= SCONSTANT;
    539 	yylval.sym = sym;
    540 	return CONSTANT;
    541 }
    542 
    543 static Rune
    544 escape(int multi)
    545 {
    546 	Rune c;
    547 	int uni, d, i, cnt, base;
    548 
    549 	switch (*++input->p) {
    550 	case 'a':
    551 		return '\a';
    552 	case 'b':
    553 		return '\b';
    554 	case 'f':
    555 		return '\f';
    556 	case 'n':
    557 		return '\n';
    558 	case 'r':
    559 		return '\r';
    560 	case 't':
    561 		return '\t';
    562 	case 'v':
    563 		return '\v';
    564 	case '"':
    565 		return '"';
    566 	case '\'':
    567 		return '\'';
    568 	case '\\':
    569 		return '\\';
    570 	case '\?':
    571 		return '\?';
    572 	case 'U':
    573 		cnt = 8;
    574 		goto check_universal;
    575 	case 'u':
    576 		cnt = 4;
    577 	check_universal:
    578 		if (!multi)
    579 			warn("multi-character character constant");
    580 		++input->p;
    581 		uni = 1;
    582 		base = 16;
    583 		break;
    584 	case 'x':
    585 		++input->p;
    586 		uni = 0;
    587 		cnt = 2;
    588 		base = 16;
    589 		break;
    590 	case '0':
    591 	case '1':
    592 	case '2':
    593 	case '3':
    594 	case '4':
    595 	case '5':
    596 	case '6':
    597 	case '7':
    598 		uni = 0;
    599 		cnt = 3;
    600 		base = 8;
    601 		break;
    602 	default:
    603 		warn("unknown escape sequence");
    604 		return ' ';
    605 	}
    606 
    607 	for (c = i = 0; i < cnt && isxdigit(*input->p); ++i) {
    608 		static char digits[] = "0123456789ABCDEF";
    609 		char *p = strchr(digits, toupper(*input->p));
    610 
    611 		if (!p || (d = p - digits) > base)
    612 			break;
    613 		c *= base;
    614 		c += d;
    615 		++input->p;
    616 	}
    617 	--input->p;
    618 
    619 	if (base == 16 && i != cnt) {
    620 		if (uni) {
    621 			errorp("incorrect digit for universal character constant");
    622 			c = REPLACECHAR;
    623 		} else {
    624 			errorp("\\x used with no following hex digits");
    625 			c = ' ';
    626 		}
    627 	}
    628 
    629 	if (!uni)
    630 		return c;
    631 
    632 	if (c < 0xa0 && c != 0x24 && c != 0x40 && c != 0x60
    633 	|| c >= 0xD800 && c <= 0xDFFF
    634 	|| c >= 0x110000) {
    635 		errorp("invalid universal character constant");
    636 		c = REPLACECHAR;
    637 	}
    638 
    639 	return c;
    640 }
    641 
    642 static int
    643 validutf8(Rune wc, int *nbytes)
    644 {
    645 	static struct range {
    646 		unsigned long begin, end;
    647 		int valid;
    648 		int nbytes;
    649 	} ranges[] = {
    650 		{0,         0x80,       1, 1},
    651 		{0x80,      0x800,      1, 2},
    652 		{0x800,     0xD800,     1, 3},
    653 		{0xD800,    0xDD00,     0, 3},
    654 		{0xDD00,    0x10000,    1, 3},
    655 		{0x10000,   0x110000,   1, 4},
    656 		{0x110000,  -1ul,       0, 0},
    657 	};
    658 	struct range *bp;
    659 
    660 	for (bp = ranges; bp->begin > wc || bp->end <= wc; ++
    661 bp)
    662 		;
    663 	*nbytes = bp->nbytes;
    664 
    665 	return bp->valid;
    666 }
    667 
    668 static Rune
    669 utf8rune(void)
    670 {
    671 	Rune wc;
    672 	int i, sh, n;
    673 	unsigned oc, c;
    674 	unsigned char *s = (unsigned char *) input->p;
    675 
    676 	/* fast track for ascii */
    677 	if ((c = *s) < 0x80)
    678 		return c;
    679 
    680 	/* out of sequence multibyte? */
    681 	if ((c & 0xc0) != 0xc0)
    682 		goto invalid;
    683 
    684 	sh = 1;
    685 	wc = 0;
    686 	oc = c << 1;
    687 
    688 	for (i = 0; i < UTF8_MAX; ++i) {
    689 		c = s[1];
    690 		if ((c & 0xc0) != 0x80)
    691 			goto invalid;
    692 		++s;
    693 
    694 		wc <<= 6;
    695 		wc |= c & 0x3f;
    696 		oc <<= 1;
    697 		sh++;
    698 
    699 		if ((oc & 0x80) == 0) {
    700 			oc = (oc & 0xff) >> sh;
    701 			wc |= oc << (sh-1) * 6;
    702 
    703 			if (!validutf8(wc, &n) || sh != n)
    704 				goto invalid;
    705 			goto return_code;
    706 		}
    707 	}
    708 
    709 invalid:
    710 	errorp("invalid multibyte sequence");
    711 	wc = REPLACECHAR;
    712 
    713 return_code:
    714 	input->p = s;
    715 	return wc;
    716 }
    717 
    718 static Rune
    719 decode(int multi)
    720 {
    721 	Rune r;
    722 
    723 	if (*input->p == '\\') {
    724 		r = escape(multi);
    725 		return r;
    726 	}
    727 
    728 	return multi ? utf8rune() : *input->p;
    729 }
    730 
    731 static int
    732 character(void)
    733 {
    734 	int i, multi = NOMULTICHAR;
    735 	Rune r;
    736 	Type *tp;
    737 	Symbol *sym;
    738 	long long d;
    739 	struct limits *lim;
    740 
    741 	tp = chartype;
    742 	if (*input->p == 'L') {
    743 		multi = MULTICHAR;
    744 		input->p++;
    745 		tp = wchartype;
    746 	}
    747 
    748 	lim = getlimits(tp);
    749 
    750 	d = 0;
    751 	input->p++;
    752 	for (i = 0; *input->p != '\''; i++) {
    753 		r = decode(multi);
    754 		if (r > lim->max.i)
    755 			warn("character too large for enclosing character literal type");
    756 		d |= r;
    757 		input->p++;
    758 	}
    759 	input->p++;
    760 
    761 	if (i == 0)
    762 		errorp("empty character constant");
    763 	if (i > 1)
    764 		warn("multi-character character constant");
    765 
    766 	if (tp == chartype) {
    767 		if (tp->prop & TSIGNED && d > lim->max.i)
    768 			d -= getlimits(uchartype)->max.i + 1;
    769 		tp = inttype;
    770 	}
    771 
    772 	sym = newsym(NS_IDEN, NULL);
    773 	sym->u.i = d;
    774 	sym->type = tp;
    775 	yylval.sym = sym;
    776 	tok2str();
    777 
    778 	return CONSTANT;
    779 }
    780 
    781 static int
    782 rstring(void)
    783 {
    784 	char *beg = input->p++;
    785 	Rune c, buff[STRINGSIZ+1], *bp = buff;
    786 
    787 	for (++input->p; ; ++input->p) {
    788 		switch (*input->p) {
    789 		case '\0':
    790 			errorp("missing terminating '\"' character");
    791 		case '"':
    792 			goto end_loop;
    793 		}
    794 
    795 		c = decode(MULTICHAR);
    796 		if (input->p - beg == STRINGSIZ + 1) {
    797 			/* too long, ignore everything until next quote */
    798 			for (++input->p; *input->p != '"'; ++input->p) {
    799 				if (*input->p == '\\')
    800 					++input->p;
    801 				if (*input->p == '\0')
    802 					break;
    803 			}
    804 			--bp;
    805 			errorp("string too long");
    806 			break;
    807 		}
    808 		*bp++ = c;
    809 	}
    810 
    811 end_loop:
    812 	*bp++ = '\0';
    813 	input->p++;
    814 
    815 	yylval.sym = newrstring(buff, bp - buff);
    816 	tok2str();
    817 
    818 	return STRING;
    819 }
    820 
    821 static int
    822 sstring(void)
    823 {
    824 	int c;
    825 	struct limits *lim;
    826 	char buff[STRINGSIZ+1], *bp = buff, *beg = input->p;
    827 
    828 	for (++input->p; ; ++input->p) {
    829 		switch (*input->p) {
    830 		case '\0':
    831 			errorp("missing terminating '\"' character");
    832 		case '"':
    833 			goto end_loop;
    834 		}
    835 
    836 		c = decode(NOMULTICHAR);
    837 		if (input->p - beg == STRINGSIZ + 1) {
    838 			/* too long, ignore everything until next quote */
    839 			for (++input->p; *input->p != '"'; ++input->p) {
    840 				if (*input->p == '\\')
    841 					++input->p;
    842 				if (*input->p == '\0')
    843 					break;
    844 			}
    845 			--bp;
    846 			errorp("string too long");
    847 			break;
    848 		}
    849 		*bp++ = c;
    850 	}
    851 
    852 end_loop:
    853 	*bp++ = '\0';
    854 	input->p++;
    855 
    856 	yylval.sym = newstring(buff, bp - buff);
    857 	tok2str();
    858 
    859 	return STRING;
    860 }
    861 
    862 static int
    863 string(void)
    864 {
    865 	if (*input->p == 'L')
    866 		return rstring();
    867 	return sstring();
    868 }
    869 
    870 static int
    871 iden(void)
    872 {
    873 	Symbol *sym;
    874 	char *p, *begin;
    875 
    876 	if (input->p[0] == 'L' && input->p[1] == '\'')
    877 		return character();
    878 
    879 	if (input->p[0] == 'L' && input->p[1] == '"')
    880 		return string();
    881 
    882 	begin = input->p;
    883 	for (p = begin; isalnum(*p) || *p == '_'; ++p)
    884 		;
    885 	input->p = p;
    886 	tok2str();
    887 
    888 	sym = lookup(NS_CPP, yytext, NOALLOC);
    889 	if ((yylval.cppsym = sym) != NULL) {
    890 		if (expand(sym))
    891 			return next();
    892 	}
    893 
    894 	sym = lookup(namespace, yytext, ALLOC);
    895 	yylval.sym = sym;
    896 	if (sym->flags & SCONSTANT)
    897 		return CONSTANT;
    898 	if (sym->token != IDEN)
    899 		yylval.token = sym->u.token;
    900 	return sym->token;
    901 }
    902 
    903 static int
    904 follow(int expect, int ifyes, int ifno)
    905 {
    906 	if (*input->p++ == expect)
    907 		return ifyes;
    908 	--input->p;
    909 	return ifno;
    910 }
    911 
    912 static int
    913 minus(void)
    914 {
    915 	switch (*input->p++) {
    916 	case '-':
    917 		return DEC;
    918 	case '>':
    919 		return INDIR;
    920 	case '=':
    921 		return SUB_EQ;
    922 	default:
    923 		--input->p;
    924 		return '-';
    925 	}
    926 }
    927 
    928 static int
    929 plus(void)
    930 {
    931 	switch (*input->p++) {
    932 	case '+':
    933 		return INC;
    934 	case '=':
    935 		return ADD_EQ;
    936 	default:
    937 		--input->p;
    938 		return '+';
    939 	}
    940 }
    941 
    942 static int
    943 relational(int op, int equal, int shift, int assig)
    944 {
    945 	int c;
    946 
    947 	if ((c = *input->p++) == '=')
    948 		return equal;
    949 	if (c == op)
    950 		return follow('=', assig, shift);
    951 	--input->p;
    952 	return op;
    953 }
    954 
    955 static int
    956 logic(int op, int equal, int logic)
    957 {
    958 	int c;
    959 
    960 	if ((c = *input->p++) == '=')
    961 		return equal;
    962 	if (c == op)
    963 		return logic;
    964 	--input->p;
    965 	return op;
    966 }
    967 
    968 static int
    969 dot(void)
    970 {
    971 	int c;
    972 
    973 	if (isdigit(*input->p))
    974 		return number();
    975 	if ((c = *input->p) != '.')
    976 		return '.';
    977 	if ((c = *++input->p) != '.')
    978 		error("incorrect token '..'");
    979 	++input->p;
    980 	return ELLIPSIS;
    981 }
    982 
    983 static int
    984 operator(void)
    985 {
    986 	int t;
    987 
    988 	switch (t = *input->p++) {
    989 	case '<':
    990 		t = relational('<', LE, SHL, SHL_EQ);
    991 		break;
    992 	case '>':
    993 		t = relational('>', GE, SHR, SHR_EQ);
    994 		break;
    995 	case '&':
    996 		t = logic('&', AND_EQ, AND);
    997 		break;
    998 	case '|':
    999 		t = logic('|', OR_EQ, OR);
   1000 		break;
   1001 	case '=':
   1002 		t = follow('=', EQ, '=');
   1003 		break;
   1004 	case '^':
   1005 		t = follow('=', XOR_EQ, '^');
   1006 		break;
   1007 	case '*':
   1008 		t = follow('=', MUL_EQ, '*');
   1009 		break;
   1010 	case '/':
   1011 		t = follow('=', DIV_EQ, '/');
   1012 		break;
   1013 	case '%':
   1014 		t = follow('=', MOD_EQ, '%');
   1015 		break;
   1016 	case '!':
   1017 		t = follow('=', NE, '!');
   1018 		break;
   1019 	case '-':
   1020 		t = minus();
   1021 		break;
   1022 	case '+':
   1023 		t = plus();
   1024 		break;
   1025 	case '.':
   1026 		t = dot();
   1027 		break;
   1028 	}
   1029 	tok2str();
   1030 	return t;
   1031 }
   1032 
   1033 /* TODO: Ensure that namespace is NS_IDEN after a recovery */
   1034 
   1035 /*
   1036  * skip all the spaces until the next token. When we are in
   1037  * CPPMODE \n is not considered a whitespace
   1038  */
   1039 static int
   1040 skipspaces(void)
   1041 {
   1042 	int c;
   1043 
   1044 	if (!input)
   1045 		return EOF;
   1046 
   1047 	for (;;) {
   1048 		switch (c = *input->p) {
   1049 		case '\0':
   1050 			if (!moreinput())
   1051 				return EOF;
   1052 			break;
   1053 		case '\n':
   1054 		case ' ':
   1055 		case '\t':
   1056 		case '\v':
   1057 		case '\r':
   1058 		case '\f':
   1059 			yyspace = c;
   1060 			if (c == '\n' && lexmode == CPPMODE)
   1061 				goto return_byte;
   1062 			++input->p;
   1063 			break;
   1064 		default:
   1065 			goto return_byte;
   1066 		}
   1067 	}
   1068 
   1069 return_byte:
   1070 	input->begin = input->p;
   1071 	return c;
   1072 }
   1073 
   1074 static int
   1075 lex(void)
   1076 {
   1077 	int c;
   1078 
   1079 repeat:
   1080 	switch (c = skipspaces()) {
   1081 	case EOF:
   1082 		return EOFTOK;
   1083 	case NOEXPAND:
   1084 	case EXPAND:
   1085 		disexpand += (c == NOEXPAND) ? +1 : -1;
   1086 		input->p++;
   1087 		goto repeat;
   1088 	case '_':
   1089 		return iden();
   1090 	case '"':
   1091 		return string();
   1092 	case '\'':
   1093 		return character();
   1094 	default:
   1095 		if (isalpha(c))
   1096 			return iden();
   1097 		if (isdigit(c))
   1098 			return number();
   1099 		return operator();
   1100 	}
   1101 }
   1102 
   1103 int
   1104 next(void)
   1105 {
   1106 	yytoken = lex();
   1107 
   1108 	if (yytoken == EOFTOK) {
   1109 		strcpy(yytext, "<EOF>");
   1110 		if (cppctx && !input)
   1111 			errorp("#endif expected");
   1112 	}
   1113 
   1114 	DBG("TOKEN %s", yytext);
   1115 	return yytoken;
   1116 }
   1117 
   1118 void
   1119 expect(int tok)
   1120 {
   1121 	if (yytoken != tok) {
   1122 		if (isgraph(tok))
   1123 			errorp("expected '%c' before '%s'", tok, yytext);
   1124 		else
   1125 			errorp("unexpected '%s'", yytext);
   1126 	} else {
   1127 		next();
   1128 	}
   1129 }
   1130 
   1131 int
   1132 ahead(void)
   1133 {
   1134 	skipspaces();
   1135 	return *input->begin;
   1136 }
   1137 
   1138 void
   1139 setsafe(int type)
   1140 {
   1141 	safe = type;
   1142 }
   1143 
   1144 void
   1145 discard(void)
   1146 {
   1147 	extern jmp_buf recover;
   1148 	int c;
   1149 
   1150 	if (!input)
   1151 		exit(EXIT_FAILURE);
   1152 	for (c = yytoken; ; c = *input->p++) {
   1153 		switch (safe) {
   1154 		case END_COMP:
   1155 			if (c == '}')
   1156 				goto jump;
   1157 			goto semicolon;
   1158 		case END_COND:
   1159 			if (c == ')')
   1160 				goto jump;
   1161 			break;
   1162 		case END_LDECL:
   1163 			if (c == ',')
   1164 				goto jump;
   1165 		case END_DECL:
   1166 		semicolon:
   1167 			if (c == ';')
   1168 				goto jump;
   1169 			break;
   1170 		}
   1171 		if ((c == '\0' || c == EOFTOK) && !moreinput())
   1172 			exit(EXIT_FAILURE);
   1173 	}
   1174 jump:
   1175 	input->begin = input->p;
   1176 	yytoken = c;
   1177 	yytext[0] = c;
   1178 	yytext[1] = '\0';
   1179 	exit(EXIT_FAILURE);
   1180 
   1181 	/*
   1182 	 * FIXME: We don't have a proper recover mechanism at this moment
   1183 	 * and we don't set the recover point ever, so executing this
   1184 	 * longjmp will generate surely a segmentation fault, so it does
   1185 	 * not make sense to do it. We just exit until we can find time
   1186 	 * to solve this problem.
   1187 	 */
   1188 	longjmp(recover, 1);
   1189 }