lex.c (18724B)
1 #include <assert.h> 2 #include <ctype.h> 3 #include <errno.h> 4 #include <limits.h> 5 #include <setjmp.h> 6 #include <stdio.h> 7 #include <stdlib.h> 8 #include <string.h> 9 10 #include <scc/cstd.h> 11 #include <scc/scc.h> 12 #include "cc1.h" 13 14 #define REPLACECHAR 0xFFFD 15 #define NOMULTICHAR 0 16 #define MULTICHAR 1 17 #define UTF8_MAX 4 18 19 int yytoken; 20 struct yystype yylval; 21 char yytext[STRINGSIZ+3]; 22 unsigned short yylen; 23 int lexmode = CCMODE; 24 unsigned lineno; 25 char filenam[FILENAME_MAX]; 26 27 int namespace = NS_IDEN; 28 static int safe; 29 Input *input; 30 31 void 32 setloc(char *fname, unsigned line) 33 { 34 size_t len; 35 36 if (fname) { 37 if ((len = strlen(fname)) >= FILENAME_MAX) 38 die("cc1: %s: file name too long", fname); 39 memmove(filenam, fname, len); 40 filenam[len] = '\0'; 41 42 /* 43 * There are cases where we want to call setloc() 44 * with the data in input, and then we have t be 45 * careful about freeing input->filenam 46 */ 47 if (fname != input->filenam) { 48 free(input->filenam); 49 input->filenam = xstrdup(fname); 50 } 51 } 52 53 lineno = input->lineno = line; 54 } 55 56 int 57 addinput(int type, void *arg, int fail) 58 { 59 FILE *fp; 60 char *extp, *fname, *buffer, *infile; 61 int infileln; 62 Macro *mp; 63 Symbol *sym; 64 Input *newip, *curip = input; 65 66 if (curip) 67 curip->lineno = lineno; 68 69 switch (type) { 70 case IMACRO: 71 fp = NULL; 72 mp = arg; 73 sym = mp->sym; 74 fname = mp->fname; 75 buffer = mp->buffer; 76 DBG("INPUT: expanding macro %s", sym->name); 77 break; 78 case IPARAM: 79 fp = NULL; 80 mp = NULL; 81 buffer = arg; 82 fname = filenam; 83 DBG("INPUT: macro parameter '%s'", buffer); 84 break; 85 case IFILE: 86 fname = arg; 87 mp = NULL; 88 buffer = NULL; 89 90 if ((fp = fopen(fname, "r")) == NULL) { 91 if (!fail) 92 return 0; 93 die("cc1: %s: %s", fname, strerror(errno)); 94 } 95 if (curip && onlyheader) { 96 infile = curip->filenam; 97 infileln = strlen(infile); 98 if (extp = strrchr(infile, '.')) 99 infileln -= strlen(extp); 100 printf("%.*s.o: %s %s\n", 101 infileln, infile, infile, fname); 102 } 103 lineno = 0; 104 DBG("INPUT: file input '%s'", fname); 105 break; 106 case ISTDIN: 107 fp = stdin; 108 mp = NULL; 109 fname = "<stdin>"; 110 buffer = NULL; 111 lineno = 0; 112 DBG("INPUT: file input 'stdin'"); 113 break; 114 default: 115 abort(); 116 } 117 118 if (!buffer) { 119 buffer = xmalloc(INPUTSIZ); 120 buffer[0] = '\0'; 121 } else { 122 buffer = xstrdup(buffer); 123 } 124 125 newip = xmalloc(sizeof(*newip)); 126 newip->next = curip; 127 newip->macro = mp; 128 newip->p = newip->begin = newip->line = buffer; 129 newip->filenam = NULL; 130 newip->lineno = 0; 131 newip->fp = fp; 132 newip->flags = type; 133 input = newip; 134 135 setloc(fname, lineno); 136 return 1; 137 } 138 139 void 140 delinput(void) 141 { 142 Input *ip = input; 143 144 switch (ip->flags & ITYPE) { 145 case IFILE: 146 DBG("INPUT: file finished '%s'", ip->filenam); 147 if (fclose(ip->fp)) 148 die("cc1: %s: %s", ip->filenam, strerror(errno)); 149 break; 150 case IMACRO: 151 DBG("INPUT: macro %s finished", ip->macro->sym->name); 152 delmacro(ip->macro); 153 break; 154 case IPARAM: 155 DBG("INPUT: macro param finished"); 156 break; 157 case ISTDIN: 158 DBG("INPUT: stdin finished"); 159 break; 160 default: 161 abort(); 162 } 163 164 input = ip->next; 165 free(ip->filenam); 166 free(ip->line); 167 free(ip); 168 if (input) 169 setloc(input->filenam, input->lineno); 170 } 171 172 static void 173 newline(void) 174 { 175 if (++lineno == 0) 176 die("cc1: %s: file too long", filenam); 177 } 178 179 /* 180 * Read the next character from the input file, counting number of lines 181 * and joining lines escaped with \ 182 */ 183 static int 184 readchar(void) 185 { 186 FILE *fp = input->fp; 187 int c; 188 189 repeat: 190 switch (c = getc(fp)) { 191 case '\\': 192 if ((c = getc(fp)) == '\n') { 193 newline(); 194 goto repeat; 195 } 196 ungetc(c, fp); 197 c = '\\'; 198 break; 199 case '\n': 200 newline(); 201 break; 202 case EOF: 203 break; 204 } 205 206 return c; 207 } 208 209 /* 210 * discard a C comment. This function is only called from readline 211 * because it is impossible to have a comment in a macro, because 212 * comments are always discarded before processing any cpp directive 213 */ 214 static void 215 comment(int type) 216 { 217 int c; 218 219 c = readchar(); 220 repeat: 221 for ( ; c != EOF && c != type; c = readchar()) 222 ; 223 224 if (c == EOF) { 225 errorp("unterminated comment"); 226 return; 227 } 228 229 if (type == '*' && (c = readchar()) != '/') 230 goto repeat; 231 } 232 233 /* 234 * readline is used to read a full logic line from a file. 235 * It discards comments and check that the line fits in 236 * the input buffer 237 */ 238 static int 239 readline(void) 240 { 241 char *bp, *lim; 242 int c, peekc = 0, delim = 0; 243 244 if (feof(input->fp)) { 245 input->flags |= IEOF; 246 *input->p = '\0'; 247 return 0; 248 } 249 250 *input->line = '\0'; 251 lim = &input->line[INPUTSIZ-1]; 252 for (bp = input->line; bp < lim-1; *bp++ = c) { 253 c = (peekc) ? peekc : readchar(); 254 peekc = 0; 255 if (c == '\n' || c == EOF) 256 break; 257 if (c == '\\') { 258 peekc = readchar(); 259 if (peekc == '\n' || peekc == EOF) 260 continue; 261 if (bp == lim-2) 262 break; 263 *bp++ = c; 264 c = peekc; 265 peekc = 0; 266 continue; 267 } 268 269 if (delim && c == delim) 270 delim = 0; 271 else if (!delim && (c == '"' || c == '\'')) 272 delim = c; 273 if (c != '/' || delim) 274 continue; 275 276 /* check for /* or // */ 277 peekc = readchar(); 278 if (peekc != '*' && peekc != '/') 279 continue; 280 281 if (peekc == '/') { 282 comment('\n'); 283 break; 284 } else { 285 comment('*'); 286 c = ' '; 287 } 288 peekc = 0; 289 } 290 291 input->begin = input->p = input->line; 292 if (bp == lim-1) { 293 errorp("line too long"); 294 --bp; 295 } 296 *bp++ = '\n'; 297 *bp = '\0'; 298 299 return 1; 300 } 301 302 /* 303 * moreinput gets more bytes to be passed to the lexer. 304 * It can take more bytes from macro expansions or 305 * directly reading from files. When a cpp directive 306 * is processed the line is discarded because it must not 307 * be passed to the lexer 308 */ 309 int 310 moreinput(void) 311 { 312 int wasexpand = 0; 313 314 repeat: 315 if (!input) 316 return 0; 317 318 if (*input->p == '\0') { 319 int t = input->flags & ITYPE; 320 if (t == IPARAM) { 321 input->flags |= IEOF; 322 return 0; 323 } 324 if (t == IMACRO) { 325 wasexpand = 1; 326 input->flags |= IEOF; 327 } 328 if (input->flags & IEOF) { 329 delinput(); 330 goto repeat; 331 } 332 if (!readline()) { 333 *input->p = '\0'; 334 goto repeat; 335 } 336 if (cpp()) 337 goto repeat; 338 } 339 340 if (onlycpp && !wasexpand) 341 ppragmaln(); 342 return 1; 343 } 344 345 static void 346 tok2str(void) 347 { 348 if ((yylen = input->p - input->begin) >= sizeof(yytext)) 349 error("token too big"); 350 memcpy(yytext, input->begin, yylen); 351 yytext[yylen] = '\0'; 352 input->begin = input->p; 353 } 354 355 static Symbol * 356 readint(int base, int sign, Symbol *sym) 357 { 358 char *s = yytext; 359 Type *tp = sym->type; 360 struct limits *lim; 361 unsigned long long u, val, max; 362 int c; 363 364 lim = getlimits(tp); 365 max = lim->max.i; 366 if (*s == '0') 367 ++s; 368 if (toupper(*s) == 'X') 369 ++s; 370 371 for (u = 0; isxdigit(c = *s++); u = u*base + val) { 372 static char letters[] = "0123456789ABCDEF"; 373 val = strchr(letters, toupper(c)) - letters; 374 repeat: 375 if (u <= max/base && u*base <= max - val) 376 continue; 377 if (tp->prop & TSIGNED) { 378 if (tp == inttype) 379 tp = (base==10) ? longtype : uinttype; 380 else if (tp == longtype) 381 tp = (base==10) ? llongtype : ulongtype; 382 else if (tp == llongtype && base != 10) 383 tp = ullongtype; 384 else 385 goto overflow; 386 } else { 387 if (tp == uinttype) 388 tp = (sign==UNSIGNED) ? ulongtype : longtype; 389 else if (tp == ulongtype) 390 tp = (sign==UNSIGNED) ? ullongtype : llongtype; 391 else 392 goto overflow; 393 } 394 sym->type = tp; 395 lim = getlimits(tp); 396 max = lim->max.i; 397 goto repeat; 398 } 399 400 if (tp->prop & TSIGNED) 401 sym->u.i = u; 402 else 403 sym->u.u = u; 404 405 return sym; 406 407 overflow: 408 errorp("overflow in integer constant"); 409 return sym; 410 } 411 412 static int 413 integer(int base) 414 { 415 Type *tp; 416 Symbol *sym; 417 unsigned size, sign; 418 419 for (size = sign = 0; ; ++input->p) { 420 switch (toupper(*input->p)) { 421 case 'L': 422 if (size == LLONG) 423 goto wrong_type; 424 size = (size == LONG) ? LLONG : LONG; 425 continue; 426 case 'U': 427 if (sign == UNSIGNED) 428 goto wrong_type; 429 sign = UNSIGNED; 430 continue; 431 default: 432 goto convert; 433 wrong_type: 434 error("invalid suffix in integer constant"); 435 } 436 } 437 438 convert: 439 tok2str(); 440 tp = ctype(INT, sign, size); 441 sym = newsym(NS_IDEN, NULL); 442 sym->type = tp; 443 sym->flags |= SCONSTANT; 444 yylval.sym = readint(base, sign, sym); 445 return CONSTANT; 446 } 447 448 static void 449 digits(int base) 450 { 451 char *p; 452 int c; 453 454 for (p = input->p; c = *p; ++p) { 455 switch (base) { 456 case 8: 457 if (!strchr("01234567", c)) 458 goto end; 459 break; 460 case 10: 461 if (!isdigit(c)) 462 goto end; 463 break; 464 case 16: 465 if (!isxdigit(c)) 466 goto end; 467 break; 468 } 469 } 470 end: 471 input->p = p; 472 } 473 474 static int 475 number(void) 476 { 477 Type *tp; 478 Symbol *sym; 479 int ch, base; 480 long double ld; 481 482 if (*input->p != '0') { 483 base = 10; 484 } else { 485 if (toupper(*++input->p) == 'X') { 486 ++input->p; 487 base = 16; 488 } else { 489 base = 8; 490 } 491 } 492 digits(base); 493 494 if (*input->p != '.') 495 return integer(base); 496 497 sym = newsym(NS_IDEN, NULL); 498 499 ld = strtold(input->begin, &input->p); 500 switch (toupper(*input->p)) { 501 case 'F': 502 ++input->p; 503 tp = floattype; 504 sym->u.f = ld; 505 break; 506 case 'L': 507 ++input->p; 508 tp = ldoubletype; 509 sym->u.ld = ld; 510 break; 511 default: 512 tp = doubletype; 513 sym->u.d = ld; 514 break; 515 } 516 517 tok2str(); 518 sym->type = tp; 519 sym->flags |= SCONSTANT; 520 yylval.sym = sym; 521 return CONSTANT; 522 } 523 524 static Rune 525 escape(int multi) 526 { 527 Rune c; 528 int uni, d, i, cnt, base; 529 530 switch (*++input->p) { 531 case 'a': 532 return '\a'; 533 case 'b': 534 return '\b'; 535 case 'f': 536 return '\f'; 537 case 'n': 538 return '\n'; 539 case 'r': 540 return '\r'; 541 case 't': 542 return '\t'; 543 case 'v': 544 return '\v'; 545 case '"': 546 return '"'; 547 case '\'': 548 return '\''; 549 case '\\': 550 return '\\'; 551 case '\?': 552 return '\?'; 553 case 'U': 554 cnt = 8; 555 goto check_universal; 556 case 'u': 557 cnt = 4; 558 check_universal: 559 if (!multi) 560 warn("multi-character character constant"); 561 ++input->p; 562 uni = 1; 563 base = 16; 564 break; 565 case 'x': 566 ++input->p; 567 uni = 0; 568 cnt = 2; 569 base = 16; 570 break; 571 case '0': 572 case '1': 573 case '2': 574 case '3': 575 case '4': 576 case '5': 577 case '6': 578 case '7': 579 uni = 0; 580 cnt = 3; 581 base = 8; 582 break; 583 default: 584 warn("unknown escape sequence"); 585 return ' '; 586 } 587 588 for (c = i = 0; i < cnt && isxdigit(*input->p); ++i) { 589 static char digits[] = "0123456789ABCDEF"; 590 char *p = strchr(digits, toupper(*input->p)); 591 592 if (!p || (d = p - digits) > base) 593 break; 594 c *= base; 595 c += d; 596 ++input->p; 597 } 598 --input->p; 599 600 if (base == 16 && i != cnt) { 601 if (uni) { 602 errorp("incorrect digit for universal character constant"); 603 c = REPLACECHAR; 604 } else { 605 errorp("\\x used with no following hex digits"); 606 c = ' '; 607 } 608 } 609 610 if (!uni) 611 return c; 612 613 if (c < 0xa0 && c != 0x24 && c != 0x40 && c != 0x60 614 || c >= 0xD800 && c <= 0xDFFF 615 || c >= 0x110000) { 616 errorp("invalid universal character constant"); 617 c = REPLACECHAR; 618 } 619 620 return c; 621 } 622 623 static int 624 validutf8(Rune wc, int *nbytes) 625 { 626 static struct range { 627 unsigned long begin, end; 628 int valid; 629 int nbytes; 630 } ranges[] = { 631 {0, 0x80, 1, 1}, 632 {0x80, 0x800, 1, 2}, 633 {0x800, 0xD800, 1, 3}, 634 {0xD800, 0xDD00, 0, 3}, 635 {0xDD00, 0x10000, 1, 3}, 636 {0x10000, 0x110000, 1, 4}, 637 {0x110000, -1ul, 0, 0}, 638 }; 639 struct range *bp; 640 641 for (bp = ranges; bp->begin > wc || bp->end <= wc; ++ 642 bp) 643 ; 644 *nbytes = bp->nbytes; 645 646 return bp->valid; 647 } 648 649 static Rune 650 utf8rune(void) 651 { 652 Rune wc; 653 int i, sh, n; 654 unsigned oc, c; 655 unsigned char *s = (unsigned char *) input->p; 656 657 /* fast track for ascii */ 658 if ((c = *s) < 0x80) 659 return c; 660 661 /* out of sequence multibyte? */ 662 if ((c & 0xc0) != 0xc0) 663 goto invalid; 664 665 sh = 1; 666 wc = 0; 667 oc = c << 1; 668 669 for (i = 0; i < UTF8_MAX; ++i) { 670 c = s[1]; 671 if ((c & 0xc0) != 0x80) 672 goto invalid; 673 ++s; 674 675 wc <<= 6; 676 wc |= c & 0x3f; 677 oc <<= 1; 678 sh++; 679 680 if ((oc & 0x80) == 0) { 681 oc = (oc & 0xff) >> sh; 682 wc |= oc << (sh-1) * 6; 683 684 if (!validutf8(wc, &n) || sh != n) 685 goto invalid; 686 goto return_code; 687 } 688 } 689 690 invalid: 691 errorp("invalid multibyte sequence"); 692 wc = REPLACECHAR; 693 694 return_code: 695 input->p = s; 696 return wc; 697 } 698 699 static Rune 700 decode(int multi) 701 { 702 Rune r; 703 704 if (*input->p == '\\') { 705 r = escape(multi); 706 return r; 707 } 708 709 return multi ? utf8rune() : *input->p; 710 } 711 712 static int 713 character(void) 714 { 715 int i, multi = NOMULTICHAR; 716 Rune r; 717 Type *tp; 718 Symbol *sym; 719 long long d; 720 struct limits *lim; 721 722 tp = chartype; 723 if (*input->p == 'L') { 724 multi = MULTICHAR; 725 input->p++; 726 tp = wchartype; 727 } 728 729 lim = getlimits(tp); 730 731 d = 0; 732 input->p++; 733 for (i = 0; *input->p != '\''; i++) { 734 r = decode(multi); 735 if (r > lim->max.i) 736 warn("character too large for enclosing character literal type"); 737 d |= r; 738 input->p++; 739 } 740 input->p++; 741 742 if (i == 0) 743 errorp("empty character constant"); 744 if (i > 1) 745 warn("multi-character character constant"); 746 747 if (tp == chartype) { 748 if (tp->prop & TSIGNED && d > lim->max.i) 749 d -= getlimits(uchartype)->max.i + 1; 750 tp = inttype; 751 } 752 753 sym = newsym(NS_IDEN, NULL); 754 sym->u.i = d; 755 sym->type = tp; 756 yylval.sym = sym; 757 tok2str(); 758 759 return CONSTANT; 760 } 761 762 static int 763 rstring(void) 764 { 765 char *beg = input->p++; 766 Rune c, buff[STRINGSIZ+1], *bp = buff; 767 768 for (++input->p; ; ++input->p) { 769 switch (*input->p) { 770 case '\0': 771 errorp("missing terminating '\"' character"); 772 case '"': 773 goto end_loop; 774 } 775 776 c = decode(MULTICHAR); 777 if (input->p - beg == STRINGSIZ + 1) { 778 /* too long, ignore everything until next quote */ 779 for (++input->p; *input->p != '"'; ++input->p) { 780 if (*input->p == '\\') 781 ++input->p; 782 if (*input->p == '\0') 783 break; 784 } 785 --bp; 786 errorp("string too long"); 787 break; 788 } 789 *bp++ = c; 790 } 791 792 end_loop: 793 *bp++ = '\0'; 794 input->p++; 795 796 yylval.sym = newrstring(buff, bp - buff); 797 tok2str(); 798 799 return STRING; 800 } 801 802 static int 803 sstring(void) 804 { 805 int c; 806 struct limits *lim; 807 char buff[STRINGSIZ+1], *bp = buff, *beg = input->p; 808 809 for (++input->p; ; ++input->p) { 810 switch (*input->p) { 811 case '\0': 812 errorp("missing terminating '\"' character"); 813 case '"': 814 goto end_loop; 815 } 816 817 c = decode(NOMULTICHAR); 818 if (input->p - beg == STRINGSIZ + 1) { 819 /* too long, ignore everything until next quote */ 820 for (++input->p; *input->p != '"'; ++input->p) { 821 if (*input->p == '\\') 822 ++input->p; 823 if (*input->p == '\0') 824 break; 825 } 826 --bp; 827 errorp("string too long"); 828 break; 829 } 830 *bp++ = c; 831 } 832 833 end_loop: 834 *bp++ = '\0'; 835 input->p++; 836 837 yylval.sym = newstring(buff, bp - buff); 838 tok2str(); 839 840 return STRING; 841 } 842 843 static int 844 string(void) 845 { 846 if (*input->p == 'L') 847 return rstring(); 848 return sstring(); 849 } 850 851 static int 852 iden(void) 853 { 854 Symbol *sym; 855 char *p, *begin; 856 857 if (input->p[0] == 'L' && input->p[1] == '\'') 858 return character(); 859 860 if (input->p[0] == 'L' && input->p[1] == '"') 861 return string(); 862 863 begin = input->p; 864 for (p = begin; isalnum(*p) || *p == '_'; ++p) 865 ; 866 input->p = p; 867 tok2str(); 868 if ((sym = lookup(NS_CPP, yytext, NOALLOC)) != NULL) { 869 if (expand(sym)) 870 return next(); 871 } 872 sym = lookup(namespace, yytext, ALLOC); 873 yylval.sym = sym; 874 if (sym->flags & SCONSTANT) 875 return CONSTANT; 876 if (sym->token != IDEN) 877 yylval.token = sym->u.token; 878 return sym->token; 879 } 880 881 static int 882 follow(int expect, int ifyes, int ifno) 883 { 884 if (*input->p++ == expect) 885 return ifyes; 886 --input->p; 887 return ifno; 888 } 889 890 static int 891 minus(void) 892 { 893 switch (*input->p++) { 894 case '-': 895 return DEC; 896 case '>': 897 return INDIR; 898 case '=': 899 return SUB_EQ; 900 default: 901 --input->p; 902 return '-'; 903 } 904 } 905 906 static int 907 plus(void) 908 { 909 switch (*input->p++) { 910 case '+': 911 return INC; 912 case '=': 913 return ADD_EQ; 914 default: 915 --input->p; 916 return '+'; 917 } 918 } 919 920 static int 921 relational(int op, int equal, int shift, int assig) 922 { 923 int c; 924 925 if ((c = *input->p++) == '=') 926 return equal; 927 if (c == op) 928 return follow('=', assig, shift); 929 --input->p; 930 return op; 931 } 932 933 static int 934 logic(int op, int equal, int logic) 935 { 936 int c; 937 938 if ((c = *input->p++) == '=') 939 return equal; 940 if (c == op) 941 return logic; 942 --input->p; 943 return op; 944 } 945 946 static int 947 dot(void) 948 { 949 int c; 950 951 if (isdigit(*input->p)) 952 return number(); 953 if ((c = *input->p) != '.') 954 return '.'; 955 if ((c = *++input->p) != '.') 956 error("incorrect token '..'"); 957 ++input->p; 958 return ELLIPSIS; 959 } 960 961 static int 962 operator(void) 963 { 964 int t; 965 966 switch (t = *input->p++) { 967 case '<': 968 t = relational('<', LE, SHL, SHL_EQ); 969 break; 970 case '>': 971 t = relational('>', GE, SHR, SHR_EQ); 972 break; 973 case '&': 974 t = logic('&', AND_EQ, AND); 975 break; 976 case '|': 977 t = logic('|', OR_EQ, OR); 978 break; 979 case '=': 980 t = follow('=', EQ, '='); 981 break; 982 case '^': 983 t = follow('=', XOR_EQ, '^'); 984 break; 985 case '*': 986 t = follow('=', MUL_EQ, '*'); 987 break; 988 case '/': 989 t = follow('=', DIV_EQ, '/'); 990 break; 991 case '%': 992 t = follow('=', MOD_EQ, '%'); 993 break; 994 case '!': 995 t = follow('=', NE, '!'); 996 break; 997 case '-': 998 t = minus(); 999 break; 1000 case '+': 1001 t = plus(); 1002 break; 1003 case '.': 1004 t = dot(); 1005 break; 1006 } 1007 tok2str(); 1008 return t; 1009 } 1010 1011 /* TODO: Ensure that namespace is NS_IDEN after a recovery */ 1012 1013 /* 1014 * skip all the spaces until the next token. When we are in 1015 * CPPMODE \n is not considered a whitespace 1016 */ 1017 static int 1018 skipspaces(void) 1019 { 1020 int c; 1021 1022 if (!input) 1023 return EOF; 1024 1025 for (;;) { 1026 switch (c = *input->p) { 1027 case '\n': 1028 if (lexmode == CPPMODE) 1029 goto return_byte; 1030 ++input->p; 1031 case '\0': 1032 if (!moreinput()) 1033 return EOF; 1034 break; 1035 case ' ': 1036 case '\t': 1037 case '\v': 1038 case '\r': 1039 case '\f': 1040 ++input->p; 1041 break; 1042 default: 1043 goto return_byte; 1044 } 1045 } 1046 1047 return_byte: 1048 input->begin = input->p; 1049 return c; 1050 } 1051 1052 int 1053 next(void) 1054 { 1055 int c; 1056 1057 if ((c = skipspaces()) == EOF) 1058 yytoken = EOFTOK; 1059 else if (isalpha(c) || c == '_') 1060 yytoken = iden(); 1061 else if (isdigit(c)) 1062 yytoken = number(); 1063 else if (c == '"') 1064 yytoken = string(); 1065 else if (c == '\'') 1066 yytoken = character(); 1067 else 1068 yytoken = operator(); 1069 1070 if (yytoken == EOFTOK) { 1071 strcpy(yytext, "<EOF>"); 1072 if (cppctx && !input) 1073 errorp("#endif expected"); 1074 } 1075 1076 DBG("TOKEN %s", yytext); 1077 return yytoken; 1078 } 1079 1080 void 1081 expect(int tok) 1082 { 1083 if (yytoken != tok) { 1084 if (isgraph(tok)) 1085 errorp("expected '%c' before '%s'", tok, yytext); 1086 else 1087 errorp("unexpected '%s'", yytext); 1088 } else { 1089 next(); 1090 } 1091 } 1092 1093 int 1094 ahead(void) 1095 { 1096 skipspaces(); 1097 return *input->begin; 1098 } 1099 1100 void 1101 setsafe(int type) 1102 { 1103 safe = type; 1104 } 1105 1106 void 1107 discard(void) 1108 { 1109 extern jmp_buf recover; 1110 int c; 1111 1112 if (!input) 1113 exit(EXIT_FAILURE); 1114 for (c = yytoken; ; c = *input->p++) { 1115 switch (safe) { 1116 case END_COMP: 1117 if (c == '}') 1118 goto jump; 1119 goto semicolon; 1120 case END_COND: 1121 if (c == ')') 1122 goto jump; 1123 break; 1124 case END_LDECL: 1125 if (c == ',') 1126 goto jump; 1127 case END_DECL: 1128 semicolon: 1129 if (c == ';') 1130 goto jump; 1131 break; 1132 } 1133 if ((c == '\0' || c == EOFTOK) && !moreinput()) 1134 exit(EXIT_FAILURE); 1135 } 1136 jump: 1137 input->begin = input->p; 1138 yytoken = c; 1139 yytext[0] = c; 1140 yytext[1] = '\0'; 1141 exit(EXIT_FAILURE); 1142 1143 /* 1144 * FIXME: We don't have a proper recover mechanism at this moment 1145 * and we don't set the recover point ever, so executing this 1146 * longjmp will generate surely a segmentation fault, so it does 1147 * not make sense to do it. We just exit until we can find time 1148 * to solve this problem. 1149 */ 1150 longjmp(recover, 1); 1151 }