lex.c (19285B)
1 #include <assert.h> 2 #include <ctype.h> 3 #include <errno.h> 4 #include <limits.h> 5 #include <setjmp.h> 6 #include <stdio.h> 7 #include <stdlib.h> 8 #include <string.h> 9 10 #include <scc/cstd.h> 11 #include <scc/scc.h> 12 #include "cc1.h" 13 14 #define REPLACECHAR 0xFFFD 15 #define NOMULTICHAR 0 16 #define MULTICHAR 1 17 #define UTF8_MAX 4 18 19 int yytoken; 20 struct yystype yylval; 21 char yytext[STRINGSIZ+3]; 22 unsigned short yylen; 23 int yyspace, lexmode = CCMODE; 24 unsigned lineno; 25 char filenam[FILENAME_MAX]; 26 27 int namespace = NS_IDEN; 28 static int safe; 29 Input *input; 30 31 void 32 setloc(char *fname, unsigned line) 33 { 34 size_t len; 35 36 if (fname) { 37 if ((len = strlen(fname)) >= FILENAME_MAX) 38 die("cc1: %s: file name too long", fname); 39 memmove(filenam, fname, len); 40 filenam[len] = '\0'; 41 42 /* 43 * There are cases where we want to call setloc() 44 * with the data in input, and then we have t be 45 * careful about freeing input->filenam 46 */ 47 if (fname != input->filenam) { 48 free(input->filenam); 49 input->filenam = xstrdup(fname); 50 } 51 } 52 53 lineno = input->lineno = line; 54 } 55 56 int 57 addinput(int type, void *arg, int fail) 58 { 59 FILE *fp; 60 char *extp, *fname, *buffer, *infile; 61 int infileln; 62 Macro *mp; 63 Symbol *sym, **hs; 64 Input *newip, *curip = input; 65 66 if (curip) 67 curip->lineno = lineno; 68 69 switch (type) { 70 case IMACRO: 71 fp = NULL; 72 mp = arg; 73 sym = mp->sym; 74 fname = mp->fname; 75 buffer = mp->buffer; 76 DBG("INPUT expanding macro %s", sym->name); 77 break; 78 case IPARAM: 79 fp = NULL; 80 mp = NULL; 81 buffer = arg; 82 fname = filenam; 83 DBG("INPUT macro parameter '%s'", buffer); 84 break; 85 case IFILE: 86 fname = arg; 87 mp = NULL; 88 buffer = NULL; 89 90 if ((fp = fopen(fname, "r")) == NULL) { 91 if (!fail) 92 return 0; 93 die("cc1: %s: %s", fname, strerror(errno)); 94 } 95 if (curip && onlyheader) { 96 infile = curip->filenam; 97 infileln = strlen(infile); 98 if (extp = strrchr(infile, '.')) 99 infileln -= strlen(extp); 100 printf("%.*s.o: %s %s\n", 101 infileln, infile, infile, fname); 102 } 103 lineno = 0; 104 DBG("INPUT file input '%s'", fname); 105 break; 106 case ISTDIN: 107 fp = stdin; 108 mp = NULL; 109 fname = "<stdin>"; 110 buffer = NULL; 111 lineno = 0; 112 DBG("INPUT file input 'stdin'"); 113 break; 114 default: 115 abort(); 116 } 117 118 if (!buffer) { 119 buffer = xmalloc(INPUTSIZ); 120 buffer[0] = '\0'; 121 } else { 122 buffer = xstrdup(buffer); 123 } 124 125 if (input) 126 unsethideset(input); 127 128 newip = xmalloc(sizeof(*newip)); 129 newip->next = curip; 130 newip->macro = mp; 131 newip->p = newip->begin = newip->line = buffer; 132 newip->filenam = NULL; 133 newip->lineno = 0; 134 newip->fp = fp; 135 newip->flags = type; 136 memset(newip->hideset, 0, sizeof(newip->hideset)); 137 input = newip; 138 139 hs = NULL; 140 if (curip) 141 hs = curip->hideset; 142 if (type == IMACRO) 143 hs = mp->hideset; 144 if (type == IPARAM && curip) 145 hs = curip->macro->hideset; 146 if (hs) { 147 memcpy(input->hideset, hs, sizeof(input->hideset)); 148 sethideset(input); 149 } 150 151 setloc(fname, lineno); 152 return 1; 153 } 154 155 void 156 delinput(void) 157 { 158 Input *ip = input; 159 160 switch (ip->flags & ITYPE) { 161 case IFILE: 162 DBG("INPUT file finished '%s'", ip->filenam); 163 if (fclose(ip->fp)) 164 die("cc1: %s: %s", ip->filenam, strerror(errno)); 165 break; 166 case IMACRO: 167 DBG("INPUT macro %s finished", ip->macro->sym->name); 168 delmacro(ip->macro); 169 break; 170 case IPARAM: 171 DBG("INPUT macro param finished"); 172 break; 173 case ISTDIN: 174 DBG("INPUT stdin finished"); 175 break; 176 default: 177 abort(); 178 } 179 unsethideset(input); 180 181 input = ip->next; 182 free(ip->filenam); 183 free(ip->line); 184 free(ip); 185 if (input) { 186 sethideset(input); 187 setloc(input->filenam, input->lineno); 188 } 189 } 190 191 static void 192 newline(void) 193 { 194 if (++lineno == 0) 195 die("cc1: %s: file too long", filenam); 196 } 197 198 /* 199 * Read the next character from the input file, counting number of lines 200 * and joining lines escaped with \ 201 */ 202 static int 203 readchar(void) 204 { 205 FILE *fp = input->fp; 206 int c; 207 208 repeat: 209 switch (c = getc(fp)) { 210 case '\\': 211 if ((c = getc(fp)) == '\n') { 212 newline(); 213 goto repeat; 214 } 215 ungetc(c, fp); 216 c = '\\'; 217 break; 218 case '\n': 219 newline(); 220 break; 221 case EOF: 222 break; 223 } 224 225 return c; 226 } 227 228 /* 229 * discard a C comment. This function is only called from readline 230 * because it is impossible to have a comment in a macro, because 231 * comments are always discarded before processing any cpp directive 232 */ 233 static void 234 comment(int type) 235 { 236 int c; 237 238 c = readchar(); 239 repeat: 240 for ( ; c != EOF && c != type; c = readchar()) 241 ; 242 243 if (c == EOF) { 244 errorp("unterminated comment"); 245 return; 246 } 247 248 if (type == '*' && (c = readchar()) != '/') 249 goto repeat; 250 } 251 252 /* 253 * readline is used to read a full logic line from a file. 254 * It discards comments and check that the line fits in 255 * the input buffer 256 */ 257 static int 258 readline(void) 259 { 260 char *bp, *lim; 261 int c, peekc = 0, delim = 0; 262 263 if (feof(input->fp)) { 264 input->flags |= IEOF; 265 *input->p = '\0'; 266 return 0; 267 } 268 269 *input->line = '\0'; 270 lim = &input->line[INPUTSIZ-1]; 271 for (bp = input->line; bp < lim-1; *bp++ = c) { 272 c = (peekc) ? peekc : readchar(); 273 peekc = 0; 274 if (c == '\n' || c == EOF) 275 break; 276 if (c == '\\') { 277 peekc = readchar(); 278 if (peekc == '\n' || peekc == EOF) 279 continue; 280 if (bp == lim-2) 281 break; 282 *bp++ = c; 283 c = peekc; 284 peekc = 0; 285 continue; 286 } 287 288 if (delim && c == delim) 289 delim = 0; 290 else if (!delim && (c == '"' || c == '\'')) 291 delim = c; 292 if (c != '/' || delim) 293 continue; 294 295 /* check for /* or // */ 296 peekc = readchar(); 297 if (peekc != '*' && peekc != '/') 298 continue; 299 300 if (peekc == '/') { 301 comment('\n'); 302 break; 303 } else { 304 comment('*'); 305 c = ' '; 306 } 307 peekc = 0; 308 } 309 310 input->begin = input->p = input->line; 311 if (bp == lim-1) { 312 errorp("line too long"); 313 --bp; 314 } 315 *bp++ = '\n'; 316 *bp = '\0'; 317 318 return 1; 319 } 320 321 /* 322 * moreinput gets more bytes to be passed to the lexer. 323 * It can take more bytes from macro expansions or 324 * directly reading from files. When a cpp directive 325 * is processed the line is discarded because it must not 326 * be passed to the lexer 327 */ 328 int 329 moreinput(void) 330 { 331 int wasexpand = 0; 332 333 repeat: 334 if (!input) 335 return 0; 336 337 if (*input->p == '\0') { 338 int t = input->flags & ITYPE; 339 if (t == IPARAM) { 340 input->flags |= IEOF; 341 return 0; 342 } 343 if (t == IMACRO) { 344 wasexpand = 1; 345 input->flags |= IEOF; 346 } 347 if (input->flags & IEOF) { 348 delinput(); 349 goto repeat; 350 } 351 if (!readline()) { 352 *input->p = '\0'; 353 goto repeat; 354 } 355 if (cpp()) 356 goto repeat; 357 } 358 359 if (onlycpp && !wasexpand) 360 ppragmaln(); 361 return 1; 362 } 363 364 static void 365 tok2str(void) 366 { 367 if ((yylen = input->p - input->begin) >= sizeof(yytext)) 368 error("token too big"); 369 memcpy(yytext, input->begin, yylen); 370 yytext[yylen] = '\0'; 371 input->begin = input->p; 372 } 373 374 static Symbol * 375 readint(int base, int sign, Symbol *sym) 376 { 377 char *s = yytext; 378 Type *tp = sym->type; 379 struct limits *lim; 380 unsigned long long u, val, max; 381 int c; 382 383 lim = getlimits(tp); 384 max = lim->max.i; 385 if (*s == '0') 386 ++s; 387 if (toupper(*s) == 'X') 388 ++s; 389 390 for (u = 0; isxdigit(c = *s++); u = u*base + val) { 391 static char letters[] = "0123456789ABCDEF"; 392 val = strchr(letters, toupper(c)) - letters; 393 repeat: 394 if (u <= max/base && u*base <= max - val) 395 continue; 396 if (tp->prop & TSIGNED) { 397 if (tp == inttype) 398 tp = (base==10) ? longtype : uinttype; 399 else if (tp == longtype) 400 tp = (base==10) ? llongtype : ulongtype; 401 else if (tp == llongtype && base != 10) 402 tp = ullongtype; 403 else 404 goto overflow; 405 } else { 406 if (tp == uinttype) 407 tp = (sign==UNSIGNED) ? ulongtype : longtype; 408 else if (tp == ulongtype) 409 tp = (sign==UNSIGNED) ? ullongtype : llongtype; 410 else 411 goto overflow; 412 } 413 sym->type = tp; 414 lim = getlimits(tp); 415 max = lim->max.i; 416 goto repeat; 417 } 418 419 if (tp->prop & TSIGNED) 420 sym->u.i = u; 421 else 422 sym->u.u = u; 423 424 return sym; 425 426 overflow: 427 errorp("overflow in integer constant"); 428 return sym; 429 } 430 431 static int 432 integer(int base) 433 { 434 Type *tp; 435 Symbol *sym; 436 unsigned size, sign; 437 438 for (size = sign = 0; ; ++input->p) { 439 switch (toupper(*input->p)) { 440 case 'L': 441 if (size == LLONG) 442 goto wrong_type; 443 size = (size == LONG) ? LLONG : LONG; 444 continue; 445 case 'U': 446 if (sign == UNSIGNED) 447 goto wrong_type; 448 sign = UNSIGNED; 449 continue; 450 default: 451 goto convert; 452 wrong_type: 453 error("invalid suffix in integer constant"); 454 } 455 } 456 457 convert: 458 tok2str(); 459 tp = ctype(INT, sign, size); 460 sym = newsym(NS_IDEN, NULL); 461 sym->type = tp; 462 sym->flags |= SCONSTANT; 463 yylval.sym = readint(base, sign, sym); 464 return CONSTANT; 465 } 466 467 static void 468 digits(int base) 469 { 470 char *p; 471 int c; 472 473 for (p = input->p; c = *p; ++p) { 474 switch (base) { 475 case 8: 476 if (!strchr("01234567", c)) 477 goto end; 478 break; 479 case 10: 480 if (!isdigit(c)) 481 goto end; 482 break; 483 case 16: 484 if (!isxdigit(c)) 485 goto end; 486 break; 487 } 488 } 489 end: 490 input->p = p; 491 } 492 493 static int 494 number(void) 495 { 496 Type *tp; 497 Symbol *sym; 498 int ch, base; 499 long double ld; 500 501 if (*input->p != '0') { 502 base = 10; 503 } else { 504 if (toupper(*++input->p) == 'X') { 505 ++input->p; 506 base = 16; 507 } else { 508 base = 8; 509 } 510 } 511 digits(base); 512 513 if (*input->p != '.') 514 return integer(base); 515 516 sym = newsym(NS_IDEN, NULL); 517 518 ld = strtold(input->begin, &input->p); 519 switch (toupper(*input->p)) { 520 case 'F': 521 ++input->p; 522 tp = floattype; 523 sym->u.f = ld; 524 break; 525 case 'L': 526 ++input->p; 527 tp = ldoubletype; 528 sym->u.ld = ld; 529 break; 530 default: 531 tp = doubletype; 532 sym->u.d = ld; 533 break; 534 } 535 536 tok2str(); 537 sym->type = tp; 538 sym->flags |= SCONSTANT; 539 yylval.sym = sym; 540 return CONSTANT; 541 } 542 543 static Rune 544 escape(int multi) 545 { 546 Rune c; 547 int uni, d, i, cnt, base; 548 549 switch (*++input->p) { 550 case 'a': 551 return '\a'; 552 case 'b': 553 return '\b'; 554 case 'f': 555 return '\f'; 556 case 'n': 557 return '\n'; 558 case 'r': 559 return '\r'; 560 case 't': 561 return '\t'; 562 case 'v': 563 return '\v'; 564 case '"': 565 return '"'; 566 case '\'': 567 return '\''; 568 case '\\': 569 return '\\'; 570 case '\?': 571 return '\?'; 572 case 'U': 573 cnt = 8; 574 goto check_universal; 575 case 'u': 576 cnt = 4; 577 check_universal: 578 if (!multi) 579 warn("multi-character character constant"); 580 ++input->p; 581 uni = 1; 582 base = 16; 583 break; 584 case 'x': 585 ++input->p; 586 uni = 0; 587 cnt = 2; 588 base = 16; 589 break; 590 case '0': 591 case '1': 592 case '2': 593 case '3': 594 case '4': 595 case '5': 596 case '6': 597 case '7': 598 uni = 0; 599 cnt = 3; 600 base = 8; 601 break; 602 default: 603 warn("unknown escape sequence"); 604 return ' '; 605 } 606 607 for (c = i = 0; i < cnt && isxdigit(*input->p); ++i) { 608 static char digits[] = "0123456789ABCDEF"; 609 char *p = strchr(digits, toupper(*input->p)); 610 611 if (!p || (d = p - digits) > base) 612 break; 613 c *= base; 614 c += d; 615 ++input->p; 616 } 617 --input->p; 618 619 if (base == 16 && i != cnt) { 620 if (uni) { 621 errorp("incorrect digit for universal character constant"); 622 c = REPLACECHAR; 623 } else { 624 errorp("\\x used with no following hex digits"); 625 c = ' '; 626 } 627 } 628 629 if (!uni) 630 return c; 631 632 if (c < 0xa0 && c != 0x24 && c != 0x40 && c != 0x60 633 || c >= 0xD800 && c <= 0xDFFF 634 || c >= 0x110000) { 635 errorp("invalid universal character constant"); 636 c = REPLACECHAR; 637 } 638 639 return c; 640 } 641 642 static int 643 validutf8(Rune wc, int *nbytes) 644 { 645 static struct range { 646 unsigned long begin, end; 647 int valid; 648 int nbytes; 649 } ranges[] = { 650 {0, 0x80, 1, 1}, 651 {0x80, 0x800, 1, 2}, 652 {0x800, 0xD800, 1, 3}, 653 {0xD800, 0xDD00, 0, 3}, 654 {0xDD00, 0x10000, 1, 3}, 655 {0x10000, 0x110000, 1, 4}, 656 {0x110000, -1ul, 0, 0}, 657 }; 658 struct range *bp; 659 660 for (bp = ranges; bp->begin > wc || bp->end <= wc; ++ 661 bp) 662 ; 663 *nbytes = bp->nbytes; 664 665 return bp->valid; 666 } 667 668 static Rune 669 utf8rune(void) 670 { 671 Rune wc; 672 int i, sh, n; 673 unsigned oc, c; 674 unsigned char *s = (unsigned char *) input->p; 675 676 /* fast track for ascii */ 677 if ((c = *s) < 0x80) 678 return c; 679 680 /* out of sequence multibyte? */ 681 if ((c & 0xc0) != 0xc0) 682 goto invalid; 683 684 sh = 1; 685 wc = 0; 686 oc = c << 1; 687 688 for (i = 0; i < UTF8_MAX; ++i) { 689 c = s[1]; 690 if ((c & 0xc0) != 0x80) 691 goto invalid; 692 ++s; 693 694 wc <<= 6; 695 wc |= c & 0x3f; 696 oc <<= 1; 697 sh++; 698 699 if ((oc & 0x80) == 0) { 700 oc = (oc & 0xff) >> sh; 701 wc |= oc << (sh-1) * 6; 702 703 if (!validutf8(wc, &n) || sh != n) 704 goto invalid; 705 goto return_code; 706 } 707 } 708 709 invalid: 710 errorp("invalid multibyte sequence"); 711 wc = REPLACECHAR; 712 713 return_code: 714 input->p = s; 715 return wc; 716 } 717 718 static Rune 719 decode(int multi) 720 { 721 Rune r; 722 723 if (*input->p == '\\') { 724 r = escape(multi); 725 return r; 726 } 727 728 return multi ? utf8rune() : *input->p; 729 } 730 731 static int 732 character(void) 733 { 734 int i, multi = NOMULTICHAR; 735 Rune r; 736 Type *tp; 737 Symbol *sym; 738 long long d; 739 struct limits *lim; 740 741 tp = chartype; 742 if (*input->p == 'L') { 743 multi = MULTICHAR; 744 input->p++; 745 tp = wchartype; 746 } 747 748 lim = getlimits(tp); 749 750 d = 0; 751 input->p++; 752 for (i = 0; *input->p != '\''; i++) { 753 r = decode(multi); 754 if (r > lim->max.i) 755 warn("character too large for enclosing character literal type"); 756 d |= r; 757 input->p++; 758 } 759 input->p++; 760 761 if (i == 0) 762 errorp("empty character constant"); 763 if (i > 1) 764 warn("multi-character character constant"); 765 766 if (tp == chartype) { 767 if (tp->prop & TSIGNED && d > lim->max.i) 768 d -= getlimits(uchartype)->max.i + 1; 769 tp = inttype; 770 } 771 772 sym = newsym(NS_IDEN, NULL); 773 sym->u.i = d; 774 sym->type = tp; 775 yylval.sym = sym; 776 tok2str(); 777 778 return CONSTANT; 779 } 780 781 static int 782 rstring(void) 783 { 784 char *beg = input->p++; 785 Rune c, buff[STRINGSIZ+1], *bp = buff; 786 787 for (++input->p; ; ++input->p) { 788 switch (*input->p) { 789 case '\0': 790 errorp("missing terminating '\"' character"); 791 case '"': 792 goto end_loop; 793 } 794 795 c = decode(MULTICHAR); 796 if (input->p - beg == STRINGSIZ + 1) { 797 /* too long, ignore everything until next quote */ 798 for (++input->p; *input->p != '"'; ++input->p) { 799 if (*input->p == '\\') 800 ++input->p; 801 if (*input->p == '\0') 802 break; 803 } 804 --bp; 805 errorp("string too long"); 806 break; 807 } 808 *bp++ = c; 809 } 810 811 end_loop: 812 *bp++ = '\0'; 813 input->p++; 814 815 yylval.sym = newrstring(buff, bp - buff); 816 tok2str(); 817 818 return STRING; 819 } 820 821 static int 822 sstring(void) 823 { 824 int c; 825 struct limits *lim; 826 char buff[STRINGSIZ+1], *bp = buff, *beg = input->p; 827 828 for (++input->p; ; ++input->p) { 829 switch (*input->p) { 830 case '\0': 831 errorp("missing terminating '\"' character"); 832 case '"': 833 goto end_loop; 834 } 835 836 c = decode(NOMULTICHAR); 837 if (input->p - beg == STRINGSIZ + 1) { 838 /* too long, ignore everything until next quote */ 839 for (++input->p; *input->p != '"'; ++input->p) { 840 if (*input->p == '\\') 841 ++input->p; 842 if (*input->p == '\0') 843 break; 844 } 845 --bp; 846 errorp("string too long"); 847 break; 848 } 849 *bp++ = c; 850 } 851 852 end_loop: 853 *bp++ = '\0'; 854 input->p++; 855 856 yylval.sym = newstring(buff, bp - buff); 857 tok2str(); 858 859 return STRING; 860 } 861 862 static int 863 string(void) 864 { 865 if (*input->p == 'L') 866 return rstring(); 867 return sstring(); 868 } 869 870 static int 871 iden(void) 872 { 873 Symbol *sym; 874 char *p, *begin; 875 876 if (input->p[0] == 'L' && input->p[1] == '\'') 877 return character(); 878 879 if (input->p[0] == 'L' && input->p[1] == '"') 880 return string(); 881 882 begin = input->p; 883 for (p = begin; isalnum(*p) || *p == '_'; ++p) 884 ; 885 input->p = p; 886 tok2str(); 887 888 sym = lookup(NS_CPP, yytext, NOALLOC); 889 if ((yylval.cppsym = sym) != NULL) { 890 if (expand(sym)) 891 return next(); 892 } 893 894 sym = lookup(namespace, yytext, ALLOC); 895 yylval.sym = sym; 896 if (sym->flags & SCONSTANT) 897 return CONSTANT; 898 if (sym->token != IDEN) 899 yylval.token = sym->u.token; 900 return sym->token; 901 } 902 903 static int 904 follow(int expect, int ifyes, int ifno) 905 { 906 if (*input->p++ == expect) 907 return ifyes; 908 --input->p; 909 return ifno; 910 } 911 912 static int 913 minus(void) 914 { 915 switch (*input->p++) { 916 case '-': 917 return DEC; 918 case '>': 919 return INDIR; 920 case '=': 921 return SUB_EQ; 922 default: 923 --input->p; 924 return '-'; 925 } 926 } 927 928 static int 929 plus(void) 930 { 931 switch (*input->p++) { 932 case '+': 933 return INC; 934 case '=': 935 return ADD_EQ; 936 default: 937 --input->p; 938 return '+'; 939 } 940 } 941 942 static int 943 relational(int op, int equal, int shift, int assig) 944 { 945 int c; 946 947 if ((c = *input->p++) == '=') 948 return equal; 949 if (c == op) 950 return follow('=', assig, shift); 951 --input->p; 952 return op; 953 } 954 955 static int 956 logic(int op, int equal, int logic) 957 { 958 int c; 959 960 if ((c = *input->p++) == '=') 961 return equal; 962 if (c == op) 963 return logic; 964 --input->p; 965 return op; 966 } 967 968 static int 969 dot(void) 970 { 971 int c; 972 973 if (isdigit(*input->p)) 974 return number(); 975 if ((c = *input->p) != '.') 976 return '.'; 977 if ((c = *++input->p) != '.') 978 error("incorrect token '..'"); 979 ++input->p; 980 return ELLIPSIS; 981 } 982 983 static int 984 operator(void) 985 { 986 int t; 987 988 switch (t = *input->p++) { 989 case '<': 990 t = relational('<', LE, SHL, SHL_EQ); 991 break; 992 case '>': 993 t = relational('>', GE, SHR, SHR_EQ); 994 break; 995 case '&': 996 t = logic('&', AND_EQ, AND); 997 break; 998 case '|': 999 t = logic('|', OR_EQ, OR); 1000 break; 1001 case '=': 1002 t = follow('=', EQ, '='); 1003 break; 1004 case '^': 1005 t = follow('=', XOR_EQ, '^'); 1006 break; 1007 case '*': 1008 t = follow('=', MUL_EQ, '*'); 1009 break; 1010 case '/': 1011 t = follow('=', DIV_EQ, '/'); 1012 break; 1013 case '%': 1014 t = follow('=', MOD_EQ, '%'); 1015 break; 1016 case '!': 1017 t = follow('=', NE, '!'); 1018 break; 1019 case '-': 1020 t = minus(); 1021 break; 1022 case '+': 1023 t = plus(); 1024 break; 1025 case '.': 1026 t = dot(); 1027 break; 1028 } 1029 tok2str(); 1030 return t; 1031 } 1032 1033 /* TODO: Ensure that namespace is NS_IDEN after a recovery */ 1034 1035 /* 1036 * skip all the spaces until the next token. When we are in 1037 * CPPMODE \n is not considered a whitespace 1038 */ 1039 static int 1040 skipspaces(void) 1041 { 1042 int c; 1043 1044 if (!input) 1045 return EOF; 1046 1047 for (;;) { 1048 switch (c = *input->p) { 1049 case '\0': 1050 if (!moreinput()) 1051 return EOF; 1052 break; 1053 case '\n': 1054 case ' ': 1055 case '\t': 1056 case '\v': 1057 case '\r': 1058 case '\f': 1059 yyspace = c; 1060 if (c == '\n' && lexmode == CPPMODE) 1061 goto return_byte; 1062 ++input->p; 1063 break; 1064 default: 1065 goto return_byte; 1066 } 1067 } 1068 1069 return_byte: 1070 input->begin = input->p; 1071 return c; 1072 } 1073 1074 static int 1075 lex(void) 1076 { 1077 int c; 1078 1079 repeat: 1080 switch (c = skipspaces()) { 1081 case EOF: 1082 return EOFTOK; 1083 case NOEXPAND: 1084 case EXPAND: 1085 disexpand += (c == NOEXPAND) ? +1 : -1; 1086 input->p++; 1087 goto repeat; 1088 case '_': 1089 return iden(); 1090 case '"': 1091 return string(); 1092 case '\'': 1093 return character(); 1094 default: 1095 if (isalpha(c)) 1096 return iden(); 1097 if (isdigit(c)) 1098 return number(); 1099 return operator(); 1100 } 1101 } 1102 1103 int 1104 next(void) 1105 { 1106 yytoken = lex(); 1107 1108 if (yytoken == EOFTOK) { 1109 strcpy(yytext, "<EOF>"); 1110 if (cppctx && !input) 1111 errorp("#endif expected"); 1112 } 1113 1114 DBG("TOKEN %s", yytext); 1115 return yytoken; 1116 } 1117 1118 void 1119 expect(int tok) 1120 { 1121 if (yytoken != tok) { 1122 if (isgraph(tok)) 1123 errorp("expected '%c' before '%s'", tok, yytext); 1124 else 1125 errorp("unexpected '%s'", yytext); 1126 } else { 1127 next(); 1128 } 1129 } 1130 1131 int 1132 ahead(void) 1133 { 1134 skipspaces(); 1135 return *input->begin; 1136 } 1137 1138 void 1139 setsafe(int type) 1140 { 1141 safe = type; 1142 } 1143 1144 void 1145 discard(void) 1146 { 1147 extern jmp_buf recover; 1148 int c; 1149 1150 if (!input) 1151 exit(EXIT_FAILURE); 1152 for (c = yytoken; ; c = *input->p++) { 1153 switch (safe) { 1154 case END_COMP: 1155 if (c == '}') 1156 goto jump; 1157 goto semicolon; 1158 case END_COND: 1159 if (c == ')') 1160 goto jump; 1161 break; 1162 case END_LDECL: 1163 if (c == ',') 1164 goto jump; 1165 case END_DECL: 1166 semicolon: 1167 if (c == ';') 1168 goto jump; 1169 break; 1170 } 1171 if ((c == '\0' || c == EOFTOK) && !moreinput()) 1172 exit(EXIT_FAILURE); 1173 } 1174 jump: 1175 input->begin = input->p; 1176 yytoken = c; 1177 yytext[0] = c; 1178 yytext[1] = '\0'; 1179 exit(EXIT_FAILURE); 1180 1181 /* 1182 * FIXME: We don't have a proper recover mechanism at this moment 1183 * and we don't set the recover point ever, so executing this 1184 * longjmp will generate surely a segmentation fault, so it does 1185 * not make sense to do it. We just exit until we can find time 1186 * to solve this problem. 1187 */ 1188 longjmp(recover, 1); 1189 }