parser.c (14397B)
1 #include <errno.h> 2 #include <limits.h> 3 #include <stdio.h> 4 #include <stdlib.h> 5 #include <string.h> 6 7 #include <scc/cstd.h> 8 #include <scc/scc.h> 9 10 #include "cc2.h" 11 12 #define STACKSIZ 50 13 14 Type funetype = { 15 .flags = FUNF | ELLIPS 16 }; 17 18 Type funtype = { 19 .flags = FUNF 20 }; 21 22 union tokenop { 23 void *arg; 24 unsigned op; 25 }; 26 27 Node *laststmt; 28 static Swtch swtbl[NR_BLOCK], *swp = swtbl; 29 static Symbol *lastfun; 30 31 typedef void parsefun(char *, union tokenop); 32 static parsefun type, symbol, getname, unary, binary, ternary, call, 33 constant, composed, binit, einit, 34 jump, oreturn, loop, assign, 35 ocase, bswitch, eswitch, builtin; 36 37 typedef void evalfun(void); 38 static evalfun vardecl, beginfun, endfun, endpars, stmt, 39 array, aggregate, flddecl, labeldcl; 40 41 static struct decoc { 42 void (*eval)(void); 43 void (*parse)(char *token, union tokenop); 44 union tokenop u; 45 } optbl[] = { /* eval parse args */ 46 ['A'] = { vardecl, symbol, .u.op = SAUTO<<8 | OAUTO}, 47 ['R'] = { vardecl, symbol, .u.op = SREG<<8 | OREG}, 48 ['G'] = { vardecl, symbol, .u.op = SGLOB<<8 | OMEM}, 49 ['X'] = { vardecl, symbol, .u.op = SEXTRN<<8 | OMEM}, 50 ['Y'] = { vardecl, symbol, .u.op = SPRIV<<8 | OMEM}, 51 ['T'] = { vardecl, symbol, .u.op = SLOCAL<<8 | OMEM}, 52 ['M'] = { flddecl, symbol, .u.op = SMEMB<<8 | OMEM}, 53 ['L'] = { labeldcl, symbol, .u.op = SLABEL<<8 | OLABEL}, 54 55 ['C'] = { NULL, type, .u.arg = &int8type}, 56 ['I'] = { NULL, type, .u.arg = &int16type}, 57 ['W'] = { NULL, type, .u.arg = &int32type}, 58 ['Q'] = { NULL, type, .u.arg = &int64type}, 59 ['K'] = { NULL, type, .u.arg = &uint8type}, 60 ['N'] = { NULL, type, .u.arg = &uint16type}, 61 ['Z'] = { NULL, type, .u.arg = &uint32type}, 62 ['O'] = { NULL, type, .u.arg = &uint64type}, 63 ['J'] = { NULL, type, .u.arg = &float32type}, 64 ['D'] = { NULL, type, .u.arg = &float64type}, 65 ['H'] = { NULL, type, .u.arg = &float80type}, 66 ['0'] = { NULL, type, .u.arg = &voidtype}, 67 ['B'] = { NULL, type, .u.arg = &booltype}, 68 ['P'] = { NULL, type, .u.arg = &ptrtype}, 69 ['E'] = { NULL, type, .u.arg = &funetype}, 70 ['1'] = { NULL, type, .u.arg = &arg_type}, 71 72 ['F'] = { NULL, type, .u.arg = &funtype}, 73 ['V'] = { array,composed, 0}, 74 ['U'] = {aggregate,composed, 0}, 75 ['S'] = {aggregate,composed, 0}, 76 77 ['"'] = { NULL, getname, 0}, 78 ['{'] = { beginfun, NULL, 0}, 79 ['}'] = { endfun, NULL, 0}, 80 ['('] = { NULL, binit, 0}, 81 [')'] = { NULL, einit, 0}, 82 ['\\'] = { endpars, NULL, 0}, 83 ['\t'] = { stmt, NULL, 0}, 84 85 ['~'] = { NULL, unary, .u.op = OCPL}, 86 ['_'] = { NULL, unary, .u.op = OSNEG}, 87 ['\''] = { NULL, unary, .u.op = OADDR}, 88 ['@'] = { NULL, unary, .u.op = OPTR}, 89 ['g'] = { NULL, unary, .u.op = OCAST}, 90 ['p'] = { NULL, unary, .u.op = OPAR}, 91 ['n'] = { NULL, unary, .u.op = ONEG}, 92 93 ['a'] = { NULL, binary, .u.op = OAND}, 94 ['o'] = { NULL, binary, .u.op = OOR}, 95 ['.'] = { NULL, binary, .u.op = OFIELD}, 96 ['+'] = { NULL, binary, .u.op = OADD}, 97 ['-'] = { NULL, binary, .u.op = OSUB}, 98 ['*'] = { NULL, binary, .u.op = OMUL}, 99 ['%'] = { NULL, binary, .u.op = OMOD}, 100 ['/'] = { NULL, binary, .u.op = ODIV}, 101 ['l'] = { NULL, binary, .u.op = OSHL}, 102 ['r'] = { NULL, binary, .u.op = OSHR}, 103 ['<'] = { NULL, binary, .u.op = OLT}, 104 ['>'] = { NULL, binary, .u.op = OGT}, 105 ['['] = { NULL, binary, .u.op = OLE}, 106 [']'] = { NULL, binary, .u.op = OGE}, 107 ['='] = { NULL, binary, .u.op = OEQ}, 108 ['!'] = { NULL, binary, .u.op = ONE}, 109 ['&'] = { NULL, binary, .u.op = OBAND}, 110 ['|'] = { NULL, binary, .u.op = OBOR}, 111 ['^'] = { NULL, binary, .u.op = OBXOR}, 112 [','] = { NULL, binary, .u.op = OCOMMA}, 113 ['m'] = { NULL, builtin,.u.op = OBUILTIN}, 114 115 [':'] = { NULL, assign, .u.op = OASSIG}, 116 ['?'] = { NULL, ternary, .u.op = OASK}, 117 ['c'] = { NULL, call, .u.op = OCALL}, 118 ['z'] = { NULL, call, .u.op = OCALLE}, 119 120 ['#'] = { NULL,constant, .u.op = OCONST}, 121 122 ['j'] = { NULL, jump, .u.op = OJMP}, 123 ['y'] = { NULL, jump, .u.op = OBRANCH}, 124 ['h'] = { NULL, oreturn, .u.op = ORET}, 125 ['i'] = { NULL, NULL, .u.op = OINC}, 126 ['d'] = { NULL, NULL, .u.op = ODEC}, 127 128 ['b'] = { NULL, loop, .u.op = OBLOOP}, 129 ['e'] = { NULL, loop, .u.op = OELOOP}, 130 131 ['v'] = { NULL, ocase, .u.op = OCASE}, 132 ['f'] = { NULL, ocase, .u.op = ODEFAULT}, 133 ['t'] = { NULL, eswitch, .u.op = OESWITCH}, 134 ['s'] = { NULL, bswitch, .u.op = OBSWITCH}, 135 }; 136 137 static int sclass, inpars, ininit, beginf, endf, lineno; 138 static void *stack[STACKSIZ], **sp = stack; 139 140 static Node * 141 push(void *elem) 142 { 143 if (sp == &stack[STACKSIZ]) 144 error(ESTACKO); 145 return *sp++ = elem; 146 } 147 148 static void * 149 pop(void) 150 { 151 if (sp == stack) 152 error(ESTACKU); 153 return *--sp; 154 } 155 156 static int 157 empty(void) 158 { 159 return sp == stack; 160 } 161 162 static void 163 type(char *token, union tokenop u) 164 { 165 push(u.arg); 166 } 167 168 static void 169 composed(char *token, union tokenop u) 170 { 171 Symbol *sym; 172 173 sym = getsym(atoi(token+1)); 174 sym->type.id = sym->id; 175 push(&sym->type); 176 } 177 178 static void 179 getname(char *t, union tokenop u) 180 { 181 push((*++t) ? xstrdup(t) : NULL); 182 } 183 184 static void 185 symbol(char *token, union tokenop u) 186 { 187 Node *np = node(u.op & 0xFF); 188 Symbol *sym = getsym(atoi(token+1)); 189 190 sclass = u.op >> 8; 191 np->u.sym = sym; 192 np->type = sym->type; 193 push(np); 194 } 195 196 static Type * 197 gettype(char *token) 198 { 199 struct decoc *dp; 200 201 dp = &optbl[*token]; 202 if (!dp->parse) 203 error(ESYNTAX); 204 (*dp->parse)(token, dp->u); 205 return pop(); 206 } 207 208 static void 209 constant(char *token, union tokenop u) 210 { 211 static char letters[] = "0123456789ABCDEF"; 212 Node *np; 213 TUINT v; 214 unsigned c; 215 216 ++token; 217 if (*token == '"') { 218 ++token; 219 np = node(OSTRING); 220 np->type.flags = STRF; 221 np->type.size = strlen(token); 222 np->type.align = int8type.align; 223 np->u.s = xstrdup(token); 224 } else { 225 np = node(OCONST); 226 np->type = *gettype(token++); 227 for (v = 0; c = *token++; v += c) { 228 v <<= 4; 229 c = strchr(letters, c) - letters; 230 } 231 np->u.i = v; 232 } 233 push(np); 234 } 235 236 static void 237 assign(char *token, union tokenop u) 238 { 239 int subop; 240 Node *np = node(u.op); 241 242 switch (subop = *++token) { 243 case '+': 244 case '-': 245 case '*': 246 case '%': 247 case '/': 248 case 'l': 249 case 'r': 250 case '&': 251 case '|': 252 case '^': 253 case 'i': 254 case 'd': 255 ++token; 256 subop = optbl[subop].u.op; 257 break; 258 default: 259 subop = 0; 260 break; 261 } 262 263 np->u.subop = subop; 264 np->type = *gettype(token); 265 np->right = pop(); 266 np->left = pop(); 267 push(np); 268 } 269 270 static void 271 ternary(char *token, union tokenop u) 272 { 273 Node *ask = node(OASK), *colon = node(OCOLON); 274 Type *tp = gettype(token+1); 275 276 colon->right = pop(); 277 colon->left = pop(); 278 279 ask->type = *tp; 280 ask->left = pop(); 281 ask->right = colon; 282 push(ask); 283 } 284 285 static void 286 eval(char *tok) 287 { 288 struct decoc *dp; 289 290 do { 291 dp = &optbl[*tok]; 292 if (!dp->parse) 293 break; 294 (*dp->parse)(tok, dp->u); 295 } while (tok = strtok(NULL, "\t\n")); 296 } 297 298 static int 299 nextline(void) 300 { 301 static char line[LINESIZ]; 302 size_t len; 303 int c; 304 void (*fun)(void); 305 306 repeat: 307 ++lineno; 308 if (!fgets(line, sizeof(line), stdin)) 309 return 0; 310 if ((len = strlen(line)) == 0 || line[0] == '\n') 311 goto repeat; 312 if (line[len-1] != '\n') 313 error(len < sizeof(line)-1 ? ELNBLNE : ELNLINE); 314 line[len-1] = '\0'; 315 316 c = *line; 317 eval(strtok(line, "\t\n")); 318 if ((fun = *optbl[c].eval) != NULL) 319 (*fun)(); 320 if (sp != stack) 321 error(ESTACKA); 322 return 1; 323 } 324 325 static void 326 oreturn(char *token, union tokenop u) 327 { 328 Node *np = node(u.op); 329 330 if (token = strtok(NULL, "\t\n")) 331 eval(token); 332 if (!empty()) 333 np->left = pop(); 334 push(np); 335 } 336 337 static void 338 addcase(Node *np) 339 { 340 TINT val; 341 Node **bp; 342 Swtch *cur; 343 344 if (swp == swtbl) 345 error(EWTACKU); 346 347 cur = swp - 1; 348 if (np->op == ODEFAULT) { 349 cur->defnode = np; 350 } else { 351 cur->nr++; 352 bp = cur->cases; 353 bp = xrealloc(bp, cur->nr * sizeof(Node *)); 354 bp[cur->nr-1] = np; 355 cur->cases = bp; 356 357 val = np->left->u.i; 358 if (val < cur->min) 359 cur->min = val; 360 if (val > cur->max) 361 cur->max = val; 362 } 363 } 364 365 static void 366 bswitch(char *token, union tokenop u) 367 { 368 Swtch *cur; 369 Node *np = node(u.op); 370 371 if (swp == &swtbl[NR_BLOCK]) 372 error(EWTACKO); 373 cur = swp++; 374 cur->nr = 0; 375 cur->min = TINT_MAX; 376 cur->max = TINT_MIN; 377 cur->defnode = NULL; 378 cur->cases = NULL; 379 eval(strtok(NULL, "\t\n")); 380 np->left = pop(); 381 382 push(cur->bswtch = np); 383 } 384 385 static int 386 cmpcase(const void *p1, const void *p2) 387 { 388 TINT d; 389 Node *np1, *np2; 390 391 np1 = *(Node **) p1; 392 np2 = *(Node **) p2; 393 d = np1->left->u.i - np2->left->u.i; 394 if (d < 0) 395 return -1; 396 if (d > 0) 397 return 1; 398 return 0; 399 } 400 401 static void 402 eswitch(char *token, union tokenop u) 403 { 404 Node *p; 405 Swtch *cur; 406 407 if (swp == swtbl) 408 error(EWTACKU); 409 jump(token, u); 410 411 cur = --swp; 412 cur->eswtch = pop(); 413 cur->bswtch->u.swtch = newswtch(cur); 414 qsort(cur->cases, cur->nr, sizeof(Node *), cmpcase); 415 } 416 417 static void 418 ocase(char *token, union tokenop u) 419 { 420 jump(token, u); 421 addcase(pop()); 422 } 423 424 static void 425 jump(char *token, union tokenop u) 426 { 427 Symbol *label; 428 Node *aux, *np = node(u.op); 429 430 eval(strtok(NULL, "\t\n")); 431 432 if (u.op == OBRANCH || u.op == OCASE) 433 np->left = pop(); 434 aux = pop(); 435 label = aux->u.sym; 436 label->refcnt++; 437 np->u.sym = label; 438 delnode(aux); 439 push(np); 440 } 441 442 static void 443 loop(char *token, union tokenop u) 444 { 445 push(node(u.op)); 446 } 447 448 static void 449 unary(char *token, union tokenop u) 450 { 451 Node *np = node(u.op); 452 453 np->type = *gettype(token+1); 454 np->left = pop(); 455 np->right = NULL; 456 push(np); 457 } 458 459 static void 460 call(char *token, union tokenop u) 461 { 462 Node *np, *par, *fun = node(u.op); 463 464 for (par = NULL;; par = np) { 465 np = pop(); 466 if (np->op != OPAR) 467 break; 468 np->right = par; 469 } 470 471 fun->type = *gettype(token+1); 472 fun->left = np; 473 fun->right = par; 474 push(fun); 475 } 476 477 static void 478 builtin(char *token, union tokenop u) 479 { 480 Node *np = node(u.op); 481 char *name; 482 unsigned subop, nchilds; 483 484 np->type = *gettype(token+1); 485 name = pop(); 486 487 if (!strcmp("__builtin_va_arg", name)) { 488 nchilds = 1; 489 subop = BVA_ARG; 490 } else if (!strcmp("__builtin_va_start", name)) { 491 nchilds = 2; 492 subop = BVA_START; 493 } else if (!strcmp("__builtin_va_end", name)) { 494 nchilds = 1; 495 subop = BVA_END; 496 } else if (!strcmp("__builtin_va_copy", name)) { 497 nchilds = 2; 498 subop = BVA_COPY; 499 } else { 500 error(EBBUILT); 501 } 502 503 np->u.subop = subop; 504 np->right = (nchilds == 2) ? pop() : NULL; 505 np->left = (nchilds != 0) ? pop() : NULL; 506 507 free(name); 508 push(np); 509 } 510 511 static void 512 binary(char *token, union tokenop u) 513 { 514 Node *np = node(u.op); 515 516 np->type = *gettype(token+1); 517 np->right = pop(); 518 np->left = pop(); 519 push(np); 520 } 521 522 static void 523 binit(char *token, union tokenop u) 524 { 525 ininit = 1; 526 } 527 528 static void 529 einit(char *token, union tokenop u) 530 { 531 ininit = 0; 532 endinit(); 533 } 534 535 static void 536 endpars(void) 537 { 538 if (!inpars) 539 error(ESYNTAX); 540 inpars = 0; 541 } 542 543 static void 544 aggregate(void) 545 { 546 Node *align, *size; 547 char *name; 548 Type *tp; 549 Symbol *sym; 550 551 align = pop(); 552 size = pop(); 553 name = pop(); 554 tp = pop(); 555 556 tp->size = size->u.i; 557 tp->align = align->u.i; 558 tp->flags = AGGRF; 559 /* 560 * type is the first field of Symbol so we can obtain the 561 * address of the symbol from the address of the type. 562 * We have to do this because composed returns the pointer 563 * to the type, but in this function we also need the 564 * symbol to store the name. 565 */ 566 sym = (Symbol *) tp; 567 sym->name = name; 568 deftype(tp); 569 570 delnode(align); 571 delnode(size); 572 } 573 574 static void 575 array(void) 576 { 577 Type *tp, *base; 578 Node *size; 579 580 size = pop(); 581 base = pop(); 582 tp = pop(); 583 tp->flags = ARRF; 584 585 if (size->u.i > LONG_MAX/base->size) 586 error(EOVERFL); 587 588 tp->size = size->u.i * base->size; 589 tp->align = base->align; 590 591 delnode(size); 592 } 593 594 static void 595 decl(Symbol *sym) 596 { 597 Type *tp = &sym->type; 598 599 if (tp->flags & FUNF) { 600 lastfun = sym; 601 } else { 602 switch (sym->kind) { 603 case SEXTRN: 604 case SGLOB: 605 case SPRIV: 606 case SLOCAL: 607 defglobal(sym); 608 break; 609 case SAUTO: 610 case SREG: 611 if (!beginf) 612 error(ESYNTAX); 613 ((inpars) ? defpar : defvar)(sym); 614 break; 615 default: 616 abort(); 617 } 618 } 619 } 620 621 static void 622 vardecl(void) 623 { 624 Type *tp, *rp; 625 Node *np; 626 Symbol *sym; 627 char *name; 628 629 name = pop(); 630 tp = pop(); 631 if (tp->flags & FUNF) 632 rp = pop(); 633 np = pop(); 634 635 sym = np->u.sym; 636 /* 637 * We have to free sym->name because in tentative declarations 638 * we can have multiple declarations of the same symbol, and in 639 * this case our parser will allocate twice the memory 640 */ 641 free(sym->name); 642 sym->name = name; 643 sym->type = *tp; 644 if (tp->flags & FUNF) 645 sym->rtype = *rp; 646 sym->kind = sclass; 647 648 if (ininit) 649 sym->type.flags |= INITF; 650 decl(sym); 651 delnode(np); 652 } 653 654 static void 655 flddecl(void) 656 { 657 Node *off, *np; 658 char *name; 659 Type *tp; 660 Symbol *sym; 661 662 off = pop(); 663 name = pop(); 664 tp = pop(); 665 np = pop(); 666 667 sym = np->u.sym; 668 sym->u.off = off->u.i; 669 sym->name = name; 670 sym->type = *tp; 671 672 delnode(np); 673 delnode(off); 674 } 675 676 static void 677 labeldcl(void) 678 { 679 Node *np; 680 Symbol *sym; 681 682 np = pop(); 683 np->op = ONOP; 684 sym = np->u.sym; 685 sym->kind = SLABEL; 686 labelstmt(np, sym); 687 addstmt(np); 688 } 689 690 static void 691 stmt(void) 692 { 693 Node *np; 694 695 if (empty()) 696 return; 697 np = pop(); 698 if (ininit) { 699 data(np); 700 deltree(np); 701 return; 702 } 703 addstmt(np); 704 } 705 706 static void 707 beginfun(void) 708 { 709 newfun(lastfun, node(OBFUN)); 710 beginf = inpars = 1; 711 pushctx(); 712 } 713 714 static void 715 endfun(void) 716 { 717 endf = 1; 718 laststmt = addstmt(node(OEFUN)); 719 } 720 721 void 722 parse(void) 723 { 724 /* clean from previous function */ 725 cleanswtch(); 726 cleancfg(); 727 cleannodes(); 728 popctx(); 729 730 inpars = ininit = beginf = endf = 0; 731 732 while (!endf && nextline()) 733 ; 734 735 if (ferror(stdin)) 736 error(EFERROR, strerror(errno)); 737 738 if (beginf && !endf) 739 error(EBAFFUN); 740 741 PRTREE("after parsing"); 742 }