parser.c (14254B)
1 #include <errno.h> 2 #include <limits.h> 3 #include <stdio.h> 4 #include <stdlib.h> 5 #include <string.h> 6 7 #include <scc/cstd.h> 8 #include <scc/scc.h> 9 10 #include "cc2.h" 11 12 #define STACKSIZ 50 13 14 Type funetype = { 15 .flags = FUNF | ELLIPS 16 }; 17 18 Type funtype = { 19 .flags = FUNF 20 }; 21 22 union tokenop { 23 void *arg; 24 unsigned op; 25 }; 26 27 struct swtch { 28 int nr; 29 Node *first; 30 Node *last; 31 }; 32 33 static struct swtch swtbl[NR_BLOCK], *swp = swtbl; 34 static Symbol *lastfun; 35 36 typedef void parsefun(char *, union tokenop); 37 static parsefun type, symbol, getname, unary, binary, ternary, call, 38 constant, composed, binit, einit, 39 jump, oreturn, loop, assign, 40 ocase, bswitch, eswitch, builtin; 41 42 typedef void evalfun(void); 43 static evalfun vardecl, beginfun, endfun, endpars, stmt, 44 array, aggregate, flddecl, labeldcl; 45 46 static struct decoc { 47 void (*eval)(void); 48 void (*parse)(char *token, union tokenop); 49 union tokenop u; 50 } optbl[] = { /* eval parse args */ 51 ['A'] = { vardecl, symbol, .u.op = SAUTO<<8 | OAUTO}, 52 ['R'] = { vardecl, symbol, .u.op = SREG<<8 | OREG}, 53 ['G'] = { vardecl, symbol, .u.op = SGLOB<<8 | OMEM}, 54 ['X'] = { vardecl, symbol, .u.op = SEXTRN<<8 | OMEM}, 55 ['Y'] = { vardecl, symbol, .u.op = SPRIV<<8 | OMEM}, 56 ['T'] = { vardecl, symbol, .u.op = SLOCAL<<8 | OMEM}, 57 ['M'] = { flddecl, symbol, .u.op = SMEMB<<8 | OMEM}, 58 ['L'] = { labeldcl, symbol, .u.op = SLABEL<<8 | OLABEL}, 59 60 ['C'] = { NULL, type, .u.arg = &int8type}, 61 ['I'] = { NULL, type, .u.arg = &int16type}, 62 ['W'] = { NULL, type, .u.arg = &int32type}, 63 ['Q'] = { NULL, type, .u.arg = &int64type}, 64 ['K'] = { NULL, type, .u.arg = &uint8type}, 65 ['N'] = { NULL, type, .u.arg = &uint16type}, 66 ['Z'] = { NULL, type, .u.arg = &uint32type}, 67 ['O'] = { NULL, type, .u.arg = &uint64type}, 68 ['J'] = { NULL, type, .u.arg = &float32type}, 69 ['D'] = { NULL, type, .u.arg = &float64type}, 70 ['H'] = { NULL, type, .u.arg = &float80type}, 71 ['0'] = { NULL, type, .u.arg = &voidtype}, 72 ['B'] = { NULL, type, .u.arg = &booltype}, 73 ['P'] = { NULL, type, .u.arg = &ptrtype}, 74 ['E'] = { NULL, type, .u.arg = &funetype}, 75 ['1'] = { NULL, type, .u.arg = &arg_type}, 76 77 ['F'] = { NULL, type, .u.arg = &funtype}, 78 ['V'] = { array,composed, 0}, 79 ['U'] = {aggregate,composed, 0}, 80 ['S'] = {aggregate,composed, 0}, 81 82 ['"'] = { NULL, getname, 0}, 83 ['{'] = { beginfun, NULL, 0}, 84 ['}'] = { endfun, NULL, 0}, 85 ['('] = { NULL, binit, 0}, 86 [')'] = { NULL, einit, 0}, 87 ['\\'] = { endpars, NULL, 0}, 88 ['\t'] = { stmt, NULL, 0}, 89 90 ['~'] = { NULL, unary, .u.op = OCPL}, 91 ['_'] = { NULL, unary, .u.op = OSNEG}, 92 ['\''] = { NULL, unary, .u.op = OADDR}, 93 ['@'] = { NULL, unary, .u.op = OPTR}, 94 ['g'] = { NULL, unary, .u.op = OCAST}, 95 ['p'] = { NULL, unary, .u.op = OPAR}, 96 ['n'] = { NULL, unary, .u.op = ONEG}, 97 98 ['a'] = { NULL, binary, .u.op = OAND}, 99 ['o'] = { NULL, binary, .u.op = OOR}, 100 ['.'] = { NULL, binary, .u.op = OFIELD}, 101 ['+'] = { NULL, binary, .u.op = OADD}, 102 ['-'] = { NULL, binary, .u.op = OSUB}, 103 ['*'] = { NULL, binary, .u.op = OMUL}, 104 ['%'] = { NULL, binary, .u.op = OMOD}, 105 ['/'] = { NULL, binary, .u.op = ODIV}, 106 ['l'] = { NULL, binary, .u.op = OSHL}, 107 ['r'] = { NULL, binary, .u.op = OSHR}, 108 ['<'] = { NULL, binary, .u.op = OLT}, 109 ['>'] = { NULL, binary, .u.op = OGT}, 110 ['['] = { NULL, binary, .u.op = OLE}, 111 [']'] = { NULL, binary, .u.op = OGE}, 112 ['='] = { NULL, binary, .u.op = OEQ}, 113 ['!'] = { NULL, binary, .u.op = ONE}, 114 ['&'] = { NULL, binary, .u.op = OBAND}, 115 ['|'] = { NULL, binary, .u.op = OBOR}, 116 ['^'] = { NULL, binary, .u.op = OBXOR}, 117 [','] = { NULL, binary, .u.op = OCOMMA}, 118 ['m'] = { NULL, builtin,.u.op = OBUILTIN}, 119 120 [':'] = { NULL, assign, .u.op = OASSIG}, 121 ['?'] = { NULL, ternary, .u.op = OASK}, 122 ['c'] = { NULL, call, .u.op = OCALL}, 123 ['z'] = { NULL, call, .u.op = OCALLE}, 124 125 ['#'] = { NULL,constant, .u.op = OCONST}, 126 127 ['j'] = { NULL, jump, .u.op = OJMP}, 128 ['y'] = { NULL, jump, .u.op = OBRANCH}, 129 ['h'] = { NULL, oreturn, .u.op = ORET}, 130 ['i'] = { NULL, NULL, .u.op = OINC}, 131 ['d'] = { NULL, NULL, .u.op = ODEC}, 132 133 ['b'] = { NULL, loop, .u.op = OBLOOP}, 134 ['e'] = { NULL, loop, .u.op = OELOOP}, 135 136 ['v'] = { NULL, ocase, .u.op = OCASE}, 137 ['f'] = { NULL, ocase, .u.op = ODEFAULT}, 138 ['t'] = { NULL, eswitch, .u.op = OESWITCH}, 139 ['s'] = { NULL, bswitch, .u.op = OBSWITCH}, 140 }; 141 142 static int sclass, inpars, ininit, endf, lineno; 143 static void *stack[STACKSIZ], **sp = stack; 144 145 static Node * 146 push(void *elem) 147 { 148 if (sp == &stack[STACKSIZ]) 149 error(ESTACKO); 150 return *sp++ = elem; 151 } 152 153 static void * 154 pop(void) 155 { 156 if (sp == stack) 157 error(ESTACKU); 158 return *--sp; 159 } 160 161 static int 162 empty(void) 163 { 164 return sp == stack; 165 } 166 167 static void 168 type(char *token, union tokenop u) 169 { 170 push(u.arg); 171 } 172 173 static void 174 composed(char *token, union tokenop u) 175 { 176 Symbol *sym; 177 178 sym = getsym(atoi(token+1)); 179 sym->type.id = sym->id; 180 push(&sym->type); 181 } 182 183 static void 184 getname(char *t, union tokenop u) 185 { 186 push((*++t) ? xstrdup(t) : NULL); 187 } 188 189 static void 190 symbol(char *token, union tokenop u) 191 { 192 Node *np = node(u.op & 0xFF); 193 Symbol *sym = getsym(atoi(token+1)); 194 195 sclass = u.op >> 8; 196 np->u.sym = sym; 197 np->type = sym->type; 198 push(np); 199 } 200 201 static Type * 202 gettype(char *token) 203 { 204 struct decoc *dp; 205 206 dp = &optbl[*token]; 207 if (!dp->parse) 208 error(ESYNTAX); 209 (*dp->parse)(token, dp->u); 210 return pop(); 211 } 212 213 static void 214 constant(char *token, union tokenop u) 215 { 216 static char letters[] = "0123456789ABCDEF"; 217 Node *np; 218 TUINT v; 219 unsigned c; 220 221 ++token; 222 if (*token == '"') { 223 ++token; 224 np = node(OSTRING); 225 np->type.flags = STRF; 226 np->type.size = strlen(token); 227 np->type.align = int8type.align; 228 np->u.s = xstrdup(token); 229 } else { 230 np = node(OCONST); 231 np->type = *gettype(token++); 232 for (v = 0; c = *token++; v += c) { 233 v <<= 4; 234 c = strchr(letters, c) - letters; 235 } 236 np->u.i = v; 237 } 238 push(np); 239 } 240 241 static void 242 assign(char *token, union tokenop u) 243 { 244 int subop; 245 Node *np = node(u.op); 246 247 switch (subop = *++token) { 248 case '+': 249 case '-': 250 case '*': 251 case '%': 252 case '/': 253 case 'l': 254 case 'r': 255 case '&': 256 case '|': 257 case '^': 258 case 'i': 259 case 'd': 260 ++token; 261 subop = optbl[subop].u.op; 262 break; 263 default: 264 subop = 0; 265 break; 266 } 267 268 np->u.subop = subop; 269 np->type = *gettype(token); 270 np->right = pop(); 271 np->left = pop(); 272 push(np); 273 } 274 275 static void 276 ternary(char *token, union tokenop u) 277 { 278 Node *ask = node(OASK), *colon = node(OCOLON); 279 Type *tp = gettype(token+1); 280 281 colon->right = pop(); 282 colon->left = pop(); 283 284 ask->type = *tp; 285 ask->left = pop(); 286 ask->right = colon; 287 push(ask); 288 } 289 290 static void 291 eval(char *tok) 292 { 293 struct decoc *dp; 294 295 do { 296 dp = &optbl[*tok]; 297 if (!dp->parse) 298 break; 299 (*dp->parse)(tok, dp->u); 300 } while (tok = strtok(NULL, "\t\n")); 301 } 302 303 static int 304 nextline(void) 305 { 306 static char line[LINESIZ]; 307 size_t len; 308 int c; 309 void (*fun)(void); 310 311 repeat: 312 ++lineno; 313 if (!fgets(line, sizeof(line), stdin)) 314 return 0; 315 if ((len = strlen(line)) == 0 || line[0] == '\n') 316 goto repeat; 317 if (line[len-1] != '\n') 318 error(len < sizeof(line)-1 ? ELNBLNE : ELNLINE); 319 line[len-1] = '\0'; 320 321 c = *line; 322 eval(strtok(line, "\t\n")); 323 if ((fun = *optbl[c].eval) != NULL) 324 (*fun)(); 325 if (sp != stack) 326 error(ESTACKA); 327 return 1; 328 } 329 330 static void 331 oreturn(char *token, union tokenop u) 332 { 333 Node *np = node(u.op); 334 335 if (token = strtok(NULL, "\t\n")) 336 eval(token); 337 if (!empty()) 338 np->left = pop(); 339 push(np); 340 } 341 342 /* 343 * Move np (which is a OCASE/ODEFAULT/OESWITCH) to be contigous with 344 * the last switch table. It is a bit ugly to touch directly curstmt 345 * here, but moving this function to node.c is worse, because we are 346 * putting knowledge of how the text is parsed into the node 347 * represtation module. 348 */ 349 static void 350 waft(Node *np) 351 { 352 Node *lastcase, *next; 353 struct swtch *cur; 354 extern Node *curstmt; 355 356 if (swp == swtbl) 357 error(EWTACKU); 358 359 cur = swp - 1; 360 lastcase = cur->last; 361 next = lastcase->next; 362 363 np->next = next; 364 np->prev = lastcase; 365 366 if (next) 367 next->prev = np; 368 lastcase->next = np; 369 370 if (curstmt == cur->last) 371 curstmt = np; 372 cur->last = np; 373 cur->nr++; 374 } 375 376 static void 377 bswitch(char *token, union tokenop u) 378 { 379 struct swtch *cur; 380 Node *np = node(u.op); 381 382 if (swp == &swtbl[NR_BLOCK]) 383 error(EWTACKO); 384 cur = swp++; 385 cur->nr = 0; 386 387 eval(strtok(NULL, "\t\n")); 388 np->left = pop(); 389 390 push(cur->first = cur->last = np); 391 } 392 393 static void 394 eswitch(char *token, union tokenop u) 395 { 396 struct swtch *cur; 397 398 if (swp == swtbl) 399 error(EWTACKU); 400 jump(token, u); 401 waft(pop()); 402 cur = --swp; 403 cur->first->u.i = cur->nr; 404 } 405 406 static void 407 ocase(char *token, union tokenop u) 408 { 409 jump(token, u); 410 waft(pop()); 411 } 412 413 static void 414 jump(char *token, union tokenop u) 415 { 416 Node *aux, *np = node(u.op); 417 418 eval(strtok(NULL, "\t\n")); 419 420 if (u.op == OBRANCH || u.op == OCASE) 421 np->left = pop(); 422 aux = pop(); 423 np->u.sym = aux->u.sym; 424 delnode(aux); 425 push(np); 426 } 427 428 static void 429 loop(char *token, union tokenop u) 430 { 431 push(node(u.op)); 432 } 433 434 static void 435 unary(char *token, union tokenop u) 436 { 437 Node *np = node(u.op); 438 439 np->type = *gettype(token+1); 440 np->left = pop(); 441 np->right = NULL; 442 push(np); 443 } 444 445 static void 446 call(char *token, union tokenop u) 447 { 448 Node *np, *par, *fun = node(u.op); 449 450 for (par = NULL;; par = np) { 451 np = pop(); 452 if (np->op != OPAR) 453 break; 454 np->right = par; 455 } 456 457 fun->type = *gettype(token+1); 458 fun->left = np; 459 fun->right = par; 460 push(fun); 461 } 462 463 static void 464 builtin(char *token, union tokenop u) 465 { 466 Node *np = node(u.op); 467 char *name; 468 unsigned subop, nchilds; 469 470 np->type = *gettype(token+1); 471 name = pop(); 472 473 if (!strcmp("__builtin_va_arg", name)) { 474 nchilds = 1; 475 subop = BVA_ARG; 476 } else if (!strcmp("__builtin_va_start", name)) { 477 nchilds = 2; 478 subop = BVA_START; 479 } else if (!strcmp("__builtin_va_end", name)) { 480 nchilds = 1; 481 subop = BVA_END; 482 } else if (!strcmp("__builtin_va_copy", name)) { 483 nchilds = 2; 484 subop = BVA_COPY; 485 } else { 486 error(EBBUILT); 487 } 488 489 np->u.subop = subop; 490 np->right = (nchilds == 2) ? pop() : NULL; 491 np->left = (nchilds != 0) ? pop() : NULL; 492 493 free(name); 494 push(np); 495 } 496 497 static void 498 binary(char *token, union tokenop u) 499 { 500 Node *np = node(u.op); 501 502 np->type = *gettype(token+1); 503 np->right = pop(); 504 np->left = pop(); 505 push(np); 506 } 507 508 static void 509 binit(char *token, union tokenop u) 510 { 511 ininit = 1; 512 } 513 514 static void 515 einit(char *token, union tokenop u) 516 { 517 ininit = 0; 518 endinit(); 519 } 520 521 static void 522 endpars(void) 523 { 524 if (!curfun || !inpars) 525 error(ESYNTAX); 526 inpars = 0; 527 } 528 529 static void 530 aggregate(void) 531 { 532 Node *align, *size; 533 char *name; 534 Type *tp; 535 Symbol *sym; 536 537 align = pop(); 538 size = pop(); 539 name = pop(); 540 tp = pop(); 541 542 tp->size = size->u.i; 543 tp->align = align->u.i; 544 tp->flags = AGGRF; 545 /* 546 * type is the first field of Symbol so we can obtain the 547 * address of the symbol from the address of the type. 548 * We have to do this because composed returns the pointer 549 * to the type, but in this function we also need the 550 * symbol to store the name. 551 */ 552 sym = (Symbol *) tp; 553 sym->name = name; 554 deftype(tp); 555 556 delnode(align); 557 delnode(size); 558 } 559 560 static void 561 array(void) 562 { 563 Type *tp, *base; 564 Node *size; 565 566 size = pop(); 567 base = pop(); 568 tp = pop(); 569 tp->flags = ARRF; 570 571 if (size->u.i > LONG_MAX/base->size) 572 error(EOVERFL); 573 574 tp->size = size->u.i * base->size; 575 tp->align = base->align; 576 577 delnode(size); 578 } 579 580 static void 581 decl(Symbol *sym) 582 { 583 Type *tp = &sym->type; 584 585 if (tp->flags & FUNF) { 586 lastfun = sym; 587 } else { 588 switch (sym->kind) { 589 case SEXTRN: 590 case SGLOB: 591 case SPRIV: 592 case SLOCAL: 593 defglobal(sym); 594 break; 595 case SAUTO: 596 case SREG: 597 if (!curfun) 598 error(ESYNTAX); 599 ((inpars) ? defpar : defvar)(sym); 600 break; 601 default: 602 abort(); 603 } 604 } 605 } 606 607 static void 608 vardecl(void) 609 { 610 Type *tp, *rp; 611 Node *np; 612 Symbol *sym; 613 char *name; 614 615 name = pop(); 616 tp = pop(); 617 if (tp->flags & FUNF) 618 rp = pop(); 619 np = pop(); 620 621 sym = np->u.sym; 622 /* 623 * We have to free sym->name because in tentative declarations 624 * we can have multiple declarations of the same symbol, and in 625 * this case our parser will allocate twice the memory 626 */ 627 free(sym->name); 628 sym->name = name; 629 sym->type = *tp; 630 if (tp->flags & FUNF) 631 sym->rtype = *rp; 632 sym->kind = sclass; 633 634 if (ininit) 635 sym->type.flags |= INITF; 636 decl(sym); 637 delnode(np); 638 } 639 640 static void 641 flddecl(void) 642 { 643 Node *off, *np; 644 char *name; 645 Type *tp; 646 Symbol *sym; 647 648 off = pop(); 649 name = pop(); 650 tp = pop(); 651 np = pop(); 652 653 sym = np->u.sym; 654 sym->u.off = off->u.i; 655 sym->name = name; 656 sym->type = *tp; 657 658 delnode(np); 659 delnode(off); 660 } 661 662 static void 663 labeldcl(void) 664 { 665 Node *np; 666 Symbol *sym; 667 668 np = pop(); 669 np->op = ONOP; 670 sym = np->u.sym; 671 sym->kind = SLABEL; 672 sym->u.stmt = np; 673 np->label = sym; 674 addstmt(np, SETCUR); 675 } 676 677 static void 678 stmt(void) 679 { 680 Node *np; 681 682 if (empty()) 683 return; 684 np = pop(); 685 if (ininit) { 686 data(np); 687 deltree(np); 688 return; 689 } 690 addstmt(np, SETCUR); 691 } 692 693 static void 694 beginfun(void) 695 { 696 curfun = lastfun; 697 inpars = 1; 698 pushctx(); 699 addstmt(node(OBFUN), SETCUR); 700 } 701 702 static void 703 endfun(void) 704 { 705 endf = 1; 706 addstmt(node(OEFUN), SETCUR); 707 } 708 709 void 710 parse(void) 711 { 712 cleannodes(); /* remove code of previous function */ 713 popctx(); /* remove context of previous function */ 714 curfun = NULL; 715 endf = 0; 716 717 while (!endf && nextline()) 718 ; 719 if (ferror(stdin)) 720 error(EFERROR, strerror(errno)); 721 }