yacc.c (23342B)
1 /*% clang -g -Wall -Wextra % -o # 2 * miniyacc - LALR(1) grammars for C 3 * See LICENSE for copyright and license details. 4 */ 5 #include <assert.h> 6 #include <ctype.h> 7 #include <stdio.h> 8 #include <stdlib.h> 9 #include <string.h> 10 11 typedef int Sym; 12 typedef struct Rule Rule; 13 typedef struct TSet TSet; 14 typedef struct Info Info; 15 typedef struct Term Term; 16 typedef struct Item Item; 17 typedef struct Row Row; 18 19 #define S ((Sym) -1) 20 #define Red(n) (- (n+2)) /* involutive, Red(Red(x)) == x */ 21 #define GetBit(s,n) (s[n/32] & (1<<(n%32))) 22 #define SetBit(s,n) (s[n/32] |= 1<<(n%32)) 23 24 enum { 25 IdntSz = 64, 26 MaxRhs = 32, 27 MaxTk = 500, 28 MaxNt = 500, 29 MaxRl = 800, 30 MaxTm = 1000, 31 32 TSetSz = (MaxTk+31)/32, 33 Sym0 = MaxTk 34 }; 35 36 struct Rule { 37 Sym lhs; 38 Sym rhs[MaxRhs]; 39 char *act; 40 int actln; 41 int prec; 42 }; 43 44 struct TSet { 45 unsigned t[TSetSz]; 46 }; 47 48 struct Info { 49 int nul; 50 TSet fst; 51 int prec; 52 enum { 53 ANone, 54 ALeft, 55 ARight, 56 ANonassoc 57 } assoc; 58 char name[IdntSz]; 59 char type[IdntSz]; 60 }; 61 62 struct Term { 63 Rule *rule; 64 int dot; 65 TSet lk; 66 }; 67 68 struct Item { 69 int id; 70 int nt; 71 Term ts[MaxTm]; 72 Item **gtbl; 73 int dirty; 74 }; 75 76 struct Row { 77 int def; 78 int ndef; 79 int *t; 80 }; 81 82 char srs[] = "shift/reduce conflict state %d token %s\n"; 83 char rrs[] = "reduce/reduce conflict state %d token %s\n"; 84 85 Item i0; /* temporary item */ 86 87 int nrl, nsy, nst, ntk; 88 Rule rs[MaxRl]; /* grammar rules (ordered, rcmp) */ 89 Info is[MaxTk+MaxNt]; /* symbol information */ 90 Item **st; /* LALR(1) states (ordered, icmp) */ 91 Row *as; /* action table [state][tok] */ 92 Row *gs; /* goto table [sym][state] */ 93 Sym sstart;/* start symbol */ 94 Item *ini; /* initial state */ 95 int doty; /* type-checking enabled */ 96 97 int srconf, rrconf; 98 int actsz; 99 int *act; 100 int *chk; 101 int *adsp; 102 int *gdsp; 103 104 int lineno = 1; 105 char *srca; 106 FILE *fin; 107 FILE *fout; 108 FILE *fgrm; 109 FILE *fhdr; 110 111 void 112 die(char *s) 113 { 114 fprintf(stderr, "%s (on line %d)\n", s, lineno); 115 exit(1); 116 } 117 118 void * 119 yalloc(size_t n, size_t o) 120 { 121 void *p; 122 123 p = calloc(n, o); 124 if (!p) 125 die("out of memory"); 126 return p; 127 } 128 129 int 130 rcmp(const void *a, const void *b) 131 { 132 return ((Rule *)a)->lhs - ((Rule *)b)->lhs; 133 } 134 135 Rule * 136 rfind(Sym lhs) 137 { 138 Rule *r; 139 Rule k; 140 141 k.lhs = lhs; 142 r = bsearch(&k, rs, nrl, sizeof *r, rcmp); 143 if (r != 0) 144 while (r > rs && r[-1].lhs == lhs) 145 r--; 146 return r; 147 } 148 149 int 150 slen(Sym *l) 151 { 152 int n; 153 154 for (n=0; *l!=S; n++, l++); 155 return n; 156 } 157 158 void 159 tszero(TSet *ts) 160 { 161 memset(ts, 0, sizeof *ts); 162 } 163 164 int 165 tsunion(TSet *tsa, TSet *tsb) 166 { 167 int n; 168 unsigned *a, *b, c, t; 169 170 c = 0; 171 a = tsa->t; 172 b = tsb->t; 173 n = (31+ntk)/32; 174 while (n-- > 0) { 175 t = *a; 176 *a |= *b++; 177 c |= t ^ *a++; 178 } 179 return !!c; 180 } 181 182 void 183 first(TSet *ts, Sym *stnc, TSet *last) 184 { 185 Sym f; 186 187 f = stnc[0]; 188 if (f == S) { 189 if (last) 190 tsunion(ts, last); 191 return; 192 } 193 if (f < ntk) { 194 SetBit(ts->t, f); 195 return; 196 } 197 if (is[f].nul) 198 first(ts, stnc+1, last); 199 tsunion(ts, &is[f].fst); 200 } 201 202 void 203 ginit() 204 { 205 int chg; 206 Rule *r; 207 Info *i; 208 Sym *s; 209 TSet ts; 210 211 do { 212 chg = 0; 213 for (r=rs; r-rs<nrl; r++) { 214 i = &is[r->lhs]; 215 for (s=r->rhs; *s!=S; s++) 216 if (!is[*s].nul) 217 goto nonul; 218 chg |= i->nul == 0; 219 i->nul = 1; 220 nonul: 221 tszero(&ts); 222 first(&ts, r->rhs, 0); 223 chg |= tsunion(&i->fst, &ts); 224 } 225 } while (chg); 226 } 227 228 int 229 tcmp(Term *a, Term *b) 230 { 231 int c; 232 233 c = a->rule - b->rule; 234 if (c==0) 235 c = a->dot - b->dot; 236 return c; 237 } 238 239 int 240 tcmpv(const void *a, const void *b) 241 { 242 return tcmp((Term *)a, (Term *)b); 243 } 244 245 void 246 iclose(Item *i) 247 { 248 int smap[MaxNt]; 249 Rule *r; 250 Term *t, t1; 251 Sym s, *rem; 252 int chg, n, m; 253 254 t1.dot = 0; 255 memset(smap, 0, sizeof smap); 256 for (n=0; n<i->nt; n++) { 257 t = &i->ts[n]; 258 s = t->rule->lhs-Sym0; 259 if (t->dot==0) 260 if (smap[s]==0) 261 smap[s] = n; 262 } 263 do { 264 chg = 0; 265 for (n=0; n<i->nt; n++) { 266 t = &i->ts[n]; 267 rem = &t->rule->rhs[t->dot]; 268 s = *rem++; 269 if (s < Sym0 || s == S) 270 continue; 271 r = rfind(s); 272 if (!r) 273 die("some non-terminals are not defined"); 274 tszero(&t1.lk); 275 first(&t1.lk, rem, &t->lk); 276 m = smap[s-Sym0]; 277 if (m) 278 for (; r-rs<nrl && r->lhs==s; r++, m++) 279 chg |= tsunion(&i->ts[m].lk, &t1.lk); 280 else { 281 m = i->nt; 282 smap[s-Sym0] = m; 283 for (; r-rs<nrl && r->lhs==s; r++, m++) { 284 if (m>=MaxTm) 285 die("too many terms in item"); 286 t1.rule = r; 287 i->ts[m] = t1; 288 } 289 i->nt = m; 290 chg = 1; 291 } 292 } 293 } while (chg); 294 } 295 296 void 297 igoto(Item *i, Sym s) 298 { 299 Term *t, *t1; 300 int n; 301 302 i0.nt = 0; 303 for (n=0, t=i->ts; n<i->nt; n++, t++) { 304 if (t->rule->rhs[t->dot] != s) 305 continue; 306 t1 = &i0.ts[i0.nt++]; 307 *t1 = *t; 308 t1->dot++; 309 } 310 qsort(i0.ts, i0.nt, sizeof i0.ts[0], tcmpv); 311 } 312 313 int 314 icmp(Item *a, Item *b) 315 { 316 Term *ta, *tb, *ma, *mb; 317 int c; 318 319 ta = a->ts; 320 tb = b->ts; 321 ma = ta+a->nt; 322 mb = tb+b->nt; 323 for (;;) { 324 if (ta==ma || ta->dot==0) 325 return -(tb<mb && tb->dot); 326 if (tb==mb || tb->dot==0) 327 return +(ta<ma && ta->dot); 328 if ((c=tcmp(ta++, tb++))) 329 return c; 330 } 331 } 332 333 int 334 stadd(Item **pi) 335 { 336 Item *i, *i1; 337 int lo, hi, mid, n, chg; 338 339 /* http://www.iq0.com/duffgram/bsearch.c */ 340 i = *pi; 341 lo = 0; 342 hi = nst - 1; 343 if (hi<0 || icmp(i, st[hi])>0) 344 hi++; 345 else if (icmp(i, st[lo])<=0) 346 hi = lo; 347 else 348 while (hi-lo!=1) { 349 mid = (lo+hi)/2; 350 if (icmp(st[mid], i)<0) 351 lo = mid; 352 else 353 hi = mid; 354 } 355 if (hi<nst && icmp(st[hi], i)==0) { 356 chg = 0; 357 i1 = st[hi]; 358 for (n=0; n<i->nt; n++) 359 chg |= tsunion(&i1->ts[n].lk, &i->ts[n].lk); 360 i1->dirty |= chg; 361 *pi = i1; 362 return chg; 363 } else { 364 st = realloc(st, ++nst * sizeof st[0]); 365 if (!st) 366 die("out of memory"); 367 memmove(&st[hi+1], &st[hi], (nst-1 - hi) * sizeof st[0]); 368 i->gtbl = yalloc(nsy, sizeof i->gtbl[0]); 369 i->dirty = 1; 370 i1 = yalloc(1, sizeof *i1); 371 *i1 = *i; 372 *pi = st[hi] = i1; 373 return 1; 374 } 375 } 376 377 void 378 stgen() 379 { 380 Sym s; 381 Rule *r; 382 Item *i, *i1; 383 Term tini; 384 int n, chg; 385 386 ini = &i0; 387 r = rfind(Sym0); 388 tini.rule = r; 389 tini.dot = 0; 390 tszero(&tini.lk); 391 SetBit(tini.lk.t, 0); 392 i0.nt = 0; 393 i0.ts[i0.nt++] = tini; 394 stadd(&ini); 395 do { 396 chg = 0; 397 for (n=0; n<nst; n++) { 398 i = st[n]; 399 if (!i->dirty) 400 continue; 401 i->dirty = 0; 402 iclose(i); 403 for (s=0; s<nsy; s++) { 404 igoto(i, s); 405 i1 = &i0; 406 if (!i1->nt) { 407 i->gtbl[s] = 0; 408 continue; 409 } 410 chg |= stadd(&i1); 411 i->gtbl[s] = i1; 412 } 413 } 414 } while (chg); 415 } 416 417 int 418 resolve(Rule *r, Sym s, int st) 419 { 420 if (!r->prec || !is[s].prec) { 421 conflict: 422 if (fgrm) 423 fprintf(fgrm, srs, st, is[s].name); 424 srconf++; 425 return ARight; 426 } 427 if (r->prec==is[s].prec) { 428 if (is[s].assoc == ANone) 429 goto conflict; 430 return is[s].assoc; 431 } else 432 if (r->prec<is[s].prec) 433 return ARight; 434 else 435 return ALeft; 436 } 437 438 void 439 tblset(int *tbl, Item *i, Term *t) 440 { 441 int act; 442 Sym s; 443 444 s = t->rule->rhs[t->dot]; 445 if (s!=S) { 446 /* shift */ 447 if (s>=ntk) 448 return; 449 assert(i->gtbl[s]); 450 act = ARight; 451 if (tbl[s] && tbl[s] != i->gtbl[s]->id) { 452 assert(tbl[s]<=0); 453 act = resolve(&rs[Red(tbl[s])], s, i->id-1); 454 } 455 switch (act) { 456 case ARight: 457 tbl[s] = i->gtbl[s]->id; 458 break; 459 case ANonassoc: 460 tbl[s] = -1; 461 break; 462 } 463 } else 464 /* reduce */ 465 for (s=0; s<ntk; s++) { 466 if (!GetBit(t->lk.t, s)) 467 continue; 468 /* default to shift if conflict occurs */ 469 if (!tbl[s]) 470 act = ALeft; 471 else if (tbl[s]<0) { 472 if (fgrm) 473 fprintf(fgrm, rrs, i->id-1, is[s].name); 474 rrconf++; 475 act = ARight; 476 } else 477 act = resolve(t->rule, s, i->id-1); 478 switch (act) { 479 case ALeft: 480 tbl[s] = Red(t->rule-rs); 481 break; 482 case ANonassoc: 483 tbl[s] = -1; 484 break; 485 } 486 } 487 } 488 489 void 490 setdef(Row *r, int w, int top) 491 { 492 int n, m, x, cnt, def, max; 493 494 max = 0; 495 def = -1; 496 r->ndef = 0; 497 for (n=0; n<w; n++) { 498 x = r->t[n]; 499 if (x==0) 500 r->ndef++; 501 if (x>=top || x==0) 502 continue; 503 cnt = 1; 504 for (m=n+1; m<w; m++) 505 if (r->t[m]==x) 506 cnt++; 507 if (cnt>max) { 508 def = x; 509 max = cnt; 510 } 511 } 512 r->def = def; 513 if (max!=0) 514 /* zero out the most frequent entry */ 515 for (n=0; n<w; n++) 516 if (r->t[n]==def) { 517 r->t[n] = 0; 518 r->ndef++; 519 } 520 } 521 522 void 523 tblgen() 524 { 525 Row *r; 526 Item *i; 527 int n, m; 528 529 for (n=0; n<nst; n++) 530 st[n]->id = n+1; 531 as = yalloc(nst, sizeof as[0]); 532 gs = yalloc(nsy-MaxTk, sizeof gs[0]); 533 /* fill action table */ 534 for (n=0; n<nst; n++) { 535 r = &as[n]; 536 r->t = yalloc(ntk, sizeof r->t[0]); 537 for (i=st[n], m=0; m<i->nt; m++) 538 tblset(r->t, i, &i->ts[m]); 539 setdef(r, ntk, -1); 540 r->def = Red(r->def); /* Red(-1) == -1 */ 541 } 542 /* fill goto table */ 543 for (n=MaxTk; n<nsy; n++) { 544 r = &gs[n-MaxTk]; 545 r->t = yalloc(nst, sizeof r->t[0]); 546 for (m=0; m<nst; m++) 547 if (st[m]->gtbl[n]) 548 r->t[m] = st[m]->gtbl[n]->id; 549 setdef(r, nst, nst+1); 550 } 551 } 552 553 int 554 prcmp(const void *a, const void *b) 555 { 556 return (*(Row **)a)->ndef - (*(Row **)b)->ndef; 557 } 558 559 void 560 actgen() 561 { 562 Row **o, *r; 563 int n, m, t, dsp, nnt; 564 565 actsz = 0; 566 o = yalloc(nst+nsy, sizeof o[0]); 567 act = yalloc(nst*nsy, sizeof act[0]); 568 chk = yalloc(nst*nsy, sizeof chk[0]); 569 adsp = yalloc(nst, sizeof adsp[0]); 570 for (n=0; n<nst*nsy; n++) 571 chk[n] = -1; 572 /* fill in actions */ 573 for (n=0; n<nst; n++) 574 o[n] = &as[n]; 575 qsort(o, nst, sizeof o[0], prcmp); 576 for (n=0; n<nst; n++) { 577 r = o[n]; 578 dsp = 0; 579 for (m=0; m<ntk && r->t[m]==0; m++) 580 dsp--; 581 retrya: 582 /* The invariant here is even 583 * trickier than it looks. 584 */ 585 for (t=0; t<ntk; t++) 586 if ((m=dsp+t)>=0 && chk[m]>=0) 587 if ((r->t[t] && (chk[m]!=t || act[m]!=r->t[t])) 588 || (!r->t[t] && chk[m]==t)) { 589 dsp++; 590 goto retrya; 591 } 592 adsp[r-as] = dsp; 593 for (t=0; t<ntk; t++) 594 if (r->t[t]) { 595 chk[dsp+t] = t; 596 act[dsp+t] = r->t[t]; 597 if (dsp+t>=actsz) 598 actsz = dsp+t+1; 599 } 600 } 601 /* fill in gotos */ 602 nnt = nsy-MaxTk; 603 gdsp = yalloc(nnt, sizeof gdsp[0]); 604 for (n=0; n<nnt; n++) 605 o[n] = &gs[n]; 606 qsort(o, nnt, sizeof o[0], prcmp); 607 for (n=0; n<nnt; n++) { 608 r = o[n]; 609 dsp = 0; 610 for (m=0; m<nst && r->t[m]==0; m++) 611 dsp--; 612 retryg: 613 for (t=m; t<nst; t++) 614 if (chk[dsp+t]>=0 && r->t[t]) { 615 dsp++; 616 goto retryg; 617 } 618 gdsp[r-gs] = dsp; 619 for (t=m; t<nst; t++) 620 if (r->t[t]) { 621 chk[dsp+t] = ntk+(r-gs); 622 act[dsp+t] = r->t[t]; 623 if (dsp+t>=actsz) 624 actsz = dsp+t+1; 625 } 626 } 627 free(o); 628 } 629 630 void 631 aout(char *name, int *t, int n) 632 { 633 int i; 634 635 fprintf(fout, "short %s[] = {", name); 636 for (i=0; i<n; i++) { 637 if (i % 10 == 0) 638 fprintf(fout, "\n"); 639 fprintf(fout, "%4d", t[i]); 640 if (i != n-1) 641 fprintf(fout, ","); 642 } 643 fprintf(fout, "\n};\n"); 644 } 645 646 void 647 tblout() 648 { 649 int *o, n, m; 650 651 fprintf(fout, "short yyini = %d;\n", ini->id-1); 652 fprintf(fout, "short yyntoks = %d;\n", ntk); 653 o = yalloc(nrl+nst+nsy, sizeof o[0]); 654 for (n=0; n<nrl; n++) 655 o[n] = slen(rs[n].rhs); 656 aout("yyr1", o, nrl); 657 for (n=0; n<nrl; n++) 658 o[n] = rs[n].lhs-MaxTk; 659 aout("yyr2", o, nrl); 660 for (n=0; n<nst; n++) 661 o[n] = as[n].def; 662 aout("yyadef", o, nst); 663 for (n=0; n<nsy-MaxTk; n++) { 664 o[n] = gs[n].def; 665 assert(o[n]>0 || o[n]==-1); 666 if (o[n]>0) 667 o[n]--; 668 } 669 aout("yygdef", o, nsy-MaxTk); 670 aout("yyadsp", adsp, nst); 671 aout("yygdsp", gdsp, nsy-MaxTk); 672 for (n=0; n<actsz; n++) 673 if (act[n]>=0) 674 act[n]--; 675 aout("yyact", act, actsz); 676 aout("yychk", chk, actsz); 677 for (n=0; n<128; n++) { 678 o[n] = 0; 679 for (m=0; m<ntk; m++) 680 if (is[m].name[0]=='\'') 681 if (is[m].name[1]==n) 682 assert(!o[n]), o[n] = m; 683 } 684 m = 128; 685 for (n=1; n<ntk; n++) { 686 if (is[n].name[0]=='\'') 687 continue; 688 fprintf(fout, "#define %s %d\n", is[n].name, m); 689 if (fhdr) 690 fprintf(fhdr, "#define %s %d\n", is[n].name, m); 691 o[m++] = n; 692 } 693 aout("yytrns", o, m); 694 if (fhdr) { 695 fputs("int yyparse(void);\n", fhdr); 696 fputs("#ifndef YYSTYPE\n", fhdr); 697 fputs("#define YYSTYPE int\n", fhdr); 698 fputs("#endif\n", fhdr); 699 fputs("extern YYSTYPE yylval;\n", fhdr); 700 fputs("#endif\n", fhdr); 701 } 702 free(o); 703 } 704 705 void 706 stdump() 707 { 708 Term *t; 709 Sym *s1; 710 int n, m, d, act; 711 Rule *r; 712 Row *ar; 713 714 if (!fgrm) 715 return; 716 for (r=rs; r-rs<nrl; r++) { 717 fprintf(fgrm, "\n%03d: %s ->", (int)(r-rs), is[r->lhs].name); 718 for (s1=r->rhs; *s1!=S; s1++) 719 fprintf(fgrm, " %s", is[*s1].name); 720 } 721 fprintf(fgrm, "\n"); 722 for (m=0; m<nst; m++) { 723 fprintf(fgrm, "\nState %d:\n", m); 724 for (t=st[m]->ts; t-st[m]->ts<st[m]->nt; t++) { 725 r = t->rule; 726 d = t->dot; 727 if (d==0 && t!=st[m]->ts) 728 continue; 729 fprintf(fgrm, " %s ->", is[r->lhs].name); 730 for (s1=r->rhs; *s1!=S; s1++, d--) 731 fprintf(fgrm, " %s%s", d ? "" : ". ", is[*s1].name); 732 if (!d) 733 fprintf(fgrm, " ."); 734 fprintf(fgrm, "\n"); 735 } 736 fprintf(fgrm, "\n"); 737 ar = &as[m]; 738 for (n=0; n<ntk; n++) { 739 act = ar->t[n]; 740 if (!act) 741 continue; 742 if (act==-1) 743 fprintf(fgrm, " %s error (nonassoc)\n", is[n].name); 744 else if (act<0) 745 fprintf(fgrm, " %s reduce with rule %d\n", is[n].name, Red(act)); 746 else 747 fprintf(fgrm, " %s shift and go to %d\n", is[n].name, act-1); 748 } 749 if (ar->def != -1) 750 fprintf(fgrm, " * reduce with rule %d\n", ar->def); 751 } 752 } 753 754 enum { 755 TIdnt, 756 TTokchr, /* 'c' */ 757 TPP, /* %% */ 758 TLL, /* %{ */ 759 TLangle, /* < */ 760 TRangle, /* > */ 761 TSemi, /* ; */ 762 TBar, /* | */ 763 TColon, /* : */ 764 TLBrack, /* { */ 765 TUnion, 766 TType, 767 TToken, 768 TRight, 769 TLeft, 770 TNonassoc, 771 TPrec, 772 TStart, 773 TEof 774 }; 775 776 struct { 777 char *name; 778 int tok; 779 } words[] = { 780 { "%%", TPP }, 781 { "%union", TUnion }, 782 { "%type", TType }, 783 { "%token", TToken }, 784 { "%right", TRight }, 785 { "%left", TLeft }, 786 { "%nonassoc", TNonassoc }, 787 { "%prec", TPrec }, 788 { "%start", TStart }, 789 { 0, 0 } 790 }; 791 792 char idnt[IdntSz]; 793 794 int 795 istok(int c) 796 { 797 return isalnum(c) || c=='_' || c=='%'; 798 } 799 800 int 801 nexttk() 802 { 803 int n; 804 char c, *p; 805 806 while (isspace(c=fgetc(fin))) 807 if (c == '\n') 808 lineno++; 809 switch (c) { 810 case '<': 811 return TLangle; 812 case '>': 813 return TRangle; 814 case ';': 815 return TSemi; 816 case '|': 817 return TBar; 818 case ':': 819 return TColon; 820 case '{': 821 return TLBrack; 822 case EOF: 823 return TEof; 824 case '\'': 825 idnt[0] = '\''; 826 idnt[1] = fgetc(fin); 827 idnt[2] = '\''; 828 idnt[3] = 0; 829 if (fgetc(fin)!='\'') 830 die("syntax error, invalid char token"); 831 return TTokchr; 832 } 833 p = idnt; 834 while (istok(c)) { 835 *p++ = c; 836 if (p-idnt >= IdntSz-1) 837 die("identifier too long"); 838 c = fgetc(fin); 839 } 840 *p = 0; 841 if (strcmp(idnt, "%")==0) 842 if (c=='{') 843 return TLL; 844 ungetc(c, fin); 845 for (n=0; words[n].name; n++) 846 if (strcmp(idnt, words[n].name) == 0) 847 return words[n].tok; 848 return TIdnt; 849 } 850 851 char * 852 cpycode() 853 { 854 int c, nest, in, len, pos; 855 char *s; 856 857 len = 64; 858 s = yalloc(len+1, 1); 859 s[0] = '{'; 860 pos = 1; 861 nest = 1; 862 in = 0; 863 864 while (nest) { 865 c = fgetc(fin); 866 if (in) { 867 if (c == in) 868 if (s[pos-1] != '\\') 869 in = 0; 870 } else { 871 if (c == '"' || c == '\'') 872 in = c; 873 if (c == '{') 874 nest++; 875 if (c == '}') 876 nest--; 877 if (c == EOF) 878 die("syntax error, unclosed code block"); 879 if (c == '\n') 880 lineno++; 881 } 882 if (pos>=len) 883 if (!(s=realloc(s, len=2*len+1))) 884 die("out of memory"); 885 s[pos++] = c; 886 } 887 s[pos] = 0; 888 return s; 889 } 890 891 int 892 gettype(char *type) 893 { 894 int tk; 895 896 tk = nexttk(); 897 if (tk==TLangle) { 898 if (nexttk()!=TIdnt) 899 die("syntax error, ident expected after <"); 900 strcpy(type, idnt); 901 if (nexttk()!=TRangle) 902 die("syntax error, unclosed <"); 903 return nexttk(); 904 } else { 905 type[0] = 0; 906 return tk; 907 } 908 } 909 910 Sym 911 findsy(char *name, int add) 912 { 913 int n; 914 915 for (n=0; n<nsy; n++) { 916 if (n == ntk) { 917 if (name[0]=='\'') { 918 if (ntk>=MaxTk) 919 die("too many tokens"); 920 ntk++; 921 strcpy(is[n].name, name); 922 return n; 923 } 924 n = MaxTk; 925 } 926 if (strcmp(is[n].name, name)==0) 927 return n; 928 } 929 if (add) { 930 if (nsy>=MaxTk+MaxNt) 931 die("too many non-terminals"); 932 strcpy(is[nsy].name, name); 933 return nsy++; 934 } else 935 return nsy; 936 } 937 938 void 939 getdecls() 940 { 941 int tk, prec, p, a, c, c1, n; 942 Info *si; 943 char type[IdntSz], *s; 944 945 strcpy(is[0].name, "$"); 946 ntk = 1; 947 strcpy(is[Sym0].name, "@start"); 948 nsy = MaxTk+1; 949 sstart = S; 950 prec = 0; 951 tk = nexttk(); 952 for (;;) 953 switch (tk) { 954 case TStart: 955 tk = nexttk(); 956 if (tk!=TIdnt) 957 die("syntax error, ident expected after %start"); 958 sstart = findsy(idnt, 1); 959 if (sstart<ntk) 960 die("%start cannot specify a token"); 961 tk = nexttk(); 962 break; 963 case TUnion: 964 tk = nexttk(); 965 if (tk!=TLBrack) 966 die("syntax error, { expected after %union"); 967 fprintf(fout, "#line %d \"%s\"\n", lineno, srca); 968 s = cpycode(); 969 fprintf(fout, "typedef union %s yyunion;\n", s); 970 fprintf(fout, "#define YYSTYPE yyunion\n"); 971 if (fhdr) { 972 fprintf(fhdr, "typedef union %s yyunion;\n", s); 973 fprintf(fhdr, "#define YYSTYPE yyunion\n"); 974 } 975 free(s); 976 doty = 1; 977 tk = nexttk(); 978 break; 979 case TLeft: 980 p = ++prec; 981 a = ALeft; 982 goto addtoks; 983 case TRight: 984 p = ++prec; 985 a = ARight; 986 goto addtoks; 987 case TNonassoc: 988 p = ++prec; 989 a = ANonassoc; 990 goto addtoks; 991 case TToken: 992 p = 0; 993 a = ANone; 994 addtoks: 995 tk = gettype(type); 996 while (tk==TIdnt || tk==TTokchr) { 997 si = 0; 998 n = findsy(idnt, 0); 999 if (n>=MaxTk && n<nsy) 1000 die("non-terminal redeclared as token"); 1001 if (n==nsy) { 1002 if (ntk>=MaxTk) 1003 die("too many tokens"); 1004 n = ntk++; 1005 } 1006 si = &is[n]; 1007 strcpy(si->name, idnt); 1008 strcpy(si->type, type); 1009 si->prec = p; 1010 si->assoc = a; 1011 tk = nexttk(); 1012 } 1013 break; 1014 case TType: 1015 tk = gettype(type); 1016 if (type[0]==0) 1017 die("syntax error, type expected"); 1018 while (tk==TIdnt) { 1019 si = 0; 1020 n = findsy(idnt, 1); 1021 if (n<ntk) 1022 die("token redeclared as non-terminal"); 1023 if (n==nsy) { 1024 nsy++; 1025 } 1026 si = &is[n]; 1027 strcpy(si->name, idnt); 1028 strcpy(si->type, type); 1029 tk = nexttk(); 1030 } 1031 break; 1032 case TLL: 1033 fprintf(fout, "#line %d \"%s\"\n", lineno, srca); 1034 for (;;) { 1035 c = fgetc(fin); 1036 if (c == EOF) 1037 die("syntax error, unclosed %{"); 1038 if (c == '%') { 1039 c1 = fgetc(fin); 1040 if (c1 == '}') { 1041 fputs("\n", fout); 1042 break; 1043 } 1044 ungetc(c1, fin); 1045 } 1046 if (c == '\n') 1047 lineno++; 1048 fputc(c, fout); 1049 } 1050 tk = nexttk(); 1051 break; 1052 case TPP: 1053 return; 1054 case TEof: 1055 die("syntax error, unfinished declarations"); 1056 default: 1057 die("syntax error, declaration expected"); 1058 } 1059 } 1060 1061 void 1062 getgram() 1063 { 1064 extern char *retcode; 1065 int tk; 1066 Sym hd, *p, s; 1067 Rule *r; 1068 1069 for (;;) { 1070 tk = nexttk(); 1071 if (tk==TPP || tk==TEof) { 1072 if (sstart==S) 1073 die("syntax error, empty grammar"); 1074 r = &rs[nrl++]; 1075 r->lhs = Sym0; 1076 r->rhs[0] = sstart; 1077 r->rhs[1] = 0; 1078 r->rhs[2] = S; 1079 r->act = retcode; 1080 qsort(rs, nrl, sizeof rs[0], rcmp); 1081 return; 1082 } 1083 if (tk!=TIdnt) 1084 die("syntax error, production rule expected"); 1085 if (nexttk()!=TColon) 1086 die("syntax error, colon expected after production's head"); 1087 hd = findsy(idnt, 1); 1088 if (sstart==S) 1089 sstart = hd; 1090 do { 1091 if (nrl>=MaxRl-1) 1092 die("too many rules"); 1093 r = &rs[nrl++]; 1094 r->lhs = hd; 1095 r->act = 0; 1096 p = r->rhs; 1097 while ((tk=nexttk())==TIdnt || tk==TTokchr || tk==TPrec) { 1098 if (tk==TPrec) { 1099 tk = nexttk(); 1100 if (tk!=TIdnt 1101 || (s=findsy(idnt, 0))>=ntk) 1102 die("token expected after %prec"); 1103 r->prec = is[s].prec; 1104 continue; 1105 } 1106 s = findsy(idnt, 1); 1107 *p++ = s; 1108 if (s<ntk && is[s].prec>0) 1109 r->prec = is[s].prec; 1110 if (p-r->rhs >= MaxRhs-1) 1111 die("production rule too long"); 1112 } 1113 *p = S; 1114 if (tk==TLBrack) { 1115 r->actln = lineno; 1116 r->act = cpycode(); 1117 tk = nexttk(); 1118 } 1119 } while (tk==TBar); 1120 if (tk!=TSemi) 1121 die("syntax error, ; or | expected"); 1122 } 1123 } 1124 1125 void 1126 actout(Rule *r) 1127 { 1128 long l; 1129 int i, ar; 1130 char c, *p, *ty, tya[IdntSz]; 1131 1132 ar = slen(r->rhs); 1133 p = r->act; 1134 i = r->actln; 1135 if (!p) 1136 return; 1137 while ((c=*p++)) 1138 switch (c) { 1139 case '\n': 1140 i++; 1141 default: 1142 fputc(c, fout); 1143 break; 1144 case '$': 1145 c = *p++; 1146 if (c == '$') { 1147 fprintf(fout, "yyval"); 1148 if (doty) { 1149 ty = is[r->lhs].type; 1150 if (!ty[0]) { 1151 lineno = i; 1152 die("$$ has no type"); 1153 } 1154 fprintf(fout, ".%s", ty); 1155 } 1156 } 1157 else if (c == '<') { 1158 ty = tya; 1159 while (istok(*p) && ty-tya<IdntSz-1) 1160 *ty++ = *p++; 1161 *ty = 0; 1162 if (*p++!='>') { 1163 lineno = i; 1164 die("unclosed tag field"); 1165 } 1166 ty = tya; 1167 c = *p++; 1168 if (c == '$') { 1169 fprintf(fout, "yyval.%s", ty); 1170 } else { 1171 if (!isdigit(c)) { 1172 lineno = i; 1173 die("number or $ expected afer tag field"); 1174 } 1175 goto readnum; 1176 } 1177 } 1178 else if (isdigit(c)) { 1179 ty = 0; 1180 readnum: 1181 l = strtol(p-1, &p, 10); 1182 if (l > ar) { 1183 lineno = i; 1184 die("invalid $n"); 1185 } 1186 fprintf(fout, "ps[%d].val", (int)l); 1187 if (doty) { 1188 if (!ty && l>0) 1189 ty = is[r->rhs[l-1]].type; 1190 if (!ty || !ty[0]) { 1191 lineno = i; 1192 die("$n has no type"); 1193 } 1194 fprintf(fout, ".%s", ty); 1195 } 1196 } 1197 else { 1198 fputc('$', fout); 1199 fputc(c, fout); 1200 } 1201 } 1202 fputs("\n", fout); 1203 } 1204 1205 void 1206 codeout() 1207 { 1208 extern char *code0[], *code1[]; 1209 char **p; 1210 Rule *r; 1211 int n, c; 1212 1213 for (p=code0; *p; p++) 1214 fputs(*p, fout); 1215 for (n=0; n<nrl; n++) { 1216 fprintf(fout, "\tcase %d:\n", n); 1217 r = &rs[n]; 1218 fprintf(fout, "#line %d \"%s\"\n", r->actln, srca); 1219 actout(r); 1220 fputs("\t\tbreak;\n", fout); 1221 } 1222 for (p=code1; *p; p++) 1223 fputs(*p, fout); 1224 fprintf(fout, "#line %d \"%s\"\n", lineno, srca); 1225 while ((c=fgetc(fin))!=EOF) 1226 fputc(c, fout); 1227 } 1228 1229 void 1230 init(int ac, char *av[]) 1231 { 1232 int c, vf, df; 1233 char *pref, buf[100], *opt; 1234 1235 (void) ac; 1236 pref = "y"; 1237 vf = df = 0; 1238 for (av++; av[0] && av[0][0]=='-'; av++) 1239 for (opt = &av[0][1]; (c = *opt); opt++) 1240 switch (c) { 1241 case 'v': 1242 vf = 1; 1243 break; 1244 case 'd': 1245 df = 1; 1246 break; 1247 case 'b': 1248 if ((pref = *++av)) 1249 break; 1250 default: 1251 usage: 1252 fputs("usage: myacc [-vd] [-b file_prefix] grammar\n", stderr); 1253 exit(1); 1254 } 1255 1256 if (!(srca = *av)) 1257 goto usage; 1258 fin = fopen(srca, "r"); 1259 if (strlen(pref) + 10 > sizeof buf) 1260 die("-b prefix too long"); 1261 sprintf(buf, "%s.tab.c", pref); 1262 fout = fopen(buf, "w"); 1263 if (vf) { 1264 sprintf(buf, "%s.output", pref); 1265 fgrm = fopen(buf, "w"); 1266 } 1267 if (df) { 1268 sprintf(buf, "%s.tab.h", pref); 1269 fhdr = fopen(buf, "w"); 1270 if (fhdr) { 1271 fprintf(fhdr, "#ifndef Y_TAB_H_\n"); 1272 fprintf(fhdr, "#define Y_TAB_H_\n"); 1273 } 1274 } 1275 if (!fin || !fout || (!fgrm && vf) || (!fhdr && df)) 1276 die("cannot open work files"); 1277 } 1278 1279 int 1280 main(int ac, char *av[]) 1281 { 1282 1283 init(ac, av); 1284 getdecls(); 1285 getgram(); 1286 ginit(); 1287 stgen(); 1288 tblgen(); 1289 stdump(); 1290 actgen(); 1291 tblout(); 1292 codeout(); 1293 1294 if (srconf) 1295 fprintf(stderr, "%d shift/reduce conflicts\n", srconf); 1296 if (rrconf) 1297 fprintf(stderr, "%d reduce/reduce conflicts\n", rrconf); 1298 1299 exit(0); 1300 } 1301 1302 /* Glorious macros. 1303 |sed 's|.*|"&\\n",|' 1304 */ 1305 1306 char *retcode = "\t\tyyval = ps[1].val; return 0;"; 1307 1308 char *code0[] = { 1309 "\n", 1310 "#ifndef YYSTYPE\n", 1311 "#define YYSTYPE int\n", 1312 "#endif\n", 1313 "YYSTYPE yylval;\n", 1314 "\n", 1315 "int\n", 1316 "yyparse()\n", 1317 "{\n", 1318 " enum {\n", 1319 " StackSize = 100,\n", 1320 " ActSz = sizeof yyact / sizeof yyact[0],\n", 1321 " };\n", 1322 " struct {\n", 1323 " YYSTYPE val;\n", 1324 " int state;\n", 1325 " } stk[StackSize], *ps;\n", 1326 " int r, h, n, s, tk;\n", 1327 " YYSTYPE yyval;\n", 1328 "\n", 1329 " ps = stk;\n", 1330 " ps->state = s = yyini;\n", 1331 " tk = -1;\n", 1332 "loop:\n", 1333 " n = yyadsp[s];\n", 1334 " if (tk < 0 && n > -yyntoks)\n", 1335 " tk = yytrns[yylex()];\n", 1336 " n += tk;\n", 1337 " if (n < 0 || n >= ActSz || yychk[n] != tk) {\n", 1338 " r = yyadef[s];\n", 1339 " if (r < 0)\n", 1340 " return -1;\n", 1341 " goto reduce;\n", 1342 " }\n", 1343 " n = yyact[n];\n", 1344 " if (n == -1)\n", 1345 " return -1;\n", 1346 " if (n < 0) {\n", 1347 " r = - (n+2);\n", 1348 " goto reduce;\n", 1349 " }\n", 1350 " tk = -1;\n", 1351 " yyval = yylval;\n", 1352 "stack:\n", 1353 " ps++;\n", 1354 " if (ps-stk >= StackSize)\n", 1355 " return -2;\n", 1356 " s = n;\n", 1357 " ps->state = s;\n", 1358 " ps->val = yyval;\n", 1359 " goto loop;\n", 1360 "reduce:\n", 1361 " ps -= yyr1[r];\n", 1362 " h = yyr2[r];\n", 1363 " s = ps->state;\n", 1364 " n = yygdsp[h] + s;\n", 1365 " if (n < 0 || n >= ActSz || yychk[n] != yyntoks+h)\n", 1366 " n = yygdef[h];\n", 1367 " else\n", 1368 " n = yyact[n];\n", 1369 " switch (r) {\n", 1370 0 1371 }; 1372 1373 char *code1[] = { 1374 " }\n", 1375 " goto stack;\n", 1376 "}\n", 1377 0 1378 };