qbe

Internal scc patchset buffer for QBE
Log | Files | Refs | README | LICENSE

yacc.c (23342B)


      1 /*% clang -g -Wall -Wextra % -o #
      2  * miniyacc - LALR(1) grammars for C
      3  * See LICENSE for copyright and license details.
      4  */
      5 #include <assert.h>
      6 #include <ctype.h>
      7 #include <stdio.h>
      8 #include <stdlib.h>
      9 #include <string.h>
     10 
     11 typedef int Sym;
     12 typedef struct Rule Rule;
     13 typedef struct TSet TSet;
     14 typedef struct Info Info;
     15 typedef struct Term Term;
     16 typedef struct Item Item;
     17 typedef struct Row Row;
     18 
     19 #define S ((Sym) -1)
     20 #define Red(n) (- (n+2)) /* involutive, Red(Red(x)) == x */
     21 #define GetBit(s,n) (s[n/32] & (1<<(n%32)))
     22 #define SetBit(s,n) (s[n/32] |= 1<<(n%32))
     23 
     24 enum {
     25 	IdntSz = 64,
     26 	MaxRhs = 32,
     27 	MaxTk = 500,
     28 	MaxNt = 500,
     29 	MaxRl = 800,
     30 	MaxTm = 1000,
     31 
     32 	TSetSz = (MaxTk+31)/32,
     33 	Sym0 = MaxTk
     34 };
     35 
     36 struct Rule {
     37 	Sym lhs;
     38 	Sym rhs[MaxRhs];
     39 	char *act;
     40 	int actln;
     41 	int prec;
     42 };
     43 
     44 struct TSet {
     45 	unsigned t[TSetSz];
     46 };
     47 
     48 struct Info {
     49 	int nul;
     50 	TSet fst;
     51 	int prec;
     52 	enum {
     53 		ANone,
     54 		ALeft,
     55 		ARight,
     56 		ANonassoc
     57 	} assoc;
     58 	char name[IdntSz];
     59 	char type[IdntSz];
     60 };
     61 
     62 struct Term {
     63 	Rule *rule;
     64 	int dot;
     65 	TSet lk;
     66 };
     67 
     68 struct Item {
     69 	int id;
     70 	int nt;
     71 	Term ts[MaxTm];
     72 	Item **gtbl;
     73 	int dirty;
     74 };
     75 
     76 struct Row {
     77 	int def;
     78 	int ndef;
     79 	int *t;
     80 };
     81 
     82 char srs[] = "shift/reduce conflict state %d token %s\n";
     83 char rrs[] = "reduce/reduce conflict state %d token %s\n";
     84 
     85 Item i0; /* temporary item */
     86 
     87 int nrl, nsy, nst, ntk;
     88 Rule rs[MaxRl]; /* grammar rules (ordered, rcmp) */
     89 Info is[MaxTk+MaxNt]; /* symbol information */
     90 Item **st; /* LALR(1) states (ordered, icmp) */
     91 Row *as;   /* action table [state][tok] */
     92 Row *gs;   /* goto table   [sym][state] */
     93 Sym sstart;/* start symbol */
     94 Item *ini; /* initial state */
     95 int doty;  /* type-checking enabled */
     96 
     97 int srconf, rrconf;
     98 int actsz;
     99 int *act;
    100 int *chk;
    101 int *adsp;
    102 int *gdsp;
    103 
    104 int lineno = 1;
    105 char *srca;
    106 FILE *fin;
    107 FILE *fout;
    108 FILE *fgrm;
    109 FILE *fhdr;
    110 
    111 void
    112 die(char *s)
    113 {
    114 	fprintf(stderr, "%s (on line %d)\n", s, lineno);
    115 	exit(1);
    116 }
    117 
    118 void *
    119 yalloc(size_t n, size_t o)
    120 {
    121 	void *p;
    122 
    123 	p = calloc(n, o);
    124 	if (!p)
    125 		die("out of memory");
    126 	return p;
    127 }
    128 
    129 int
    130 rcmp(const void *a, const void *b)
    131 {
    132 	return ((Rule *)a)->lhs - ((Rule *)b)->lhs;
    133 }
    134 
    135 Rule *
    136 rfind(Sym lhs)
    137 {
    138 	Rule *r;
    139 	Rule k;
    140 
    141 	k.lhs = lhs;
    142 	r = bsearch(&k, rs, nrl, sizeof *r, rcmp);
    143 	if (r != 0)
    144 		while (r > rs && r[-1].lhs == lhs)
    145 			r--;
    146 	return r;
    147 }
    148 
    149 int
    150 slen(Sym *l)
    151 {
    152 	int n;
    153 
    154 	for (n=0; *l!=S; n++, l++);
    155 	return n;
    156 }
    157 
    158 void
    159 tszero(TSet *ts)
    160 {
    161 	memset(ts, 0, sizeof *ts);
    162 }
    163 
    164 int
    165 tsunion(TSet *tsa, TSet *tsb)
    166 {
    167 	int n;
    168 	unsigned *a, *b, c, t;
    169 
    170 	c = 0;
    171 	a = tsa->t;
    172 	b = tsb->t;
    173 	n = (31+ntk)/32;
    174 	while (n-- > 0) {
    175 		t = *a;
    176 		*a |= *b++;
    177 		c |= t ^ *a++;
    178 	}
    179 	return !!c;
    180 }
    181 
    182 void
    183 first(TSet *ts, Sym *stnc, TSet *last)
    184 {
    185 	Sym f;
    186 
    187 	f = stnc[0];
    188 	if (f == S) {
    189 		if (last)
    190 			tsunion(ts, last);
    191 		return;
    192 	}
    193 	if (f < ntk) {
    194 		SetBit(ts->t, f);
    195 		return;
    196 	}
    197 	if (is[f].nul)
    198 		first(ts, stnc+1, last);
    199 	tsunion(ts, &is[f].fst);
    200 }
    201 
    202 void
    203 ginit()
    204 {
    205 	int chg;
    206 	Rule *r;
    207 	Info *i;
    208 	Sym *s;
    209 	TSet ts;
    210 
    211 	do {
    212 		chg = 0;
    213 		for (r=rs; r-rs<nrl; r++) {
    214 			i = &is[r->lhs];
    215 			for (s=r->rhs; *s!=S; s++)
    216 				if (!is[*s].nul)
    217 					goto nonul;
    218 			chg |= i->nul == 0;
    219 			i->nul = 1;
    220 		nonul:
    221 			tszero(&ts);
    222 			first(&ts, r->rhs, 0);
    223 			chg |= tsunion(&i->fst, &ts);
    224 		}
    225 	} while (chg);
    226 }
    227 
    228 int
    229 tcmp(Term *a, Term *b)
    230 {
    231 	int c;
    232 
    233 	c = a->rule - b->rule;
    234 	if (c==0)
    235 		c = a->dot - b->dot;
    236 	return c;
    237 }
    238 
    239 int
    240 tcmpv(const void *a, const void *b)
    241 {
    242 	return tcmp((Term *)a, (Term *)b);
    243 }
    244 
    245 void
    246 iclose(Item *i)
    247 {
    248 	int smap[MaxNt];
    249 	Rule *r;
    250 	Term *t, t1;
    251 	Sym s, *rem;
    252 	int chg, n, m;
    253 
    254 	t1.dot = 0;
    255 	memset(smap, 0, sizeof smap);
    256 	for (n=0; n<i->nt; n++) {
    257 		t = &i->ts[n];
    258 		s = t->rule->lhs-Sym0;
    259 		if (t->dot==0)
    260 		if (smap[s]==0)
    261 			smap[s] = n;
    262 	}
    263 	do {
    264 		chg = 0;
    265 		for (n=0; n<i->nt; n++) {
    266 			t = &i->ts[n];
    267 			rem = &t->rule->rhs[t->dot];
    268 			s = *rem++;
    269 			if (s < Sym0 || s == S)
    270 				continue;
    271 			r = rfind(s);
    272 			if (!r)
    273 				die("some non-terminals are not defined");
    274 			tszero(&t1.lk);
    275 			first(&t1.lk, rem, &t->lk);
    276 			m = smap[s-Sym0];
    277 			if (m)
    278 				for (; r-rs<nrl && r->lhs==s; r++, m++)
    279 					chg |= tsunion(&i->ts[m].lk, &t1.lk);
    280 			else {
    281 				m = i->nt;
    282 				smap[s-Sym0] = m;
    283 				for (; r-rs<nrl && r->lhs==s; r++, m++) {
    284 					if (m>=MaxTm)
    285 						die("too many terms in item");
    286 					t1.rule = r;
    287 					i->ts[m] = t1;
    288 				}
    289 				i->nt = m;
    290 				chg = 1;
    291 			}
    292 		}
    293 	} while (chg);
    294 }
    295 
    296 void
    297 igoto(Item *i, Sym s)
    298 {
    299 	Term *t, *t1;
    300 	int n;
    301 
    302 	i0.nt = 0;
    303 	for (n=0, t=i->ts; n<i->nt; n++, t++) {
    304 		if (t->rule->rhs[t->dot] != s)
    305 			continue;
    306 		t1 = &i0.ts[i0.nt++];
    307 		*t1 = *t;
    308 		t1->dot++;
    309 	}
    310 	qsort(i0.ts, i0.nt, sizeof i0.ts[0], tcmpv);
    311 }
    312 
    313 int
    314 icmp(Item *a, Item *b)
    315 {
    316 	Term *ta, *tb, *ma, *mb;
    317 	int c;
    318 
    319 	ta = a->ts;
    320 	tb = b->ts;
    321 	ma = ta+a->nt;
    322 	mb = tb+b->nt;
    323 	for (;;) {
    324 		if (ta==ma || ta->dot==0)
    325 			return -(tb<mb && tb->dot);
    326 		if (tb==mb || tb->dot==0)
    327 			return +(ta<ma && ta->dot);
    328 		if ((c=tcmp(ta++, tb++)))
    329 			return c;
    330 	}
    331 }
    332 
    333 int
    334 stadd(Item **pi)
    335 {
    336 	Item *i, *i1;
    337 	int lo, hi, mid, n, chg;
    338 
    339 	/* http://www.iq0.com/duffgram/bsearch.c */
    340 	i = *pi;
    341 	lo = 0;
    342 	hi = nst - 1;
    343 	if (hi<0 || icmp(i, st[hi])>0)
    344 		hi++;
    345 	else if (icmp(i, st[lo])<=0)
    346 		hi = lo;
    347 	else
    348 		while (hi-lo!=1) {
    349 			mid = (lo+hi)/2;
    350 			if (icmp(st[mid], i)<0)
    351 				lo = mid;
    352 			else
    353 				hi = mid;
    354 		}
    355 	if (hi<nst && icmp(st[hi], i)==0) {
    356 		chg = 0;
    357 		i1 = st[hi];
    358 		for (n=0; n<i->nt; n++)
    359 			chg |= tsunion(&i1->ts[n].lk, &i->ts[n].lk);
    360 		i1->dirty |= chg;
    361 		*pi = i1;
    362 		return chg;
    363 	} else {
    364 		st = realloc(st, ++nst * sizeof st[0]);
    365 		if (!st)
    366 			die("out of memory");
    367 		memmove(&st[hi+1], &st[hi], (nst-1 - hi) * sizeof st[0]);
    368 		i->gtbl = yalloc(nsy, sizeof i->gtbl[0]);
    369 		i->dirty = 1;
    370 		i1 = yalloc(1, sizeof *i1);
    371 		*i1 = *i;
    372 		*pi = st[hi] = i1;
    373 		return 1;
    374 	}
    375 }
    376 
    377 void
    378 stgen()
    379 {
    380 	Sym s;
    381 	Rule *r;
    382 	Item *i, *i1;
    383 	Term tini;
    384 	int n, chg;
    385 
    386 	ini = &i0;
    387 	r = rfind(Sym0);
    388 	tini.rule = r;
    389 	tini.dot = 0;
    390 	tszero(&tini.lk);
    391 	SetBit(tini.lk.t, 0);
    392 	i0.nt = 0;
    393 	i0.ts[i0.nt++] = tini;
    394 	stadd(&ini);
    395 	do {
    396 		chg = 0;
    397 		for (n=0; n<nst; n++) {
    398 			i = st[n];
    399 			if (!i->dirty)
    400 				continue;
    401 			i->dirty = 0;
    402 			iclose(i);
    403 			for (s=0; s<nsy; s++) {
    404 				igoto(i, s);
    405 				i1 = &i0;
    406 				if (!i1->nt) {
    407 					i->gtbl[s] = 0;
    408 					continue;
    409 				}
    410 				chg |= stadd(&i1);
    411 				i->gtbl[s] = i1;
    412 			}
    413 		}
    414 	} while (chg);
    415 }
    416 
    417 int
    418 resolve(Rule *r, Sym s, int st)
    419 {
    420 	if (!r->prec || !is[s].prec) {
    421 	conflict:
    422 		if (fgrm)
    423 			fprintf(fgrm, srs, st, is[s].name);
    424 		srconf++;
    425 		return ARight;
    426 	}
    427 	if (r->prec==is[s].prec) {
    428 		if (is[s].assoc == ANone)
    429 			goto conflict;
    430 		return is[s].assoc;
    431 	} else
    432 		if (r->prec<is[s].prec)
    433 			return ARight;
    434 		else
    435 			return ALeft;
    436 }
    437 
    438 void
    439 tblset(int *tbl, Item *i, Term *t)
    440 {
    441 	int act;
    442 	Sym s;
    443 
    444 	s = t->rule->rhs[t->dot];
    445 	if (s!=S) {
    446 		/* shift */
    447 		if (s>=ntk)
    448 			return;
    449 		assert(i->gtbl[s]);
    450 		act = ARight;
    451 		if (tbl[s] && tbl[s] != i->gtbl[s]->id) {
    452 			assert(tbl[s]<=0);
    453 			act = resolve(&rs[Red(tbl[s])], s, i->id-1);
    454 		}
    455 		switch (act) {
    456 		case ARight:
    457 			tbl[s] = i->gtbl[s]->id;
    458 			break;
    459 		case ANonassoc:
    460 			tbl[s] = -1;
    461 			break;
    462 		}
    463 	} else
    464 		/* reduce */
    465 		for (s=0; s<ntk; s++) {
    466 			if (!GetBit(t->lk.t, s))
    467 				continue;
    468 			/* default to shift if conflict occurs */
    469 			if (!tbl[s])
    470 				act = ALeft;
    471 			else if (tbl[s]<0) {
    472 				if (fgrm)
    473 					fprintf(fgrm, rrs, i->id-1, is[s].name);
    474 				rrconf++;
    475 				act = ARight;
    476 			} else
    477 				act = resolve(t->rule, s, i->id-1);
    478 			switch (act) {
    479 			case ALeft:
    480 				tbl[s] = Red(t->rule-rs);
    481 				break;
    482 			case ANonassoc:
    483 				tbl[s] = -1;
    484 				break;
    485 			}
    486 		}
    487 }
    488 
    489 void
    490 setdef(Row *r, int w, int top)
    491 {
    492 	int n, m, x, cnt, def, max;
    493 
    494 	max = 0;
    495 	def = -1;
    496 	r->ndef = 0;
    497 	for (n=0; n<w; n++) {
    498 		x = r->t[n];
    499 		if (x==0)
    500 			r->ndef++;
    501 		if (x>=top || x==0)
    502 			continue;
    503 		cnt = 1;
    504 		for (m=n+1; m<w; m++)
    505 			if (r->t[m]==x)
    506 				cnt++;
    507 		if (cnt>max) {
    508 			def = x;
    509 			max = cnt;
    510 		}
    511 	}
    512 	r->def = def;
    513 	if (max!=0)
    514 		/* zero out the most frequent entry */
    515 		for (n=0; n<w; n++)
    516 			if (r->t[n]==def) {
    517 				r->t[n] = 0;
    518 				r->ndef++;
    519 			}
    520 }
    521 
    522 void
    523 tblgen()
    524 {
    525 	Row *r;
    526 	Item *i;
    527 	int n, m;
    528 
    529 	for (n=0; n<nst; n++)
    530 		st[n]->id = n+1;
    531 	as = yalloc(nst, sizeof as[0]);
    532 	gs = yalloc(nsy-MaxTk, sizeof gs[0]);
    533 	/* fill action table */
    534 	for (n=0; n<nst; n++) {
    535 		r = &as[n];
    536 		r->t = yalloc(ntk, sizeof r->t[0]);
    537 		for (i=st[n], m=0; m<i->nt; m++)
    538 			tblset(r->t, i, &i->ts[m]);
    539 		setdef(r, ntk, -1);
    540 		r->def = Red(r->def); /* Red(-1) == -1 */
    541 	}
    542 	/* fill goto table */
    543 	for (n=MaxTk; n<nsy; n++) {
    544 		r = &gs[n-MaxTk];
    545 		r->t = yalloc(nst, sizeof r->t[0]);
    546 		for (m=0; m<nst; m++)
    547 			if (st[m]->gtbl[n])
    548 				r->t[m] = st[m]->gtbl[n]->id;
    549 		setdef(r, nst, nst+1);
    550 	}
    551 }
    552 
    553 int
    554 prcmp(const void *a, const void *b)
    555 {
    556 	return (*(Row **)a)->ndef - (*(Row **)b)->ndef;
    557 }
    558 
    559 void
    560 actgen()
    561 {
    562 	Row **o, *r;
    563 	int n, m, t, dsp, nnt;
    564 
    565 	actsz = 0;
    566 	o = yalloc(nst+nsy, sizeof o[0]);
    567 	act = yalloc(nst*nsy, sizeof act[0]);
    568 	chk = yalloc(nst*nsy, sizeof chk[0]);
    569 	adsp = yalloc(nst, sizeof adsp[0]);
    570 	for (n=0; n<nst*nsy; n++)
    571 		chk[n] = -1;
    572 	/* fill in actions */
    573 	for (n=0; n<nst; n++)
    574 		o[n] = &as[n];
    575 	qsort(o, nst, sizeof o[0], prcmp);
    576 	for (n=0; n<nst; n++) {
    577 		r = o[n];
    578 		dsp = 0;
    579 		for (m=0; m<ntk && r->t[m]==0; m++)
    580 			dsp--;
    581 	retrya:
    582 		/* The invariant here is even
    583 		 * trickier than it looks.
    584 		 */
    585 		for (t=0; t<ntk; t++)
    586 			if ((m=dsp+t)>=0 && chk[m]>=0)
    587 			if ((r->t[t] && (chk[m]!=t || act[m]!=r->t[t]))
    588 			|| (!r->t[t] && chk[m]==t)) {
    589 				dsp++;
    590 				goto retrya;
    591 			}
    592 		adsp[r-as] = dsp;
    593 		for (t=0; t<ntk; t++)
    594 			if (r->t[t]) {
    595 				chk[dsp+t] = t;
    596 				act[dsp+t] = r->t[t];
    597 				if (dsp+t>=actsz)
    598 					actsz = dsp+t+1;
    599 			}
    600 	}
    601 	/* fill in gotos */
    602 	nnt = nsy-MaxTk;
    603 	gdsp = yalloc(nnt, sizeof gdsp[0]);
    604 	for (n=0; n<nnt; n++)
    605 		o[n] = &gs[n];
    606 	qsort(o, nnt, sizeof o[0], prcmp);
    607 	for (n=0; n<nnt; n++) {
    608 		r = o[n];
    609 		dsp = 0;
    610 		for (m=0; m<nst && r->t[m]==0; m++)
    611 			dsp--;
    612 	retryg:
    613 		for (t=m; t<nst; t++)
    614 			if (chk[dsp+t]>=0 && r->t[t]) {
    615 				dsp++;
    616 				goto retryg;
    617 			}
    618 		gdsp[r-gs] = dsp;
    619 		for (t=m; t<nst; t++)
    620 			if (r->t[t]) {
    621 				chk[dsp+t] = ntk+(r-gs);
    622 				act[dsp+t] = r->t[t];
    623 				if (dsp+t>=actsz)
    624 					actsz = dsp+t+1;
    625 			}
    626 	}
    627 	free(o);
    628 }
    629 
    630 void
    631 aout(char *name, int *t, int n)
    632 {
    633 	int i;
    634 
    635 	fprintf(fout, "short %s[] = {", name);
    636 	for (i=0; i<n; i++) {
    637 		if (i % 10 == 0)
    638 			fprintf(fout, "\n");
    639 		fprintf(fout, "%4d", t[i]);
    640 		if (i != n-1)
    641 			fprintf(fout, ",");
    642 	}
    643 	fprintf(fout, "\n};\n");
    644 }
    645 
    646 void
    647 tblout()
    648 {
    649 	int *o, n, m;
    650 
    651 	fprintf(fout, "short yyini = %d;\n", ini->id-1);
    652 	fprintf(fout, "short yyntoks = %d;\n", ntk);
    653 	o = yalloc(nrl+nst+nsy, sizeof o[0]);
    654 	for (n=0; n<nrl; n++)
    655 		o[n] = slen(rs[n].rhs);
    656 	aout("yyr1", o, nrl);
    657 	for (n=0; n<nrl; n++)
    658 		o[n] = rs[n].lhs-MaxTk;
    659 	aout("yyr2", o, nrl);
    660 	for (n=0; n<nst; n++)
    661 		o[n] = as[n].def;
    662 	aout("yyadef", o, nst);
    663 	for (n=0; n<nsy-MaxTk; n++) {
    664 		o[n] = gs[n].def;
    665 		assert(o[n]>0 || o[n]==-1);
    666 		if (o[n]>0)
    667 			o[n]--;
    668 	}
    669 	aout("yygdef", o, nsy-MaxTk);
    670 	aout("yyadsp", adsp, nst);
    671 	aout("yygdsp", gdsp, nsy-MaxTk);
    672 	for (n=0; n<actsz; n++)
    673 		if (act[n]>=0)
    674 			act[n]--;
    675 	aout("yyact", act, actsz);
    676 	aout("yychk", chk, actsz);
    677 	for (n=0; n<128; n++) {
    678 		o[n] = 0;
    679 		for (m=0; m<ntk; m++)
    680 			if (is[m].name[0]=='\'')
    681 			if (is[m].name[1]==n)
    682 				assert(!o[n]), o[n] = m;
    683 	}
    684 	m = 128;
    685 	for (n=1; n<ntk; n++) {
    686 		if (is[n].name[0]=='\'')
    687 			continue;
    688 		fprintf(fout, "#define %s %d\n", is[n].name, m);
    689 		if (fhdr)
    690 			fprintf(fhdr, "#define %s %d\n", is[n].name, m);
    691 		o[m++] = n;
    692 	}
    693 	aout("yytrns", o, m);
    694 	if (fhdr) {
    695 		fputs("int yyparse(void);\n", fhdr);
    696 		fputs("#ifndef YYSTYPE\n", fhdr);
    697 		fputs("#define YYSTYPE int\n", fhdr);
    698 		fputs("#endif\n", fhdr);
    699 		fputs("extern YYSTYPE yylval;\n", fhdr);
    700 		fputs("#endif\n", fhdr);
    701 	}
    702 	free(o);
    703 }
    704 
    705 void
    706 stdump()
    707 {
    708 	Term *t;
    709 	Sym *s1;
    710 	int n, m, d, act;
    711 	Rule *r;
    712 	Row *ar;
    713 
    714 	if (!fgrm)
    715 		return;
    716 	for (r=rs; r-rs<nrl; r++) {
    717 		fprintf(fgrm, "\n%03d: %s ->", (int)(r-rs), is[r->lhs].name);
    718 		for (s1=r->rhs; *s1!=S; s1++)
    719 			fprintf(fgrm, " %s", is[*s1].name);
    720 	}
    721 	fprintf(fgrm, "\n");
    722 	for (m=0; m<nst; m++) {
    723 		fprintf(fgrm, "\nState %d:\n", m);
    724 		for (t=st[m]->ts; t-st[m]->ts<st[m]->nt; t++) {
    725 			r = t->rule;
    726 			d = t->dot;
    727 			if (d==0 && t!=st[m]->ts)
    728 				continue;
    729 			fprintf(fgrm, "  %s ->", is[r->lhs].name);
    730 			for (s1=r->rhs; *s1!=S; s1++, d--)
    731 				fprintf(fgrm, " %s%s", d ? "" : ". ", is[*s1].name);
    732 			if (!d)
    733 				fprintf(fgrm, " .");
    734 			fprintf(fgrm, "\n");
    735 		}
    736 		fprintf(fgrm, "\n");
    737 		ar = &as[m];
    738 		for (n=0; n<ntk; n++) {
    739 			act = ar->t[n];
    740 			if (!act)
    741 				continue;
    742 			if (act==-1)
    743 				fprintf(fgrm, "  %s error (nonassoc)\n", is[n].name);
    744 			else if (act<0)
    745 				fprintf(fgrm, "  %s reduce with rule %d\n", is[n].name, Red(act));
    746 			else
    747 				fprintf(fgrm, "  %s shift and go to %d\n", is[n].name, act-1);
    748 		}
    749 		if (ar->def != -1)
    750 			fprintf(fgrm, "  * reduce with rule %d\n", ar->def);
    751 	}
    752 }
    753 
    754 enum {
    755 	TIdnt,
    756 	TTokchr, /* 'c' */
    757 	TPP, /* %% */
    758 	TLL, /* %{ */
    759 	TLangle, /* < */
    760 	TRangle, /* > */
    761 	TSemi, /* ; */
    762 	TBar, /* | */
    763 	TColon, /* : */
    764 	TLBrack, /* { */
    765 	TUnion,
    766 	TType,
    767 	TToken,
    768 	TRight,
    769 	TLeft,
    770 	TNonassoc,
    771 	TPrec,
    772 	TStart,
    773 	TEof
    774 };
    775 
    776 struct {
    777 	char *name;
    778 	int tok;
    779 } words[] = {
    780 	{ "%%", TPP },
    781 	{ "%union", TUnion },
    782 	{ "%type", TType },
    783 	{ "%token", TToken },
    784 	{ "%right", TRight },
    785 	{ "%left", TLeft },
    786 	{ "%nonassoc", TNonassoc },
    787 	{ "%prec", TPrec },
    788 	{ "%start", TStart },
    789 	{ 0, 0 }
    790 };
    791 
    792 char idnt[IdntSz];
    793 
    794 int
    795 istok(int c)
    796 {
    797 	return isalnum(c) || c=='_' || c=='%';
    798 }
    799 
    800 int
    801 nexttk()
    802 {
    803 	int n;
    804 	char c, *p;
    805 
    806 	while (isspace(c=fgetc(fin)))
    807 		if (c == '\n')
    808 			lineno++;
    809 	switch (c) {
    810 	case '<':
    811 		return TLangle;
    812 	case '>':
    813 		return TRangle;
    814 	case ';':
    815 		return TSemi;
    816 	case '|':
    817 		return TBar;
    818 	case ':':
    819 		return TColon;
    820 	case '{':
    821 		return TLBrack;
    822 	case EOF:
    823 		return TEof;
    824 	case '\'':
    825 		idnt[0] = '\'';
    826 		idnt[1] = fgetc(fin);
    827 		idnt[2] = '\'';
    828 		idnt[3] = 0;
    829 		if (fgetc(fin)!='\'')
    830 			die("syntax error, invalid char token");
    831 		return TTokchr;
    832 	}
    833 	p = idnt;
    834 	while (istok(c)) {
    835 		*p++ = c;
    836 		if (p-idnt >= IdntSz-1)
    837 			die("identifier too long");
    838 		c = fgetc(fin);
    839 	}
    840 	*p = 0;
    841 	if (strcmp(idnt, "%")==0)
    842 	if (c=='{')
    843 		return TLL;
    844 	ungetc(c, fin);
    845 	for (n=0; words[n].name; n++)
    846 		if (strcmp(idnt, words[n].name) == 0)
    847 			return words[n].tok;
    848 	return TIdnt;
    849 }
    850 
    851 char *
    852 cpycode()
    853 {
    854 	int c, nest, in, len, pos;
    855 	char *s;
    856 
    857 	len = 64;
    858 	s = yalloc(len+1, 1);
    859 	s[0] = '{';
    860 	pos = 1;
    861 	nest = 1;
    862 	in = 0;
    863 
    864 	while (nest) {
    865 		c = fgetc(fin);
    866 		if (in) {
    867 			if (c == in)
    868 			if (s[pos-1] != '\\')
    869 				in = 0;
    870 		} else {
    871 			if (c == '"' || c == '\'')
    872 				in = c;
    873 			if (c == '{')
    874 				nest++;
    875 			if (c == '}')
    876 				nest--;
    877 			if (c == EOF)
    878 				die("syntax error, unclosed code block");
    879 			if (c == '\n')
    880 				lineno++;
    881 		}
    882 		if (pos>=len)
    883 		if (!(s=realloc(s, len=2*len+1)))
    884 			die("out of memory");
    885 		s[pos++] = c;
    886 	}
    887 	s[pos] = 0;
    888 	return s;
    889 }
    890 
    891 int
    892 gettype(char *type)
    893 {
    894 	int tk;
    895 
    896 	tk = nexttk();
    897 	if (tk==TLangle) {
    898 		if (nexttk()!=TIdnt)
    899 			die("syntax error, ident expected after <");
    900 		strcpy(type, idnt);
    901 		if (nexttk()!=TRangle)
    902 			die("syntax error, unclosed <");
    903 		return nexttk();
    904 	} else {
    905 		type[0] = 0;
    906 		return tk;
    907 	}
    908 }
    909 
    910 Sym
    911 findsy(char *name, int add)
    912 {
    913 	int n;
    914 
    915 	for (n=0; n<nsy; n++) {
    916 		if (n == ntk) {
    917 			if (name[0]=='\'') {
    918 				if (ntk>=MaxTk)
    919 					die("too many tokens");
    920 				ntk++;
    921 				strcpy(is[n].name, name);
    922 				return n;
    923 			}
    924 			n = MaxTk;
    925 		}
    926 		if (strcmp(is[n].name, name)==0)
    927 			return n;
    928 	}
    929 	if (add) {
    930 		if (nsy>=MaxTk+MaxNt)
    931 			die("too many non-terminals");
    932 		strcpy(is[nsy].name, name);
    933 		return nsy++;
    934 	} else
    935 		return nsy;
    936 }
    937 
    938 void
    939 getdecls()
    940 {
    941 	int tk, prec, p, a, c, c1, n;
    942 	Info *si;
    943 	char type[IdntSz], *s;
    944 
    945 	strcpy(is[0].name, "$");
    946 	ntk = 1;
    947 	strcpy(is[Sym0].name, "@start");
    948 	nsy = MaxTk+1;
    949 	sstart = S;
    950 	prec = 0;
    951 	tk = nexttk();
    952 	for (;;)
    953 	switch (tk) {
    954 	case TStart:
    955 		tk = nexttk();
    956 		if (tk!=TIdnt)
    957 			die("syntax error, ident expected after %start");
    958 		sstart = findsy(idnt, 1);
    959 		if (sstart<ntk)
    960 			die("%start cannot specify a token");
    961 		tk = nexttk();
    962 		break;
    963 	case TUnion:
    964 		tk = nexttk();
    965 		if (tk!=TLBrack)
    966 			die("syntax error, { expected after %union");
    967 		fprintf(fout, "#line %d \"%s\"\n", lineno, srca);
    968 		s = cpycode();
    969 		fprintf(fout, "typedef union %s yyunion;\n", s);
    970 		fprintf(fout, "#define YYSTYPE yyunion\n");
    971 		if (fhdr) {
    972 			fprintf(fhdr, "typedef union %s yyunion;\n", s);
    973 			fprintf(fhdr, "#define YYSTYPE yyunion\n");
    974 		}
    975 		free(s);
    976 		doty = 1;
    977 		tk = nexttk();
    978 		break;
    979 	case TLeft:
    980 		p = ++prec;
    981 		a = ALeft;
    982 		goto addtoks;
    983 	case TRight:
    984 		p = ++prec;
    985 		a = ARight;
    986 		goto addtoks;
    987 	case TNonassoc:
    988 		p = ++prec;
    989 		a = ANonassoc;
    990 		goto addtoks;
    991 	case TToken:
    992 		p = 0;
    993 		a = ANone;
    994 	addtoks:
    995 		tk = gettype(type);
    996 		while (tk==TIdnt || tk==TTokchr) {
    997 			si = 0;
    998 			n = findsy(idnt, 0);
    999 			if (n>=MaxTk && n<nsy)
   1000 				die("non-terminal redeclared as token");
   1001 			if (n==nsy) {
   1002 				if (ntk>=MaxTk)
   1003 					die("too many tokens");
   1004 				n = ntk++;
   1005 			}
   1006 			si = &is[n];
   1007 			strcpy(si->name, idnt);
   1008 			strcpy(si->type, type);
   1009 			si->prec = p;
   1010 			si->assoc = a;
   1011 			tk = nexttk();
   1012 		}
   1013 		break;
   1014 	case TType:
   1015 		tk = gettype(type);
   1016 		if (type[0]==0)
   1017 			die("syntax error, type expected");
   1018 		while (tk==TIdnt) {
   1019 			si = 0;
   1020 			n = findsy(idnt, 1);
   1021 			if (n<ntk)
   1022 				die("token redeclared as non-terminal");
   1023 			if (n==nsy) {
   1024 				nsy++;
   1025 			}
   1026 			si = &is[n];
   1027 			strcpy(si->name, idnt);
   1028 			strcpy(si->type, type);
   1029 			tk = nexttk();
   1030 		}
   1031 		break;
   1032 	case TLL:
   1033 		fprintf(fout, "#line %d \"%s\"\n", lineno, srca);
   1034 		for (;;) {
   1035 			c = fgetc(fin);
   1036 			if (c == EOF)
   1037 				die("syntax error, unclosed %{");
   1038 			if (c == '%') {
   1039 				c1 = fgetc(fin);
   1040 				if (c1 == '}') {
   1041 					fputs("\n", fout);
   1042 					break;
   1043 				}
   1044 				ungetc(c1, fin);
   1045 			}
   1046 			if (c == '\n')
   1047 				lineno++;
   1048 			fputc(c, fout);
   1049 		}
   1050 		tk = nexttk();
   1051 		break;
   1052 	case TPP:
   1053 		return;
   1054 	case TEof:
   1055 		die("syntax error, unfinished declarations");
   1056 	default:
   1057 		die("syntax error, declaration expected");
   1058 	}
   1059 }
   1060 
   1061 void
   1062 getgram()
   1063 {
   1064 	extern char *retcode;
   1065 	int tk;
   1066 	Sym hd, *p, s;
   1067 	Rule *r;
   1068 
   1069 	for (;;) {
   1070 		tk = nexttk();
   1071 		if (tk==TPP || tk==TEof) {
   1072 			if (sstart==S)
   1073 				die("syntax error, empty grammar");
   1074 			r = &rs[nrl++];
   1075 			r->lhs = Sym0;
   1076 			r->rhs[0] = sstart;
   1077 			r->rhs[1] = 0;
   1078 			r->rhs[2] = S;
   1079 			r->act = retcode;
   1080 			qsort(rs, nrl, sizeof rs[0], rcmp);
   1081 			return;
   1082 		}
   1083 		if (tk!=TIdnt)
   1084 			die("syntax error, production rule expected");
   1085 		if (nexttk()!=TColon)
   1086 			die("syntax error, colon expected after production's head");
   1087 		hd = findsy(idnt, 1);
   1088 		if (sstart==S)
   1089 			sstart = hd;
   1090 		do {
   1091 			if (nrl>=MaxRl-1)
   1092 				die("too many rules");
   1093 			r = &rs[nrl++];
   1094 			r->lhs = hd;
   1095 			r->act = 0;
   1096 			p = r->rhs;
   1097 			while ((tk=nexttk())==TIdnt || tk==TTokchr || tk==TPrec) {
   1098 				if (tk==TPrec) {
   1099 					tk = nexttk();
   1100 					if (tk!=TIdnt
   1101 					|| (s=findsy(idnt, 0))>=ntk)
   1102 						die("token expected after %prec");
   1103 					r->prec = is[s].prec;
   1104 					continue;
   1105 				}
   1106 				s = findsy(idnt, 1);
   1107 				*p++ = s;
   1108 				if (s<ntk && is[s].prec>0)
   1109 					r->prec = is[s].prec;
   1110 				if (p-r->rhs >= MaxRhs-1)
   1111 					die("production rule too long");
   1112 			}
   1113 			*p = S;
   1114 			if (tk==TLBrack) {
   1115 				r->actln = lineno;
   1116 				r->act = cpycode();
   1117 				tk = nexttk();
   1118 			}
   1119 		} while (tk==TBar);
   1120 		if (tk!=TSemi)
   1121 			die("syntax error, ; or | expected");
   1122 	}
   1123 }
   1124 
   1125 void
   1126 actout(Rule *r)
   1127 {
   1128 	long l;
   1129 	int i, ar;
   1130 	char c, *p, *ty, tya[IdntSz];
   1131 
   1132 	ar = slen(r->rhs);
   1133 	p = r->act;
   1134 	i = r->actln;
   1135 	if (!p)
   1136 		return;
   1137 	while ((c=*p++))
   1138 	switch (c) {
   1139 	case '\n':
   1140 		i++;
   1141 	default:
   1142 		fputc(c, fout);
   1143 		break;
   1144 	case '$':
   1145 		c = *p++;
   1146 		if (c == '$') {
   1147 			fprintf(fout, "yyval");
   1148 			if (doty) {
   1149 				ty = is[r->lhs].type;
   1150 				if (!ty[0]) {
   1151 					lineno = i;
   1152 					die("$$ has no type");
   1153 				}
   1154 				fprintf(fout, ".%s", ty);
   1155 			}
   1156 		}
   1157 		else if (c == '<') {
   1158 			ty = tya;
   1159 			while (istok(*p) && ty-tya<IdntSz-1)
   1160 				*ty++ = *p++;
   1161 			*ty = 0;
   1162 			if (*p++!='>') {
   1163 				lineno = i;
   1164 				die("unclosed tag field");
   1165 			}
   1166 			ty = tya;
   1167 			c = *p++;
   1168 			if (c == '$') {
   1169 				fprintf(fout, "yyval.%s", ty);
   1170 			} else {
   1171 				if (!isdigit(c)) {
   1172 					lineno = i;
   1173 					die("number or $ expected afer tag field");
   1174 				}
   1175 				goto readnum;
   1176 			}
   1177 		}
   1178 		else if (isdigit(c)) {
   1179 			ty = 0;
   1180 		readnum:
   1181 			l = strtol(p-1, &p, 10);
   1182 			if (l > ar) {
   1183 				lineno = i;
   1184 				die("invalid $n");
   1185 			}
   1186 			fprintf(fout, "ps[%d].val", (int)l);
   1187 			if (doty) {
   1188 				if (!ty && l>0)
   1189 					ty = is[r->rhs[l-1]].type;
   1190 				if (!ty || !ty[0]) {
   1191 					lineno = i;
   1192 					die("$n has no type");
   1193 				}
   1194 				fprintf(fout, ".%s", ty);
   1195 			}
   1196 		}
   1197 		else {
   1198 			fputc('$', fout);
   1199 			fputc(c, fout);
   1200 		}
   1201 	}
   1202 	fputs("\n", fout);
   1203 }
   1204 
   1205 void
   1206 codeout()
   1207 {
   1208 	extern char *code0[], *code1[];
   1209 	char **p;
   1210 	Rule *r;
   1211 	int n, c;
   1212 
   1213 	for (p=code0; *p; p++)
   1214 		fputs(*p, fout);
   1215 	for (n=0; n<nrl; n++) {
   1216 		fprintf(fout, "\tcase %d:\n", n);
   1217 		r = &rs[n];
   1218 		fprintf(fout, "#line %d \"%s\"\n", r->actln, srca);
   1219 		actout(r);
   1220 		fputs("\t\tbreak;\n", fout);
   1221 	}
   1222 	for (p=code1; *p; p++)
   1223 		fputs(*p, fout);
   1224 	fprintf(fout, "#line %d \"%s\"\n", lineno, srca);
   1225 	while ((c=fgetc(fin))!=EOF)
   1226 		fputc(c, fout);
   1227 }
   1228 
   1229 void
   1230 init(int ac, char *av[])
   1231 {
   1232 	int c, vf, df;
   1233 	char *pref, buf[100], *opt;
   1234 
   1235 	(void) ac;
   1236 	pref = "y";
   1237 	vf = df = 0;
   1238 	for (av++; av[0] && av[0][0]=='-'; av++)
   1239 		for (opt = &av[0][1]; (c = *opt); opt++)
   1240 			switch (c) {
   1241 			case 'v':
   1242 				vf = 1;
   1243 				break;
   1244 			case 'd':
   1245 				df = 1;
   1246 				break;
   1247 			case 'b':
   1248 				if ((pref = *++av))
   1249 					break;
   1250 			default:
   1251 			usage:
   1252 				fputs("usage: myacc [-vd] [-b file_prefix] grammar\n", stderr);
   1253 				exit(1);
   1254 			}
   1255 
   1256 	if (!(srca = *av))
   1257 		goto usage;
   1258 	fin = fopen(srca, "r");
   1259 	if (strlen(pref) + 10 > sizeof buf)
   1260 		die("-b prefix too long");
   1261 	sprintf(buf, "%s.tab.c", pref);
   1262 	fout = fopen(buf, "w");
   1263 	if (vf) {
   1264 		sprintf(buf, "%s.output", pref);
   1265 		fgrm = fopen(buf, "w");
   1266 	}
   1267 	if (df) {
   1268 		sprintf(buf, "%s.tab.h", pref);
   1269 		fhdr = fopen(buf, "w");
   1270 		if (fhdr) {
   1271 			fprintf(fhdr, "#ifndef Y_TAB_H_\n");
   1272 			fprintf(fhdr, "#define Y_TAB_H_\n");
   1273 		}
   1274 	}
   1275 	if (!fin || !fout || (!fgrm && vf) || (!fhdr && df))
   1276 		die("cannot open work files");
   1277 }
   1278 
   1279 int
   1280 main(int ac, char *av[])
   1281 {
   1282 
   1283 	init(ac, av);
   1284 	getdecls();
   1285 	getgram();
   1286 	ginit();
   1287 	stgen();
   1288 	tblgen();
   1289 	stdump();
   1290 	actgen();
   1291 	tblout();
   1292 	codeout();
   1293 
   1294 	if (srconf)
   1295 		fprintf(stderr, "%d shift/reduce conflicts\n", srconf);
   1296 	if (rrconf)
   1297 		fprintf(stderr, "%d reduce/reduce conflicts\n", rrconf);
   1298 
   1299 	exit(0);
   1300 }
   1301 
   1302 /* Glorious macros.
   1303 	|sed 's|.*|"&\\n",|'
   1304 */
   1305 
   1306 char *retcode = "\t\tyyval = ps[1].val; return 0;";
   1307 
   1308 char *code0[] = {
   1309 "\n",
   1310 "#ifndef YYSTYPE\n",
   1311 "#define YYSTYPE int\n",
   1312 "#endif\n",
   1313 "YYSTYPE yylval;\n",
   1314 "\n",
   1315 "int\n",
   1316 "yyparse()\n",
   1317 "{\n",
   1318 "	enum {\n",
   1319 "		StackSize = 100,\n",
   1320 "		ActSz = sizeof yyact / sizeof yyact[0],\n",
   1321 "	};\n",
   1322 "	struct {\n",
   1323 "		YYSTYPE val;\n",
   1324 "		int state;\n",
   1325 "	} stk[StackSize], *ps;\n",
   1326 "	int r, h, n, s, tk;\n",
   1327 "	YYSTYPE yyval;\n",
   1328 "\n",
   1329 "	ps = stk;\n",
   1330 "	ps->state = s = yyini;\n",
   1331 "	tk = -1;\n",
   1332 "loop:\n",
   1333 "	n = yyadsp[s];\n",
   1334 "	if (tk < 0 && n > -yyntoks)\n",
   1335 "		tk = yytrns[yylex()];\n",
   1336 "	n += tk;\n",
   1337 "	if (n < 0 || n >= ActSz || yychk[n] != tk) {\n",
   1338 "		r = yyadef[s];\n",
   1339 "		if (r < 0)\n",
   1340 "			return -1;\n",
   1341 "		goto reduce;\n",
   1342 "	}\n",
   1343 "	n = yyact[n];\n",
   1344 "	if (n == -1)\n",
   1345 "		return -1;\n",
   1346 "	if (n < 0) {\n",
   1347 "		r = - (n+2);\n",
   1348 "		goto reduce;\n",
   1349 "	}\n",
   1350 "	tk = -1;\n",
   1351 "	yyval = yylval;\n",
   1352 "stack:\n",
   1353 "	ps++;\n",
   1354 "	if (ps-stk >= StackSize)\n",
   1355 "		return -2;\n",
   1356 "	s = n;\n",
   1357 "	ps->state = s;\n",
   1358 "	ps->val = yyval;\n",
   1359 "	goto loop;\n",
   1360 "reduce:\n",
   1361 "	ps -= yyr1[r];\n",
   1362 "	h = yyr2[r];\n",
   1363 "	s = ps->state;\n",
   1364 "	n = yygdsp[h] + s;\n",
   1365 "	if (n < 0 || n >= ActSz || yychk[n] != yyntoks+h)\n",
   1366 "		n = yygdef[h];\n",
   1367 "	else\n",
   1368 "		n = yyact[n];\n",
   1369 "	switch (r) {\n",
   1370 0
   1371 };
   1372 
   1373 char *code1[] = {
   1374 "	}\n",
   1375 "	goto stack;\n",
   1376 "}\n",
   1377 0
   1378 };