qbe

Internal scc patchset buffer for QBE
Log | Files | Refs | README | LICENSE

load.c (9004B)


      1 #include "all.h"
      2 
      3 #define MASK(w) (BIT(8*(w)-1)*2-1) /* must work when w==8 */
      4 
      5 typedef struct Loc Loc;
      6 typedef struct Slice Slice;
      7 typedef struct Insert Insert;
      8 
      9 
     10 struct Loc {
     11 	enum {
     12 		LRoot,   /* right above the original load */
     13 		LLoad,   /* inserting a load is allowed */
     14 		LNoLoad, /* only scalar operations allowed */
     15 	} type;
     16 	uint off;
     17 	Blk *blk;
     18 };
     19 
     20 struct Slice {
     21 	Ref ref;
     22 	short sz;
     23 	short cls; /* load class */
     24 };
     25 
     26 struct Insert {
     27 	uint isphi:1;
     28 	uint num:31;
     29 	uint bid;
     30 	uint off;
     31 	union {
     32 		Ins ins;
     33 		struct {
     34 			Slice m;
     35 			Phi *p;
     36 		} phi;
     37 	} new;
     38 };
     39 
     40 static Fn *curf;
     41 static uint inum;    /* current insertion number */
     42 static Insert *ilog; /* global insertion log */
     43 static uint nlog;    /* number of entries in the log */
     44 
     45 int
     46 loadsz(Ins *l)
     47 {
     48 	switch (l->op) {
     49 	case Oloadsb: case Oloadub: return 1;
     50 	case Oloadsh: case Oloaduh: return 2;
     51 	case Oloadsw: case Oloaduw: return 4;
     52 	case Oload: return KWIDE(l->cls) ? 8 : 4;
     53 	}
     54 	die("unreachable");
     55 }
     56 
     57 int
     58 storesz(Ins *s)
     59 {
     60 	switch (s->op) {
     61 	case Ostoreb: return 1;
     62 	case Ostoreh: return 2;
     63 	case Ostorew: case Ostores: return 4;
     64 	case Ostorel: case Ostored: return 8;
     65 	}
     66 	die("unreachable");
     67 }
     68 
     69 static Ref
     70 iins(int cls, int op, Ref a0, Ref a1, Loc *l)
     71 {
     72 	Insert *ist;
     73 
     74 	vgrow(&ilog, ++nlog);
     75 	ist = &ilog[nlog-1];
     76 	ist->isphi = 0;
     77 	ist->num = inum++;
     78 	ist->bid = l->blk->id;
     79 	ist->off = l->off;
     80 	ist->new.ins = (Ins){op, cls, R, {a0, a1}};
     81 	return ist->new.ins.to = newtmp("ld", cls, curf);
     82 }
     83 
     84 static void
     85 cast(Ref *r, int cls, Loc *l)
     86 {
     87 	int cls0;
     88 
     89 	if (rtype(*r) == RCon)
     90 		return;
     91 	assert(rtype(*r) == RTmp);
     92 	cls0 = curf->tmp[r->val].cls;
     93 	if (cls0 == cls || (cls == Kw && cls0 == Kl))
     94 		return;
     95 	if (KWIDE(cls0) < KWIDE(cls)) {
     96 		if (cls0 == Ks)
     97 			*r = iins(Kw, Ocast, *r, R, l);
     98 		*r = iins(Kl, Oextuw, *r, R, l);
     99 		if (cls == Kd)
    100 			*r = iins(Kd, Ocast, *r, R, l);
    101 	} else {
    102 		if (cls0 == Kd && cls != Kl)
    103 			*r = iins(Kl, Ocast, *r, R, l);
    104 		if (cls0 != Kd || cls != Kw)
    105 			*r = iins(cls, Ocast, *r, R, l);
    106 	}
    107 }
    108 
    109 static inline void
    110 mask(int cls, Ref *r, bits msk, Loc *l)
    111 {
    112 	cast(r, cls, l);
    113 	*r = iins(cls, Oand, *r, getcon(msk, curf), l);
    114 }
    115 
    116 static Ref
    117 load(Slice sl, bits msk, Loc *l)
    118 {
    119 	Alias *a;
    120 	Ref r, r1;
    121 	int ld, cls, all, c;
    122 
    123 	ld = (int[]){
    124 		[1] = Oloadub,
    125 		[2] = Oloaduh,
    126 		[4] = Oloaduw,
    127 		[8] = Oload
    128 	}[sl.sz];
    129 	all = msk == MASK(sl.sz);
    130 	if (all)
    131 		cls = sl.cls;
    132 	else
    133 		cls = sl.sz > 4 ? Kl : Kw;
    134 	r = sl.ref;
    135 	/* sl.ref might not be live here,
    136 	 * but its alias base ref will be
    137 	 * (see killsl() below) */
    138 	if (rtype(r) == RTmp) {
    139 		a = &curf->tmp[r.val].alias;
    140 		switch (a->type) {
    141 		default:
    142 			die("unreachable");
    143 		case ALoc:
    144 		case AEsc:
    145 		case AUnk:
    146 			r = a->base;
    147 			if (!a->offset)
    148 				break;
    149 			r1 = getcon(a->offset, curf);
    150 			r = iins(Kl, Oadd, r, r1, l);
    151 			break;
    152 		case ACon:
    153 		case ASym:
    154 			c = curf->ncon++;
    155 			vgrow(&curf->con, curf->ncon);
    156 			curf->con[c].type = CAddr;
    157 			curf->con[c].label = a->label;
    158 			curf->con[c].bits.i = a->offset;
    159 			curf->con[c].local = 0;
    160 			r = CON(c);
    161 			break;
    162 		}
    163 	}
    164 	r = iins(cls, ld, r, R, l);
    165 	if (!all)
    166 		mask(cls, &r, msk, l);
    167 	return r;
    168 }
    169 
    170 static int
    171 killsl(Ref r, Slice sl)
    172 {
    173 	Alias *a;
    174 
    175 	if (rtype(sl.ref) != RTmp)
    176 		return 0;
    177 	a = &curf->tmp[sl.ref.val].alias;
    178 	switch (a->type) {
    179 	default:   die("unreachable");
    180 	case ALoc:
    181 	case AEsc:
    182 	case AUnk: return req(a->base, r);
    183 	case ACon:
    184 	case ASym: return 0;
    185 	}
    186 }
    187 
    188 /* returns a ref containing the contents of the slice
    189  * passed as argument, all the bits set to 0 in the
    190  * mask argument are zeroed in the result;
    191  * the returned ref has an integer class when the
    192  * mask does not cover all the bits of the slice,
    193  * otherwise, it has class sl.cls
    194  * the procedure returns R when it fails */
    195 static Ref
    196 def(Slice sl, bits msk, Blk *b, Ins *i, Loc *il)
    197 {
    198 	Blk *bp;
    199 	bits msk1, msks;
    200 	int off, cls, cls1, op, sz, ld;
    201 	uint np, oldl, oldt;
    202 	Ref r, r1;
    203 	Phi *p;
    204 	Insert *ist;
    205 	Loc l;
    206 
    207 	/* invariants:
    208 	 * -1- b dominates il->blk; so we can use
    209 	 *     temporaries of b in il->blk
    210 	 * -2- if il->type != LNoLoad, then il->blk
    211 	 *     postdominates the original load; so it
    212 	 *     is safe to load in il->blk
    213 	 * -3- if il->type != LNoLoad, then b
    214 	 *     postdominates il->blk (and by 2, the
    215 	 *     original load)
    216 	 */
    217 	assert(dom(b, il->blk));
    218 	oldl = nlog;
    219 	oldt = curf->ntmp;
    220 	if (0) {
    221 	Load:
    222 		curf->ntmp = oldt;
    223 		nlog = oldl;
    224 		if (il->type != LLoad)
    225 			return R;
    226 		return load(sl, msk, il);
    227 	}
    228 
    229 	if (!i)
    230 		i = &b->ins[b->nins];
    231 	cls = sl.sz > 4 ? Kl : Kw;
    232 	msks = MASK(sl.sz);
    233 
    234 	while (i > b->ins) {
    235 		--i;
    236 		if (killsl(i->to, sl)
    237 		|| (i->op == Ocall && escapes(sl.ref, curf)))
    238 			goto Load;
    239 		ld = isload(i->op);
    240 		if (ld) {
    241 			sz = loadsz(i);
    242 			r1 = i->arg[0];
    243 			r = i->to;
    244 		} else if (isstore(i->op)) {
    245 			sz = storesz(i);
    246 			r1 = i->arg[1];
    247 			r = i->arg[0];
    248 		} else
    249 			continue;
    250 		switch (alias(sl.ref, sl.sz, r1, sz, &off, curf)) {
    251 		case MustAlias:
    252 			if (off < 0) {
    253 				off = -off;
    254 				msk1 = (MASK(sz) << 8*off) & msks;
    255 				op = Oshl;
    256 			} else {
    257 				msk1 = (MASK(sz) >> 8*off) & msks;
    258 				op = Oshr;
    259 			}
    260 			if ((msk1 & msk) == 0)
    261 				break;
    262 			if (off) {
    263 				cls1 = cls;
    264 				if (op == Oshr && off + sl.sz > 4)
    265 					cls1 = Kl;
    266 				cast(&r, cls1, il);
    267 				r1 = getcon(8*off, curf);
    268 				r = iins(cls1, op, r, r1, il);
    269 			}
    270 			if ((msk1 & msk) != msk1 || off + sz < sl.sz)
    271 				mask(cls, &r, msk1 & msk, il);
    272 			if ((msk & ~msk1) != 0) {
    273 				r1 = def(sl, msk & ~msk1, b, i, il);
    274 				if (req(r1, R))
    275 					goto Load;
    276 				r = iins(cls, Oor, r, r1, il);
    277 			}
    278 			if (msk == msks)
    279 				cast(&r, sl.cls, il);
    280 			return r;
    281 		case MayAlias:
    282 			if (ld)
    283 				break;
    284 			else
    285 				goto Load;
    286 		case NoAlias:
    287 			break;
    288 		default:
    289 			die("unreachable");
    290 		}
    291 	}
    292 
    293 	for (ist=ilog; ist<&ilog[nlog]; ++ist)
    294 		if (ist->isphi && ist->bid == b->id)
    295 		if (req(ist->new.phi.m.ref, sl.ref))
    296 		if (ist->new.phi.m.sz == sl.sz) {
    297 			r = ist->new.phi.p->to;
    298 			if (msk != msks)
    299 				mask(cls, &r, msk, il);
    300 			else
    301 				cast(&r, sl.cls, il);
    302 			return r;
    303 		}
    304 
    305 	for (p=b->phi; p; p=p->link)
    306 		if (killsl(p->to, sl))
    307 			/* scanning predecessors in that
    308 			 * case would be unsafe */
    309 			goto Load;
    310 
    311 	if (b->npred == 0)
    312 		goto Load;
    313 	if (b->npred == 1) {
    314 		bp = b->pred[0];
    315 		assert(bp->loop >= il->blk->loop);
    316 		l = *il;
    317 		if (bp->s2)
    318 			l.type = LNoLoad;
    319 		r1 = def(sl, msk, bp, 0, &l);
    320 		if (req(r1, R))
    321 			goto Load;
    322 		return r1;
    323 	}
    324 
    325 	r = newtmp("ld", sl.cls, curf);
    326 	p = alloc(sizeof *p);
    327 	vgrow(&ilog, ++nlog);
    328 	ist = &ilog[nlog-1];
    329 	ist->isphi = 1;
    330 	ist->bid = b->id;
    331 	ist->new.phi.m = sl;
    332 	ist->new.phi.p = p;
    333 	p->to = r;
    334 	p->cls = sl.cls;
    335 	p->narg = b->npred;
    336 	p->arg = vnew(p->narg, sizeof p->arg[0], Pfn);
    337 	p->blk = vnew(p->narg, sizeof p->blk[0], Pfn);
    338 	for (np=0; np<b->npred; ++np) {
    339 		bp = b->pred[np];
    340 		if (!bp->s2
    341 		&& il->type != LNoLoad
    342 		&& bp->loop < il->blk->loop)
    343 			l.type = LLoad;
    344 		else
    345 			l.type = LNoLoad;
    346 		l.blk = bp;
    347 		l.off = bp->nins;
    348 		r1 = def(sl, msks, bp, 0, &l);
    349 		if (req(r1, R))
    350 			goto Load;
    351 		p->arg[np] = r1;
    352 		p->blk[np] = bp;
    353 	}
    354 	if (msk != msks)
    355 		mask(cls, &r, msk, il);
    356 	return r;
    357 }
    358 
    359 static int
    360 icmp(const void *pa, const void *pb)
    361 {
    362 	Insert *a, *b;
    363 	int c;
    364 
    365 	a = (Insert *)pa;
    366 	b = (Insert *)pb;
    367 	if ((c = a->bid - b->bid))
    368 		return c;
    369 	if (a->isphi && b->isphi)
    370 		return 0;
    371 	if (a->isphi)
    372 		return -1;
    373 	if (b->isphi)
    374 		return +1;
    375 	if ((c = a->off - b->off))
    376 		return c;
    377 	return a->num - b->num;
    378 }
    379 
    380 /* require rpo ssa alias */
    381 void
    382 loadopt(Fn *fn)
    383 {
    384 	Ins *i, *ib;
    385 	Blk *b;
    386 	int sz;
    387 	uint n, ni, ext, nt;
    388 	Insert *ist;
    389 	Slice sl;
    390 	Loc l;
    391 
    392 	curf = fn;
    393 	ilog = vnew(0, sizeof ilog[0], Pheap);
    394 	nlog = 0;
    395 	inum = 0;
    396 	for (b=fn->start; b; b=b->link)
    397 		for (i=b->ins; i<&b->ins[b->nins]; ++i) {
    398 			if (!isload(i->op))
    399 				continue;
    400 			sz = loadsz(i);
    401 			sl = (Slice){i->arg[0], sz, i->cls};
    402 			l = (Loc){LRoot, i-b->ins, b};
    403 			i->arg[1] = def(sl, MASK(sz), b, i, &l);
    404 		}
    405 	qsort(ilog, nlog, sizeof ilog[0], icmp);
    406 	vgrow(&ilog, nlog+1);
    407 	ilog[nlog].bid = fn->nblk; /* add a sentinel */
    408 	ib = vnew(0, sizeof(Ins), Pheap);
    409 	for (ist=ilog, n=0; n<fn->nblk; ++n) {
    410 		b = fn->rpo[n];
    411 		for (; ist->bid == n && ist->isphi; ++ist) {
    412 			ist->new.phi.p->link = b->phi;
    413 			b->phi = ist->new.phi.p;
    414 		}
    415 		ni = 0;
    416 		nt = 0;
    417 		for (;;) {
    418 			if (ist->bid == n && ist->off == ni)
    419 				i = &ist++->new.ins;
    420 			else {
    421 				if (ni == b->nins)
    422 					break;
    423 				i = &b->ins[ni++];
    424 				if (isload(i->op)
    425 				&& !req(i->arg[1], R)) {
    426 					ext = Oextsb + i->op - Oloadsb;
    427 					switch (i->op) {
    428 					default:
    429 						die("unreachable");
    430 					case Oloadsb:
    431 					case Oloadub:
    432 					case Oloadsh:
    433 					case Oloaduh:
    434 						i->op = ext;
    435 						break;
    436 					case Oloadsw:
    437 					case Oloaduw:
    438 						if (i->cls == Kl) {
    439 							i->op = ext;
    440 							break;
    441 						}
    442 						/* fall through */
    443 					case Oload:
    444 						i->op = Ocopy;
    445 						break;
    446 					}
    447 					i->arg[0] = i->arg[1];
    448 					i->arg[1] = R;
    449 				}
    450 			}
    451 			vgrow(&ib, ++nt);
    452 			ib[nt-1] = *i;
    453 		}
    454 		b->nins = nt;
    455 		idup(&b->ins, ib, nt);
    456 	}
    457 	vfree(ib);
    458 	vfree(ilog);
    459 	if (debug['M']) {
    460 		fprintf(stderr, "\n> After load elimination:\n");
    461 		printfn(fn, stderr);
    462 	}
    463 }