qbe

Internal scc patchset buffer for QBE
Log | Files | Refs | README | LICENSE

load.c (9596B)


      1 #include "all.h"
      2 
      3 #define MASK(w) (BIT(8*(w)-1)*2-1) /* must work when w==8 */
      4 
      5 typedef struct Loc Loc;
      6 typedef struct Slice Slice;
      7 typedef struct Insert Insert;
      8 
      9 struct Loc {
     10 	enum {
     11 		LRoot,   /* right above the original load */
     12 		LLoad,   /* inserting a load is allowed */
     13 		LNoLoad, /* only scalar operations allowed */
     14 	} type;
     15 	uint off;
     16 	Blk *blk;
     17 };
     18 
     19 struct Slice {
     20 	Ref ref;
     21 	int off;
     22 	short sz;
     23 	short cls; /* load class */
     24 };
     25 
     26 struct Insert {
     27 	uint isphi:1;
     28 	uint num:31;
     29 	uint bid;
     30 	uint off;
     31 	union {
     32 		Ins ins;
     33 		struct {
     34 			Slice m;
     35 			Phi *p;
     36 		} phi;
     37 	} new;
     38 };
     39 
     40 static Fn *curf;
     41 static uint inum;    /* current insertion number */
     42 static Insert *ilog; /* global insertion log */
     43 static uint nlog;    /* number of entries in the log */
     44 
     45 int
     46 loadsz(Ins *l)
     47 {
     48 	switch (l->op) {
     49 	case Oloadsb: case Oloadub: return 1;
     50 	case Oloadsh: case Oloaduh: return 2;
     51 	case Oloadsw: case Oloaduw: return 4;
     52 	case Oload: return KWIDE(l->cls) ? 8 : 4;
     53 	}
     54 	die("unreachable");
     55 }
     56 
     57 int
     58 storesz(Ins *s)
     59 {
     60 	switch (s->op) {
     61 	case Ostoreb: return 1;
     62 	case Ostoreh: return 2;
     63 	case Ostorew: case Ostores: return 4;
     64 	case Ostorel: case Ostored: return 8;
     65 	}
     66 	die("unreachable");
     67 }
     68 
     69 static Ref
     70 iins(int cls, int op, Ref a0, Ref a1, Loc *l)
     71 {
     72 	Insert *ist;
     73 
     74 	vgrow(&ilog, ++nlog);
     75 	ist = &ilog[nlog-1];
     76 	ist->isphi = 0;
     77 	ist->num = inum++;
     78 	ist->bid = l->blk->id;
     79 	ist->off = l->off;
     80 	ist->new.ins = (Ins){op, cls, R, {a0, a1}};
     81 	return ist->new.ins.to = newtmp("ld", cls, curf);
     82 }
     83 
     84 static void
     85 cast(Ref *r, int cls, Loc *l)
     86 {
     87 	int cls0;
     88 
     89 	if (rtype(*r) == RCon)
     90 		return;
     91 	assert(rtype(*r) == RTmp);
     92 	cls0 = curf->tmp[r->val].cls;
     93 	if (cls0 == cls || (cls == Kw && cls0 == Kl))
     94 		return;
     95 	if (KWIDE(cls0) < KWIDE(cls)) {
     96 		if (cls0 == Ks)
     97 			*r = iins(Kw, Ocast, *r, R, l);
     98 		*r = iins(Kl, Oextuw, *r, R, l);
     99 		if (cls == Kd)
    100 			*r = iins(Kd, Ocast, *r, R, l);
    101 	} else {
    102 		if (cls0 == Kd && cls != Kl)
    103 			*r = iins(Kl, Ocast, *r, R, l);
    104 		if (cls0 != Kd || cls != Kw)
    105 			*r = iins(cls, Ocast, *r, R, l);
    106 	}
    107 }
    108 
    109 static inline void
    110 mask(int cls, Ref *r, bits msk, Loc *l)
    111 {
    112 	cast(r, cls, l);
    113 	*r = iins(cls, Oand, *r, getcon(msk, curf), l);
    114 }
    115 
    116 static Ref
    117 load(Slice sl, bits msk, Loc *l)
    118 {
    119 	Alias *a;
    120 	Ref r, r1;
    121 	int ld, cls, all;
    122 	Con c;
    123 
    124 	ld = (int[]){
    125 		[1] = Oloadub,
    126 		[2] = Oloaduh,
    127 		[4] = Oloaduw,
    128 		[8] = Oload
    129 	}[sl.sz];
    130 	all = msk == MASK(sl.sz);
    131 	if (all)
    132 		cls = sl.cls;
    133 	else
    134 		cls = sl.sz > 4 ? Kl : Kw;
    135 	r = sl.ref;
    136 	/* sl.ref might not be live here,
    137 	 * but its alias base ref will be
    138 	 * (see killsl() below) */
    139 	if (rtype(r) == RTmp) {
    140 		a = &curf->tmp[r.val].alias;
    141 		switch (a->type) {
    142 		default:
    143 			die("unreachable");
    144 		case ALoc:
    145 		case AEsc:
    146 		case AUnk:
    147 			r = TMP(a->base);
    148 			if (!a->offset)
    149 				break;
    150 			r1 = getcon(a->offset, curf);
    151 			r = iins(Kl, Oadd, r, r1, l);
    152 			break;
    153 		case ACon:
    154 		case ASym:
    155 			memset(&c, 0, sizeof c);
    156 			c.type = CAddr;
    157 			c.sym = a->u.sym;
    158 			c.bits.i = a->offset;
    159 			r = newcon(&c, curf);
    160 			break;
    161 		}
    162 	}
    163 	r = iins(cls, ld, r, R, l);
    164 	if (!all)
    165 		mask(cls, &r, msk, l);
    166 	return r;
    167 }
    168 
    169 static int
    170 killsl(Ref r, Slice sl)
    171 {
    172 	Alias *a;
    173 
    174 	if (rtype(sl.ref) != RTmp)
    175 		return 0;
    176 	a = &curf->tmp[sl.ref.val].alias;
    177 	switch (a->type) {
    178 	default:   die("unreachable");
    179 	case ALoc:
    180 	case AEsc:
    181 	case AUnk: return req(TMP(a->base), r);
    182 	case ACon:
    183 	case ASym: return 0;
    184 	}
    185 }
    186 
    187 /* returns a ref containing the contents of the slice
    188  * passed as argument, all the bits set to 0 in the
    189  * mask argument are zeroed in the result;
    190  * the returned ref has an integer class when the
    191  * mask does not cover all the bits of the slice,
    192  * otherwise, it has class sl.cls
    193  * the procedure returns R when it fails */
    194 static Ref
    195 def(Slice sl, bits msk, Blk *b, Ins *i, Loc *il)
    196 {
    197 	Slice sl1;
    198 	Blk *bp;
    199 	bits msk1, msks;
    200 	int off, cls, cls1, op, sz, ld;
    201 	uint np, oldl, oldt;
    202 	Ref r, r1;
    203 	Phi *p;
    204 	Insert *ist;
    205 	Loc l;
    206 
    207 	/* invariants:
    208 	 * -1- b dominates il->blk; so we can use
    209 	 *     temporaries of b in il->blk
    210 	 * -2- if il->type != LNoLoad, then il->blk
    211 	 *     postdominates the original load; so it
    212 	 *     is safe to load in il->blk
    213 	 * -3- if il->type != LNoLoad, then b
    214 	 *     postdominates il->blk (and by 2, the
    215 	 *     original load)
    216 	 */
    217 	assert(dom(b, il->blk));
    218 	oldl = nlog;
    219 	oldt = curf->ntmp;
    220 	if (0) {
    221 	Load:
    222 		curf->ntmp = oldt;
    223 		nlog = oldl;
    224 		if (il->type != LLoad)
    225 			return R;
    226 		return load(sl, msk, il);
    227 	}
    228 
    229 	if (!i)
    230 		i = &b->ins[b->nins];
    231 	cls = sl.sz > 4 ? Kl : Kw;
    232 	msks = MASK(sl.sz);
    233 
    234 	while (i > b->ins) {
    235 		--i;
    236 		if (killsl(i->to, sl)
    237 		|| (i->op == Ocall && escapes(sl.ref, curf)))
    238 			goto Load;
    239 		ld = isload(i->op);
    240 		if (ld) {
    241 			sz = loadsz(i);
    242 			r1 = i->arg[0];
    243 			r = i->to;
    244 		} else if (isstore(i->op)) {
    245 			sz = storesz(i);
    246 			r1 = i->arg[1];
    247 			r = i->arg[0];
    248 		} else if (i->op == Oblit1) {
    249 			assert(rtype(i->arg[0]) == RInt);
    250 			sz = abs(rsval(i->arg[0]));
    251 			assert(i > b->ins);
    252 			--i;
    253 			assert(i->op == Oblit0);
    254 			r1 = i->arg[1];
    255 		} else
    256 			continue;
    257 		switch (alias(sl.ref, sl.off, sl.sz, r1, sz, &off, curf)) {
    258 		case MustAlias:
    259 			if (i->op == Oblit0) {
    260 				sl1 = sl;
    261 				sl1.ref = i->arg[0];
    262 				if (off >= 0) {
    263 					assert(off < sz);
    264 					sl1.off = off;
    265 					sz -= off;
    266 					off = 0;
    267 				} else {
    268 					sl1.off = 0;
    269 					sl1.sz += off;
    270 				}
    271 				if (sz > sl1.sz)
    272 					sz = sl1.sz;
    273 				assert(sz <= 8);
    274 				sl1.sz = sz;
    275 			}
    276 			if (off < 0) {
    277 				off = -off;
    278 				msk1 = (MASK(sz) << 8*off) & msks;
    279 				op = Oshl;
    280 			} else {
    281 				msk1 = (MASK(sz) >> 8*off) & msks;
    282 				op = Oshr;
    283 			}
    284 			if ((msk1 & msk) == 0)
    285 				continue;
    286 			if (i->op == Oblit0) {
    287 				r = def(sl1, MASK(sz), b, i, il);
    288 				if (req(r, R))
    289 					goto Load;
    290 			}
    291 			if (off) {
    292 				cls1 = cls;
    293 				if (op == Oshr && off + sl.sz > 4)
    294 					cls1 = Kl;
    295 				cast(&r, cls1, il);
    296 				r1 = getcon(8*off, curf);
    297 				r = iins(cls1, op, r, r1, il);
    298 			}
    299 			if ((msk1 & msk) != msk1 || off + sz < sl.sz)
    300 				mask(cls, &r, msk1 & msk, il);
    301 			if ((msk & ~msk1) != 0) {
    302 				r1 = def(sl, msk & ~msk1, b, i, il);
    303 				if (req(r1, R))
    304 					goto Load;
    305 				r = iins(cls, Oor, r, r1, il);
    306 			}
    307 			if (msk == msks)
    308 				cast(&r, sl.cls, il);
    309 			return r;
    310 		case MayAlias:
    311 			if (ld)
    312 				continue;
    313 			else
    314 				goto Load;
    315 		case NoAlias:
    316 			continue;
    317 		default:
    318 			die("unreachable");
    319 		}
    320 	}
    321 
    322 	for (ist=ilog; ist<&ilog[nlog]; ++ist)
    323 		if (ist->isphi && ist->bid == b->id)
    324 		if (req(ist->new.phi.m.ref, sl.ref))
    325 		if (ist->new.phi.m.off == sl.off)
    326 		if (ist->new.phi.m.sz == sl.sz) {
    327 			r = ist->new.phi.p->to;
    328 			if (msk != msks)
    329 				mask(cls, &r, msk, il);
    330 			else
    331 				cast(&r, sl.cls, il);
    332 			return r;
    333 		}
    334 
    335 	for (p=b->phi; p; p=p->link)
    336 		if (killsl(p->to, sl))
    337 			/* scanning predecessors in that
    338 			 * case would be unsafe */
    339 			goto Load;
    340 
    341 	if (b->npred == 0)
    342 		goto Load;
    343 	if (b->npred == 1) {
    344 		bp = b->pred[0];
    345 		assert(bp->loop >= il->blk->loop);
    346 		l = *il;
    347 		if (bp->s2)
    348 			l.type = LNoLoad;
    349 		r1 = def(sl, msk, bp, 0, &l);
    350 		if (req(r1, R))
    351 			goto Load;
    352 		return r1;
    353 	}
    354 
    355 	r = newtmp("ld", sl.cls, curf);
    356 	p = alloc(sizeof *p);
    357 	vgrow(&ilog, ++nlog);
    358 	ist = &ilog[nlog-1];
    359 	ist->isphi = 1;
    360 	ist->bid = b->id;
    361 	ist->new.phi.m = sl;
    362 	ist->new.phi.p = p;
    363 	p->to = r;
    364 	p->cls = sl.cls;
    365 	p->narg = b->npred;
    366 	p->arg = vnew(p->narg, sizeof p->arg[0], PFn);
    367 	p->blk = vnew(p->narg, sizeof p->blk[0], PFn);
    368 	for (np=0; np<b->npred; ++np) {
    369 		bp = b->pred[np];
    370 		if (!bp->s2
    371 		&& il->type != LNoLoad
    372 		&& bp->loop < il->blk->loop)
    373 			l.type = LLoad;
    374 		else
    375 			l.type = LNoLoad;
    376 		l.blk = bp;
    377 		l.off = bp->nins;
    378 		r1 = def(sl, msks, bp, 0, &l);
    379 		if (req(r1, R))
    380 			goto Load;
    381 		p->arg[np] = r1;
    382 		p->blk[np] = bp;
    383 	}
    384 	if (msk != msks)
    385 		mask(cls, &r, msk, il);
    386 	return r;
    387 }
    388 
    389 static int
    390 icmp(const void *pa, const void *pb)
    391 {
    392 	Insert *a, *b;
    393 	int c;
    394 
    395 	a = (Insert *)pa;
    396 	b = (Insert *)pb;
    397 	if ((c = a->bid - b->bid))
    398 		return c;
    399 	if (a->isphi && b->isphi)
    400 		return 0;
    401 	if (a->isphi)
    402 		return -1;
    403 	if (b->isphi)
    404 		return +1;
    405 	if ((c = a->off - b->off))
    406 		return c;
    407 	return a->num - b->num;
    408 }
    409 
    410 /* require rpo ssa alias */
    411 void
    412 loadopt(Fn *fn)
    413 {
    414 	Ins *i, *ib;
    415 	Blk *b;
    416 	int sz;
    417 	uint n, ni, ext, nt;
    418 	Insert *ist;
    419 	Slice sl;
    420 	Loc l;
    421 
    422 	curf = fn;
    423 	ilog = vnew(0, sizeof ilog[0], PHeap);
    424 	nlog = 0;
    425 	inum = 0;
    426 	for (b=fn->start; b; b=b->link)
    427 		for (i=b->ins; i<&b->ins[b->nins]; ++i) {
    428 			if (!isload(i->op))
    429 				continue;
    430 			sz = loadsz(i);
    431 			sl = (Slice){i->arg[0], 0, sz, i->cls};
    432 			l = (Loc){LRoot, i-b->ins, b};
    433 			i->arg[1] = def(sl, MASK(sz), b, i, &l);
    434 		}
    435 	qsort(ilog, nlog, sizeof ilog[0], icmp);
    436 	vgrow(&ilog, nlog+1);
    437 	ilog[nlog].bid = fn->nblk; /* add a sentinel */
    438 	ib = vnew(0, sizeof(Ins), PHeap);
    439 	for (ist=ilog, n=0; n<fn->nblk; ++n) {
    440 		b = fn->rpo[n];
    441 		for (; ist->bid == n && ist->isphi; ++ist) {
    442 			ist->new.phi.p->link = b->phi;
    443 			b->phi = ist->new.phi.p;
    444 		}
    445 		ni = 0;
    446 		nt = 0;
    447 		for (;;) {
    448 			if (ist->bid == n && ist->off == ni)
    449 				i = &ist++->new.ins;
    450 			else {
    451 				if (ni == b->nins)
    452 					break;
    453 				i = &b->ins[ni++];
    454 				if (isload(i->op)
    455 				&& !req(i->arg[1], R)) {
    456 					ext = Oextsb + i->op - Oloadsb;
    457 					switch (i->op) {
    458 					default:
    459 						die("unreachable");
    460 					case Oloadsb:
    461 					case Oloadub:
    462 					case Oloadsh:
    463 					case Oloaduh:
    464 						i->op = ext;
    465 						break;
    466 					case Oloadsw:
    467 					case Oloaduw:
    468 						if (i->cls == Kl) {
    469 							i->op = ext;
    470 							break;
    471 						}
    472 						/* fall through */
    473 					case Oload:
    474 						i->op = Ocopy;
    475 						break;
    476 					}
    477 					i->arg[0] = i->arg[1];
    478 					i->arg[1] = R;
    479 				}
    480 			}
    481 			vgrow(&ib, ++nt);
    482 			ib[nt-1] = *i;
    483 		}
    484 		b->nins = nt;
    485 		idup(&b->ins, ib, nt);
    486 	}
    487 	vfree(ib);
    488 	vfree(ilog);
    489 	if (debug['M']) {
    490 		fprintf(stderr, "\n> After load elimination:\n");
    491 		printfn(fn, stderr);
    492 	}
    493 }