load.c (9596B)
1 #include "all.h" 2 3 #define MASK(w) (BIT(8*(w)-1)*2-1) /* must work when w==8 */ 4 5 typedef struct Loc Loc; 6 typedef struct Slice Slice; 7 typedef struct Insert Insert; 8 9 struct Loc { 10 enum { 11 LRoot, /* right above the original load */ 12 LLoad, /* inserting a load is allowed */ 13 LNoLoad, /* only scalar operations allowed */ 14 } type; 15 uint off; 16 Blk *blk; 17 }; 18 19 struct Slice { 20 Ref ref; 21 int off; 22 short sz; 23 short cls; /* load class */ 24 }; 25 26 struct Insert { 27 uint isphi:1; 28 uint num:31; 29 uint bid; 30 uint off; 31 union { 32 Ins ins; 33 struct { 34 Slice m; 35 Phi *p; 36 } phi; 37 } new; 38 }; 39 40 static Fn *curf; 41 static uint inum; /* current insertion number */ 42 static Insert *ilog; /* global insertion log */ 43 static uint nlog; /* number of entries in the log */ 44 45 int 46 loadsz(Ins *l) 47 { 48 switch (l->op) { 49 case Oloadsb: case Oloadub: return 1; 50 case Oloadsh: case Oloaduh: return 2; 51 case Oloadsw: case Oloaduw: return 4; 52 case Oload: return KWIDE(l->cls) ? 8 : 4; 53 } 54 die("unreachable"); 55 } 56 57 int 58 storesz(Ins *s) 59 { 60 switch (s->op) { 61 case Ostoreb: return 1; 62 case Ostoreh: return 2; 63 case Ostorew: case Ostores: return 4; 64 case Ostorel: case Ostored: return 8; 65 } 66 die("unreachable"); 67 } 68 69 static Ref 70 iins(int cls, int op, Ref a0, Ref a1, Loc *l) 71 { 72 Insert *ist; 73 74 vgrow(&ilog, ++nlog); 75 ist = &ilog[nlog-1]; 76 ist->isphi = 0; 77 ist->num = inum++; 78 ist->bid = l->blk->id; 79 ist->off = l->off; 80 ist->new.ins = (Ins){op, cls, R, {a0, a1}}; 81 return ist->new.ins.to = newtmp("ld", cls, curf); 82 } 83 84 static void 85 cast(Ref *r, int cls, Loc *l) 86 { 87 int cls0; 88 89 if (rtype(*r) == RCon) 90 return; 91 assert(rtype(*r) == RTmp); 92 cls0 = curf->tmp[r->val].cls; 93 if (cls0 == cls || (cls == Kw && cls0 == Kl)) 94 return; 95 if (KWIDE(cls0) < KWIDE(cls)) { 96 if (cls0 == Ks) 97 *r = iins(Kw, Ocast, *r, R, l); 98 *r = iins(Kl, Oextuw, *r, R, l); 99 if (cls == Kd) 100 *r = iins(Kd, Ocast, *r, R, l); 101 } else { 102 if (cls0 == Kd && cls != Kl) 103 *r = iins(Kl, Ocast, *r, R, l); 104 if (cls0 != Kd || cls != Kw) 105 *r = iins(cls, Ocast, *r, R, l); 106 } 107 } 108 109 static inline void 110 mask(int cls, Ref *r, bits msk, Loc *l) 111 { 112 cast(r, cls, l); 113 *r = iins(cls, Oand, *r, getcon(msk, curf), l); 114 } 115 116 static Ref 117 load(Slice sl, bits msk, Loc *l) 118 { 119 Alias *a; 120 Ref r, r1; 121 int ld, cls, all; 122 Con c; 123 124 ld = (int[]){ 125 [1] = Oloadub, 126 [2] = Oloaduh, 127 [4] = Oloaduw, 128 [8] = Oload 129 }[sl.sz]; 130 all = msk == MASK(sl.sz); 131 if (all) 132 cls = sl.cls; 133 else 134 cls = sl.sz > 4 ? Kl : Kw; 135 r = sl.ref; 136 /* sl.ref might not be live here, 137 * but its alias base ref will be 138 * (see killsl() below) */ 139 if (rtype(r) == RTmp) { 140 a = &curf->tmp[r.val].alias; 141 switch (a->type) { 142 default: 143 die("unreachable"); 144 case ALoc: 145 case AEsc: 146 case AUnk: 147 r = TMP(a->base); 148 if (!a->offset) 149 break; 150 r1 = getcon(a->offset, curf); 151 r = iins(Kl, Oadd, r, r1, l); 152 break; 153 case ACon: 154 case ASym: 155 memset(&c, 0, sizeof c); 156 c.type = CAddr; 157 c.sym = a->u.sym; 158 c.bits.i = a->offset; 159 r = newcon(&c, curf); 160 break; 161 } 162 } 163 r = iins(cls, ld, r, R, l); 164 if (!all) 165 mask(cls, &r, msk, l); 166 return r; 167 } 168 169 static int 170 killsl(Ref r, Slice sl) 171 { 172 Alias *a; 173 174 if (rtype(sl.ref) != RTmp) 175 return 0; 176 a = &curf->tmp[sl.ref.val].alias; 177 switch (a->type) { 178 default: die("unreachable"); 179 case ALoc: 180 case AEsc: 181 case AUnk: return req(TMP(a->base), r); 182 case ACon: 183 case ASym: return 0; 184 } 185 } 186 187 /* returns a ref containing the contents of the slice 188 * passed as argument, all the bits set to 0 in the 189 * mask argument are zeroed in the result; 190 * the returned ref has an integer class when the 191 * mask does not cover all the bits of the slice, 192 * otherwise, it has class sl.cls 193 * the procedure returns R when it fails */ 194 static Ref 195 def(Slice sl, bits msk, Blk *b, Ins *i, Loc *il) 196 { 197 Slice sl1; 198 Blk *bp; 199 bits msk1, msks; 200 int off, cls, cls1, op, sz, ld; 201 uint np, oldl, oldt; 202 Ref r, r1; 203 Phi *p; 204 Insert *ist; 205 Loc l; 206 207 /* invariants: 208 * -1- b dominates il->blk; so we can use 209 * temporaries of b in il->blk 210 * -2- if il->type != LNoLoad, then il->blk 211 * postdominates the original load; so it 212 * is safe to load in il->blk 213 * -3- if il->type != LNoLoad, then b 214 * postdominates il->blk (and by 2, the 215 * original load) 216 */ 217 assert(dom(b, il->blk)); 218 oldl = nlog; 219 oldt = curf->ntmp; 220 if (0) { 221 Load: 222 curf->ntmp = oldt; 223 nlog = oldl; 224 if (il->type != LLoad) 225 return R; 226 return load(sl, msk, il); 227 } 228 229 if (!i) 230 i = &b->ins[b->nins]; 231 cls = sl.sz > 4 ? Kl : Kw; 232 msks = MASK(sl.sz); 233 234 while (i > b->ins) { 235 --i; 236 if (killsl(i->to, sl) 237 || (i->op == Ocall && escapes(sl.ref, curf))) 238 goto Load; 239 ld = isload(i->op); 240 if (ld) { 241 sz = loadsz(i); 242 r1 = i->arg[0]; 243 r = i->to; 244 } else if (isstore(i->op)) { 245 sz = storesz(i); 246 r1 = i->arg[1]; 247 r = i->arg[0]; 248 } else if (i->op == Oblit1) { 249 assert(rtype(i->arg[0]) == RInt); 250 sz = abs(rsval(i->arg[0])); 251 assert(i > b->ins); 252 --i; 253 assert(i->op == Oblit0); 254 r1 = i->arg[1]; 255 } else 256 continue; 257 switch (alias(sl.ref, sl.off, sl.sz, r1, sz, &off, curf)) { 258 case MustAlias: 259 if (i->op == Oblit0) { 260 sl1 = sl; 261 sl1.ref = i->arg[0]; 262 if (off >= 0) { 263 assert(off < sz); 264 sl1.off = off; 265 sz -= off; 266 off = 0; 267 } else { 268 sl1.off = 0; 269 sl1.sz += off; 270 } 271 if (sz > sl1.sz) 272 sz = sl1.sz; 273 assert(sz <= 8); 274 sl1.sz = sz; 275 } 276 if (off < 0) { 277 off = -off; 278 msk1 = (MASK(sz) << 8*off) & msks; 279 op = Oshl; 280 } else { 281 msk1 = (MASK(sz) >> 8*off) & msks; 282 op = Oshr; 283 } 284 if ((msk1 & msk) == 0) 285 continue; 286 if (i->op == Oblit0) { 287 r = def(sl1, MASK(sz), b, i, il); 288 if (req(r, R)) 289 goto Load; 290 } 291 if (off) { 292 cls1 = cls; 293 if (op == Oshr && off + sl.sz > 4) 294 cls1 = Kl; 295 cast(&r, cls1, il); 296 r1 = getcon(8*off, curf); 297 r = iins(cls1, op, r, r1, il); 298 } 299 if ((msk1 & msk) != msk1 || off + sz < sl.sz) 300 mask(cls, &r, msk1 & msk, il); 301 if ((msk & ~msk1) != 0) { 302 r1 = def(sl, msk & ~msk1, b, i, il); 303 if (req(r1, R)) 304 goto Load; 305 r = iins(cls, Oor, r, r1, il); 306 } 307 if (msk == msks) 308 cast(&r, sl.cls, il); 309 return r; 310 case MayAlias: 311 if (ld) 312 continue; 313 else 314 goto Load; 315 case NoAlias: 316 continue; 317 default: 318 die("unreachable"); 319 } 320 } 321 322 for (ist=ilog; ist<&ilog[nlog]; ++ist) 323 if (ist->isphi && ist->bid == b->id) 324 if (req(ist->new.phi.m.ref, sl.ref)) 325 if (ist->new.phi.m.off == sl.off) 326 if (ist->new.phi.m.sz == sl.sz) { 327 r = ist->new.phi.p->to; 328 if (msk != msks) 329 mask(cls, &r, msk, il); 330 else 331 cast(&r, sl.cls, il); 332 return r; 333 } 334 335 for (p=b->phi; p; p=p->link) 336 if (killsl(p->to, sl)) 337 /* scanning predecessors in that 338 * case would be unsafe */ 339 goto Load; 340 341 if (b->npred == 0) 342 goto Load; 343 if (b->npred == 1) { 344 bp = b->pred[0]; 345 assert(bp->loop >= il->blk->loop); 346 l = *il; 347 if (bp->s2) 348 l.type = LNoLoad; 349 r1 = def(sl, msk, bp, 0, &l); 350 if (req(r1, R)) 351 goto Load; 352 return r1; 353 } 354 355 r = newtmp("ld", sl.cls, curf); 356 p = alloc(sizeof *p); 357 vgrow(&ilog, ++nlog); 358 ist = &ilog[nlog-1]; 359 ist->isphi = 1; 360 ist->bid = b->id; 361 ist->new.phi.m = sl; 362 ist->new.phi.p = p; 363 p->to = r; 364 p->cls = sl.cls; 365 p->narg = b->npred; 366 p->arg = vnew(p->narg, sizeof p->arg[0], PFn); 367 p->blk = vnew(p->narg, sizeof p->blk[0], PFn); 368 for (np=0; np<b->npred; ++np) { 369 bp = b->pred[np]; 370 if (!bp->s2 371 && il->type != LNoLoad 372 && bp->loop < il->blk->loop) 373 l.type = LLoad; 374 else 375 l.type = LNoLoad; 376 l.blk = bp; 377 l.off = bp->nins; 378 r1 = def(sl, msks, bp, 0, &l); 379 if (req(r1, R)) 380 goto Load; 381 p->arg[np] = r1; 382 p->blk[np] = bp; 383 } 384 if (msk != msks) 385 mask(cls, &r, msk, il); 386 return r; 387 } 388 389 static int 390 icmp(const void *pa, const void *pb) 391 { 392 Insert *a, *b; 393 int c; 394 395 a = (Insert *)pa; 396 b = (Insert *)pb; 397 if ((c = a->bid - b->bid)) 398 return c; 399 if (a->isphi && b->isphi) 400 return 0; 401 if (a->isphi) 402 return -1; 403 if (b->isphi) 404 return +1; 405 if ((c = a->off - b->off)) 406 return c; 407 return a->num - b->num; 408 } 409 410 /* require rpo ssa alias */ 411 void 412 loadopt(Fn *fn) 413 { 414 Ins *i, *ib; 415 Blk *b; 416 int sz; 417 uint n, ni, ext, nt; 418 Insert *ist; 419 Slice sl; 420 Loc l; 421 422 curf = fn; 423 ilog = vnew(0, sizeof ilog[0], PHeap); 424 nlog = 0; 425 inum = 0; 426 for (b=fn->start; b; b=b->link) 427 for (i=b->ins; i<&b->ins[b->nins]; ++i) { 428 if (!isload(i->op)) 429 continue; 430 sz = loadsz(i); 431 sl = (Slice){i->arg[0], 0, sz, i->cls}; 432 l = (Loc){LRoot, i-b->ins, b}; 433 i->arg[1] = def(sl, MASK(sz), b, i, &l); 434 } 435 qsort(ilog, nlog, sizeof ilog[0], icmp); 436 vgrow(&ilog, nlog+1); 437 ilog[nlog].bid = fn->nblk; /* add a sentinel */ 438 ib = vnew(0, sizeof(Ins), PHeap); 439 for (ist=ilog, n=0; n<fn->nblk; ++n) { 440 b = fn->rpo[n]; 441 for (; ist->bid == n && ist->isphi; ++ist) { 442 ist->new.phi.p->link = b->phi; 443 b->phi = ist->new.phi.p; 444 } 445 ni = 0; 446 nt = 0; 447 for (;;) { 448 if (ist->bid == n && ist->off == ni) 449 i = &ist++->new.ins; 450 else { 451 if (ni == b->nins) 452 break; 453 i = &b->ins[ni++]; 454 if (isload(i->op) 455 && !req(i->arg[1], R)) { 456 ext = Oextsb + i->op - Oloadsb; 457 switch (i->op) { 458 default: 459 die("unreachable"); 460 case Oloadsb: 461 case Oloadub: 462 case Oloadsh: 463 case Oloaduh: 464 i->op = ext; 465 break; 466 case Oloadsw: 467 case Oloaduw: 468 if (i->cls == Kl) { 469 i->op = ext; 470 break; 471 } 472 /* fall through */ 473 case Oload: 474 i->op = Ocopy; 475 break; 476 } 477 i->arg[0] = i->arg[1]; 478 i->arg[1] = R; 479 } 480 } 481 vgrow(&ib, ++nt); 482 ib[nt-1] = *i; 483 } 484 b->nins = nt; 485 idup(&b->ins, ib, nt); 486 } 487 vfree(ib); 488 vfree(ilog); 489 if (debug['M']) { 490 fprintf(stderr, "\n> After load elimination:\n"); 491 printfn(fn, stderr); 492 } 493 }