rega.c (14235B)
1 #include "all.h" 2 3 #ifdef TEST_PMOV 4 #undef assert 5 #define assert(x) assert_test(#x, x) 6 #endif 7 8 typedef struct RMap RMap; 9 10 struct RMap { 11 int t[Tmp0]; 12 int r[Tmp0]; 13 int w[Tmp0]; /* wait list, for unmatched hints */ 14 BSet b[1]; 15 int n; 16 }; 17 18 static bits regu; /* registers used */ 19 static Tmp *tmp; /* function temporaries */ 20 static Mem *mem; /* function mem references */ 21 static struct { 22 Ref src, dst; 23 int cls; 24 } pm[Tmp0]; /* parallel move constructed */ 25 static int npm; /* size of pm */ 26 static int loop; /* current loop level */ 27 28 static uint stmov; /* stats: added moves */ 29 static uint stblk; /* stats: added blocks */ 30 31 static int * 32 hint(int t) 33 { 34 return &tmp[phicls(t, tmp)].hint.r; 35 } 36 37 static void 38 sethint(int t, int r) 39 { 40 Tmp *p; 41 42 p = &tmp[phicls(t, tmp)]; 43 if (p->hint.r == -1 || p->hint.w > loop) { 44 p->hint.r = r; 45 p->hint.w = loop; 46 tmp[t].visit = -1; 47 } 48 } 49 50 static void 51 rcopy(RMap *ma, RMap *mb) 52 { 53 memcpy(ma->t, mb->t, sizeof ma->t); 54 memcpy(ma->r, mb->r, sizeof ma->r); 55 memcpy(ma->w, mb->w, sizeof ma->w); 56 bscopy(ma->b, mb->b); 57 ma->n = mb->n; 58 } 59 60 static int 61 rfind(RMap *m, int t) 62 { 63 int i; 64 65 for (i=0; i<m->n; i++) 66 if (m->t[i] == t) 67 return m->r[i]; 68 return -1; 69 } 70 71 static Ref 72 rref(RMap *m, int t) 73 { 74 int r, s; 75 76 r = rfind(m, t); 77 if (r == -1) { 78 s = tmp[t].slot; 79 assert(s != -1 && "should have spilled"); 80 return SLOT(s); 81 } else 82 return TMP(r); 83 } 84 85 static void 86 radd(RMap *m, int t, int r) 87 { 88 assert((t >= Tmp0 || t == r) && "invalid temporary"); 89 assert(((T.gpr0 <= r && r < T.gpr0 + T.ngpr) 90 || (T.fpr0 <= r && r < T.fpr0 + T.nfpr)) 91 && "invalid register"); 92 assert(!bshas(m->b, t) && "temporary has mapping"); 93 assert(!bshas(m->b, r) && "register already allocated"); 94 assert(m->n <= T.ngpr+T.nfpr && "too many mappings"); 95 bsset(m->b, t); 96 bsset(m->b, r); 97 m->t[m->n] = t; 98 m->r[m->n] = r; 99 m->n++; 100 regu |= BIT(r); 101 } 102 103 static Ref 104 ralloctry(RMap *m, int t, int try) 105 { 106 bits regs; 107 int h, r, r0, r1; 108 109 if (t < Tmp0) { 110 assert(bshas(m->b, t)); 111 return TMP(t); 112 } 113 if (bshas(m->b, t)) { 114 r = rfind(m, t); 115 assert(r != -1); 116 return TMP(r); 117 } 118 r = tmp[t].visit; 119 if (r == -1 || bshas(m->b, r)) 120 r = *hint(t); 121 if (r == -1 || bshas(m->b, r)) { 122 if (try) 123 return R; 124 regs = tmp[phicls(t, tmp)].hint.m; 125 regs |= m->b->t[0]; 126 if (KBASE(tmp[t].cls) == 0) { 127 r0 = T.gpr0; 128 r1 = r0 + T.ngpr; 129 } else { 130 r0 = T.fpr0; 131 r1 = r0 + T.nfpr; 132 } 133 for (r=r0; r<r1; r++) 134 if (!(regs & BIT(r))) 135 goto Found; 136 for (r=r0; r<r1; r++) 137 if (!bshas(m->b, r)) 138 goto Found; 139 die("no more regs"); 140 } 141 Found: 142 radd(m, t, r); 143 sethint(t, r); 144 tmp[t].visit = r; 145 h = *hint(t); 146 if (h != -1 && h != r) 147 m->w[h] = t; 148 return TMP(r); 149 } 150 151 static inline Ref 152 ralloc(RMap *m, int t) 153 { 154 return ralloctry(m, t, 0); 155 } 156 157 static int 158 rfree(RMap *m, int t) 159 { 160 int i, r; 161 162 assert(t >= Tmp0 || !(BIT(t) & T.rglob)); 163 if (!bshas(m->b, t)) 164 return -1; 165 for (i=0; m->t[i] != t; i++) 166 assert(i+1 < m->n); 167 r = m->r[i]; 168 bsclr(m->b, t); 169 bsclr(m->b, r); 170 m->n--; 171 memmove(&m->t[i], &m->t[i+1], (m->n-i) * sizeof m->t[0]); 172 memmove(&m->r[i], &m->r[i+1], (m->n-i) * sizeof m->r[0]); 173 assert(t >= Tmp0 || t == r); 174 return r; 175 } 176 177 static void 178 mdump(RMap *m) 179 { 180 int i; 181 182 for (i=0; i<m->n; i++) 183 if (m->t[i] >= Tmp0) 184 fprintf(stderr, " (%s, R%d)", 185 tmp[m->t[i]].name, 186 m->r[i]); 187 fprintf(stderr, "\n"); 188 } 189 190 static void 191 pmadd(Ref src, Ref dst, int k) 192 { 193 if (npm == Tmp0) 194 die("cannot have more moves than registers"); 195 pm[npm].src = src; 196 pm[npm].dst = dst; 197 pm[npm].cls = k; 198 npm++; 199 } 200 201 enum PMStat { ToMove, Moving, Moved }; 202 203 static int 204 pmrec(enum PMStat *status, int i, int *k) 205 { 206 int j, c; 207 208 /* note, this routine might emit 209 * too many large instructions 210 */ 211 if (req(pm[i].src, pm[i].dst)) { 212 status[i] = Moved; 213 return -1; 214 } 215 assert(KBASE(pm[i].cls) == KBASE(*k)); 216 assert((Kw|Kl) == Kl && (Ks|Kd) == Kd); 217 *k |= pm[i].cls; 218 for (j=0; j<npm; j++) 219 if (req(pm[j].dst, pm[i].src)) 220 break; 221 switch (j == npm ? Moved : status[j]) { 222 case Moving: 223 c = j; /* start of cycle */ 224 emit(Oswap, *k, R, pm[i].src, pm[i].dst); 225 break; 226 case ToMove: 227 status[i] = Moving; 228 c = pmrec(status, j, k); 229 if (c == i) { 230 c = -1; /* end of cycle */ 231 break; 232 } 233 if (c != -1) { 234 emit(Oswap, *k, R, pm[i].src, pm[i].dst); 235 break; 236 } 237 /* fall through */ 238 case Moved: 239 c = -1; 240 emit(Ocopy, pm[i].cls, pm[i].dst, pm[i].src, R); 241 break; 242 default: 243 die("unreachable"); 244 } 245 status[i] = Moved; 246 return c; 247 } 248 249 static void 250 pmgen() 251 { 252 int i; 253 enum PMStat *status; 254 255 status = alloc(npm * sizeof status[0]); 256 assert(!npm || status[npm-1] == ToMove); 257 for (i=0; i<npm; i++) 258 if (status[i] == ToMove) 259 pmrec(status, i, (int[]){pm[i].cls}); 260 } 261 262 static void 263 move(int r, Ref to, RMap *m) 264 { 265 int n, t, r1; 266 267 r1 = req(to, R) ? -1 : rfree(m, to.val); 268 if (bshas(m->b, r)) { 269 /* r is used and not by to */ 270 assert(r1 != r); 271 for (n=0; m->r[n] != r; n++) 272 assert(n+1 < m->n); 273 t = m->t[n]; 274 rfree(m, t); 275 bsset(m->b, r); 276 ralloc(m, t); 277 bsclr(m->b, r); 278 } 279 t = req(to, R) ? r : to.val; 280 radd(m, t, r); 281 } 282 283 static int 284 regcpy(Ins *i) 285 { 286 return i->op == Ocopy && isreg(i->arg[0]); 287 } 288 289 static Ins * 290 dopm(Blk *b, Ins *i, RMap *m) 291 { 292 RMap m0; 293 int n, r, r1, t, s; 294 Ins *i1, *ip; 295 bits def; 296 297 m0 = *m; /* okay since we don't use m0.b */ 298 m0.b->t = 0; 299 i1 = ++i; 300 do { 301 i--; 302 move(i->arg[0].val, i->to, m); 303 } while (i != b->ins && regcpy(i-1)); 304 assert(m0.n <= m->n); 305 if (i != b->ins && (i-1)->op == Ocall) { 306 def = T.retregs((i-1)->arg[1], 0) | T.rglob; 307 for (r=0; T.rsave[r]>=0; r++) 308 if (!(BIT(T.rsave[r]) & def)) 309 move(T.rsave[r], R, m); 310 } 311 for (npm=0, n=0; n<m->n; n++) { 312 t = m->t[n]; 313 s = tmp[t].slot; 314 r1 = m->r[n]; 315 r = rfind(&m0, t); 316 if (r != -1) 317 pmadd(TMP(r1), TMP(r), tmp[t].cls); 318 else if (s != -1) 319 pmadd(TMP(r1), SLOT(s), tmp[t].cls); 320 } 321 for (ip=i; ip<i1; ip++) { 322 if (!req(ip->to, R)) 323 rfree(m, ip->to.val); 324 r = ip->arg[0].val; 325 if (rfind(m, r) == -1) 326 radd(m, r, r); 327 } 328 pmgen(); 329 return i; 330 } 331 332 static int 333 prio1(Ref r1, Ref r2) 334 { 335 /* trivial heuristic to begin with, 336 * later we can use the distance to 337 * the definition instruction 338 */ 339 (void) r2; 340 return *hint(r1.val) != -1; 341 } 342 343 static void 344 insert(Ref *r, Ref **rs, int p) 345 { 346 int i; 347 348 rs[i = p] = r; 349 while (i-- > 0 && prio1(*r, *rs[i])) { 350 rs[i+1] = rs[i]; 351 rs[i] = r; 352 } 353 } 354 355 static void 356 doblk(Blk *b, RMap *cur) 357 { 358 int t, x, r, rf, rt, nr; 359 bits rs; 360 Ins *i, *i1; 361 Mem *m; 362 Ref *ra[4]; 363 364 if (rtype(b->jmp.arg) == RTmp) 365 b->jmp.arg = ralloc(cur, b->jmp.arg.val); 366 curi = &insb[NIns]; 367 for (i1=&b->ins[b->nins]; i1!=b->ins;) { 368 emiti(*--i1); 369 i = curi; 370 rf = -1; 371 switch (i->op) { 372 case Ocall: 373 rs = T.argregs(i->arg[1], 0) | T.rglob; 374 for (r=0; T.rsave[r]>=0; r++) 375 if (!(BIT(T.rsave[r]) & rs)) 376 rfree(cur, T.rsave[r]); 377 break; 378 case Ocopy: 379 if (regcpy(i)) { 380 curi++; 381 i1 = dopm(b, i1, cur); 382 stmov += i+1 - curi; 383 continue; 384 } 385 if (isreg(i->to)) 386 if (rtype(i->arg[0]) == RTmp) 387 sethint(i->arg[0].val, i->to.val); 388 /* fall through */ 389 default: 390 if (!req(i->to, R)) { 391 assert(rtype(i->to) == RTmp); 392 r = i->to.val; 393 if (r < Tmp0 && (BIT(r) & T.rglob)) 394 break; 395 rf = rfree(cur, r); 396 if (rf == -1) { 397 assert(!isreg(i->to)); 398 curi++; 399 continue; 400 } 401 i->to = TMP(rf); 402 } 403 break; 404 } 405 for (x=0, nr=0; x<2; x++) 406 switch (rtype(i->arg[x])) { 407 case RMem: 408 m = &mem[i->arg[x].val]; 409 if (rtype(m->base) == RTmp) 410 insert(&m->base, ra, nr++); 411 if (rtype(m->index) == RTmp) 412 insert(&m->index, ra, nr++); 413 break; 414 case RTmp: 415 insert(&i->arg[x], ra, nr++); 416 break; 417 } 418 for (r=0; r<nr; r++) 419 *ra[r] = ralloc(cur, ra[r]->val); 420 if (i->op == Ocopy && req(i->to, i->arg[0])) 421 curi++; 422 423 /* try to change the register of a hinted 424 * temporary if rf is available */ 425 if (rf != -1 && (t = cur->w[rf]) != 0) 426 if (!bshas(cur->b, rf) && *hint(t) == rf 427 && (rt = rfree(cur, t)) != -1) { 428 tmp[t].visit = -1; 429 ralloc(cur, t); 430 assert(bshas(cur->b, rf)); 431 emit(Ocopy, tmp[t].cls, TMP(rt), TMP(rf), R); 432 stmov += 1; 433 cur->w[rf] = 0; 434 for (r=0; r<nr; r++) 435 if (req(*ra[r], TMP(rt))) 436 *ra[r] = TMP(rf); 437 /* one could iterate this logic with 438 * the newly freed rt, but in this case 439 * the above loop must be changed */ 440 } 441 } 442 b->nins = &insb[NIns] - curi; 443 idup(&b->ins, curi, b->nins); 444 } 445 446 /* qsort() comparison function to peel 447 * loop nests from inside out */ 448 static int 449 carve(const void *a, const void *b) 450 { 451 Blk *ba, *bb; 452 453 /* todo, evaluate if this order is really 454 * better than the simple postorder */ 455 ba = *(Blk**)a; 456 bb = *(Blk**)b; 457 if (ba->loop == bb->loop) 458 return ba->id > bb->id ? -1 : ba->id < bb->id; 459 return ba->loop > bb->loop ? -1 : +1; 460 } 461 462 /* comparison function to order temporaries 463 * for allocation at the end of blocks */ 464 static int 465 prio2(int t1, int t2) 466 { 467 if ((tmp[t1].visit ^ tmp[t2].visit) < 0) /* != signs */ 468 return tmp[t1].visit != -1 ? +1 : -1; 469 if ((*hint(t1) ^ *hint(t2)) < 0) 470 return *hint(t1) != -1 ? +1 : -1; 471 return tmp[t1].cost - tmp[t2].cost; 472 } 473 474 /* register allocation 475 * depends on rpo, phi, cost, (and obviously spill) 476 */ 477 void 478 rega(Fn *fn) 479 { 480 int j, t, r, x, rl[Tmp0]; 481 Blk *b, *b1, *s, ***ps, *blist, **blk, **bp; 482 RMap *end, *beg, cur, old, *m; 483 Ins *i; 484 Phi *p; 485 uint u, n; 486 Ref src, dst; 487 488 /* 1. setup */ 489 stmov = 0; 490 stblk = 0; 491 regu = 0; 492 tmp = fn->tmp; 493 mem = fn->mem; 494 blk = alloc(fn->nblk * sizeof blk[0]); 495 end = alloc(fn->nblk * sizeof end[0]); 496 beg = alloc(fn->nblk * sizeof beg[0]); 497 for (n=0; n<fn->nblk; n++) { 498 bsinit(end[n].b, fn->ntmp); 499 bsinit(beg[n].b, fn->ntmp); 500 } 501 bsinit(cur.b, fn->ntmp); 502 bsinit(old.b, fn->ntmp); 503 504 loop = INT_MAX; 505 for (t=0; t<fn->ntmp; t++) { 506 tmp[t].hint.r = t < Tmp0 ? t : -1; 507 tmp[t].hint.w = loop; 508 tmp[t].visit = -1; 509 } 510 for (bp=blk, b=fn->start; b; b=b->link) 511 *bp++ = b; 512 qsort(blk, fn->nblk, sizeof blk[0], carve); 513 for (b=fn->start, i=b->ins; i<&b->ins[b->nins]; i++) 514 if (i->op != Ocopy || !isreg(i->arg[0])) 515 break; 516 else { 517 assert(rtype(i->to) == RTmp); 518 sethint(i->to.val, i->arg[0].val); 519 } 520 521 /* 2. assign registers */ 522 for (bp=blk; bp<&blk[fn->nblk]; bp++) { 523 b = *bp; 524 n = b->id; 525 loop = b->loop; 526 cur.n = 0; 527 bszero(cur.b); 528 memset(cur.w, 0, sizeof cur.w); 529 for (x=0, t=Tmp0; bsiter(b->out, &t); t++) { 530 j = x++; 531 rl[j] = t; 532 while (j-- > 0 && prio2(t, rl[j]) > 0) { 533 rl[j+1] = rl[j]; 534 rl[j] = t; 535 } 536 } 537 for (r=0; bsiter(b->out, &r) && r<Tmp0; r++) 538 radd(&cur, r, r); 539 for (j=0; j<x; j++) 540 ralloctry(&cur, rl[j], 1); 541 for (j=0; j<x; j++) 542 ralloc(&cur, rl[j]); 543 rcopy(&end[n], &cur); 544 doblk(b, &cur); 545 bscopy(b->in, cur.b); 546 for (p=b->phi; p; p=p->link) 547 if (rtype(p->to) == RTmp) 548 bsclr(b->in, p->to.val); 549 rcopy(&beg[n], &cur); 550 } 551 552 /* 3. emit copies shared by multiple edges 553 * to the same block */ 554 for (s=fn->start; s; s=s->link) { 555 if (s->npred <= 1) 556 continue; 557 m = &beg[s->id]; 558 559 /* rl maps a register that is live at the 560 * beginning of s to the one used in all 561 * predecessors (if any, -1 otherwise) */ 562 memset(rl, 0, sizeof rl); 563 564 /* to find the register of a phi in a 565 * predecessor, we have to find the 566 * corresponding argument */ 567 for (p=s->phi; p; p=p->link) { 568 if (rtype(p->to) != RTmp 569 || (r=rfind(m, p->to.val)) == -1) 570 continue; 571 for (u=0; u<p->narg; u++) { 572 b = p->blk[u]; 573 src = p->arg[u]; 574 if (rtype(src) != RTmp) 575 continue; 576 x = rfind(&end[b->id], src.val); 577 if (x == -1) /* spilled */ 578 continue; 579 rl[r] = (!rl[r] || rl[r] == x) ? x : -1; 580 } 581 if (rl[r] == 0) 582 rl[r] = -1; 583 } 584 585 /* process non-phis temporaries */ 586 for (j=0; j<m->n; j++) { 587 t = m->t[j]; 588 r = m->r[j]; 589 if (rl[r] || t < Tmp0 /* todo, remove this */) 590 continue; 591 for (bp=s->pred; bp<&s->pred[s->npred]; bp++) { 592 x = rfind(&end[(*bp)->id], t); 593 if (x == -1) /* spilled */ 594 continue; 595 rl[r] = (!rl[r] || rl[r] == x) ? x : -1; 596 } 597 if (rl[r] == 0) 598 rl[r] = -1; 599 } 600 601 npm = 0; 602 for (j=0; j<m->n; j++) { 603 t = m->t[j]; 604 r = m->r[j]; 605 x = rl[r]; 606 assert(x != 0 || t < Tmp0 /* todo, ditto */); 607 if (x > 0 && !bshas(m->b, x)) { 608 pmadd(TMP(x), TMP(r), tmp[t].cls); 609 m->r[j] = x; 610 bsset(m->b, x); 611 } 612 } 613 curi = &insb[NIns]; 614 pmgen(); 615 j = &insb[NIns] - curi; 616 if (j == 0) 617 continue; 618 stmov += j; 619 s->nins += j; 620 i = alloc(s->nins * sizeof(Ins)); 621 icpy(icpy(i, curi, j), s->ins, s->nins-j); 622 s->ins = i; 623 } 624 625 if (debug['R']) { 626 fprintf(stderr, "\n> Register mappings:\n"); 627 for (n=0; n<fn->nblk; n++) { 628 b = fn->rpo[n]; 629 fprintf(stderr, "\t%-10s beg", b->name); 630 mdump(&beg[n]); 631 fprintf(stderr, "\t end"); 632 mdump(&end[n]); 633 } 634 fprintf(stderr, "\n"); 635 } 636 637 /* 4. emit remaining copies in new blocks */ 638 blist = 0; 639 for (b=fn->start;; b=b->link) { 640 ps = (Blk**[3]){&b->s1, &b->s2, (Blk*[1]){0}}; 641 for (; (s=**ps); ps++) { 642 npm = 0; 643 for (p=s->phi; p; p=p->link) { 644 dst = p->to; 645 assert(rtype(dst)==RSlot || rtype(dst)==RTmp); 646 if (rtype(dst) == RTmp) { 647 r = rfind(&beg[s->id], dst.val); 648 if (r == -1) 649 continue; 650 dst = TMP(r); 651 } 652 for (u=0; p->blk[u]!=b; u++) 653 assert(u+1 < p->narg); 654 src = p->arg[u]; 655 if (rtype(src) == RTmp) 656 src = rref(&end[b->id], src.val); 657 pmadd(src, dst, p->cls); 658 } 659 for (t=Tmp0; bsiter(s->in, &t); t++) { 660 src = rref(&end[b->id], t); 661 dst = rref(&beg[s->id], t); 662 pmadd(src, dst, tmp[t].cls); 663 } 664 curi = &insb[NIns]; 665 pmgen(); 666 if (curi == &insb[NIns]) 667 continue; 668 b1 = newblk(); 669 b1->loop = (b->loop+s->loop) / 2; 670 b1->link = blist; 671 blist = b1; 672 fn->nblk++; 673 strf(b1->name, "%s_%s", b->name, s->name); 674 b1->nins = &insb[NIns] - curi; 675 stmov += b1->nins; 676 stblk += 1; 677 idup(&b1->ins, curi, b1->nins); 678 b1->jmp.type = Jjmp; 679 b1->s1 = s; 680 **ps = b1; 681 } 682 if (!b->link) { 683 b->link = blist; 684 break; 685 } 686 } 687 for (b=fn->start; b; b=b->link) 688 b->phi = 0; 689 fn->reg = regu; 690 691 if (debug['R']) { 692 fprintf(stderr, "\n> Register allocation statistics:\n"); 693 fprintf(stderr, "\tnew moves: %d\n", stmov); 694 fprintf(stderr, "\tnew blocks: %d\n", stblk); 695 fprintf(stderr, "\n> After register allocation:\n"); 696 printfn(fn, stderr); 697 } 698 }