sysv.c (15023B)
1 #include "all.h" 2 3 typedef struct AClass AClass; 4 typedef struct RAlloc RAlloc; 5 6 struct AClass { 7 Typ *type; 8 int inmem; 9 int align; 10 uint size; 11 int cls[2]; 12 Ref ref[2]; 13 }; 14 15 struct RAlloc { 16 Ins i; 17 RAlloc *link; 18 }; 19 20 static void 21 classify(AClass *a, Typ *t, uint s) 22 { 23 Field *f; 24 int *cls; 25 uint n, s1; 26 27 for (n=0, s1=s; n<t->nunion; n++, s=s1) 28 for (f=t->fields[n]; f->type!=FEnd; f++) { 29 assert(s <= 16); 30 cls = &a->cls[s/8]; 31 switch (f->type) { 32 case FEnd: 33 die("unreachable"); 34 case FPad: 35 /* don't change anything */ 36 s += f->len; 37 break; 38 case Fs: 39 case Fd: 40 if (*cls == Kx) 41 *cls = Kd; 42 s += f->len; 43 break; 44 case Fb: 45 case Fh: 46 case Fw: 47 case Fl: 48 *cls = Kl; 49 s += f->len; 50 break; 51 case FTyp: 52 classify(a, &typ[f->len], s); 53 s += typ[f->len].size; 54 break; 55 } 56 } 57 } 58 59 static void 60 typclass(AClass *a, Typ *t) 61 { 62 uint sz, al; 63 64 sz = t->size; 65 al = 1u << t->align; 66 67 /* the ABI requires sizes to be rounded 68 * up to the nearest multiple of 8, moreover 69 * it makes it easy load and store structures 70 * in registers 71 */ 72 if (al < 8) 73 al = 8; 74 sz = (sz + al-1) & -al; 75 76 a->type = t; 77 a->size = sz; 78 a->align = t->align; 79 80 if (t->isdark || sz > 16 || sz == 0) { 81 /* large or unaligned structures are 82 * required to be passed in memory 83 */ 84 a->inmem = 1; 85 return; 86 } 87 88 a->cls[0] = Kx; 89 a->cls[1] = Kx; 90 a->inmem = 0; 91 classify(a, t, 0); 92 } 93 94 static int 95 retr(Ref reg[2], AClass *aret) 96 { 97 static int retreg[2][2] = {{RAX, RDX}, {XMM0, XMM0+1}}; 98 int n, k, ca, nr[2]; 99 100 nr[0] = nr[1] = 0; 101 ca = 0; 102 for (n=0; (uint)n*8<aret->size; n++) { 103 k = KBASE(aret->cls[n]); 104 reg[n] = TMP(retreg[k][nr[k]++]); 105 ca += 1 << (2 * k); 106 } 107 return ca; 108 } 109 110 static void 111 selret(Blk *b, Fn *fn) 112 { 113 int j, k, ca; 114 Ref r, r0, reg[2]; 115 AClass aret; 116 117 j = b->jmp.type; 118 119 if (!isret(j) || j == Jret0) 120 return; 121 122 r0 = b->jmp.arg; 123 b->jmp.type = Jret0; 124 125 if (j == Jretc) { 126 typclass(&aret, &typ[fn->retty]); 127 if (aret.inmem) { 128 assert(rtype(fn->retr) == RTmp); 129 emit(Ocopy, Kl, TMP(RAX), fn->retr, R); 130 emit(Oblit1, 0, R, INT(aret.type->size), R); 131 emit(Oblit0, 0, R, r0, fn->retr); 132 ca = 1; 133 } else { 134 ca = retr(reg, &aret); 135 if (aret.size > 8) { 136 r = newtmp("abi", Kl, fn); 137 emit(Oload, Kl, reg[1], r, R); 138 emit(Oadd, Kl, r, r0, getcon(8, fn)); 139 } 140 emit(Oload, Kl, reg[0], r0, R); 141 } 142 } else { 143 k = j - Jretw; 144 if (KBASE(k) == 0) { 145 emit(Ocopy, k, TMP(RAX), r0, R); 146 ca = 1; 147 } else { 148 emit(Ocopy, k, TMP(XMM0), r0, R); 149 ca = 1 << 2; 150 } 151 } 152 153 b->jmp.arg = CALL(ca); 154 } 155 156 static int 157 argsclass(Ins *i0, Ins *i1, AClass *ac, int op, AClass *aret, Ref *env) 158 { 159 int varc, envc, nint, ni, nsse, ns, n, *pn; 160 AClass *a; 161 Ins *i; 162 163 if (aret && aret->inmem) 164 nint = 5; /* hidden argument */ 165 else 166 nint = 6; 167 nsse = 8; 168 varc = 0; 169 envc = 0; 170 for (i=i0, a=ac; i<i1; i++, a++) 171 switch (i->op - op + Oarg) { 172 case Oarg: 173 if (KBASE(i->cls) == 0) 174 pn = &nint; 175 else 176 pn = &nsse; 177 if (*pn > 0) { 178 --*pn; 179 a->inmem = 0; 180 } else 181 a->inmem = 2; 182 a->align = 3; 183 a->size = 8; 184 a->cls[0] = i->cls; 185 break; 186 case Oargc: 187 n = i->arg[0].val; 188 typclass(a, &typ[n]); 189 if (a->inmem) 190 continue; 191 ni = ns = 0; 192 for (n=0; (uint)n*8<a->size; n++) 193 if (KBASE(a->cls[n]) == 0) 194 ni++; 195 else 196 ns++; 197 if (nint >= ni && nsse >= ns) { 198 nint -= ni; 199 nsse -= ns; 200 } else 201 a->inmem = 1; 202 break; 203 case Oarge: 204 envc = 1; 205 if (op == Opar) 206 *env = i->to; 207 else 208 *env = i->arg[0]; 209 break; 210 case Oargv: 211 varc = 1; 212 break; 213 default: 214 die("unreachable"); 215 } 216 217 if (varc && envc) 218 err("sysv abi does not support variadic env calls"); 219 220 return ((varc|envc) << 12) | ((6-nint) << 4) | ((8-nsse) << 8); 221 } 222 223 int amd64_sysv_rsave[] = { 224 RDI, RSI, RDX, RCX, R8, R9, R10, R11, RAX, 225 XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, 226 XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, -1 227 }; 228 int amd64_sysv_rclob[] = {RBX, R12, R13, R14, R15, -1}; 229 230 MAKESURE(sysv_arrays_ok, 231 sizeof amd64_sysv_rsave == (NGPS+NFPS+1) * sizeof(int) && 232 sizeof amd64_sysv_rclob == (NCLR+1) * sizeof(int) 233 ); 234 235 /* layout of call's second argument (RCall) 236 * 237 * 29 12 8 4 3 0 238 * |0...00|x|xxxx|xxxx|xx|xx| range 239 * | | | | ` gp regs returned (0..2) 240 * | | | ` sse regs returned (0..2) 241 * | | ` gp regs passed (0..6) 242 * | ` sse regs passed (0..8) 243 * ` 1 if rax is used to pass data (0..1) 244 */ 245 246 bits 247 amd64_sysv_retregs(Ref r, int p[2]) 248 { 249 bits b; 250 int ni, nf; 251 252 assert(rtype(r) == RCall); 253 b = 0; 254 ni = r.val & 3; 255 nf = (r.val >> 2) & 3; 256 if (ni >= 1) 257 b |= BIT(RAX); 258 if (ni >= 2) 259 b |= BIT(RDX); 260 if (nf >= 1) 261 b |= BIT(XMM0); 262 if (nf >= 2) 263 b |= BIT(XMM1); 264 if (p) { 265 p[0] = ni; 266 p[1] = nf; 267 } 268 return b; 269 } 270 271 bits 272 amd64_sysv_argregs(Ref r, int p[2]) 273 { 274 bits b; 275 int j, ni, nf, ra; 276 277 assert(rtype(r) == RCall); 278 b = 0; 279 ni = (r.val >> 4) & 15; 280 nf = (r.val >> 8) & 15; 281 ra = (r.val >> 12) & 1; 282 for (j=0; j<ni; j++) 283 b |= BIT(amd64_sysv_rsave[j]); 284 for (j=0; j<nf; j++) 285 b |= BIT(XMM0+j); 286 if (p) { 287 p[0] = ni + ra; 288 p[1] = nf; 289 } 290 return b | (ra ? BIT(RAX) : 0); 291 } 292 293 static Ref 294 rarg(int ty, int *ni, int *ns) 295 { 296 if (KBASE(ty) == 0) 297 return TMP(amd64_sysv_rsave[(*ni)++]); 298 else 299 return TMP(XMM0 + (*ns)++); 300 } 301 302 static void 303 selcall(Fn *fn, Ins *i0, Ins *i1, RAlloc **rap) 304 { 305 Ins *i; 306 AClass *ac, *a, aret; 307 int ca, ni, ns, al; 308 uint stk, off; 309 Ref r, r1, r2, reg[2], env; 310 RAlloc *ra; 311 312 env = R; 313 ac = alloc((i1-i0) * sizeof ac[0]); 314 315 if (!req(i1->arg[1], R)) { 316 assert(rtype(i1->arg[1]) == RType); 317 typclass(&aret, &typ[i1->arg[1].val]); 318 ca = argsclass(i0, i1, ac, Oarg, &aret, &env); 319 } else 320 ca = argsclass(i0, i1, ac, Oarg, 0, &env); 321 322 for (stk=0, a=&ac[i1-i0]; a>ac;) 323 if ((--a)->inmem) { 324 if (a->align > 4) 325 err("sysv abi requires alignments of 16 or less"); 326 stk += a->size; 327 if (a->align == 4) 328 stk += stk & 15; 329 } 330 stk += stk & 15; 331 if (stk) { 332 r = getcon(-(int64_t)stk, fn); 333 emit(Osalloc, Kl, R, r, R); 334 } 335 336 if (!req(i1->arg[1], R)) { 337 if (aret.inmem) { 338 /* get the return location from eax 339 * it saves one callee-save reg */ 340 r1 = newtmp("abi", Kl, fn); 341 emit(Ocopy, Kl, i1->to, TMP(RAX), R); 342 ca += 1; 343 } else { 344 /* todo, may read out of bounds. 345 * gcc did this up until 5.2, but 346 * this should still be fixed. 347 */ 348 if (aret.size > 8) { 349 r = newtmp("abi", Kl, fn); 350 aret.ref[1] = newtmp("abi", aret.cls[1], fn); 351 emit(Ostorel, 0, R, aret.ref[1], r); 352 emit(Oadd, Kl, r, i1->to, getcon(8, fn)); 353 } 354 aret.ref[0] = newtmp("abi", aret.cls[0], fn); 355 emit(Ostorel, 0, R, aret.ref[0], i1->to); 356 ca += retr(reg, &aret); 357 if (aret.size > 8) 358 emit(Ocopy, aret.cls[1], aret.ref[1], reg[1], R); 359 emit(Ocopy, aret.cls[0], aret.ref[0], reg[0], R); 360 r1 = i1->to; 361 } 362 /* allocate return pad */ 363 ra = alloc(sizeof *ra); 364 /* specific to NAlign == 3 */ 365 al = aret.align >= 2 ? aret.align - 2 : 0; 366 ra->i = (Ins){Oalloc+al, Kl, r1, {getcon(aret.size, fn)}}; 367 ra->link = (*rap); 368 *rap = ra; 369 } else { 370 ra = 0; 371 if (KBASE(i1->cls) == 0) { 372 emit(Ocopy, i1->cls, i1->to, TMP(RAX), R); 373 ca += 1; 374 } else { 375 emit(Ocopy, i1->cls, i1->to, TMP(XMM0), R); 376 ca += 1 << 2; 377 } 378 } 379 380 emit(Ocall, i1->cls, R, i1->arg[0], CALL(ca)); 381 382 if (!req(R, env)) 383 emit(Ocopy, Kl, TMP(RAX), env, R); 384 else if ((ca >> 12) & 1) /* vararg call */ 385 emit(Ocopy, Kw, TMP(RAX), getcon((ca >> 8) & 15, fn), R); 386 387 ni = ns = 0; 388 if (ra && aret.inmem) 389 emit(Ocopy, Kl, rarg(Kl, &ni, &ns), ra->i.to, R); /* pass hidden argument */ 390 391 for (i=i0, a=ac; i<i1; i++, a++) { 392 if (i->op >= Oarge || a->inmem) 393 continue; 394 r1 = rarg(a->cls[0], &ni, &ns); 395 if (i->op == Oargc) { 396 if (a->size > 8) { 397 r2 = rarg(a->cls[1], &ni, &ns); 398 r = newtmp("abi", Kl, fn); 399 emit(Oload, a->cls[1], r2, r, R); 400 emit(Oadd, Kl, r, i->arg[1], getcon(8, fn)); 401 } 402 emit(Oload, a->cls[0], r1, i->arg[1], R); 403 } else 404 emit(Ocopy, i->cls, r1, i->arg[0], R); 405 } 406 407 if (!stk) 408 return; 409 410 r = newtmp("abi", Kl, fn); 411 for (i=i0, a=ac, off=0; i<i1; i++, a++) { 412 if (i->op >= Oarge || !a->inmem) 413 continue; 414 r1 = newtmp("abi", Kl, fn); 415 if (i->op == Oargc) { 416 if (a->align == 4) 417 off += off & 15; 418 emit(Oblit1, 0, R, INT(a->type->size), R); 419 emit(Oblit0, 0, R, i->arg[1], r1); 420 } else 421 emit(Ostorel, 0, R, i->arg[0], r1); 422 emit(Oadd, Kl, r1, r, getcon(off, fn)); 423 off += a->size; 424 } 425 emit(Osalloc, Kl, r, getcon(stk, fn), R); 426 } 427 428 static int 429 selpar(Fn *fn, Ins *i0, Ins *i1) 430 { 431 AClass *ac, *a, aret; 432 Ins *i; 433 int ni, ns, s, al, fa; 434 Ref r, env; 435 436 env = R; 437 ac = alloc((i1-i0) * sizeof ac[0]); 438 curi = &insb[NIns]; 439 ni = ns = 0; 440 441 if (fn->retty >= 0) { 442 typclass(&aret, &typ[fn->retty]); 443 fa = argsclass(i0, i1, ac, Opar, &aret, &env); 444 } else 445 fa = argsclass(i0, i1, ac, Opar, 0, &env); 446 fn->reg = amd64_sysv_argregs(CALL(fa), 0); 447 448 for (i=i0, a=ac; i<i1; i++, a++) { 449 if (i->op != Oparc || a->inmem) 450 continue; 451 if (a->size > 8) { 452 r = newtmp("abi", Kl, fn); 453 a->ref[1] = newtmp("abi", Kl, fn); 454 emit(Ostorel, 0, R, a->ref[1], r); 455 emit(Oadd, Kl, r, i->to, getcon(8, fn)); 456 } 457 a->ref[0] = newtmp("abi", Kl, fn); 458 emit(Ostorel, 0, R, a->ref[0], i->to); 459 /* specific to NAlign == 3 */ 460 al = a->align >= 2 ? a->align - 2 : 0; 461 emit(Oalloc+al, Kl, i->to, getcon(a->size, fn), R); 462 } 463 464 if (fn->retty >= 0 && aret.inmem) { 465 r = newtmp("abi", Kl, fn); 466 emit(Ocopy, Kl, r, rarg(Kl, &ni, &ns), R); 467 fn->retr = r; 468 } 469 470 for (i=i0, a=ac, s=4; i<i1; i++, a++) { 471 switch (a->inmem) { 472 case 1: 473 if (a->align > 4) 474 err("sysv abi requires alignments of 16 or less"); 475 if (a->align == 4) 476 s = (s+3) & -4; 477 fn->tmp[i->to.val].slot = -s; 478 s += a->size / 4; 479 continue; 480 case 2: 481 emit(Oload, i->cls, i->to, SLOT(-s), R); 482 s += 2; 483 continue; 484 } 485 if (i->op == Opare) 486 continue; 487 r = rarg(a->cls[0], &ni, &ns); 488 if (i->op == Oparc) { 489 emit(Ocopy, a->cls[0], a->ref[0], r, R); 490 if (a->size > 8) { 491 r = rarg(a->cls[1], &ni, &ns); 492 emit(Ocopy, a->cls[1], a->ref[1], r, R); 493 } 494 } else 495 emit(Ocopy, i->cls, i->to, r, R); 496 } 497 498 if (!req(R, env)) 499 emit(Ocopy, Kl, env, TMP(RAX), R); 500 501 return fa | (s*4)<<12; 502 } 503 504 static Blk * 505 split(Fn *fn, Blk *b) 506 { 507 Blk *bn; 508 509 ++fn->nblk; 510 bn = newblk(); 511 bn->nins = &insb[NIns] - curi; 512 idup(&bn->ins, curi, bn->nins); 513 curi = &insb[NIns]; 514 bn->visit = ++b->visit; 515 strf(bn->name, "%s.%d", b->name, b->visit); 516 bn->loop = b->loop; 517 bn->link = b->link; 518 b->link = bn; 519 return bn; 520 } 521 522 static void 523 chpred(Blk *b, Blk *bp, Blk *bp1) 524 { 525 Phi *p; 526 uint a; 527 528 for (p=b->phi; p; p=p->link) { 529 for (a=0; p->blk[a]!=bp; a++) 530 assert(a+1<p->narg); 531 p->blk[a] = bp1; 532 } 533 } 534 535 static void 536 selvaarg(Fn *fn, Blk *b, Ins *i) 537 { 538 Ref loc, lreg, lstk, nr, r0, r1, c4, c8, c16, c, ap; 539 Blk *b0, *bstk, *breg; 540 int isint; 541 542 c4 = getcon(4, fn); 543 c8 = getcon(8, fn); 544 c16 = getcon(16, fn); 545 ap = i->arg[0]; 546 isint = KBASE(i->cls) == 0; 547 548 /* @b [...] 549 r0 =l add ap, (0 or 4) 550 nr =l loadsw r0 551 r1 =w cultw nr, (48 or 176) 552 jnz r1, @breg, @bstk 553 @breg 554 r0 =l add ap, 16 555 r1 =l loadl r0 556 lreg =l add r1, nr 557 r0 =w add nr, (8 or 16) 558 r1 =l add ap, (0 or 4) 559 storew r0, r1 560 @bstk 561 r0 =l add ap, 8 562 lstk =l loadl r0 563 r1 =l add lstk, 8 564 storel r1, r0 565 @b0 566 %loc =l phi @breg %lreg, @bstk %lstk 567 i->to =(i->cls) load %loc 568 */ 569 570 loc = newtmp("abi", Kl, fn); 571 emit(Oload, i->cls, i->to, loc, R); 572 b0 = split(fn, b); 573 b0->jmp = b->jmp; 574 b0->s1 = b->s1; 575 b0->s2 = b->s2; 576 if (b->s1) 577 chpred(b->s1, b, b0); 578 if (b->s2 && b->s2 != b->s1) 579 chpred(b->s2, b, b0); 580 581 lreg = newtmp("abi", Kl, fn); 582 nr = newtmp("abi", Kl, fn); 583 r0 = newtmp("abi", Kw, fn); 584 r1 = newtmp("abi", Kl, fn); 585 emit(Ostorew, Kw, R, r0, r1); 586 emit(Oadd, Kl, r1, ap, isint ? CON_Z : c4); 587 emit(Oadd, Kw, r0, nr, isint ? c8 : c16); 588 r0 = newtmp("abi", Kl, fn); 589 r1 = newtmp("abi", Kl, fn); 590 emit(Oadd, Kl, lreg, r1, nr); 591 emit(Oload, Kl, r1, r0, R); 592 emit(Oadd, Kl, r0, ap, c16); 593 breg = split(fn, b); 594 breg->jmp.type = Jjmp; 595 breg->s1 = b0; 596 597 lstk = newtmp("abi", Kl, fn); 598 r0 = newtmp("abi", Kl, fn); 599 r1 = newtmp("abi", Kl, fn); 600 emit(Ostorel, Kw, R, r1, r0); 601 emit(Oadd, Kl, r1, lstk, c8); 602 emit(Oload, Kl, lstk, r0, R); 603 emit(Oadd, Kl, r0, ap, c8); 604 bstk = split(fn, b); 605 bstk->jmp.type = Jjmp; 606 bstk->s1 = b0; 607 608 b0->phi = alloc(sizeof *b0->phi); 609 *b0->phi = (Phi){ 610 .cls = Kl, .to = loc, 611 .narg = 2, 612 .blk = vnew(2, sizeof b0->phi->blk[0], PFn), 613 .arg = vnew(2, sizeof b0->phi->arg[0], PFn), 614 }; 615 b0->phi->blk[0] = bstk; 616 b0->phi->blk[1] = breg; 617 b0->phi->arg[0] = lstk; 618 b0->phi->arg[1] = lreg; 619 r0 = newtmp("abi", Kl, fn); 620 r1 = newtmp("abi", Kw, fn); 621 b->jmp.type = Jjnz; 622 b->jmp.arg = r1; 623 b->s1 = breg; 624 b->s2 = bstk; 625 c = getcon(isint ? 48 : 176, fn); 626 emit(Ocmpw+Ciult, Kw, r1, nr, c); 627 emit(Oloadsw, Kl, nr, r0, R); 628 emit(Oadd, Kl, r0, ap, isint ? CON_Z : c4); 629 } 630 631 static void 632 selvastart(Fn *fn, int fa, Ref ap) 633 { 634 Ref r0, r1; 635 int gp, fp, sp; 636 637 gp = ((fa >> 4) & 15) * 8; 638 fp = 48 + ((fa >> 8) & 15) * 16; 639 sp = fa >> 12; 640 r0 = newtmp("abi", Kl, fn); 641 r1 = newtmp("abi", Kl, fn); 642 emit(Ostorel, Kw, R, r1, r0); 643 emit(Oadd, Kl, r1, TMP(RBP), getcon(-176, fn)); 644 emit(Oadd, Kl, r0, ap, getcon(16, fn)); 645 r0 = newtmp("abi", Kl, fn); 646 r1 = newtmp("abi", Kl, fn); 647 emit(Ostorel, Kw, R, r1, r0); 648 emit(Oadd, Kl, r1, TMP(RBP), getcon(sp, fn)); 649 emit(Oadd, Kl, r0, ap, getcon(8, fn)); 650 r0 = newtmp("abi", Kl, fn); 651 emit(Ostorew, Kw, R, getcon(fp, fn), r0); 652 emit(Oadd, Kl, r0, ap, getcon(4, fn)); 653 emit(Ostorew, Kw, R, getcon(gp, fn), ap); 654 } 655 656 void 657 amd64_sysv_abi(Fn *fn) 658 { 659 Blk *b; 660 Ins *i, *i0, *ip; 661 RAlloc *ral; 662 int n, fa; 663 664 for (b=fn->start; b; b=b->link) 665 b->visit = 0; 666 667 /* lower parameters */ 668 for (b=fn->start, i=b->ins; i<&b->ins[b->nins]; i++) 669 if (!ispar(i->op)) 670 break; 671 fa = selpar(fn, b->ins, i); 672 n = b->nins - (i - b->ins) + (&insb[NIns] - curi); 673 i0 = alloc(n * sizeof(Ins)); 674 ip = icpy(ip = i0, curi, &insb[NIns] - curi); 675 ip = icpy(ip, i, &b->ins[b->nins] - i); 676 b->nins = n; 677 b->ins = i0; 678 679 /* lower calls, returns, and vararg instructions */ 680 ral = 0; 681 b = fn->start; 682 do { 683 if (!(b = b->link)) 684 b = fn->start; /* do it last */ 685 if (b->visit) 686 continue; 687 curi = &insb[NIns]; 688 selret(b, fn); 689 for (i=&b->ins[b->nins]; i!=b->ins;) 690 switch ((--i)->op) { 691 default: 692 emiti(*i); 693 break; 694 case Ocall: 695 for (i0=i; i0>b->ins; i0--) 696 if (!isarg((i0-1)->op)) 697 break; 698 selcall(fn, i0, i, &ral); 699 i = i0; 700 break; 701 case Ovastart: 702 selvastart(fn, fa, i->arg[0]); 703 break; 704 case Ovaarg: 705 selvaarg(fn, b, i); 706 break; 707 case Oarg: 708 case Oargc: 709 die("unreachable"); 710 } 711 if (b == fn->start) 712 for (; ral; ral=ral->link) 713 emiti(ral->i); 714 b->nins = &insb[NIns] - curi; 715 idup(&b->ins, curi, b->nins); 716 } while (b != fn->start); 717 718 if (debug['A']) { 719 fprintf(stderr, "\n> After ABI lowering:\n"); 720 printfn(fn, stderr); 721 } 722 }