emit.c (13265B)
1 #include "all.h" 2 3 typedef struct E E; 4 5 struct E { 6 FILE *f; 7 Fn *fn; 8 uint64_t frame; 9 uint padding; 10 }; 11 12 #define CMP(X) \ 13 X(Cieq, "eq") \ 14 X(Cine, "ne") \ 15 X(Cisge, "ge") \ 16 X(Cisgt, "gt") \ 17 X(Cisle, "le") \ 18 X(Cislt, "lt") \ 19 X(Ciuge, "cs") \ 20 X(Ciugt, "hi") \ 21 X(Ciule, "ls") \ 22 X(Ciult, "cc") \ 23 X(NCmpI+Cfeq, "eq") \ 24 X(NCmpI+Cfge, "ge") \ 25 X(NCmpI+Cfgt, "gt") \ 26 X(NCmpI+Cfle, "ls") \ 27 X(NCmpI+Cflt, "mi") \ 28 X(NCmpI+Cfne, "ne") \ 29 X(NCmpI+Cfo, "vc") \ 30 X(NCmpI+Cfuo, "vs") 31 32 enum { 33 Ki = -1, /* matches Kw and Kl */ 34 Ka = -2, /* matches all classes */ 35 }; 36 37 static struct { 38 short op; 39 short cls; 40 char *asm; 41 } omap[] = { 42 { Oadd, Ki, "add %=, %0, %1" }, 43 { Oadd, Ka, "fadd %=, %0, %1" }, 44 { Osub, Ki, "sub %=, %0, %1" }, 45 { Osub, Ka, "fsub %=, %0, %1" }, 46 { Oneg, Ki, "neg %=, %0" }, 47 { Oneg, Ka, "fneg %=, %0" }, 48 { Oand, Ki, "and %=, %0, %1" }, 49 { Oor, Ki, "orr %=, %0, %1" }, 50 { Oxor, Ki, "eor %=, %0, %1" }, 51 { Osar, Ki, "asr %=, %0, %1" }, 52 { Oshr, Ki, "lsr %=, %0, %1" }, 53 { Oshl, Ki, "lsl %=, %0, %1" }, 54 { Omul, Ki, "mul %=, %0, %1" }, 55 { Omul, Ka, "fmul %=, %0, %1" }, 56 { Odiv, Ki, "sdiv %=, %0, %1" }, 57 { Odiv, Ka, "fdiv %=, %0, %1" }, 58 { Oudiv, Ki, "udiv %=, %0, %1" }, 59 { Orem, Ki, "sdiv %?, %0, %1\n\tmsub\t%=, %?, %1, %0" }, 60 { Ourem, Ki, "udiv %?, %0, %1\n\tmsub\t%=, %?, %1, %0" }, 61 { Ocopy, Ki, "mov %=, %0" }, 62 { Ocopy, Ka, "fmov %=, %0" }, 63 { Oswap, Ki, "mov %?, %0\n\tmov\t%0, %1\n\tmov\t%1, %?" }, 64 { Oswap, Ka, "fmov %?, %0\n\tfmov\t%0, %1\n\tfmov\t%1, %?" }, 65 { Ostoreb, Kw, "strb %W0, %M1" }, 66 { Ostoreh, Kw, "strh %W0, %M1" }, 67 { Ostorew, Kw, "str %W0, %M1" }, 68 { Ostorel, Kw, "str %L0, %M1" }, 69 { Ostores, Kw, "str %S0, %M1" }, 70 { Ostored, Kw, "str %D0, %M1" }, 71 { Oloadsb, Ki, "ldrsb %=, %M0" }, 72 { Oloadub, Ki, "ldrb %W=, %M0" }, 73 { Oloadsh, Ki, "ldrsh %=, %M0" }, 74 { Oloaduh, Ki, "ldrh %W=, %M0" }, 75 { Oloadsw, Kw, "ldr %=, %M0" }, 76 { Oloadsw, Kl, "ldrsw %=, %M0" }, 77 { Oloaduw, Ki, "ldr %W=, %M0" }, 78 { Oload, Ka, "ldr %=, %M0" }, 79 { Oextsb, Ki, "sxtb %=, %W0" }, 80 { Oextub, Ki, "uxtb %W=, %W0" }, 81 { Oextsh, Ki, "sxth %=, %W0" }, 82 { Oextuh, Ki, "uxth %W=, %W0" }, 83 { Oextsw, Ki, "sxtw %L=, %W0" }, 84 { Oextuw, Ki, "mov %W=, %W0" }, 85 { Oexts, Kd, "fcvt %=, %S0" }, 86 { Otruncd, Ks, "fcvt %=, %D0" }, 87 { Ocast, Kw, "fmov %=, %S0" }, 88 { Ocast, Kl, "fmov %=, %D0" }, 89 { Ocast, Ks, "fmov %=, %W0" }, 90 { Ocast, Kd, "fmov %=, %L0" }, 91 { Ostosi, Ka, "fcvtzs %=, %S0" }, 92 { Ostoui, Ka, "fcvtzu %=, %S0" }, 93 { Odtosi, Ka, "fcvtzs %=, %D0" }, 94 { Odtoui, Ka, "fcvtzu %=, %D0" }, 95 { Oswtof, Ka, "scvtf %=, %W0" }, 96 { Ouwtof, Ka, "ucvtf %=, %W0" }, 97 { Osltof, Ka, "scvtf %=, %L0" }, 98 { Oultof, Ka, "ucvtf %=, %L0" }, 99 { Ocall, Kw, "blr %L0" }, 100 101 { Oacmp, Ki, "cmp %0, %1" }, 102 { Oacmn, Ki, "cmn %0, %1" }, 103 { Oafcmp, Ka, "fcmpe %0, %1" }, 104 105 #define X(c, str) \ 106 { Oflag+c, Ki, "cset %=, " str }, 107 CMP(X) 108 #undef X 109 { NOp, 0, 0 } 110 }; 111 112 static char * 113 rname(int r, int k) 114 { 115 static char buf[4]; 116 117 if (r == SP) { 118 assert(k == Kl); 119 sprintf(buf, "sp"); 120 } 121 else if (R0 <= r && r <= LR) 122 switch (k) { 123 default: die("invalid class"); 124 case Kw: sprintf(buf, "w%d", r-R0); break; 125 case Kx: 126 case Kl: sprintf(buf, "x%d", r-R0); break; 127 } 128 else if (V0 <= r && r <= V30) 129 switch (k) { 130 default: die("invalid class"); 131 case Ks: sprintf(buf, "s%d", r-V0); break; 132 case Kx: 133 case Kd: sprintf(buf, "d%d", r-V0); break; 134 } 135 else 136 die("invalid register"); 137 return buf; 138 } 139 140 static uint64_t 141 slot(Ref r, E *e) 142 { 143 int s; 144 145 s = rsval(r); 146 if (s == -1) 147 return 16 + e->frame; 148 if (s < 0) { 149 if (e->fn->vararg && !T.apple) 150 return 16 + e->frame + 192 - (s+2); 151 else 152 return 16 + e->frame - (s+2); 153 } else 154 return 16 + e->padding + 4 * s; 155 } 156 157 static void 158 emitf(char *s, Ins *i, E *e) 159 { 160 Ref r; 161 int k, c; 162 Con *pc; 163 uint n, sp; 164 165 fputc('\t', e->f); 166 167 sp = 0; 168 for (;;) { 169 k = i->cls; 170 while ((c = *s++) != '%') 171 if (c == ' ' && !sp) { 172 fputc('\t', e->f); 173 sp = 1; 174 } else if ( !c) { 175 fputc('\n', e->f); 176 return; 177 } else 178 fputc(c, e->f); 179 Switch: 180 switch ((c = *s++)) { 181 default: 182 die("invalid escape"); 183 case 'W': 184 k = Kw; 185 goto Switch; 186 case 'L': 187 k = Kl; 188 goto Switch; 189 case 'S': 190 k = Ks; 191 goto Switch; 192 case 'D': 193 k = Kd; 194 goto Switch; 195 case '?': 196 if (KBASE(k) == 0) 197 fputs(rname(R18, k), e->f); 198 else 199 fputs(k==Ks ? "s31" : "d31", e->f); 200 break; 201 case '=': 202 case '0': 203 r = c == '=' ? i->to : i->arg[0]; 204 assert(isreg(r)); 205 fputs(rname(r.val, k), e->f); 206 break; 207 case '1': 208 r = i->arg[1]; 209 switch (rtype(r)) { 210 default: 211 die("invalid second argument"); 212 case RTmp: 213 assert(isreg(r)); 214 fputs(rname(r.val, k), e->f); 215 break; 216 case RCon: 217 pc = &e->fn->con[r.val]; 218 n = pc->bits.i; 219 assert(pc->type == CBits); 220 if (n & 0xfff000) 221 fprintf(e->f, "#%u, lsl #12", n>>12); 222 else 223 fprintf(e->f, "#%u", n); 224 break; 225 } 226 break; 227 case 'M': 228 c = *s++; 229 assert(c == '0' || c == '1' || c == '='); 230 r = c == '=' ? i->to : i->arg[c - '0']; 231 switch (rtype(r)) { 232 default: 233 die("todo (arm emit): unhandled ref"); 234 case RTmp: 235 assert(isreg(r)); 236 fprintf(e->f, "[%s]", rname(r.val, Kl)); 237 break; 238 case RSlot: 239 fprintf(e->f, "[x29, %"PRIu64"]", slot(r, e)); 240 break; 241 } 242 break; 243 } 244 } 245 } 246 247 static void 248 loadaddr(Con *c, char *rn, E *e) 249 { 250 char *p, *l, *s; 251 252 switch (c->sym.type) { 253 default: 254 die("unreachable"); 255 case SGlo: 256 if (T.apple) 257 s = "\tadrp\tR, S@pageO\n" 258 "\tadd\tR, R, S@pageoffO\n"; 259 else 260 s = "\tadrp\tR, SO\n" 261 "\tadd\tR, R, #:lo12:SO\n"; 262 break; 263 case SThr: 264 if (T.apple) 265 s = "\tadrp\tR, S@tlvppage\n" 266 "\tldr\tR, [R, S@tlvppageoff]\n"; 267 else 268 s = "\tmrs\tR, tpidr_el0\n" 269 "\tadd\tR, R, #:tprel_hi12:SO, lsl #12\n" 270 "\tadd\tR, R, #:tprel_lo12_nc:SO\n"; 271 break; 272 } 273 274 l = str(c->sym.id); 275 p = l[0] == '"' ? "" : T.assym; 276 for (; *s; s++) 277 switch (*s) { 278 default: 279 fputc(*s, e->f); 280 break; 281 case 'R': 282 fputs(rn, e->f); 283 break; 284 case 'S': 285 fputs(p, e->f); 286 fputs(l, e->f); 287 break; 288 case 'O': 289 if (c->bits.i) 290 /* todo, handle large offsets */ 291 fprintf(e->f, "+%"PRIi64, c->bits.i); 292 break; 293 } 294 } 295 296 static void 297 loadcon(Con *c, int r, int k, E *e) 298 { 299 char *rn; 300 int64_t n; 301 int w, sh; 302 303 w = KWIDE(k); 304 rn = rname(r, k); 305 n = c->bits.i; 306 if (c->type == CAddr) { 307 loadaddr(c, rn, e); 308 return; 309 } 310 assert(c->type == CBits); 311 if (!w) 312 n = (int32_t)n; 313 if ((n | 0xffff) == -1 || arm64_logimm(n, k)) { 314 fprintf(e->f, "\tmov\t%s, #%"PRIi64"\n", rn, n); 315 } else { 316 fprintf(e->f, "\tmov\t%s, #%d\n", 317 rn, (int)(n & 0xffff)); 318 for (sh=16; n>>=16; sh+=16) { 319 if ((!w && sh == 32) || sh == 64) 320 break; 321 fprintf(e->f, "\tmovk\t%s, #0x%x, lsl #%d\n", 322 rn, (uint)(n & 0xffff), sh); 323 } 324 } 325 } 326 327 static void emitins(Ins *, E *); 328 329 static void 330 fixarg(Ref *pr, int sz, E *e) 331 { 332 Ins *i; 333 Ref r; 334 uint64_t s; 335 336 r = *pr; 337 if (rtype(r) == RSlot) { 338 s = slot(r, e); 339 if (s > sz * 4095u) { 340 i = &(Ins){Oaddr, Kl, TMP(IP0), {r}}; 341 emitins(i, e); 342 *pr = TMP(IP0); 343 } 344 } 345 } 346 347 static void 348 emitins(Ins *i, E *e) 349 { 350 char *l, *p, *rn; 351 uint64_t s; 352 int o; 353 Ref r; 354 Con *c; 355 356 switch (i->op) { 357 default: 358 if (isload(i->op)) 359 fixarg(&i->arg[0], loadsz(i), e); 360 if (isstore(i->op)) 361 fixarg(&i->arg[1], storesz(i), e); 362 Table: 363 /* most instructions are just pulled out of 364 * the table omap[], some special cases are 365 * detailed below */ 366 for (o=0;; o++) { 367 /* this linear search should really be a binary 368 * search */ 369 if (omap[o].op == NOp) 370 die("no match for %s(%c)", 371 optab[i->op].name, "wlsd"[i->cls]); 372 if (omap[o].op == i->op) 373 if (omap[o].cls == i->cls || omap[o].cls == Ka 374 || (omap[o].cls == Ki && KBASE(i->cls) == 0)) 375 break; 376 } 377 emitf(omap[o].asm, i, e); 378 break; 379 case Onop: 380 break; 381 case Ocopy: 382 if (req(i->to, i->arg[0])) 383 break; 384 if (rtype(i->to) == RSlot) { 385 r = i->to; 386 if (!isreg(i->arg[0])) { 387 i->to = TMP(R18); 388 emitins(i, e); 389 i->arg[0] = i->to; 390 } 391 i->op = Ostorew + i->cls; 392 i->cls = Kw; 393 i->arg[1] = r; 394 emitins(i, e); 395 break; 396 } 397 assert(isreg(i->to)); 398 switch (rtype(i->arg[0])) { 399 case RCon: 400 c = &e->fn->con[i->arg[0].val]; 401 loadcon(c, i->to.val, i->cls, e); 402 break; 403 case RSlot: 404 i->op = Oload; 405 emitins(i, e); 406 break; 407 default: 408 assert(i->to.val != R18); 409 goto Table; 410 } 411 break; 412 case Oaddr: 413 assert(rtype(i->arg[0]) == RSlot); 414 rn = rname(i->to.val, Kl); 415 s = slot(i->arg[0], e); 416 if (s <= 4095) 417 fprintf(e->f, "\tadd\t%s, x29, #%"PRIu64"\n", rn, s); 418 else if (s <= 65535) 419 fprintf(e->f, 420 "\tmov\t%s, #%"PRIu64"\n" 421 "\tadd\t%s, x29, %s\n", 422 rn, s, rn, rn 423 ); 424 else 425 fprintf(e->f, 426 "\tmov\t%s, #%"PRIu64"\n" 427 "\tmovk\t%s, #%"PRIu64", lsl #16\n" 428 "\tadd\t%s, x29, %s\n", 429 rn, s & 0xFFFF, rn, s >> 16, rn, rn 430 ); 431 break; 432 case Ocall: 433 if (rtype(i->arg[0]) != RCon) 434 goto Table; 435 c = &e->fn->con[i->arg[0].val]; 436 if (c->type != CAddr 437 || c->sym.type != SGlo 438 || c->bits.i) 439 die("invalid call argument"); 440 l = str(c->sym.id); 441 p = l[0] == '"' ? "" : T.assym; 442 fprintf(e->f, "\tbl\t%s%s\n", p, l); 443 break; 444 case Osalloc: 445 emitf("sub sp, sp, %0", i, e); 446 if (!req(i->to, R)) 447 emitf("mov %=, sp", i, e); 448 break; 449 case Odbgloc: 450 emitdbgloc(i->arg[0].val, e->f); 451 break; 452 } 453 } 454 455 static void 456 framelayout(E *e) 457 { 458 int *r; 459 uint o; 460 uint64_t f; 461 462 for (o=0, r=arm64_rclob; *r>=0; r++) 463 o += 1 & (e->fn->reg >> *r); 464 f = e->fn->slot; 465 f = (f + 3) & -4; 466 o += o & 1; 467 e->padding = 4*(f-e->fn->slot); 468 e->frame = 4*f + 8*o; 469 } 470 471 /* 472 473 Stack-frame layout: 474 475 +=============+ 476 | varargs | 477 | save area | 478 +-------------+ 479 | callee-save | ^ 480 | registers | | 481 +-------------+ | 482 | ... | | 483 | spill slots | | 484 | ... | | e->frame 485 +-------------+ | 486 | ... | | 487 | locals | | 488 | ... | | 489 +-------------+ | 490 | e->padding | v 491 +-------------+ 492 | saved x29 | 493 | saved x30 | 494 +=============+ <- x29 495 496 */ 497 498 void 499 arm64_emitfn(Fn *fn, FILE *out) 500 { 501 static char *ctoa[] = { 502 #define X(c, s) [c] = s, 503 CMP(X) 504 #undef X 505 }; 506 static int id0; 507 int s, n, c, lbl, *r; 508 uint64_t o; 509 Blk *b, *t; 510 Ins *i; 511 E *e; 512 513 e = &(E){.f = out, .fn = fn}; 514 if (T.apple) 515 e->fn->lnk.align = 4; 516 emitfnlnk(e->fn->name, &e->fn->lnk, e->f); 517 framelayout(e); 518 519 if (e->fn->vararg && !T.apple) { 520 for (n=7; n>=0; n--) 521 fprintf(e->f, "\tstr\tq%d, [sp, -16]!\n", n); 522 for (n=7; n>=0; n-=2) 523 fprintf(e->f, "\tstp\tx%d, x%d, [sp, -16]!\n", n-1, n); 524 } 525 526 if (e->frame + 16 <= 512) 527 fprintf(e->f, 528 "\tstp\tx29, x30, [sp, -%"PRIu64"]!\n", 529 e->frame + 16 530 ); 531 else if (e->frame <= 4095) 532 fprintf(e->f, 533 "\tsub\tsp, sp, #%"PRIu64"\n" 534 "\tstp\tx29, x30, [sp, -16]!\n", 535 e->frame 536 ); 537 else if (e->frame <= 65535) 538 fprintf(e->f, 539 "\tmov\tx16, #%"PRIu64"\n" 540 "\tsub\tsp, sp, x16\n" 541 "\tstp\tx29, x30, [sp, -16]!\n", 542 e->frame 543 ); 544 else 545 fprintf(e->f, 546 "\tmov\tx16, #%"PRIu64"\n" 547 "\tmovk\tx16, #%"PRIu64", lsl #16\n" 548 "\tsub\tsp, sp, x16\n" 549 "\tstp\tx29, x30, [sp, -16]!\n", 550 e->frame & 0xFFFF, e->frame >> 16 551 ); 552 fputs("\tmov\tx29, sp\n", e->f); 553 s = (e->frame - e->padding) / 4; 554 for (r=arm64_rclob; *r>=0; r++) 555 if (e->fn->reg & BIT(*r)) { 556 s -= 2; 557 i = &(Ins){.arg = {TMP(*r), SLOT(s)}}; 558 i->op = *r >= V0 ? Ostored : Ostorel; 559 emitins(i, e); 560 } 561 562 for (lbl=0, b=e->fn->start; b; b=b->link) { 563 if (lbl || b->npred > 1) 564 fprintf(e->f, "%s%d:\n", T.asloc, id0+b->id); 565 for (i=b->ins; i!=&b->ins[b->nins]; i++) 566 emitins(i, e); 567 lbl = 1; 568 switch (b->jmp.type) { 569 case Jhlt: 570 fprintf(e->f, "\tbrk\t#1000\n"); 571 break; 572 case Jret0: 573 s = (e->frame - e->padding) / 4; 574 for (r=arm64_rclob; *r>=0; r++) 575 if (e->fn->reg & BIT(*r)) { 576 s -= 2; 577 i = &(Ins){Oload, 0, TMP(*r), {SLOT(s)}}; 578 i->cls = *r >= V0 ? Kd : Kl; 579 emitins(i, e); 580 } 581 if (e->fn->dynalloc) 582 fputs("\tmov sp, x29\n", e->f); 583 o = e->frame + 16; 584 if (e->fn->vararg && !T.apple) 585 o += 192; 586 if (o <= 504) 587 fprintf(e->f, 588 "\tldp\tx29, x30, [sp], %"PRIu64"\n", 589 o 590 ); 591 else if (o - 16 <= 4095) 592 fprintf(e->f, 593 "\tldp\tx29, x30, [sp], 16\n" 594 "\tadd\tsp, sp, #%"PRIu64"\n", 595 o - 16 596 ); 597 else if (o - 16 <= 65535) 598 fprintf(e->f, 599 "\tldp\tx29, x30, [sp], 16\n" 600 "\tmov\tx16, #%"PRIu64"\n" 601 "\tadd\tsp, sp, x16\n", 602 o - 16 603 ); 604 else 605 fprintf(e->f, 606 "\tldp\tx29, x30, [sp], 16\n" 607 "\tmov\tx16, #%"PRIu64"\n" 608 "\tmovk\tx16, #%"PRIu64", lsl #16\n" 609 "\tadd\tsp, sp, x16\n", 610 (o - 16) & 0xFFFF, (o - 16) >> 16 611 ); 612 fprintf(e->f, "\tret\n"); 613 break; 614 case Jjmp: 615 Jmp: 616 if (b->s1 != b->link) 617 fprintf(e->f, 618 "\tb\t%s%d\n", 619 T.asloc, id0+b->s1->id 620 ); 621 else 622 lbl = 0; 623 break; 624 default: 625 c = b->jmp.type - Jjf; 626 if (c < 0 || c > NCmp) 627 die("unhandled jump %d", b->jmp.type); 628 if (b->link == b->s2) { 629 t = b->s1; 630 b->s1 = b->s2; 631 b->s2 = t; 632 } else 633 c = cmpneg(c); 634 fprintf(e->f, 635 "\tb%s\t%s%d\n", 636 ctoa[c], T.asloc, id0+b->s2->id 637 ); 638 goto Jmp; 639 } 640 } 641 id0 += e->fn->nblk; 642 if (!T.apple) 643 elf_emitfnfin(fn->name, out); 644 }