qbe

Internal scc patchset buffer for QBE
Log | Files | Refs | README | LICENSE

abi.c (16696B)


      1 #include "all.h"
      2 
      3 typedef struct Abi Abi;
      4 typedef struct Class Class;
      5 typedef struct Insl Insl;
      6 typedef struct Params Params;
      7 
      8 enum {
      9 	Cstk = 1, /* pass on the stack */
     10 	Cptr = 2, /* replaced by a pointer */
     11 };
     12 
     13 struct Class {
     14 	char class;
     15 	char ishfa;
     16 	struct {
     17 		char base;
     18 		uchar size;
     19 	} hfa;
     20 	uint size;
     21 	uint align;
     22 	Typ *t;
     23 	uchar nreg;
     24 	uchar ngp;
     25 	uchar nfp;
     26 	int reg[4];
     27 	int cls[4];
     28 };
     29 
     30 struct Insl {
     31 	Ins i;
     32 	Insl *link;
     33 };
     34 
     35 struct Params {
     36 	uint ngp;
     37 	uint nfp;
     38 	uint stk;
     39 };
     40 
     41 static int gpreg[12] = {R0, R1, R2, R3, R4, R5, R6, R7};
     42 static int fpreg[12] = {V0, V1, V2, V3, V4, V5, V6, V7};
     43 static int store[] = {
     44 	[Kw] = Ostorew, [Kl] = Ostorel,
     45 	[Ks] = Ostores, [Kd] = Ostored
     46 };
     47 
     48 /* layout of call's second argument (RCall)
     49  *
     50  *         13
     51  *  29   14 |    9    5   2  0
     52  *  |0.00|x|x|xxxx|xxxx|xxx|xx|                  range
     53  *        | |    |    |   |  ` gp regs returned (0..2)
     54  *        | |    |    |   ` fp regs returned    (0..4)
     55  *        | |    |    ` gp regs passed          (0..8)
     56  *        | |     ` fp regs passed              (0..8)
     57  *        | ` indirect result register x8 used  (0..1)
     58  *        ` env pointer passed in x9            (0..1)
     59  */
     60 
     61 static int
     62 isfloatv(Typ *t, char *cls)
     63 {
     64 	Field *f;
     65 	uint n;
     66 
     67 	for (n=0; n<t->nunion; n++)
     68 		for (f=t->fields[n]; f->type != FEnd; f++)
     69 			switch (f->type) {
     70 			case Fs:
     71 				if (*cls == Kd)
     72 					return 0;
     73 				*cls = Ks;
     74 				break;
     75 			case Fd:
     76 				if (*cls == Ks)
     77 					return 0;
     78 				*cls = Kd;
     79 				break;
     80 			case FTyp:
     81 				if (isfloatv(&typ[f->len], cls))
     82 					break;
     83 				/* fall through */
     84 			default:
     85 				return 0;
     86 			}
     87 	return 1;
     88 }
     89 
     90 static void
     91 typclass(Class *c, Typ *t, int *gp, int *fp)
     92 {
     93 	uint64_t sz;
     94 	uint n;
     95 
     96 	sz = (t->size + 7) & -8;
     97 	c->t = t;
     98 	c->class = 0;
     99 	c->ngp = 0;
    100 	c->nfp = 0;
    101 	c->align = 8;
    102 
    103 	if (t->align > 3)
    104 		err("alignments larger than 8 are not supported");
    105 
    106 	if (t->isdark || sz > 16 || sz == 0) {
    107 		/* large structs are replaced by a
    108 		 * pointer to some caller-allocated
    109 		 * memory */
    110 		c->class |= Cptr;
    111 		c->size = 8;
    112 		c->ngp = 1;
    113 		*c->reg = *gp;
    114 		*c->cls = Kl;
    115 		return;
    116 	}
    117 
    118 	c->size = sz;
    119 	c->hfa.base = Kx;
    120 	c->ishfa = isfloatv(t, &c->hfa.base);
    121 	c->hfa.size = t->size/(KWIDE(c->hfa.base) ? 8 : 4);
    122 
    123 	if (c->ishfa)
    124 		for (n=0; n<c->hfa.size; n++, c->nfp++) {
    125 			c->reg[n] = *fp++;
    126 			c->cls[n] = c->hfa.base;
    127 		}
    128 	else
    129 		for (n=0; n<sz/8; n++, c->ngp++) {
    130 			c->reg[n] = *gp++;
    131 			c->cls[n] = Kl;
    132 		}
    133 
    134 	c->nreg = n;
    135 }
    136 
    137 static void
    138 sttmps(Ref tmp[], int cls[], uint nreg, Ref mem, Fn *fn)
    139 {
    140 	uint n;
    141 	uint64_t off;
    142 	Ref r;
    143 
    144 	assert(nreg <= 4);
    145 	off = 0;
    146 	for (n=0; n<nreg; n++) {
    147 		tmp[n] = newtmp("abi", cls[n], fn);
    148 		r = newtmp("abi", Kl, fn);
    149 		emit(store[cls[n]], 0, R, tmp[n], r);
    150 		emit(Oadd, Kl, r, mem, getcon(off, fn));
    151 		off += KWIDE(cls[n]) ? 8 : 4;
    152 	}
    153 }
    154 
    155 /* todo, may read out of bounds */
    156 static void
    157 ldregs(int reg[], int cls[], int n, Ref mem, Fn *fn)
    158 {
    159 	int i;
    160 	uint64_t off;
    161 	Ref r;
    162 
    163 	off = 0;
    164 	for (i=0; i<n; i++) {
    165 		r = newtmp("abi", Kl, fn);
    166 		emit(Oload, cls[i], TMP(reg[i]), r, R);
    167 		emit(Oadd, Kl, r, mem, getcon(off, fn));
    168 		off += KWIDE(cls[i]) ? 8 : 4;
    169 	}
    170 }
    171 
    172 static void
    173 selret(Blk *b, Fn *fn)
    174 {
    175 	int j, k, cty;
    176 	Ref r;
    177 	Class cr;
    178 
    179 	j = b->jmp.type;
    180 
    181 	if (!isret(j) || j == Jret0)
    182 		return;
    183 
    184 	r = b->jmp.arg;
    185 	b->jmp.type = Jret0;
    186 
    187 	if (j == Jretc) {
    188 		typclass(&cr, &typ[fn->retty], gpreg, fpreg);
    189 		if (cr.class & Cptr) {
    190 			assert(rtype(fn->retr) == RTmp);
    191 			emit(Oblit1, 0, R, INT(cr.t->size), R);
    192 			emit(Oblit0, 0, R, r, fn->retr);
    193 			cty = 0;
    194 		} else {
    195 			ldregs(cr.reg, cr.cls, cr.nreg, r, fn);
    196 			cty = (cr.nfp << 2) | cr.ngp;
    197 		}
    198 	} else {
    199 		k = j - Jretw;
    200 		if (KBASE(k) == 0) {
    201 			emit(Ocopy, k, TMP(R0), r, R);
    202 			cty = 1;
    203 		} else {
    204 			emit(Ocopy, k, TMP(V0), r, R);
    205 			cty = 1 << 2;
    206 		}
    207 	}
    208 
    209 	b->jmp.arg = CALL(cty);
    210 }
    211 
    212 static int
    213 argsclass(Ins *i0, Ins *i1, Class *carg)
    214 {
    215 	int va, envc, ngp, nfp, *gp, *fp;
    216 	Class *c;
    217 	Ins *i;
    218 
    219 	va = 0;
    220 	envc = 0;
    221 	gp = gpreg;
    222 	fp = fpreg;
    223 	ngp = 8;
    224 	nfp = 8;
    225 	for (i=i0, c=carg; i<i1; i++, c++)
    226 		switch (i->op) {
    227 		case Oargsb:
    228 		case Oargub:
    229 		case Oparsb:
    230 		case Oparub:
    231 			c->size = 1;
    232 			goto Scalar;
    233 		case Oargsh:
    234 		case Oarguh:
    235 		case Oparsh:
    236 		case Oparuh:
    237 			c->size = 2;
    238 			goto Scalar;
    239 		case Opar:
    240 		case Oarg:
    241 			c->size = 8;
    242 			if (T.apple && !KWIDE(i->cls))
    243 				c->size = 4;
    244 		Scalar:
    245 			c->align = c->size;
    246 			*c->cls = i->cls;
    247 			if (va) {
    248 				c->class |= Cstk;
    249 				c->size = 8;
    250 				c->align = 8;
    251 				break;
    252 			}
    253 			if (KBASE(i->cls) == 0 && ngp > 0) {
    254 				ngp--;
    255 				*c->reg = *gp++;
    256 				break;
    257 			}
    258 			if (KBASE(i->cls) == 1 && nfp > 0) {
    259 				nfp--;
    260 				*c->reg = *fp++;
    261 				break;
    262 			}
    263 			c->class |= Cstk;
    264 			break;
    265 		case Oparc:
    266 		case Oargc:
    267 			typclass(c, &typ[i->arg[0].val], gp, fp);
    268 			if (c->ngp <= ngp) {
    269 				if (c->nfp <= nfp) {
    270 					ngp -= c->ngp;
    271 					nfp -= c->nfp;
    272 					gp += c->ngp;
    273 					fp += c->nfp;
    274 					break;
    275 				} else
    276 					nfp = 0;
    277 			} else
    278 				ngp = 0;
    279 			c->class |= Cstk;
    280 			break;
    281 		case Opare:
    282 		case Oarge:
    283 			*c->reg = R9;
    284 			*c->cls = Kl;
    285 			envc = 1;
    286 			break;
    287 		case Oargv:
    288 			va = T.apple != 0;
    289 			break;
    290 		default:
    291 			die("unreachable");
    292 		}
    293 
    294 	return envc << 14 | (gp-gpreg) << 5 | (fp-fpreg) << 9;
    295 }
    296 
    297 bits
    298 arm64_retregs(Ref r, int p[2])
    299 {
    300 	bits b;
    301 	int ngp, nfp;
    302 
    303 	assert(rtype(r) == RCall);
    304 	ngp = r.val & 3;
    305 	nfp = (r.val >> 2) & 7;
    306 	if (p) {
    307 		p[0] = ngp;
    308 		p[1] = nfp;
    309 	}
    310 	b = 0;
    311 	while (ngp--)
    312 		b |= BIT(R0+ngp);
    313 	while (nfp--)
    314 		b |= BIT(V0+nfp);
    315 	return b;
    316 }
    317 
    318 bits
    319 arm64_argregs(Ref r, int p[2])
    320 {
    321 	bits b;
    322 	int ngp, nfp, x8, x9;
    323 
    324 	assert(rtype(r) == RCall);
    325 	ngp = (r.val >> 5) & 15;
    326 	nfp = (r.val >> 9) & 15;
    327 	x8 = (r.val >> 13) & 1;
    328 	x9 = (r.val >> 14) & 1;
    329 	if (p) {
    330 		p[0] = ngp + x8 + x9;
    331 		p[1] = nfp;
    332 	}
    333 	b = 0;
    334 	while (ngp--)
    335 		b |= BIT(R0+ngp);
    336 	while (nfp--)
    337 		b |= BIT(V0+nfp);
    338 	return b | ((bits)x8 << R8) | ((bits)x9 << R9);
    339 }
    340 
    341 static void
    342 stkblob(Ref r, Class *c, Fn *fn, Insl **ilp)
    343 {
    344 	Insl *il;
    345 	int al;
    346 	uint64_t sz;
    347 
    348 	il = alloc(sizeof *il);
    349 	al = c->t->align - 2; /* NAlign == 3 */
    350 	if (al < 0)
    351 		al = 0;
    352 	sz = c->class & Cptr ? c->t->size : c->size;
    353 	il->i = (Ins){Oalloc+al, Kl, r, {getcon(sz, fn)}};
    354 	il->link = *ilp;
    355 	*ilp = il;
    356 }
    357 
    358 static uint
    359 align(uint x, uint al)
    360 {
    361 	return (x + al-1) & -al;
    362 }
    363 
    364 static void
    365 selcall(Fn *fn, Ins *i0, Ins *i1, Insl **ilp)
    366 {
    367 	Ins *i;
    368 	Class *ca, *c, cr;
    369 	int op, cty;
    370 	uint n, stk, off;;
    371 	Ref r, rstk, tmp[4];
    372 
    373 	ca = alloc((i1-i0) * sizeof ca[0]);
    374 	cty = argsclass(i0, i1, ca);
    375 
    376 	stk = 0;
    377 	for (i=i0, c=ca; i<i1; i++, c++) {
    378 		if (c->class & Cptr) {
    379 			i->arg[0] = newtmp("abi", Kl, fn);
    380 			stkblob(i->arg[0], c, fn, ilp);
    381 			i->op = Oarg;
    382 		}
    383 		if (c->class & Cstk) {
    384 			stk = align(stk, c->align);
    385 			stk += c->size;
    386 		}
    387 	}
    388 	stk = align(stk, 16);
    389 	rstk = getcon(stk, fn);
    390 	if (stk)
    391 		emit(Oadd, Kl, TMP(SP), TMP(SP), rstk);
    392 
    393 	if (!req(i1->arg[1], R)) {
    394 		typclass(&cr, &typ[i1->arg[1].val], gpreg, fpreg);
    395 		stkblob(i1->to, &cr, fn, ilp);
    396 		cty |= (cr.nfp << 2) | cr.ngp;
    397 		if (cr.class & Cptr) {
    398 			/* spill & rega expect calls to be
    399 			 * followed by copies from regs,
    400 			 * so we emit a dummy
    401 			 */
    402 			cty |= 1 << 13 | 1;
    403 			emit(Ocopy, Kw, R, TMP(R0), R);
    404 		} else {
    405 			sttmps(tmp, cr.cls, cr.nreg, i1->to, fn);
    406 			for (n=0; n<cr.nreg; n++) {
    407 				r = TMP(cr.reg[n]);
    408 				emit(Ocopy, cr.cls[n], tmp[n], r, R);
    409 			}
    410 		}
    411 	} else {
    412 		if (KBASE(i1->cls) == 0) {
    413 			emit(Ocopy, i1->cls, i1->to, TMP(R0), R);
    414 			cty |= 1;
    415 		} else {
    416 			emit(Ocopy, i1->cls, i1->to, TMP(V0), R);
    417 			cty |= 1 << 2;
    418 		}
    419 	}
    420 
    421 	emit(Ocall, 0, R, i1->arg[0], CALL(cty));
    422 
    423 	if (cty & (1 << 13))
    424 		/* struct return argument */
    425 		emit(Ocopy, Kl, TMP(R8), i1->to, R);
    426 
    427 	for (i=i0, c=ca; i<i1; i++, c++) {
    428 		if ((c->class & Cstk) != 0)
    429 			continue;
    430 		if (i->op == Oarg || i->op == Oarge)
    431 			emit(Ocopy, *c->cls, TMP(*c->reg), i->arg[0], R);
    432 		if (i->op == Oargc)
    433 			ldregs(c->reg, c->cls, c->nreg, i->arg[1], fn);
    434 	}
    435 
    436 	/* populate the stack */
    437 	off = 0;
    438 	for (i=i0, c=ca; i<i1; i++, c++) {
    439 		if ((c->class & Cstk) == 0)
    440 			continue;
    441 		off = align(off, c->align);
    442 		r = newtmp("abi", Kl, fn);
    443 		if (i->op == Oarg || isargbh(i->op)) {
    444 			switch (c->size) {
    445 			case 1: op = Ostoreb; break;
    446 			case 2: op = Ostoreh; break;
    447 			case 4:
    448 			case 8: op = store[*c->cls]; break;
    449 			default: die("unreachable");
    450 			}
    451 			emit(op, 0, R, i->arg[0], r);
    452 		} else {
    453 			assert(i->op == Oargc);
    454 			emit(Oblit1, 0, R, INT(c->size), R);
    455 			emit(Oblit0, 0, R, i->arg[1], r);
    456 		}
    457 		emit(Oadd, Kl, r, TMP(SP), getcon(off, fn));
    458 		off += c->size;
    459 	}
    460 	if (stk)
    461 		emit(Osub, Kl, TMP(SP), TMP(SP), rstk);
    462 
    463 	for (i=i0, c=ca; i<i1; i++, c++)
    464 		if (c->class & Cptr) {
    465 			emit(Oblit1, 0, R, INT(c->t->size), R);
    466 			emit(Oblit0, 0, R, i->arg[1], i->arg[0]);
    467 		}
    468 }
    469 
    470 static Params
    471 selpar(Fn *fn, Ins *i0, Ins *i1)
    472 {
    473 	Class *ca, *c, cr;
    474 	Insl *il;
    475 	Ins *i;
    476 	int op, n, cty;
    477 	uint off;
    478 	Ref r, tmp[16], *t;
    479 
    480 	ca = alloc((i1-i0) * sizeof ca[0]);
    481 	curi = &insb[NIns];
    482 
    483 	cty = argsclass(i0, i1, ca);
    484 	fn->reg = arm64_argregs(CALL(cty), 0);
    485 
    486 	il = 0;
    487 	t = tmp;
    488 	for (i=i0, c=ca; i<i1; i++, c++) {
    489 		if (i->op != Oparc || (c->class & (Cptr|Cstk)))
    490 			continue;
    491 		sttmps(t, c->cls, c->nreg, i->to, fn);
    492 		stkblob(i->to, c, fn, &il);
    493 		t += c->nreg;
    494 	}
    495 	for (; il; il=il->link)
    496 		emiti(il->i);
    497 
    498 	if (fn->retty >= 0) {
    499 		typclass(&cr, &typ[fn->retty], gpreg, fpreg);
    500 		if (cr.class & Cptr) {
    501 			fn->retr = newtmp("abi", Kl, fn);
    502 			emit(Ocopy, Kl, fn->retr, TMP(R8), R);
    503 			fn->reg |= BIT(R8);
    504 		}
    505 	}
    506 
    507 	t = tmp;
    508 	off = 0;
    509 	for (i=i0, c=ca; i<i1; i++, c++)
    510 		if (i->op == Oparc && !(c->class & Cptr)) {
    511 			if (c->class & Cstk) {
    512 				off = align(off, c->align);
    513 				fn->tmp[i->to.val].slot = -(off+2);
    514 				off += c->size;
    515 			} else
    516 				for (n=0; n<c->nreg; n++) {
    517 					r = TMP(c->reg[n]);
    518 					emit(Ocopy, c->cls[n], *t++, r, R);
    519 				}
    520 		} else if (c->class & Cstk) {
    521 			off = align(off, c->align);
    522 			if (isparbh(i->op))
    523 				op = Oloadsb + (i->op - Oparsb);
    524 			else
    525 				op = Oload;
    526 			emit(op, *c->cls, i->to, SLOT(-(off+2)), R);
    527 			off += c->size;
    528 		} else {
    529 			emit(Ocopy, *c->cls, i->to, TMP(*c->reg), R);
    530 		}
    531 
    532 	return (Params){
    533 		.stk = align(off, 8),
    534 		.ngp = (cty >> 5) & 15,
    535 		.nfp = (cty >> 9) & 15
    536 	};
    537 }
    538 
    539 static Blk *
    540 split(Fn *fn, Blk *b)
    541 {
    542 	Blk *bn;
    543 
    544 	++fn->nblk;
    545 	bn = newblk();
    546 	bn->nins = &insb[NIns] - curi;
    547 	idup(&bn->ins, curi, bn->nins);
    548 	curi = &insb[NIns];
    549 	bn->visit = ++b->visit;
    550 	strf(bn->name, "%s.%d", b->name, b->visit);
    551 	bn->loop = b->loop;
    552 	bn->link = b->link;
    553 	b->link = bn;
    554 	return bn;
    555 }
    556 
    557 static void
    558 chpred(Blk *b, Blk *bp, Blk *bp1)
    559 {
    560 	Phi *p;
    561 	uint a;
    562 
    563 	for (p=b->phi; p; p=p->link) {
    564 		for (a=0; p->blk[a]!=bp; a++)
    565 			assert(a+1<p->narg);
    566 		p->blk[a] = bp1;
    567 	}
    568 }
    569 
    570 static void
    571 apple_selvaarg(Fn *fn, Blk *b, Ins *i)
    572 {
    573 	Ref ap, stk, stk8, c8;
    574 
    575 	(void)b;
    576 	c8 = getcon(8, fn);
    577 	ap = i->arg[0];
    578 	stk8 = newtmp("abi", Kl, fn);
    579 	stk = newtmp("abi", Kl, fn);
    580 
    581 	emit(Ostorel, 0, R, stk8, ap);
    582 	emit(Oadd, Kl, stk8, stk, c8);
    583 	emit(Oload, i->cls, i->to, stk, R);
    584 	emit(Oload, Kl, stk, ap, R);
    585 }
    586 
    587 static void
    588 arm64_selvaarg(Fn *fn, Blk *b, Ins *i)
    589 {
    590 	Ref loc, lreg, lstk, nr, r0, r1, c8, c16, c24, c28, ap;
    591 	Blk *b0, *bstk, *breg;
    592 	int isgp;
    593 
    594 	c8 = getcon(8, fn);
    595 	c16 = getcon(16, fn);
    596 	c24 = getcon(24, fn);
    597 	c28 = getcon(28, fn);
    598 	ap = i->arg[0];
    599 	isgp = KBASE(i->cls) == 0;
    600 
    601 	/* @b [...]
    602 	       r0 =l add ap, (24 or 28)
    603 	       nr =l loadsw r0
    604 	       r1 =w csltw nr, 0
    605 	       jnz r1, @breg, @bstk
    606 	   @breg
    607 	       r0 =l add ap, (8 or 16)
    608 	       r1 =l loadl r0
    609 	       lreg =l add r1, nr
    610 	       r0 =w add nr, (8 or 16)
    611 	       r1 =l add ap, (24 or 28)
    612 	       storew r0, r1
    613 	   @bstk
    614 	       lstk =l loadl ap
    615 	       r0 =l add lstk, 8
    616 	       storel r0, ap
    617 	   @b0
    618 	       %loc =l phi @breg %lreg, @bstk %lstk
    619 	       i->to =(i->cls) load %loc
    620 	*/
    621 
    622 	loc = newtmp("abi", Kl, fn);
    623 	emit(Oload, i->cls, i->to, loc, R);
    624 	b0 = split(fn, b);
    625 	b0->jmp = b->jmp;
    626 	b0->s1 = b->s1;
    627 	b0->s2 = b->s2;
    628 	if (b->s1)
    629 		chpred(b->s1, b, b0);
    630 	if (b->s2 && b->s2 != b->s1)
    631 		chpred(b->s2, b, b0);
    632 
    633 	lreg = newtmp("abi", Kl, fn);
    634 	nr = newtmp("abi", Kl, fn);
    635 	r0 = newtmp("abi", Kw, fn);
    636 	r1 = newtmp("abi", Kl, fn);
    637 	emit(Ostorew, Kw, R, r0, r1);
    638 	emit(Oadd, Kl, r1, ap, isgp ? c24 : c28);
    639 	emit(Oadd, Kw, r0, nr, isgp ? c8 : c16);
    640 	r0 = newtmp("abi", Kl, fn);
    641 	r1 = newtmp("abi", Kl, fn);
    642 	emit(Oadd, Kl, lreg, r1, nr);
    643 	emit(Oload, Kl, r1, r0, R);
    644 	emit(Oadd, Kl, r0, ap, isgp ? c8 : c16);
    645 	breg = split(fn, b);
    646 	breg->jmp.type = Jjmp;
    647 	breg->s1 = b0;
    648 
    649 	lstk = newtmp("abi", Kl, fn);
    650 	r0 = newtmp("abi", Kl, fn);
    651 	emit(Ostorel, Kw, R, r0, ap);
    652 	emit(Oadd, Kl, r0, lstk, c8);
    653 	emit(Oload, Kl, lstk, ap, R);
    654 	bstk = split(fn, b);
    655 	bstk->jmp.type = Jjmp;
    656 	bstk->s1 = b0;
    657 
    658 	b0->phi = alloc(sizeof *b0->phi);
    659 	*b0->phi = (Phi){
    660 		.cls = Kl, .to = loc,
    661 		.narg = 2,
    662 		.blk = vnew(2, sizeof b0->phi->blk[0], PFn),
    663 		.arg = vnew(2, sizeof b0->phi->arg[0], PFn),
    664 	};
    665 	b0->phi->blk[0] = bstk;
    666 	b0->phi->blk[1] = breg;
    667 	b0->phi->arg[0] = lstk;
    668 	b0->phi->arg[1] = lreg;
    669 	r0 = newtmp("abi", Kl, fn);
    670 	r1 = newtmp("abi", Kw, fn);
    671 	b->jmp.type = Jjnz;
    672 	b->jmp.arg = r1;
    673 	b->s1 = breg;
    674 	b->s2 = bstk;
    675 	emit(Ocmpw+Cislt, Kw, r1, nr, CON_Z);
    676 	emit(Oloadsw, Kl, nr, r0, R);
    677 	emit(Oadd, Kl, r0, ap, isgp ? c24 : c28);
    678 }
    679 
    680 static void
    681 apple_selvastart(Fn *fn, Params p, Ref ap)
    682 {
    683 	Ref off, stk, arg;
    684 
    685 	off = getcon(p.stk, fn);
    686 	stk = newtmp("abi", Kl, fn);
    687 	arg = newtmp("abi", Kl, fn);
    688 
    689 	emit(Ostorel, 0, R, arg, ap);
    690 	emit(Oadd, Kl, arg, stk, off);
    691 	emit(Oaddr, Kl, stk, SLOT(-1), R);
    692 }
    693 
    694 static void
    695 arm64_selvastart(Fn *fn, Params p, Ref ap)
    696 {
    697 	Ref r0, r1, rsave;
    698 
    699 	rsave = newtmp("abi", Kl, fn);
    700 
    701 	r0 = newtmp("abi", Kl, fn);
    702 	emit(Ostorel, Kw, R, r0, ap);
    703 	emit(Oadd, Kl, r0, rsave, getcon(p.stk + 192, fn));
    704 
    705 	r0 = newtmp("abi", Kl, fn);
    706 	r1 = newtmp("abi", Kl, fn);
    707 	emit(Ostorel, Kw, R, r1, r0);
    708 	emit(Oadd, Kl, r1, rsave, getcon(64, fn));
    709 	emit(Oadd, Kl, r0, ap, getcon(8, fn));
    710 
    711 	r0 = newtmp("abi", Kl, fn);
    712 	r1 = newtmp("abi", Kl, fn);
    713 	emit(Ostorel, Kw, R, r1, r0);
    714 	emit(Oadd, Kl, r1, rsave, getcon(192, fn));
    715 	emit(Oaddr, Kl, rsave, SLOT(-1), R);
    716 	emit(Oadd, Kl, r0, ap, getcon(16, fn));
    717 
    718 	r0 = newtmp("abi", Kl, fn);
    719 	emit(Ostorew, Kw, R, getcon((p.ngp-8)*8, fn), r0);
    720 	emit(Oadd, Kl, r0, ap, getcon(24, fn));
    721 
    722 	r0 = newtmp("abi", Kl, fn);
    723 	emit(Ostorew, Kw, R, getcon((p.nfp-8)*16, fn), r0);
    724 	emit(Oadd, Kl, r0, ap, getcon(28, fn));
    725 }
    726 
    727 void
    728 arm64_abi(Fn *fn)
    729 {
    730 	Blk *b;
    731 	Ins *i, *i0, *ip;
    732 	Insl *il;
    733 	int n;
    734 	Params p;
    735 
    736 	for (b=fn->start; b; b=b->link)
    737 		b->visit = 0;
    738 
    739 	/* lower parameters */
    740 	for (b=fn->start, i=b->ins; i<&b->ins[b->nins]; i++)
    741 		if (!ispar(i->op))
    742 			break;
    743 	p = selpar(fn, b->ins, i);
    744 	n = b->nins - (i - b->ins) + (&insb[NIns] - curi);
    745 	i0 = alloc(n * sizeof(Ins));
    746 	ip = icpy(ip = i0, curi, &insb[NIns] - curi);
    747 	ip = icpy(ip, i, &b->ins[b->nins] - i);
    748 	b->nins = n;
    749 	b->ins = i0;
    750 
    751 	/* lower calls, returns, and vararg instructions */
    752 	il = 0;
    753 	b = fn->start;
    754 	do {
    755 		if (!(b = b->link))
    756 			b = fn->start; /* do it last */
    757 		if (b->visit)
    758 			continue;
    759 		curi = &insb[NIns];
    760 		selret(b, fn);
    761 		for (i=&b->ins[b->nins]; i!=b->ins;)
    762 			switch ((--i)->op) {
    763 			default:
    764 				emiti(*i);
    765 				break;
    766 			case Ocall:
    767 				for (i0=i; i0>b->ins; i0--)
    768 					if (!isarg((i0-1)->op))
    769 						break;
    770 				selcall(fn, i0, i, &il);
    771 				i = i0;
    772 				break;
    773 			case Ovastart:
    774 				if (T.apple)
    775 					apple_selvastart(fn, p, i->arg[0]);
    776 				else
    777 					arm64_selvastart(fn, p, i->arg[0]);
    778 				break;
    779 			case Ovaarg:
    780 				if (T.apple)
    781 					apple_selvaarg(fn, b, i);
    782 				else
    783 					arm64_selvaarg(fn, b, i);
    784 				break;
    785 			case Oarg:
    786 			case Oargc:
    787 				die("unreachable");
    788 			}
    789 		if (b == fn->start)
    790 			for (; il; il=il->link)
    791 				emiti(il->i);
    792 		b->nins = &insb[NIns] - curi;
    793 		idup(&b->ins, curi, b->nins);
    794 	} while (b != fn->start);
    795 
    796 	if (debug['A']) {
    797 		fprintf(stderr, "\n> After ABI lowering:\n");
    798 		printfn(fn, stderr);
    799 	}
    800 }
    801 
    802 /* abi0 for apple target; introduces
    803  * necessary sign extensions in calls
    804  * and returns
    805  */
    806 void
    807 apple_extsb(Fn *fn)
    808 {
    809 	Blk *b;
    810 	Ins *i0, *i1, *i;
    811 	int j, op;
    812 	Ref r;
    813 
    814 	for (b=fn->start; b; b=b->link) {
    815 		curi = &insb[NIns];
    816 		j = b->jmp.type;
    817 		if (isretbh(j)) {
    818 			r = newtmp("abi", Kw, fn);
    819 			op = Oextsb + (j - Jretsb);
    820 			emit(op, Kw, r, b->jmp.arg, R);
    821 			b->jmp.arg = r;
    822 			b->jmp.type = Jretw;
    823 		}
    824 		for (i=&b->ins[b->nins]; i>b->ins;) {
    825 			emiti(*--i);
    826 			if (i->op != Ocall)
    827 				continue;
    828 			for (i0=i1=i; i0>b->ins; i0--)
    829 				if (!isarg((i0-1)->op))
    830 					break;
    831 			for (i=i1; i>i0;) {
    832 				emiti(*--i);
    833 				if (isargbh(i->op)) {
    834 					i->to = newtmp("abi", Kl, fn);
    835 					curi->arg[0] = i->to;
    836 				}
    837 			}
    838 			for (i=i1; i>i0;)
    839 				if (isargbh((--i)->op)) {
    840 					op = Oextsb + (i->op - Oargsb);
    841 					emit(op, Kw, i->to, i->arg[0], R);
    842 				}
    843 		}
    844 		b->nins = &insb[NIns] - curi;
    845 		idup(&b->ins, curi, b->nins);
    846 	}
    847 
    848 	if (debug['A']) {
    849 		fprintf(stderr, "\n> After Apple pre-ABI:\n");
    850 		printfn(fn, stderr);
    851 	}
    852 }