qbe

Internal scc patchset buffer for QBE
Log | Files | Refs | README | LICENSE

isel.c (16491B)


      1 #include "all.h"
      2 #include <limits.h>
      3 
      4 /* For x86_64, do the following:
      5  *
      6  * - check that constants are used only in
      7  *   places allowed
      8  * - ensure immediates always fit in 32b
      9  * - expose machine register contraints
     10  *   on instructions like division.
     11  * - implement fast locals (the streak of
     12  *   constant allocX in the first basic block)
     13  * - recognize complex addressing modes
     14  *
     15  * Invariant: the use counts that are used
     16  *            in sel() must be sound.  This
     17  *            is not so trivial, maybe the
     18  *            dce should be moved out...
     19  */
     20 
     21 typedef struct ANum ANum;
     22 
     23 struct ANum {
     24 	char n, l, r;
     25 	Ins *i;
     26 };
     27 
     28 static int amatch(Addr *, Ref, int, ANum *, Fn *);
     29 
     30 static int
     31 noimm(Ref r, Fn *fn)
     32 {
     33 	int64_t val;
     34 
     35 	if (rtype(r) != RCon)
     36 		return 0;
     37 	switch (fn->con[r.val].type) {
     38 	case CAddr:
     39 		/* we only support the 'small'
     40 		 * code model of the ABI, this
     41 		 * means that we can always
     42 		 * address data with 32bits
     43 		 */
     44 		return 0;
     45 	case CBits:
     46 		val = fn->con[r.val].bits.i;
     47 		return (val < INT32_MIN || val > INT32_MAX);
     48 	default:
     49 		die("invalid constant");
     50 	}
     51 }
     52 
     53 static int
     54 rslot(Ref r, Fn *fn)
     55 {
     56 	if (rtype(r) != RTmp)
     57 		return -1;
     58 	return fn->tmp[r.val].slot;
     59 }
     60 
     61 static int
     62 hascon(Ref r, Con **pc, Fn *fn)
     63 {
     64 	switch (rtype(r)) {
     65 	case RCon:
     66 		*pc = &fn->con[r.val];
     67 		return 1;
     68 	case RMem:
     69 		*pc = &fn->mem[r.val].offset;
     70 		return 1;
     71 	default:
     72 		return 0;
     73 	}
     74 }
     75 
     76 static void
     77 fixarg(Ref *r, int k, Ins *i, Fn *fn)
     78 {
     79 	char buf[32];
     80 	Addr a, *m;
     81 	Con cc, *c;
     82 	Ref r0, r1, r2, r3;
     83 	int s, n, op;
     84 
     85 	r1 = r0 = *r;
     86 	s = rslot(r0, fn);
     87 	op = i ? i->op : Ocopy;
     88 	if (KBASE(k) == 1 && rtype(r0) == RCon) {
     89 		/* load floating points from memory
     90 		 * slots, they can't be used as
     91 		 * immediates
     92 		 */
     93 		r1 = MEM(fn->nmem);
     94 		vgrow(&fn->mem, ++fn->nmem);
     95 		memset(&a, 0, sizeof a);
     96 		a.offset.type = CAddr;
     97 		n = stashbits(&fn->con[r0.val].bits, KWIDE(k) ? 8 : 4);
     98 		sprintf(buf, "\"%sfp%d\"", T.asloc, n);
     99 		a.offset.sym.id = intern(buf);
    100 		fn->mem[fn->nmem-1] = a;
    101 	}
    102 	else if (op != Ocopy && k == Kl && noimm(r0, fn)) {
    103 		/* load constants that do not fit in
    104 		 * a 32bit signed integer into a
    105 		 * long temporary
    106 		 */
    107 		r1 = newtmp("isel", Kl, fn);
    108 		emit(Ocopy, Kl, r1, r0, R);
    109 	}
    110 	else if (s != -1) {
    111 		/* load fast locals' addresses into
    112 		 * temporaries right before the
    113 		 * instruction
    114 		 */
    115 		r1 = newtmp("isel", Kl, fn);
    116 		emit(Oaddr, Kl, r1, SLOT(s), R);
    117 	}
    118 	else if (T.apple && hascon(r0, &c, fn)
    119 	&& c->type == CAddr && c->sym.type == SThr) {
    120 		r1 = newtmp("isel", Kl, fn);
    121 		if (c->bits.i) {
    122 			r2 = newtmp("isel", Kl, fn);
    123 			cc = (Con){.type = CBits};
    124 			cc.bits.i = c->bits.i;
    125 			r3 = newcon(&cc, fn);
    126 			emit(Oadd, Kl, r1, r2, r3);
    127 		} else
    128 			r2 = r1;
    129 		emit(Ocopy, Kl, r2, TMP(RAX), R);
    130 		r2 = newtmp("isel", Kl, fn);
    131 		r3 = newtmp("isel", Kl, fn);
    132 		emit(Ocall, 0, R, r3, CALL(17));
    133 		emit(Ocopy, Kl, TMP(RDI), r2, R);
    134 		emit(Oload, Kl, r3, r2, R);
    135 		cc = *c;
    136 		cc.bits.i = 0;
    137 		r3 = newcon(&cc, fn);
    138 		emit(Oload, Kl, r2, r3, R);
    139 		if (rtype(r0) == RMem) {
    140 			m = &fn->mem[r0.val];
    141 			m->offset.type = CUndef;
    142 			m->base = r1;
    143 			r1 = r0;
    144 		}
    145 	}
    146 	else if (!(isstore(op) && r == &i->arg[1])
    147 	&& !isload(op) && op != Ocall && rtype(r0) == RCon
    148 	&& fn->con[r0.val].type == CAddr) {
    149 		/* apple as does not support 32-bit
    150 		 * absolute addressing, use a rip-
    151 		 * relative leaq instead
    152 		 */
    153 		r1 = newtmp("isel", Kl, fn);
    154 		emit(Oaddr, Kl, r1, r0, R);
    155 	}
    156 	else if (rtype(r0) == RMem) {
    157 		/* eliminate memory operands of
    158 		 * the form $foo(%rip, ...)
    159 		 */
    160 		m = &fn->mem[r0.val];
    161 		if (req(m->base, R))
    162 		if (m->offset.type == CAddr) {
    163 			r0 = newtmp("isel", Kl, fn);
    164 			emit(Oaddr, Kl, r0, newcon(&m->offset, fn), R);
    165 			m->offset.type = CUndef;
    166 			m->base = r0;
    167 		}
    168 	}
    169 	*r = r1;
    170 }
    171 
    172 static void
    173 seladdr(Ref *r, ANum *an, Fn *fn)
    174 {
    175 	Addr a;
    176 	Ref r0;
    177 
    178 	r0 = *r;
    179 	if (rtype(r0) == RTmp) {
    180 		memset(&a, 0, sizeof a);
    181 		if (!amatch(&a, r0, an[r0.val].n, an, fn))
    182 			return;
    183 		if (!req(a.base, R))
    184 		if (a.offset.type == CAddr) {
    185 			/* apple as does not support
    186 			 * $foo(%r0, %r1, M); try to
    187 			 * rewrite it or bail out if
    188 			 * impossible
    189 			 */
    190 			if (!req(a.index, R) || rtype(a.base) != RTmp)
    191 				return;
    192 			else {
    193 				a.index = a.base;
    194 				a.scale = 1;
    195 				a.base = R;
    196 			}
    197 		}
    198 		chuse(r0, -1, fn);
    199 		vgrow(&fn->mem, ++fn->nmem);
    200 		fn->mem[fn->nmem-1] = a;
    201 		chuse(a.base, +1, fn);
    202 		chuse(a.index, +1, fn);
    203 		*r = MEM(fn->nmem-1);
    204 	}
    205 }
    206 
    207 static int
    208 cmpswap(Ref arg[2], int op)
    209 {
    210 	switch (op) {
    211 	case NCmpI+Cflt:
    212 	case NCmpI+Cfle:
    213 		return 1;
    214 	case NCmpI+Cfgt:
    215 	case NCmpI+Cfge:
    216 		return 0;
    217 	}
    218 	return rtype(arg[0]) == RCon;
    219 }
    220 
    221 static void
    222 selcmp(Ref arg[2], int k, int swap, Fn *fn)
    223 {
    224 	Ref r;
    225 	Ins *icmp;
    226 
    227 	if (swap) {
    228 		r = arg[1];
    229 		arg[1] = arg[0];
    230 		arg[0] = r;
    231 	}
    232 	emit(Oxcmp, k, R, arg[1], arg[0]);
    233 	icmp = curi;
    234 	if (rtype(arg[0]) == RCon) {
    235 		assert(k != Kw);
    236 		icmp->arg[1] = newtmp("isel", k, fn);
    237 		emit(Ocopy, k, icmp->arg[1], arg[0], R);
    238 		fixarg(&curi->arg[0], k, curi, fn);
    239 	}
    240 	fixarg(&icmp->arg[0], k, icmp, fn);
    241 	fixarg(&icmp->arg[1], k, icmp, fn);
    242 }
    243 
    244 static void
    245 sel(Ins i, ANum *an, Fn *fn)
    246 {
    247 	Ref r0, r1, tmp[7];
    248 	int x, j, k, kc, sh, swap;
    249 	Ins *i0, *i1;
    250 
    251 	if (rtype(i.to) == RTmp)
    252 	if (!isreg(i.to) && !isreg(i.arg[0]) && !isreg(i.arg[1]))
    253 	if (fn->tmp[i.to.val].nuse == 0) {
    254 		chuse(i.arg[0], -1, fn);
    255 		chuse(i.arg[1], -1, fn);
    256 		return;
    257 	}
    258 	i0 = curi;
    259 	k = i.cls;
    260 	switch (i.op) {
    261 	case Odiv:
    262 	case Orem:
    263 	case Oudiv:
    264 	case Ourem:
    265 		if (KBASE(k) == 1)
    266 			goto Emit;
    267 		if (i.op == Odiv || i.op == Oudiv)
    268 			r0 = TMP(RAX), r1 = TMP(RDX);
    269 		else
    270 			r0 = TMP(RDX), r1 = TMP(RAX);
    271 		emit(Ocopy, k, i.to, r0, R);
    272 		emit(Ocopy, k, R, r1, R);
    273 		if (rtype(i.arg[1]) == RCon) {
    274 			/* immediates not allowed for
    275 			 * divisions in x86
    276 			 */
    277 			r0 = newtmp("isel", k, fn);
    278 		} else
    279 			r0 = i.arg[1];
    280 		if (fn->tmp[r0.val].slot != -1)
    281 			err("unlikely argument %%%s in %s",
    282 				fn->tmp[r0.val].name, optab[i.op].name);
    283 		if (i.op == Odiv || i.op == Orem) {
    284 			emit(Oxidiv, k, R, r0, R);
    285 			emit(Osign, k, TMP(RDX), TMP(RAX), R);
    286 		} else {
    287 			emit(Oxdiv, k, R, r0, R);
    288 			emit(Ocopy, k, TMP(RDX), CON_Z, R);
    289 		}
    290 		emit(Ocopy, k, TMP(RAX), i.arg[0], R);
    291 		fixarg(&curi->arg[0], k, curi, fn);
    292 		if (rtype(i.arg[1]) == RCon)
    293 			emit(Ocopy, k, r0, i.arg[1], R);
    294 		break;
    295 	case Osar:
    296 	case Oshr:
    297 	case Oshl:
    298 		r0 = i.arg[1];
    299 		if (rtype(r0) == RCon)
    300 			goto Emit;
    301 		if (fn->tmp[r0.val].slot != -1)
    302 			err("unlikely argument %%%s in %s",
    303 				fn->tmp[r0.val].name, optab[i.op].name);
    304 		i.arg[1] = TMP(RCX);
    305 		emit(Ocopy, Kw, R, TMP(RCX), R);
    306 		emiti(i);
    307 		i1 = curi;
    308 		emit(Ocopy, Kw, TMP(RCX), r0, R);
    309 		fixarg(&i1->arg[0], argcls(&i, 0), i1, fn);
    310 		break;
    311 	case Ouwtof:
    312 		r0 = newtmp("utof", Kl, fn);
    313 		emit(Osltof, k, i.to, r0, R);
    314 		emit(Oextuw, Kl, r0, i.arg[0], R);
    315 		fixarg(&curi->arg[0], k, curi, fn);
    316 		break;
    317 	case Oultof:
    318 		/* %mask =l and %arg.0, 1
    319 		 * %isbig =l shr %arg.0, 63
    320 		 * %divided =l shr %arg.0, %isbig
    321 		 * %or =l or %mask, %divided
    322 		 * %float =d sltof %or
    323 		 * %cast =l cast %float
    324 		 * %addend =l shl %isbig, 52
    325 		 * %sum =l add %cast, %addend
    326 		 * %result =d cast %sum
    327 		 */
    328 		r0 = newtmp("utof", k, fn);
    329 		if (k == Ks)
    330 			kc = Kw, sh = 23;
    331 		else
    332 			kc = Kl, sh = 52;
    333 		for (j=0; j<4; j++)
    334 			tmp[j] = newtmp("utof", Kl, fn);
    335 		for (; j<7; j++)
    336 			tmp[j] = newtmp("utof", kc, fn);
    337 		emit(Ocast, k, i.to, tmp[6], R);
    338 		emit(Oadd, kc, tmp[6], tmp[4], tmp[5]);
    339 		emit(Oshl, kc, tmp[5], tmp[1], getcon(sh, fn));
    340 		emit(Ocast, kc, tmp[4], r0, R);
    341 		emit(Osltof, k, r0, tmp[3], R);
    342 		emit(Oor, Kl, tmp[3], tmp[0], tmp[2]);
    343 		emit(Oshr, Kl, tmp[2], i.arg[0], tmp[1]);
    344 		sel(*curi++, 0, fn);
    345 		emit(Oshr, Kl, tmp[1], i.arg[0], getcon(63, fn));
    346 		fixarg(&curi->arg[0], Kl, curi, fn);
    347 		emit(Oand, Kl, tmp[0], i.arg[0], getcon(1, fn));
    348 		fixarg(&curi->arg[0], Kl, curi, fn);
    349 		break;
    350 	case Ostoui:
    351 		i.op = Ostosi;
    352 		kc = Ks;
    353 		tmp[4] = getcon(0xdf000000, fn);
    354 		goto Oftoui;
    355 	case Odtoui:
    356 		i.op = Odtosi;
    357 		kc = Kd;
    358 		tmp[4] = getcon(0xc3e0000000000000, fn);
    359 	Oftoui:
    360 		if (k == Kw) {
    361 			r0 = newtmp("ftou", Kl, fn);
    362 			emit(Ocopy, Kw, i.to, r0, R);
    363 			i.cls = Kl;
    364 			i.to = r0;
    365 			goto Emit;
    366 		}
    367 		/* %try0 =l {s,d}tosi %fp
    368 		 * %mask =l sar %try0, 63
    369 		 *
    370 		 *    mask is all ones if the first
    371 		 *    try was oob, all zeroes o.w.
    372 		 *
    373 		 * %fps ={s,d} sub %fp, (1<<63)
    374 		 * %try1 =l {s,d}tosi %fps
    375 		 *
    376 		 * %tmp =l and %mask, %try1
    377 		 * %res =l or %tmp, %try0
    378 		 */
    379 		r0 = newtmp("ftou", kc, fn);
    380 		for (j=0; j<4; j++)
    381 			tmp[j] = newtmp("ftou", Kl, fn);
    382 		emit(Oor, Kl, i.to, tmp[0], tmp[3]);
    383 		emit(Oand, Kl, tmp[3], tmp[2], tmp[1]);
    384 		emit(i.op, Kl, tmp[2], r0, R);
    385 		emit(Oadd, kc, r0, tmp[4], i.arg[0]);
    386 		i1 = curi; /* fixarg() can change curi */
    387 		fixarg(&i1->arg[0], kc, i1, fn);
    388 		fixarg(&i1->arg[1], kc, i1, fn);
    389 		emit(Osar, Kl, tmp[1], tmp[0], getcon(63, fn));
    390 		emit(i.op, Kl, tmp[0], i.arg[0], R);
    391 		fixarg(&curi->arg[0], Kl, curi, fn);
    392 		break;
    393 	case Onop:
    394 		break;
    395 	case Ostored:
    396 	case Ostores:
    397 	case Ostorel:
    398 	case Ostorew:
    399 	case Ostoreh:
    400 	case Ostoreb:
    401 		if (rtype(i.arg[0]) == RCon) {
    402 			if (i.op == Ostored)
    403 				i.op = Ostorel;
    404 			if (i.op == Ostores)
    405 				i.op = Ostorew;
    406 		}
    407 		seladdr(&i.arg[1], an, fn);
    408 		goto Emit;
    409 	case_Oload:
    410 		seladdr(&i.arg[0], an, fn);
    411 		goto Emit;
    412 	case Odbgloc:
    413 	case Ocall:
    414 	case Osalloc:
    415 	case Ocopy:
    416 	case Oadd:
    417 	case Osub:
    418 	case Oneg:
    419 	case Omul:
    420 	case Oand:
    421 	case Oor:
    422 	case Oxor:
    423 	case Oxtest:
    424 	case Ostosi:
    425 	case Odtosi:
    426 	case Oswtof:
    427 	case Osltof:
    428 	case Oexts:
    429 	case Otruncd:
    430 	case Ocast:
    431 	case_OExt:
    432 Emit:
    433 		emiti(i);
    434 		i1 = curi; /* fixarg() can change curi */
    435 		fixarg(&i1->arg[0], argcls(&i, 0), i1, fn);
    436 		fixarg(&i1->arg[1], argcls(&i, 1), i1, fn);
    437 		break;
    438 	case Oalloc4:
    439 	case Oalloc8:
    440 	case Oalloc16:
    441 		salloc(i.to, i.arg[0], fn);
    442 		break;
    443 	default:
    444 		if (isext(i.op))
    445 			goto case_OExt;
    446 		if (isload(i.op))
    447 			goto case_Oload;
    448 		if (iscmp(i.op, &kc, &x)) {
    449 			switch (x) {
    450 			case NCmpI+Cfeq:
    451 				/* zf is set when operands are
    452 				 * unordered, so we may have to
    453 				 * check pf
    454 				 */
    455 				r0 = newtmp("isel", Kw, fn);
    456 				r1 = newtmp("isel", Kw, fn);
    457 				emit(Oand, Kw, i.to, r0, r1);
    458 				emit(Oflagfo, k, r1, R, R);
    459 				i.to = r0;
    460 				break;
    461 			case NCmpI+Cfne:
    462 				r0 = newtmp("isel", Kw, fn);
    463 				r1 = newtmp("isel", Kw, fn);
    464 				emit(Oor, Kw, i.to, r0, r1);
    465 				emit(Oflagfuo, k, r1, R, R);
    466 				i.to = r0;
    467 				break;
    468 			}
    469 			swap = cmpswap(i.arg, x);
    470 			if (swap)
    471 				x = cmpop(x);
    472 			emit(Oflag+x, k, i.to, R, R);
    473 			selcmp(i.arg, kc, swap, fn);
    474 			break;
    475 		}
    476 		die("unknown instruction %s", optab[i.op].name);
    477 	}
    478 
    479 	while (i0>curi && --i0) {
    480 		assert(rslot(i0->arg[0], fn) == -1);
    481 		assert(rslot(i0->arg[1], fn) == -1);
    482 	}
    483 }
    484 
    485 static Ins *
    486 flagi(Ins *i0, Ins *i)
    487 {
    488 	while (i>i0) {
    489 		i--;
    490 		if (amd64_op[i->op].zflag)
    491 			return i;
    492 		if (amd64_op[i->op].lflag)
    493 			continue;
    494 		return 0;
    495 	}
    496 	return 0;
    497 }
    498 
    499 static void
    500 seljmp(Blk *b, Fn *fn)
    501 {
    502 	Ref r;
    503 	int c, k, swap;
    504 	Ins *fi;
    505 	Tmp *t;
    506 
    507 	if (b->jmp.type == Jret0
    508 	|| b->jmp.type == Jjmp
    509 	|| b->jmp.type == Jhlt)
    510 		return;
    511 	assert(b->jmp.type == Jjnz);
    512 	r = b->jmp.arg;
    513 	t = &fn->tmp[r.val];
    514 	b->jmp.arg = R;
    515 	assert(rtype(r) == RTmp);
    516 	if (b->s1 == b->s2) {
    517 		chuse(r, -1, fn);
    518 		b->jmp.type = Jjmp;
    519 		b->s2 = 0;
    520 		return;
    521 	}
    522 	fi = flagi(b->ins, &b->ins[b->nins]);
    523 	if (!fi || !req(fi->to, r)) {
    524 		selcmp((Ref[2]){r, CON_Z}, Kw, 0, fn);
    525 		b->jmp.type = Jjf + Cine;
    526 	}
    527 	else if (iscmp(fi->op, &k, &c)
    528 	     && c != NCmpI+Cfeq /* see sel() */
    529 	     && c != NCmpI+Cfne) {
    530 		swap = cmpswap(fi->arg, c);
    531 		if (swap)
    532 			c = cmpop(c);
    533 		if (t->nuse == 1) {
    534 			selcmp(fi->arg, k, swap, fn);
    535 			*fi = (Ins){.op = Onop};
    536 		}
    537 		b->jmp.type = Jjf + c;
    538 	}
    539 	else if (fi->op == Oand && t->nuse == 1
    540 	     && (rtype(fi->arg[0]) == RTmp ||
    541 	         rtype(fi->arg[1]) == RTmp)) {
    542 		fi->op = Oxtest;
    543 		fi->to = R;
    544 		b->jmp.type = Jjf + Cine;
    545 		if (rtype(fi->arg[1]) == RCon) {
    546 			r = fi->arg[1];
    547 			fi->arg[1] = fi->arg[0];
    548 			fi->arg[0] = r;
    549 		}
    550 	}
    551 	else {
    552 		/* since flags are not tracked in liveness,
    553 		 * the result of the flag-setting instruction
    554 		 * has to be marked as live
    555 		 */
    556 		if (t->nuse == 1)
    557 			emit(Ocopy, Kw, R, r, R);
    558 		b->jmp.type = Jjf + Cine;
    559 	}
    560 }
    561 
    562 static int
    563 aref(Ref r, ANum *ai)
    564 {
    565 	switch (rtype(r)) {
    566 	case RCon:
    567 		return 2;
    568 	case RTmp:
    569 		return ai[r.val].n;
    570 	default:
    571 		die("constant or temporary expected");
    572 	}
    573 }
    574 
    575 static int
    576 ascale(Ref r, Con *con)
    577 {
    578 	int64_t n;
    579 
    580 	if (rtype(r) != RCon)
    581 		return 0;
    582 	if (con[r.val].type != CBits)
    583 		return 0;
    584 	n = con[r.val].bits.i;
    585 	return n == 1 || n == 2 || n == 4 || n == 8;
    586 }
    587 
    588 static void
    589 anumber(ANum *ai, Blk *b, Con *con)
    590 {
    591 	/* This should be made obsolete by a proper
    592 	 * reassoc pass.
    593 	 *
    594 	 * Rules:
    595 	 *
    596 	 *   RTmp(_) -> 0    tmp
    597 	 *   ( RTmp(_) -> 1    slot )
    598 	 *   RCon(_) -> 2    con
    599 	 *   0 * 2   -> 3    s * i (when constant is 1,2,4,8)
    600 	 */
    601 	static char add[10][10] = {
    602 		[2] [4] = 4, [4] [2] = 4,
    603 		[2] [6] = 6, [6] [2] = 6,
    604 		[2] [7] = 7, [7] [2] = 7,
    605 		[0] [2] = 4, [2] [0] = 4, /* 4: o + b */
    606 		[0] [0] = 5,              /* 5: b + s * i */
    607 		[0] [3] = 5, [3] [0] = 5,
    608 		[2] [3] = 6, [3] [2] = 6, /* 6: o + s * i */
    609 		[2] [5] = 7, [5] [2] = 7, /* 7: o + b + s * i */
    610 		[0] [6] = 7, [6] [0] = 7,
    611 		[4] [3] = 7, [3] [4] = 7,
    612 	};
    613 	int a, a1, a2, n1, n2, t1, t2;
    614 	Ins *i;
    615 
    616 	for (i=b->ins; i<&b->ins[b->nins]; i++) {
    617 		if (rtype(i->to) == RTmp)
    618 			ai[i->to.val].i = i;
    619 		if (i->op != Oadd && i->op != Omul)
    620 			continue;
    621 		a1 = aref(i->arg[0], ai);
    622 		a2 = aref(i->arg[1], ai);
    623 		t1 = a1 != 1 && a1 != 2;
    624 		t2 = a2 != 1 && a2 != 2;
    625 		if (i->op == Oadd) {
    626 			a = add[n1 = a1][n2 = a2];
    627 			if (t1 && a < add[0][a2])
    628 				a = add[n1 = 0][n2 = a2];
    629 			if (t2 && a < add[a1][0])
    630 				a = add[n1 = a1][n2 = 0];
    631 			if (t1 && t2 && a < add[0][0])
    632 				a = add[n1 = 0][n2 = 0];
    633 		} else {
    634 			n1 = n2 = a = 0;
    635 			if (ascale(i->arg[0], con) && t2)
    636 				a = 3, n1 = 2, n2 = 0;
    637 			if (t1 && ascale(i->arg[1], con))
    638 				a = 3, n1 = 0, n2 = 2;
    639 		}
    640 		ai[i->to.val].n = a;
    641 		ai[i->to.val].l = n1;
    642 		ai[i->to.val].r = n2;
    643 	}
    644 }
    645 
    646 static int
    647 amatch(Addr *a, Ref r, int n, ANum *ai, Fn *fn)
    648 {
    649 	Ins *i;
    650 	int nl, nr, t, s;
    651 	Ref al, ar;
    652 
    653 	if (rtype(r) == RCon) {
    654 		if (!addcon(&a->offset, &fn->con[r.val]))
    655 			err("unlikely sum of $%s and $%s",
    656 				str(a->offset.sym.id),
    657 				str(fn->con[r.val].sym.id));
    658 		return 1;
    659 	}
    660 	assert(rtype(r) == RTmp);
    661 	i = ai[r.val].i;
    662 	nl = ai[r.val].l;
    663 	nr = ai[r.val].r;
    664 	if (i) {
    665 		if (nl > nr) {
    666 			al = i->arg[1];
    667 			ar = i->arg[0];
    668 			t = nl, nl = nr, nr = t;
    669 		} else {
    670 			al = i->arg[0];
    671 			ar = i->arg[1];
    672 		}
    673 	}
    674 	switch (n) {
    675 	case 3: /* s * i */
    676 		a->index = al;
    677 		a->scale = fn->con[ar.val].bits.i;
    678 		return 0;
    679 	case 5: /* b + s * i */
    680 		switch (nr) {
    681 		case 0:
    682 			if (fn->tmp[ar.val].slot != -1) {
    683 				al = i->arg[1];
    684 				ar = i->arg[0];
    685 			}
    686 			a->index = ar;
    687 			a->scale = 1;
    688 			break;
    689 		case 3:
    690 			amatch(a, ar, nr, ai, fn);
    691 			break;
    692 		}
    693 		r = al;
    694 		/* fall through */
    695 	case 0:
    696 		s = fn->tmp[r.val].slot;
    697 		if (s != -1)
    698 			r = SLOT(s);
    699 		a->base = r;
    700 		return n || s != -1;
    701 	case 2: /* constants */
    702 	case 4: /* o + b */
    703 	case 6: /* o + s * i */
    704 	case 7: /* o + b + s * i */
    705 		amatch(a, ar, nr, ai, fn);
    706 		amatch(a, al, nl, ai, fn);
    707 		return 1;
    708 	default:
    709 		die("unreachable");
    710 	}
    711 }
    712 
    713 /* instruction selection
    714  * requires use counts (as given by parsing)
    715  */
    716 void
    717 amd64_isel(Fn *fn)
    718 {
    719 	Blk *b, **sb;
    720 	Ins *i;
    721 	Phi *p;
    722 	uint a;
    723 	int n, al;
    724 	int64_t sz;
    725 	ANum *ainfo;
    726 
    727 	/* assign slots to fast allocs */
    728 	b = fn->start;
    729 	/* specific to NAlign == 3 */ /* or change n=4 and sz /= 4 below */
    730 	for (al=Oalloc, n=4; al<=Oalloc1; al++, n*=2)
    731 		for (i=b->ins; i<&b->ins[b->nins]; i++)
    732 			if (i->op == al) {
    733 				if (rtype(i->arg[0]) != RCon)
    734 					break;
    735 				sz = fn->con[i->arg[0].val].bits.i;
    736 				if (sz < 0 || sz >= INT_MAX-15)
    737 					err("invalid alloc size %"PRId64, sz);
    738 				sz = (sz + n-1) & -n;
    739 				sz /= 4;
    740 				if (sz > INT_MAX - fn->slot)
    741 					die("alloc too large");
    742 				fn->tmp[i->to.val].slot = fn->slot;
    743 				fn->slot += sz;
    744 				*i = (Ins){.op = Onop};
    745 			}
    746 
    747 	/* process basic blocks */
    748 	n = fn->ntmp;
    749 	ainfo = emalloc(n * sizeof ainfo[0]);
    750 	for (b=fn->start; b; b=b->link) {
    751 		curi = &insb[NIns];
    752 		for (sb=(Blk*[3]){b->s1, b->s2, 0}; *sb; sb++)
    753 			for (p=(*sb)->phi; p; p=p->link) {
    754 				for (a=0; p->blk[a] != b; a++)
    755 					assert(a+1 < p->narg);
    756 				fixarg(&p->arg[a], p->cls, 0, fn);
    757 			}
    758 		memset(ainfo, 0, n * sizeof ainfo[0]);
    759 		anumber(ainfo, b, fn->con);
    760 		seljmp(b, fn);
    761 		for (i=&b->ins[b->nins]; i!=b->ins;)
    762 			sel(*--i, ainfo, fn);
    763 		b->nins = &insb[NIns] - curi;
    764 		idup(&b->ins, curi, b->nins);
    765 	}
    766 	free(ainfo);
    767 
    768 	if (debug['I']) {
    769 		fprintf(stderr, "\n> After instruction selection:\n");
    770 		printfn(fn, stderr);
    771 	}
    772 }