qbe

Internal scc patchset buffer for QBE
Log | Files | Refs | README | LICENSE

isel.c (15178B)


      1 #include "all.h"
      2 #include <limits.h>
      3 
      4 /* For x86_64, do the following:
      5  *
      6  * - check that constants are used only in
      7  *   places allowed
      8  * - ensure immediates always fit in 32b
      9  * - expose machine register contraints
     10  *   on instructions like division.
     11  * - implement fast locals (the streak of
     12  *   constant allocX in the first basic block)
     13  * - recognize complex addressing modes
     14  *
     15  * Invariant: the use counts that are used
     16  *            in sel() must be sound.  This
     17  *            is not so trivial, maybe the
     18  *            dce should be moved out...
     19  */
     20 
     21 typedef struct ANum ANum;
     22 
     23 struct ANum {
     24 	char n, l, r;
     25 	Ins *i;
     26 };
     27 
     28 static int amatch(Addr *, Ref, int, ANum *, Fn *);
     29 
     30 static int
     31 noimm(Ref r, Fn *fn)
     32 {
     33 	int64_t val;
     34 
     35 	if (rtype(r) != RCon)
     36 		return 0;
     37 	switch (fn->con[r.val].type) {
     38 	case CAddr:
     39 		/* we only support the 'small'
     40 		 * code model of the ABI, this
     41 		 * means that we can always
     42 		 * address data with 32bits
     43 		 */
     44 		return 0;
     45 	case CBits:
     46 		val = fn->con[r.val].bits.i;
     47 		return (val < INT32_MIN || val > INT32_MAX);
     48 	default:
     49 		die("invalid constant");
     50 	}
     51 }
     52 
     53 static int
     54 rslot(Ref r, Fn *fn)
     55 {
     56 	if (rtype(r) != RTmp)
     57 		return -1;
     58 	return fn->tmp[r.val].slot;
     59 }
     60 
     61 static void
     62 fixarg(Ref *r, int k, Ins *i, Fn *fn)
     63 {
     64 	char buf[32];
     65 	Addr a, *m;
     66 	Ref r0, r1;
     67 	int s, n, op;
     68 
     69 	r1 = r0 = *r;
     70 	s = rslot(r0, fn);
     71 	op = i ? i->op : Ocopy;
     72 	if (KBASE(k) == 1 && rtype(r0) == RCon) {
     73 		/* load floating points from memory
     74 		 * slots, they can't be used as
     75 		 * immediates
     76 		 */
     77 		r1 = MEM(fn->nmem);
     78 		vgrow(&fn->mem, ++fn->nmem);
     79 		memset(&a, 0, sizeof a);
     80 		a.offset.type = CAddr;
     81 		a.offset.local = 1;
     82 		n = gasstash(&fn->con[r0.val].bits, KWIDE(k) ? 8 : 4);
     83 		sprintf(buf, "fp%d", n);
     84 		a.offset.label = intern(buf);
     85 		fn->mem[fn->nmem-1] = a;
     86 	}
     87 	else if (op != Ocopy && k == Kl && noimm(r0, fn)) {
     88 		/* load constants that do not fit in
     89 		 * a 32bit signed integer into a
     90 		 * long temporary
     91 		 */
     92 		r1 = newtmp("isel", Kl, fn);
     93 		emit(Ocopy, Kl, r1, r0, R);
     94 	}
     95 	else if (s != -1) {
     96 		/* load fast locals' addresses into
     97 		 * temporaries right before the
     98 		 * instruction
     99 		 */
    100 		r1 = newtmp("isel", Kl, fn);
    101 		emit(Oaddr, Kl, r1, SLOT(s), R);
    102 	}
    103 	else if (!(isstore(op) && r == &i->arg[1])
    104 	&& !isload(op) && op != Ocall && rtype(r0) == RCon
    105 	&& fn->con[r0.val].type == CAddr) {
    106 		/* apple as does not support 32-bit
    107 		 * absolute addressing, use a rip-
    108 		 * relative leaq instead
    109 		 */
    110 		r1 = newtmp("isel", Kl, fn);
    111 		emit(Oaddr, Kl, r1, r0, R);
    112 	}
    113 	else if (rtype(r0) == RMem) {
    114 		/* eliminate memory operands of
    115 		 * the form $foo(%rip, ...)
    116 		 */
    117 		m = &fn->mem[r0.val];
    118 		if (req(m->base, R))
    119 		if (m->offset.type == CAddr) {
    120 			r0 = newtmp("isel", Kl, fn);
    121 			emit(Oaddr, Kl, r0, newcon(&m->offset, fn), R);
    122 			m->offset.type = CUndef;
    123 			m->base = r0;
    124 		}
    125 	}
    126 	*r = r1;
    127 }
    128 
    129 static void
    130 seladdr(Ref *r, ANum *an, Fn *fn)
    131 {
    132 	Addr a;
    133 	Ref r0;
    134 
    135 	r0 = *r;
    136 	if (rtype(r0) == RTmp) {
    137 		memset(&a, 0, sizeof a);
    138 		if (!amatch(&a, r0, an[r0.val].n, an, fn))
    139 			return;
    140 		if (!req(a.base, R))
    141 		if (a.offset.type == CAddr) {
    142 			/* apple as does not support
    143 			 * $foo(%r0, %r1, M); try to
    144 			 * rewrite it or bail out if
    145 			 * impossible
    146 			 */
    147 			if (!req(a.index, R) || rtype(a.base) != RTmp)
    148 				return;
    149 			else {
    150 				a.index = a.base;
    151 				a.scale = 1;
    152 				a.base = R;
    153 			}
    154 		}
    155 		chuse(r0, -1, fn);
    156 		vgrow(&fn->mem, ++fn->nmem);
    157 		fn->mem[fn->nmem-1] = a;
    158 		chuse(a.base, +1, fn);
    159 		chuse(a.index, +1, fn);
    160 		*r = MEM(fn->nmem-1);
    161 	}
    162 }
    163 
    164 static int
    165 cmpswap(Ref arg[2], int op)
    166 {
    167 	switch (op) {
    168 	case NCmpI+Cflt:
    169 	case NCmpI+Cfle:
    170 		return 1;
    171 	case NCmpI+Cfgt:
    172 	case NCmpI+Cfge:
    173 		return 0;
    174 	}
    175 	return rtype(arg[0]) == RCon;
    176 }
    177 
    178 static void
    179 selcmp(Ref arg[2], int k, int swap, Fn *fn)
    180 {
    181 	Ref r;
    182 	Ins *icmp;
    183 
    184 	if (swap) {
    185 		r = arg[1];
    186 		arg[1] = arg[0];
    187 		arg[0] = r;
    188 	}
    189 	emit(Oxcmp, k, R, arg[1], arg[0]);
    190 	icmp = curi;
    191 	if (rtype(arg[0]) == RCon) {
    192 		assert(k != Kw);
    193 		icmp->arg[1] = newtmp("isel", k, fn);
    194 		emit(Ocopy, k, icmp->arg[1], arg[0], R);
    195 		fixarg(&curi->arg[0], k, curi, fn);
    196 	}
    197 	fixarg(&icmp->arg[0], k, icmp, fn);
    198 	fixarg(&icmp->arg[1], k, icmp, fn);
    199 }
    200 
    201 static void
    202 sel(Ins i, ANum *an, Fn *fn)
    203 {
    204 	Ref r0, r1, tmp[7];
    205 	int x, j, k, kc, sh, swap;
    206 	Ins *i0, *i1;
    207 
    208 	if (rtype(i.to) == RTmp)
    209 	if (!isreg(i.to) && !isreg(i.arg[0]) && !isreg(i.arg[1]))
    210 	if (fn->tmp[i.to.val].nuse == 0) {
    211 		chuse(i.arg[0], -1, fn);
    212 		chuse(i.arg[1], -1, fn);
    213 		return;
    214 	}
    215 	i0 = curi;
    216 	k = i.cls;
    217 	switch (i.op) {
    218 	case Odiv:
    219 	case Orem:
    220 	case Oudiv:
    221 	case Ourem:
    222 		if (KBASE(k) == 1)
    223 			goto Emit;
    224 		if (i.op == Odiv || i.op == Oudiv)
    225 			r0 = TMP(RAX), r1 = TMP(RDX);
    226 		else
    227 			r0 = TMP(RDX), r1 = TMP(RAX);
    228 		emit(Ocopy, k, i.to, r0, R);
    229 		emit(Ocopy, k, R, r1, R);
    230 		if (rtype(i.arg[1]) == RCon) {
    231 			/* immediates not allowed for
    232 			 * divisions in x86
    233 			 */
    234 			r0 = newtmp("isel", k, fn);
    235 		} else
    236 			r0 = i.arg[1];
    237 		if (fn->tmp[r0.val].slot != -1)
    238 			err("unlikely argument %%%s in %s",
    239 				fn->tmp[r0.val].name, optab[i.op].name);
    240 		if (i.op == Odiv || i.op == Orem) {
    241 			emit(Oxidiv, k, R, r0, R);
    242 			emit(Osign, k, TMP(RDX), TMP(RAX), R);
    243 		} else {
    244 			emit(Oxdiv, k, R, r0, R);
    245 			emit(Ocopy, k, TMP(RDX), CON_Z, R);
    246 		}
    247 		emit(Ocopy, k, TMP(RAX), i.arg[0], R);
    248 		fixarg(&curi->arg[0], k, curi, fn);
    249 		if (rtype(i.arg[1]) == RCon)
    250 			emit(Ocopy, k, r0, i.arg[1], R);
    251 		break;
    252 	case Osar:
    253 	case Oshr:
    254 	case Oshl:
    255 		r0 = i.arg[1];
    256 		if (rtype(r0) == RCon)
    257 			goto Emit;
    258 		if (fn->tmp[r0.val].slot != -1)
    259 			err("unlikely argument %%%s in %s",
    260 				fn->tmp[r0.val].name, optab[i.op].name);
    261 		i.arg[1] = TMP(RCX);
    262 		emit(Ocopy, Kw, R, TMP(RCX), R);
    263 		emiti(i);
    264 		i1 = curi;
    265 		emit(Ocopy, Kw, TMP(RCX), r0, R);
    266 		fixarg(&i1->arg[0], argcls(&i, 0), i1, fn);
    267 		break;
    268 	case Ouwtof:
    269 		r0 = newtmp("utof", Kl, fn);
    270 		emit(Osltof, k, i.to, r0, R);
    271 		emit(Oextuw, Kl, r0, i.arg[0], R);
    272 		fixarg(&curi->arg[0], k, curi, fn);
    273 		break;
    274 	case Oultof:
    275 		/* %mask =l and %arg.0, 1
    276 		   %isbig =l shr %arg.0, 63
    277 		   %divided =l shr %arg.0, %isbig
    278 		   %or =l or %mask, %divided
    279 		   %float =d sltof %or
    280 		   %cast =l cast %float
    281 		   %addend =l shl %isbig, 52
    282 		   %sum =l add %cast, %addend
    283 		   %result =d cast %sum
    284 		*/
    285 		r0 = newtmp("utof", k, fn);
    286 		if (k == Ks)
    287 			kc = Kw, sh = 23;
    288 		else
    289 			kc = Kl, sh = 52;
    290 		for (j=0; j<4; j++)
    291 			tmp[j] = newtmp("utof", Kl, fn);
    292 		for (; j<7; j++)
    293 			tmp[j] = newtmp("utof", kc, fn);
    294 		emit(Ocast, k, i.to, tmp[6], R);
    295 		emit(Oadd, kc, tmp[6], tmp[4], tmp[5]);
    296 		emit(Oshl, kc, tmp[5], tmp[1], getcon(sh, fn));
    297 		emit(Ocast, kc, tmp[4], r0, R);
    298 		emit(Osltof, k, r0, tmp[3], R);
    299 		emit(Oor, Kl, tmp[3], tmp[0], tmp[2]);
    300 		emit(Oshr, Kl, tmp[2], i.arg[0], tmp[1]);
    301 		sel(*curi++, 0, fn);
    302 		emit(Oshr, Kl, tmp[1], i.arg[0], getcon(63, fn));
    303 		fixarg(&curi->arg[0], Kl, curi, fn);
    304 		emit(Oand, Kl, tmp[0], i.arg[0], getcon(1, fn));
    305 		fixarg(&curi->arg[0], Kl, curi, fn);
    306 		break;
    307 	case Ostoui:
    308 		i.op = Ostosi;
    309 		kc = Ks;
    310 		tmp[4] = getcon(0xdf000000, fn);
    311 		goto Oftoui;
    312 	case Odtoui:
    313 		i.op = Odtosi;
    314 		kc = Kd;
    315 		tmp[4] = getcon(0xc3e0000000000000, fn);
    316 	Oftoui:
    317 		if (k == Kw)
    318 			goto Emit;
    319 		r0 = newtmp("ftou", kc, fn);
    320 		for (j=0; j<4; j++)
    321 			tmp[j] = newtmp("ftou", Kl, fn);
    322 		emit(Oor, Kl, i.to, tmp[0], tmp[3]);
    323 		emit(Oand, Kl, tmp[3], tmp[2], tmp[1]);
    324 		emit(i.op, Kl, tmp[2], r0, R);
    325 		emit(Oadd, kc, r0, tmp[4], i.arg[0]);
    326 		i1 = curi; /* fixarg() can change curi */
    327 		fixarg(&i1->arg[0], kc, i1, fn);
    328 		fixarg(&i1->arg[1], kc, i1, fn);
    329 		emit(Osar, Kl, tmp[1], tmp[0], getcon(63, fn));
    330 		emit(i.op, Kl, tmp[0], i.arg[0], R);
    331 		fixarg(&curi->arg[0], Kl, curi, fn);
    332 		break;
    333 	case Onop:
    334 		break;
    335 	case Ostored:
    336 	case Ostores:
    337 	case Ostorel:
    338 	case Ostorew:
    339 	case Ostoreh:
    340 	case Ostoreb:
    341 		if (rtype(i.arg[0]) == RCon) {
    342 			if (i.op == Ostored)
    343 				i.op = Ostorel;
    344 			if (i.op == Ostores)
    345 				i.op = Ostorew;
    346 		}
    347 		seladdr(&i.arg[1], an, fn);
    348 		goto Emit;
    349 	case_Oload:
    350 		seladdr(&i.arg[0], an, fn);
    351 		goto Emit;
    352 	case Ocall:
    353 	case Osalloc:
    354 	case Ocopy:
    355 	case Oadd:
    356 	case Osub:
    357 	case Oneg:
    358 	case Omul:
    359 	case Oand:
    360 	case Oor:
    361 	case Oxor:
    362 	case Oxtest:
    363 	case Ostosi:
    364 	case Odtosi:
    365 	case Oswtof:
    366 	case Osltof:
    367 	case Oexts:
    368 	case Otruncd:
    369 	case Ocast:
    370 	case_OExt:
    371 Emit:
    372 		emiti(i);
    373 		i1 = curi; /* fixarg() can change curi */
    374 		fixarg(&i1->arg[0], argcls(&i, 0), i1, fn);
    375 		fixarg(&i1->arg[1], argcls(&i, 1), i1, fn);
    376 		break;
    377 	case Oalloc4:
    378 	case Oalloc8:
    379 	case Oalloc16:
    380 		salloc(i.to, i.arg[0], fn);
    381 		break;
    382 	default:
    383 		if (isext(i.op))
    384 			goto case_OExt;
    385 		if (isload(i.op))
    386 			goto case_Oload;
    387 		if (iscmp(i.op, &kc, &x)) {
    388 			switch (x) {
    389 			case NCmpI+Cfeq:
    390 				/* zf is set when operands are
    391 				 * unordered, so we may have to
    392 				 * check pf
    393 				 */
    394 				r0 = newtmp("isel", Kw, fn);
    395 				r1 = newtmp("isel", Kw, fn);
    396 				emit(Oand, Kw, i.to, r0, r1);
    397 				emit(Oflagfo, k, r1, R, R);
    398 				i.to = r0;
    399 				break;
    400 			case NCmpI+Cfne:
    401 				r0 = newtmp("isel", Kw, fn);
    402 				r1 = newtmp("isel", Kw, fn);
    403 				emit(Oor, Kw, i.to, r0, r1);
    404 				emit(Oflagfuo, k, r1, R, R);
    405 				i.to = r0;
    406 				break;
    407 			}
    408 			swap = cmpswap(i.arg, x);
    409 			if (swap)
    410 				x = cmpop(x);
    411 			emit(Oflag+x, k, i.to, R, R);
    412 			selcmp(i.arg, kc, swap, fn);
    413 			break;
    414 		}
    415 		die("unknown instruction %s", optab[i.op].name);
    416 	}
    417 
    418 	while (i0>curi && --i0) {
    419 		assert(rslot(i0->arg[0], fn) == -1);
    420 		assert(rslot(i0->arg[1], fn) == -1);
    421 	}
    422 }
    423 
    424 static Ins *
    425 flagi(Ins *i0, Ins *i)
    426 {
    427 	while (i>i0) {
    428 		i--;
    429 		if (amd64_op[i->op].zflag)
    430 			return i;
    431 		if (amd64_op[i->op].lflag)
    432 			continue;
    433 		return 0;
    434 	}
    435 	return 0;
    436 }
    437 
    438 static void
    439 seljmp(Blk *b, Fn *fn)
    440 {
    441 	Ref r;
    442 	int c, k, swap;
    443 	Ins *fi;
    444 	Tmp *t;
    445 
    446 	if (b->jmp.type == Jret0 || b->jmp.type == Jjmp)
    447 		return;
    448 	assert(b->jmp.type == Jjnz);
    449 	r = b->jmp.arg;
    450 	t = &fn->tmp[r.val];
    451 	b->jmp.arg = R;
    452 	assert(rtype(r) == RTmp);
    453 	if (b->s1 == b->s2) {
    454 		chuse(r, -1, fn);
    455 		b->jmp.type = Jjmp;
    456 		b->s2 = 0;
    457 		return;
    458 	}
    459 	fi = flagi(b->ins, &b->ins[b->nins]);
    460 	if (!fi || !req(fi->to, r)) {
    461 		selcmp((Ref[2]){r, CON_Z}, Kw, 0, fn); /* todo, long jnz */
    462 		b->jmp.type = Jjf + Cine;
    463 	}
    464 	else if (iscmp(fi->op, &k, &c)
    465 	     && c != NCmpI+Cfeq /* see sel() */
    466 	     && c != NCmpI+Cfne) {
    467 		swap = cmpswap(fi->arg, c);
    468 		if (swap)
    469 			c = cmpop(c);
    470 		if (t->nuse == 1) {
    471 			selcmp(fi->arg, k, swap, fn);
    472 			*fi = (Ins){.op = Onop};
    473 		}
    474 		b->jmp.type = Jjf + c;
    475 	}
    476 	else if (fi->op == Oand && t->nuse == 1
    477 	     && (rtype(fi->arg[0]) == RTmp ||
    478 	         rtype(fi->arg[1]) == RTmp)) {
    479 		fi->op = Oxtest;
    480 		fi->to = R;
    481 		b->jmp.type = Jjf + Cine;
    482 		if (rtype(fi->arg[1]) == RCon) {
    483 			r = fi->arg[1];
    484 			fi->arg[1] = fi->arg[0];
    485 			fi->arg[0] = r;
    486 		}
    487 	}
    488 	else {
    489 		/* since flags are not tracked in liveness,
    490 		 * the result of the flag-setting instruction
    491 		 * has to be marked as live
    492 		 */
    493 		if (t->nuse == 1)
    494 			emit(Ocopy, Kw, R, r, R);
    495 		b->jmp.type = Jjf + Cine;
    496 	}
    497 }
    498 
    499 static int
    500 aref(Ref r, ANum *ai)
    501 {
    502 	switch (rtype(r)) {
    503 	case RCon:
    504 		return 2;
    505 	case RTmp:
    506 		return ai[r.val].n;
    507 	default:
    508 		die("constant or temporary expected");
    509 	}
    510 }
    511 
    512 static int
    513 ascale(Ref r, Con *con)
    514 {
    515 	int64_t n;
    516 
    517 	if (rtype(r) != RCon)
    518 		return 0;
    519 	if (con[r.val].type != CBits)
    520 		return 0;
    521 	n = con[r.val].bits.i;
    522 	return n == 1 || n == 2 || n == 4 || n == 8;
    523 }
    524 
    525 static void
    526 anumber(ANum *ai, Blk *b, Con *con)
    527 {
    528 	/* This should be made obsolete by a proper
    529 	 * reassoc pass.
    530 	 *
    531 	 * Rules:
    532 	 *
    533 	 *   RTmp(_) -> 0    tmp
    534 	 *   ( RTmp(_) -> 1    slot )
    535 	 *   RCon(_) -> 2    con
    536 	 *   0 * 2   -> 3    s * i (when constant is 1,2,4,8)
    537 	 */
    538 	static char add[10][10] = {
    539 		[2] [4] = 4, [4] [2] = 4,
    540 		[2] [6] = 6, [6] [2] = 6,
    541 		[2] [7] = 7, [7] [2] = 7,
    542 		[0] [2] = 4, [2] [0] = 4, /* 4: o + b */
    543 		[0] [0] = 5,              /* 5: b + s * i */
    544 		[0] [3] = 5, [3] [0] = 5,
    545 		[2] [3] = 6, [3] [2] = 6, /* 6: o + s * i */
    546 		[2] [5] = 7, [5] [2] = 7, /* 7: o + b + s * i */
    547 		[0] [6] = 7, [6] [0] = 7,
    548 		[4] [3] = 7, [3] [4] = 7,
    549 	};
    550 	int a, a1, a2, n1, n2, t1, t2;
    551 	Ins *i;
    552 
    553 	for (i=b->ins; i<&b->ins[b->nins]; i++) {
    554 		if (rtype(i->to) == RTmp)
    555 			ai[i->to.val].i = i;
    556 		if (i->op != Oadd && i->op != Omul)
    557 			continue;
    558 		a1 = aref(i->arg[0], ai);
    559 		a2 = aref(i->arg[1], ai);
    560 		t1 = a1 != 1 && a1 != 2;
    561 		t2 = a2 != 1 && a2 != 2;
    562 		if (i->op == Oadd) {
    563 			a = add[n1 = a1][n2 = a2];
    564 			if (t1 && a < add[0][a2])
    565 				a = add[n1 = 0][n2 = a2];
    566 			if (t2 && a < add[a1][0])
    567 				a = add[n1 = a1][n2 = 0];
    568 			if (t1 && t2 && a < add[0][0])
    569 				a = add[n1 = 0][n2 = 0];
    570 		} else {
    571 			n1 = n2 = a = 0;
    572 			if (ascale(i->arg[0], con) && t2)
    573 				a = 3, n1 = 2, n2 = 0;
    574 			if (t1 && ascale(i->arg[1], con))
    575 				a = 3, n1 = 0, n2 = 2;
    576 		}
    577 		ai[i->to.val].n = a;
    578 		ai[i->to.val].l = n1;
    579 		ai[i->to.val].r = n2;
    580 	}
    581 }
    582 
    583 static int
    584 amatch(Addr *a, Ref r, int n, ANum *ai, Fn *fn)
    585 {
    586 	Ins *i;
    587 	int nl, nr, t, s;
    588 	Ref al, ar;
    589 
    590 	if (rtype(r) == RCon) {
    591 		if (!addcon(&a->offset, &fn->con[r.val]))
    592 			err("unlikely sum of $%s and $%s",
    593 				str(a->offset.label), str(fn->con[r.val].label));
    594 		return 1;
    595 	}
    596 	assert(rtype(r) == RTmp);
    597 	i = ai[r.val].i;
    598 	nl = ai[r.val].l;
    599 	nr = ai[r.val].r;
    600 	if (i) {
    601 		if (nl > nr) {
    602 			al = i->arg[1];
    603 			ar = i->arg[0];
    604 			t = nl, nl = nr, nr = t;
    605 		} else {
    606 			al = i->arg[0];
    607 			ar = i->arg[1];
    608 		}
    609 	}
    610 	switch (n) {
    611 	case 3: /* s * i */
    612 		a->index = al;
    613 		a->scale = fn->con[ar.val].bits.i;
    614 		return 0;
    615 	case 5: /* b + s * i */
    616 		switch (nr) {
    617 		case 0:
    618 			if (fn->tmp[ar.val].slot != -1) {
    619 				al = i->arg[1];
    620 				ar = i->arg[0];
    621 			}
    622 			a->index = ar;
    623 			a->scale = 1;
    624 			break;
    625 		case 3:
    626 			amatch(a, ar, nr, ai, fn);
    627 			break;
    628 		}
    629 		r = al;
    630 		/* fall through */
    631 	case 0:
    632 		s = fn->tmp[r.val].slot;
    633 		if (s != -1)
    634 			r = SLOT(s);
    635 		a->base = r;
    636 		return n || s != -1;
    637 	case 2: /* constants */
    638 	case 4: /* o + b */
    639 	case 6: /* o + s * i */
    640 	case 7: /* o + b + s * i */
    641 		amatch(a, ar, nr, ai, fn);
    642 		amatch(a, al, nl, ai, fn);
    643 		return 1;
    644 	default:
    645 		die("unreachable");
    646 	}
    647 }
    648 
    649 /* instruction selection
    650  * requires use counts (as given by parsing)
    651  */
    652 void
    653 amd64_isel(Fn *fn)
    654 {
    655 	Blk *b, **sb;
    656 	Ins *i;
    657 	Phi *p;
    658 	uint a;
    659 	int n, al;
    660 	int64_t sz;
    661 	ANum *ainfo;
    662 
    663 	/* assign slots to fast allocs */
    664 	b = fn->start;
    665 	/* specific to NAlign == 3 */ /* or change n=4 and sz /= 4 below */
    666 	for (al=Oalloc, n=4; al<=Oalloc1; al++, n*=2)
    667 		for (i=b->ins; i<&b->ins[b->nins]; i++)
    668 			if (i->op == al) {
    669 				if (rtype(i->arg[0]) != RCon)
    670 					break;
    671 				sz = fn->con[i->arg[0].val].bits.i;
    672 				if (sz < 0 || sz >= INT_MAX-15)
    673 					err("invalid alloc size %"PRId64, sz);
    674 				sz = (sz + n-1) & -n;
    675 				sz /= 4;
    676 				if (sz > INT_MAX - fn->slot)
    677 					die("alloc too large");
    678 				fn->tmp[i->to.val].slot = fn->slot;
    679 				fn->slot += sz;
    680 				*i = (Ins){.op = Onop};
    681 			}
    682 
    683 	/* process basic blocks */
    684 	n = fn->ntmp;
    685 	ainfo = emalloc(n * sizeof ainfo[0]);
    686 	for (b=fn->start; b; b=b->link) {
    687 		curi = &insb[NIns];
    688 		for (sb=(Blk*[3]){b->s1, b->s2, 0}; *sb; sb++)
    689 			for (p=(*sb)->phi; p; p=p->link) {
    690 				for (a=0; p->blk[a] != b; a++)
    691 					assert(a+1 < p->narg);
    692 				fixarg(&p->arg[a], p->cls, 0, fn);
    693 			}
    694 		memset(ainfo, 0, n * sizeof ainfo[0]);
    695 		anumber(ainfo, b, fn->con);
    696 		seljmp(b, fn);
    697 		for (i=&b->ins[b->nins]; i!=b->ins;)
    698 			sel(*--i, ainfo, fn);
    699 		b->nins = &insb[NIns] - curi;
    700 		idup(&b->ins, curi, b->nins);
    701 	}
    702 	free(ainfo);
    703 
    704 	if (debug['I']) {
    705 		fprintf(stderr, "\n> After instruction selection:\n");
    706 		printfn(fn, stderr);
    707 	}
    708 }