isel.c (5561B)
1 #include "all.h" 2 3 static int 4 memarg(Ref *r, int op, Ins *i) 5 { 6 if (isload(op) || op == Ocall) 7 return r == &i->arg[0]; 8 if (isstore(op)) 9 return r == &i->arg[1]; 10 return 0; 11 } 12 13 static int 14 immarg(Ref *r, int op, Ins *i) 15 { 16 return rv64_op[op].imm && r == &i->arg[1]; 17 } 18 19 static void 20 fixarg(Ref *r, int k, Ins *i, Fn *fn) 21 { 22 char buf[32]; 23 Ref r0, r1; 24 int s, n, op; 25 Con *c; 26 27 r0 = r1 = *r; 28 op = i ? i->op : Ocopy; 29 switch (rtype(r0)) { 30 case RCon: 31 c = &fn->con[r0.val]; 32 if (c->type == CAddr && memarg(r, op, i)) 33 break; 34 if (c->type == CBits && immarg(r, op, i)) 35 if (-2048 <= c->bits.i && c->bits.i < 2048) 36 break; 37 r1 = newtmp("isel", k, fn); 38 if (KBASE(k) == 1) { 39 /* load floating points from memory 40 * slots, they can't be used as 41 * immediates 42 */ 43 assert(c->type == CBits); 44 n = stashbits(&c->bits, KWIDE(k) ? 8 : 4); 45 vgrow(&fn->con, ++fn->ncon); 46 c = &fn->con[fn->ncon-1]; 47 sprintf(buf, "\"%sfp%d\"", T.asloc, n); 48 *c = (Con){.type = CAddr}; 49 c->sym.id = intern(buf); 50 emit(Oload, k, r1, CON(c-fn->con), R); 51 break; 52 } 53 emit(Ocopy, k, r1, r0, R); 54 break; 55 case RTmp: 56 if (isreg(r0)) 57 break; 58 s = fn->tmp[r0.val].slot; 59 if (s != -1) { 60 /* aggregate passed by value on 61 * stack, or fast local address, 62 * replace with slot if we can 63 */ 64 if (memarg(r, op, i)) { 65 r1 = SLOT(s); 66 break; 67 } 68 r1 = newtmp("isel", k, fn); 69 emit(Oaddr, k, r1, SLOT(s), R); 70 break; 71 } 72 if (k == Kw && fn->tmp[r0.val].cls == Kl) { 73 /* TODO: this sign extension isn't needed 74 * for 32-bit arithmetic instructions 75 */ 76 r1 = newtmp("isel", k, fn); 77 emit(Oextsw, Kl, r1, r0, R); 78 } else { 79 assert(k == fn->tmp[r0.val].cls); 80 } 81 break; 82 } 83 *r = r1; 84 } 85 86 static void 87 negate(Ref *pr, Fn *fn) 88 { 89 Ref r; 90 91 r = newtmp("isel", Kw, fn); 92 emit(Oxor, Kw, *pr, r, getcon(1, fn)); 93 *pr = r; 94 } 95 96 static void 97 selcmp(Ins i, int k, int op, Fn *fn) 98 { 99 Ins *icmp; 100 Ref r, r0, r1; 101 int sign, swap, neg; 102 103 switch (op) { 104 case Cieq: 105 r = newtmp("isel", k, fn); 106 emit(Oreqz, i.cls, i.to, r, R); 107 emit(Oxor, k, r, i.arg[0], i.arg[1]); 108 icmp = curi; 109 fixarg(&icmp->arg[0], k, icmp, fn); 110 fixarg(&icmp->arg[1], k, icmp, fn); 111 return; 112 case Cine: 113 r = newtmp("isel", k, fn); 114 emit(Ornez, i.cls, i.to, r, R); 115 emit(Oxor, k, r, i.arg[0], i.arg[1]); 116 icmp = curi; 117 fixarg(&icmp->arg[0], k, icmp, fn); 118 fixarg(&icmp->arg[1], k, icmp, fn); 119 return; 120 case Cisge: sign = 1, swap = 0, neg = 1; break; 121 case Cisgt: sign = 1, swap = 1, neg = 0; break; 122 case Cisle: sign = 1, swap = 1, neg = 1; break; 123 case Cislt: sign = 1, swap = 0, neg = 0; break; 124 case Ciuge: sign = 0, swap = 0, neg = 1; break; 125 case Ciugt: sign = 0, swap = 1, neg = 0; break; 126 case Ciule: sign = 0, swap = 1, neg = 1; break; 127 case Ciult: sign = 0, swap = 0, neg = 0; break; 128 case NCmpI+Cfeq: 129 case NCmpI+Cfge: 130 case NCmpI+Cfgt: 131 case NCmpI+Cfle: 132 case NCmpI+Cflt: 133 swap = 0, neg = 0; 134 break; 135 case NCmpI+Cfuo: 136 negate(&i.to, fn); 137 /* fall through */ 138 case NCmpI+Cfo: 139 r0 = newtmp("isel", i.cls, fn); 140 r1 = newtmp("isel", i.cls, fn); 141 emit(Oand, i.cls, i.to, r0, r1); 142 op = KWIDE(k) ? Oceqd : Oceqs; 143 emit(op, i.cls, r0, i.arg[0], i.arg[0]); 144 icmp = curi; 145 fixarg(&icmp->arg[0], k, icmp, fn); 146 fixarg(&icmp->arg[1], k, icmp, fn); 147 emit(op, i.cls, r1, i.arg[1], i.arg[1]); 148 icmp = curi; 149 fixarg(&icmp->arg[0], k, icmp, fn); 150 fixarg(&icmp->arg[1], k, icmp, fn); 151 return; 152 case NCmpI+Cfne: 153 swap = 0, neg = 1; 154 i.op = KWIDE(k) ? Oceqd : Oceqs; 155 break; 156 default: 157 assert(0 && "unknown comparison"); 158 } 159 if (op < NCmpI) 160 i.op = sign ? Ocsltl : Ocultl; 161 if (swap) { 162 r = i.arg[0]; 163 i.arg[0] = i.arg[1]; 164 i.arg[1] = r; 165 } 166 if (neg) 167 negate(&i.to, fn); 168 emiti(i); 169 icmp = curi; 170 fixarg(&icmp->arg[0], k, icmp, fn); 171 fixarg(&icmp->arg[1], k, icmp, fn); 172 } 173 174 static void 175 sel(Ins i, Fn *fn) 176 { 177 Ins *i0; 178 int ck, cc; 179 180 if (INRANGE(i.op, Oalloc, Oalloc1)) { 181 i0 = curi - 1; 182 salloc(i.to, i.arg[0], fn); 183 fixarg(&i0->arg[0], Kl, i0, fn); 184 return; 185 } 186 if (iscmp(i.op, &ck, &cc)) { 187 selcmp(i, ck, cc, fn); 188 return; 189 } 190 if (i.op != Onop) { 191 emiti(i); 192 i0 = curi; /* fixarg() can change curi */ 193 fixarg(&i0->arg[0], argcls(&i, 0), i0, fn); 194 fixarg(&i0->arg[1], argcls(&i, 1), i0, fn); 195 } 196 } 197 198 static void 199 seljmp(Blk *b, Fn *fn) 200 { 201 /* TODO: replace cmp+jnz with beq/bne/blt[u]/bge[u] */ 202 if (b->jmp.type == Jjnz) 203 fixarg(&b->jmp.arg, Kw, 0, fn); 204 } 205 206 void 207 rv64_isel(Fn *fn) 208 { 209 Blk *b, **sb; 210 Ins *i; 211 Phi *p; 212 uint n; 213 int al; 214 int64_t sz; 215 216 /* assign slots to fast allocs */ 217 b = fn->start; 218 /* specific to NAlign == 3 */ /* or change n=4 and sz /= 4 below */ 219 for (al=Oalloc, n=4; al<=Oalloc1; al++, n*=2) 220 for (i=b->ins; i<&b->ins[b->nins]; i++) 221 if (i->op == al) { 222 if (rtype(i->arg[0]) != RCon) 223 break; 224 sz = fn->con[i->arg[0].val].bits.i; 225 if (sz < 0 || sz >= INT_MAX-15) 226 err("invalid alloc size %"PRId64, sz); 227 sz = (sz + n-1) & -n; 228 sz /= 4; 229 if (sz > INT_MAX - fn->slot) 230 die("alloc too large"); 231 fn->tmp[i->to.val].slot = fn->slot; 232 fn->slot += sz; 233 *i = (Ins){.op = Onop}; 234 } 235 236 for (b=fn->start; b; b=b->link) { 237 curi = &insb[NIns]; 238 for (sb=(Blk*[3]){b->s1, b->s2, 0}; *sb; sb++) 239 for (p=(*sb)->phi; p; p=p->link) { 240 for (n=0; p->blk[n] != b; n++) 241 assert(n+1 < p->narg); 242 fixarg(&p->arg[n], p->cls, 0, fn); 243 } 244 seljmp(b, fn); 245 for (i=&b->ins[b->nins]; i!=b->ins;) 246 sel(*--i, fn); 247 b->nins = &insb[NIns] - curi; 248 idup(&b->ins, curi, b->nins); 249 } 250 251 if (debug['I']) { 252 fprintf(stderr, "\n> After instruction selection:\n"); 253 printfn(fn, stderr); 254 } 255 }