commit 61090c758d36f3919a222efe01c9794fdf7987ef
parent 96836855a55cd28f1449b4a58d1e5301669350c0
Author: Quentin Carbonneaux <quentin@c9x.me>
Date: Sat, 8 Apr 2017 21:31:59 -0400
new arm64 backend, yeepee
Diffstat:
M | Makefile | | | 17 | ++++++++++++++--- |
M | README | | | 4 | +++- |
A | arm64/abi.c | | | 703 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | arm64/all.h | | | 37 | +++++++++++++++++++++++++++++++++++++ |
A | arm64/emit.c | | | 455 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | arm64/isel.c | | | 266 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | arm64/targ.c | | | 51 | +++++++++++++++++++++++++++++++++++++++++++++++++++ |
M | main.c | | | 2 | ++ |
M | ops.h | | | 3 | +++ |
M | parse.c | | | 3 | +++ |
10 files changed, 1537 insertions(+), 4 deletions(-)
diff --git a/Makefile b/Makefile
@@ -6,10 +6,12 @@ OBJDIR = obj
SRC = main.c util.c parse.c cfg.c mem.c ssa.c alias.c load.c copy.c \
fold.c live.c spill.c rega.c gas.c
AMD64SRC = amd64/targ.c amd64/sysv.c amd64/isel.c amd64/emit.c
-SRCALL = $(SRC) $(AMD64SRC)
+ARM64SRC = arm64/targ.c arm64/abi.c arm64/isel.c arm64/emit.c
+SRCALL = $(SRC) $(AMD64SRC) $(ARM64SRC)
AMD64OBJ = $(AMD64SRC:%.c=$(OBJDIR)/%.o)
-OBJ = $(SRC:%.c=$(OBJDIR)/%.o) $(AMD64OBJ)
+ARM64OBJ = $(ARM64SRC:%.c=$(OBJDIR)/%.o)
+OBJ = $(SRC:%.c=$(OBJDIR)/%.o) $(AMD64OBJ) $(ARM64OBJ)
CFLAGS += -Wall -Wextra -std=c99 -g -pedantic
@@ -24,10 +26,12 @@ $(OBJDIR)/%.o: %.c $(OBJDIR)/timestamp
$(OBJDIR)/timestamp:
@mkdir -p $(OBJDIR)
@mkdir -p $(OBJDIR)/amd64
+ @mkdir -p $(OBJDIR)/arm64
@touch $@
$(OBJ): all.h ops.h
$(AMD64OBJ): amd64/all.h
+$(ARM64OBJ): arm64/all.h
obj/main.o: config.h
config.h:
@@ -38,7 +42,14 @@ config.h:
;; \
*) \
echo "#define Defasm Gaself"; \
- echo "#define Deftgt T_amd64_sysv"; \
+ case `uname -m` in \
+ *aarch64*) \
+ echo "#define Deftgt T_arm64"; \
+ ;; \
+ *) \
+ echo "#define Deftgt T_amd64_sysv";\
+ ;; \
+ esac \
;; \
esac > $@
diff --git a/README b/README
@@ -3,7 +3,9 @@ QBE - Backend Compiler http://c9x.me/compile/
doc/ Documentation.
minic/ An example C frontend for QBE.
tools/ Miscellaneous tools (testing).
-test/ Unit tests.
+test/ Tests.
+amd64/
+arm64/ Architecture-specific code.
The LICENSE file applies to all files distributed.
diff --git a/arm64/abi.c b/arm64/abi.c
@@ -0,0 +1,703 @@
+#include "all.h"
+
+typedef struct Class_ Class;
+typedef struct Insl Insl;
+typedef struct Params Params;
+/* Flag bits or-ed into Class_.class by typclass() and argsclass(). */
+enum {
+ Cstk = 1, /* pass on the stack */
+ Cptr = 2, /* replaced by a pointer */
+};
+/* How one argument, parameter, or return value is passed.
+ * typclass() fills this for aggregates; for scalar values
+ * argsclass() sets cls[0], size and reg[0] itself.
+ */
+struct Class_ {
+ char class; /* combination of Cstk and Cptr */
+ char ishfa; /* result of isfloatv() on the type */
+ struct {
+ char base; /* set to Kx, then updated by isfloatv() to Ks or Kd */
+ uchar size; /* type size divided by the element width */
+ } hfa;
+ uint size; /* bytes, rounded up to 8 (8 when Cptr is set) */
+ Typ *t;
+ uchar nreg; /* number of entries filled in reg[] and cls[] by typclass() */
+ uchar ngp; /* gp registers taken by typclass() */
+ uchar nfp; /* fp registers taken by typclass() */
+ int reg[4];
+ int cls[4];
+};
+/* List node holding one instruction.  stkblob() pushes alloc
+ * instructions on such a list; selpar() and arm64_abi() later
+ * walk it and hand each instruction to emiti().
+ */
+struct Insl {
+ Ins i;
+ Insl *link;
+};
+/* Result of selpar(), consumed by selvastart(). */
+struct Params {
+ uint ngp; /* gp argument registers used by the parameters */
+ uint nfp; /* fp argument registers used by the parameters */
+ uint nstk; /* 8-byte stack slots used by the parameters */
+};
+/* Argument registers in allocation order.  Only the first 8
+ * entries are initialized.  NOTE(review): the arrays are
+ * presumably declared with 12 entries so that typclass() can
+ * read candidate registers past the 8th before argsclass()
+ * decides that the value goes on the stack -- confirm.
+ */
+static int gpreg[12] = {R0, R1, R2, R3, R4, R5, R6, R7};
+static int fpreg[12] = {V0, V1, V2, V3, V4, V5, V6, V7};
+
+/* layout of call's second argument (RCall)
+ *
+ *  29   13    9    5   2  0
+ *  |0.00|x|xxxx|xxxx|xxx|xx|                range
+ *        |  |    |    |  ` gp regs returned (0..2)
+ *        |  |    |    ` fp regs returned    (0..4)
+ *        |  |    ` gp regs passed           (0..8)
+ *        |  ` fp regs passed                (0..8)
+ *        ` is x8 used                       (0..1)
+ */
+/* Tell whether the aggregate t is made only of floating-point
+ * fields that all have the same class.  An Fs field requires
+ * that *cls is not Kd and sets it to Ks; an Fd field requires
+ * that *cls is not Ks and sets it to Kd; an FTyp field is
+ * checked recursively with the same *cls.  Returns 1 when every
+ * field of every union variant passes and 0 as soon as one does
+ * not (*cls may already have been updated by then).  The caller
+ * initializes *cls; typclass() passes Kx.
+ */
+static int
+isfloatv(Typ *t, char *cls)
+{
+ Field *f;
+ uint n;
+
+ for (n=0; n<t->nunion; n++)
+ for (f=t->fields[n]; f->type != FEnd; f++)
+ switch (f->type) {
+ case Fs:
+ if (*cls == Kd)
+ return 0;
+ *cls = Ks;
+ break;
+ case Fd:
+ if (*cls == Ks)
+ return 0;
+ *cls = Kd;
+ break;
+ case FTyp:
+ if (isfloatv(&typ[f->len], cls))
+ break;
+ default: /* also reached by fallthrough when the nested type fails */
+ return 0;
+ }
+ return 1;
+}
+/* Classify the aggregate type t into *c.  gp and fp point to
+ * the next candidate entries of gpreg[] and fpreg[]; they are
+ * local copies, so reading registers through them does not
+ * advance the caller's cursors.
+ *
+ * A type that is dark, empty, or larger than 16 bytes once
+ * rounded up to 8 gets Cptr, size 8, and no registers (nreg,
+ * reg[] and cls[] are left untouched in that case).  Otherwise
+ * an aggregate accepted by isfloatv() takes one fp register per
+ * element and any other aggregate one gp register of class Kl
+ * per 8 bytes.
+ */
+static void
+typclass(Class *c, Typ *t, int *gp, int *fp)
+{
+ uint64_t sz;
+ uint n;
+
+ sz = (t->size + 7) & -8;
+ c->t = t;
+ c->class = 0;
+ c->ngp = 0;
+ c->nfp = 0;
+
+ if (t->align > 4) /* align looks like a log2 value (4 -> 16 bytes) -- confirm */
+ err("alignments larger than 16 are not supported");
+
+ if (t->dark || sz > 16 || sz == 0) {
+ /* large structs are replaced by a
+ * pointer to some caller-allocated
+ * memory */
+ c->class |= Cptr;
+ c->size = 8;
+ return;
+ }
+
+ c->size = sz;
+ c->hfa.base = Kx;
+ c->ishfa = isfloatv(t, &c->hfa.base);
+ c->hfa.size = t->size/(KWIDE(c->hfa.base) ? 8 : 4);
+
+ if (c->ishfa)
+ for (n=0; n<c->hfa.size; n++, c->nfp++) {
+ c->reg[n] = *fp++;
+ c->cls[n] = c->hfa.base;
+ }
+ else
+ for (n=0; n<sz/8; n++, c->ngp++) {
+ c->reg[n] = *gp++;
+ c->cls[n] = Kl;
+ }
+
+ c->nreg = n;
+}
+/* Create nreg fresh temporaries, tmp[n] of class cls[n], and
+ * emit for each one an address computation mem+off and a store
+ * of tmp[n] to that address.  off grows by 8 after a wide class
+ * and by 4 otherwise.  At most 4 temporaries are supported.
+ *
+ * NOTE(review): emit() is defined elsewhere; the store is
+ * emitted before the add that computes its address, which fits
+ * an instruction buffer filled from &insb[NIns] downwards.
+ */
+static void
+sttmps(Ref tmp[], int cls[], uint nreg, Ref mem, Fn *fn)
+{
+ static int st[] = {
+ [Kw] = Ostorew, [Kl] = Ostorel,
+ [Ks] = Ostores, [Kd] = Ostored
+ };
+ uint n;
+ uint64_t off;
+ Ref r;
+
+ assert(nreg <= 4);
+ off = 0;
+ for (n=0; n<nreg; n++) {
+ tmp[n] = newtmp("abi", cls[n], fn);
+ r = newtmp("abi", Kl, fn);
+ emit(st[cls[n]], 0, R, tmp[n], r);
+ emit(Oadd, Kl, r, mem, getcon(off, fn));
+ off += KWIDE(cls[n]) ? 8 : 4;
+ }
+}
+/* Emit loads of n consecutive values starting at mem into the
+ * registers reg[0..n-1], using class cls[i] for the i-th load.
+ * The offset grows by 8 after a wide class and by 4 otherwise.
+ */
+static void
+ldregs(int reg[], int cls[], int n, Ref mem, Fn *fn)
+{
+ int i;
+ uint64_t off;
+ Ref r;
+
+ off = 0;
+ for (i=0; i<n; i++) {
+ r = newtmp("abi", Kl, fn);
+ emit(Oload, cls[i], TMP(reg[i]), r, R);
+ emit(Oadd, Kl, r, mem, getcon(off, fn));
+ off += KWIDE(cls[i]) ? 8 : 4;
+ }
+}
+/* Lower the return jump of block b.  Nothing is done when the
+ * jump is not a return or is already Jret0.  Otherwise the jump
+ * becomes Jret0 and its argument becomes CALL(cty), where cty
+ * holds the number of gp registers returned in bits 0-1 and the
+ * number of fp registers returned from bit 2 up.
+ *
+ * For Jretc the return type is classified: a Cptr aggregate is
+ * copied with blit() to the address kept in fn->retr, any other
+ * aggregate is loaded into its registers.  A scalar is copied
+ * to R0 or V0 depending on its class.
+ */
+static void
+selret(Blk *b, Fn *fn)
+{
+ int j, k, cty;
+ Ref r;
+ Class cr;
+
+ j = b->jmp.type;
+
+ if (!isret(j) || j == Jret0)
+ return;
+
+ r = b->jmp.arg;
+ b->jmp.type = Jret0;
+
+ if (j == Jretc) {
+ typclass(&cr, &typ[fn->retty], gpreg, fpreg);
+ cty = (cr.nfp << 2) | cr.ngp;
+ if (cr.class & Cptr) {
+ assert(rtype(fn->retr) == RTmp);
+ blit(fn->retr, 0, r, cr.t->size, fn);
+ } else
+ ldregs(cr.reg, cr.cls, cr.nreg, r, fn);
+ } else {
+ k = j - Jretw;
+ if (KBASE(k) == 0) {
+ emit(Ocopy, k, TMP(R0), r, R);
+ cty = 1;
+ } else {
+ emit(Ocopy, k, TMP(V0), r, R);
+ cty = 1 << 2;
+ }
+ }
+
+ b->jmp.arg = CALL(cty);
+}
+/* Classify the argument or parameter instructions [i0, i1)
+ * into carg[], one Class per instruction, handing out up to 8
+ * gp and 8 fp registers in order.
+ *
+ * Scalars take the next register of their class, or Cstk when
+ * none is left.  Aggregates are classified by typclass(): a
+ * Cptr aggregate takes one gp register for its address; another
+ * aggregate takes all its registers or none.  When an aggregate
+ * does not fit, the count of the exhausted register class is
+ * set to 0 and the aggregate is marked Cstk.  Opare and Oarge
+ * only record the environment value in *env.
+ *
+ * Returns the number of gp registers used shifted left by 5
+ * or-ed with the number of fp registers used shifted left by 9.
+ *
+ * NOTE(review): the scalar case only ors Cstk into c->class, so
+ * carg[] is presumably expected to be zeroed by the caller's
+ * alloc() -- confirm.
+ */
+static int
+argsclass(Ins *i0, Ins *i1, Class *carg, Ref *env)
+{
+ int ngp, nfp, *gp, *fp;
+ Class *c;
+ Ins *i;
+
+ gp = gpreg;
+ fp = fpreg;
+ ngp = 8;
+ nfp = 8;
+ for (i=i0, c=carg; i<i1; i++, c++)
+ switch (i->op) {
+ case Opar:
+ case Oarg:
+ c->cls[0] = i->cls;
+ c->size = 8;
+ if (KBASE(i->cls) == 0 && ngp > 0) {
+ ngp--;
+ c->reg[0] = *gp++;
+ break;
+ }
+ if (KBASE(i->cls) == 1 && nfp > 0) {
+ nfp--;
+ c->reg[0] = *fp++;
+ break;
+ }
+ c->class |= Cstk;
+ break;
+ case Oparc:
+ case Oargc:
+ typclass(c, &typ[i->arg[0].val], gp, fp);
+ if (c->class & Cptr) {
+ if (ngp > 0) {
+ ngp--;
+ c->reg[0] = *gp++;
+ c->cls[0] = Kl;
+ break;
+ }
+ } else if (c->ngp <= ngp) {
+ if (c->nfp <= nfp) {
+ ngp -= c->ngp;
+ nfp -= c->nfp;
+ gp += c->ngp;
+ fp += c->nfp;
+ break;
+ } else
+ nfp = 0;
+ } else
+ ngp = 0;
+ c->class |= Cstk;
+ break;
+ case Opare:
+ *env = i->to;
+ break;
+ case Oarge:
+ *env = i->arg[0];
+ break;
+ }
+
+ return ((gp-gpreg) << 5) | ((fp-fpreg) << 9);
+}
+/* Decode the return part of the RCall reference r: bits 0-1
+ * give the number of gp registers returned and bits 2-4 the
+ * number of fp registers returned.  When p is not null the two
+ * counts are stored in p[0] and p[1].  Returns the set of
+ * registers R0.. and V0.. covered by those counts.
+ */
+bits
+arm64_retregs(Ref r, int p[2])
+{
+ bits b;
+ int ngp, nfp;
+
+ assert(rtype(r) == RCall);
+ ngp = r.val & 3;
+ nfp = (r.val >> 2) & 7;
+ if (p) {
+ p[0] = ngp;
+ p[1] = nfp;
+ }
+ b = 0;
+ while (ngp--)
+ b |= BIT(R0+ngp);
+ while (nfp--)
+ b |= BIT(V0+nfp);
+ return b;
+}
+/* Decode the argument part of the RCall reference r: bits 5-8
+ * give the number of gp registers passed, bits 9-12 the number
+ * of fp registers passed, and bit 13 tells whether x8 is used.
+ * When p is not null, p[0] receives the gp count plus the x8
+ * bit and p[1] the fp count.  Returns the set of registers
+ * R0.. and V0.. covered by the counts, plus R8 when bit 13 is
+ * set.
+ */
+bits
+arm64_argregs(Ref r, int p[2])
+{
+ bits b;
+ int ngp, nfp, x8;
+
+ assert(rtype(r) == RCall);
+ ngp = (r.val >> 5) & 15;
+ nfp = (r.val >> 9) & 15;
+ x8 = (r.val >> 13) & 1;
+ if (p) {
+ p[0] = ngp + x8;
+ p[1] = nfp;
+ }
+ b = 0;
+ while (ngp--)
+ b |= BIT(R0+ngp);
+ while (nfp--)
+ b |= BIT(V0+nfp);
+ return b | ((bits)x8 << R8);
+}
+/* Push on the list *ilp an alloc instruction that defines r
+ * and reserves c->t->size bytes.  The alloc opcode is Oalloc
+ * plus the type's align field minus 2, clamped at 0.  Nothing
+ * is emitted here; the callers emit the list later.
+ */
+static void
+stkblob(Ref r, Class *c, Fn *fn, Insl **ilp)
+{
+ Insl *il;
+ int al;
+
+ il = alloc(sizeof *il);
+ al = c->t->align - 2; /* NAlign == 3 */
+ if (al < 0)
+ al = 0;
+ il->i = (Ins){
+ Oalloc + al, r,
+ {getcon(c->t->size, fn)}, Kl
+ };
+ il->link = *ilp;
+ *ilp = il;
+}
+/* Lower one call: i1 is the call instruction and [i0, i1) are
+ * its argument instructions.  Memory for Cptr arguments and for
+ * an aggregate result is requested through stkblob() on *ilp.
+ * Calls with an environment argument are not supported and die.
+ *
+ * NOTE(review): the emit() calls below look like they are
+ * written in reverse of the final instruction order (the buffer
+ * is drained from curi up to &insb[NIns] by the callers).  Read
+ * bottom-up, the generated code would then be: copy Cptr
+ * arguments into their blobs, reserve stk bytes on SP, store
+ * the stack arguments, fill the argument registers, set x8 when
+ * the result is returned through memory, call, fetch the
+ * result, and release the stack space.
+ */
+static void
+selcall(Fn *fn, Ins *i0, Ins *i1, Insl **ilp)
+{
+ Ins *i;
+ Class *ca, *c, cr;
+ int cty, envc;
+ uint n;
+ uint64_t stk, off;
+ Ref r, rstk, env, tmp[4];
+
+ env = R;
+ ca = alloc((i1-i0) * sizeof ca[0]);
+ cty = argsclass(i0, i1, ca, &env);
+
+ stk = 0;
+ for (i=i0, c=ca; i<i1; i++, c++) {
+ if (c->class & Cptr) {
+ i->arg[0] = newtmp("abi", Kl, fn);
+ stkblob(i->arg[0], c, fn, ilp);
+ i->op = Oarg;
+ }
+ if (c->class & Cstk)
+ stk += c->size;
+ }
+ stk += stk & 15; /* rounds up to 16 given that every c->size is a multiple of 8 */
+ rstk = getcon(stk, fn);
+ if (stk)
+ emit(Oadd, Kl, TMP(SP), TMP(SP), rstk);
+
+ if (!req(i1->arg[1], R)) {
+ typclass(&cr, &typ[i1->arg[1].val], gpreg, fpreg);
+ stkblob(i1->to, &cr, fn, ilp);
+ cty |= (cr.nfp << 2) | cr.ngp;
+ if (cr.class & Cptr) {
+ cty |= 1 << 13;
+ emit(Ocopy, Kw, R, TMP(R0), R);
+ } else {
+ sttmps(tmp, cr.cls, cr.nreg, i1->to, fn);
+ for (n=0; n<cr.nreg; n++) {
+ r = TMP(cr.reg[n]);
+ emit(Ocopy, cr.cls[n], tmp[n], r, R);
+ }
+ }
+ } else {
+ if (KBASE(i1->cls) == 0) {
+ emit(Ocopy, i1->cls, i1->to, TMP(R0), R);
+ cty |= 1;
+ } else {
+ emit(Ocopy, i1->cls, i1->to, TMP(V0), R);
+ cty |= 1 << 2;
+ }
+ }
+
+ envc = !req(R, env);
+ if (envc)
+ die("todo (arm abi): env calls");
+ emit(Ocall, 0, R, i1->arg[0], CALL(cty));
+
+ if (cty & (1 << 13))
+ /* struct return argument */
+ emit(Ocopy, Kl, TMP(R8), i1->to, R);
+
+ for (i=i0, c=ca; i<i1; i++, c++) {
+ if ((c->class & Cstk) != 0)
+ continue;
+ if (i->op != Oargc)
+ emit(Ocopy, *c->cls, TMP(*c->reg), i->arg[0], R);
+ else
+ ldregs(c->reg, c->cls, c->nreg, i->arg[1], fn);
+ }
+
+ off = 0;
+ for (i=i0, c=ca; i<i1; i++, c++) {
+ if ((c->class & Cstk) == 0)
+ continue;
+ if (i->op != Oargc) {
+ r = newtmp("abi", Kl, fn);
+ emit(Ostorel, 0, R, i->arg[0], r);
+ emit(Oadd, Kl, r, TMP(SP), getcon(off, fn));
+ } else
+ blit(TMP(SP), off, i->arg[1], c->size, fn);
+ off += c->size;
+ }
+ if (stk)
+ emit(Osub, Kl, TMP(SP), TMP(SP), rstk);
+
+ for (i=i0, c=ca; i<i1; i++, c++)
+ if (c->class & Cptr)
+ blit(i->arg[0], 0, i->arg[1], c->t->size, fn);
+}
+/* Lower the parameter instructions [i0, i1) into the emit
+ * buffer, which is reset first, and return how many gp
+ * registers, fp registers and 8-byte stack slots they use.
+ *
+ * An aggregate received in registers gets a stack blob (through
+ * stkblob()) and its registers are copied to temporaries that
+ * sttmps() stores into the blob.  An aggregate received on the
+ * stack is bound to a negative slot.  Other parameters are
+ * loaded from their stack slot or copied from their register.
+ * When the function returns an aggregate through memory, R8 is
+ * copied into the new temporary fn->retr.  Functions with an
+ * environment parameter are not supported and die.
+ *
+ * Stack slots are numbered from 2 (SLOT(-2) is the first one);
+ * SLOT(-1) is used by selvastart().
+ */
+static Params
+selpar(Fn *fn, Ins *i0, Ins *i1)
+{
+ Class *ca, *c, cr;
+ Insl *il;
+ Ins *i;
+ int n, s, cty;
+ Ref r, env, tmp[16], *t;
+
+ env = R;
+ ca = alloc((i1-i0) * sizeof ca[0]);
+ curi = &insb[NIns];
+
+ cty = argsclass(i0, i1, ca, &env);
+
+ il = 0;
+ t = tmp;
+ for (i=i0, c=ca; i<i1; i++, c++) {
+ if (i->op != Oparc || (c->class & (Cptr|Cstk)))
+ continue;
+ sttmps(t, c->cls, c->nreg, i->to, fn);
+ stkblob(i->to, c, fn, &il);
+ t += c->nreg;
+ }
+ for (; il; il=il->link)
+ emiti(il->i);
+
+ if (fn->retty >= 0) {
+ typclass(&cr, &typ[fn->retty], gpreg, fpreg);
+ if (cr.class & Cptr) {
+ fn->retr = newtmp("abi", Kl, fn);
+ emit(Ocopy, Kl, fn->retr, TMP(R8), R);
+ }
+ }
+
+ t = tmp;
+ for (i=i0, c=ca, s=2; i<i1; i++, c++) {
+ if (i->op == Oparc
+ && (c->class & Cptr) == 0) {
+ if (c->class & Cstk) {
+ fn->tmp[i->to.val].slot = -s;
+ s += c->size / 8;
+ } else
+ for (n=0; n<c->nreg; n++) {
+ r = TMP(c->reg[n]);
+ emit(Ocopy, c->cls[n], *t++, r, R);
+ }
+ } else if (c->class & Cstk) {
+ r = newtmp("abi", Kl, fn);
+ emit(Oload, *c->cls, i->to, r, R);
+ emit(Oaddr, Kl, r, SLOT(-s), R);
+ s++;
+ } else {
+ r = TMP(*c->reg);
+ emit(Ocopy, *c->cls, i->to, r, R);
+ }
+ }
+
+ if (!req(R, env))
+ die("todo (arm abi): env calls");
+
+ return (Params){
+ .nstk = s - 2,
+ .ngp = (cty >> 5) & 15,
+ .nfp = (cty >> 9) & 15
+ };
+}
+/* Move the instructions currently in the emit buffer into a
+ * new block linked right after b, reset the buffer, and return
+ * the new block.  b->visit is incremented and used both as the
+ * numeric suffix of the new block's name and as its visit
+ * value; arm64_abi() skips blocks whose visit is not zero.
+ * The jump and successors of the new block are left for the
+ * caller to set.
+ */
+static Blk *
+split(Fn *fn, Blk *b)
+{
+ Blk *bn;
+
+ ++fn->nblk;
+ bn = blknew();
+ bn->nins = &insb[NIns] - curi;
+ idup(&bn->ins, curi, bn->nins);
+ curi = &insb[NIns];
+ bn->visit = ++b->visit;
+ snprintf(bn->name, NString, "%s.%d", b->name, b->visit);
+ bn->loop = b->loop;
+ bn->link = b->link;
+ b->link = bn;
+ return bn;
+}
+/* In every phi of block b, replace the predecessor entry bp by
+ * bp1.  Each phi must have an entry for bp; the assertion fires
+ * before the search runs past the phi's last argument.
+ */
+static void
+chpred(Blk *b, Blk *bp, Blk *bp1)
+{
+ Phi *p;
+ uint a;
+
+ for (p=b->phi; p; p=p->link) {
+ for (a=0; p->blk[a]!=bp; a++)
+ assert(a+1<p->narg);
+ p->blk[a] = bp1;
+ }
+}
+/* Lower the vaarg instruction i of block b into the control
+ * flow sketched in the comment below.  Three blocks are split
+ * off b, in this order: b0 (takes over b's jump and successors
+ * and starts with a phi choosing the argument address), breg
+ * (argument taken from the register save area) and bstk
+ * (argument taken from the stack).  b itself ends with a jnz on
+ * the sign of the current register offset, read at ap+24 for
+ * integer classes and ap+28 for floating-point ones.  The
+ * instructions emitted last stay in the buffer for the caller.
+ */
+static void
+selvaarg(Fn *fn, Blk *b, Ins *i)
+{
+ Ref loc, lreg, lstk, nr, r0, r1, c8, c16, c24, c28, ap;
+ Blk *b0, *bstk, *breg;
+ int isgp;
+
+ c8 = getcon(8, fn);
+ c16 = getcon(16, fn);
+ c24 = getcon(24, fn);
+ c28 = getcon(28, fn);
+ ap = i->arg[0];
+ isgp = KBASE(i->cls) == 0;
+
+ /* @b [...]
+ r0 =l add ap, (24 or 28)
+ nr =l loadsw r0
+ r1 =w csltw nr, 0
+ jnz r1, @breg, @bstk
+ @breg
+ r0 =l add ap, (8 or 16)
+ r1 =l loadl r0
+ lreg =l add r1, nr
+ r0 =w add nr, (8 or 16)
+ r1 =l add ap, (24 or 28)
+ storew r0, r1
+ @bstk
+ lstk =l loadl ap
+ r0 =l add lstk, 8
+ storel r0, ap
+ @b0
+ %loc =l phi @breg %lreg, @bstk %lstk
+ i->to =(i->cls) load %loc
+ */
+
+ loc = newtmp("abi", Kl, fn);
+ emit(Oload, i->cls, i->to, loc, R);
+ b0 = split(fn, b);
+ b0->jmp = b->jmp;
+ b0->s1 = b->s1;
+ b0->s2 = b->s2;
+ if (b->s1)
+ chpred(b->s1, b, b0);
+ if (b->s2 && b->s2 != b->s1)
+ chpred(b->s2, b, b0);
+
+ lreg = newtmp("abi", Kl, fn);
+ nr = newtmp("abi", Kl, fn);
+ r0 = newtmp("abi", Kw, fn);
+ r1 = newtmp("abi", Kl, fn);
+ emit(Ostorew, Kw, R, r0, r1);
+ emit(Oadd, Kl, r1, ap, isgp ? c24 : c28);
+ emit(Oadd, Kw, r0, nr, isgp ? c8 : c16);
+ r0 = newtmp("abi", Kl, fn);
+ r1 = newtmp("abi", Kl, fn);
+ emit(Oadd, Kl, lreg, r1, nr);
+ emit(Oload, Kl, r1, r0, R);
+ emit(Oadd, Kl, r0, ap, isgp ? c8 : c16);
+ breg = split(fn, b);
+ breg->jmp.type = Jjmp;
+ breg->s1 = b0;
+
+ lstk = newtmp("abi", Kl, fn);
+ r0 = newtmp("abi", Kl, fn);
+ emit(Ostorel, Kw, R, r0, ap);
+ emit(Oadd, Kl, r0, lstk, c8);
+ emit(Oload, Kl, lstk, ap, R);
+ bstk = split(fn, b);
+ bstk->jmp.type = Jjmp;
+ bstk->s1 = b0;
+
+ b0->phi = alloc(sizeof *b0->phi);
+ *b0->phi = (Phi){
+ .cls = Kl, .to = loc,
+ .narg = 2,
+ .blk = {bstk, breg},
+ .arg = {lstk, lreg},
+ };
+ r0 = newtmp("abi", Kl, fn);
+ r1 = newtmp("abi", Kw, fn);
+ b->jmp.type = Jjnz;
+ b->jmp.arg = r1;
+ b->s1 = breg;
+ b->s2 = bstk;
+ emit(Ocmpw+Cislt, Kw, r1, nr, CON_Z);
+ emit(Oloadsw, Kl, nr, r0, R);
+ emit(Oadd, Kl, r0, ap, isgp ? c24 : c28);
+}
+/* Emit the initialization of the variable-argument list stored
+ * at ap.  With rsave the address of SLOT(-1), the stores are:
+ *
+ *   ap+0   rsave + p.nstk*8 + 192   (64-bit)
+ *   ap+8   rsave + 64               (64-bit)
+ *   ap+16  rsave + 192              (64-bit)
+ *   ap+24  (p.ngp - 8) * 8          (32-bit)
+ *   ap+28  (p.nfp - 8) * 16         (32-bit)
+ *
+ * NOTE(review): this looks like the AAPCS64 va_list fields
+ * __stack, __gr_top, __vr_top, __gr_offs and __vr_offs, in
+ * that order -- confirm against the ABI document.
+ */
+static void
+selvastart(Fn *fn, Params p, Ref ap)
+{
+ Ref r0, r1, rsave;
+
+ rsave = newtmp("abi", Kl, fn);
+
+ r0 = newtmp("abi", Kl, fn);
+ emit(Ostorel, Kw, R, r0, ap);
+ emit(Oadd, Kl, r0, rsave, getcon(p.nstk*8 + 192, fn));
+
+ r0 = newtmp("abi", Kl, fn);
+ r1 = newtmp("abi", Kl, fn);
+ emit(Ostorel, Kw, R, r1, r0);
+ emit(Oadd, Kl, r1, rsave, getcon(64, fn));
+ emit(Oadd, Kl, r0, ap, getcon(8, fn));
+
+ r0 = newtmp("abi", Kl, fn);
+ r1 = newtmp("abi", Kl, fn);
+ emit(Ostorel, Kw, R, r1, r0);
+ emit(Oadd, Kl, r1, rsave, getcon(192, fn));
+ emit(Oaddr, Kl, rsave, SLOT(-1), R);
+ emit(Oadd, Kl, r0, ap, getcon(16, fn));
+
+ r0 = newtmp("abi", Kl, fn);
+ emit(Ostorew, Kw, R, getcon((p.ngp-8)*8, fn), r0);
+ emit(Oadd, Kl, r0, ap, getcon(24, fn));
+
+ r0 = newtmp("abi", Kl, fn);
+ emit(Ostorew, Kw, R, getcon((p.nfp-8)*16, fn), r0);
+ emit(Oadd, Kl, r0, ap, getcon(28, fn));
+}
+/* ABI lowering entry point for fn.
+ *
+ * First the leading parameter instructions of the start block
+ * are replaced by the code produced by selpar().  Then every
+ * block whose visit field is still zero is rewritten, walking
+ * its instructions from last to first: returns go through
+ * selret(), calls (together with their argument instructions)
+ * through selcall(), vastart and vaarg through selvastart() and
+ * selvaarg(), and everything else is re-emitted unchanged.
+ * The start block is handled last; the alloc instructions
+ * collected in il by selcall() are emitted into it after its
+ * own instructions have been re-emitted.  With the 'A' debug
+ * flag the function is printed on stderr afterwards.
+ */
+void
+arm64_abi(Fn *fn)
+{
+ Blk *b;
+ Ins *i, *i0, *ip;
+ Insl *il;
+ int n;
+ Params p;
+
+ for (b=fn->start; b; b=b->link)
+ b->visit = 0;
+
+ /* lower parameters */
+ for (b=fn->start, i=b->ins; i-b->ins<b->nins; i++)
+ if (!ispar(i->op))
+ break;
+ p = selpar(fn, b->ins, i);
+ n = b->nins - (i - b->ins) + (&insb[NIns] - curi);
+ i0 = alloc(n * sizeof(Ins));
+ ip = icpy(ip = i0, curi, &insb[NIns] - curi);
+ ip = icpy(ip, i, &b->ins[b->nins] - i);
+ b->nins = n;
+ b->ins = i0;
+
+ /* lower calls, returns, and vararg instructions */
+ il = 0;
+ b = fn->start;
+ do {
+ if (!(b = b->link))
+ b = fn->start; /* do it last */
+ if (b->visit)
+ continue;
+ curi = &insb[NIns];
+ selret(b, fn);
+ for (i=&b->ins[b->nins]; i!=b->ins;)
+ switch ((--i)->op) {
+ default:
+ emiti(*i);
+ break;
+ case Ocall:
+ case Ovacall:
+ for (i0=i; i0>b->ins; i0--)
+ if (!isarg((i0-1)->op))
+ break;
+ selcall(fn, i0, i, &il);
+ i = i0;
+ break;
+ case Ovastart:
+ selvastart(fn, p, i->arg[0]);
+ break;
+ case Ovaarg:
+ selvaarg(fn, b, i);
+ break;
+ case Oarg:
+ case Oargc:
+ die("unreachable");
+ }
+ if (b == fn->start)
+ for (; il; il=il->link)
+ emiti(il->i);
+ b->nins = &insb[NIns] - curi;
+ idup(&b->ins, curi, b->nins);
+ } while (b != fn->start);
+
+ if (debug['A']) {
+ fprintf(stderr, "\n> After ABI lowering:\n");
+ printfn(fn, stderr);
+ }
+}
diff --git a/arm64/all.h b/arm64/all.h
@@ -0,0 +1,37 @@
+#include "../all.h"
+/* Register numbers of the arm64 target, starting right after
+ * RXX: first the general-purpose registers up to SP, then the
+ * vector registers V0..V30.  V31 is left out of the
+ * enumeration; emit.c prints s31/d31 directly as the scratch
+ * register of floating-point templates.  The trailing
+ * constants are register counts: NGPS and NFPS match the gp
+ * and fp parts of arm64_rsave[] and NCLR matches
+ * arm64_rclob[], as checked in targ.c.
+ */
+enum Arm64Reg {
+ R0 = RXX + 1,
+ R1, R2, R3, R4, R5, R6, R7,
+ R8, R9, R10, R11, R12, R13, R14, R15,
+ IP0, IP1, R18, R19, R20, R21, R22, R23,
+ R24, R25, R26, R27, R28, FP, LR, SP,
+
+ V0, V1, V2, V3, V4, V5, V6, V7,
+ V8, V9, V10, V11, V12, V13, V14, V15,
+ V16, V17, V18, V19, V20, V21, V22, V23,
+ V24, V25, V26, V27, V28, V29, V30, /* V31, */
+
+ NFPR = V30 - V0 + 1,
+ NGPR = SP - R0 + 1,
+ NGPS = R18 - R0 + 1,
+ NFPS = (V7 - V0 + 1) + (V30 - V16 + 1),
+ NCLR = (R28 - R19 + 1) + (V15 - V8 + 1),
+};
+MAKESURE(reg_not_tmp, V30 < (int)Tmp0);
+
+/* targ.c */
+extern int arm64_rsave[];
+extern int arm64_rclob[];
+
+/* abi.c */
+bits arm64_retregs(Ref, int[2]);
+bits arm64_argregs(Ref, int[2]);
+void arm64_abi(Fn *);
+
+/* isel.c */
+int arm64_logimm(uint64_t, int);
+void arm64_isel(Fn *);
+
+/* emit.c */
+void arm64_emitfn(Fn *, FILE *);
diff --git a/arm64/emit.c b/arm64/emit.c
@@ -0,0 +1,455 @@
+#include "all.h"
+
+typedef struct E E;
+/* State shared by the emission helpers while one function is
+ * being printed.
+ */
+struct E {
+ FILE *f; /* output stream */
+ Fn *fn; /* function being emitted */
+ uint64_t frame; /* computed by framelayout(), in bytes */
+ uint padding; /* computed by framelayout(), in bytes */
+};
+/* X-macro pairing each comparison (the floating-point ones are
+ * offset by NCmpI) with the condition suffix printed for it.
+ * It is expanded twice in this file: to build the cset entries
+ * of omap[] and to build ctoa[] in arm64_emitfn().
+ */
+#define CMP(X) \
+ X(Cieq, "eq") \
+ X(Cine, "ne") \
+ X(Cisge, "ge") \
+ X(Cisgt, "gt") \
+ X(Cisle, "le") \
+ X(Cislt, "lt") \
+ X(Ciuge, "cs") \
+ X(Ciugt, "hi") \
+ X(Ciule, "ls") \
+ X(Ciult, "cc") \
+ X(NCmpI+Cfeq, "eq") \
+ X(NCmpI+Cfge, "ge") \
+ X(NCmpI+Cfgt, "gt") \
+ X(NCmpI+Cfle, "ls") \
+ X(NCmpI+Cflt, "mi") \
+ X(NCmpI+Cfne, "ne") \
+ X(NCmpI+Cfo, "vc") \
+ X(NCmpI+Cfuo, "vs")
+/* Wildcard values for omap[].cls; see the lookup in emitins(). */
+enum {
+ Ki = -1, /* matches Kw and Kl */
+ Ka = -2, /* matches all classes */
+};
+/* Assembly templates.  emitins() scans the table from the top
+ * and takes the first entry whose op matches and whose cls is
+ * the instruction's class, Ka, or Ki with an integer class, so
+ * the order of entries for one op matters.  The entry with op
+ * NOp terminates the table.
+ *
+ * Templates are expanded by emitf(): %= is the destination, %0
+ * and %1 are the arguments, %? is a scratch register, %M0 and
+ * %M1 print an argument as a memory operand, and a W, L, S or D
+ * between % and the selector forces the register class.
+ */
+static struct {
+ short op;
+ short cls;
+ char *asm;
+} omap[] = {
+ { Oadd, Ki, "add %=, %0, %1" },
+ { Oadd, Ka, "fadd %=, %0, %1" },
+ { Osub, Ki, "sub %=, %0, %1" },
+ { Osub, Ka, "fsub %=, %0, %1" },
+ { Oand, Ki, "and %=, %0, %1" },
+ { Oor, Ki, "orr %=, %0, %1" },
+ { Oxor, Ki, "eor %=, %0, %1" },
+ { Osar, Ki, "asr %=, %0, %1" },
+ { Oshr, Ki, "lsr %=, %0, %1" },
+ { Oshl, Ki, "lsl %=, %0, %1" },
+ { Omul, Ki, "mul %=, %0, %1" },
+ { Omul, Ka, "fmul %=, %0, %1" },
+ { Odiv, Ki, "sdiv %=, %0, %1" },
+ { Odiv, Ka, "fdiv %=, %0, %1" },
+ { Oudiv, Ki, "udiv %=, %0, %1" },
+ { Orem, Ki, "sdiv %?, %0, %1\n\tmsub\t%=, %?, %1, %0" },
+ { Ourem, Ki, "udiv %?, %0, %1\n\tmsub\t%=, %?, %1, %0" },
+ { Ocopy, Ki, "mov %=, %0" },
+ { Ocopy, Ka, "fmov %=, %0" },
+ { Oswap, Ki, "mov %?, %0\n\tmov\t%0, %1\n\tmov\t%1, %?" },
+ { Oswap, Ka, "fmov %?, %0\n\tfmov\t%0, %1\n\tfmov\t%1, %?" },
+ { Ostoreb, Kw, "strb %W0, %M1" },
+ { Ostoreh, Kw, "strh %W0, %M1" },
+ { Ostorew, Kw, "str %W0, %M1" },
+ { Ostorel, Kw, "str %L0, %M1" },
+ { Ostores, Kw, "str %S0, %M1" },
+ { Ostored, Kw, "str %D0, %M1" },
+ { Oloadsb, Ki, "ldrsb %=, %M0" },
+ { Oloadub, Ki, "ldrb %=, %M0" },
+ { Oloadsh, Ki, "ldrsh %=, %M0" },
+ { Oloaduh, Ki, "ldrh %=, %M0" },
+ { Oloadsw, Kw, "ldr %=, %M0" },
+ { Oloadsw, Kl, "ldrsw %=, %M0" },
+ { Oloaduw, Ki, "ldr %W=, %M0" },
+ { Oload, Ka, "ldr %=, %M0" },
+ { Oextsb, Ki, "sxtb %=, %W0" },
+ { Oextub, Ki, "uxtb %W=, %W0" },
+ { Oextsh, Ki, "sxth %=, %W0" },
+ { Oextuh, Ki, "uxth %W=, %W0" },
+ { Oextsw, Ki, "sxtw %L=, %W0" },
+ { Oextuw, Ki, "mov %W=, %W0" },
+ { Oexts, Kd, "fcvt %=, %S0" },
+ { Ocast, Kw, "fmov %=, %S0" },
+ { Ocast, Kl, "fmov %=, %D0" },
+ { Ocast, Ks, "fmov %=, %W0" },
+ { Ocast, Kd, "fmov %=, %L0" },
+ { Ostosi, Ka, "fcvtzs %=, %S0" },
+ { Odtosi, Ka, "fcvtzs %=, %D0" },
+ { Oswtof, Ka, "scvtf %=, %W0" },
+ { Osltof, Ka, "scvtf %=, %L0" },
+ { Ocall, Kw, "blr %L0" },
+
+ { Oacmp, Ki, "cmp %0, %1" },
+ { Oacmn, Ki, "cmn %0, %1" },
+ { Oafcmp, Ka, "fcmpe %0, %1" },
+
+#define X(c, str) \
+ { Oflag+c, Ki, "cset %=, " str },
+ CMP(X)
+#undef X
+ { NOp, 0, 0 }
+};
+/* Return the assembly name of register r viewed with class k:
+ * "sp" for SP (k must be Kl), wN or xN for R0..LR, sN or dN for
+ * V0..V30; Kx selects the 64-bit name.  Dies on any other
+ * register or class.  The result lives in a static buffer that
+ * the next call overwrites, so it has to be used or copied
+ * before calling rname() again.
+ */
+static char *
+rname(int r, int k)
+{
+ static char buf[4];
+
+ if (r == SP) {
+ assert(k == Kl);
+ sprintf(buf, "sp");
+ }
+ else if (R0 <= r && r <= LR)
+ switch (k) {
+ default: die("invalid class");
+ case Kw: sprintf(buf, "w%d", r-R0); break;
+ case Kx:
+ case Kl: sprintf(buf, "x%d", r-R0); break;
+ }
+ else if (V0 <= r && r <= V30)
+ switch (k) {
+ default: die("invalid class");
+ case Ks: sprintf(buf, "s%d", r-V0); break;
+ case Kx:
+ case Kd: sprintf(buf, "d%d", r-V0); break;
+ }
+ else
+ die("invalid register");
+ return buf;
+}
+/* Turn the slot number s into a byte offset; emitins() prints
+ * it as an offset from x29.  Slot -1 maps to 16 + e->frame.
+ * Other negative slots map to 16 + e->frame - (s+2)*8, with 192
+ * more bytes skipped in vararg functions.  Non-negative slots
+ * are counted in 4-byte units above the padding.
+ */
+static uint64_t
+slot(int s, E *e)
+{
+ s = ((int32_t)s << 3) >> 3; /* sign-extend the low 29 bits */
+ if (s == -1)
+ return 16 + e->frame;
+ if (s < 0) {
+ if (e->fn->vararg)
+ return 16 + e->frame + 192 - (s+2)*8;
+ else
+ return 16 + e->frame - (s+2)*8;
+ } else
+ return 16 + e->padding + 4 * s;
+}
+/* Print instruction i on e->f following the template s.  A tab
+ * is written first, the first space of the template becomes a
+ * tab, and the end of the template ends the line.
+ *
+ * After a %, the letters W, L, S and D select the class used
+ * for the operand that follows instead of i->cls; the class is
+ * reset to i->cls after each operand.  %? prints R18 for
+ * integer classes and s31 or d31 otherwise.  %= and %0 must be
+ * registers.  %1 may be a register or a CBits constant; the
+ * constant is printed as #n, or as #(n>>12) followed by
+ * "lsl #12" when any of its bits 12 to 23 is set.  %M0 and %M1
+ * print the argument, which must be a register, between
+ * brackets.  Any other escape dies.
+ */
+static void
+emitf(char *s, Ins *i, E *e)
+{
+ Ref r;
+ int k, c;
+ Con *pc;
+ unsigned n, sp;
+
+ fputc('\t', e->f);
+
+ sp = 0;
+ for (;;) {
+ k = i->cls;
+ while ((c = *s++) != '%')
+ if (c == ' ' && !sp) {
+ fputc('\t', e->f);
+ sp = 1;
+ } else if ( !c) {
+ fputc('\n', e->f);
+ return;
+ } else
+ fputc(c, e->f);
+ Switch:
+ switch ((c = *s++)) {
+ default:
+ die("invalid escape");
+ case 'W':
+ k = Kw;
+ goto Switch;
+ case 'L':
+ k = Kl;
+ goto Switch;
+ case 'S':
+ k = Ks;
+ goto Switch;
+ case 'D':
+ k = Kd;
+ goto Switch;
+ case '?':
+ if (KBASE(k) == 0)
+ fputs(rname(R18, k), e->f);
+ else
+ fputs(k==Ks ? "s31" : "d31", e->f);
+ break;
+ case '=':
+ case '0':
+ r = c == '=' ? i->to : i->arg[0];
+ assert(isreg(r));
+ fputs(rname(r.val, k), e->f);
+ break;
+ case '1':
+ r = i->arg[1];
+ switch (rtype(r)) {
+ default:
+ die("invalid second argument");
+ case RTmp:
+ assert(isreg(r));
+ fputs(rname(r.val, k), e->f);
+ break;
+ case RCon:
+ pc = &e->fn->con[r.val];
+ n = pc->bits.i;
+ assert(pc->type == CBits);
+ if (n & 0xfff000)
+ fprintf(e->f, "#%u, lsl #12", n>>12);
+ else
+ fprintf(e->f, "#%u", n);
+ break;
+ }
+ break;
+ case 'M':
+ c = *s++;
+ assert(c == '0' || c == '1');
+ r = i->arg[c - '0'];
+ assert(isreg(r) && "TODO emit non reg addresses");
+ fprintf(e->f, "[%s]", rname(r.val, Kl));
+ break;
+ }
+ }
+}
+/* Print on f the code that loads the constant c into register
+ * r with class k.
+ *
+ * An address constant is loaded with adrp followed by an add of
+ * the :lo12: part; local symbols get the ".L" prefix and a
+ * non-zero offset is appended as "+n".  A CBits constant is
+ * first truncated to 32 bits for narrow classes.  It is loaded
+ * with a single mov when all its bits above the low 16 are set
+ * or when arm64_logimm() accepts it.  Otherwise a mov of the
+ * low 16 bits is followed by one movk per higher 16-bit chunk,
+ * for as long as non-zero bits remain and the register width
+ * (32 or 64 bits) is not exceeded.
+ */
+static void
+loadcon(Con *c, int r, int k, FILE *f)
+{
+ char *rn, *p, off[32];
+ int64_t n;
+ int w, sh;
+
+ w = KWIDE(k);
+ rn = rname(r, k);
+ n = c->bits.i;
+ if (c->type == CAddr) {
+ rn = rname(r, Kl);
+ if (n)
+ sprintf(off, "+%"PRIi64, n);
+ else
+ off[0] = 0;
+ p = c->local ? ".L" : "";
+ fprintf(f, "\tadrp\t%s, %s%s%s\n",
+ rn, p, c->label, off);
+ fprintf(f, "\tadd\t%s, %s, #:lo12:%s%s%s\n",
+ rn, rn, p, c->label, off);
+ return;
+ }
+ assert(c->type == CBits);
+ if (!w)
+ n = (int32_t)n;
+ if ((n | 0xffff) == -1 || arm64_logimm(n, k)) {
+ fprintf(f, "\tmov\t%s, #%"PRIi64"\n", rn, n);
+ } else {
+ fprintf(f, "\tmov\t%s, #%d\n",
+ rn, (int)(n & 0xffff));
+ for (sh=16; n>>=16; sh+=16) {
+ if ((!w && sh == 32) || sh == 64)
+ break;
+ fprintf(f, "\tmovk\t%s, #0x%x, lsl #%d\n",
+ rn, (unsigned)(n & 0xffff), sh);
+ }
+ }
+}
+/* Print the assembly for instruction i.  Onop prints nothing,
+ * and neither does a copy of a reference onto itself.  A copy
+ * from a constant goes through loadcon().  Oaddr, whose
+ * argument must be a slot, prints an add of the slot offset to
+ * x29.  Everything else, including copies from registers, is
+ * looked up in omap[] and printed with emitf(); a missing table
+ * entry is fatal.
+ */
+static void
+emitins(Ins *i, E *e)
+{
+ int o;
+
+ switch (i->op) {
+ default:
+ Table:
+ /* most instructions are just pulled out of
+ * the table omap[], some special cases are
+ * detailed below */
+ for (o=0;; o++) {
+ /* this linear search should really be a binary
+ * search */
+ if (omap[o].op == NOp)
+ die("no match for %s(%c)",
+ optab[i->op].name, "wlsd"[i->cls]);
+ if (omap[o].op == i->op)
+ if (omap[o].cls == i->cls || omap[o].cls == Ka
+ || (omap[o].cls == Ki && KBASE(i->cls) == 0))
+ break;
+ }
+ emitf(omap[o].asm, i, e);
+ break;
+ case Onop:
+ break;
+ case Ocopy:
+ if (req(i->to, i->arg[0]))
+ break;
+ if (rtype(i->arg[0]) != RCon)
+ goto Table;
+ loadcon(&e->fn->con[i->arg[0].val], i->to.val, i->cls, e->f);
+ break;
+ case Oaddr:
+ assert(rtype(i->arg[0]) == RSlot);
+ fprintf(e->f, "\tadd\t%s, x29, #%"PRIu64"\n",
+ rname(i->to.val, Kl), slot(i->arg[0].val, e)
+ );
+ break;
+ }
+}
+/* Compute e->frame and e->padding for e->fn.  o counts the
+ * registers of arm64_rclob[] present in fn->reg, rounded up to
+ * an even number.  f is fn->slot rounded up to a multiple of 4.
+ * The padding is the rounding of the slot count expressed in
+ * bytes (slots are 4 bytes wide) and the frame is 4*f bytes of
+ * slots plus 8 bytes per counted register.
+ */
+static void
+framelayout(E *e)
+{
+ int *r;
+ uint o;
+ uint64_t f;
+
+ for (o=0, r=arm64_rclob; *r>=0; r++)
+ o += 1 & (e->fn->reg >> *r);
+ f = e->fn->slot;
+ f = (f + 3) & -4;
+ o += o & 1;
+ e->padding = 4*(f-e->fn->slot);
+ e->frame = 4*f + 8*o;
+}
+
+/*
+
+  Stack-frame layout:
+
+  +=============+
+  | varargs     |
+  |  save area  |
+  +-------------+
+  | callee-save |  ^
+  |  registers  |  |
+  +-------------+  |
+  |    ...      |  |
+  | spill slots |  |
+  |    ...      |  | e->frame
+  +-------------+  |
+  |    ...      |  |
+  |   locals    |  |
+  |    ...      |  |
+  +-------------+  |
+  | e->padding  |  v
+  +-------------+
+  |  saved x29  |
+  |  saved x30  |
+  +=============+ <- x29
+
+*/
+
+/* Print the assembly of function fn on out.
+ *
+ * The prologue saves the argument registers of vararg
+ * functions (192 bytes: x0..x7 at the lowest addresses, then
+ * q0..q7), allocates the frame computed by framelayout(),
+ * saves x29/x30 at its bottom, points x29 at them, and stores
+ * the registers of arm64_rclob[] that the function uses at the
+ * top of the frame.  Each block is then printed with emitins()
+ * followed by its jump; Jret0 restores the saved registers and
+ * pops the frame (plus the vararg area) before ret.
+ *
+ * Labels are .L<id0 + block id>; id0 is advanced by the number
+ * of blocks after each function so that labels of different
+ * functions do not collide.  Conditional jumps may swap b->s1
+ * and b->s2 in place so that the fall-through block is s1.
+ */
+void
+arm64_emitfn(Fn *fn, FILE *out)
+{
+	static char *ctoa[] = {
+	#define X(c, s) [c] = s,
+		CMP(X)
+	#undef X
+	};
+	static int id0;
+	int n, c, lbl, *r;
+	uint64_t o;
+	Blk *b, *s;
+	Ins *i;
+	E *e;
+
+	e = &(E){.f = out, .fn = fn};
+	framelayout(e);
+
+	fprintf(e->f, ".text\n");
+	if (e->fn->export)
+		fprintf(e->f, ".globl %s\n", e->fn->name);
+	fprintf(e->f, "%s:\n", e->fn->name);
+
+	if (e->fn->vararg) {
+		for (n=7; n>=0; n--)
+			fprintf(e->f, "\tstr\tq%d, [sp, -16]!\n", n);
+		/* push x0..x7 two at a time: sp has to stay
+		 * 16-byte aligned whenever it is the base of
+		 * a memory access, which pushing 8 bytes at
+		 * a time would violate; the resulting memory
+		 * layout is the same (x0 lowest, x7 highest) */
+		for (n=7; n>=0; n-=2)
+			fprintf(e->f, "\tstp\tx%d, x%d, [sp, -16]!\n", n-1, n);
+	}
+
+	/* the pre-indexed stp can only reach 512 bytes down */
+	if (e->frame + 16 > 512)
+		fprintf(e->f,
+			"\tsub\tsp, sp, #%"PRIu64"\n"
+			"\tstp\tx29, x30, [sp, -16]!\n",
+			e->frame
+		);
+	else
+		fprintf(e->f,
+			"\tstp\tx29, x30, [sp, -%"PRIu64"]!\n",
+			e->frame + 16
+		);
+	fputs("\tadd\tx29, sp, 0\n", e->f);
+	for (o=e->frame+16, r=arm64_rclob; *r>=0; r++)
+		if (e->fn->reg & BIT(*r))
+			fprintf(e->f,
+				"\tstr\t%s, [sp, %"PRIu64"]\n",
+				rname(*r, Kx), o -= 8
+			);
+
+	for (lbl=0, b=e->fn->start; b; b=b->link) {
+		if (lbl || b->npred > 1)
+			fprintf(e->f, ".L%d:\n", id0+b->id);
+		for (i=b->ins; i!=&b->ins[b->nins]; i++)
+			emitins(i, e);
+		lbl = 1;
+		switch (b->jmp.type) {
+		case Jret0:
+			/* reload in the same order as the prologue
+			 * stored, so the offsets match */
+			for (o=e->frame+16, r=arm64_rclob; *r>=0; r++)
+				if (e->fn->reg & BIT(*r))
+					fprintf(e->f,
+						"\tldr\t%s, [sp, %"PRIu64"]\n",
+						rname(*r, Kx), o -= 8
+					);
+			o = e->frame + 16;
+			if (e->fn->vararg)
+				o += 192;
+			/* the post-indexed ldp can only go 504 bytes up */
+			if (o > 504)
+				fprintf(e->f,
+					"\tldp\tx29, x30, [sp], 16\n"
+					"\tadd\tsp, sp, #%"PRIu64"\n",
+					o - 16
+				);
+			else
+				fprintf(e->f,
+					"\tldp\tx29, x30, [sp], %"PRIu64"\n",
+					o
+				);
+			fprintf(e->f, "\tret\n");
+			break;
+		case Jjmp:
+		Jmp:
+			if (b->s1 != b->link)
+				fprintf(e->f, "\tb\t.L%d\n", id0+b->s1->id);
+			else
+				lbl = 0;
+			break;
+		default:
+			c = b->jmp.type - Jjf;
+			/* NOTE(review): ctoa[] only has entries for the
+			 * indices listed in CMP(); if NCmp is the number
+			 * of comparisons this bound should probably be
+			 * c >= NCmp -- confirm against its definition */
+			if (c < 0 || c > NCmp)
+				die("unhandled jump %d", b->jmp.type);
+			if (b->link == b->s2) {
+				s = b->s1;
+				b->s1 = b->s2;
+				b->s2 = s;
+			} else
+				c = cmpneg(c);
+			fprintf(e->f, "\tb%s\t.L%d\n", ctoa[c], id0+b->s2->id);
+			goto Jmp;
+		}
+	}
+	id0 += e->fn->nblk;
+}
diff --git a/arm64/isel.c b/arm64/isel.c
@@ -0,0 +1,266 @@
+#include "all.h"
+/* Kinds of immediates returned by imm().  The order matters:
+ * imm() computes its result as Iplo12 (non-negative values) or
+ * Inlo12 (negative values) plus 0, 1 or 2.
+ */
+enum Imm {
+ Iother,
+ Iplo12,
+ Iphi12,
+ Iplo24,
+ Inlo12,
+ Inhi12,
+ Inlo24
+};
+/* Classify the constant c, read with class k, as an immediate.
+ * Constants that are not CBits yield Iother and leave *pn
+ * untouched.  Otherwise the value (sign-extended from 32 bits
+ * when k is Kw) is replaced by its magnitude, which is stored
+ * in *pn, and the result tells whether that magnitude fits in
+ * bits 0-11 (lo12), in bits 12-23 only (hi12), or in bits 0-23
+ * (lo24), using the I p... kinds for non-negative values and
+ * the I n... kinds for negative ones.  Anything larger is
+ * Iother.
+ */
+static enum Imm
+imm(Con *c, int k, int64_t *pn)
+{
+ int64_t n;
+ int i;
+
+ if (c->type != CBits)
+ return Iother;
+ n = c->bits.i;
+ if (k == Kw)
+ n = (int32_t)n;
+ i = Iplo12;
+ if (n < 0) {
+ i = Inlo12;
+ n = -n;
+ }
+ *pn = n;
+ if ((n & 0x000fff) == n)
+ return i;
+ if ((n & 0xfff000) == n)
+ return i + 1;
+ if ((n & 0xffffff) == n)
+ return i + 2;
+ return Iother;
+}
+/* Return non-zero when x passes the bit-pattern test below;
+ * loadcon() uses the result to decide whether a constant can be
+ * loaded with a single mov.  For class Kw the low 32 bits of x
+ * are first copied into the high half.
+ *
+ * The value is complemented when its bit 0 is set, so that
+ * bit 0 is clear from then on.  All zeros (which also covers
+ * all ones) is rejected and the alternating pattern
+ * 0xaaaa... is accepted directly.  n is then the
+ * shortest of the 4, 8, 16, 32 or 64 low bits whose repetition
+ * gives x, and the result is whether n is a single contiguous
+ * run of ones: adding the lowest set bit of n to n must clear
+ * every bit that n has set.
+ */
+int
+arm64_logimm(uint64_t x, int k)
+{
+ uint64_t n;
+
+ if (k == Kw)
+ x = (x & 0xffffffff) | x << 32;
+ if (x & 1)
+ x = ~x;
+ if (x == 0)
+ return 0;
+ if (x == 0xaaaaaaaaaaaaaaaa)
+ return 1;
+ n = x & 0xf;
+ if (0x1111111111111111 * n == x)
+ goto Check;
+ n = x & 0xff;
+ if (0x0101010101010101 * n == x)
+ goto Check;
+ n = x & 0xffff;
+ if (0x0001000100010001 * n == x)
+ goto Check;
+ n = x & 0xffffffff;
+ if (0x0000000100000001 * n == x)
+ goto Check;
+ n = x;
+Check:
+ return (n & (n + (n & -n))) == 0;
+}
+/* Rewrite the operand *pr, used with class k, into something
+ * the emitter accepts, emitting the extra instructions needed.
+ *
+ * A constant is left alone when it is an integer phi argument
+ * (phi non-zero).  Otherwise it is replaced by a new temporary:
+ * integer constants are copied into it; floating-point ones are
+ * stashed with gasstashfp(), a new local address constant
+ * labelled "fp<n>" is appended to fn->con, and the temporary is
+ * loaded from that address.  A temporary that has a slot
+ * (slot != -1) is replaced by a new temporary holding the
+ * address of the slot.  Other references are not touched.
+ */
+static void
+fixarg(Ref *pr, int k, int phi, Fn *fn)
+{
+ Ref r0, r1, r2;
+ int s, n;
+ Con *c;
+
+ r0 = *pr;
+ switch (rtype(r0)) {
+ case RCon:
+ if (KBASE(k) == 0 && phi)
+ return;
+ r1 = newtmp("isel", k, fn);
+ if (KBASE(k) == 0) {
+ emit(Ocopy, k, r1, r0, R);
+ } else {
+ c = &fn->con[r0.val];
+ n = gasstashfp(c->bits.i, KWIDE(k));
+ vgrow(&fn->con, ++fn->ncon);
+ c = &fn->con[fn->ncon-1];
+ *c = (Con){.type = CAddr, .local = 1};
+ sprintf(c->label, "fp%d", n);
+ r2 = newtmp("isel", Kl, fn);
+ emit(Oload, k, r1, r2, R);
+ emit(Ocopy, Kl, r2, CON(c-fn->con), R);
+ }
+ *pr = r1;
+ break;
+ case RTmp:
+ s = fn->tmp[r0.val].slot;
+ if (s == -1)
+ break;
+ r1 = newtmp("isel", Kl, fn);
+ emit(Oaddr, Kl, r1, SLOT(s), R);
+ *pr = r1;
+ break;
+ }
+}
+/* Emit the comparison of arg[0] with arg[1] in class k and
+ * return 1 when the two operands were exchanged (the caller
+ * then has to adjust its condition), 0 otherwise.  The arg
+ * array itself is modified by the exchange.
+ *
+ * Floating-point comparisons use Oafcmp with both operands
+ * fixed and are never exchanged.  For integers, a constant
+ * first operand is moved to the second position.  A constant
+ * second operand that imm() classifies as Iplo12 or Iphi12 is
+ * kept as an immediate; one classified as Inlo12 or Inhi12 is
+ * replaced by its magnitude and the comparison becomes Oacmn;
+ * any other operand goes through fixarg().
+ */
+static int
+selcmp(Ref arg[2], int k, Fn *fn)
+{
+ Ref r, *iarg;
+ Con *c;
+ int swap, cmp, fix;
+ int64_t n;
+
+ if (KBASE(k) == 1) {
+ emit(Oafcmp, k, R, arg[0], arg[1]);
+ iarg = curi->arg;
+ fixarg(&iarg[0], k, 0, fn);
+ fixarg(&iarg[1], k, 0, fn);
+ return 0;
+ }
+ swap = rtype(arg[0]) == RCon;
+ if (swap) {
+ r = arg[1];
+ arg[1] = arg[0];
+ arg[0] = r;
+ }
+ fix = 1;
+ cmp = Oacmp;
+ r = arg[1];
+ if (rtype(r) == RCon) {
+ c = &fn->con[r.val];
+ switch (imm(c, k, &n)) {
+ default:
+ break;
+ case Iplo12:
+ case Iphi12:
+ fix = 0;
+ break;
+ case Inlo12:
+ case Inhi12:
+ cmp = Oacmn;
+ r = getcon(n, fn);
+ fix = 0;
+ break;
+ }
+ }
+ emit(cmp, k, R, arg[0], r);
+ iarg = curi->arg;
+ fixarg(&iarg[0], k, 0, fn);
+ if (fix)
+ fixarg(&iarg[1], k, 0, fn);
+ return swap;
+}
+/* Select code for instruction i.  A comparison becomes an
+ * Oflag instruction defining i.to plus the comparison emitted
+ * by selcmp(); the condition added to Oflag is cc, or
+ * cmpop(cc) when selcmp() exchanged the operands.  Onop is
+ * dropped.  Any other instruction is emitted as is and both of
+ * its arguments are passed through fixarg().
+ */
+static void
+sel(Ins i, Fn *fn)
+{
+ Ref *iarg;
+ Ins *i0;
+ int ck, cc;
+
+ if (iscmp(i.op, &ck, &cc)) {
+ emit(Oflag, i.cls, i.to, R, R);
+ i0 = curi;
+ if (selcmp(i.arg, ck, fn))
+ i0->op += cmpop(cc);
+ else
+ i0->op += cc;
+ } else if (i.op != Onop) {
+ emiti(i);
+ iarg = curi->arg; /* fixarg() can change curi */
+ fixarg(&iarg[0], argcls(&i, 0), 0, fn);
+ fixarg(&iarg[1], argcls(&i, 1), 0, fn);
+ }
+}
+/* Select the jump of block b.  Jret0 and Jjmp are left as they
+ * are; jump types other than Jjnz are not handled and hit the
+ * assertion.
+ *
+ * For Jjnz the block is searched backwards for the instruction
+ * defining the jump argument.  When that instruction is a
+ * comparison and the argument has a single use, the comparison
+ * is emitted with selcmp(), the jump becomes Jjf plus its
+ * condition (adjusted with cmpop() if the operands were
+ * exchanged) and the defining instruction is turned into a
+ * nop.  Otherwise the argument is compared with zero in class
+ * Kw and the jump becomes Jjfine.  The jump argument is cleared
+ * in both cases.
+ */
+static void
+seljmp(Blk *b, Fn *fn)
+{
+ Ref r;
+ Ins *i, *ir;
+ int ck, cc, use;
+
+ switch (b->jmp.type) {
+ default:
+ assert(0 && "TODO 2");
+ break;
+ case Jret0:
+ case Jjmp:
+ return;
+ case Jjnz:
+ break;
+ }
+ r = b->jmp.arg;
+ use = -1;
+ b->jmp.arg = R;
+ ir = 0;
+ i = &b->ins[b->nins];
+ while (i > b->ins)
+ if (req((--i)->to, r)) {
+ use = fn->tmp[r.val].nuse;
+ ir = i;
+ break;
+ }
+ if (ir && use == 1
+ && iscmp(ir->op, &ck, &cc)) {
+ if (selcmp(ir->arg, ck, fn))
+ cc = cmpop(cc);
+ b->jmp.type = Jjf + cc;
+ *ir = (Ins){.op = Onop};
+ }
+ else {
+ selcmp((Ref[]){r, CON_Z}, Kw, fn);
+ b->jmp.type = Jjfine;
+ }
+}
+/* Instruction selection entry point for fn.
+ *
+ * First, alloc instructions of the start block whose size is a
+ * constant get a frame slot: for each alloc opcode from Oalloc
+ * to Oalloc1 (rounding to 4, 8, then 16 bytes) the size is
+ * rounded up, converted to 4-byte units, and added to fn->slot;
+ * the instruction becomes a nop.  Negative or huge sizes are an
+ * error.  Then every block is rebuilt in the emit buffer: the
+ * phi arguments it provides to its successors are fixed, its
+ * jump is selected, and its instructions are selected from last
+ * to first.  With the 'I' debug flag the function is printed on
+ * stderr afterwards.
+ */
+void
+arm64_isel(Fn *fn)
+{
+ Blk *b, **sb;
+ Ins *i;
+ Phi *p;
+ uint n, al;
+ int64_t sz;
+
+ /* assign slots to fast allocs */
+ b = fn->start;
+ /* specific to NAlign == 3 */ /* or change n=4 and sz /= 4 below */
+ for (al=Oalloc, n=4; al<=Oalloc1; al++, n*=2)
+ for (i=b->ins; i-b->ins < b->nins; i++)
+ if (i->op == al) {
+ if (rtype(i->arg[0]) != RCon)
+ break; /* ends the scan for this alloc opcode */
+ sz = fn->con[i->arg[0].val].bits.i;
+ if (sz < 0 || sz >= INT_MAX-15)
+ err("invalid alloc size %"PRId64, sz);
+ sz = (sz + n-1) & -n;
+ sz /= 4;
+ fn->tmp[i->to.val].slot = fn->slot;
+ fn->slot += sz;
+ *i = (Ins){.op = Onop};
+ }
+
+ for (b=fn->start; b; b=b->link) {
+ curi = &insb[NIns];
+ for (sb=(Blk*[3]){b->s1, b->s2, 0}; *sb; sb++)
+ for (p=(*sb)->phi; p; p=p->link) {
+ for (n=0; p->blk[n] != b; n++)
+ assert(n+1 < p->narg);
+ fixarg(&p->arg[n], p->cls, 1, fn);
+ }
+ seljmp(b, fn);
+ for (i=&b->ins[b->nins]; i!=b->ins;)
+ sel(*--i, fn);
+ b->nins = &insb[NIns] - curi;
+ idup(&b->ins, curi, b->nins);
+ }
+
+ if (debug['I']) {
+ fprintf(stderr, "\n> After instruction selection:\n");
+ printfn(fn, stderr);
+ }
+}
diff --git a/arm64/targ.c b/arm64/targ.c
@@ -0,0 +1,51 @@
+#include "all.h"
+/* Register lists, each terminated by -1.  arm64_rsave is
+ * installed as the target's rsave list; arm64_rclob is walked
+ * by framelayout() and arm64_emitfn() to count, save and
+ * restore the registers of that list that a function uses.
+ * The sizes of both arrays are checked at the end of this file.
+ */
+int arm64_rsave[] = {
+ R0, R1, R2, R3, R4, R5, R6, R7,
+ R8, R9, R10, R11, R12, R13, R14, R15,
+ IP0, IP1, R18,
+ V0, V1, V2, V3, V4, V5, V6, V7,
+ V16, V17, V18, V19, V20, V21, V22, V23,
+ V24, V25, V26, V27, V28, V29, V30,
+ -1
+};
+int arm64_rclob[] = {
+ R19, R20, R21, R22, R23, R24, R25, R26,
+ R27, R28,
+ V8, V9, V10, V11, V12, V13, V14, V15,
+ -1
+};
+/* Bit set installed as the target's rglob field (3 registers,
+ * see nrglob below).  R18 is also the register that emit.c
+ * prints for the %? scratch escape of integer templates.
+ */
+#define RGLOB (BIT(FP) | BIT(SP) | BIT(R18))
+/* memargs hook of the target: returns 0 for every opcode. */
+static int
+arm64_memargs(int op)
+{
+ (void)op;
+ return 0;
+}
+/* Target description of the arm64 backend; main.c registers it
+ * under the name "arm64".
+ */
+Target T_arm64 = {
+ .gpr0 = R0,
+ .ngpr = NGPR,
+ .fpr0 = V0,
+ .nfpr = NFPR,
+ .rglob = RGLOB,
+ .nrglob = 3,
+ .rsave = arm64_rsave,
+ .nrsave = {NGPS, NFPS},
+ .retregs = arm64_retregs,
+ .argregs = arm64_argregs,
+ .memargs = arm64_memargs,
+ .abi = arm64_abi,
+ .isel = arm64_isel,
+ .emitfn = arm64_emitfn,
+};
+/* Build-time checks (MAKESURE is defined outside this file):
+ * no register of RGLOB has a number up to R8, and the two
+ * register arrays have exactly NGPS+NFPS and NCLR entries plus
+ * their -1 terminator.
+ */
+MAKESURE(globals_are_not_arguments,
+ (RGLOB & (BIT(R8+1) - 1)) == 0
+);
+MAKESURE(arrays_size_ok,
+ sizeof arm64_rsave == (NGPS+NFPS+1) * sizeof(int) &&
+ sizeof arm64_rclob == (NCLR+1) * sizeof(int)
+);
diff --git a/main.c b/main.c
@@ -6,12 +6,14 @@
Target T;
extern Target T_amd64_sysv;
+extern Target T_arm64;
static struct TMap {
char *name;
Target *T;
} tmap[] = {
{ "amd64_sysv", &T_amd64_sysv },
+ { "arm64", &T_arm64 },
{ 0, 0 }
};
diff --git a/ops.h b/ops.h
@@ -126,6 +126,9 @@ O(xidiv, T(w,l,e,e, x,x,e,e), 0) X(1, 0, 0)
O(xdiv, T(w,l,e,e, x,x,e,e), 0) X(1, 0, 0)
O(xcmp, T(w,l,s,d, w,l,s,d), 0) X(1, 1, 0)
O(xtest, T(w,l,e,e, w,l,e,e), 0) X(1, 1, 0)
+O(acmp, T(w,l,e,e, w,l,e,e), 0) X(0, 0, 0)
+O(acmn, T(w,l,e,e, w,l,e,e), 0) X(0, 0, 0)
+O(afcmp, T(e,e,s,d, e,e,s,d), 0) X(0, 0, 0)
/* Arguments, Parameters, and Calls */
O(par, T(x,x,x,x, x,x,x,x), 0) X(0, 0, 0)
diff --git a/parse.c b/parse.c
@@ -1203,6 +1203,9 @@ printfn(Fn *fn, FILE *f)
case Oarg:
case Oswap:
case Oxcmp:
+ case Oacmp:
+ case Oacmn:
+ case Oafcmp:
case Oxtest:
case Oxdiv:
case Oxidiv: