commit 0a81036fa2dfeb44befad86f0bf3e48b2f4ffed9
parent 729aa97b799f72afdec3604f96526760701f36bc
Author: Quentin Carbonneaux <quentin.carbonneaux@yale.edu>
Date: Thu, 31 Mar 2016 17:17:09 -0400
move abi code in a new file
Diffstat:
| M | Makefile | | | 3 | ++- |
| M | all.h | | | 6 | +++++- |
| M | isel.c | | | 515 | +------------------------------------------------------------------------------ |
| M | main.c | | | 1 | + |
| M | parse.c | | | 65 | ++++++++++++++++++++++++++++++----------------------------------- |
| A | sysv.c | | | 512 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| M | util.c | | | 13 | ++++++++++--- |
7 files changed, 561 insertions(+), 554 deletions(-)
diff --git a/Makefile b/Makefile
@@ -1,9 +1,10 @@
BIN = qbe
+ABI = sysv
V = @
OBJDIR = obj
-SRC = main.c util.c parse.c mem.c ssa.c copy.c live.c isel.c spill.c rega.c emit.c
+SRC = main.c util.c parse.c mem.c ssa.c copy.c live.c isel.c spill.c rega.c emit.c $(ABI).c
OBJ = $(SRC:%.c=$(OBJDIR)/%.o)
CFLAGS += -Wall -Wextra -std=c99 -g -pedantic
diff --git a/all.h b/all.h
@@ -495,6 +495,7 @@ void *vnew(ulong, size_t);
void vgrow(void *, ulong);
int phicls(int, Tmp *);
Ref newtmp(char *, int, Fn *);
+void chuse(Ref, int, Fn *);
Ref getcon(int64_t, Fn *);
void addcon(Con *, Con *);
void dumpts(BSet *, Tmp *, FILE *);
@@ -541,11 +542,14 @@ void copy(Fn *);
void liveon(BSet *, Blk *, Blk *);
void filllive(Fn *);
-/* isel.c */
+/* abi: sysv.c */
extern int rsave[/* NRSave */];
extern int rclob[/* NRClob */];
bits retregs(Ref, int[2]);
bits argregs(Ref, int[2]);
+void abi(Fn *);
+
+/* isel.c */
void isel(Fn *);
/* spill.c */
diff --git a/isel.c b/isel.c
@@ -20,8 +20,6 @@
*/
typedef struct ANum ANum;
-typedef struct AClass AClass;
-typedef struct RAlloc RAlloc;
struct ANum {
char n, l, r;
@@ -157,13 +155,6 @@ fixarg(Ref *r, int k, int phi, Fn *fn)
}
static void
-chuse(Ref r, int du, Fn *fn)
-{
- if (rtype(r) == RTmp)
- fn->tmp[r.val].nuse += du;
-}
-
-static void
seladdr(Ref *r, ANum *an, Fn *fn)
{
Addr a;
@@ -357,146 +348,6 @@ flagi(Ins *i0, Ins *i)
return 0;
}
-struct AClass {
- int inmem;
- int align;
- uint size;
- int cls[2];
-};
-
-static void
-aclass(AClass *a, Typ *t)
-{
- int e, s, n, cls;
- uint sz, al;
-
- sz = t->size;
- al = 1u << t->align;
-
- /* the ABI requires sizes to be rounded
- * up to the nearest multiple of 8, moreover
- * it makes it easy load and store structures
- * in registers
- */
- if (al < 8)
- al = 8;
- sz = (sz + al-1) & -al;
-
- a->size = sz;
- a->align = t->align;
-
- if (t->dark || sz > 16) {
- /* large or unaligned structures are
- * required to be passed in memory
- */
- a->inmem = 1;
- return;
- }
-
- a->inmem = 0;
- for (e=0, s=0; e<2; e++) {
- cls = -1;
- for (n=0; n<8 && t->seg[s].len; s++) {
- if (t->seg[s].ispad) {
- /* don't change anything */
- }
- else if (t->seg[s].isflt) {
- if (cls == -1)
- cls = Kd;
- }
- else
- cls = Kl;
- n += t->seg[s].len;
- }
- assert(n <= 8);
- a->cls[e] = cls;
- }
-}
-
-static void
-blit(Ref rstk, uint soff, Ref rsrc, uint sz, Fn *fn)
-{
- Ref r, r1;
- uint boff;
-
- /* it's an impolite blit, we might go across the end
- * of the source object a little bit... */
- for (boff=0; sz>0; sz-=8, soff+=8, boff+=8) {
- r = newtmp("abi", Kl, fn);
- r1 = newtmp("abi", Kl, fn);
- emit(OStorel, 0, R, r, r1);
- emit(OAdd, Kl, r1, rstk, getcon(soff, fn));
- r1 = newtmp("abi", Kl, fn);
- emit(OLoad, Kl, r, r1, R);
- emit(OAdd, Kl, r1, rsrc, getcon(boff, fn));
- chuse(rsrc, +1, fn);
- chuse(rstk, +1, fn);
- }
-}
-
-static int
-retr(Ref reg[2], AClass *aret)
-{
- static int retreg[2][2] = {{RAX, RDX}, {XMM0, XMM0+1}};
- int n, k, ca, nr[2];
-
- nr[0] = nr[1] = 0;
- ca = 0;
- for (n=0; aret->cls[n]>=0 && n<2; n++) {
- k = KBASE(aret->cls[n]);
- reg[n] = TMP(retreg[k][nr[k]++]);
- ca += 1 << (2 * k);
- }
- return ca;
-}
-
-static void
-selret(Blk *b, Fn *fn)
-{
- int j, k, ca;
- Ref r, r0, reg[2];
- AClass aret;
-
- j = b->jmp.type;
-
- if (!isret(j) || j == JRet0)
- return;
-
- r0 = b->jmp.arg;
- b->jmp.type = JRet0;
-
- if (j == JRetc) {
- aclass(&aret, &typ[fn->retty]);
- if (aret.inmem) {
- assert(rtype(fn->retr) == RTmp);
- emit(OCopy, Kl, TMP(RAX), fn->retr, R);
- chuse(fn->retr, +1, fn);
- blit(fn->retr, 0, r0, aret.size, fn);
- ca = 1;
- } else {
- ca = retr(reg, &aret);
- if (aret.size > 8) {
- r = newtmp("abi", Kl, fn);
- emit(OLoad, Kl, reg[1], r, R);
- emit(OAdd, Kl, r, r0, getcon(8, fn));
- chuse(r0, +1, fn);
- }
- emit(OLoad, Kl, reg[0], r0, R);
- }
- } else {
- k = j - JRetw;
- if (KBASE(k) == 0) {
- emit(OCopy, k, TMP(RAX), r0, R);
- ca = 1;
- } else {
- emit(OCopy, k, TMP(XMM0), r0, R);
- ca = 1 << 2;
- }
- }
-
- b->jmp.arg = CALL(ca);
-}
-
static void
seljmp(Blk *b, Fn *fn)
{
@@ -557,320 +408,6 @@ seljmp(Blk *b, Fn *fn)
}
static int
-classify(Ins *i0, Ins *i1, AClass *ac, int op, AClass *aret)
-{
- int nint, ni, nsse, ns, n, *pn;
- AClass *a;
- Ins *i;
-
- if (aret && aret->inmem)
- nint = 5; /* hidden argument */
- else
- nint = 6;
- nsse = 8;
- for (i=i0, a=ac; i<i1; i++, a++) {
- if (i->op == op) {
- if (KBASE(i->cls) == 0)
- pn = &nint;
- else
- pn = &nsse;
- if (*pn > 0) {
- --*pn;
- a->inmem = 0;
- } else
- a->inmem = 2;
- a->align = 3;
- a->size = 8;
- a->cls[0] = i->cls;
- } else {
- n = i->arg[0].val & AMask;
- aclass(a, &typ[n]);
- if (a->inmem)
- continue;
- ni = ns = 0;
- for (n=0; n<2; n++)
- if (KBASE(a->cls[n]) == 0)
- ni++;
- else
- ns++;
- if (nint >= ni && nsse >= ns) {
- nint -= ni;
- nsse -= ns;
- } else
- a->inmem = 1;
- }
- }
-
- return ((6-nint) << 4) | ((8-nsse) << 8);
-}
-
-int rsave[] = {
- RDI, RSI, RDX, RCX, R8, R9, R10, R11, RAX,
- XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
- XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14
-};
-int rclob[] = {RBX, R12, R13, R14, R15};
-
-MAKESURE(rsave_has_correct_size, sizeof rsave == NRSave * sizeof(int));
-MAKESURE(rclob_has_correct_size, sizeof rclob == NRClob * sizeof(int));
-
-bits
-retregs(Ref r, int p[2])
-{
- bits b;
- int ni, nf;
-
- assert(rtype(r) == RACall);
- b = 0;
- ni = r.val & 3;
- nf = (r.val >> 2) & 3;
- if (ni >= 1)
- b |= BIT(RAX);
- if (ni >= 2)
- b |= BIT(RDX);
- if (nf >= 1)
- b |= BIT(XMM0);
- if (nf >= 2)
- b |= BIT(XMM1);
- if (p) {
- p[0] = ni;
- p[1] = nf;
- }
- return b;
-}
-
-bits
-argregs(Ref r, int p[2])
-{
- bits b;
- int j, ni, nf;
-
- assert(rtype(r) == RACall);
- b = 0;
- ni = (r.val >> 4) & 15;
- nf = (r.val >> 8) & 15;
- for (j=0; j<ni; j++)
- b |= BIT(rsave[j]);
- for (j=0; j<nf; j++)
- b |= BIT(XMM0+j);
- if (p) {
- p[0] = ni + 1;
- p[1] = nf;
- }
- return b | BIT(RAX);
-}
-
-static Ref
-rarg(int ty, int *ni, int *ns)
-{
- if (KBASE(ty) == 0)
- return TMP(rsave[(*ni)++]);
- else
- return TMP(XMM0 + (*ns)++);
-}
-
-struct RAlloc {
- Ins i;
- RAlloc *link;
-};
-
-static void
-selcall(Fn *fn, Ins *i0, Ins *i1, RAlloc **rap)
-{
- Ins *i;
- AClass *ac, *a, aret;
- int ca, ni, ns;
- uint stk, off;
- Ref r, r1, r2, reg[2], regcp[2];
- RAlloc *ra;
-
- ac = alloc((i1-i0) * sizeof ac[0]);
- if (!req(i1->arg[1], R)) {
- assert(rtype(i1->arg[1]) == RAType);
- aclass(&aret, &typ[i1->arg[1].val & AMask]);
- ca = classify(i0, i1, ac, OArg, &aret);
- } else
- ca = classify(i0, i1, ac, OArg, 0);
-
- for (stk=0, a=&ac[i1-i0]; a>ac;)
- if ((--a)->inmem) {
- assert(a->align <= 4);
- stk += a->size;
- if (a->align == 4)
- stk += stk & 15;
- }
- stk += stk & 15;
- if (stk) {
- r = getcon(-(int64_t)stk, fn);
- emit(OSAlloc, Kl, R, r, R);
- }
-
- if (!req(i1->arg[1], R)) {
- if (aret.inmem) {
- /* get the return location from eax
- * it saves one callee-save reg */
- r1 = newtmp("abi", Kl, fn);
- emit(OCopy, Kl, i1->to, TMP(RAX), R);
- ca += 1;
- } else {
- if (aret.size > 8) {
- r = newtmp("abi", Kl, fn);
- regcp[1] = newtmp("abi", aret.cls[1], fn);
- emit(OStorel, 0, R, regcp[1], r);
- emit(OAdd, Kl, r, i1->to, getcon(8, fn));
- chuse(i1->to, +1, fn);
- ca += 1 << (2 * KBASE(aret.cls[1]));
- }
- regcp[0] = newtmp("abi", aret.cls[0], fn);
- emit(OStorel, 0, R, regcp[0], i1->to);
- ca += 1 << (2 * KBASE(aret.cls[0]));
- retr(reg, &aret);
- if (aret.size > 8)
- emit(OCopy, aret.cls[1], regcp[1], reg[1], R);
- emit(OCopy, aret.cls[0], regcp[0], reg[0], R);
- r1 = i1->to;
- }
- /* allocate return pad */
- ra = alloc(sizeof *ra);
- /* specific to NAlign == 3 */
- aret.align -= 2;
- if (aret.align < 0)
- aret.align = 0;
- ra->i.op = OAlloc + aret.align;
- ra->i.cls = Kl;
- ra->i.to = r1;
- ra->i.arg[0] = getcon(aret.size, fn);
- ra->link = (*rap);
- *rap = ra;
- } else {
- ra = 0;
- if (KBASE(i1->cls) == 0) {
- emit(OCopy, i1->cls, i1->to, TMP(RAX), R);
- ca += 1;
- } else {
- emit(OCopy, i1->cls, i1->to, TMP(XMM0), R);
- ca += 1 << 2;
- }
- }
- emit(OCall, i1->cls, R, i1->arg[0], CALL(ca));
- emit(OCopy, Kw, TMP(RAX), getcon((ca >> 8) & 15, fn), R);
-
- ni = ns = 0;
- if (ra && aret.inmem)
- emit(OCopy, Kl, rarg(Kl, &ni, &ns), ra->i.to, R); /* pass hidden argument */
- for (i=i0, a=ac; i<i1; i++, a++) {
- if (a->inmem)
- continue;
- r1 = rarg(a->cls[0], &ni, &ns);
- if (i->op == OArgc) {
- if (a->size > 8) {
- r2 = rarg(a->cls[1], &ni, &ns);
- r = newtmp("abi", Kl, fn);
- emit(OLoad, a->cls[1], r2, r, R);
- emit(OAdd, Kl, r, i->arg[1], getcon(8, fn));
- chuse(i->arg[1], +1, fn);
- }
- emit(OLoad, a->cls[0], r1, i->arg[1], R);
- } else
- emit(OCopy, i->cls, r1, i->arg[0], R);
- }
-
- if (!stk)
- return;
-
- r = newtmp("abi", Kl, fn);
- chuse(r, -1, fn);
- for (i=i0, a=ac, off=0; i<i1; i++, a++) {
- if (!a->inmem)
- continue;
- if (i->op == OArgc) {
- if (a->align == 4)
- off += off & 15;
- blit(r, off, i->arg[1], a->size, fn);
- } else {
- r1 = newtmp("abi", Kl, fn);
- emit(OStorel, 0, R, i->arg[0], r1);
- emit(OAdd, Kl, r1, r, getcon(off, fn));
- chuse(r, +1, fn);
- }
- off += a->size;
- }
- emit(OSAlloc, Kl, r, getcon(stk, fn), R);
-}
-
-static void
-selpar(Fn *fn, Ins *i0, Ins *i1)
-{
- AClass *ac, *a, aret;
- Ins *i;
- int ni, ns, s, al;
- Ref r, r1;
-
- ac = alloc((i1-i0) * sizeof ac[0]);
- curi = insb;
- ni = ns = 0;
-
- if (fn->retty >= 0) {
- aclass(&aret, &typ[fn->retty]);
- if (aret.inmem) {
- r = newtmp("abi", Kl, fn);
- *curi++ = (Ins){OCopy, r, {rarg(Kl, &ni, &ns)}, Kl};
- fn->retr = r;
- }
- classify(i0, i1, ac, OPar, &aret);
- } else
- classify(i0, i1, ac, OPar, 0);
-
- /* specific to NAlign == 3 */
-
- s = 4;
- for (i=i0, a=ac; i<i1; i++, a++) {
- switch (a->inmem) {
- case 1:
- assert(a->align <= 4);
- if (a->align == 4)
- s = (s+3) & -4;
- fn->tmp[i->to.val].slot = -s; /* HACK! */
- s += a->size / 4;
- continue;
- case 2:
- *curi++ = (Ins){OLoad, i->to, {SLOT(-s)}, i->cls};
- s += 2;
- continue;
- }
- r1 = rarg(a->cls[0], &ni, &ns);
- if (i->op == OParc) {
- r = newtmp("abi", Kl, fn);
- *curi++ = (Ins){OCopy, r, {r1}, Kl};
- a->cls[0] = r.val;
- if (a->size > 8) {
- r1 = rarg(a->cls[1], &ni, &ns);
- r = newtmp("abi", Kl, fn);
- *curi++ = (Ins){OCopy, r, {r1}, Kl};
- a->cls[1] = r.val;
- }
- } else
- *curi++ = (Ins){OCopy, i->to, {r1}, i->cls};
- }
- for (i=i0, a=ac; i<i1; i++, a++) {
- if (i->op != OParc || a->inmem)
- continue;
- for (al=0; a->align >> (al+2); al++)
- ;
- r = TMP(a->cls[0]);
- r1 = i->to;
- *curi++ = (Ins){OAlloc+al, r1, {getcon(a->size, fn)}, Kl};
- *curi++ = (Ins){OStorel, R, {r, r1}, 0};
- if (a->size > 8) {
- r = newtmp("abi", Kl, fn);
- *curi++ = (Ins){OAdd, r, {r1, getcon(8, fn)}, Kl};
- r1 = TMP(a->cls[1]);
- *curi++ = (Ins){OStorel, R, {r1, r}, 0};
- }
- }
-}
-
-static int
aref(Ref r, ANum *ai)
{
switch (rtype(r)) {
@@ -1031,62 +568,12 @@ void
isel(Fn *fn)
{
Blk *b, **sb;
- Ins *i, *i0, *ip;
+ Ins *i;
Phi *p;
uint a;
int n, al;
int64_t sz;
ANum *ainfo;
- RAlloc *ral;
-
- for (n=0; n<fn->ntmp; n++)
- fn->tmp[n].slot = -1;
- fn->slot = 0;
-
- /* lower arguments */
- for (b=fn->start, i=b->ins; i-b->ins < b->nins; i++)
- if (i->op != OPar && i->op != OParc)
- break;
- selpar(fn, b->ins, i);
- n = b->nins - (i - b->ins) + (curi - insb);
- i0 = alloc(n * sizeof(Ins));
- ip = icpy(ip = i0, insb, curi - insb);
- ip = icpy(ip, i, &b->ins[b->nins] - i);
- b->nins = n;
- b->ins = i0;
-
- /* lower function calls and returns */
- ral = 0;
- b = fn->start;
- do {
- if (!(b = b->link))
- b = fn->start; /* do it last */
- curi = &insb[NIns];
- selret(b, fn);
- for (i=&b->ins[b->nins]; i!=b->ins;) {
- if ((--i)->op == OCall) {
- for (i0=i; i0>b->ins; i0--)
- if ((i0-1)->op != OArg)
- if ((i0-1)->op != OArgc)
- break;
- selcall(fn, i0, i, &ral);
- i = i0;
- continue;
- }
- assert(i->op != OArg && i->op != OArgc);
- emiti(*i);
- }
- if (b == fn->start)
- for (; ral; ral=ral->link)
- emiti(ral->i);
- b->nins = &insb[NIns] - curi;
- idup(&b->ins, curi, b->nins);
- } while (b != fn->start);
-
- if (debug['A']) {
- fprintf(stderr, "\n> After call lowering:\n");
- printfn(fn, stderr);
- }
/* assign slots to fast allocs */
b = fn->start;
diff --git a/main.c b/main.c
@@ -49,6 +49,7 @@ func(Fn *fn)
filluse(fn);
copy(fn);
filluse(fn);
+ abi(fn);
isel(fn);
filllive(fn);
fillcost(fn);
diff --git a/parse.c b/parse.c
@@ -143,10 +143,7 @@ static struct {
} tokval;
static int lnum;
-static Tmp *tmp;
-static Con *con;
-static int ntmp;
-static int ncon;
+static Fn *curf;
static Phi **plink;
static Blk **bmap;
static Blk *curb;
@@ -360,11 +357,11 @@ tmpref(char *v)
{
int t;
- for (t=Tmp0; t<ntmp; t++)
- if (strcmp(v, tmp[t].name) == 0)
+ for (t=Tmp0; t<curf->ntmp; t++)
+ if (strcmp(v, curf->tmp[t].name) == 0)
return TMP(t);
- vgrow(&tmp, ++ntmp);
- strcpy(tmp[t].name, v);
+ newtmp(0, Kw, curf);
+ strcpy(curf->tmp[t].name, v);
return TMP(t);
}
@@ -396,13 +393,13 @@ parseref()
c.type = CAddr;
strcpy(c.label, tokval.str);
Look:
- for (i=0; i<ncon; i++)
- if (con[i].type == c.type
- && con[i].bits.i == c.bits.i
- && strcmp(con[i].label, c.label) == 0)
+ for (i=0; i<curf->ncon; i++)
+ if (curf->con[i].type == c.type
+ && curf->con[i].bits.i == c.bits.i
+ && strcmp(curf->con[i].label, c.label) == 0)
return CON(i);
- vgrow(&con, ++ncon);
- con[i] = c;
+ vgrow(&curf->con, ++curf->ncon);
+ curf->con[i] = c;
return CON(i);
default:
return R;
@@ -648,29 +645,31 @@ DoOp:
static Fn *
parsefn(int export)
{
+ int r;
PState ps;
- Fn *fn;
- ntmp = Tmp0;
- ncon = 1; /* first constant must be 0 */
curb = 0;
nblk = 0;
curi = insb;
- tmp = vnew(ntmp, sizeof tmp[0]);
- con = vnew(ncon, sizeof con[0]);
+ curf = alloc(sizeof *curf);
+ curf->ntmp = 0;
+ curf->ncon = 1; /* first constant must be 0 */
+ curf->tmp = vnew(curf->ntmp, sizeof curf->tmp[0]);
+ curf->con = vnew(curf->ncon, sizeof curf->con[0]);
+ for (r=0; r<Tmp0; r++)
+ newtmp(0, r < XMM0 ? Kl : Kd, curf);
bmap = vnew(nblk, sizeof bmap[0]);
- con[0].type = CBits;
- fn = alloc(sizeof *fn);
- fn->export = export;
- blink = &fn->start;
- fn->retty = -1;
+ curf->con[0].type = CBits;
+ curf->export = export;
+ blink = &curf->start;
+ curf->retty = -1;
if (peek() != TGlo)
- rcls = parsecls(&fn->retty);
+ rcls = parsecls(&curf->retty);
else
rcls = 5;
if (next() != TGlo)
err("function name expected");
- strcpy(fn->name, tokval.str);
+ strcpy(curf->name, tokval.str);
parserefl(0);
if (nextnl() != TLBrace)
err("function body must start with {");
@@ -682,15 +681,11 @@ parsefn(int export)
err("empty file");
if (curb->jmp.type == JXXX)
err("last block misses jump");
- fn->tmp = tmp;
- fn->con = con;
- fn->mem = vnew(0, sizeof fn->mem[0]);
- fn->ntmp = ntmp;
- fn->ncon = ncon;
- fn->nmem = 0;
- fn->nblk = nblk;
- fn->rpo = 0;
- return fn;
+ curf->mem = vnew(0, sizeof curf->mem[0]);
+ curf->nmem = 0;
+ curf->nblk = nblk;
+ curf->rpo = 0;
+ return curf;
}
static void
diff --git a/sysv.c b/sysv.c
@@ -0,0 +1,512 @@
+#include "all.h"
+
+typedef struct AClass AClass;
+typedef struct RAlloc RAlloc;
+
+/* classification of one argument, parameter or
+ * return value; filled in by aclass() and classify()
+ */
+struct AClass {
+ int inmem; /* 0: in registers, 1: aggregate in memory, 2: scalar in memory */
+ int align; /* log2 of the alignment in bytes */
+ uint size; /* size in bytes, a multiple of 8 once classified */
+ int cls[2]; /* class of each eightbyte, -1 when it holds no data */
+};
+
+/* classify the aggregate type t for parameter
+ * passing; on return a->size is the padded size
+ * of t and a->align its log2 alignment; a->inmem
+ * is 1 when t must live in memory (a->cls is then
+ * left untouched), otherwise it is 0 and a->cls[e]
+ * is Kl or Kd for the e-th eightbyte, or -1 when
+ * that eightbyte has no data segment
+ */
+static void
+aclass(AClass *a, Typ *t)
+{
+ int e, s, n, cls;
+ uint sz, al;
+
+ sz = t->size;
+ al = 1u << t->align;
+
+ /* the ABI requires sizes to be rounded
+ * up to the nearest multiple of 8, moreover
+ * it makes it easy to load and store structures
+ * in registers
+ */
+ if (al < 8)
+ al = 8;
+ sz = (sz + al-1) & -al;
+
+ a->size = sz;
+ a->align = t->align;
+
+ if (t->dark || sz > 16) {
+ /* large or unaligned structures are
+ * required to be passed in memory
+ */
+ a->inmem = 1;
+ return;
+ }
+
+ a->inmem = 0;
+ /* s walks the segments of t once, e counts
+ * the eightbytes, n the bytes gathered in the
+ * current eightbyte
+ */
+ for (e=0, s=0; e<2; e++) {
+ cls = -1;
+ for (n=0; n<8 && t->seg[s].len; s++) {
+ if (t->seg[s].ispad) {
+ /* don't change anything */
+ }
+ else if (t->seg[s].isflt) {
+ /* only float if nothing else
+ * was seen in this eightbyte
+ */
+ if (cls == -1)
+ cls = Kd;
+ }
+ else
+ cls = Kl; /* integer data wins over float */
+ n += t->seg[s].len;
+ }
+ assert(n <= 8);
+ a->cls[e] = cls;
+ }
+}
+
+/* emit code copying sz bytes from the address rsrc
+ * to the address rstk+soff, one 8-byte word per
+ * iteration; sz is unsigned and decremented by 8
+ * until it is 0, so it has to be a multiple of 8
+ * NOTE(review): each temporary is used by an emit()
+ * call placed before the one defining it, so emit()
+ * presumably fills the buffer backwards -- confirm
+ */
+static void
+blit(Ref rstk, uint soff, Ref rsrc, uint sz, Fn *fn)
+{
+ Ref r, r1;
+ uint boff;
+
+ /* it's an impolite blit, we might go across the end
+ * of the source object a little bit... */
+ for (boff=0; sz>0; sz-=8, soff+=8, boff+=8) {
+ r = newtmp("abi", Kl, fn); /* the word being copied */
+ r1 = newtmp("abi", Kl, fn); /* destination address */
+ emit(OStorel, 0, R, r, r1);
+ emit(OAdd, Kl, r1, rstk, getcon(soff, fn));
+ r1 = newtmp("abi", Kl, fn); /* source address */
+ emit(OLoad, Kl, r, r1, R);
+ emit(OAdd, Kl, r1, rsrc, getcon(boff, fn));
+ /* both base addresses got one more use */
+ chuse(rsrc, +1, fn);
+ chuse(rstk, +1, fn);
+ }
+}
+
+/* pick the return registers of an aggregate that
+ * is returned in registers; reg[n] receives the
+ * register of the n-th eightbyte of aret, integer
+ * ones are taken from RAX, RDX and sse ones from
+ * XMM0, XMM1
+ *
+ * the result counts the registers picked, in the
+ * layout retregs() decodes: integer registers in
+ * bits 0-1 and sse registers in bits 2-3
+ */
+static int
+retr(Ref reg[2], AClass *aret)
+{
+	static int retreg[2][2] = {{RAX, RDX}, {XMM0, XMM0+1}};
+	int n, k, ca, nr[2];
+
+	nr[0] = nr[1] = 0;
+	ca = 0;
+	/* the bound is tested first so that
+	 * cls[2] is never read
+	 */
+	for (n=0; n<2 && aret->cls[n]>=0; n++) {
+		k = KBASE(aret->cls[n]);
+		reg[n] = TMP(retreg[k][nr[k]++]);
+		ca += 1 << (2 * k);
+	}
+	return ca;
+}
+
+/* lower the return jump of block b; nothing is
+ * done unless the jump is a return with a value;
+ * the value is moved to the return registers, or
+ * copied to the area designated by fn->retr for
+ * an aggregate returned in memory, and the jump
+ * becomes a JRet0 whose argument is a CALL()
+ * descriptor of the registers holding the result
+ */
+static void
+selret(Blk *b, Fn *fn)
+{
+ int j, k, ca;
+ Ref r, r0, reg[2];
+ AClass aret;
+
+ j = b->jmp.type;
+
+ if (!isret(j) || j == JRet0)
+ return;
+
+ r0 = b->jmp.arg;
+ b->jmp.type = JRet0;
+
+ if (j == JRetc) {
+ /* aggregate: r0 is its address */
+ aclass(&aret, &typ[fn->retty]);
+ if (aret.inmem) {
+ /* copy the aggregate to fn->retr and
+ * hand that address back in RAX
+ */
+ assert(rtype(fn->retr) == RTmp);
+ emit(OCopy, Kl, TMP(RAX), fn->retr, R);
+ chuse(fn->retr, +1, fn);
+ blit(fn->retr, 0, r0, aret.size, fn);
+ ca = 1;
+ } else {
+ /* load one or two eightbytes from
+ * r0 into the return registers
+ */
+ ca = retr(reg, &aret);
+ if (aret.size > 8) {
+ r = newtmp("abi", Kl, fn);
+ emit(OLoad, Kl, reg[1], r, R);
+ emit(OAdd, Kl, r, r0, getcon(8, fn));
+ chuse(r0, +1, fn);
+ }
+ emit(OLoad, Kl, reg[0], r0, R);
+ }
+ } else {
+ /* scalar: the class is derived from the jump type */
+ k = j - JRetw;
+ if (KBASE(k) == 0) {
+ emit(OCopy, k, TMP(RAX), r0, R);
+ ca = 1;
+ } else {
+ emit(OCopy, k, TMP(XMM0), r0, R);
+ ca = 1 << 2;
+ }
+ }
+
+ b->jmp.arg = CALL(ca);
+}
+
+/* classify the instructions i0..i1-1, which are the
+ * arguments of a call (op == OArg) or the parameters
+ * of a function (op == OPar), filling one AClass per
+ * instruction in ac; an instruction whose opcode is
+ * not op carries an aggregate, the number of its type
+ * is read from arg[0]
+ *
+ * aret, when not null, describes an aggregate return
+ * value: if it is returned in memory, one integer
+ * register is set aside for the hidden argument
+ *
+ * the result counts the registers used, in the layout
+ * argregs() decodes: integer registers in bits 4-7
+ * (hidden argument included) and sse registers in
+ * bits 8-11
+ */
+static int
+classify(Ins *i0, Ins *i1, AClass *ac, int op, AClass *aret)
+{
+	int nint, ni, nsse, ns, n, nreg, *pn;
+	AClass *a;
+	Ins *i;
+
+	if (aret && aret->inmem)
+		nint = 5; /* hidden argument */
+	else
+		nint = 6;
+	nsse = 8;
+	for (i=i0, a=ac; i<i1; i++, a++) {
+		if (i->op == op) {
+			/* scalar: takes one register of
+			 * its class, or 8 bytes of stack
+			 */
+			if (KBASE(i->cls) == 0)
+				pn = &nint;
+			else
+				pn = &nsse;
+			if (*pn > 0) {
+				--*pn;
+				a->inmem = 0;
+			} else
+				a->inmem = 2;
+			a->align = 3;
+			a->size = 8;
+			a->cls[0] = i->cls;
+		} else {
+			n = i->arg[0].val & AMask;
+			aclass(a, &typ[n]);
+			if (a->inmem)
+				continue;
+			/* count one register per eightbyte in
+			 * use; the second one only matters when
+			 * the aggregate is larger than 8 bytes,
+			 * exactly like in selcall() and selpar(),
+			 * otherwise cls[1] is -1 and must not be
+			 * charged to any register file
+			 */
+			nreg = a->size > 8 ? 2 : 1;
+			ni = ns = 0;
+			for (n=0; n<nreg; n++)
+				if (KBASE(a->cls[n]) == 0)
+					ni++;
+				else
+					ns++;
+			/* all or nothing: an aggregate is
+			 * never split between registers
+			 * and memory
+			 */
+			if (nint >= ni && nsse >= ns) {
+				nint -= ni;
+				nsse -= ns;
+			} else
+				a->inmem = 1;
+		}
+	}
+
+	return ((6-nint) << 4) | ((8-nsse) << 8);
+}
+
+/* register tables declared in all.h; rsave starts
+ * with the six integer argument registers, in the
+ * order rarg() and argregs() hand them out
+ * NOTE(review): rsave is presumably the list of
+ * caller-save registers and rclob the list of
+ * callee-save ones -- confirm against their users
+ */
+int rsave[] = {
+ RDI, RSI, RDX, RCX, R8, R9, R10, R11, RAX,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14
+};
+int rclob[] = {RBX, R12, R13, R14, R15};
+
+/* the tables must have exactly NRSave and NRClob entries */
+MAKESURE(rsave_has_correct_size, sizeof rsave == NRSave * sizeof(int));
+MAKESURE(rclob_has_correct_size, sizeof rclob == NRClob * sizeof(int));
+
+/* decode the return registers of the call descriptor
+ * r: bits 0-1 of r.val count the integer registers
+ * (RAX, then RDX) and bits 2-3 the sse registers
+ * (XMM0, then XMM1); the result is the set of these
+ * registers and, when p is not null, p[0] and p[1]
+ * receive the integer and sse counts
+ */
+bits
+retregs(Ref r, int p[2])
+{
+	bits set;
+	int nint, nsse;
+
+	assert(rtype(r) == RACall);
+	nint = r.val & 3;
+	nsse = (r.val >> 2) & 3;
+	set = 0;
+	if (nint >= 1) {
+		set |= BIT(RAX);
+		if (nint >= 2)
+			set |= BIT(RDX);
+	}
+	if (nsse >= 1) {
+		set |= BIT(XMM0);
+		if (nsse >= 2)
+			set |= BIT(XMM1);
+	}
+	if (p) {
+		p[0] = nint;
+		p[1] = nsse;
+	}
+	return set;
+}
+
+/* decode the argument registers of the call descriptor
+ * r: bits 4-7 of r.val count the integer registers,
+ * taken from the start of rsave, and bits 8-11 the sse
+ * registers, taken from XMM0 upwards; RAX is always
+ * part of the resulting set and is included in the
+ * integer count stored in p[0] when p is not null
+ */
+bits
+argregs(Ref r, int p[2])
+{
+	bits set;
+	int j, nint, nsse;
+
+	assert(rtype(r) == RACall);
+	nint = (r.val >> 4) & 15;
+	nsse = (r.val >> 8) & 15;
+	set = BIT(RAX);
+	j = 0;
+	while (j < nint) {
+		set |= BIT(rsave[j]);
+		j++;
+	}
+	j = 0;
+	while (j < nsse) {
+		set |= BIT(XMM0+j);
+		j++;
+	}
+	if (p) {
+		p[0] = nint + 1;
+		p[1] = nsse;
+	}
+	return set;
+}
+
+/* hand out the next free argument register for a
+ * value of class ty and bump the matching counter:
+ * *ni indexes the integer registers at the start of
+ * rsave, *ns counts the sse registers from XMM0
+ */
+static Ref
+rarg(int ty, int *ni, int *ns)
+{
+	int reg;
+
+	if (KBASE(ty) != 0)
+		reg = XMM0 + (*ns)++;
+	else
+		reg = rsave[(*ni)++];
+	return TMP(reg);
+}
+
+/* list of the allocation instructions created by
+ * selcall() for aggregate return values; abi()
+ * emits them when it processes the start block
+ */
+struct RAlloc {
+ Ins i; /* the allocation instruction */
+ RAlloc *link; /* next element, 0 at the end */
+};
+
+/* lower one call: i0..i1-1 are its OArg/OArgc
+ * instructions and i1 is the OCall itself; when
+ * the call returns an aggregate, the instruction
+ * allocating storage for it is pushed on *rap
+ * NOTE(review): abi() walks the instructions of a
+ * block backwards, so the emit() calls below are
+ * presumably listed in the reverse of the final
+ * instruction order -- confirm with emit()
+ */
+static void
+selcall(Fn *fn, Ins *i0, Ins *i1, RAlloc **rap)
+{
+ Ins *i;
+ AClass *ac, *a, aret;
+ int ca, ni, ns;
+ uint stk, off;
+ Ref r, r1, r2, reg[2], regcp[2];
+ RAlloc *ra;
+
+ /* a non-empty arg[1] on the call holds the
+ * type of the aggregate it returns
+ */
+ ac = alloc((i1-i0) * sizeof ac[0]);
+ if (!req(i1->arg[1], R)) {
+ assert(rtype(i1->arg[1]) == RAType);
+ aclass(&aret, &typ[i1->arg[1].val & AMask]);
+ ca = classify(i0, i1, ac, OArg, &aret);
+ } else
+ ca = classify(i0, i1, ac, OArg, 0);
+
+ /* size of the stack area for the arguments
+ * passed in memory; sizes are multiples of
+ * 8, so adding stk & 15 pads to 16 bytes
+ */
+ for (stk=0, a=&ac[i1-i0]; a>ac;)
+ if ((--a)->inmem) {
+ assert(a->align <= 4);
+ stk += a->size;
+ if (a->align == 4)
+ stk += stk & 15;
+ }
+ stk += stk & 15;
+ if (stk) {
+ /* NOTE(review): the negative amount presumably
+ * releases the area reserved by the OSAlloc
+ * emitted at the end of this function
+ */
+ r = getcon(-(int64_t)stk, fn);
+ emit(OSAlloc, Kl, R, r, R);
+ }
+
+ /* fetch the result and add the return
+ * registers to the call descriptor ca
+ */
+ if (!req(i1->arg[1], R)) {
+ if (aret.inmem) {
+ /* get the return location from eax
+ * it saves one callee-save reg */
+ r1 = newtmp("abi", Kl, fn);
+ emit(OCopy, Kl, i1->to, TMP(RAX), R);
+ ca += 1;
+ } else {
+ /* the return registers are copied to
+ * temporaries which are then stored
+ * in the area designated by i1->to
+ */
+ if (aret.size > 8) {
+ r = newtmp("abi", Kl, fn);
+ regcp[1] = newtmp("abi", aret.cls[1], fn);
+ emit(OStorel, 0, R, regcp[1], r);
+ emit(OAdd, Kl, r, i1->to, getcon(8, fn));
+ chuse(i1->to, +1, fn);
+ ca += 1 << (2 * KBASE(aret.cls[1]));
+ }
+ regcp[0] = newtmp("abi", aret.cls[0], fn);
+ emit(OStorel, 0, R, regcp[0], i1->to);
+ ca += 1 << (2 * KBASE(aret.cls[0]));
+ retr(reg, &aret);
+ if (aret.size > 8)
+ emit(OCopy, aret.cls[1], regcp[1], reg[1], R);
+ emit(OCopy, aret.cls[0], regcp[0], reg[0], R);
+ r1 = i1->to;
+ }
+ /* allocate return pad */
+ ra = alloc(sizeof *ra);
+ /* specific to NAlign == 3 */
+ aret.align -= 2;
+ if (aret.align < 0)
+ aret.align = 0;
+ ra->i.op = OAlloc + aret.align;
+ ra->i.cls = Kl;
+ ra->i.to = r1;
+ ra->i.arg[0] = getcon(aret.size, fn);
+ ra->link = (*rap);
+ *rap = ra;
+ } else {
+ /* scalar result, in RAX or XMM0 */
+ ra = 0;
+ if (KBASE(i1->cls) == 0) {
+ emit(OCopy, i1->cls, i1->to, TMP(RAX), R);
+ ca += 1;
+ } else {
+ emit(OCopy, i1->cls, i1->to, TMP(XMM0), R);
+ ca += 1 << 2;
+ }
+ }
+ emit(OCall, i1->cls, R, i1->arg[0], CALL(ca));
+ /* RAX receives the number of sse registers used by the arguments */
+ emit(OCopy, Kw, TMP(RAX), getcon((ca >> 8) & 15, fn), R);
+
+ /* move the arguments passed in registers */
+ ni = ns = 0;
+ if (ra && aret.inmem)
+ emit(OCopy, Kl, rarg(Kl, &ni, &ns), ra->i.to, R); /* pass hidden argument */
+ for (i=i0, a=ac; i<i1; i++, a++) {
+ if (a->inmem)
+ continue;
+ r1 = rarg(a->cls[0], &ni, &ns);
+ if (i->op == OArgc) {
+ /* aggregate: load its eightbytes
+ * from the address in arg[1]
+ */
+ if (a->size > 8) {
+ r2 = rarg(a->cls[1], &ni, &ns);
+ r = newtmp("abi", Kl, fn);
+ emit(OLoad, a->cls[1], r2, r, R);
+ emit(OAdd, Kl, r, i->arg[1], getcon(8, fn));
+ chuse(i->arg[1], +1, fn);
+ }
+ emit(OLoad, a->cls[0], r1, i->arg[1], R);
+ } else
+ emit(OCopy, i->cls, r1, i->arg[0], R);
+ }
+
+ if (!stk)
+ return;
+
+ /* store the arguments passed in memory in the
+ * area whose address the final OSAlloc puts in r
+ */
+ r = newtmp("abi", Kl, fn);
+ chuse(r, -1, fn);
+ for (i=i0, a=ac, off=0; i<i1; i++, a++) {
+ if (!a->inmem)
+ continue;
+ if (i->op == OArgc) {
+ if (a->align == 4)
+ off += off & 15;
+ blit(r, off, i->arg[1], a->size, fn);
+ } else {
+ r1 = newtmp("abi", Kl, fn);
+ emit(OStorel, 0, R, i->arg[0], r1);
+ emit(OAdd, Kl, r1, r, getcon(off, fn));
+ chuse(r, +1, fn);
+ }
+ off += a->size;
+ }
+ emit(OSAlloc, Kl, r, getcon(stk, fn), R);
+}
+
+/* lower the parameters of fn: i0..i1-1 are the
+ * OPar/OParc instructions at the head of the
+ * start block; their replacement is written at
+ * the beginning of insb and curi is left just
+ * after the last instruction written
+ */
+static void
+selpar(Fn *fn, Ins *i0, Ins *i1)
+{
+ AClass *ac, *a, aret;
+ Ins *i;
+ int ni, ns, s, al;
+ Ref r, r1;
+
+ ac = alloc((i1-i0) * sizeof ac[0]);
+ curi = insb;
+ ni = ns = 0;
+
+ if (fn->retty >= 0) {
+ aclass(&aret, &typ[fn->retty]);
+ if (aret.inmem) {
+ /* the hidden argument comes in the first
+ * integer register; it is saved in
+ * fn->retr, which selret() reads
+ */
+ r = newtmp("abi", Kl, fn);
+ *curi++ = (Ins){OCopy, r, {rarg(Kl, &ni, &ns)}, Kl};
+ fn->retr = r;
+ }
+ classify(i0, i1, ac, OPar, &aret);
+ } else
+ classify(i0, i1, ac, OPar, 0);
+
+ /* specific to NAlign == 3 */
+
+ /* NOTE(review): s looks like the position of the
+ * next stack parameter in 4-byte units (8-byte
+ * scalars add 2, aggregates add size/4); the
+ * reason for starting at 4 is not visible here
+ */
+ s = 4;
+ for (i=i0, a=ac; i<i1; i++, a++) {
+ switch (a->inmem) {
+ case 1:
+ /* aggregate in memory: only record
+ * a slot for the parameter
+ */
+ assert(a->align <= 4);
+ if (a->align == 4)
+ s = (s+3) & -4;
+ fn->tmp[i->to.val].slot = -s; /* HACK! */
+ s += a->size / 4;
+ continue;
+ case 2:
+ /* scalar in memory: load it from its slot */
+ *curi++ = (Ins){OLoad, i->to, {SLOT(-s)}, i->cls};
+ s += 2;
+ continue;
+ }
+ r1 = rarg(a->cls[0], &ni, &ns);
+ if (i->op == OParc) {
+ /* aggregate in registers: copy them to
+ * temporaries; from here on cls[] holds
+ * the numbers of these temporaries,
+ * read back by the loop below
+ */
+ r = newtmp("abi", Kl, fn);
+ *curi++ = (Ins){OCopy, r, {r1}, Kl};
+ a->cls[0] = r.val;
+ if (a->size > 8) {
+ r1 = rarg(a->cls[1], &ni, &ns);
+ r = newtmp("abi", Kl, fn);
+ *curi++ = (Ins){OCopy, r, {r1}, Kl};
+ a->cls[1] = r.val;
+ }
+ } else
+ *curi++ = (Ins){OCopy, i->to, {r1}, i->cls};
+ }
+ /* allocate storage for the aggregates received
+ * in registers and store the saved copies in it
+ */
+ for (i=i0, a=ac; i<i1; i++, a++) {
+ if (i->op != OParc || a->inmem)
+ continue;
+ /* al selects the OAlloc variant from the alignment */
+ for (al=0; a->align >> (al+2); al++)
+ ;
+ r = TMP(a->cls[0]);
+ r1 = i->to;
+ *curi++ = (Ins){OAlloc+al, r1, {getcon(a->size, fn)}, Kl};
+ *curi++ = (Ins){OStorel, R, {r, r1}, 0};
+ if (a->size > 8) {
+ r = newtmp("abi", Kl, fn);
+ *curi++ = (Ins){OAdd, r, {r1, getcon(8, fn)}, Kl};
+ r1 = TMP(a->cls[1]);
+ *curi++ = (Ins){OStorel, R, {r1, r}, 0};
+ }
+ }
+}
+
+/* entry point of the ABI lowering: rewrites the
+ * parameters, the calls and the returns of fn;
+ * the result is dumped on stderr when debug['A']
+ * is set
+ */
+void
+abi(Fn *fn)
+{
+ Blk *b;
+ Ins *i, *i0, *ip;
+ RAlloc *ral;
+ int n;
+
+ /* lower arguments */
+ /* i stops at the first instruction of the
+ * start block that is not a parameter
+ */
+ for (b=fn->start, i=b->ins; i-b->ins < b->nins; i++)
+ if (i->op != OPar && i->op != OParc)
+ break;
+ selpar(fn, b->ins, i);
+ /* new start block: what selpar() left in insb
+ * followed by the remaining instructions
+ */
+ n = b->nins - (i - b->ins) + (curi - insb);
+ i0 = alloc(n * sizeof(Ins));
+ ip = icpy(ip = i0, insb, curi - insb);
+ ip = icpy(ip, i, &b->ins[b->nins] - i);
+ b->nins = n;
+ b->ins = i0;
+
+ /* lower calls and returns */
+ ral = 0;
+ b = fn->start;
+ do {
+ if (!(b = b->link))
+ b = fn->start; /* do it last */
+ /* the block is rebuilt from its end, in
+ * the tail of insb
+ */
+ curi = &insb[NIns];
+ selret(b, fn);
+ for (i=&b->ins[b->nins]; i!=b->ins;) {
+ if ((--i)->op == OCall) {
+ /* i0 goes back to the first of the
+ * OArg/OArgc preceding the call
+ */
+ for (i0=i; i0>b->ins; i0--)
+ if ((i0-1)->op != OArg)
+ if ((i0-1)->op != OArgc)
+ break;
+ selcall(fn, i0, i, &ral);
+ i = i0;
+ continue;
+ }
+ assert(i->op != OArg && i->op != OArgc);
+ emiti(*i);
+ }
+ /* the start block comes last and receives the
+ * allocations collected by selcall()
+ */
+ if (b == fn->start)
+ for (; ral; ral=ral->link)
+ emiti(ral->i);
+ b->nins = &insb[NIns] - curi;
+ idup(&b->ins, curi, b->nins);
+ } while (b != fn->start);
+
+ if (debug['A']) {
+ fprintf(stderr, "\n> After ABI lowering:\n");
+ printfn(fn, stderr);
+ }
+}
diff --git a/util.c b/util.c
@@ -187,7 +187,8 @@ newtmp(char *prfx, int k, Fn *fn)
t = fn->ntmp++;
vgrow(&fn->tmp, fn->ntmp);
- sprintf(fn->tmp[t].name, "%s%d", prfx, ++n);
+ if (prfx)
+ sprintf(fn->tmp[t].name, "%s%d", prfx, ++n);
fn->tmp[t].cls = k;
fn->tmp[t].slot = -1;
fn->tmp[t].nuse = +1;
@@ -195,6 +196,13 @@ newtmp(char *prfx, int k, Fn *fn)
return TMP(t);
}
+/* add du to the use count of r when r is a
+ * temporary of fn; any other kind of reference
+ * is ignored
+ */
+void
+chuse(Ref r, int du, Fn *fn)
+{
+	if (rtype(r) != RTmp)
+		return;
+	fn->tmp[r.val].nuse += du;
+}
+
Ref
getcon(int64_t val, Fn *fn)
{
@@ -203,8 +211,7 @@ getcon(int64_t val, Fn *fn)
for (c=0; c<fn->ncon; c++)
if (fn->con[c].type == CBits && fn->con[c].bits.i == val)
return CON(c);
- fn->ncon++;
- vgrow(&fn->con, fn->ncon);
+ vgrow(&fn->con, ++fn->ncon);
fn->con[c] = (Con){.type = CBits, .bits.i = val};
return CON(c);
}