commit a9e4fa9715bf0f56f546dce72149da09af5349a3
parent e6debbbb02b4b0b118546e05fa88b51428abddca
Author: Quentin Carbonneaux <quentin@c9x.me>
Date: Thu, 10 Mar 2022 22:49:08 +0100
rv64: plug holes in the abi
Many things got fixed, but the most
notable change is the proper support
of floating point types in aggregates.
Minor fixes:
- selpar() did not deal correctly
with Cfpint
- typclass() was reading out of
bounds in the gp/fp arrays
- support for env calls
Diffstat:
M rv64/abi.c | 345 +++++++++++++++++++++++++++++++++++++++++++++----------------------------------
1 file changed, 196 insertions(+), 149 deletions(-)
diff --git a/rv64/abi.c b/rv64/abi.c
@@ -1,5 +1,7 @@
#include "all.h"
+/* the risc-v lp64d abi */
+
typedef struct Class Class;
typedef struct Insl Insl;
typedef struct Params Params;
@@ -14,13 +16,13 @@ enum {
struct Class {
char class;
- uint size;
- Typ *t;
- uchar nreg;
- uchar ngp;
- uchar nfp;
+ Typ *type;
int reg[2];
int cls[2];
+ int off[2];
+ char ngp; /* only valid after typclass() */
+ char nfp; /* ditto */
+ char nreg;
};
struct Insl {
@@ -34,17 +36,18 @@ struct Params {
int stk; /* stack offset for varargs */
};
-static int gpreg[] = { A0, A1, A2, A3, A4, A5, A6, A7};
-static int fpreg[] = {FA0, FA1, FA2, FA3, FA4, FA5, FA6, FA7};
+static int gpreg[10] = {A0, A1, A2, A3, A4, A5, A6, A7};
+static int fpreg[10] = {FA0, FA1, FA2, FA3, FA4, FA5, FA6, FA7};
/* layout of call's second argument (RCall)
*
- *  29        8    4  2  0
- *  |0.00|xxxx|xxxx|xx|xx|                  range
- *         |    |   |  ` gp regs returned (0..2)
- *         |    |   ` fp regs returned (0..2)
- *         |    ` gp regs passed (0..8)
- *         ` fp regs passed (0..8)
+ *  29     12   8    4  2  0
+ *  |0.00|x|xxxx|xxxx|xx|xx|                  range
+ *        |  |    |   |  ` gp regs returned (0..2)
+ *        |  |    |   ` fp regs returned (0..2)
+ *        |  |    ` gp regs passed (0..8)
+ *        |  ` fp regs passed (0..8)
+ *        ` env pointer passed in t5 (0..1)
*/
bits
@@ -72,12 +75,12 @@ bits
rv64_argregs(Ref r, int p[2])
{
bits b;
- int ngp, nfp;
+ int ngp, nfp, t5;
assert(rtype(r) == RCall);
ngp = (r.val >> 4) & 15;
nfp = (r.val >> 8) & 15;
- b = 0;
+ t5 = (r.val >> 12) & 1;
if (p) {
p[0] = ngp;
p[1] = nfp;
@@ -87,17 +90,52 @@ rv64_argregs(Ref r, int p[2])
b |= BIT(A0+ngp);
while (nfp--)
b |= BIT(FA0+nfp);
- return b;
+ return b | ((bits)t5 << T5);
+}
+
+static int
+fpstruct(Typ *t, int off, Class *c)
+{
+ Field *f;
+ int n;
+
+ if (t->isunion)
+ return -1;
+
+ for (f=*t->fields; f->type != FEnd; f++)
+ if (f->type == FPad)
+ off += f->len;
+ else if (f->type == FTyp) {
+ if (fpstruct(&typ[f->len], off, c) == -1)
+ return -1;
+ }
+ else {
+ n = c->nfp + c->ngp;
+ if (n == 2)
+ return -1;
+ switch (f->type) {
+ default: die("unreachable");
+ case Fb:
+ case Fh:
+ case Fw: c->cls[n] = Kw; c->ngp++; break;
+ case Fl: c->cls[n] = Kl; c->ngp++; break;
+ case Fs: c->cls[n] = Ks; c->nfp++; break;
+ case Fd: c->cls[n] = Kd; c->nfp++; break;
+ }
+ c->off[n] = off;
+ off += f->len;
+ }
+
+ return c->nfp;
}
static void
-typclass(Class *c, Typ *t, int *gp, int *fp)
+typclass(Class *c, Typ *t, int fpabi, int *gp, int *fp)
{
- uint64_t sz;
uint n;
+ int i;
- sz = (t->size + 7) & ~7;
- c->t = t;
+ c->type = t;
c->class = 0;
c->ngp = 0;
c->nfp = 0;
@@ -105,63 +143,63 @@ typclass(Class *c, Typ *t, int *gp, int *fp)
if (t->align > 4)
err("alignments larger than 16 are not supported");
- if (t->isdark || sz > 16 || sz == 0) {
+ if (t->isdark || t->size > 16 || t->size == 0) {
/* large structs are replaced by a
* pointer to some caller-allocated
- * memory */
+ * memory
+ */
c->class |= Cptr;
- c->size = 8;
- return;
+ *c->cls = Kl;
+ *c->off = 0;
+ c->ngp = 1;
}
-
- c->size = sz;
-
- /* TODO: float */
- (void)fp;
-
- for (n=0; n<sz/8; n++, c->ngp++) {
- c->reg[n] = *gp++;
- c->cls[n] = Kl;
+ else if (!fpabi || fpstruct(t, 0, c) <= 0) {
+ for (n=0; 8*n<t->size; n++) {
+ c->cls[n] = Kl;
+ c->off[n] = 8*n;
+ }
+ c->nfp = 0;
+ c->ngp = n;
}
- c->nreg = n;
+ c->nreg = c->nfp + c->ngp;
+ for (i=0; i<c->nreg; i++)
+ if (KBASE(c->cls[i]) == 0)
+ c->reg[i] = *gp++;
+ else
+ c->reg[i] = *fp++;
}
static void
-sttmps(Ref tmp[], int cls[], uint nreg, Ref mem, Fn *fn)
+sttmps(Ref tmp[], int ntmp, Class *c, Ref mem, Fn *fn)
{
static int st[] = {
[Kw] = Ostorew, [Kl] = Ostorel,
[Ks] = Ostores, [Kd] = Ostored
};
- uint n;
- uint64_t off;
+ int i;
Ref r;
- assert(nreg <= 4);
- off = 0;
- for (n=0; n<nreg; n++) {
- tmp[n] = newtmp("abi", cls[n], fn);
+ assert(ntmp > 0);
+ assert(ntmp <= 2);
+ for (i=0; i<ntmp; i++) {
+ tmp[i] = newtmp("abi", c->cls[i], fn);
r = newtmp("abi", Kl, fn);
- emit(st[cls[n]], 0, R, tmp[n], r);
- emit(Oadd, Kl, r, mem, getcon(off, fn));
- off += KWIDE(cls[n]) ? 8 : 4;
+ emit(st[c->cls[i]], 0, R, tmp[i], r);
+ emit(Oadd, Kl, r, mem, getcon(c->off[i], fn));
}
}
static void
-ldregs(int reg[], int cls[], int n, Ref mem, Fn *fn)
+ldregs(Class *c, Ref mem, Fn *fn)
{
int i;
- uint64_t off;
Ref r;
- off = 0;
- for (i=0; i<n; i++) {
+ for (i=0; i<c->nreg; i++) {
r = newtmp("abi", Kl, fn);
- emit(Oload, cls[i], TMP(reg[i]), r, R);
- emit(Oadd, Kl, r, mem, getcon(off, fn));
- off += KWIDE(cls[i]) ? 8 : 4;
+ emit(Oload, c->cls[i], TMP(c->reg[i]), r, R);
+ emit(Oadd, Kl, r, mem, getcon(c->off[i], fn));
}
}
@@ -181,13 +219,13 @@ selret(Blk *b, Fn *fn)
b->jmp.type = Jret0;
if (j == Jretc) {
- typclass(&cr, &typ[fn->retty], gpreg, fpreg);
+ typclass(&cr, &typ[fn->retty], 1, gpreg, fpreg);
cty = (cr.nfp << 2) | cr.ngp;
if (cr.class & Cptr) {
assert(rtype(fn->retr) == RTmp);
- blit0(fn->retr, r, cr.t->size, fn);
+ blit0(fn->retr, r, cr.type->size, fn);
} else {
- ldregs(cr.reg, cr.cls, cr.nreg, r, fn);
+ ldregs(&cr, r, fn);
}
} else {
k = j - Jretw;
@@ -204,10 +242,11 @@ selret(Blk *b, Fn *fn)
}
static int
-argsclass(Ins *i0, Ins *i1, Class *carg, Ref *env, int retptr)
+argsclass(Ins *i0, Ins *i1, Class *carg, int retptr)
{
- int ngp, nfp, *gp, *fp, vararg;
+ int ngp, nfp, *gp, *fp, vararg, envc;
Class *c;
+ Typ *t;
Ins *i;
gp = gpreg;
@@ -215,6 +254,7 @@ argsclass(Ins *i0, Ins *i1, Class *carg, Ref *env, int retptr)
ngp = 8;
nfp = 8;
vararg = 0;
+ envc = 0;
if (retptr) {
gp++;
ngp--;
@@ -224,8 +264,6 @@ argsclass(Ins *i0, Ins *i1, Class *carg, Ref *env, int retptr)
case Opar:
case Oarg:
*c->cls = i->cls;
- c->size = 8;
- /* variadic float args are passed in int regs */
if (!vararg && KBASE(i->cls) == 1 && nfp > 0) {
nfp--;
*c->reg = *fp++;
@@ -234,63 +272,62 @@ argsclass(Ins *i0, Ins *i1, Class *carg, Ref *env, int retptr)
c->class |= Cfpint;
ngp--;
*c->reg = *gp++;
- } else {
+ } else
c->class |= Cstk1;
- }
break;
case Oargv:
- /* subsequent arguments are variadic */
vararg = 1;
break;
case Oparc:
case Oargc:
- typclass(c, &typ[i->arg[0].val], gp, fp);
- if (c->class & Cptr) {
- c->ngp = 1;
- *c->reg = *gp;
- *c->cls = Kl;
- }
- if (c->ngp <= ngp && c->nfp <= nfp) {
+ t = &typ[i->arg[0].val];
+ typclass(c, t, 1, gp, fp);
+ if (c->nfp > 0)
+ if (c->nfp >= nfp || c->ngp >= ngp)
+ typclass(c, t, 0, gp, fp);
+ assert(c->nfp <= nfp);
+ if (c->ngp <= ngp) {
ngp -= c->ngp;
nfp -= c->nfp;
gp += c->ngp;
fp += c->nfp;
- break;
- }
- c->ngp += c->nfp;
- c->nfp = 0;
- if (c->ngp <= ngp) {
- ngp -= c->ngp;
- gp += c->ngp;
- break;
- }
- c->class |= Cstk1;
- if (c->ngp - 1 > ngp)
+ } else if (ngp > 0) {
+ assert(c->ngp == 2);
+ assert(c->class == 0);
c->class |= Cstk2;
+ c->nreg = 1;
+ ngp--;
+ gp++;
+ } else {
+ c->class |= Cstk1;
+ if (c->nreg > 1)
+ c->class |= Cstk2;
+ c->nreg = 0;
+ }
break;
case Opare:
- *env = i->to;
- break;
case Oarge:
- *env = i->arg[0];
+ *c->cls = Kl;
+ *c->reg = T5;
+ envc = 1;
break;
}
}
- return (gp-gpreg) << 4 | (fp-fpreg) << 8;
+ return envc << 12 | (gp-gpreg) << 4 | (fp-fpreg) << 8;
}
static void
-stkblob(Ref r, Class *c, Fn *fn, Insl **ilp)
+stkblob(Ref r, Typ *t, Fn *fn, Insl **ilp)
{
Insl *il;
int al;
uint64_t sz;
il = alloc(sizeof *il);
- al = c->t->align - 2; /* NAlign == 3 */
+ al = t->align - 2; /* specific to NAlign == 3 */
if (al < 0)
al = 0;
- sz = c->class & Cptr ? c->t->size : c->size;
+ sz = (t->size + 7) & ~7;
il->i = (Ins){Oalloc+al, Kl, r, {getcon(sz, fn)}};
il->link = *ilp;
*ilp = il;
@@ -301,26 +338,24 @@ selcall(Fn *fn, Ins *i0, Ins *i1, Insl **ilp)
{
Ins *i;
Class *ca, *c, cr;
- int k, cty, envc;
- uint n;
+ int j, k, cty;
uint64_t stk, off;
- Ref r, r1, env, tmp[2];
+ Ref r, r1, tmp[2];
- env = R;
ca = alloc((i1-i0) * sizeof ca[0]);
cr.class = 0;
if (!req(i1->arg[1], R))
- typclass(&cr, &typ[i1->arg[1].val], gpreg, fpreg);
+ typclass(&cr, &typ[i1->arg[1].val], 1, gpreg, fpreg);
- cty = argsclass(i0, i1, ca, &env, cr.class & Cptr);
+ cty = argsclass(i0, i1, ca, cr.class & Cptr);
stk = 0;
for (i=i0, c=ca; i<i1; i++, c++) {
if (i->op == Oargv)
continue;
if (c->class & Cptr) {
i->arg[0] = newtmp("abi", Kl, fn);
- stkblob(i->arg[0], c, fn, ilp);
+ stkblob(i->arg[0], c->type, fn, ilp);
i->op = Oarg;
}
if (c->class & Cstk1)
@@ -328,20 +363,24 @@ selcall(Fn *fn, Ins *i0, Ins *i1, Insl **ilp)
if (c->class & Cstk2)
stk += 8;
}
+ stk += stk & 15;
if (stk)
emit(Osalloc, Kl, R, getcon(-stk, fn), R);
if (!req(i1->arg[1], R)) {
- stkblob(i1->to, &cr, fn, ilp);
+ stkblob(i1->to, cr.type, fn, ilp);
cty |= (cr.nfp << 2) | cr.ngp;
- if (cr.class & Cptr) {
- cty |= 1;
+ if (cr.class & Cptr)
+ /* spill & rega expect calls to be
+ * followed by copies from regs,
+ * so we emit a dummy copy from a0
+ */
emit(Ocopy, Kw, R, TMP(A0), R);
- } else {
- sttmps(tmp, cr.cls, cr.nreg, i1->to, fn);
- for (n=0; n<cr.nreg; n++) {
- r = TMP(cr.reg[n]);
- emit(Ocopy, cr.cls[n], tmp[n], r, R);
+ else {
+ sttmps(tmp, cr.nreg, &cr, i1->to, fn);
+ for (j=0; j<cr.nreg; j++) {
+ r = TMP(cr.reg[j]);
+ emit(Ocopy, cr.cls[j], tmp[j], r, R);
}
}
} else if (KBASE(i1->cls) == 0) {
@@ -352,9 +391,6 @@ selcall(Fn *fn, Ins *i0, Ins *i1, Insl **ilp)
cty |= 1 << 2;
}
- envc = !req(R, env);
- if (envc)
- die("todo: env calls");
emit(Ocall, 0, R, i1->arg[0], CALL(cty));
if (cr.class & Cptr)
@@ -366,7 +402,7 @@ selcall(Fn *fn, Ins *i0, Ins *i1, Insl **ilp)
if (i->op == Oargv || c->class & Cstk1)
continue;
if (i->op == Oargc) {
- ldregs(c->reg, c->cls, c->nreg, i->arg[1], fn);
+ ldregs(c, i->arg[1], fn);
} else if (c->class & Cfpint) {
k = KWIDE(*c->cls) ? Kl : Kw;
r = newtmp("abi", k, fn);
@@ -383,7 +419,7 @@ selcall(Fn *fn, Ins *i0, Ins *i1, Insl **ilp)
emit(Ocast, k, TMP(*c->reg), i->arg[0], R);
}
if (c->class & Cptr)
- blit0(i->arg[0], i->arg[1], c->t->size, fn);
+ blit0(i->arg[0], i->arg[1], c->type->size, fn);
}
if (!stk)
@@ -391,36 +427,33 @@ selcall(Fn *fn, Ins *i0, Ins *i1, Insl **ilp)
r = newtmp("abi", Kl, fn);
for (i=i0, c=ca, off=0; i<i1; i++, c++) {
- if (i->op == Oargv || (c->class & Cstk) == 0)
+ if (i->op == Oargv || !(c->class & Cstk))
continue;
- if (i->op != Oargc) {
- /* w arguments are stored sign-extended
- * to 64-bits
- *
- * s arguments can just be stored with
- * Ostores into the first 32-bits in the
- * stack position since the ABI says the
- * upper bits are undefined
- */
+ if (i->op == Oarg) {
r1 = newtmp("abi", Kl, fn);
emit(Ostorew+i->cls, Kw, R, i->arg[0], r1);
if (i->cls == Kw) {
/* TODO: we only need this sign extension
* for subtyped l temporaries passed as w
* arguments (see rv64/isel.c:fixarg)
- *
- * however, we cannot just fix it in isel
- * since by that point we have forgotten
- * the original argument type
*/
curi->op = Ostorel;
curi->arg[0] = newtmp("abi", Kl, fn);
emit(Oextsw, Kl, curi->arg[0], i->arg[0], R);
}
emit(Oadd, Kl, r1, r, getcon(off, fn));
- } else
- blit(r, off, i->arg[1], 0, c->t->size, fn);
- off += c->size;
+ off += 8;
+ }
+ if (i->op == Oargc) {
+ if (c->class & Cstk1) {
+ blit(r, off, i->arg[1], 0, 8, fn);
+ off += 8;
+ }
+ if (c->class & Cstk2) {
+ blit(r, off, i->arg[1], 8, 8, fn);
+ off += 8;
+ }
+ }
}
emit(Osalloc, Kl, r, getcon(stk, fn), R);
}
@@ -431,60 +464,74 @@ selpar(Fn *fn, Ins *i0, Ins *i1)
Class *ca, *c, cr;
Insl *il;
Ins *i;
- int n, s, cty;
- Ref r, env, tmp[16], *t;
+ int j, k, s, cty, nt;
+ Ref r, tmp[17], *t;
- env = R;
ca = alloc((i1-i0) * sizeof ca[0]);
cr.class = 0;
curi = &insb[NIns];
if (fn->retty >= 0) {
- typclass(&cr, &typ[fn->retty], gpreg, fpreg);
+ typclass(&cr, &typ[fn->retty], 1, gpreg, fpreg);
if (cr.class & Cptr) {
fn->retr = newtmp("abi", Kl, fn);
emit(Ocopy, Kl, fn->retr, TMP(A0), R);
}
}
- cty = argsclass(i0, i1, ca, &env, cr.class & Cptr);
+ cty = argsclass(i0, i1, ca, cr.class & Cptr);
fn->reg = rv64_argregs(CALL(cty), 0);
il = 0;
t = tmp;
for (i=i0, c=ca; i<i1; i++, c++) {
- if (i->op != Oparc || (c->class & (Cptr|Cstk)))
- continue;
- sttmps(t, c->cls, c->nreg, i->to, fn);
- stkblob(i->to, c, fn, &il);
- t += c->nreg;
+ if (c->class & Cfpint) {
+ r = i->to;
+ k = *c->cls;
+ *c->cls = KWIDE(k) ? Kl : Kw;
+ i->to = newtmp("abi", k, fn);
+ emit(Ocast, k, r, i->to, R);
+ }
+ if (i->op == Oparc)
+ if (!(c->class & Cptr))
+ if (c->nreg != 0) {
+ nt = c->nreg;
+ if (c->class & Cstk2) {
+ c->cls[1] = Kl;
+ c->off[1] = 8;
+ assert(nt == 1);
+ nt = 2;
+ }
+ sttmps(t, nt, c, i->to, fn);
+ stkblob(i->to, c->type, fn, &il);
+ t += nt;
+ }
}
for (; il; il=il->link)
emiti(il->i);
t = tmp;
- for (i=i0, c=ca, s=2 + 8 * fn->vararg; i<i1; i++, c++) {
- if (i->op == Oparc
- && (c->class & Cptr) == 0) {
- if (c->class & Cstk) {
+ s = 2 + 8*fn->vararg;
+ for (i=i0, c=ca; i<i1; i++, c++)
+ if (i->op == Oparc && !(c->class & Cptr)) {
+ if (c->nreg == 0) {
fn->tmp[i->to.val].slot = -s;
- s += c->size / 8;
- } else {
- for (n=0; n<c->nreg; n++) {
- r = TMP(c->reg[n]);
- emit(Ocopy, c->cls[n], *t++, r, R);
- }
+ s += (c->class & Cstk2) ? 2 : 1;
+ continue;
+ }
+ for (j=0; j<c->nreg; j++) {
+ r = TMP(c->reg[j]);
+ emit(Ocopy, c->cls[j], *t++, r, R);
+ }
+ if (c->class & Cstk2) {
+ emit(Oload, Kl, *t, SLOT(-s), R);
+ t++, s++;
}
} else if (c->class & Cstk1) {
emit(Oload, *c->cls, i->to, SLOT(-s), R);
s++;
- } else {
+ } else
emit(Ocopy, *c->cls, i->to, TMP(*c->reg), R);
- }
- }
-
- if (!req(R, env))
- die("todo: env calls");
return (Params){
.stk = s,