qbe

Internal scc patchset buffer for QBE
Log | Files | Refs | README | LICENSE

commit a9e4fa9715bf0f56f546dce72149da09af5349a3
parent e6debbbb02b4b0b118546e05fa88b51428abddca
Author: Quentin Carbonneaux <quentin@c9x.me>
Date:   Thu, 10 Mar 2022 22:49:08 +0100

rv64: plug holes in the abi

Many things got fixed, but the most
notable change is the proper support
of floating point types in aggregates.

Minor fixes:
- selpar() did not deal correctly
  with Cfpint
- typclass() was reading out of
  bounds in the gp/fp arrays
- support for env calls

Diffstat:
M rv64/abi.c | 345 +++++++++++++++++++++++++++++++++++++++++++++----------------------------------
1 file changed, 196 insertions(+), 149 deletions(-)

diff --git a/rv64/abi.c b/rv64/abi.c @@ -1,5 +1,7 @@ #include "all.h" +/* the risc-v lp64d abi */ + typedef struct Class Class; typedef struct Insl Insl; typedef struct Params Params; @@ -14,13 +16,13 @@ enum { struct Class { char class; - uint size; - Typ *t; - uchar nreg; - uchar ngp; - uchar nfp; + Typ *type; int reg[2]; int cls[2]; + int off[2]; + char ngp; /* only valid after typclass() */ + char nfp; /* ditto */ + char nreg; }; struct Insl { @@ -34,17 +36,18 @@ struct Params { int stk; /* stack offset for varargs */ }; -static int gpreg[] = { A0, A1, A2, A3, A4, A5, A6, A7}; -static int fpreg[] = {FA0, FA1, FA2, FA3, FA4, FA5, FA6, FA7}; +static int gpreg[10] = {A0, A1, A2, A3, A4, A5, A6, A7}; +static int fpreg[10] = {FA0, FA1, FA2, FA3, FA4, FA5, FA6, FA7}; /* layout of call's second argument (RCall) * - * 29 8 4 2 0 - * |0.00|xxxx|xxxx|xx|xx| range - * | | | ` gp regs returned (0..2) - * | | ` fp regs returned (0..2) - * | ` gp regs passed (0..8) - * ` fp regs passed (0..8) + * 29 12 8 4 2 0 + * |0.00|x|xxxx|xxxx|xx|xx| range + * | | | | ` gp regs returned (0..2) + * | | | ` fp regs returned (0..2) + * | | ` gp regs passed (0..8) + * | ` fp regs passed (0..8) + * ` env pointer passed in t5 (0..1) */ bits @@ -72,12 +75,12 @@ bits rv64_argregs(Ref r, int p[2]) { bits b; - int ngp, nfp; + int ngp, nfp, t5; assert(rtype(r) == RCall); ngp = (r.val >> 4) & 15; nfp = (r.val >> 8) & 15; - b = 0; + t5 = (r.val >> 12) & 1; if (p) { p[0] = ngp; p[1] = nfp; @@ -87,17 +90,52 @@ rv64_argregs(Ref r, int p[2]) b |= BIT(A0+ngp); while (nfp--) b |= BIT(FA0+nfp); - return b; + return b | ((bits)t5 << T5); +} + +static int +fpstruct(Typ *t, int off, Class *c) +{ + Field *f; + int n; + + if (t->isunion) + return -1; + + for (f=*t->fields; f->type != FEnd; f++) + if (f->type == FPad) + off += f->len; + else if (f->type == FTyp) { + if (fpstruct(&typ[f->len], off, c) == -1) + return -1; + } + else { + n = c->nfp + c->ngp; + if (n == 2) + return -1; + switch (f->type) { + 
default: die("unreachable"); + case Fb: + case Fh: + case Fw: c->cls[n] = Kw; c->ngp++; break; + case Fl: c->cls[n] = Kl; c->ngp++; break; + case Fs: c->cls[n] = Ks; c->nfp++; break; + case Fd: c->cls[n] = Kd; c->nfp++; break; + } + c->off[n] = off; + off += f->len; + } + + return c->nfp; } static void -typclass(Class *c, Typ *t, int *gp, int *fp) +typclass(Class *c, Typ *t, int fpabi, int *gp, int *fp) { - uint64_t sz; uint n; + int i; - sz = (t->size + 7) & ~7; - c->t = t; + c->type = t; c->class = 0; c->ngp = 0; c->nfp = 0; @@ -105,63 +143,63 @@ typclass(Class *c, Typ *t, int *gp, int *fp) if (t->align > 4) err("alignments larger than 16 are not supported"); - if (t->isdark || sz > 16 || sz == 0) { + if (t->isdark || t->size > 16 || t->size == 0) { /* large structs are replaced by a * pointer to some caller-allocated - * memory */ + * memory + */ c->class |= Cptr; - c->size = 8; - return; + *c->cls = Kl; + *c->off = 0; + c->ngp = 1; } - - c->size = sz; - - /* TODO: float */ - (void)fp; - - for (n=0; n<sz/8; n++, c->ngp++) { - c->reg[n] = *gp++; - c->cls[n] = Kl; + else if (!fpabi || fpstruct(t, 0, c) <= 0) { + for (n=0; 8*n<t->size; n++) { + c->cls[n] = Kl; + c->off[n] = 8*n; + } + c->nfp = 0; + c->ngp = n; } - c->nreg = n; + c->nreg = c->nfp + c->ngp; + for (i=0; i<c->nreg; i++) + if (KBASE(c->cls[i]) == 0) + c->reg[i] = *gp++; + else + c->reg[i] = *fp++; } static void -sttmps(Ref tmp[], int cls[], uint nreg, Ref mem, Fn *fn) +sttmps(Ref tmp[], int ntmp, Class *c, Ref mem, Fn *fn) { static int st[] = { [Kw] = Ostorew, [Kl] = Ostorel, [Ks] = Ostores, [Kd] = Ostored }; - uint n; - uint64_t off; + int i; Ref r; - assert(nreg <= 4); - off = 0; - for (n=0; n<nreg; n++) { - tmp[n] = newtmp("abi", cls[n], fn); + assert(ntmp > 0); + assert(ntmp <= 2); + for (i=0; i<ntmp; i++) { + tmp[i] = newtmp("abi", c->cls[i], fn); r = newtmp("abi", Kl, fn); - emit(st[cls[n]], 0, R, tmp[n], r); - emit(Oadd, Kl, r, mem, getcon(off, fn)); - off += KWIDE(cls[n]) ? 
8 : 4; + emit(st[c->cls[i]], 0, R, tmp[i], r); + emit(Oadd, Kl, r, mem, getcon(c->off[i], fn)); } } static void -ldregs(int reg[], int cls[], int n, Ref mem, Fn *fn) +ldregs(Class *c, Ref mem, Fn *fn) { int i; - uint64_t off; Ref r; - off = 0; - for (i=0; i<n; i++) { + for (i=0; i<c->nreg; i++) { r = newtmp("abi", Kl, fn); - emit(Oload, cls[i], TMP(reg[i]), r, R); - emit(Oadd, Kl, r, mem, getcon(off, fn)); - off += KWIDE(cls[i]) ? 8 : 4; + emit(Oload, c->cls[i], TMP(c->reg[i]), r, R); + emit(Oadd, Kl, r, mem, getcon(c->off[i], fn)); } } @@ -181,13 +219,13 @@ selret(Blk *b, Fn *fn) b->jmp.type = Jret0; if (j == Jretc) { - typclass(&cr, &typ[fn->retty], gpreg, fpreg); + typclass(&cr, &typ[fn->retty], 1, gpreg, fpreg); cty = (cr.nfp << 2) | cr.ngp; if (cr.class & Cptr) { assert(rtype(fn->retr) == RTmp); - blit0(fn->retr, r, cr.t->size, fn); + blit0(fn->retr, r, cr.type->size, fn); } else { - ldregs(cr.reg, cr.cls, cr.nreg, r, fn); + ldregs(&cr, r, fn); } } else { k = j - Jretw; @@ -204,10 +242,11 @@ selret(Blk *b, Fn *fn) } static int -argsclass(Ins *i0, Ins *i1, Class *carg, Ref *env, int retptr) +argsclass(Ins *i0, Ins *i1, Class *carg, int retptr) { - int ngp, nfp, *gp, *fp, vararg; + int ngp, nfp, *gp, *fp, vararg, envc; Class *c; + Typ *t; Ins *i; gp = gpreg; @@ -215,6 +254,7 @@ argsclass(Ins *i0, Ins *i1, Class *carg, Ref *env, int retptr) ngp = 8; nfp = 8; vararg = 0; + envc = 0; if (retptr) { gp++; ngp--; @@ -224,8 +264,6 @@ argsclass(Ins *i0, Ins *i1, Class *carg, Ref *env, int retptr) case Opar: case Oarg: *c->cls = i->cls; - c->size = 8; - /* variadic float args are passed in int regs */ if (!vararg && KBASE(i->cls) == 1 && nfp > 0) { nfp--; *c->reg = *fp++; @@ -234,63 +272,62 @@ argsclass(Ins *i0, Ins *i1, Class *carg, Ref *env, int retptr) c->class |= Cfpint; ngp--; *c->reg = *gp++; - } else { + } else c->class |= Cstk1; - } break; case Oargv: - /* subsequent arguments are variadic */ vararg = 1; break; case Oparc: case Oargc: - typclass(c, 
&typ[i->arg[0].val], gp, fp); - if (c->class & Cptr) { - c->ngp = 1; - *c->reg = *gp; - *c->cls = Kl; - } - if (c->ngp <= ngp && c->nfp <= nfp) { + t = &typ[i->arg[0].val]; + typclass(c, t, 1, gp, fp); + if (c->nfp > 0) + if (c->nfp >= nfp || c->ngp >= ngp) + typclass(c, t, 0, gp, fp); + assert(c->nfp <= nfp); + if (c->ngp <= ngp) { ngp -= c->ngp; nfp -= c->nfp; gp += c->ngp; fp += c->nfp; - break; - } - c->ngp += c->nfp; - c->nfp = 0; - if (c->ngp <= ngp) { - ngp -= c->ngp; - gp += c->ngp; - break; - } - c->class |= Cstk1; - if (c->ngp - 1 > ngp) + } else if (ngp > 0) { + assert(c->ngp == 2); + assert(c->class == 0); c->class |= Cstk2; + c->nreg = 1; + ngp--; + gp++; + } else { + c->class |= Cstk1; + if (c->nreg > 1) + c->class |= Cstk2; + c->nreg = 0; + } break; case Opare: - *env = i->to; - break; case Oarge: - *env = i->arg[0]; + *c->cls = Kl; + *c->reg = T5; + envc = 1; break; } } - return (gp-gpreg) << 4 | (fp-fpreg) << 8; + return envc << 12 | (gp-gpreg) << 4 | (fp-fpreg) << 8; } static void -stkblob(Ref r, Class *c, Fn *fn, Insl **ilp) +stkblob(Ref r, Typ *t, Fn *fn, Insl **ilp) { Insl *il; int al; uint64_t sz; il = alloc(sizeof *il); - al = c->t->align - 2; /* NAlign == 3 */ + al = t->align - 2; /* specific to NAlign == 3 */ if (al < 0) al = 0; - sz = c->class & Cptr ? 
c->t->size : c->size; + sz = (t->size + 7) & ~7; il->i = (Ins){Oalloc+al, Kl, r, {getcon(sz, fn)}}; il->link = *ilp; *ilp = il; @@ -301,26 +338,24 @@ selcall(Fn *fn, Ins *i0, Ins *i1, Insl **ilp) { Ins *i; Class *ca, *c, cr; - int k, cty, envc; - uint n; + int j, k, cty; uint64_t stk, off; - Ref r, r1, env, tmp[2]; + Ref r, r1, tmp[2]; - env = R; ca = alloc((i1-i0) * sizeof ca[0]); cr.class = 0; if (!req(i1->arg[1], R)) - typclass(&cr, &typ[i1->arg[1].val], gpreg, fpreg); + typclass(&cr, &typ[i1->arg[1].val], 1, gpreg, fpreg); - cty = argsclass(i0, i1, ca, &env, cr.class & Cptr); + cty = argsclass(i0, i1, ca, cr.class & Cptr); stk = 0; for (i=i0, c=ca; i<i1; i++, c++) { if (i->op == Oargv) continue; if (c->class & Cptr) { i->arg[0] = newtmp("abi", Kl, fn); - stkblob(i->arg[0], c, fn, ilp); + stkblob(i->arg[0], c->type, fn, ilp); i->op = Oarg; } if (c->class & Cstk1) @@ -328,20 +363,24 @@ selcall(Fn *fn, Ins *i0, Ins *i1, Insl **ilp) if (c->class & Cstk2) stk += 8; } + stk += stk & 15; if (stk) emit(Osalloc, Kl, R, getcon(-stk, fn), R); if (!req(i1->arg[1], R)) { - stkblob(i1->to, &cr, fn, ilp); + stkblob(i1->to, cr.type, fn, ilp); cty |= (cr.nfp << 2) | cr.ngp; - if (cr.class & Cptr) { - cty |= 1; + if (cr.class & Cptr) + /* spill & rega expect calls to be + * followed by copies from regs, + * so we emit a dummy + */ emit(Ocopy, Kw, R, TMP(A0), R); - } else { - sttmps(tmp, cr.cls, cr.nreg, i1->to, fn); - for (n=0; n<cr.nreg; n++) { - r = TMP(cr.reg[n]); - emit(Ocopy, cr.cls[n], tmp[n], r, R); + else { + sttmps(tmp, cr.nreg, &cr, i1->to, fn); + for (j=0; j<cr.nreg; j++) { + r = TMP(cr.reg[j]); + emit(Ocopy, cr.cls[j], tmp[j], r, R); } } } else if (KBASE(i1->cls) == 0) { @@ -352,9 +391,6 @@ selcall(Fn *fn, Ins *i0, Ins *i1, Insl **ilp) cty |= 1 << 2; } - envc = !req(R, env); - if (envc) - die("todo: env calls"); emit(Ocall, 0, R, i1->arg[0], CALL(cty)); if (cr.class & Cptr) @@ -366,7 +402,7 @@ selcall(Fn *fn, Ins *i0, Ins *i1, Insl **ilp) if (i->op == Oargv || 
c->class & Cstk1) continue; if (i->op == Oargc) { - ldregs(c->reg, c->cls, c->nreg, i->arg[1], fn); + ldregs(c, i->arg[1], fn); } else if (c->class & Cfpint) { k = KWIDE(*c->cls) ? Kl : Kw; r = newtmp("abi", k, fn); @@ -383,7 +419,7 @@ selcall(Fn *fn, Ins *i0, Ins *i1, Insl **ilp) emit(Ocast, k, TMP(*c->reg), i->arg[0], R); } if (c->class & Cptr) - blit0(i->arg[0], i->arg[1], c->t->size, fn); + blit0(i->arg[0], i->arg[1], c->type->size, fn); } if (!stk) @@ -391,36 +427,33 @@ selcall(Fn *fn, Ins *i0, Ins *i1, Insl **ilp) r = newtmp("abi", Kl, fn); for (i=i0, c=ca, off=0; i<i1; i++, c++) { - if (i->op == Oargv || (c->class & Cstk) == 0) + if (i->op == Oargv || !(c->class & Cstk)) continue; - if (i->op != Oargc) { - /* w arguments are stored sign-extended - * to 64-bits - * - * s arguments can just be stored with - * Ostores into the first 32-bits in the - * stack position since the ABI says the - * upper bits are undefined - */ + if (i->op == Oarg) { r1 = newtmp("abi", Kl, fn); emit(Ostorew+i->cls, Kw, R, i->arg[0], r1); if (i->cls == Kw) { /* TODO: we only need this sign extension * for subtyped l temporaries passed as w * arguments (see rv64/isel.c:fixarg) - * - * however, we cannot just fix it in isel - * since by that point we have forgotten - * the original argument type */ curi->op = Ostorel; curi->arg[0] = newtmp("abi", Kl, fn); emit(Oextsw, Kl, curi->arg[0], i->arg[0], R); } emit(Oadd, Kl, r1, r, getcon(off, fn)); - } else - blit(r, off, i->arg[1], 0, c->t->size, fn); - off += c->size; + off += 8; + } + if (i->op == Oargc) { + if (c->class & Cstk1) { + blit(r, off, i->arg[1], 0, 8, fn); + off += 8; + } + if (c->class & Cstk2) { + blit(r, off, i->arg[1], 8, 8, fn); + off += 8; + } + } } emit(Osalloc, Kl, r, getcon(stk, fn), R); } @@ -431,60 +464,74 @@ selpar(Fn *fn, Ins *i0, Ins *i1) Class *ca, *c, cr; Insl *il; Ins *i; - int n, s, cty; - Ref r, env, tmp[16], *t; + int j, k, s, cty, nt; + Ref r, tmp[17], *t; - env = R; ca = alloc((i1-i0) * sizeof ca[0]); 
cr.class = 0; curi = &insb[NIns]; if (fn->retty >= 0) { - typclass(&cr, &typ[fn->retty], gpreg, fpreg); + typclass(&cr, &typ[fn->retty], 1, gpreg, fpreg); if (cr.class & Cptr) { fn->retr = newtmp("abi", Kl, fn); emit(Ocopy, Kl, fn->retr, TMP(A0), R); } } - cty = argsclass(i0, i1, ca, &env, cr.class & Cptr); + cty = argsclass(i0, i1, ca, cr.class & Cptr); fn->reg = rv64_argregs(CALL(cty), 0); il = 0; t = tmp; for (i=i0, c=ca; i<i1; i++, c++) { - if (i->op != Oparc || (c->class & (Cptr|Cstk))) - continue; - sttmps(t, c->cls, c->nreg, i->to, fn); - stkblob(i->to, c, fn, &il); - t += c->nreg; + if (c->class & Cfpint) { + r = i->to; + k = *c->cls; + *c->cls = KWIDE(k) ? Kl : Kw; + i->to = newtmp("abi", k, fn); + emit(Ocast, k, r, i->to, R); + } + if (i->op == Oparc) + if (!(c->class & Cptr)) + if (c->nreg != 0) { + nt = c->nreg; + if (c->class & Cstk2) { + c->cls[1] = Kl; + c->off[1] = 8; + assert(nt == 1); + nt = 2; + } + sttmps(t, nt, c, i->to, fn); + stkblob(i->to, c->type, fn, &il); + t += nt; + } } for (; il; il=il->link) emiti(il->i); t = tmp; - for (i=i0, c=ca, s=2 + 8 * fn->vararg; i<i1; i++, c++) { - if (i->op == Oparc - && (c->class & Cptr) == 0) { - if (c->class & Cstk) { + s = 2 + 8*fn->vararg; + for (i=i0, c=ca; i<i1; i++, c++) + if (i->op == Oparc && !(c->class & Cptr)) { + if (c->nreg == 0) { fn->tmp[i->to.val].slot = -s; - s += c->size / 8; - } else { - for (n=0; n<c->nreg; n++) { - r = TMP(c->reg[n]); - emit(Ocopy, c->cls[n], *t++, r, R); - } + s += (c->class & Cstk2) ? 2 : 1; + continue; + } + for (j=0; j<c->nreg; j++) { + r = TMP(c->reg[j]); + emit(Ocopy, c->cls[j], *t++, r, R); + } + if (c->class & Cstk2) { + emit(Oload, Kl, *t, SLOT(-s), R); + t++, s++; } } else if (c->class & Cstk1) { emit(Oload, *c->cls, i->to, SLOT(-s), R); s++; - } else { + } else emit(Ocopy, *c->cls, i->to, TMP(*c->reg), R); - } - } - - if (!req(R, env)) - die("todo: env calls"); return (Params){ .stk = s,