qbe

Internal scc patchset buffer for QBE
Log | Files | Refs | README | LICENSE

commit f7bfa2e435c78917bd6df0c80e7e488751dac58c
parent bad74e6dce897df9f21cea5bf0a32df298856351
Author: Quentin Carbonneaux <quentin.carbonneaux@yale.edu>
Date:   Mon, 14 Sep 2015 23:36:26 -0400

heavy modification of call handling

The IR generated by calls was very bulky because two
instructions were used for marking the live range of
a clobber.

This patch attempts to store the information of what
registers are use/def/clobber in the call instruction
itself, this leads to more compact code (even more
when we'll have SSE registers).  However, I find that
the amount of extra code needed is not really
easonable.  Fortunately it is not too invasive, thus
if the complexity creeps in, it should be easy to
revert.

Diffstat:
Mlisc/isel.c | 60++++++++++++++++++++++++++++++++++++++++++++++++------------
Mlisc/lisc.h | 13++++++++++---
Mlisc/live.c | 12++++++++++--
Mlisc/main.c | 1+
Mlisc/parse.c | 7+++++--
Mlisc/rega.c | 55+++++++++++++++++++++++++++++++++++++++----------------
Mlisc/spill.c | 16++++++++++++----
7 files changed, 125 insertions(+), 39 deletions(-)

diff --git a/lisc/isel.c b/lisc/isel.c @@ -442,10 +442,51 @@ classify(AInfo *a, Typ *t) } } +int rsave[NRSave] = {RDI, RSI, RDX, RCX, R8, R9, R10, R11, RAX}; + +uint64_t calldef(Ins i, int *p) +{ + uint64_t b; + int n; + + n = i.arg[1].val & 15; + b = 1ll << RAX; + if (n == 2) + b |= 1ll << RDX; + if (p) + *p = n; + return b; +} + +uint64_t calluse(Ins i, int *p) +{ + uint64_t b; + int j, n; + + n = (i.arg[1].val >> 4) & 15; + for (j = 0, b = 0; j < n; j++) + b |= 1ll << rsave[j]; + if (p) + *p = n; + return b; +} + +uint64_t callclb(Ins i, int *p) +{ + uint64_t b; + int j, n; + + n = (i.arg[1].val >> 4) & 15; + for (j = n, b = 0; j < NRSave; j++) + b |= 1ll << rsave[j]; + if (p) + *p = n; + return b; +} + static void selcall(Fn *fn, Ins *i0, Ins *i1) { - static int ireg[8] = {RDI, RSI, RDX, RCX, R8, R9, R10, R11}; Ins *i; AInfo *ai, *a; int nint, nsse, ni, ns, n; @@ -497,20 +538,15 @@ selcall(Fn *fn, Ins *i0, Ins *i1) } stk += stk & 15; - if (rtype(i1->arg[1]) == RTyp) + if (!req(i1->arg[1], R)) diag("struct-returning function not implemented"); if (stk) { r = newcon(-(int64_t)stk, fn); emit(OSAlloc, 0, R, r, R); } - for (n=0; n<8; n++) - emit(OCopy, 0, R, TMP(ireg[n]), R); emit(OCopy, i1->wide, i1->to, TMP(RAX), R); - emit(OCall, 0, R, i->arg[0], R); - emit(OCopy, 1, TMP(RAX), R, R); - for (n=6-nint; n<8; n++) - emit(OCopy, 1, TMP(ireg[n]), R, R); + emit(OCall, 0, R, i->arg[0], CALL(1 + ((6-nint) << 4))); for (i=i0, a=ai, ni=0; i<i1; i++, a++) { if (a->inmem) @@ -519,18 +555,18 @@ selcall(Fn *fn, Ins *i0, Ins *i1) if (a->rty[0] == RSse || a->rty[1] == RSse) diag("isel: unsupported float struct"); if (a->size > 8) { - r = TMP(ireg[ni+1]); + r = TMP(rsave[ni+1]); r1 = newtmp(fn); emit(OLoad, 1, r, r1, R); r = newcon(8, fn); emit(OAdd, 1, r1, i->arg[1], r); - r = TMP(ireg[ni]); + r = TMP(rsave[ni]); ni += 2; } else - r = TMP(ireg[ni++]); + r = TMP(rsave[ni++]); emit(OLoad, 1, r, i->arg[1], R); } else { - r = TMP(ireg[ni++]); + r = TMP(rsave[ni++]); emit(OCopy, i->wide, r, i->arg[0], R); } } diff --git a/lisc/lisc.h b/lisc/lisc.h @@ -43,7 +43,8 @@ enum Reg { Tmp0, /* first non-reg temporary */ - NReg = R12 - RAX + 1 + NReg = R12 - RAX + 1, + NRSave = 9, }; enum { @@ -76,7 +77,8 @@ enum { RTmp, RCon, RSlot, - RTyp, + RAlt, + RCallm = 0x1000, NRef = (1<<14) - 1 }; @@ -85,7 +87,8 @@ enum { #define CON(x) (Ref){RCon, x} #define CON_Z CON(0) /* reserved zero constant */ #define SLOT(x) (Ref){RSlot, x} -#define TYP(x) (Ref){RTyp, x} +#define TYP(x) (Ref){RAlt, x} +#define CALL(x) (Ref){RAlt, x|RCallm} static inline int req(Ref a, Ref b) { return a.type == b.type && a.val == b.val; } @@ -274,6 +277,10 @@ void ssafix(Fn *, int); void filllive(Fn *); /* isel.c */ +extern int rsave[NRSave]; +uint64_t calldef(Ins, int *); +uint64_t calluse(Ins, int *); +uint64_t callclb(Ins, int *); int slota(int, int, int *); void isel(Fn *); diff --git a/lisc/live.c b/lisc/live.c @@ -21,7 +21,7 @@ filllive(Fn *f) Blk *b; Ins *i; Phi *p; - int z, n, chg, nlv; + int z, m, n, chg, nlv; uint a; Bits tb; @@ -50,7 +50,15 @@ Again: bset(b->jmp.arg, b, &nlv); b->nlive = nlv; for (i=&b->ins[b->nins]; i!=b->ins;) { - i--; + if ((--i)->op == OCall) { + b->in.t[0] &= ~calldef(*i, &m); + nlv -= m; + if (nlv + NRSave > b->nlive) + b->nlive = nlv + NRSave; + b->in.t[0] |= calluse(*i, &m); + nlv += m; + bset(i->arg[0], b, &nlv); + } if (!req(i->to, R)) { assert(rtype(i->to) == RTmp); nlv -= BGET(b->in, i->to.val); diff --git a/lisc/main.c b/lisc/main.c @@ -63,6 +63,7 @@ main(int ac, char *av[]) Blk *b; fprintf(stderr, "[Testing Liveness]\n"); + isel(fn); fillrpo(fn); filllive(fn); for (b=fn->start; b; b=b->link) { diff --git a/lisc/parse.c b/lisc/parse.c @@ -779,8 +779,11 @@ printref(Ref r, Fn *fn, FILE *f) case RSlot: fprintf(f, "S%d", r.val); break; - case RTyp: - fprintf(f, ":%s", typ[r.val].name); + case RAlt: + if (r.val & RCallm) + fprintf(f, "%x", r.val & (RCallm-1)); + else + fprintf(f, ":%s", typ[r.val].name); break; } } diff --git a/lisc/rega.c b/lisc/rega.c @@ -192,36 +192,50 @@ pmgen() free(status); } +static void +move(int r, Ref to, RMap *m) +{ + int n, t, r1; + + r1 = req(to, R) ? -1 : rfree(m, to.val); + if (BGET(m->b, r) && r1 != r) { + /* r is used and not by to */ + for (n=0; m->r[n] != r; n++) + assert(n+1 < m->n); + t = m->t[n]; + rfree(m, t); + BSET(m->b, r); + ralloc(m, t); + BCLR(m->b, r); + } + t = r1 == -1 ? r : to.val; + radd(m, t, r); +} + static Ins * dopm(Blk *b, Ins *i, RMap *m) { RMap m0; int n, r, r1, t, nins; Ins *i1, *ib, *ip, *ir; + uint64_t def; m0 = *m; i1 = i+1; for (;; i--) { - r = i->arg[0].val; - r1 = req(i->to, R) ? -1 : rfree(m, i->to.val); - if (BGET(m->b, r) && r1 != r) { - /* r is used and not by i->to, */ - for (n=0; m->r[n] != r; n++) - assert(n+1 < m->n); - t = m->t[n]; - rfree(m, t); - BSET(m->b, r); - ralloc(m, t); - BCLR(m->b, r); - } - t = r1 == -1 ? r : i->to.val; - radd(m, t, r); + move(i->arg[0].val, i->to, m); if (i == b->ins || (i-1)->op != OCopy || !isreg((i-1)->arg[0])) break; } assert(m0.n <= m->n); + if (i > b->ins && (i-1)->op == OCall) { + def = calldef(*i, 0); + for (r=0; r<NRSave; r++) + if (!((1ll << rsave[r]) & def)) + move(rsave[r], R, m); + } for (npm=0, n=0; n<m->n; n++) if ((t = m->t[n]) >= Tmp0) { r1 = m->r[n]; @@ -266,6 +280,7 @@ rega(Fn *fn) Phi *p; uint a; Ref src, dst; + uint64_t rs; tmp = fn->tmp; end = alloc(fn->nblk * sizeof end[0]); @@ -303,8 +318,16 @@ rega(Fn *fn) if (rtype(b->jmp.arg) == RTmp) b->jmp.arg = ralloc(&cur, b->jmp.arg.val); for (i=&b->ins[b->nins]; i!=b->ins;) { - i--; - if (i->op == OCopy && isreg(i->arg[0])) { + switch ((--i)->op) { + case OCall: + rs = calldef(*i, 0) | callclb(*i, 0); + for (r=0; r<NReg; r++) + if ((1ll << r) & rs) + rfree(&cur, r); + continue; + case OCopy: + if (!isreg(i->arg[0])) + break; i = dopm(b, i, &cur); continue; } diff --git a/lisc/spill.c b/lisc/spill.c @@ -277,6 +277,7 @@ inregs(Blk *b, Blk *s) /* todo, move to live.c */ static Ins * dopm(Blk *b, Ins *i, Bits *v) { + int j; Ins *i1; /* consecutive moves from @@ -293,7 +294,14 @@ dopm(Blk *b, Ins *i, Bits *v) || !isreg((i-1)->arg[0])) break; } - limit(v, NReg, 0); + if (i > b->ins && (i-1)->op == OCall) { + calldef(*--i, &j); + limit(v, NReg - NRSave + j, 0); + v->t[0] &= ~calldef(*i, 0); + v->t[0] |= calluse(*i, 0); + setloc(&i->arg[0], v, &(Bits){{0}}); + } else + limit(v, NReg, 0); do emit(*--i1); while (i1 != i); @@ -388,21 +396,21 @@ spill(Fn *fn) for (i=&b->ins[b->nins]; i!=b->ins;) { assert(bcnt(&v) <= NReg); i--; - s = 0; if (i->op == OCopy && isreg(i->arg[0])) { i = dopm(b, i, &v); continue; } + s = 0; + w = (Bits){{0}}; if (!req(i->to, R)) { assert(rtype(i->to) == RTmp); t = i->to.val; if (BGET(v, t)) BCLR(v, t); else - limit(&v, NReg-1, &w); + limit(&v, NReg-1, 0); s = tmp[t].spill; } - w = (Bits){{0}}; j = opdesc[i->op].nmem; if (!j && rtype(i->arg[0]) == RTmp) BSET(w, i->arg[0].val);