commit 26c1c30b7d96d2170195970a8cdb3b024ba7421a
parent 15e25a61b38b250c7543437a093a9efe076cce0a
Author: Quentin Carbonneaux <quentin@c9x.me>
Date: Sat, 10 Dec 2022 23:16:21 +0100
new blit instruction
Diffstat:
M | Makefile | | | 2 | +- |
M | alias.c | | | 54 | ++++++++++++++++++++++++++++++++++++++---------------- |
M | all.h | | | 11 | +++++++---- |
M | amd64/sysv.c | | | 13 | +++++++------ |
M | arm64/abi.c | | | 19 | ++++++++++++------- |
M | load.c | | | 41 | +++++++++++++++++++++++++++++++++++------ |
M | main.c | | | 1 | + |
M | mem.c | | | 40 | ++++++++++++++++++++++++++++++++++++---- |
M | ops.h | | | 2 | ++ |
M | parse.c | | | 69 | +++++++++++++++++++++++++++++++++++++++++++++++---------------------- |
M | rv64/abi.c | | | 25 | +++++++++++++++++++------ |
A | simpl.c | | | 82 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | test/load2.ssa | | | 75 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | test/mem1.ssa | | | 35 | +++++++++++++++++++++++++++++++++++ |
M | tools/lexh.c | | | 2 | +- |
M | util.c | | | 30 | ------------------------------ |
16 files changed, 398 insertions(+), 103 deletions(-)
diff --git a/Makefile b/Makefile
@@ -5,7 +5,7 @@ PREFIX = /usr/local
BINDIR = $(PREFIX)/bin
COMMOBJ = main.o util.o parse.o abi.o cfg.o mem.o ssa.o alias.o load.o \
- copy.o fold.o live.o spill.o rega.o emit.o
+ copy.o fold.o simpl.o live.o spill.o rega.o emit.o
AMD64OBJ = amd64/targ.o amd64/sysv.o amd64/isel.o amd64/emit.o
ARM64OBJ = arm64/targ.o arm64/abi.o arm64/isel.o arm64/emit.o
RV64OBJ = rv64/targ.o rv64/abi.o rv64/isel.o rv64/emit.o
diff --git a/alias.c b/alias.c
@@ -28,13 +28,17 @@ getalias(Alias *a, Ref r, Fn *fn)
}
int
-alias(Ref p, int sp, Ref q, int sq, int *delta, Fn *fn)
+alias(Ref p, int op, int sp, Ref q, int sq, int *delta, Fn *fn)
{
Alias ap, aq;
int ovlap;
getalias(&ap, p, fn);
getalias(&aq, q, fn);
+ ap.offset += op;
+ /* when delta is meaningful (ovlap == 1),
+ * we do not overflow int because sp and
+ * sq are bounded by 2^28 */
*delta = ap.offset - aq.offset;
ovlap = ap.offset < aq.offset + sq && aq.offset < ap.offset + sp;
@@ -103,13 +107,34 @@ esc(Ref r, Fn *fn)
}
}
+static void
+store(Ref r, int sz, Fn *fn)
+{
+ Alias *a;
+ int64_t off;
+ bits m;
+
+ if (rtype(r) == RTmp) {
+ a = &fn->tmp[r.val].alias;
+ if (a->slot) {
+ assert(astack(a->type));
+ off = a->offset;
+ if (sz >= NBit
+ || (off < 0 || off >= NBit))
+ m = -1;
+ else
+ m = (BIT(sz) - 1) << off;
+ a->slot->u.loc.m |= m;
+ }
+ }
+}
+
void
fillalias(Fn *fn)
{
uint n, m;
- int t;
+ int t, sz;
int64_t x;
- bits w;
Blk *b;
Phi *p;
Ins *i;
@@ -171,26 +196,23 @@ fillalias(Fn *fn)
a->offset += a1.offset;
}
}
- if (req(i->to, R) || a->type == AUnk) {
+ if (req(i->to, R) || a->type == AUnk)
+ if (i->op != Oblit0) {
if (!isload(i->op))
esc(i->arg[0], fn);
if (!isstore(i->op))
if (i->op != Oargc)
esc(i->arg[1], fn);
}
- if (isstore(i->op))
- if (rtype(i->arg[1]) == RTmp) {
- a = &fn->tmp[i->arg[1].val].alias;
- if (a->slot) {
- assert(astack(a->type));
- x = a->offset;
- if (0 <= x && x < NBit) {
- w = BIT(storesz(i)) - 1;
- a->slot->u.loc.m |= w << x;
- } else
- a->slot->u.loc.sz = -1;
- }
+ if (i->op == Oblit0) {
+ ++i;
+ assert(i->op == Oblit1);
+ assert(rtype(i->arg[0]) == RInt);
+ sz = abs(rsval(i->arg[0]));
+ store((i-1)->arg[1], sz, fn);
}
+ if (isstore(i->op))
+ store(i->arg[1], storesz(i), fn);
}
if (b->jmp.type != Jretc)
esc(b->jmp.arg, fn);
diff --git a/all.h b/all.h
@@ -83,7 +83,8 @@ struct Ref {
enum {
RTmp,
RCon,
- RType,
+ RInt,
+ RType, /* last kind to come out of the parser */
RSlot,
RCall,
RMem,
@@ -97,6 +98,7 @@ enum {
#define TYPE(x) (Ref){RType, x}
#define CALL(x) (Ref){RCall, x}
#define MEM(x) (Ref){RMem, x}
+#define INT(x) (Ref){RInt, (x)&0x1fffffff}
static inline int req(Ref a, Ref b)
{
@@ -474,8 +476,6 @@ int symeq(Sym, Sym);
Ref newcon(Con *, Fn *);
Ref getcon(int64_t, Fn *);
int addcon(Con *, Con *);
-void blit(Ref, uint, Ref, uint, uint, Fn *);
-void blit0(Ref, Ref, uint, Fn *);
void salloc(Ref, Ref, Fn *);
void dumpts(BSet *, Tmp *, FILE *);
@@ -528,7 +528,7 @@ void coalesce(Fn *);
/* alias.c */
void fillalias(Fn *);
void getalias(Alias *, Ref, Fn *);
-int alias(Ref, int, Ref, int, int *, Fn *);
+int alias(Ref, int, int, Ref, int, int *, Fn *);
int escapes(Ref, Fn *);
/* load.c */
@@ -549,6 +549,9 @@ void copy(Fn *);
/* fold.c */
void fold(Fn *);
+/* simpl.c */
+void simpl(Fn *);
+
/* live.c */
void liveon(BSet *, Blk *, Blk *);
void filllive(Fn *);
diff --git a/amd64/sysv.c b/amd64/sysv.c
@@ -127,7 +127,8 @@ selret(Blk *b, Fn *fn)
if (aret.inmem) {
assert(rtype(fn->retr) == RTmp);
emit(Ocopy, Kl, TMP(RAX), fn->retr, R);
- blit0(fn->retr, r0, aret.type->size, fn);
+ emit(Oblit1, 0, R, INT(aret.type->size), R);
+ emit(Oblit0, 0, R, r0, fn->retr);
ca = 1;
} else {
ca = retr(reg, &aret);
@@ -410,15 +411,15 @@ selcall(Fn *fn, Ins *i0, Ins *i1, RAlloc **rap)
for (i=i0, a=ac, off=0; i<i1; i++, a++) {
if (i->op >= Oarge || !a->inmem)
continue;
+ r1 = newtmp("abi", Kl, fn);
if (i->op == Oargc) {
if (a->align == 4)
off += off & 15;
- blit(r, off, i->arg[1], 0, a->type->size, fn);
- } else {
- r1 = newtmp("abi", Kl, fn);
+ emit(Oblit1, 0, R, INT(a->type->size), R);
+ emit(Oblit0, 0, R, i->arg[1], r1);
+ } else
emit(Ostorel, 0, R, i->arg[0], r1);
- emit(Oadd, Kl, r1, r, getcon(off, fn));
- }
+ emit(Oadd, Kl, r1, r, getcon(off, fn));
off += a->size;
}
emit(Osalloc, Kl, r, getcon(stk, fn), R);
diff --git a/arm64/abi.c b/arm64/abi.c
@@ -188,7 +188,8 @@ selret(Blk *b, Fn *fn)
typclass(&cr, &typ[fn->retty], gpreg, fpreg);
if (cr.class & Cptr) {
assert(rtype(fn->retr) == RTmp);
- blit0(fn->retr, r, cr.t->size, fn);
+ emit(Oblit1, 0, R, INT(cr.t->size), R);
+ emit(Oblit0, 0, R, r, fn->retr);
cty = 0;
} else {
ldregs(cr.reg, cr.cls, cr.nreg, r, fn);
@@ -438,8 +439,8 @@ selcall(Fn *fn, Ins *i0, Ins *i1, Insl **ilp)
if ((c->class & Cstk) == 0)
continue;
off = align(off, c->align);
+ r = newtmp("abi", Kl, fn);
if (i->op == Oarg || isargbh(i->op)) {
- r = newtmp("abi", Kl, fn);
switch (c->size) {
case 1: op = Ostoreb; break;
case 2: op = Ostoreh; break;
@@ -447,18 +448,22 @@ selcall(Fn *fn, Ins *i0, Ins *i1, Insl **ilp)
case 8: op = store[*c->cls]; break;
}
emit(op, 0, R, i->arg[0], r);
- emit(Oadd, Kl, r, TMP(SP), getcon(off, fn));
+ } else {
+ assert(i->op == Oargc);
+ emit(Oblit1, 0, R, INT(c->size), R);
+ emit(Oblit0, 0, R, i->arg[1], r);
}
- if (i->op == Oargc)
- blit(TMP(SP), off, i->arg[1], 0, c->size, fn);
+ emit(Oadd, Kl, r, TMP(SP), getcon(off, fn));
off += c->size;
}
if (stk)
emit(Osub, Kl, TMP(SP), TMP(SP), rstk);
for (i=i0, c=ca; i<i1; i++, c++)
- if (c->class & Cptr)
- blit0(i->arg[0], i->arg[1], c->t->size, fn);
+ if (c->class & Cptr) {
+ emit(Oblit1, 0, R, INT(c->t->size), R);
+ emit(Oblit0, 0, R, i->arg[1], i->arg[0]);
+ }
}
static Params
diff --git a/load.c b/load.c
@@ -6,7 +6,6 @@ typedef struct Loc Loc;
typedef struct Slice Slice;
typedef struct Insert Insert;
-
struct Loc {
enum {
LRoot, /* right above the original load */
@@ -19,6 +18,7 @@ struct Loc {
struct Slice {
Ref ref;
+ int off;
short sz;
short cls; /* load class */
};
@@ -194,6 +194,7 @@ killsl(Ref r, Slice sl)
static Ref
def(Slice sl, bits msk, Blk *b, Ins *i, Loc *il)
{
+ Slice sl1;
Blk *bp;
bits msk1, msks;
int off, cls, cls1, op, sz, ld;
@@ -244,10 +245,33 @@ def(Slice sl, bits msk, Blk *b, Ins *i, Loc *il)
sz = storesz(i);
r1 = i->arg[1];
r = i->arg[0];
+ } else if (i->op == Oblit1) {
+ assert(rtype(i->arg[0]) == RInt);
+ sz = abs(rsval(i->arg[0]));
+ --i;
+ assert(i->op == Oblit0);
+ r1 = i->arg[1];
} else
continue;
- switch (alias(sl.ref, sl.sz, r1, sz, &off, curf)) {
+ switch (alias(sl.ref, sl.off, sl.sz, r1, sz, &off, curf)) {
case MustAlias:
+ if (i->op == Oblit0) {
+ sl1 = sl;
+ sl1.ref = i->arg[0];
+ if (off >= 0) {
+ assert(off < sz);
+ sl1.off = off;
+ sz -= off;
+ off = 0;
+ } else {
+ sl1.off = 0;
+ sl1.sz += off;
+ }
+ if (sz > sl1.sz)
+ sz = sl1.sz;
+ assert(sz <= 8);
+ sl1.sz = sz;
+ }
if (off < 0) {
off = -off;
msk1 = (MASK(sz) << 8*off) & msks;
@@ -257,7 +281,12 @@ def(Slice sl, bits msk, Blk *b, Ins *i, Loc *il)
op = Oshr;
}
if ((msk1 & msk) == 0)
- break;
+ continue;
+ if (i->op == Oblit0) {
+ r = def(sl1, MASK(sz), b, i, il);
+ if (req(r, R))
+ goto Load;
+ }
if (off) {
cls1 = cls;
if (op == Oshr && off + sl.sz > 4)
@@ -279,11 +308,11 @@ def(Slice sl, bits msk, Blk *b, Ins *i, Loc *il)
return r;
case MayAlias:
if (ld)
- break;
+ continue;
else
goto Load;
case NoAlias:
- break;
+ continue;
default:
die("unreachable");
}
@@ -397,7 +426,7 @@ loadopt(Fn *fn)
if (!isload(i->op))
continue;
sz = loadsz(i);
- sl = (Slice){i->arg[0], sz, i->cls};
+ sl = (Slice){i->arg[0], 0, sz, i->cls};
l = (Loc){LRoot, i-b->ins, b};
i->arg[1] = def(sl, MASK(sz), b, i, &l);
}
diff --git a/main.c b/main.c
@@ -78,6 +78,7 @@ func(Fn *fn)
filluse(fn);
fold(fn);
T.abi1(fn);
+ simpl(fn);
fillpreds(fn);
filluse(fn);
T.isel(fn);
diff --git a/mem.c b/mem.c
@@ -195,12 +195,13 @@ coalesce(Fn *fn)
Range r, *br;
Slot *s, *s0, *sl;
Blk *b, **ps, *succ[3];
- Ins *i;
+ Ins *i, **bl;
Use *u;
Tmp *t, *ts;
Ref *arg;
bits x;
- int n, m, nsl, ip, *stk;
+ int64_t off0, off1;
+ int n, m, sz, nsl, nbl, ip, *stk;
uint total, freed, fused;
/* minimize the stack usage
@@ -229,6 +230,8 @@ coalesce(Fn *fn)
for (b=fn->start; b; b=b->link)
b->loop = -1;
loopiter(fn, maxrpo);
+ nbl = 0;
+ bl = vnew(0, sizeof bl[0], PHeap);
br = emalloc(fn->nblk * sizeof br[0]);
ip = INT_MAX - 1;
for (n=fn->nblk-1; n>=0; n--) {
@@ -247,8 +250,11 @@ coalesce(Fn *fn)
}
}
}
+ if (b->jmp.type == Jretc)
+ load(b->jmp.arg, -1, --ip, fn, sl);
for (i=&b->ins[b->nins]; i!=b->ins;) {
- arg = (--i)->arg;
+ --i;
+ arg = i->arg;
if (i->op == Oargc) {
load(arg[1], -1, --ip, fn, sl);
}
@@ -260,6 +266,16 @@ coalesce(Fn *fn)
x = BIT(storesz(i)) - 1;
store(arg[1], x, ip--, fn, sl);
}
+ if (i->op == Oblit0) {
+ assert((i+1)->op == Oblit1);
+ assert(rtype((i+1)->arg[0]) == RInt);
+ sz = abs(rsval((i+1)->arg[0]));
+ x = sz >= NBit ? (bits)-1 : BIT(sz) - 1;
+ store(arg[1], x, ip--, fn, sl);
+ load(arg[0], x, ip, fn, sl);
+ vgrow(&bl, ++nbl);
+ bl[nbl-1] = i;
+ }
}
for (s=sl; s<&sl[nsl]; s++)
if (s->l) {
@@ -321,6 +337,8 @@ coalesce(Fn *fn)
stk[n-1] = i->to.val;
} else {
assert(!isarg(i->op));
+ if (i->op == Oblit0)
+ *(i+1) = (Ins){.op = Onop};
*i = (Ins){.op = Onop};
}
}
@@ -340,7 +358,7 @@ coalesce(Fn *fn)
if (s->s || !s->r.b)
goto Skip;
if (rovlap(r, s->r))
- /* O(n) can be approximated
+ /* O(n); can be approximated
* by 'goto Skip;' if need be
*/
for (m=n; &sl[m]<s; m++)
@@ -387,6 +405,20 @@ coalesce(Fn *fn)
}
}
+ /* fix newly overlapping blits */
+ for (n=0; n<nbl; n++) {
+ i = bl[n];
+ if (i->op == Oblit0)
+ if (slot(&s, &off0, i->arg[0], fn, sl))
+ if (slot(&s0, &off1, i->arg[1], fn, sl))
+ if (s->s == s0->s && off0 < off1) {
+ sz = rsval((i+1)->arg[0]);
+ assert(sz >= 0);
+ (i+1)->arg[0] = INT(-sz);
+ }
+ }
+ vfree(bl);
+
if (debug['M']) {
for (s0=sl; s0<&sl[nsl]; s0++) {
if (s0->s != s0)
diff --git a/ops.h b/ops.h
@@ -129,6 +129,8 @@ O(copy, T(w,l,s,d, x,x,x,x), 0) X(0, 0, 1) V(0)
/* Miscellaneous and Architecture-Specific Operations */
O(nop, T(x,x,x,x, x,x,x,x), 0) X(0, 0, 1) V(0)
O(addr, T(m,m,e,e, x,x,e,e), 0) X(0, 0, 1) V(0)
+O(blit0, T(m,e,e,e, m,e,e,e), 0) X(0, 1, 0) V(0)
+O(blit1, T(w,e,e,e, x,e,e,e), 0) X(0, 1, 0) V(0)
O(swap, T(w,l,s,d, w,l,s,d), 0) X(1, 0, 0) V(0)
O(sign, T(w,l,e,e, x,x,e,e), 0) X(0, 0, 0) V(0)
O(salloc, T(e,l,e,e, e,x,e,e), 0) X(0, 0, 0) V(0)
diff --git a/parse.c b/parse.c
@@ -27,7 +27,7 @@ typedef enum {
PEnd,
} PState;
-enum {
+enum Token {
Txxx = 0,
/* aliases */
@@ -38,6 +38,7 @@ enum {
Talloc1,
Talloc2,
+ Tblit,
Tcall,
Tenv,
Tphi,
@@ -94,6 +95,7 @@ static char *kwmap[Ntok] = {
[Tloadd] = "loadd",
[Talloc1] = "alloc1",
[Talloc2] = "alloc2",
+ [Tblit] = "blit",
[Tcall] = "call",
[Tenv] = "env",
[Tphi] = "phi",
@@ -481,7 +483,7 @@ parserefl(int arg)
expect(Tlparen);
while (peek() != Trparen) {
if (curi - insb >= NIns)
- err("too many instructions (1)");
+ err("too many instructions");
if (!arg && vararg)
err("no parameters allowed after '...'");
switch (peek()) {
@@ -578,6 +580,7 @@ parseline(PState ps)
Phi *phi;
Ref r;
Blk *b;
+ Con *c;
int t, op, i, k, ty;
t = nextnl();
@@ -586,6 +589,7 @@ parseline(PState ps)
switch (t) {
default:
if (isstore(t)) {
+ case Tblit:
case Tcall:
case Ovastart:
/* operations without result */
@@ -657,11 +661,6 @@ parseline(PState ps)
k = parsecls(&ty);
op = next();
DoOp:
- if (op == Tphi) {
- if (ps != PPhi || curb == curf->start)
- err("unexpected phi instruction");
- op = -1;
- }
if (op == Tcall) {
arg[0] = parseref();
parserefl(1);
@@ -686,14 +685,12 @@ DoOp:
err("cannot use vastart in non-variadic function");
if (k >= Ksb)
err("size class must be w, l, s, or d");
- if (op >= NPubOp)
- err("invalid instruction");
i = 0;
if (peek() != Tnl)
for (;;) {
if (i == NPred)
err("too many arguments");
- if (op == -1) {
+ if (op == Tphi) {
expect(Tlbl);
blk[i] = findblk(tokval.str);
}
@@ -709,18 +706,10 @@ DoOp:
next();
}
next();
-Ins:
- if (op != -1) {
- if (curi - insb >= NIns)
- err("too many instructions (2)");
- curi->op = op;
- curi->cls = k;
- curi->to = r;
- curi->arg[0] = arg[0];
- curi->arg[1] = arg[1];
- curi++;
- return PIns;
- } else {
+ switch (op) {
+ case Tphi:
+ if (ps != PPhi || curb == curf->start)
+ err("unexpected phi instruction");
phi = alloc(sizeof *phi);
phi->to = r;
phi->cls = k;
@@ -732,6 +721,39 @@ Ins:
*plink = phi;
plink = &phi->link;
return PPhi;
+ case Tblit:
+ if (curi - insb >= NIns-1)
+ err("too many instructions");
+ memset(curi, 0, 2 * sizeof(Ins));
+ curi->op = Oblit0;
+ curi->arg[0] = arg[0];
+ curi->arg[1] = arg[1];
+ curi++;
+ if (rtype(arg[2]) != RCon)
+ err("blit size must be constant");
+ c = &curf->con[arg[2].val];
+ r = INT(c->bits.i);
+ if (c->type != CBits
+ || rsval(r) < 0
+ || rsval(r) != c->bits.i)
+ err("invalid blit size");
+ curi->op = Oblit1;
+ curi->arg[0] = r;
+ curi++;
+ return PIns;
+ default:
+ if (op >= NPubOp)
+ err("invalid instruction");
+ Ins:
+ if (curi - insb >= NIns)
+ err("too many instructions");
+ curi->op = op;
+ curi->cls = k;
+ curi->to = r;
+ curi->arg[0] = arg[0];
+ curi->arg[1] = arg[1];
+ curi++;
+ return PIns;
}
}
@@ -1241,6 +1263,9 @@ printref(Ref r, Fn *fn, FILE *f)
}
fputc(']', f);
break;
+ case RInt:
+ fprintf(f, "%d", rsval(r));
+ break;
}
}
diff --git a/rv64/abi.c b/rv64/abi.c
@@ -222,7 +222,8 @@ selret(Blk *b, Fn *fn)
typclass(&cr, &typ[fn->retty], 1, gpreg, fpreg);
if (cr.class & Cptr) {
assert(rtype(fn->retr) == RTmp);
- blit0(fn->retr, r, cr.type->size, fn);
+ emit(Oblit1, 0, R, INT(cr.type->size), R);
+ emit(Oblit0, 0, R, r, fn->retr);
cty = 0;
} else {
ldregs(&cr, r, fn);
@@ -341,7 +342,7 @@ selcall(Fn *fn, Ins *i0, Ins *i1, Insl **ilp)
Class *ca, *c, cr;
int j, k, cty;
uint64_t stk, off;
- Ref r, r1, tmp[2];
+ Ref r, r1, r2, tmp[2];
ca = alloc((i1-i0) * sizeof ca[0]);
cr.class = 0;
@@ -419,8 +420,10 @@ selcall(Fn *fn, Ins *i0, Ins *i1, Insl **ilp)
k = KWIDE(*c->cls) ? Kl : Kw;
emit(Ocast, k, TMP(*c->reg), i->arg[0], R);
}
- if (c->class & Cptr)
- blit0(i->arg[0], i->arg[1], c->type->size, fn);
+ if (c->class & Cptr) {
+ emit(Oblit1, 0, R, INT(c->type->size), R);
+ emit(Oblit0, 0, R, i->arg[1], i->arg[0]);
+ }
}
if (!stk)
@@ -450,11 +453,21 @@ selcall(Fn *fn, Ins *i0, Ins *i1, Insl **ilp)
}
if (i->op == Oargc) {
if (c->class & Cstk1) {
- blit(r, off, i->arg[1], 0, 8, fn);
+ r1 = newtmp("abi", Kl, fn);
+ r2 = newtmp("abi", Kl, fn);
+ emit(Ostorel, 0, R, r2, r1);
+ emit(Oadd, Kl, r1, r, getcon(off, fn));
+ emit(Oload, Kl, r2, i->arg[1], R);
off += 8;
}
if (c->class & Cstk2) {
- blit(r, off, i->arg[1], 8, 8, fn);
+ r1 = newtmp("abi", Kl, fn);
+ r2 = newtmp("abi", Kl, fn);
+ emit(Ostorel, 0, R, r2, r1);
+ emit(Oadd, Kl, r1, r, getcon(off, fn));
+ r1 = newtmp("abi", Kl, fn);
+ emit(Oload, Kl, r2, r1, R);
+ emit(Oadd, Kl, r1, i->arg[1], getcon(8, fn));
off += 8;
}
}
diff --git a/simpl.c b/simpl.c
@@ -0,0 +1,82 @@
+#include "all.h"
+
+static void
+blit(Ref sd[2], int sz, Fn *fn)
+{
+ struct { int st, ld, cls, size; } *p, tbl[] = {
+ { Ostorel, Oload, Kl, 8 },
+ { Ostorew, Oload, Kw, 4 },
+ { Ostoreh, Oloaduh, Kw, 2 },
+ { Ostoreb, Oloadub, Kw, 1 }
+ };
+ Ref r, r1, ro;
+ int off, fwd, n;
+
+ fwd = sz >= 0;
+ sz = abs(sz);
+ off = fwd ? sz : 0;
+ for (p=tbl; sz; p++)
+ for (n=p->size; sz>=n; sz-=n) {
+ off -= fwd ? n : 0;
+ r = newtmp("blt", Kl, fn);
+ r1 = newtmp("blt", Kl, fn);
+ ro = getcon(off, fn);
+ emit(p->st, 0, R, r, r1);
+ emit(Oadd, Kl, r1, sd[1], ro);
+ r1 = newtmp("blt", Kl, fn);
+ emit(p->ld, p->cls, r, r1, R);
+ emit(Oadd, Kl, r1, sd[0], ro);
+ off += fwd ? 0 : n;
+ }
+}
+
+static void
+ins(Ins **pi, int *new, Blk *b, Fn *fn)
+{
+ ulong ni;
+ Ins *i;
+
+ i = *pi;
+ /* simplify more instructions here;
+ * copy 0 into xor, mul 2^n into shift,
+ * bit rotations, ... */
+ switch (i->op) {
+ case Oblit1:
+ assert(i > b->ins);
+ assert((i-1)->op == Oblit0);
+ if (!*new) {
+ curi = &insb[NIns];
+ ni = &b->ins[b->nins] - (i+1);
+ curi -= ni;
+ icpy(curi, i+1, ni);
+ *new = 1;
+ }
+ blit((i-1)->arg, rsval(i->arg[0]), fn);
+ *pi = i-1;
+ break;
+ default:
+ if (*new)
+ emiti(*i);
+ break;
+ }
+}
+
+void
+simpl(Fn *fn)
+{
+ Blk *b;
+ Ins *i;
+ int new;
+
+ for (b=fn->start; b; b=b->link) {
+ new = 0;
+ for (i=&b->ins[b->nins]; i!=b->ins;) {
+ --i;
+ ins(&i, &new, b, fn);
+ }
+ if (new) {
+ b->nins = &insb[NIns] - curi;
+ idup(&b->ins, curi, b->nins);
+ }
+ }
+}
diff --git a/test/load2.ssa b/test/load2.ssa
@@ -0,0 +1,75 @@
+# blit & load elimination
+
+export
+function $f() {
+@start
+ %x =l alloc4 12
+ %y =l alloc4 12
+
+ %x1 =l add 1, %x
+ %x2 =l add 1, %x1
+ %x3 =l add 1, %x2
+ %x4 =l add 1, %x3
+ %x5 =l add 1, %x4
+ %x6 =l add 1, %x5
+ %x7 =l add 1, %x6
+ %x8 =l add 1, %x7
+ %x9 =l add 1, %x8
+ %xa =l add 1, %x9
+ %xb =l add 1, %xa
+
+ %y1 =l add 1, %y
+ %y4 =l add 4, %y
+
+ storew 287454020, %x4 # 0x11223344
+ storew 1432778632, %y # 0x55667788
+ blit %y, %x5, 1
+ %n =w load %x4
+ call $px(w %n) # 0x11228844
+
+ storew 287454020, %x4 # 0x11223344
+ storew 1432778632, %y # 0x55667788
+ blit %y, %x5, 2
+ %n =w load %x4
+ call $px(w %n) # 0x11778844
+
+ storew 287454020, %x4 # 0x11223344
+ storew 1432778632, %y # 0x55667788
+ blit %y, %x5, 4
+ %n =w load %x4
+ call $px(w %n) # 0x66778844
+
+ storew 287454020, %x4 # 0x11223344
+ storew 1432778632, %y # 0x55667788
+ blit %y, %x2, 4
+ %n =w load %x4
+ call $px(w %n) # 0x11225566
+
+ storew 287454020, %x4 # 0x11223344
+ storew 0, %y
+ storew 1432778632, %y4 # 0x55667788
+ blit %y1, %x2, 7
+ %n =w load %x4
+ call $px(w %n) # 0x66778800
+
+ ret
+}
+
+# >>> driver
+# #include <stdio.h>
+# void px(unsigned n) {
+# printf("0x%08x\n", n);
+# }
+# int main() {
+# extern void f(void);
+# f();
+# }
+# <<<
+
+# >>> output
+# 0x11228844
+# 0x11778844
+# 0x66778844
+# 0x11225566
+# 0x66778800
+# <<<
diff --git a/test/mem1.ssa b/test/mem1.ssa
@@ -0,0 +1,35 @@
+type :i3 = { w 3 }
+
+export
+function :i3 $blit() {
+@start
+ %l0 =l alloc4 12
+ %l1 =l alloc4 12
+
+ storew 287454020, %l0
+ %l04 =l add %l0, 4
+ storew 1432778632, %l04
+ %l08 =l add %l0, 8
+ storew 2578103244, %l08
+
+ # we expect that %l0 and %l1
+ # are coalesced and the blit
+ # goes backwards
+ %l11 =l add %l1, 1
+ blit %l0, %l11, 11
+
+ storeb 221, %l1
+
+ ret %l1
+}
+
+# >>> driver
+# struct i3 { int a, b, c; };
+# extern struct i3 blit();
+# int main() {
+# struct i3 s = blit();
+# return !(s.a == 0x223344dd
+# && s.b == 0x66778811
+# && s.c == 0xaabbcc55);
+# }
+# <<<
diff --git a/tools/lexh.c b/tools/lexh.c
@@ -26,7 +26,7 @@ char *tok[] = {
"vaarg", "vastart", "...", "env",
"call", "phi", "jmp", "jnz", "ret", "hlt", "export",
- "function", "type", "data", "section", "align",
+ "function", "type", "data", "section", "align", "blit",
"l", "w", "sh", "uh", "h", "sb", "ub", "b",
"d", "s", "z", "loadw", "loadl", "loads", "loadd",
"alloc1", "alloc2",
diff --git a/util.c b/util.c
@@ -404,36 +404,6 @@ addcon(Con *c0, Con *c1)
}
void
-blit(Ref rdst, uint doff, Ref rsrc, uint boff, uint sz, Fn *fn)
-{
- struct { int st, ld, cls, size; } *p, tbl[] = {
- { Ostorel, Oload, Kl, 8 },
- { Ostorew, Oload, Kw, 4 },
- { Ostoreh, Oloaduh, Kw, 2 },
- { Ostoreb, Oloadub, Kw, 1 }
- };
- Ref r, r1;
- uint s;
-
- for (p=tbl; sz; p++)
- for (s=p->size; sz>=s; sz-=s, doff+=s, boff+=s) {
- r = newtmp("blt", Kl, fn);
- r1 = newtmp("blt", Kl, fn);
- emit(p->st, 0, R, r, r1);
- emit(Oadd, Kl, r1, rdst, getcon(doff, fn));
- r1 = newtmp("blt", Kl, fn);
- emit(p->ld, p->cls, r, r1, R);
- emit(Oadd, Kl, r1, rsrc, getcon(boff, fn));
- }
-}
-
-void
-blit0(Ref rdst, Ref rsrc, uint sz, Fn *fn)
-{
- blit(rdst, 0, rsrc, 0, sz, fn);
-}
-
-void
salloc(Ref rt, Ref rs, Fn *fn)
{
Ref r0, r1;