qbe

Internal scc patchset buffer for QBE
Log | Files | Refs | README | LICENSE

commit 62e238a6ef151d56b79e1f076a57463f2e1fb020
parent 97b58def96d47d937d86849380d8316ddb16bed8
Author: Quentin Carbonneaux <quentin.carbonneaux@yale.edu>
Date:   Fri, 25 Mar 2016 14:02:43 -0400

great renaming campaign!

Diffstat:
Dlisc/.gitignore | 5-----
Dlisc/Makefile | 17-----------------
Dlisc/copy.c | 159-------------------------------------------------------------------------------
Dlisc/emit.c | 666-------------------------------------------------------------------------------
Dlisc/isel.c | 1135-------------------------------------------------------------------------------
Dlisc/live.c | 174-------------------------------------------------------------------------------
Dlisc/main.c | 117-------------------------------------------------------------------------------
Dlisc/mem.c | 81-------------------------------------------------------------------------------
Dlisc/parse.c | 1081-------------------------------------------------------------------------------
Dlisc/rega.c | 597-------------------------------------------------------------------------------
Dlisc/spill.c | 507-------------------------------------------------------------------------------
Dlisc/ssa.c | 516-------------------------------------------------------------------------------
Dlisc/test/go.sh | 116-------------------------------------------------------------------------------
Dlisc/tools/abitest.sh | 104-------------------------------------------------------------------------------
Dlisc/tools/regress.sh | 17-----------------
Dlisc/util.c | 329-------------------------------------------------------------------------------
Mminic/mcc | 2+-
Asrc/.gitignore | 5+++++
Rlisc/.tag -> src/.tag | 0
Asrc/Makefile | 17+++++++++++++++++
Rlisc/lisc.h -> src/all.h | 0
Asrc/copy.c | 159+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/emit.c | 666+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/isel.c | 1135+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/live.c | 174+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/main.c | 117+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/mem.c | 81+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/parse.c | 1081+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/rega.c | 598+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/spill.c | 507+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/ssa.c | 516+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Rlisc/test/_alt.ssa -> src/test/_alt.ssa | 0
Rlisc/test/_dragon.ssa -> src/test/_dragon.ssa | 0
Rlisc/test/_fix1.ssa -> src/test/_fix1.ssa | 0
Rlisc/test/_fix2.ssa -> src/test/_fix2.ssa | 0
Rlisc/test/_fix3.ssa -> src/test/_fix3.ssa | 0
Rlisc/test/_fix4.ssa -> src/test/_fix4.ssa | 0
Rlisc/test/_live.ssa -> src/test/_live.ssa | 0
Rlisc/test/_rpo.ssa -> src/test/_rpo.ssa | 0
Rlisc/test/_spill1.ssa -> src/test/_spill1.ssa | 0
Rlisc/test/_spill2.ssa -> src/test/_spill2.ssa | 0
Rlisc/test/_spill3.ssa -> src/test/_spill3.ssa | 0
Rlisc/test/abi1.ssa -> src/test/abi1.ssa | 0
Rlisc/test/abi2.ssa -> src/test/abi2.ssa | 0
Rlisc/test/abi3.ssa -> src/test/abi3.ssa | 0
Rlisc/test/abi4.ssa -> src/test/abi4.ssa | 0
Rlisc/test/abi5.ssa -> src/test/abi5.ssa | 0
Rlisc/test/align.ssa -> src/test/align.ssa | 0
Rlisc/test/collatz.ssa -> src/test/collatz.ssa | 0
Rlisc/test/cprime.ssa -> src/test/cprime.ssa | 0
Rlisc/test/cup.ssa -> src/test/cup.ssa | 0
Rlisc/test/dark.ssa -> src/test/dark.ssa | 0
Rlisc/test/double.ssa -> src/test/double.ssa | 0
Rlisc/test/echo.ssa -> src/test/echo.ssa | 0
Rlisc/test/eucl.ssa -> src/test/eucl.ssa | 0
Rlisc/test/euclc.ssa -> src/test/euclc.ssa | 0
Rlisc/test/fpcnv.ssa -> src/test/fpcnv.ssa | 0
Asrc/test/go.sh | 116+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Rlisc/test/loop.ssa -> src/test/loop.ssa | 0
Rlisc/test/mandel.ssa -> src/test/mandel.ssa | 0
Rlisc/test/max.ssa -> src/test/max.ssa | 0
Rlisc/test/prime.ssa -> src/test/prime.ssa | 0
Rlisc/test/puts10.ssa -> src/test/puts10.ssa | 0
Rlisc/test/sum.ssa -> src/test/sum.ssa | 0
Rlisc/tools/abi.ml -> src/tools/abi.ml | 0
Asrc/tools/abitest.sh | 104+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Rlisc/tools/fptox.c -> src/tools/fptox.c | 0
Rlisc/tools/pmov.c -> src/tools/pmov.c | 0
Asrc/tools/regress.sh | 17+++++++++++++++++
Asrc/util.c | 329+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
70 files changed, 5623 insertions(+), 5622 deletions(-)

diff --git a/lisc/.gitignore b/lisc/.gitignore @@ -1,5 +0,0 @@ -lisc -doc -.comfile -*.o -*.out diff --git a/lisc/Makefile b/lisc/Makefile @@ -1,17 +0,0 @@ -BIN = lisc -OBJ = main.o util.o parse.o mem.o ssa.o copy.o live.o isel.o spill.o rega.o emit.o - -CFLAGS = -Wall -Wextra -std=c99 -g -pedantic - -$(BIN): $(OBJ) - $(CC) $(LDFLAGS) $(OBJ) -o $@ - -$(OBJ): lisc.h - -.PHONY: clean check syndoc -clean: - rm -f $(BIN) $(OBJ) -check: $(BIN) - test/go.sh all -syndoc: - unison -auto doc ssh://qcar@h/data/d/ssa-doc diff --git a/lisc/copy.c b/lisc/copy.c @@ -1,159 +0,0 @@ -#include "lisc.h" - -typedef struct RList RList; -struct RList { - int t; - RList *l; -}; - -static Ref -copyof(Ref r, Ref *cp) -{ - if (rtype(r) == RTmp) - return cp[r.val]; - else - return r; -} - -static void -update(Ref r, Ref rcp, Ref *cp, RList **w) -{ - RList *l; - - if (!req(cp[r.val], rcp)) { - cp[r.val] = rcp; - l = emalloc(sizeof *l); - l->t = r.val; - l->l = *w; - *w = l; - } -} - -static void -visitphi(Phi *p, Ref *cp, RList **w) -{ - uint a; - Ref r, r1; - - r = R; - for (a=0; a<p->narg; a++) { - r1 = copyof(p->arg[a], cp); - if (req(r1, R)) - continue; - if (req(r, R) || req(r, r1)) - r = r1; - else { - r = p->to; - break; - } - } - assert(!req(r, R)); - update(p->to, r, cp, w); -} - -static void -visitins(Ins *i, Ref *cp, RList **w) -{ - Ref r; - - if (i->op == OCopy) { - r = copyof(i->arg[0], cp); - update(i->to, r, cp, w); - } else if (!req(i->to, R)) { - assert(rtype(i->to) == RTmp); - update(i->to, i->to, cp, w); - } -} - -void -copy(Fn *fn) -{ - Blk *b; - Ref *cp, r; - RList *w, *w1; - Use *u, *u1; - Ins *i; - Phi *p, **pp; - uint a; - int t; - - w = 0; - cp = emalloc(fn->ntmp * sizeof cp[0]); - for (b=fn->start; b; b=b->link) { - for (p=b->phi; p; p=p->link) - visitphi(p, cp, &w); - for (i=b->ins; i-b->ins < b->nins; i++) - visitins(i, cp, &w); - } - while ((w1=w)) { - t = w->t; - w = w->l; - free(w1); - u = fn->tmp[t].use; - u1 = u + fn->tmp[t].nuse; - for (; u<u1; u++) - switch 
(u->type) { - default: - diag("copy: invalid use"); - case UPhi: - visitphi(u->u.phi, cp, &w); - break; - case UIns: - visitins(u->u.ins, cp, &w); - break; - case UJmp: - break; - } - } - for (b=fn->start; b; b=b->link) { - for (pp=&b->phi; (p=*pp);) { - r = cp[p->to.val]; - if (!req(r, p->to)) { - *pp = p->link; - continue; - } - for (a=0; a<p->narg; a++) - if (rtype(p->arg[a]) == RTmp) { - r = cp[p->arg[a].val]; - assert(!req(r, R)); - p->arg[a] = r; - } - pp=&p->link; - } - for (i=b->ins; i-b->ins < b->nins; i++) { - r = cp[i->to.val]; - if (!req(r, i->to)) { - *i = (Ins){.op = ONop}; - continue; - } - for (a=0; a<2; a++) - if (rtype(i->arg[a]) == RTmp) { - r = cp[i->arg[a].val]; - assert(!req(r, R)); - i->arg[a] = r; - } - } - if (rtype(b->jmp.arg) == RTmp) { - r = cp[b->jmp.arg.val]; - assert(!req(r, R)); - b->jmp.arg = r; - } - } - if (debug['C']) { - fprintf(stderr, "\n> Copy information:"); - for (t=Tmp0; t<fn->ntmp; t++) { - if (req(cp[t], R)) { - fprintf(stderr, "\n%10s not seen!", - fn->tmp[t].name); - } - else if (!req(cp[t], TMP(t))) { - fprintf(stderr, "\n%10s copy of ", - fn->tmp[t].name); - printref(cp[t], fn, stderr); - } - } - fprintf(stderr, "\n\n> After copy elimination:\n"); - printfn(fn, stderr); - } - free(cp); -} diff --git a/lisc/emit.c b/lisc/emit.c @@ -1,666 +0,0 @@ -#include "lisc.h" - -enum { - SLong = 0, - SWord = 1, - SShort = 2, - SByte = 3, - - Ki = -1, /* matches Kw and Kl */ - Ka = -2, /* matches all classes */ -}; - -/* Instruction format strings: - * - * if the format string starts with -, the instruction - * is assumed to be 3-address and is put in 2-address - * mode using an extra mov if necessary - * - * if the format string starts with +, the same as the - * above applies, but commutativity is also assumed - * - * %k is used to set the class of the instruction, - * it'll expand to "l", "q", "ss", "sd", depending - * on the instruction class - * %0 designates the first argument - * %1 designates the second argument - * %= 
designates the result - * - * if %k is not used, a prefix to 0, 1, or = must be - * added, it can be: - * M - memory reference - * L - long (64 bits) - * W - word (32 bits) - * H - short (16 bits) - * B - byte (8 bits) - * S - single precision float - * D - double precision float - */ -static struct { - short op; - short cls; - char *asm; -} omap[] = { - { OAdd, Ka, "+add%k %1, %=" }, - { OSub, Ka, "-sub%k %1, %=" }, - { OAnd, Ki, "+and%k %1, %=" }, - { OOr, Ki, "+or%k %1, %=" }, - { OXor, Ki, "+xor%k %1, %=" }, - { OSar, Ki, "-sar%k %B1, %=" }, - { OShr, Ki, "-shr%k %B1, %=" }, - { OShl, Ki, "-shl%k %B1, %=" }, - { OMul, Ki, "+imul%k %1, %=" }, - { OMul, Ks, "+mulss %1, %=" }, /* fixme */ - { OMul, Kd, "+mulsd %1, %=" }, - { ODiv, Ka, "-div%k %1, %=" }, - { OStorel, Ka, "movq %L0, %M1" }, - { OStorew, Ka, "movl %W0, %M1" }, - { OStoreh, Ka, "movw %H0, %M1" }, - { OStoreb, Ka, "movb %B0, %M1" }, - { OStores, Ka, "movss %S0, %M1" }, - { OStored, Ka, "movsd %D0, %M1" }, - { OLoad, Ka, "mov%k %M0, %=" }, - { OLoadsw, Kl, "movslq %M0, %L=" }, - { OLoadsw, Kw, "movl %M0, %W=" }, - { OLoaduw, Ki, "movl %M0, %W=" }, - { OLoadsh, Ki, "movsw%k %M0, %=" }, - { OLoaduh, Ki, "movzw%k %M0, %=" }, - { OLoadsb, Ki, "movsb%k %M0, %=" }, - { OLoadub, Ki, "movzb%k %M0, %=" }, - { OExtsw, Kl, "movslq %W0, %L=" }, - { OExtuw, Kl, "movl %W0, %W=" }, - { OExtsh, Ki, "movsw%k %H0, %=" }, - { OExtuh, Ki, "movzw%k %H0, %=" }, - { OExtsb, Ki, "movsb%k %B0, %=" }, - { OExtub, Ki, "movzb%k %B0, %=" }, - - { OExts, Kd, "cvtss2sd %0, %=" }, /* see if factorization is possible */ - { OTruncd, Ks, "cvttsd2ss %0, %=" }, - { OFtosi, Kw, "cvttss2si %0, %=" }, - { OFtosi, Kl, "cvttsd2si %0, %=" }, - { OSitof, Ks, "cvtsi2ss %W0, %=" }, - { OSitof, Kd, "cvtsi2sd %L0, %=" }, - { OCast, Ki, "movq %D0, %L=" }, - { OCast, Ka, "movq %L0, %D=" }, - - { OAddr, Ki, "lea%k %M0, %=" }, - { OSwap, Ki, "xchg%k %0, %1" }, - { OSign, Kl, "cqto" }, - { OSign, Kw, "cltd" }, - { OXDiv, Ki, "div%k %0" }, - { OXIDiv, Ki, 
"idiv%k %0" }, - { OXCmp, Ks, "comiss %S0, %S1" }, /* fixme, Kf */ - { OXCmp, Kd, "comisd %D0, %D1" }, - { OXCmp, Ki, "cmp%k %0, %1" }, - { OXTest, Ki, "test%k %0, %1" }, - { OXSet+ICeq, Ki, "setz %B=\n\tmovzb%k %B=, %=" }, - { OXSet+ICsle, Ki, "setle %B=\n\tmovzb%k %B=, %=" }, - { OXSet+ICslt, Ki, "setl %B=\n\tmovzb%k %B=, %=" }, - { OXSet+ICsgt, Ki, "setg %B=\n\tmovzb%k %B=, %=" }, - { OXSet+ICsge, Ki, "setge %B=\n\tmovzb%k %B=, %=" }, - { OXSet+ICne, Ki, "setnz %B=\n\tmovzb%k %B=, %=" }, - { OXSet+ICXnp, Ki, "setnp %B=\n\tmovsb%k %B=, %=" }, - { OXSet+ICXp, Ki, "setp %B=\n\tmovsb%k %B=, %=" }, - { NOp, 0, 0 } -}; - -static char *rname[][4] = { - [RAX] = {"rax", "eax", "ax", "al"}, - [RBX] = {"rbx", "ebx", "bx", "bl"}, - [RCX] = {"rcx", "ecx", "cx", "cl"}, - [RDX] = {"rdx", "edx", "dx", "dl"}, - [RSI] = {"rsi", "esi", "si", "sil"}, - [RDI] = {"rdi", "edi", "di", "dil"}, - [RBP] = {"rbp", "ebp", "bp", "bpl"}, - [RSP] = {"rsp", "esp", "sp", "spl"}, - [R8 ] = {"r8" , "r8d", "r8w", "r8b"}, - [R9 ] = {"r9" , "r9d", "r9w", "r9b"}, - [R10] = {"r10", "r10d", "r10w", "r10b"}, - [R11] = {"r11", "r11d", "r11w", "r11b"}, - [R12] = {"r12", "r12d", "r12w", "r12b"}, - [R13] = {"r13", "r13d", "r13w", "r13b"}, - [R14] = {"r14", "r14d", "r14w", "r14b"}, - [R15] = {"r15", "r15d", "r15w", "r15b"}, -}; - - -static int -slot(int s, Fn *fn) -{ - struct { int i:14; } x; - - /* sign extend s using a bitfield */ - x.i = s; - assert(NAlign == 3); - if (x.i < 0) - return -4 * x.i; - else { - assert(fn->slot >= x.i); - return -4 * (fn->slot - x.i); - } -} - -static void -emitcon(Con *con, FILE *f) -{ - switch (con->type) { - default: - diag("emit: invalid constant"); - case CAddr: - fputs(con->label, f); - if (con->bits.i) - fprintf(f, "%+"PRId64, con->bits.i); - break; - case CBits: - fprintf(f, "%"PRId64, con->bits.i); - break; - } -} - -static char * -regtoa(int reg, int sz) -{ - static char buf[6]; - - if (reg >= XMM0) { - sprintf(buf, "xmm%d", reg-XMM0); - return buf; - } else - return 
rname[reg][sz]; -} - -static Ref -getarg(char c, Ins *i) -{ - switch (c) { - default: - diag("emit: 0, 1, = expected in format"); - case '0': - return i->arg[0]; - case '1': - return i->arg[1]; - case '=': - return i->to; - } -} - -static void emitins(Ins, Fn *, FILE *); - -static void -emitcopy(Ref r1, Ref r2, int k, Fn *fn, FILE *f) -{ - Ins icp; - - icp.op = OCopy; - icp.arg[0] = r2; - icp.to = r1; - icp.cls = k; - emitins(icp, fn, f); -} - -static void -emitf(char *s, Ins *i, Fn *fn, FILE *f) -{ - static char clstoa[][3] = {"l", "q", "ss", "sd"}; - char c; - int sz; - Ref ref; - Mem *m; - Con off; - - switch (*s) { - case '+': - if (req(i->arg[1], i->to)) { - ref = i->arg[0]; - i->arg[0] = i->arg[1]; - i->arg[1] = ref; - } - /* fall through */ - case '-': - if (req(i->arg[1], i->to) && !req(i->arg[0], i->to)) - diag("emit: cannot convert to 2-address"); - emitcopy(i->to, i->arg[0], i->cls, fn, f); - s++; - break; - } - - fputc('\t', f); -Next: - while ((c = *s++) != '%') - if (!c) { - fputc('\n', f); - return; - } else - fputc(c, f); - switch ((c = *s++)) { - default: - diag("emit: invalid escape"); - case '%': - fputc('%', f); - break; - case 'k': - fputs(clstoa[i->cls], f); - break; - case '0': - case '1': - case '=': - sz = KWIDE(i->cls) ? 
SLong : SWord; - s--; - /* fall through */ - case 'D': - case 'S': - Ref: - c = *s++; - ref = getarg(c, i); - switch (rtype(ref)) { - default: - diag("emit: invalid reference"); - case RTmp: - assert(isreg(ref)); - fprintf(f, "%%%s", regtoa(ref.val, sz)); - break; - case RSlot: - fprintf(f, "%d(%%rbp)", slot(ref.val, fn)); - break; - case RAMem: - Mem: - m = &fn->mem[ref.val & AMask]; - if (rtype(m->base) == RSlot) { - off.type = CBits; - off.bits.i = slot(m->base.val, fn); - addcon(&m->offset, &off); - m->base = TMP(RBP); - } - if (m->offset.type != CUndef) - emitcon(&m->offset, f); - if (req(m->base, R) && req(m->index, R)) - break; - fputc('(', f); - if (!req(m->base, R)) - fprintf(f, "%%%s", regtoa(m->base.val, SLong)); - if (!req(m->index, R)) - fprintf(f, ", %%%s, %d", - regtoa(m->index.val, SLong), - m->scale - ); - fputc(')', f); - break; - case RCon: - fputc('$', f); - emitcon(&fn->con[ref.val], f); - break; - } - break; - case 'L': - sz = SLong; - goto Ref; - case 'W': - sz = SWord; - goto Ref; - case 'H': - sz = SShort; - goto Ref; - case 'B': - sz = SByte; - goto Ref; - case 'M': - c = *s++; - ref = getarg(c, i); - switch (rtype(ref)) { - default: - diag("emit: invalid memory reference"); - case RAMem: - goto Mem; - case RSlot: - fprintf(f, "%d(%%rbp)", slot(ref.val, fn)); - break; - case RCon: - emitcon(&fn->con[ref.val], f); - fprintf(f, "(%%rip)"); - break; - case RTmp: - assert(isreg(ref)); - fprintf(f, "(%%%s)", regtoa(ref.val, SLong)); - break; - } - break; - } - goto Next; -} - -static void -emitins(Ins i, Fn *fn, FILE *f) -{ - Ref r; - int64_t val; - int o; - - switch (i.op) { - default: - Table: - /* most instructions are just pulled out of - * the table omap[], some special cases are - * detailed below */ - for (o=0;; o++) { - /* this linear search should really be a binary - * search */ - if (omap[o].op == NOp) - diag("emit: no entry found for instruction"); - if (omap[o].op == i.op) - if (omap[o].cls == i.cls - || (omap[o].cls == Ki && 
KBASE(i.cls) == 0) - || (omap[o].cls == Ka)) - break; - } - emitf(omap[o].asm, &i, fn, f); - break; - case ONop: - /* just do nothing for nops, they are inserted - * by some passes */ - break; - case OMul: - /* here, we try to use the 3-addresss form - * of multiplication when possible */ - if (rtype(i.arg[1]) == RCon) { - r = i.arg[0]; - i.arg[0] = i.arg[1]; - i.arg[1] = r; - } - if (KBASE(i.cls) == 0 /* only available for ints */ - && rtype(i.arg[0]) == RCon - && rtype(i.arg[1]) == RTmp) { - emitf("imul%k %0, %1, %=", &i, fn, f); - break; - } - goto Table; - case OSub: - /* we have to use the negation trick to handle - * some 3-address substractions */ - if (req(i.to, i.arg[1])) { - emitf("neg%k %=", &i, fn, f); - emitf("add%k %0, %=", &i, fn, f); - break; - } - goto Table; - case OCopy: - /* make sure we don't emit useless copies, - * also, we can use a trick to load 64-bits - * registers, it's detailed in my note below - * http://c9x.me/art/notes.html?09/19/2015 */ - if (req(i.to, R) || req(i.arg[0], R)) - break; - if (isreg(i.to) - && rtype(i.arg[0]) == RCon - && i.cls == Kl - && fn->con[i.arg[0].val].type == CBits - && (val = fn->con[i.arg[0].val].bits.i) >= 0 - && val <= UINT32_MAX) { - emitf("movl %W0, %W=", &i, fn, f); - } else if (!req(i.arg[0], i.to)) - emitf("mov%k %0, %=", &i, fn, f); - break; - case OCall: - /* calls simply have a weird syntax in AT&T - * assembly... 
*/ - switch (rtype(i.arg[0])) { - default: - diag("emit: invalid call instruction"); - case RCon: - fprintf(f, "\tcallq "); - emitcon(&fn->con[i.arg[0].val], f); - fprintf(f, "\n"); - break; - case RTmp: - emitf("callq *%L0", &i, fn, f); - break; - } - break; - case OSAlloc: - /* there is no good reason why this is here - * maybe we should split OSAlloc in 2 different - * instructions depending on the result - */ - emitf("subq %L0, %%rsp", &i, fn, f); - if (!req(i.to, R)) - emitcopy(i.to, TMP(RSP), Kl, fn, f); - break; - case OSwap: - if (KBASE(i.cls) == 0) - goto Table; - /* for floats, there is no swap instruction - * so we use xmm15 as a temporary - */ - emitcopy(TMP(XMM0+15), i.arg[0], i.cls, fn, f); - emitcopy(i.arg[0], i.arg[1], i.cls, fn, f); - emitcopy(i.arg[1], TMP(XMM0+15), i.cls, fn, f); - break; - } -} - -static int -cneg(int cmp) -{ - switch (cmp) { - default: diag("emit: cneg() unhandled comparison"); - case ICule: return ICugt; - case ICult: return ICuge; - case ICsle: return ICsgt; - case ICslt: return ICsge; - case ICsgt: return ICsle; - case ICsge: return ICslt; - case ICugt: return ICule; - case ICuge: return ICult; - case ICeq: return ICne; - case ICne: return ICeq; - case ICXnp: return ICXp; - case ICXp: return ICXnp; - } -} - -static int -framesz(Fn *fn) -{ - int i, o, f; - - assert(NAlign == 3); - for (i=0, o=0; i<NRClob; i++) - o ^= 1 & (fn->reg >> rclob[i]); - f = fn->slot; - f = (f + 3) & -4; - return 4*f + 8*o; -} - -void -emitfn(Fn *fn, FILE *f) -{ - static char *ctoa[] = { - [ICeq] = "z", - [ICule] = "be", - [ICult] = "b", - [ICsle] = "le", - [ICslt] = "l", - [ICsgt] = "g", - [ICsge] = "ge", - [ICugt] = "a", - [ICuge] = "ae", - [ICne] = "nz", - [ICXnp] = "np", - [ICXp] = "p" - }; - Blk *b, *s; - Ins *i, itmp; - int *r, c, fs; - - fprintf(f, - ".text\n" - ".globl %s\n" - ".type %s, @function\n" - "%s:\n" - "\tpush %%rbp\n" - "\tmov %%rsp, %%rbp\n", - fn->name, fn->name, fn->name - ); - fs = framesz(fn); - if (fs) - fprintf(f, "\tsub $%d, 
%%rsp\n", fs); - for (r=rclob; r-rclob < NRClob; r++) - if (fn->reg & BIT(*r)) { - itmp.arg[0] = TMP(*r); - emitf("pushq %L0", &itmp, fn, f); - } - - for (b=fn->start; b; b=b->link) { - fprintf(f, ".L%s:\n", b->name); - for (i=b->ins; i!=&b->ins[b->nins]; i++) - emitins(*i, fn, f); - switch (b->jmp.type) { - case JRet0: - for (r=&rclob[NRClob]; r>rclob;) - if (fn->reg & BIT(*--r)) { - itmp.arg[0] = TMP(*r); - emitf("popq %L0", &itmp, fn, f); - } - fprintf(f, - "\tleave\n" - "\tret\n" - ); - break; - case JJmp: - if (b->s1 != b->link) - fprintf(f, "\tjmp .L%s\n", b->s1->name); - break; - default: - c = b->jmp.type - JXJc; - if (0 <= c && c <= NXICmp) { - if (b->link == b->s2) { - s = b->s1; - } else if (b->link == b->s1) { - c = cneg(c); - s = b->s2; - } else - diag("emit: unhandled jump (1)"); - fprintf(f, "\tj%s .L%s\n", ctoa[c], s->name); - break; - } - diag("emit: unhandled jump (2)"); - } - } - -} - -void -emitdat(Dat *d, FILE *f) -{ - static int align; - static char *dtoa[] = { - [DAlign] = ".align", - [DB] = "\t.byte", - [DH] = "\t.value", - [DW] = "\t.long", - [DL] = "\t.quad" - }; - - switch (d->type) { - case DStart: - align = 0; - fprintf(f, ".data\n"); - break; - case DEnd: - break; - case DName: - if (!align) - fprintf(f, ".align 8\n"); - fprintf(f, - ".globl %s\n" - ".type %s, @object\n" - "%s:\n", - d->u.str, d->u.str, d->u.str - ); - break; - case DZ: - fprintf(f, "\t.fill %"PRId64",1,0\n", d->u.num); - break; - default: - if (d->type == DAlign) - align = 1; - - if (d->isstr) { - if (d->type != DB) - err("strings only supported for 'b' currently"); - fprintf(f, "\t.ascii \"%s\"\n", d->u.str); - } - else if (d->isref) { - fprintf(f, "%s %s%+"PRId64"\n", - dtoa[d->type], d->u.ref.nam, - d->u.ref.off); - } - else { - fprintf(f, "%s %"PRId64"\n", - dtoa[d->type], d->u.num); - } - break; - } -} - -typedef struct FBits FBits; - -struct FBits { - int64_t bits; - int wide; - FBits *link; -}; - -static FBits *stash; - -int -stashfp(int64_t n, int w) -{ - 
FBits **pb, *b; - int i; - - /* does a dumb de-dup of fp constants - * this should be the linker's job */ - for (pb=&stash, i=0; (b=*pb); pb=&b->link, i++) - if (n == b->bits && w == b->wide) - return i; - b = emalloc(sizeof *b); - b->bits = n; - b->wide = w; - b->link = 0; - *pb = b; - return i; -} - -void -emitfin(FILE *f) -{ - FBits *b; - int i; - - if (!stash) - return; - fprintf(f, "/* floating point constants */\n"); - fprintf(f, ".data\n.align 8\n"); - for (b=stash, i=0; b; b=b->link, i++) - if (b->wide) - fprintf(f, - ".Lfp%d:\n" - "\t.quad %"PRId64 - " /* %f */\n", - i, b->bits, - *(double *)&b->bits - ); - for (b=stash, i=0; b; b=b->link, i++) - if (!b->wide) - fprintf(f, - ".Lfp%d:\n" - "\t.long %"PRId64 - " /* %lf */\n", - i, b->bits & 0xffffffff, - *(float *)&b->bits - ); - while ((b=stash)) { - stash = b->link; - free(b); - } -} diff --git a/lisc/isel.c b/lisc/isel.c @@ -1,1135 +0,0 @@ -#include "lisc.h" -#include <limits.h> - -/* For x86_64, do the following: - * - * - lower calls - * - check that constants are used only in - * places allowed - * - ensure immediates always fit in 32b - * - explicit machine register contraints - * on instructions like division. - * - implement fast locals (the streak of - * constant allocX in the first basic block) - * - recognize complex addressing modes - * - * Invariant: the use counts that are used - * in sel() must be sound. This - * is not so trivial, maybe the - * dce should be moved out... 
- */ - -typedef struct ANum ANum; -typedef struct AClass AClass; -typedef struct RAlloc RAlloc; - -struct ANum { - char n, l, r; - Ins *i; - Ref mem; -}; - -static void amatch(Addr *, Ref, ANum *, Fn *, int); - -static int -fcmptoi(int fc) -{ - switch (fc) { - default: diag("isel: fcmptoi defaulted"); - case FCle: return ICule; - case FClt: return ICult; - case FCgt: return ICugt; - case FCge: return ICuge; - case FCne: return ICne; - case FCeq: return ICeq; - case FCo: return ICXnp; - case FCuo: return ICXp; - } -} - -static int -iscmp(int op, int *pk, int *pc) -{ - int k, c; - - if (OCmpw <= op && op <= OCmpw1) { - c = op - OCmpw; - k = Kw; - } - else if (OCmpl <= op && op <= OCmpl1) { - c = op - OCmpl; - k = Kl; - } - else if (OCmps <= op && op <= OCmps1) { - c = fcmptoi(op - OCmps); - k = Ks; - } - else if (OCmpd <= op && op <= OCmpd1) { - c = fcmptoi(op - OCmpd); - k = Kd; - } - else - return 0; - if (pk) - *pk = k; - if (pc) - *pc = c; - return 1; -} - -static int -noimm(Ref r, Fn *fn) -{ - int64_t val; - - if (rtype(r) != RCon) - return 0; - switch (fn->con[r.val].type) { - default: - diag("isel: invalid constant"); - case CAddr: - /* we only support the 'small' - * code model of the ABI, this - * means that we can always - * address data with 32bits - */ - return 0; - case CBits: - val = fn->con[r.val].bits.i; - return (val < INT32_MIN || val > INT32_MAX); - } -} - -static int -rslot(Ref r, Fn *fn) -{ - if (rtype(r) != RTmp) - return -1; - return fn->tmp[r.val].slot; -} - -static int -argcls(Ins *i, int n) -{ - return opdesc[i->op].argcls[n][i->cls]; -} - -static void -fixarg(Ref *r, int k, int phi, Fn *fn) -{ - Addr a; - Ref r0, r1; - int s, n; - - r1 = r0 = *r; - s = rslot(r0, fn); - if (KBASE(k) == 1 && rtype(r0) == RCon) { - /* load floating points from memory - * slots, they can't be used as - * immediates - */ - r1 = MEM(fn->nmem); - vgrow(&fn->mem, ++fn->nmem); - memset(&a, 0, sizeof a); - a.offset.type = CAddr; - n = stashfp(fn->con[r0.val].bits.i, 
KWIDE(k)); - sprintf(a.offset.label, ".Lfp%d", n); - fn->mem[fn->nmem-1] = a; - } - else if (!phi && k == Kl && noimm(r0, fn)) { - /* load constants that do not fit in - * a 32bit signed integer into a - * long temporary - */ - r1 = newtmp("isel", Kl, fn); - emit(OCopy, Kl, r1, r0, R); - } - else if (s != -1) { - /* load fast locals' addresses into - * temporaries right before the - * instruction - */ - r1 = newtmp("isel", Kl, fn); - emit(OAddr, Kl, r1, SLOT(s), R); - } - *r = r1; -} - -static void -chuse(Ref r, int du, Fn *fn) -{ - if (rtype(r) == RTmp) - fn->tmp[r.val].nuse += du; -} - -static void -seladdr(Ref *r, ANum *an, Fn *fn) -{ - Addr a; - Ref r0, r1; - - r0 = *r; - if (rtype(r0) == RTmp) { - chuse(r0, -1, fn); - r1 = an[r0.val].mem; - if (req(r1, R)) { - amatch(&a, r0, an, fn, 1); - vgrow(&fn->mem, ++fn->nmem); - fn->mem[fn->nmem-1] = a; - r1 = MEM(fn->nmem-1); - chuse(a.base, +1, fn); - chuse(a.index, +1, fn); - if (rtype(a.base) != RTmp) - if (rtype(a.index) != RTmp) - an[r0.val].mem = r1; - } - *r = r1; - } -} - -static void -selcmp(Ref arg[2], int k, Fn *fn) -{ - Ref r; - - if (rtype(arg[0]) == RCon) { - r = arg[1]; - arg[1] = arg[0]; - arg[0] = r; - } - assert(rtype(arg[0]) != RCon); - emit(OXCmp, k, R, arg[1], arg[0]); - fixarg(&curi->arg[0], k, 0, fn); -} - -static void -sel(Ins i, ANum *an, Fn *fn) -{ - Ref r0, r1; - int x, k, kc; - int64_t val; - Ins *i0; - - if (rtype(i.to) == RTmp) - if (!isreg(i.to) && !isreg(i.arg[0]) && !isreg(i.arg[1])) - if (fn->tmp[i.to.val].nuse == 0) { - chuse(i.arg[0], -1, fn); - chuse(i.arg[1], -1, fn); - return; - } - i0 = curi; - k = i.cls; - switch (i.op) { - case ODiv: - case ORem: - case OUDiv: - case OURem: - if (i.op == ODiv || i.op == OUDiv) - r0 = TMP(RAX), r1 = TMP(RDX); - else - r0 = TMP(RDX), r1 = TMP(RAX); - emit(OCopy, k, i.to, r0, R); - emit(OCopy, k, R, r1, R); - if (rtype(i.arg[1]) == RCon) { - /* immediates not allowed for - * divisions in x86 - */ - r0 = newtmp("isel", k, fn); - } else - r0 = 
i.arg[1]; - if (i.op == ODiv || i.op == ORem) { - emit(OXIDiv, k, R, r0, R); - emit(OSign, k, TMP(RDX), TMP(RAX), R); - } else { - emit(OXDiv, k, R, r0, R); - emit(OCopy, k, TMP(RDX), CON_Z, R); - } - emit(OCopy, k, TMP(RAX), i.arg[0], R); - if (rtype(i.arg[1]) == RCon) - emit(OCopy, k, r0, i.arg[1], R); - break; - case OSar: - case OShr: - case OShl: - if (rtype(i.arg[1]) == RCon) - goto Emit; - r0 = i.arg[1]; - i.arg[1] = TMP(RCX); - emit(OCopy, Kw, R, TMP(RCX), R); - emiti(i); - emit(OCopy, Kw, TMP(RCX), r0, R); - break; - case ONop: - break; - case OStored: - case OStores: - case OStorel: - case OStorew: - case OStoreh: - case OStoreb: - if (rtype(i.arg[0]) == RCon) { - if (i.op == OStored) - i.op = OStorel; - if (i.op == OStores) - i.op = OStorew; - } - seladdr(&i.arg[1], an, fn); - goto Emit; - case_OLoad: - seladdr(&i.arg[0], an, fn); - goto Emit; - case OCall: - case OSAlloc: - case OCopy: - case OAdd: - case OSub: - case OMul: - case OAnd: - case OOr: - case OXor: - case OXTest: - case OFtosi: - case OSitof: - case OExts: - case OTruncd: - case OCast: - case_OExt: -Emit: - emiti(i); - fixarg(&curi->arg[0], argcls(curi, 0), 0, fn); - fixarg(&curi->arg[1], argcls(curi, 1), 0, fn); - break; - case OAlloc: - case OAlloc+1: - case OAlloc+2: /* == OAlloc1 */ - /* we need to make sure - * the stack remains aligned - * (rsp = 0) mod 16 - */ - if (rtype(i.arg[0]) == RCon) { - assert(fn->con[i.arg[0].val].type == CBits); - val = fn->con[i.arg[0].val].bits.i; - val = (val + 15) & ~INT64_C(15); - if (val < 0 || val > INT32_MAX) - diag("isel: alloc too large"); - emit(OSAlloc, Kl, i.to, getcon(val, fn), R); - } else { - /* r0 = (i.arg[0] + 15) & -16 */ - r0 = newtmp("isel", Kl, fn); - r1 = newtmp("isel", Kl, fn); - emit(OSAlloc, Kl, i.to, r0, R); - emit(OAnd, Kl, r0, r1, getcon(-16, fn)); - emit(OAdd, Kl, r1, i.arg[0], getcon(15, fn)); - } - break; - default: - if (isext(i.op)) - goto case_OExt; - if (isload(i.op)) - goto case_OLoad; - if (iscmp(i.op, &kc, &x)) { - if 
(rtype(i.arg[0]) == RCon) - x = icmpop(x); - emit(OXSet+x, k, i.to, R, R); - selcmp(i.arg, kc, fn); - break; - } - diag("isel: non-exhaustive implementation"); - } - - while (i0 > curi && --i0) - if (rslot(i0->arg[0], fn) != -1 - || rslot(i0->arg[1], fn) != -1) - diag("isel: usupported address argument"); -} - -static Ins * -flagi(Ins *i0, Ins *i) -{ - while (i>i0) { - i--; - if (opdesc[i->op].sflag) - return i; - if (opdesc[i->op].lflag) - continue; - return 0; - } - return 0; -} - -struct AClass { - int inmem; - int align; - uint size; - int cls[2]; -}; - -static void -aclass(AClass *a, Typ *t) -{ - int e, s, n, cls; - uint sz, al; - - sz = t->size; - al = 1u << t->align; - - /* the ABI requires sizes to be rounded - * up to the nearest multiple of 8, moreover - * it makes it easy load and store structures - * in registers - */ - if (al < 8) - al = 8; - sz = (sz + al-1) & -al; - - a->size = sz; - a->align = t->align; - - if (t->dark || sz > 16) { - /* large or unaligned structures are - * required to be passed in memory - */ - a->inmem = 1; - return; - } - - a->inmem = 0; - for (e=0, s=0; e<2; e++) { - cls = -1; - for (n=0; n<8 && t->seg[s].len; s++) { - if (t->seg[s].ispad) { - /* don't change anything */ - } - else if (t->seg[s].isflt) { - if (cls == -1) - cls = Kd; - } - else - cls = Kl; - n += t->seg[s].len; - } - assert(n <= 8); - a->cls[e] = cls; - } -} - -static void -blit(Ref rstk, uint soff, Ref rsrc, uint sz, Fn *fn) -{ - Ref r, r1; - uint boff; - - /* it's an impolite blit, we might go across the end - * of the source object a little bit... 
*/ - for (boff=0; sz>0; sz-=8, soff+=8, boff+=8) { - r = newtmp("abi", Kl, fn); - r1 = newtmp("abi", Kl, fn); - emit(OStorel, 0, R, r, r1); - emit(OAdd, Kl, r1, rstk, getcon(soff, fn)); - r1 = newtmp("abi", Kl, fn); - emit(OLoad, Kl, r, r1, R); - emit(OAdd, Kl, r1, rsrc, getcon(boff, fn)); - chuse(rsrc, +1, fn); - chuse(rstk, +1, fn); - } -} - -static int -retr(Ref reg[2], AClass *aret) -{ - static int retreg[2][2] = {{RAX, RDX}, {XMM0, XMM0+1}}; - int n, k, ca, nr[2]; - - nr[0] = nr[1] = 0; - ca = 0; - for (n=0; aret->cls[n]>=0 && n<2; n++) { - k = KBASE(aret->cls[n]); - reg[n] = TMP(retreg[k][nr[k]++]); - ca += 1 << (2 * k); - } - return ca; -} - -static void -selret(Blk *b, Fn *fn) -{ - int j, k, ca; - Ref r, r0, reg[2]; - AClass aret; - - j = b->jmp.type; - - if (!isret(j) || j == JRet0) - return; - - r0 = b->jmp.arg; - b->jmp.type = JRet0; - - if (j == JRetc) { - aclass(&aret, &typ[fn->retty]); - if (aret.inmem) { - assert(rtype(fn->retr) == RTmp); - emit(OCopy, Kl, TMP(RAX), fn->retr, R); - chuse(fn->retr, +1, fn); - blit(fn->retr, 0, r0, aret.size, fn); - ca = 1; - } else { - ca = retr(reg, &aret); - if (aret.size > 8) { - r = newtmp("abi", Kl, fn); - emit(OLoad, Kl, reg[1], r, R); - emit(OAdd, Kl, r, r0, getcon(8, fn)); - chuse(r0, +1, fn); - } - emit(OLoad, Kl, reg[0], r0, R); - } - } else { - k = j - JRetw; - if (KBASE(k) == 0) { - emit(OCopy, k, TMP(RAX), r0, R); - ca = 1; - } else { - emit(OCopy, k, TMP(XMM0), r0, R); - ca = 1 << 2; - } - } - - b->jmp.arg = CALL(ca); -} - -static void -seljmp(Blk *b, Fn *fn) -{ - Ref r; - int c, k; - Ins *fi; - - if (b->jmp.type == JRet0 || b->jmp.type == JJmp) - return; - assert(b->jmp.type == JJnz); - r = b->jmp.arg; - b->jmp.arg = R; - assert(!req(r, R)); - if (rtype(r) == RCon) { - b->jmp.type = JJmp; - if (req(r, CON_Z)) - b->s1 = b->s2; - b->s2 = 0; - return; - } - fi = flagi(b->ins, &b->ins[b->nins]); - if (fi && req(fi->to, r)) { - if (iscmp(fi->op, &k, &c)) { - if (rtype(fi->arg[0]) == RCon) - c = icmpop(c); - 
b->jmp.type = JXJc + c; - if (fn->tmp[r.val].nuse == 1) { - assert(fn->tmp[r.val].ndef == 1); - selcmp(fi->arg, k, fn); - *fi = (Ins){.op = ONop}; - } - return; - } - if (fi->op == OAnd && fn->tmp[r.val].nuse == 1 - && (rtype(fi->arg[0]) == RTmp || - rtype(fi->arg[1]) == RTmp)) { - fi->op = OXTest; - fi->to = R; - b->jmp.type = JXJc + ICne; - if (rtype(fi->arg[1]) == RCon) { - r = fi->arg[1]; - fi->arg[1] = fi->arg[0]; - fi->arg[0] = r; - } - return; - } - /* since flags are not tracked in liveness, - * the result of the flag-setting instruction - * has to be marked as live - */ - if (fn->tmp[r.val].nuse == 1) - emit(OCopy, Kw, R, r, R); - b->jmp.type = JXJc + ICne; - return; - } - selcmp((Ref[2]){r, CON_Z}, Kw, fn); /* todo, add long branch if non-zero */ - b->jmp.type = JXJc + ICne; -} - -static int -classify(Ins *i0, Ins *i1, AClass *ac, int op, AClass *aret) -{ - int nint, ni, nsse, ns, n, *pn; - AClass *a; - Ins *i; - - if (aret && aret->inmem) - nint = 5; /* hidden argument */ - else - nint = 6; - nsse = 8; - for (i=i0, a=ac; i<i1; i++, a++) { - if (i->op == op) { - if (KBASE(i->cls) == 0) - pn = &nint; - else - pn = &nsse; - if (*pn > 0) { - --*pn; - a->inmem = 0; - } else - a->inmem = 2; - a->align = 3; - a->size = 8; - a->cls[0] = i->cls; - } else { - n = i->arg[0].val & AMask; - aclass(a, &typ[n]); - if (a->inmem) - continue; - ni = ns = 0; - for (n=0; n<2; n++) - if (KBASE(a->cls[n]) == 0) - ni++; - else - ns++; - if (nint >= ni && nsse >= ns) { - nint -= ni; - nsse -= ns; - } else - a->inmem = 1; - } - } - - return ((6-nint) << 4) | ((8-nsse) << 8); -} - -int rsave[] = { - RDI, RSI, RDX, RCX, R8, R9, R10, R11, RAX, - XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, - XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14 -}; -int rclob[] = {RBX, R12, R13, R14, R15}; - -MAKESURE(rsave_has_correct_size, sizeof rsave == NRSave * sizeof(int)); -MAKESURE(rclob_has_correct_size, sizeof rclob == NRClob * sizeof(int)); - -bits -retregs(Ref r, int p[2]) -{ - bits b; - int 
ni, nf; - - assert(rtype(r) == RACall); - b = 0; - ni = r.val & 3; - nf = (r.val >> 2) & 3; - if (ni >= 1) - b |= BIT(RAX); - if (ni >= 2) - b |= BIT(RDX); - if (nf >= 1) - b |= BIT(XMM0); - if (nf >= 2) - b |= BIT(XMM1); - if (p) { - p[0] = ni; - p[1] = nf; - } - return b; -} - -bits -argregs(Ref r, int p[2]) -{ - bits b; - int j, ni, nf; - - assert(rtype(r) == RACall); - b = 0; - ni = (r.val >> 4) & 15; - nf = (r.val >> 8) & 15; - for (j=0; j<ni; j++) - b |= BIT(rsave[j]); - for (j=0; j<nf; j++) - b |= BIT(XMM0+j); - if (p) { - p[0] = ni + 1; - p[1] = nf; - } - return b | BIT(RAX); -} - -static Ref -rarg(int ty, int *ni, int *ns) -{ - if (KBASE(ty) == 0) - return TMP(rsave[(*ni)++]); - else - return TMP(XMM0 + (*ns)++); -} - -struct RAlloc { - Ins i; - RAlloc *link; -}; - -static void -selcall(Fn *fn, Ins *i0, Ins *i1, RAlloc **rap) -{ - Ins *i; - AClass *ac, *a, aret; - int ca, ni, ns; - uint stk, off; - Ref r, r1, r2, reg[2], regcp[2]; - RAlloc *ra; - - ac = alloc((i1-i0) * sizeof ac[0]); - if (!req(i1->arg[1], R)) { - assert(rtype(i1->arg[1]) == RAType); - aclass(&aret, &typ[i1->arg[1].val & AMask]); - ca = classify(i0, i1, ac, OArg, &aret); - } else - ca = classify(i0, i1, ac, OArg, 0); - - for (stk=0, a=&ac[i1-i0]; a>ac;) - if ((--a)->inmem) { - assert(a->align <= 4); - stk += a->size; - if (a->align == 4) - stk += stk & 15; - } - stk += stk & 15; - if (stk) { - r = getcon(-(int64_t)stk, fn); - emit(OSAlloc, Kl, R, r, R); - } - - if (!req(i1->arg[1], R)) { - if (aret.inmem) { - /* get the return location from eax - * it saves one callee-save reg */ - r1 = newtmp("abi", Kl, fn); - emit(OCopy, Kl, i1->to, TMP(RAX), R); - ca += 1; - } else { - if (aret.size > 8) { - r = newtmp("abi", Kl, fn); - regcp[1] = newtmp("abi", aret.cls[1], fn); - emit(OStorel, 0, R, regcp[1], r); - emit(OAdd, Kl, r, i1->to, getcon(8, fn)); - chuse(i1->to, +1, fn); - ca += 1 << (2 * KBASE(aret.cls[1])); - } - regcp[0] = newtmp("abi", aret.cls[0], fn); - emit(OStorel, 0, R, regcp[0], 
i1->to); - ca += 1 << (2 * KBASE(aret.cls[0])); - retr(reg, &aret); - if (aret.size > 8) - emit(OCopy, aret.cls[1], regcp[1], reg[1], R); - emit(OCopy, aret.cls[0], regcp[0], reg[0], R); - r1 = i1->to; - } - /* allocate return pad */ - ra = alloc(sizeof *ra); - assert(NAlign == 3); - aret.align -= 2; - if (aret.align < 0) - aret.align = 0; - ra->i.op = OAlloc + aret.align; - ra->i.cls = Kl; - ra->i.to = r1; - ra->i.arg[0] = getcon(aret.size, fn); - ra->link = (*rap); - *rap = ra; - } else { - ra = 0; - if (KBASE(i1->cls) == 0) { - emit(OCopy, i1->cls, i1->to, TMP(RAX), R); - ca += 1; - } else { - emit(OCopy, i1->cls, i1->to, TMP(XMM0), R); - ca += 1 << 2; - } - } - emit(OCall, i1->cls, R, i1->arg[0], CALL(ca)); - emit(OCopy, Kw, TMP(RAX), getcon((ca >> 8) & 15, fn), R); - - ni = ns = 0; - if (ra && aret.inmem) - emit(OCopy, Kl, rarg(Kl, &ni, &ns), ra->i.to, R); /* pass hidden argument */ - for (i=i0, a=ac; i<i1; i++, a++) { - if (a->inmem) - continue; - r1 = rarg(a->cls[0], &ni, &ns); - if (i->op == OArgc) { - if (a->size > 8) { - r2 = rarg(a->cls[1], &ni, &ns); - r = newtmp("abi", Kl, fn); - emit(OLoad, a->cls[1], r2, r, R); - emit(OAdd, Kl, r, i->arg[1], getcon(8, fn)); - chuse(i->arg[1], +1, fn); - } - emit(OLoad, a->cls[0], r1, i->arg[1], R); - } else - emit(OCopy, i->cls, r1, i->arg[0], R); - } - - if (!stk) - return; - - r = newtmp("abi", Kl, fn); - chuse(r, -1, fn); - for (i=i0, a=ac, off=0; i<i1; i++, a++) { - if (!a->inmem) - continue; - if (i->op == OArgc) { - if (a->align == 4) - off += off & 15; - blit(r, off, i->arg[1], a->size, fn); - } else { - r1 = newtmp("abi", Kl, fn); - emit(OStorel, 0, R, i->arg[0], r1); - emit(OAdd, Kl, r1, r, getcon(off, fn)); - chuse(r, +1, fn); - } - off += a->size; - } - emit(OSAlloc, Kl, r, getcon(stk, fn), R); -} - -static void -selpar(Fn *fn, Ins *i0, Ins *i1) -{ - AClass *ac, *a, aret; - Ins *i; - int ni, ns, s, al; - Ref r, r1; - - ac = alloc((i1-i0) * sizeof ac[0]); - curi = insb; - ni = ns = 0; - - if (fn->retty >= 
0) { - aclass(&aret, &typ[fn->retty]); - if (aret.inmem) { - r = newtmp("abi", Kl, fn); - *curi++ = (Ins){OCopy, r, {rarg(Kl, &ni, &ns)}, Kl}; - fn->retr = r; - } - classify(i0, i1, ac, OPar, &aret); - } else - classify(i0, i1, ac, OPar, 0); - - assert(NAlign == 3); - - s = 4; - for (i=i0, a=ac; i<i1; i++, a++) { - switch (a->inmem) { - case 1: - assert(a->align <= 4); - if (a->align == 4) - s = (s+3) & -4; - fn->tmp[i->to.val].slot = -s; /* HACK! */ - s += a->size / 4; - continue; - case 2: - *curi++ = (Ins){OLoad, i->to, {SLOT(-s)}, i->cls}; - s += 2; - continue; - } - r1 = rarg(a->cls[0], &ni, &ns); - if (i->op == OParc) { - r = newtmp("abi", Kl, fn); - *curi++ = (Ins){OCopy, r, {r1}, Kl}; - a->cls[0] = r.val; - if (a->size > 8) { - r1 = rarg(a->cls[1], &ni, &ns); - r = newtmp("abi", Kl, fn); - *curi++ = (Ins){OCopy, r, {r1}, Kl}; - a->cls[1] = r.val; - } - } else - *curi++ = (Ins){OCopy, i->to, {r1}, i->cls}; - } - for (i=i0, a=ac; i<i1; i++, a++) { - if (i->op != OParc || a->inmem) - continue; - assert(NAlign == 3); - for (al=0; a->align >> (al+2); al++) - ; - r = TMP(a->cls[0]); - r1 = i->to; - *curi++ = (Ins){OAlloc+al, r1, {getcon(a->size, fn)}, Kl}; - *curi++ = (Ins){OStorel, R, {r, r1}, 0}; - if (a->size > 8) { - r = newtmp("abi", Kl, fn); - *curi++ = (Ins){OAdd, r, {r1, getcon(8, fn)}, Kl}; - r1 = TMP(a->cls[1]); - *curi++ = (Ins){OStorel, R, {r1, r}, 0}; - } - } -} - -static int -aref(Ref r, ANum *ai) -{ - switch (rtype(r)) { - default: - diag("isel: aref defaulted"); - case RCon: - return 2; - case RTmp: - return ai[r.val].n; - } -} - -static int -ascale(Ref r, Con *con) -{ - int64_t n; - - if (rtype(r) != RCon) - return 0; - if (con[r.val].type != CBits) - return 0; - n = con[r.val].bits.i; - return n == 1 || n == 2 || n == 4 || n == 8; -} - -static void -anumber(ANum *ai, Blk *b, Con *con) -{ - /* This should be made obsolete by a proper - * reassoc pass. 
- * - * Rules: - * - * RTmp(_) -> 0 tmp - * ( RTmp(_) -> 1 slot ) - * RCon(_) -> 2 con - * 0 * 2 -> 3 s * i (when constant is 1,2,4,8) - */ - static char add[10][10] = { - [2] [2] = 2, /* folding */ - [2] [5] = 5, [5] [2] = 5, - [2] [6] = 6, [6] [2] = 6, - [2] [7] = 7, [7] [2] = 7, - [0] [0] = 4, /* 4: b + s * i */ - [0] [3] = 4, [3] [0] = 4, - [2] [3] = 5, [3] [2] = 5, /* 5: o + s * i */ - [0] [2] = 6, [2] [0] = 6, /* 6: o + b */ - [2] [4] = 7, [4] [2] = 7, /* 7: o + b + s * i */ - [0] [5] = 7, [5] [0] = 7, - [6] [3] = 7, [3] [6] = 7, - - }; - int a, a1, a2, n1, n2, t1, t2; - Ins *i; - - for (i=b->ins; i-b->ins < b->nins; i++) { - if (rtype(i->to) == RTmp) - ai[i->to.val].i = i; - if (i->op != OAdd && i->op != OMul) - continue; - a1 = aref(i->arg[0], ai); - a2 = aref(i->arg[1], ai); - t1 = a1 != 1 && a1 != 2; - t2 = a2 != 1 && a2 != 2; - if (i->op == OAdd) { - a = add[n1 = a1][n2 = a2]; - if (t1 && a < add[0][a2]) - a = add[n1 = 0][n2 = a2]; - if (t2 && a < add[a1][0]) - a = add[n1 = a1][n2 = 0]; - if (t1 && t2 && a < add[0][0]) - a = add[n1 = 0][n2 = 0]; - } else { - n1 = n2 = a = 0; - if (ascale(i->arg[0], con) && t2) - a = 3, n1 = 2, n2 = 0; - if (t1 && ascale(i->arg[1], con)) - a = 3, n1 = 0, n2 = 2; - } - ai[i->to.val].n = a; - ai[i->to.val].l = n1; - ai[i->to.val].r = n2; - } -} - -static void -amatch(Addr *a, Ref r, ANum *ai, Fn *fn, int top) -{ - Ins *i; - int nl, nr, t, s; - Ref al, ar; - - if (top) - memset(a, 0, sizeof *a); - if (rtype(r) == RCon) { - addcon(&a->offset, &fn->con[r.val]); - return; - } - assert(rtype(r) == RTmp); - i = ai[r.val].i; - nl = ai[r.val].l; - nr = ai[r.val].r; - if (i) { - if (nl > nr) { - al = i->arg[1]; - ar = i->arg[0]; - t = nl, nl = nr, nr = t; - } else { - al = i->arg[0]; - ar = i->arg[1]; - } - } - switch (ai[r.val].n) { - default: - diag("isel: amatch defaulted"); - case 3: /* s * i */ - if (!top) { - a->index = al; - a->scale = fn->con[ar.val].bits.i; - } else - a->base = r; - break; - case 4: /* b + s * i */ - switch 
(nr) { - case 0: - if (fn->tmp[ar.val].slot != -1) { - al = i->arg[1]; - ar = i->arg[0]; - } - a->index = ar; - a->scale = 1; - break; - case 3: - amatch(a, ar, ai, fn, 0); - break; - } - r = al; - case 0: - s = fn->tmp[r.val].slot; - if (s != -1) - r = SLOT(s); - a->base = r; - break; - case 2: /* constants */ - case 5: /* o + s * i */ - case 6: /* o + b */ - case 7: /* o + b + s * i */ - amatch(a, ar, ai, fn, 0); - amatch(a, al, ai, fn, 0); - break; - } -} - -/* instruction selection - * requires use counts (as given by parsing) - */ -void -isel(Fn *fn) -{ - Blk *b, **sb; - Ins *i, *i0, *ip; - Phi *p; - uint a; - int n, al; - int64_t sz; - ANum *ainfo; - RAlloc *ral; - - for (n=0; n<fn->ntmp; n++) - fn->tmp[n].slot = -1; - fn->slot = 0; - - /* lower arguments */ - for (b=fn->start, i=b->ins; i-b->ins < b->nins; i++) - if (i->op != OPar && i->op != OParc) - break; - selpar(fn, b->ins, i); - n = b->nins - (i - b->ins) + (curi - insb); - i0 = alloc(n * sizeof(Ins)); - ip = icpy(ip = i0, insb, curi - insb); - ip = icpy(ip, i, &b->ins[b->nins] - i); - b->nins = n; - b->ins = i0; - - /* lower function calls and returns */ - ral = 0; - b = fn->start; - do { - if (!(b = b->link)) - b = fn->start; /* do it last */ - curi = &insb[NIns]; - selret(b, fn); - for (i=&b->ins[b->nins]; i!=b->ins;) { - if ((--i)->op == OCall) { - for (i0=i; i0>b->ins; i0--) - if ((i0-1)->op != OArg) - if ((i0-1)->op != OArgc) - break; - selcall(fn, i0, i, &ral); - i = i0; - continue; - } - assert(i->op != OArg && i->op != OArgc); - emiti(*i); - } - if (b == fn->start) - for (; ral; ral=ral->link) - emiti(ral->i); - b->nins = &insb[NIns] - curi; - idup(&b->ins, curi, b->nins); - } while (b != fn->start); - - if (debug['A']) { - fprintf(stderr, "\n> After call lowering:\n"); - printfn(fn, stderr); - } - - /* assign slots to fast allocs */ - b = fn->start; - assert(NAlign == 3 && "change n=4 and sz /= 4 below"); - for (al=OAlloc, n=4; al<=OAlloc1; al++, n*=2) - for (i=b->ins; i-b->ins < b->nins; 
i++) - if (i->op == al) { - if (rtype(i->arg[0]) != RCon) - break; - sz = fn->con[i->arg[0].val].bits.i; - if (sz < 0 || sz >= INT_MAX-3) - diag("isel: invalid alloc size"); - sz = (sz + n-1) & -n; - sz /= 4; - fn->tmp[i->to.val].slot = fn->slot; - fn->slot += sz; - *i = (Ins){.op = ONop}; - } - - /* process basic blocks */ - n = fn->ntmp; - ainfo = emalloc(n * sizeof ainfo[0]); - for (b=fn->start; b; b=b->link) { - curi = &insb[NIns]; - for (sb=(Blk*[3]){b->s1, b->s2, 0}; *sb; sb++) - for (p=(*sb)->phi; p; p=p->link) { - for (a=0; p->blk[a] != b; a++) - assert(a+1 < p->narg); - fixarg(&p->arg[a], p->cls, 1, fn); - } - memset(ainfo, 0, n * sizeof ainfo[0]); - anumber(ainfo, b, fn->con); - seljmp(b, fn); - for (i=&b->ins[b->nins]; i!=b->ins;) - sel(*--i, ainfo, fn); - b->nins = &insb[NIns] - curi; - idup(&b->ins, curi, b->nins); - } - free(ainfo); - - if (debug['I']) { - fprintf(stderr, "\n> After instruction selection:\n"); - printfn(fn, stderr); - } -} diff --git a/lisc/live.c b/lisc/live.c @@ -1,174 +0,0 @@ -#include "lisc.h" - -void -liveon(BSet *v, Blk *b, Blk *s) -{ - Phi *p; - uint a; - - bscopy(v, s->in); - for (p=s->phi; p; p=p->link) { - bsclr(v, p->to.val); - for (a=0; a<p->narg; a++) - if (p->blk[a] == b) - if (rtype(p->arg[a]) == RTmp) - bsset(v, p->arg[a].val); - } -} - -static int -phitmp(int t, Tmp *tmp) -{ - int tp; - - tp = tmp[t].phi; - return tp ? 
tp : t; -} - -static void -phifix(int t1, short *phi, Tmp *tmp) -{ - int t, t2; - - /* detect temporaries arguments - * of the same phi node that - * interfere and separate them - */ - t = phitmp(t1, tmp); - t2 = phi[t]; - if (t2 && t2 != t1) { - if (t != t1) { - tmp[t1].phi = t1; - t = t1; - } else { - tmp[t2].phi = t2; - phi[t2] = t2; - } - } - phi[t] = t1; -} - -static void -bset(Ref r, Blk *b, int *nlv, short *phi, Tmp *tmp) -{ - - if (rtype(r) != RTmp) - return; - bsset(b->gen, r.val); - phifix(r.val, phi, tmp); - if (!bshas(b->in, r.val)) { - nlv[KBASE(tmp[r.val].cls)]++; - bsset(b->in, r.val); - } -} - -/* liveness analysis - * requires rpo computation - */ -void -filllive(Fn *f) -{ - Blk *b; - Ins *i; - int k, t, m[2], n, chg, nlv[2]; - short *phi; - BSet u[1], v[1]; - Mem *ma; - - bsinit(u, f->ntmp); - bsinit(v, f->ntmp); - phi = emalloc(f->ntmp * sizeof phi[0]); - for (b=f->start; b; b=b->link) { - bsinit(b->in, f->ntmp); - bsinit(b->out, f->ntmp); - bsinit(b->gen, f->ntmp); - } - chg = 1; -Again: - for (n=f->nblk-1; n>=0; n--) { - b = f->rpo[n]; - - bscopy(u, b->out); - if (b->s1) { - liveon(v, b, b->s1); - bsunion(b->out, v); - } - if (b->s2) { - liveon(v, b, b->s2); - bsunion(b->out, v); - } - chg |= !bsequal(b->out, u); - - memset(phi, 0, f->ntmp * sizeof phi[0]); - memset(nlv, 0, sizeof nlv); - bscopy(b->in, b->out); - for (t=0; t<f->ntmp; t++) - if (bshas(b->in, t)) { - phifix(t, phi, f->tmp); - nlv[KBASE(f->tmp[t].cls)]++; - } - if (rtype(b->jmp.arg) == RACall) { - assert(bscount(b->in) == 0 && nlv[0] == 0 && nlv[1] == 0); - b->in->t[0] |= retregs(b->jmp.arg, nlv); - } else - bset(b->jmp.arg, b, nlv, phi, f->tmp); - for (k=0; k<2; k++) - b->nlive[k] = nlv[k]; - for (i=&b->ins[b->nins]; i!=b->ins;) { - if ((--i)->op == OCall && rtype(i->arg[1]) == RACall) { - b->in->t[0] &= ~retregs(i->arg[1], m); - for (k=0; k<2; k++) - nlv[k] -= m[k]; - if (nlv[0] + NISave > b->nlive[0]) - b->nlive[0] = nlv[0] + NISave; - if (nlv[1] + NFSave > b->nlive[1]) - 
b->nlive[1] = nlv[1] + NFSave; - b->in->t[0] |= argregs(i->arg[1], m); - for (k=0; k<2; k++) - nlv[k] += m[k]; - } - if (!req(i->to, R)) { - assert(rtype(i->to) == RTmp); - t = i->to.val; - if (bshas(b->in, i->to.val)) - nlv[KBASE(f->tmp[t].cls)]--; - bsset(b->gen, t); - bsclr(b->in, t); - phi[phitmp(t, f->tmp)] = 0; - } - for (k=0; k<2; k++) - switch (rtype(i->arg[k])) { - case RAMem: - ma = &f->mem[i->arg[k].val & AMask]; - bset(ma->base, b, nlv, phi, f->tmp); - bset(ma->index, b, nlv, phi, f->tmp); - break; - default: - bset(i->arg[k], b, nlv, phi, f->tmp); - break; - } - for (k=0; k<2; k++) - if (nlv[k] > b->nlive[k]) - b->nlive[k] = nlv[k]; - } - } - if (chg) { - chg = 0; - goto Again; - } - free(phi); - - if (debug['L']) { - fprintf(stderr, "\n> Liveness analysis:\n"); - for (b=f->start; b; b=b->link) { - fprintf(stderr, "\t%-10sin: ", b->name); - dumpts(b->in, f->tmp, stderr); - fprintf(stderr, "\t out: "); - dumpts(b->out, f->tmp, stderr); - fprintf(stderr, "\t gen: "); - dumpts(b->gen, f->tmp, stderr); - fprintf(stderr, "\t live: "); - fprintf(stderr, "%d %d\n", b->nlive[0], b->nlive[1]); - } - } -} diff --git a/lisc/main.c b/lisc/main.c @@ -1,117 +0,0 @@ -#include "lisc.h" -#include <ctype.h> -#include <getopt.h> - -char debug['Z'+1] = { - ['P'] = 0, /* parsing */ - ['A'] = 0, /* abi lowering */ - ['I'] = 0, /* instruction selection */ - ['L'] = 0, /* liveness */ - ['M'] = 0, /* memory optimization */ - ['N'] = 0, /* ssa construction */ - ['C'] = 0, /* copy elimination */ - ['S'] = 0, /* spilling */ - ['R'] = 0, /* reg. 
allocation */ -}; - -static FILE *outf; -static int dbg; - -static void -data(Dat *d) -{ - if (dbg) - return; - if (d->type == DEnd) { - fputs("/* end data */\n\n", outf); - freeall(); - } - emitdat(d, outf); -} - -static void -func(Fn *fn) -{ - int n; - - if (dbg) - fprintf(stderr, "**** Function %s ****", fn->name); - if (debug['P']) { - fprintf(stderr, "\n> After parsing:\n"); - printfn(fn, stderr); - } - fillrpo(fn); - fillpreds(fn); - filluse(fn); - memopt(fn); - ssa(fn); - filluse(fn); - copy(fn); - filluse(fn); - isel(fn); - filllive(fn); - fillcost(fn); - spill(fn); - rega(fn); - fillrpo(fn); - assert(fn->rpo[0] == fn->start); - for (n=0;; n++) - if (n == fn->nblk-1) { - fn->rpo[n]->link = 0; - break; - } else - fn->rpo[n]->link = fn->rpo[n+1]; - if (!dbg) { - emitfn(fn, outf); - fprintf(outf, "/* end function %s */\n\n", fn->name); - } else - fprintf(stderr, "\n"); - freeall(); -} - -int -main(int ac, char *av[]) -{ - FILE *inf; - char *f; - int c; - - outf = stdout; - while ((c = getopt(ac, av, "d:o:")) != -1) - switch (c) { - case 'd': - for (; *optarg; optarg++) - if (isalpha(*optarg)) { - debug[toupper(*optarg)] = 1; - dbg = 1; - } - break; - case 'o': - if (strcmp(optarg, "-") != 0) - outf = fopen(optarg, "w"); - break; - default: - fprintf(stderr, "usage: %s [-d <flags>] [-o out] {file.ssa, -}\n", av[0]); - exit(1); - } - - do { - f = av[optind]; - if (!f || strcmp(f, "-") == 0) { - inf = stdin; - f = "-"; - } else { - inf = fopen(f, "r"); - if (!inf) { - fprintf(stderr, "cannot open '%s'\n", f); - exit(1); - } - } - parse(inf, f, data, func); - } while (++optind < ac); - - if (!dbg) - emitfin(outf); - - exit(0); -} diff --git a/lisc/mem.c b/lisc/mem.c @@ -1,81 +0,0 @@ -#include "lisc.h" - -/* Memory optimization: - * - * - replace alloced slots used only in - * load/store operations - * Assumption: all the accesses have the - * same size (this could be wrong...) 
- */ - -/* require use, maintains use counts */ -void -memopt(Fn *fn) -{ - Blk *b; - Ins *i, *l; - Tmp *t; - Use *u, *ue; - int a; - - b = fn->start; - for (i=b->ins; i-b->ins < b->nins; i++) { - if (OAlloc > i->op || i->op > OAlloc1) - continue; - assert(NAlign == 3); - assert(rtype(i->to) == RTmp); - t = &fn->tmp[i->to.val]; - for (u=t->use; u != &t->use[t->nuse]; u++) { - if (u->type != UIns) - goto NextIns; - l = u->u.ins; - if (!isload(l->op) - && (!isstore(l->op) || req(i->to, l->arg[0]))) - goto NextIns; - } - /* get rid of the alloc and replace uses */ - *i = (Ins){.op = ONop}; - t->ndef--; - ue = &t->use[t->nuse]; - for (u=t->use; u!=ue; u++) { - l = u->u.ins; - if (isstore(l->op)) { - if (l->op == OStores) - l->cls = Kd; - else if (l->op == OStored) - l->cls = Kd; - else if (l->op == OStorel) - l->cls = Kl; - else - l->cls = Kw; - l->op = OCopy; - l->to = l->arg[1]; - l->arg[1] = R; - t->nuse--; - t->ndef++; - } else - /* try to turn loads into copies so we - * can eliminate them later */ - switch(l->op) { - case OLoad: - l->op = OCopy; - break; - case OLoadsw: - case OLoaduw: - l->cls = Kw; - l->op = OCopy; - break; - default: - /* keep l->cls */ - a = l->op - OLoadsw; - l->op = OExtsw + a; - break; - } - } - NextIns:; - } - if (debug['M']) { - fprintf(stderr, "\n> After memory optimization:\n"); - printfn(fn, stderr); - } -} diff --git a/lisc/parse.c b/lisc/parse.c @@ -1,1081 +0,0 @@ -#include "lisc.h" -#include <ctype.h> -#include <stdarg.h> - -enum { - Kx = -1, /* Invalid operand */ - Km = Kl, /* Memory pointer (for x64) */ -}; - -OpDesc opdesc[NOp] = { -#define A(a,b,c,d) {[Kw]=K##a, [Kl]=K##b, [Ks]=K##c, [Kd]=K##d} - - /* NAME NM ARGCLS0 ARGCLS1 SF LF */ - [OAdd] = { "add", 2, {A(w,l,s,d), A(w,l,s,d)}, 1, 0 }, - [OSub] = { "sub", 2, {A(w,l,s,d), A(w,l,s,d)}, 1, 0 }, - [ODiv] = { "div", 2, {A(w,l,s,d), A(w,l,s,d)}, 0, 0 }, - [ORem] = { "rem", 2, {A(w,l,x,x), A(w,l,x,x)}, 0, 0 }, - [OUDiv] = { "udiv", 2, {A(w,l,s,d), A(w,l,s,d)}, 0, 0 }, - [OURem] = { 
"urem", 2, {A(w,l,x,x), A(w,l,x,x)}, 0, 0 }, - [OMul] = { "mul", 2, {A(w,l,s,d), A(w,l,s,d)}, 0, 0 }, - [OAnd] = { "and", 2, {A(w,l,s,d), A(w,l,s,d)}, 1, 0 }, - [OOr] = { "or", 2, {A(w,l,s,d), A(w,l,s,d)}, 1, 0 }, - [OXor] = { "xor", 2, {A(w,l,s,d), A(w,l,s,d)}, 1, 0 }, - [OSar] = { "sar", 1, {A(w,l,x,x), A(w,w,x,x)}, 1, 0 }, - [OShr] = { "shr", 1, {A(w,l,x,x), A(w,w,x,x)}, 1, 0 }, - [OShl] = { "shl", 1, {A(w,l,x,x), A(w,w,x,x)}, 1, 0 }, - [OStored] = { "stored", 0, {A(d,d,d,d), A(m,m,m,m)}, 0, 1 }, - [OStores] = { "stores", 0, {A(s,s,s,s), A(m,m,m,m)}, 0, 1 }, - [OStorel] = { "storel", 0, {A(l,l,l,l), A(m,m,m,m)}, 0, 1 }, - [OStorew] = { "storew", 0, {A(w,w,w,w), A(m,m,m,m)}, 0, 1 }, - [OStoreh] = { "storeh", 0, {A(w,w,w,w), A(m,m,m,m)}, 0, 1 }, - [OStoreb] = { "storeb", 0, {A(w,w,w,w), A(m,m,m,m)}, 0, 1 }, - [OLoad] = { "load", 0, {A(m,m,m,m), A(x,x,x,x)}, 0, 1 }, - [OLoadsw] = { "loadsw", 0, {A(m,m,x,x), A(x,x,x,x)}, 0, 1 }, - [OLoaduw] = { "loaduw", 0, {A(m,m,x,x), A(x,x,x,x)}, 0, 1 }, - [OLoadsh] = { "loadsh", 0, {A(m,m,x,x), A(x,x,x,x)}, 0, 1 }, - [OLoaduh] = { "loaduh", 0, {A(m,m,x,x), A(x,x,x,x)}, 0, 1 }, - [OLoadsb] = { "loadsb", 0, {A(m,m,x,x), A(x,x,x,x)}, 0, 1 }, - [OLoadub] = { "loadub", 0, {A(m,m,x,x), A(x,x,x,x)}, 0, 1 }, - [OExtsw] = { "extsw", 0, {A(w,w,w,w), A(x,x,x,x)}, 0, 1 }, - [OExtuw] = { "extuw", 0, {A(w,w,w,w), A(x,x,x,x)}, 0, 1 }, - [OExtsh] = { "extsh", 0, {A(w,w,w,w), A(x,x,x,x)}, 0, 1 }, - [OExtuh] = { "extuh", 0, {A(w,w,w,w), A(x,x,x,x)}, 0, 1 }, - [OExtsb] = { "extsb", 0, {A(w,w,w,w), A(x,x,x,x)}, 0, 1 }, - [OExtub] = { "extub", 0, {A(w,w,w,w), A(x,x,x,x)}, 0, 1 }, - [OExts] = { "exts", 0, {A(w,w,w,w), A(x,x,x,x)}, 0, 1 }, - [OTruncd] = { "truncd", 0, {A(d,d,d,d), A(x,x,x,x)}, 0, 1 }, - [OFtosi] = { "ftosi", 0, {A(s,d,x,x), A(x,x,x,x)}, 0, 1 }, - [OSitof] = { "sitof", 0, {A(x,x,w,l), A(x,x,x,x)}, 0, 1 }, - [OCast] = { "cast", 0, {A(s,d,w,l), A(x,x,x,x)}, 0, 1 }, - [OCopy] = { "copy", 1, {A(w,l,s,d), A(x,x,x,x)}, 0, 1 }, - [ONop] = { 
"nop", 0, {A(x,x,x,x), A(x,x,x,x)}, 0, 1 }, - [OSwap] = { "swap", 2, {A(w,l,s,d), A(w,l,s,d)}, 0, 0 }, - [OSign] = { "sign", 0, {A(w,l,x,x), A(x,x,x,x)}, 0, 0 }, - [OSAlloc] = { "salloc", 0, {A(x,l,x,x), A(x,x,x,x)}, 0, 0 }, - [OXDiv] = { "xdiv", 1, {A(w,l,x,x), A(x,x,x,x)}, 0, 0 }, - [OXCmp] = { "xcmp", 1, {A(w,l,s,d), A(w,l,s,d)}, 1, 0 }, - [OXTest] = { "xtest", 1, {A(w,l,x,x), A(w,l,x,x)}, 1, 0 }, - [OAddr] = { "addr", 0, {A(m,m,x,x), A(x,x,x,x)}, 0, 1 }, - [OPar] = { "parn", 0, {A(x,x,x,x), A(x,x,x,x)}, 0, 0 }, - [OParc] = { "parc", 0, {A(x,x,x,x), A(x,x,x,x)}, 0, 0 }, - [OArg] = { "arg", 0, {A(x,x,x,x), A(x,x,x,x)}, 0, 0 }, - [OArgc] = { "argc", 0, {A(x,x,x,x), A(x,x,x,x)}, 0, 0 }, - [OCall] = { "call", 0, {A(m,m,m,m), A(x,x,x,x)}, 0, 0 }, - [OXSetnp] = { "xsetnp", 0, {A(x,x,x,x), A(x,x,x,x)}, 0, 0 }, - [OXSetp] = { "xsetp", 0, {A(x,x,x,x), A(x,x,x,x)}, 0, 0 }, - [OAlloc] = { "alloc4", 1, {A(l,l,l,l), A(x,x,x,x)}, 0, 0 }, - [OAlloc+1] = { "alloc8", 1, {A(l,l,l,l), A(x,x,x,x)}, 0, 0 }, - [OAlloc+2] = { "alloc16", 1, {A(l,l,l,l), A(x,x,x,x)}, 0, 0 }, -#define X(c) \ - [OCmpw+IC##c] = { "c" #c "w", 0, {A(w,w,x,x), A(w,w,x,x)}, 1, 0 }, \ - [OCmpl+IC##c] = { "c" #c "l", 0, {A(l,l,x,x), A(l,l,x,x)}, 1, 0 }, \ - [OXSet+IC##c] = { "xset" #c, 0, {A(x,x,x,x), A(x,x,x,x)}, 0, 1 }, - ICMPS(X) -#undef X -#define X(c) \ - [OCmps+FC##c] = { "c" #c "s", 0, {A(s,s,x,x), A(s,s,x,x)}, 1, 0 }, \ - [OCmpd+FC##c] = { "c" #c "d", 0, {A(d,d,x,x), A(d,d,x,x)}, 1, 0 }, - FCMPS(X) -#undef X - -}; -#undef A - -typedef enum { - PXXX, - PLbl, - PPhi, - PIns, - PEnd, -} PState; - -enum { - TXXX = NPubOp, - TCall, - TPhi, - TJmp, - TJnz, - TRet, - TFunc, - TType, - TData, - TAlign, - TL, - TW, - TH, - TB, - TD, - TS, - TZ, - - TInt, - TFlts, - TFltd, - TTmp, - TLbl, - TGlo, - TTyp, - TStr, - - TPlus, - TEq, - TComma, - TLParen, - TRParen, - TLBrace, - TRBrace, - TNL, - TEOF, -}; - - -static FILE *inf; -static char *inpath; -static int thead; -static struct { - char chr; - double fltd; - 
float flts; - int64_t num; - char *str; -} tokval; -static int lnum; - -static Tmp *tmp; -static Con *con; -static int ntmp; -static int ncon; -static Phi **plink; -static Blk **bmap; -static Blk *curb; -static Blk **blink; -static int nblk; -static int rcls; -static int ntyp; - - -void -err(char *s, ...) -{ - char buf[100], *p, *end; - va_list ap; - - p = buf; - end = buf + sizeof(buf); - - va_start(ap, s); - p += snprintf(p, end - p, "%s:%d: ", inpath, lnum); - p += vsnprintf(p, end - p, s, ap); - va_end(ap); - - diag(buf); -} - -static int -lex() -{ - static struct { - char *str; - int tok; - } tmap[] = { - { "call", TCall }, - { "phi", TPhi }, - { "jmp", TJmp }, - { "jnz", TJnz }, - { "ret", TRet }, - { "function", TFunc }, - { "type", TType }, - { "data", TData }, - { "align", TAlign }, - { "l", TL }, - { "w", TW }, - { "h", TH }, - { "b", TB }, - { "d", TD }, - { "s", TS }, - { "z", TZ }, - { "loadw", OLoad }, /* for convenience */ - { "loadl", OLoad }, - { "loads", OLoad }, - { "loadd", OLoad }, - { "alloc1", OAlloc }, - { "alloc2", OAlloc }, - { 0, TXXX } - }; - static char tok[NString]; - int c, i; - int t; - - do - c = fgetc(inf); - while (isblank(c)); - t = TXXX; - tokval.chr = c; - switch (c) { - case EOF: - return TEOF; - case ',': - return TComma; - case '(': - return TLParen; - case ')': - return TRParen; - case '{': - return TLBrace; - case '}': - return TRBrace; - case '=': - return TEq; - case '+': - return TPlus; - case 's': - if (fscanf(inf, "_%f", &tokval.flts) != 1) - break; - return TFlts; - case 'd': - if (fscanf(inf, "_%lf", &tokval.fltd) != 1) - break; - return TFltd; - case '%': - t = TTmp; - goto Alpha; - case '@': - t = TLbl; - goto Alpha; - case '$': - t = TGlo; - goto Alpha; - case ':': - t = TTyp; - goto Alpha; - case '#': - while (fgetc(inf) != '\n') - ; - case '\n': - lnum++; - return TNL; - } - if (isdigit(c) || c == '-' || c == '+') { - ungetc(c, inf); - if (fscanf(inf, "%"SCNd64, &tokval.num) != 1) - err("invalid integer 
literal"); - return TInt; - } - if (c == '"') { - tokval.str = vnew(0, 1); - for (i=0;; i++) { - c = fgetc(inf); - vgrow(&tokval.str, i+1); - if (c == '"') - if (!i || tokval.str[i-1] != '\\') { - tokval.str[i] = 0; - return TStr; - } - tokval.str[i] = c; - } - } - if (0) -Alpha: c = fgetc(inf); - if (!isalpha(c) && c != '.' && c != '_') - err("lexing failure: invalid character %c (%d)", c, c); - i = 0; - do { - if (i >= NString-1) - err("identifier too long"); - tok[i++] = c; - c = fgetc(inf); - } while (isalpha(c) || c == '$' || c == '.' || c == '_' || isdigit(c)); - tok[i] = 0; - ungetc(c, inf); - tokval.str = tok; - if (t != TXXX) { - return t; - } - for (i=0; i<NPubOp; i++) - if (opdesc[i].name) - if (strcmp(tok, opdesc[i].name) == 0) - return i; - for (i=0; tmap[i].str; i++) - if (strcmp(tok, tmap[i].str) == 0) - return tmap[i].tok; - err("unknown keyword %s", tokval.str); - return TXXX; -} - -static int -peek() -{ - if (thead == TXXX) - thead = lex(); - return thead; -} - -static int -next() -{ - int t; - - t = peek(); - thead = TXXX; - return t; -} - -static int -nextnl() -{ - int t; - - while ((t = next()) == TNL) - ; - return t; -} - -static void -expect(int t) -{ - static char *ttoa[] = { - [TLbl] = "label", - [TComma] = ",", - [TEq] = "=", - [TNL] = "newline", - [TLParen] = "(", - [TRParen] = ")", - [TLBrace] = "{", - [TRBrace] = "}", - [TEOF] = 0, - }; - char buf[128], *s1, *s2; - int t1; - - t1 = next(); - if (t == t1) - return; - s1 = ttoa[t] ? ttoa[t] : "??"; - s2 = ttoa[t1] ? 
ttoa[t1] : "??"; - sprintf(buf, "%s expected, got %s instead", s1, s2); - err(buf); -} - -static Ref -tmpref(char *v) -{ - int t; - - for (t=Tmp0; t<ntmp; t++) - if (strcmp(v, tmp[t].name) == 0) - return TMP(t); - vgrow(&tmp, ++ntmp); - strcpy(tmp[t].name, v); - return TMP(t); -} - -static Ref -parseref() -{ - Con c; - int i; - - memset(&c, 0, sizeof c); - switch (next()) { - case TTmp: - return tmpref(tokval.str); - case TInt: - c.type = CBits; - c.bits.i = tokval.num; - goto Look; - case TFlts: - c.type = CBits; - c.bits.s = tokval.flts; - c.flt = 1; - goto Look; - case TFltd: - c.type = CBits; - c.bits.d = tokval.fltd; - c.flt = 2; - goto Look; - case TGlo: - c.type = CAddr; - strcpy(c.label, tokval.str); - Look: - for (i=0; i<ncon; i++) - if (con[i].type == c.type - && con[i].bits.i == c.bits.i - && strcmp(con[i].label, c.label) == 0) - return CON(i); - vgrow(&con, ++ncon); - con[i] = c; - return CON(i); - default: - return R; - } -} - -static int -parsecls(int *tyn) -{ - int i; - - switch (next()) { - default: - err("invalid class specifier"); - case TTyp: - for (i=0; i<ntyp; i++) - if (strcmp(tokval.str, typ[i].name) == 0) { - *tyn = i; - return 4; - } - err("undefined type"); - case TW: - return Kw; - case TL: - return Kl; - case TS: - return Ks; - case TD: - return Kd; - } -} - -static void -parserefl(int arg) -{ - int k, t, ty; - Ref r; - - expect(TLParen); - if (peek() == TRParen) { - next(); - return; - } - for (;;) { - if (curi - insb >= NIns) - err("too many instructions (1)"); - k = parsecls(&ty); - r = parseref(); - if (req(r, R)) - err("invalid reference argument"); - if (!arg && rtype(r) != RTmp) - err("invalid function parameter"); - if (k == 4) - if (arg) - *curi = (Ins){OArgc, R, {TYPE(ty), r}, Kl}; - else - *curi = (Ins){OParc, r, {TYPE(ty)}, Kl}; - else - if (arg) - *curi = (Ins){OArg, R, {r}, k}; - else - *curi = (Ins){OPar, r, {R}, k}; - curi++; - t = next(); - if (t == TRParen) - break; - if (t != TComma) - err(", or ) expected"); - } -} - 
-static Blk * -findblk(char *name) -{ - int i; - - for (i=0; i<nblk; i++) - if (strcmp(bmap[i]->name, name) == 0) - return bmap[i]; - vgrow(&bmap, ++nblk); - bmap[i] = blknew(); - strcpy(bmap[i]->name, name); - return bmap[i]; -} - -static void -closeblk() -{ - curb->nins = curi - insb; - idup(&curb->ins, insb, curb->nins); - blink = &curb->link; - curi = insb; -} - -static PState -parseline(PState ps) -{ - Ref arg[NPred] = {R}; - Blk *blk[NPred]; - Phi *phi; - Ref r; - Blk *b; - int t, op, i, k, ty; - - t = nextnl(); - if (ps == PLbl && t != TLbl && t != TRBrace) - err("label or } expected"); - switch (t) { - default: - if (isstore(t)) { - /* operations without result */ - r = R; - k = 0; - op = t; - goto DoOp; - } - err("label, instruction or jump expected"); - case TRBrace: - return PEnd; - case TTmp: - break; - case TLbl: - b = findblk(tokval.str); - if (b->jmp.type != JXXX) - err("multiple definitions of block"); - if (curb && curb->jmp.type == JXXX) { - closeblk(); - curb->jmp.type = JJmp; - curb->s1 = b; - } - *blink = b; - curb = b; - plink = &curb->phi; - expect(TNL); - return PPhi; - case TRet: - curb->jmp.type = (int[]){ - JRetw, JRetl, - JRets, JRetd, - JRetc, JRet0 - }[rcls]; - if (rcls < 5) { - r = parseref(); - if (req(r, R)) - err("return value expected"); - curb->jmp.arg = r; - } - goto Close; - case TJmp: - curb->jmp.type = JJmp; - goto Jump; - case TJnz: - curb->jmp.type = JJnz; - r = parseref(); - if (req(r, R)) - err("invalid argument for jnz jump"); - curb->jmp.arg = r; - expect(TComma); - Jump: - expect(TLbl); - curb->s1 = findblk(tokval.str); - if (curb->jmp.type != JJmp) { - expect(TComma); - expect(TLbl); - curb->s2 = findblk(tokval.str); - } - Close: - expect(TNL); - closeblk(); - return PLbl; - } - r = tmpref(tokval.str); - expect(TEq); - k = parsecls(&ty); - op = next(); -DoOp: - if (op == TPhi) { - if (ps != PPhi) - err("unexpected phi instruction"); - op = -1; - } - if (op == TCall) { - arg[0] = parseref(); - parserefl(1); - 
expect(TNL); - op = OCall; - if (k == 4) { - k = Kl; - arg[1] = TYPE(ty); - } else - arg[1] = R; - goto Ins; - } - if (k == 4) - err("size class must be w, l, s, or d"); - if (op >= NPubOp) - err("invalid instruction"); - i = 0; - if (peek() != TNL) - for (;;) { - if (i == NPred) - err("too many arguments"); - if (op == -1) { - expect(TLbl); - blk[i] = findblk(tokval.str); - } - arg[i] = parseref(); - if (req(arg[i], R)) - err("invalid instruction argument"); - i++; - t = peek(); - if (t == TNL) - break; - if (t != TComma) - err(", or end of line expected"); - next(); - } - next(); - if (op != -1) { - Ins: - if (curi - insb >= NIns) - err("too many instructions (2)"); - curi->op = op; - curi->cls = k; - curi->to = r; - curi->arg[0] = arg[0]; - curi->arg[1] = arg[1]; - curi++; - return PIns; - } else { - phi = alloc(sizeof *phi); - phi->to = r; - phi->cls = k; - memcpy(phi->arg, arg, i * sizeof arg[0]); - memcpy(phi->blk, blk, i * sizeof blk[0]); - phi->narg = i; - *plink = phi; - plink = &phi->link; - return PPhi; - } -} - -static Fn * -parsefn() -{ - PState ps; - Fn *fn; - - ntmp = Tmp0; - ncon = 1; /* first constant must be 0 */ - curb = 0; - nblk = 0; - curi = insb; - tmp = vnew(ntmp, sizeof tmp[0]); - con = vnew(ncon, sizeof con[0]); - bmap = vnew(nblk, sizeof bmap[0]); - con[0].type = CBits; - fn = alloc(sizeof *fn); - blink = &fn->start; - fn->retty = -1; - if (peek() != TGlo) - rcls = parsecls(&fn->retty); - else - rcls = 5; - if (next() != TGlo) - err("function name expected"); - strcpy(fn->name, tokval.str); - parserefl(0); - if (nextnl() != TLBrace) - err("function body must start with {"); - ps = PLbl; - do - ps = parseline(ps); - while (ps != PEnd); - if (!curb) - err("empty file"); - if (curb->jmp.type == JXXX) - err("last block misses jump"); - fn->tmp = tmp; - fn->con = con; - fn->mem = vnew(0, sizeof fn->mem[0]); - fn->ntmp = ntmp; - fn->ncon = ncon; - fn->nmem = 0; - fn->nblk = nblk; - fn->rpo = 0; - return fn; -} - -static void -parsetyp() -{ - 
Typ *ty; - int t, n, sz, al, s, a, c, flt; - - if (ntyp >= NTyp) - err("too many type definitions"); - ty = &typ[ntyp++]; - ty->align = -1; - if (nextnl() != TTyp || nextnl() != TEq) - err("type name, then = expected"); - strcpy(ty->name, tokval.str); - t = nextnl(); - if (t == TAlign) { - if (nextnl() != TInt) - err("alignment expected"); - for (al=0; tokval.num /= 2; al++) - ; - ty->align = al; - t = nextnl(); - } - if (t != TLBrace) - err("type body must start with {"); - t = nextnl(); - if (t == TInt) { - ty->dark = 1; - ty->size = tokval.num; - if (ty->align == -1) - err("dark types need alignment"); - t = nextnl(); - } else { - ty->dark = 0; - n = -1; - sz = 0; - al = 0; - for (;;) { - flt = 0; - switch (t) { - default: err("invalid size specifier %c", tokval.chr); - case TD: flt = 1; - case TL: s = 8; a = 3; break; - case TS: flt = 1; - case TW: s = 4; a = 2; break; - case TH: s = 2; a = 1; break; - case TB: s = 1; a = 0; break; - } - if (a > al) - al = a; - if ((a = sz & (s-1))) { - a = s - a; - if (++n < NSeg) { - /* padding segment */ - ty->seg[n].ispad = 1; - ty->seg[n].len = a; - } - } - t = nextnl(); - if (t == TInt) { - c = tokval.num; - t = nextnl(); - } else - c = 1; - while (c-- > 0) { - if (++n < NSeg) { - ty->seg[n].isflt = flt; - ty->seg[n].ispad = 0; - ty->seg[n].len = s; - } - sz += a + s; - } - if (t != TComma) - break; - t = nextnl(); - } - if (++n >= NSeg) - ty->dark = 1; - else - ty->seg[n].len = 0; - if (ty->align == -1) - ty->align = al; - else - al = ty->align; - a = (1 << al) - 1; - ty->size = (sz + a) & ~a; - } - if (t != TRBrace) - err("expected closing }"); -} - -static void -parsedatref(Dat *d) -{ - int t; - - d->isref = 1; - d->u.ref.nam = tokval.str; - d->u.ref.off = 0; - t = peek(); - if (t == TPlus) { - next(); - if (next() != TInt) - err("invalid token after offset in ref"); - d->u.ref.off = tokval.num; - } -} - -static void -parsedatstr(Dat *d) -{ - d->isstr = 1; - d->u.str = tokval.str; -} - -static void -parsedat(void 
cb(Dat *)) -{ - char s[NString]; - int t; - Dat d; - - d.type = DStart; - d.isstr = 0; - d.isref = 0; - cb(&d); - if (nextnl() != TGlo || nextnl() != TEq) - err("data name, then = expected"); - strcpy(s, tokval.str); - t = nextnl(); - if (t == TAlign) { - if (nextnl() != TInt) - err("alignment expected"); - d.type = DAlign; - d.u.num = tokval.num; - cb(&d); - t = nextnl(); - } - d.type = DName; - d.u.str = s; - cb(&d); - - if (t != TLBrace) - err("expected data contents in { .. }"); - for (;;) { - switch (nextnl()) { - default: err("invalid size specifier %c in data", tokval.chr); - case TRBrace: goto Done; - case TL: d.type = DL; break; - case TW: d.type = DW; break; - case TH: d.type = DH; break; - case TB: d.type = DB; break; - case TS: d.type = DW; break; - case TD: d.type = DL; break; - case TZ: d.type = DZ; break; - } - t = nextnl(); - do { - d.isref = 0; - d.isstr = 0; - memset(&d.u, 0, sizeof d.u); - if (t == TFlts) - d.u.flts = tokval.flts; - else if (t == TFltd) - d.u.fltd = tokval.fltd; - else if (t == TInt) - d.u.num = tokval.num; - else if (t == TGlo) - parsedatref(&d); - else if (t == TStr) - parsedatstr(&d); - else - err("constant literal expected"); - cb(&d); - t = nextnl(); - } while (t == TInt || t == TFlts || t == TFltd); - if (t == TRBrace) - break; - if (t != TComma) - err(", or } expected"); - } -Done: - d.type = DEnd; - cb(&d); -} - -void -parse(FILE *f, char *path, void data(Dat *), void func(Fn *)) -{ - inf = f; - inpath = path; - lnum = 1; - thead = TXXX; - ntyp = 0; - for (;;) - switch (nextnl()) { - case TFunc: - func(parsefn()); - break; - case TType: - parsetyp(); - break; - case TData: - parsedat(data); - break; - case TEOF: - return; - default: - err("top-level definition expected"); - break; - } -} - -static void -printcon(Con *c, FILE *f) -{ - switch (c->type) { - case CUndef: - break; - case CAddr: - fprintf(f, "$%s", c->label); - if (c->bits.i) - fprintf(f, "%+"PRIi64, c->bits.i); - break; - case CBits: - if (c->flt == 1) - 
fprintf(f, "s_%f", c->bits.s); - else if (c->flt == 2) - fprintf(f, "d_%lf", c->bits.d); - else - fprintf(f, "%"PRIi64, c->bits.i); - break; - } -} - -void -printref(Ref r, Fn *fn, FILE *f) -{ - int i; - Mem *m; - - switch (rtype(r)) { - case RTmp: - if (r.val < Tmp0) - fprintf(f, "R%d", r.val); - else - fprintf(f, "%%%s", fn->tmp[r.val].name); - break; - case RCon: - printcon(&fn->con[r.val], f); - break; - case RSlot: - fprintf(f, "S%d", r.val); - break; - case RACall: - fprintf(f, "%03x", r.val & AMask); - break; - case RAType: - fprintf(f, ":%s", typ[r.val & AMask].name); - break; - case RAMem: - i = 0; - m = &fn->mem[r.val & AMask]; - fputc('[', f); - if (m->offset.type != CUndef) { - printcon(&m->offset, f); - i = 1; - } - if (!req(m->base, R)) { - if (i) - fprintf(f, " + "); - printref(m->base, fn, f); - i = 1; - } - if (!req(m->index, R)) { - if (i) - fprintf(f, " + "); - fprintf(f, "%d * ", m->scale); - printref(m->index, fn, f); - } - fputc(']', f); - break; - } -} - -void -printfn(Fn *fn, FILE *f) -{ - static char *jtoa[NJmp] = { - [JRet0] = "ret", - [JRetw] = "retw", - [JRetl] = "retl", - [JRetc] = "retc", - [JRets] = "rets", - [JRetd] = "retd", - [JJnz] = "jnz", - [JXJnp] = "xjnp", - [JXJp] = "xjp", - #define X(c) [JXJc+IC##c] = "xj" #c, - ICMPS(X) - #undef X - }; - static char prcls[NOp] = { - [OArg] = 1, - [OSwap] = 1, - [OXCmp] = 1, - [OXTest] = 1, - [OXDiv] = 1, - [OXIDiv] = 1, - }; - static char ktoc[] = "wlsd"; - Blk *b; - Phi *p; - Ins *i; - uint n; - - fprintf(f, "function $%s() {\n", fn->name); - for (b=fn->start; b; b=b->link) { - fprintf(f, "@%s\n", b->name); - for (p=b->phi; p; p=p->link) { - fprintf(f, "\t"); - printref(p->to, fn, f); - fprintf(f, " =%c phi ", ktoc[p->cls]); - assert(p->narg); - for (n=0;; n++) { - fprintf(f, "@%s ", p->blk[n]->name); - printref(p->arg[n], fn, f); - if (n == p->narg-1) { - fprintf(f, "\n"); - break; - } else - fprintf(f, ", "); - } - } - for (i=b->ins; i-b->ins < b->nins; i++) { - fprintf(f, "\t"); - if 
(!req(i->to, R)) { - printref(i->to, fn, f); - fprintf(f, " =%c ", ktoc[i->cls]); - } - assert(opdesc[i->op].name); - fprintf(f, "%s", opdesc[i->op].name); - if (req(i->to, R) && prcls[i->op]) - fputc(ktoc[i->cls], f); - if (!req(i->arg[0], R)) { - fprintf(f, " "); - printref(i->arg[0], fn, f); - } - if (!req(i->arg[1], R)) { - fprintf(f, ", "); - printref(i->arg[1], fn, f); - } - fprintf(f, "\n"); - } - switch (b->jmp.type) { - case JRet0: - case JRetw: - case JRetl: - case JRets: - case JRetd: - case JRetc: - fprintf(f, "\t%s", jtoa[b->jmp.type]); - if (b->jmp.type != JRet0 || !req(b->jmp.arg, R)) { - fprintf(f, " "); - printref(b->jmp.arg, fn, f); - } - if (b->jmp.type == JRetc) - fprintf(f, ", :%s", typ[fn->retty].name); - fprintf(f, "\n"); - break; - case JJmp: - if (b->s1 != b->link) - fprintf(f, "\tjmp @%s\n", b->s1->name); - break; - default: - fprintf(f, "\t%s ", jtoa[b->jmp.type]); - if (b->jmp.type == JJnz) { - printref(b->jmp.arg, fn, f); - fprintf(f, ", "); - } - fprintf(f, "@%s, @%s\n", b->s1->name, b->s2->name); - break; - } - } - fprintf(f, "}\n"); -} diff --git a/lisc/rega.c b/lisc/rega.c @@ -1,597 +0,0 @@ -#include "lisc.h" -#ifdef TEST_PMOV - #undef assert - #define assert(x) assert_test(#x, x) -#endif - -typedef struct RMap RMap; - -struct RMap { - int t[NIReg+NFReg]; - int r[NIReg+NFReg]; - BSet b[1]; - int n; -}; - -static bits regu; /* registers used */ -static Tmp *tmp; /* function temporaries */ -static Mem *mem; /* function mem references */ -static struct { - Ref src, dst; - int cls; -} *pm; /* parallel move constructed */ -static int cpm, npm; /* capacity and size of pm */ - -static int * -hint(int t) -{ - return &tmp[phicls(t, tmp)].hint.r; -} - -static void -sethint(int t, int r) -{ - bits m; - - m = tmp[phicls(t, tmp)].hint.m; - if (*hint(t) == -1) - if (!(BIT(r) & m)) - *hint(t) = r; -} - -static void -rcopy(RMap *ma, RMap *mb) -{ - memcpy(ma->t, mb->t, sizeof ma->t); - memcpy(ma->r, mb->r, sizeof ma->r); - bscopy(ma->b, mb->b); - 
ma->n = mb->n; -} - -static int -rfind(RMap *m, int t) -{ - int i; - - for (i=0; i<m->n; i++) - if (m->t[i] == t) - return m->r[i]; - return -1; -} - -static Ref -rref(RMap *m, int t) -{ - int r, s; - - r = rfind(m, t); - if (r == -1) { - s = tmp[t].slot; - assert(s != -1 && "should have spilled"); - return SLOT(s); - } else - return TMP(r); -} - -static void -radd(RMap *m, int t, int r) -{ - assert((t >= Tmp0 || t == r) && "invalid temporary"); - assert(((RAX <= r && r < RAX + NIReg) || (XMM0 <= r && r < XMM0 + NFReg)) && "invalid register"); - assert(!bshas(m->b, t) && "temporary has mapping"); - assert(!bshas(m->b, r) && "register already allocated"); - assert(m->n <= NIReg+NFReg && "too many mappings"); - bsset(m->b, t); - bsset(m->b, r); - m->t[m->n] = t; - m->r[m->n] = r; - m->n++; - regu |= BIT(r); -} - -static Ref -ralloc(RMap *m, int t) -{ - bits regs; - int r, r0, r1; - - if (t < Tmp0) { - assert(bshas(m->b, t)); - return TMP(t); - } - if (bshas(m->b, t)) { - r = rfind(m, t); - assert(r != -1); - return TMP(r); - } - r = *hint(t); - if (r == -1 || bshas(m->b, r)) { - regs = tmp[phicls(t, tmp)].hint.m; - regs |= m->b->t[0]; - switch (KBASE(tmp[t].cls)) { - case 0: - r0 = RAX; - r1 = RAX + NIReg; - break; - case 1: - r0 = XMM0; - r1 = XMM0 + NFReg; - break; - } - for (r=r0; r<r1; r++) - if (!(regs & BIT(r))) - goto Found; - for (r=r0; r<r1; r++) - if (!bshas(m->b, r)) - goto Found; - diag("rega: no more regs"); - } -Found: - radd(m, t, r); - sethint(t, r); - return TMP(r); -} - -static int -rfree(RMap *m, int t) -{ - int i, r; - - if (!bshas(m->b, t)) - return -1; - for (i=0; m->t[i] != t; i++) - assert(i+1 < m->n); - r = m->r[i]; - bsclr(m->b, t); - bsclr(m->b, r); - m->n--; - memmove(&m->t[i], &m->t[i+1], (m->n-i) * sizeof m->t[0]); - memmove(&m->r[i], &m->r[i+1], (m->n-i) * sizeof m->r[0]); - return r; -} - -static void -mdump(RMap *m) -{ - int i; - - for (i=0; i<m->n; i++) - fprintf(stderr, " (%s, R%d)", - tmp[m->t[i]].name, - m->r[i]); - 
fprintf(stderr, "\n"); -} - -static void -pmadd(Ref src, Ref dst, int k) -{ - if (npm == cpm) { - cpm = cpm * 2 + 16; - pm = realloc(pm, cpm * sizeof pm[0]); - if (!pm) - diag("pmadd: out of memory"); - } - pm[npm].src = src; - pm[npm].dst = dst; - pm[npm].cls = k; - npm++; -} - -enum PMStat { ToMove, Moving, Moved }; - -static Ref -pmrec(enum PMStat *status, int i, int *k) -{ - Ref swp, swp1; - int j, k1; - - /* note, this routine might emit - * too many large instructions: - * - * , x -- x - * x -- x -- x | - * ` x -- x - * - * if only the first move is wide - * the whole cycle will be wide, - * this is safe but not necessary - */ - - if (req(pm[i].src, pm[i].dst)) - return R; - status[i] = Moving; - assert(KBASE(*k) == KBASE(pm[i].cls)); - assert((Kw|1) == Kl && (Ks|1) == Kd); - *k |= KWIDE(pm[i].cls); /* see above */ - swp = R; - for (j=0; j<npm; j++) { - if (req(pm[j].src, pm[i].dst)) - switch (status[j]) { - case ToMove: - k1 = *k; - swp1 = pmrec(status, j, &k1); - if (!req(swp1, R)) { - assert(req(swp, R)); - swp = swp1; - *k = k1; - } - break; - case Moving: - assert(req(swp, R)); - swp = pm[i].dst; - break; - case Moved: - break; - } - } - status[i] = Moved; - if (req(swp, R)) { - *curi++ = (Ins){OCopy, pm[i].dst, {pm[i].src}, pm[i].cls}; - return R; - } else if (!req(swp, pm[i].src)) { - *curi++ = (Ins){OSwap, R, {pm[i].src, pm[i].dst}, *k}; - return swp; - } else - return R; - -} - -static void -pmgen() -{ - int i, k; - enum PMStat *status; - - status = alloc(npm * sizeof status[0]); - assert(!npm || status[npm-1] == ToMove); - curi = insb; - for (i=0; i<npm; i++) - if (status[i] == ToMove) { - k = pm[i].cls; - pmrec(status, i, &k); - } -} - -static void -move(int r, Ref to, RMap *m) -{ - int n, t, r1; - - r1 = req(to, R) ? 
-1 : rfree(m, to.val); - if (bshas(m->b, r) && r1 != r) { - /* r is used and not by to */ - for (n=0; m->r[n] != r; n++) - assert(n+1 < m->n); - t = m->t[n]; - rfree(m, t); - bsset(m->b, r); - ralloc(m, t); - bsclr(m->b, r); - } - t = req(to, R) ? r : to.val; - radd(m, t, r); -} - -static int -regcpy(Ins *i) -{ - return i->op == OCopy && isreg(i->arg[0]); -} - -static Ins * -dopm(Blk *b, Ins *i, RMap *m) -{ - RMap m0; - int n, r, r1, t, s; - Ins *i0, *i1, *ip, *ir; - bits def; - - m0 = *m; - i1 = ++i; - do { - i--; - move(i->arg[0].val, i->to, m); - } while (i != b->ins && regcpy(i-1)); - assert(m0.n <= m->n); - if (i != b->ins && (i-1)->op == OCall) { - def = retregs((i-1)->arg[1], 0); - for (r=0; r<NRSave; r++) - if (!(BIT(rsave[r]) & def)) - move(rsave[r], R, m); - } - for (npm=0, n=0; n<m->n; n++) { - t = m->t[n]; - s = tmp[t].slot; - r1 = m->r[n]; - r = rfind(&m0, t); - if (r != -1) - pmadd(TMP(r1), TMP(r), tmp[t].cls); - else if (s != -1) - pmadd(TMP(r1), SLOT(s), tmp[t].cls); - } - for (ip=i; ip<i1; ip++) { - if (!req(ip->to, R)) - rfree(m, ip->to.val); - r = ip->arg[0].val; - if (rfind(m, r) == -1) - radd(m, r, r); - } - pmgen(); -#ifdef TEST_PMOV - return 0; -#endif - n = b->nins - (i1 - i) + (curi - insb); - i0 = alloc(n * sizeof(Ins)); - ip = icpy(ip = i0, b->ins, i - b->ins); - ip = icpy(ir = ip, insb, curi - insb); - ip = icpy(ip, i1, &b->ins[b->nins] - i1); - b->nins = n; - b->ins = i0; - return ir; -} - -static int -prio(Ref r1, Ref r2) -{ - /* trivial heuristic to begin with, - * later we can use the distance to - * the definition instruction - */ - (void) r2; - return *hint(r1.val) != -1; -} - -static void -insert(Ref *r, Ref **rs, int p) -{ - int i; - - rs[i = p] = r; - while (i-- > 0 && prio(*r, *rs[i])) { - rs[i+1] = rs[i]; - rs[i] = r; - } -} - -static void -doblk(Blk *b, RMap *cur) -{ - int x, r, nr; - bits rs; - Ins *i; - Mem *m; - Ref *ra[4]; - - if (rtype(b->jmp.arg) == RTmp) - b->jmp.arg = ralloc(cur, b->jmp.arg.val); - else if 
(rtype(b->jmp.arg) == RACall) { - /* add return registers */ - rs = retregs(b->jmp.arg, 0); - for (r=0; rs; rs/=2, r++) - if (rs & 1) - radd(cur, r, r); - } - for (i=&b->ins[b->nins]; i!=b->ins;) { - switch ((--i)->op) { - case OCall: - rs = argregs(i->arg[1], 0); - for (r=0; r<NRSave; r++) - if (!(BIT(rsave[r]) & rs)) - rfree(cur, rsave[r]); - break; - case OCopy: - if (isreg(i->arg[0])) { - i = dopm(b, i, cur); - continue; - } - if (isreg(i->to)) - if (rtype(i->arg[0]) == RTmp) - sethint(i->arg[0].val, i->to.val); - /* fall through */ - default: - if (!req(i->to, R)) { - assert(rtype(i->to) == RTmp); - r = rfree(cur, i->to.val); - if (r == -1 && !isreg(i->to)) { - *i = (Ins){.op = ONop}; - continue; - } - if (i->to.val >= Tmp0) - i->to = TMP(r); - } - break; - } - for (x=0, nr=0; x<2; x++) - switch (rtype(i->arg[x])) { - case RAMem: - m = &mem[i->arg[x].val & AMask]; - if (rtype(m->base) == RTmp) - insert(&m->base, ra, nr++); - if (rtype(m->index) == RTmp) - insert(&m->index, ra, nr++); - break; - case RTmp: - insert(&i->arg[x], ra, nr++); - break; - } - for (r=0; r<nr; r++) - *ra[r] = ralloc(cur, ra[r]->val); - } -} - -/* register allocation - * depends on rpo, phi, cost, (and obviously spill) - */ -void -rega(Fn *fn) -{ - int j, n, t, r, r1, x, rl[Tmp0]; - Blk *b, *b1, *s, ***ps, *blist; - RMap *end, *beg, cur, old; - Ins *i; - Phi *p; - uint u; - Ref src, dst; - - /* 1. setup */ - regu = 0; - tmp = fn->tmp; - mem = fn->mem; - end = alloc(fn->nblk * sizeof end[0]); - beg = alloc(fn->nblk * sizeof beg[0]); - for (n=0; n<fn->nblk; n++) { - bsinit(end[n].b, fn->ntmp); - bsinit(beg[n].b, fn->ntmp); - } - bsinit(cur.b, fn->ntmp); - bsinit(old.b, fn->ntmp); - - for (t=Tmp0; t<fn->ntmp; t++) - *hint(t) = -1; - for (b=fn->start, i=b->ins; i-b->ins < b->nins; i++) - if (i->op != OCopy || !isreg(i->arg[0])) - break; - else { - assert(rtype(i->to) == RTmp); - sethint(i->to.val, i->arg[0].val); - } - - /* 2. 
assign registers following post-order */ - for (n=fn->nblk-1; n>=0; n--) { - b = fn->rpo[n]; - cur.n = 0; - bszero(cur.b); - for (x=0; x<2; x++) - for (t=Tmp0; t<fn->ntmp; t++) { - assert(bshas(b->out, t) || - !bshas(cur.b, t)); - if (bshas(b->out, t)) - if (!bshas(cur.b, t)) - if (x || (r=*hint(t)) != -1) - if (x || !bshas(cur.b, r)) - ralloc(&cur, t); - } - rcopy(&end[n], &cur); - doblk(b, &cur); - bscopy(b->in, cur.b); - for (p=b->phi; p; p=p->link) - if (rtype(p->to) == RTmp) { - bsclr(b->in, p->to.val); - /* heuristic 0: - * if the phi destination has an - * argument from a frequent block - * that was already allocated to - * 'r', use 'r' as the new hint - */ - memset(rl, 0, sizeof rl); - for (u=0; u<p->narg; u++) { - t = p->arg[u].val; - b1 = p->blk[u]; - if (rtype(p->arg[u]) == RTmp) - if ((r=rfind(&end[b1->id], t)) != -1) - rl[r] += b1->loop; - } - for (x=0, j=0; j<Tmp0; j++) - if (rl[j] > rl[x]) - x = j; - if (rl[x] >= b->loop) - *hint(p->to.val) = x; - } - if (b->npred > 1) { - /* heuristic 1: - * attempt to satisfy hints - * when it's simple and we have - * multiple predecessors - */ - rcopy(&old, &cur); - curi = &insb[NIns]; - for (j=0; j<old.n; j++) { - t = old.t[j]; - r = *hint(t); - r1 = rfind(&cur, t); - if (r != -1 && r != r1) - if (!bshas(cur.b, r)) { - rfree(&cur, t); - radd(&cur, t, r); - x = tmp[t].cls; - emit(OCopy, x, TMP(r1), TMP(r), R); - } - } - if ((j = &insb[NIns] - curi)) { - b->nins += j; - i = alloc(b->nins * sizeof(Ins)); - icpy(icpy(i, curi, j), b->ins, b->nins-j); - b->ins = i; - } - } - rcopy(&beg[n], &cur); - } - if (debug['R']) { - fprintf(stderr, "\n> Register mappings:\n"); - for (n=0; n<fn->nblk; n++) { - b = fn->rpo[n]; - fprintf(stderr, "\t%-10s beg", b->name); - mdump(&beg[n]); - fprintf(stderr, "\t end"); - mdump(&end[n]); - } - fprintf(stderr, "\n"); - } - - /* 3. 
compose glue code */ - blist = 0; - for (b=fn->start;; b=b->link) { - ps = (Blk**[3]){&b->s1, &b->s2, (Blk*[1]){0}}; - for (; (s=**ps); ps++) { - npm = 0; - for (p=s->phi; p; p=p->link) { - dst = p->to; - assert(rtype(dst)==RSlot || rtype(dst)==RTmp); - if (rtype(dst) == RTmp) { - r = rfind(&beg[s->id], dst.val); - if (r == -1) - continue; - dst = TMP(r); - } - for (u=0; p->blk[u]!=b; u++) - assert(u+1 < p->narg); - src = p->arg[u]; - if (rtype(src) == RTmp) - src = rref(&end[b->id], src.val); - pmadd(src, dst, p->cls); - } - for (t=Tmp0; t<fn->ntmp; t++) - if (bshas(s->in, t)) { - src = rref(&end[b->id], t); - dst = rref(&beg[s->id], t); - pmadd(src, dst, tmp[t].cls); - } - pmgen(); - if (curi == insb) - continue; - b1 = blknew(); - b1->loop = (b->loop+s->loop) / 2; - b1->link = blist; - blist = b1; - fn->nblk++; - sprintf(b1->name, "%s_%s", b->name, s->name); - b1->nins = curi - insb; - idup(&b1->ins, insb, b1->nins); - b1->jmp.type = JJmp; - b1->s1 = s; - **ps = b1; - } - if (!b->link) { - b->link = blist; - break; - } - } - for (b=fn->start; b; b=b->link) - b->phi = 0; - fn->reg = regu; - - if (debug['R']) { - fprintf(stderr, "\n> After register allocation:\n"); - printfn(fn, stderr); - } -} diff --git a/lisc/spill.c b/lisc/spill.c @@ -1,507 +0,0 @@ -#include "lisc.h" - -static void -loopmark(Blk *hd, Blk *b, Phi *p) -{ - int k, head; - uint n, a; - - head = hd->id; - if (b->id < head) - return; - for (; p; p=p->link) - for (a=0; a<p->narg; a++) - if (p->blk[a] == b) - if (rtype(p->arg[a]) == RTmp) - bsset(hd->gen, p->arg[a].val); - if (b->visit == head) - return; - b->visit = head; - b->loop *= 10; - /* aggregate looping information at - * loop headers */ - bsunion(hd->gen, b->gen); - for (k=0; k<2; k++) - if (b->nlive[k] > hd->nlive[k]) - hd->nlive[k] = b->nlive[k]; - for (n=0; n<b->npred; n++) - loopmark(hd, b->pred[n], b->phi); -} - -static void -tmpuse(Ref r, int use, int loop, Fn *fn) -{ - Mem *m; - Tmp *t; - - if (rtype(r) == RAMem) { - m = 
&fn->mem[r.val & AMask]; - tmpuse(m->base, 1, loop, fn); - tmpuse(m->index, 1, loop, fn); - } - else if (rtype(r) == RTmp && r.val >= Tmp0) { - t = &fn->tmp[r.val]; - t->nuse += use; - t->ndef += !use; - t->cost += loop; - } -} - -/* evaluate spill costs of temporaries, - * this also fills usage information - * requires rpo, preds - */ -void -fillcost(Fn *fn) -{ - int n, hd; - uint a; - Blk *b; - Ins *i; - Tmp *t; - Phi *p; - - for (b=fn->start; b; b=b->link) { - b->loop = 1; - b->visit = -1; - } - if (debug['S']) - fprintf(stderr, "\n> Loop information:\n"); - for (n=0; n<fn->nblk; n++) { - b = fn->rpo[n]; - hd = 0; - for (a=0; a<b->npred; a++) - if (b->pred[a]->id >= n) { - loopmark(b, b->pred[a], b->phi); - hd = 1; - } - if (hd && debug['S']) { - fprintf(stderr, "\t%-10s", b->name); - fprintf(stderr, " (% 3d ", b->nlive[0]); - fprintf(stderr, "% 3d) ", b->nlive[1]); - dumpts(b->gen, fn->tmp, stderr); - } - } - for (t=fn->tmp; t-fn->tmp < fn->ntmp; t++) { - t->cost = t-fn->tmp < Tmp0 ? 1e6 : 0; - t->nuse = 0; - t->ndef = 0; - } - for (b=fn->start; b; b=b->link) { - for (p=b->phi; p; p=p->link) { - /* todo, the cost computation - * for p->to is not great... 
*/ - tmpuse(p->to, 0, 0, fn); - for (a=0; a<p->narg; a++) { - n = p->blk[a]->loop; - assert(b->npred==p->narg && - "wrong cfg"); - n /= b->npred; - tmpuse(p->arg[a], 1, n, fn); - } - } - n = b->loop; - for (i=b->ins; i-b->ins < b->nins; i++) { - tmpuse(i->to, 0, n, fn); - tmpuse(i->arg[0], 1, n, fn); - tmpuse(i->arg[1], 1, n, fn); - } - tmpuse(b->jmp.arg, 1, n, fn); - } - if (debug['S']) { - fprintf(stderr, "\n> Spill costs:\n"); - for (n=Tmp0; n<fn->ntmp; n++) - fprintf(stderr, "\t%-10s %d\n", - fn->tmp[n].name, - fn->tmp[n].cost); - fprintf(stderr, "\n"); - } -} - -static BSet *fst; /* temps to prioritize in registers (for tcmp1) */ -static Tmp *tmp; /* current temporaries (for tcmpX) */ -static int ntmp; /* current # of temps (for limit) */ -static int locs; /* stack size used by locals */ -static int slot4; /* next slot of 4 bytes */ -static int slot8; /* ditto, 8 bytes */ -static BSet mask[2][1]; /* class masks */ - -static int -tcmp0(const void *pa, const void *pb) -{ - return tmp[*(int *)pb].cost - tmp[*(int *)pa].cost; -} - -static int -tcmp1(const void *pa, const void *pb) -{ - int c; - - c = bshas(fst, *(int *)pb) - bshas(fst, *(int *)pa); - return c ? 
c : tcmp0(pa, pb); -} - -static Ref -slot(int t) -{ - int s; - - if (t < Tmp0) - diag("spill: cannot spill register"); - s = tmp[t].slot; - if (s == -1) { - assert(NAlign == 3); - /* nice logic to pack stack slots - * on demand, there can be only - * one hole and slot4 points to it - * - * invariant: slot4 <= slot8 - */ - if (KWIDE(tmp[t].cls)) { - s = slot8; - if (slot4 == slot8) - slot4 += 2; - slot8 += 2; - } else { - s = slot4; - if (slot4 == slot8) { - slot8 += 2; - slot4 += 1; - } else - slot4 = slot8; - } - s += locs; - tmp[t].slot = s; - } - return SLOT(s); -} - -static void -limit(BSet *b, int k, BSet *f) -{ - static int *tarr, maxt; - int i, nt; - uint t; - - nt = bscount(b); - if (nt <= k) - return; - if (nt > maxt) { - free(tarr); - tarr = emalloc(nt * sizeof tarr[0]); - maxt = nt; - } - for (i=0, t=0; bsiter(b, &t); t++) { - bsclr(b, t); - tarr[i++] = t; - } - if (!f) - qsort(tarr, nt, sizeof tarr[0], tcmp0); - else { - fst = f; - qsort(tarr, nt, sizeof tarr[0], tcmp1); - } - for (i=0; i<k && i<nt; i++) - bsset(b, tarr[i]); - for (; i<nt; i++) - slot(tarr[i]); -} - -static void -limit2(BSet *b1, int k1, int k2, BSet *fst) -{ - BSet b2[1]; - - bsinit(b2, ntmp); /* todo, free those */ - bscopy(b2, b1); - bsinter(b1, mask[0]); - bsinter(b2, mask[1]); - limit(b1, NIReg - k1, fst); - limit(b2, NFReg - k2, fst); - bsunion(b1, b2); -} - -static void -sethint(BSet *u, bits r) -{ - uint t; - - for (t=Tmp0; bsiter(u, &t); t++) - tmp[phicls(t, tmp)].hint.m |= r; -} - -static void -reloads(BSet *u, BSet *v) -{ - uint t; - - for (t=Tmp0; bsiter(u, &t); t++) - if (!bshas(v, t)) - emit(OLoad, tmp[t].cls, TMP(t), slot(t), R); -} - -static void -store(Ref r, int s) -{ - static int kstore[] = { - [Kw] = OStorew, [Kl] = OStorel, - [Ks] = OStores, [Kd] = OStored, - }; - - if (s != -1) - emit(kstore[tmp[r.val].cls], 0, R, r, SLOT(s)); -} - -static int -regcpy(Ins *i) -{ - return i->op == OCopy && isreg(i->arg[0]); -} - -static Ins * -dopm(Blk *b, Ins *i, BSet *v) -{ - int 
n, t; - BSet u[1]; - Ins *i1; - bits r; - - bsinit(u, ntmp); /* todo, free those */ - /* consecutive copies from - * registers need to be handled - * as one large instruction - * - * fixme: there is an assumption - * that calls are always followed - * by copy instructions here, this - * might not be true if previous - * passes change - */ - i1 = ++i; - do { - i--; - t = i->to.val; - if (!req(i->to, R)) - if (bshas(v, t)) { - bsclr(v, t); - store(i->to, tmp[t].slot); - } - bsset(v, i->arg[0].val); - } while (i != b->ins && regcpy(i-1)); - bscopy(u, v); - if (i != b->ins && (i-1)->op == OCall) { - v->t[0] &= ~retregs((i-1)->arg[1], 0); - limit2(v, NISave, NFSave, 0); - for (r=0, n=0; n<NRSave; n++) - r |= BIT(rsave[n]); - v->t[0] |= argregs((i-1)->arg[1], 0); - } else { - limit2(v, 0, 0, 0); - r = v->t[0]; - } - sethint(v, r); - reloads(u, v); - do - emiti(*--i1); - while (i1 != i); - return i; -} - -/* spill code insertion - * requires spill costs, rpo, liveness - * - * Note: this will replace liveness - * information (in, out) with temporaries - * that must be in registers at block - * borders - * - * Be careful with: - * - OCopy instructions to ensure register - * constraints - */ -void -spill(Fn *fn) -{ - Blk *b, *s1, *s2, *hd, **bp; - int j, n, l, t, k, lvarg[2]; - BSet u[1], v[1], w[1]; - Ins *i; - Phi *p; - Mem *m; - bits r; - - tmp = fn->tmp; - ntmp = fn->ntmp; - bsinit(u, ntmp); - bsinit(v, ntmp); - bsinit(w, ntmp); - bsinit(mask[0], ntmp); - bsinit(mask[1], ntmp); - locs = fn->slot; - slot4 = 0; - slot8 = 0; - for (t=0; t<ntmp; t++) { - k = 0; - if (t >= XMM0 && t < XMM0 + NFReg) - k = 1; - else if (t >= Tmp0) - k = KBASE(tmp[t].cls); - bsset(mask[k], t); - } - - for (bp=&fn->rpo[fn->nblk]; bp!=fn->rpo;) { - b = *--bp; - /* invariant: all bocks with bigger rpo got - * their in,out updated. */ - - /* 1. 
find temporaries in registers at - * the end of the block (put them in v) */ - curi = 0; - s1 = b->s1; - s2 = b->s2; - hd = 0; - if (s1 && s1->id <= n) - hd = s1; - if (s2 && s2->id <= n) - if (!hd || s2->id >= hd->id) - hd = s2; - r = 0; - bszero(v); - if (hd) { - /* back-edge */ - for (k=0; k<2; k++) { - n = k == 0 ? NIReg : NFReg; - bscopy(u, b->out); - bsinter(u, mask[k]); - bscopy(w, u); - bsinter(u, hd->gen); - bsdiff(w, hd->gen); - if ((int)bscount(u) < n) { /* fixme */ - j = bscount(w); /* live through */ - l = hd->nlive[k]; - limit(w, n - (l - j), 0); - bsunion(u, w); - } else - limit(u, n, 0); - bsunion(v, u); - } - } else if (s1) { - liveon(v, b, s1); - if (s2) { - liveon(u, b, s2); - bscopy(w, u); - bsinter(w, v); - bsunion(v, u); - } - limit2(v, 0, 0, w); - } else if (rtype(b->jmp.arg) == RACall) { - /* return */ - r = retregs(b->jmp.arg, 0); - v->t[0] |= r; - } - bscopy(b->out, v); - - /* 2. process the block instructions */ - curi = &insb[NIns]; - for (i=&b->ins[b->nins]; i!=b->ins;) { - i--; - if (regcpy(i)) { - i = dopm(b, i, v); - continue; - } - bszero(w); - if (!req(i->to, R)) { - assert(rtype(i->to) == RTmp); - t = i->to.val; - if (bshas(v, t)) - bsclr(v, t); - else { - /* make sure we have a reg - * for the result */ - bsset(v, t); - bsset(w, t); - } - } - j = opdesc[i->op].nmem; - for (n=0; n<2; n++) - if (rtype(i->arg[n]) == RAMem) - j--; - for (n=0; n<2; n++) - switch (rtype(i->arg[n])) { - case RAMem: - t = i->arg[n].val; - m = &fn->mem[t & AMask]; - if (rtype(m->base) == RTmp) { - bsset(v, m->base.val); - bsset(w, m->base.val); - } - if (rtype(m->index) == RTmp) { - bsset(v, m->index.val); - bsset(w, m->index.val); - } - break; - case RTmp: - t = i->arg[n].val; - lvarg[n] = bshas(v, t); - bsset(v, t); - if (j-- <= 0) - bsset(w, t); - break; - } - bscopy(u, v); - limit2(v, 0, 0, w); - for (n=0; n<2; n++) - if (rtype(i->arg[n]) == RTmp) { - t = i->arg[n].val; - if (!bshas(v, t)) { - /* do not reload if the - * the temporary was dead - */ - 
if (!lvarg[n]) - bsclr(u, t); - i->arg[n] = slot(t); - } - } - reloads(u, v); - if (!req(i->to, R)) { - t = i->to.val; - store(i->to, tmp[t].slot); - bsclr(v, t); - } - emiti(*i); - r = v->t[0] & (BIT(Tmp0)-1); - if (r) - sethint(v, r); - } - assert(!r || b==fn->start); - - for (p=b->phi; p; p=p->link) { - assert(rtype(p->to) == RTmp); - t = p->to.val; - if (bshas(v, t)) { - bsclr(v, t); - store(p->to, tmp[t].slot); - } else if (bshas(b->in, t)) - /* only if the phi is live */ - p->to = slot(p->to.val); - } - bscopy(b->in, v); - b->nins = &insb[NIns] - curi; - idup(&b->ins, curi, b->nins); - } - - /* align the locals to a 16 byte boundary */ - assert(NAlign == 3); - slot8 += slot8 & 3; - fn->slot += slot8; - - if (debug['S']) { - fprintf(stderr, "\n> Block information:\n"); - for (b=fn->start; b; b=b->link) { - printf("\t%-10s (% 5d) ", b->name, b->loop); - dumpts(b->out, fn->tmp, stdout); - } - fprintf(stderr, "\n> After spilling:\n"); - printfn(fn, stderr); - } -} diff --git a/lisc/ssa.c b/lisc/ssa.c @@ -1,516 +0,0 @@ -#include "lisc.h" -#include <stdarg.h> - -static void -adduse(Tmp *tmp, int ty, Blk *b, ...) -{ - Use *u; - int n; - va_list ap; - - va_start(ap, b); - n = tmp->nuse; - vgrow(&tmp->use, ++tmp->nuse); - u = &tmp->use[n]; - u->type = ty; - u->bid = b->id; - switch (ty) { - default: - diag("ssa: adduse defaulted"); - case UPhi: - u->u.phi = va_arg(ap, Phi *); - break; - case UIns: - u->u.ins = va_arg(ap, Ins *); - break; - case UJmp: - break; - } - va_end(ap); -} - -/* fill usage, phi, and class information - */ -void -filluse(Fn *fn) -{ - Blk *b; - Phi *p; - Ins *i; - int m, t; - uint a; - Tmp *tmp; - - /* todo, is this the correct file? 
*/ - tmp = fn->tmp; - for (t=0; t<fn->ntmp; t++) { - tmp[t].ndef = 0; - tmp[t].nuse = 0; - tmp[t].phi = 0; - tmp[t].cls = 0; - if (tmp[t].use == 0) - tmp[t].use = vnew(0, sizeof(Use)); - } - for (b=fn->start; b; b=b->link) { - for (p=b->phi; p; p=p->link) { - assert(rtype(p->to) == RTmp); - t = p->to.val; - tmp[t].ndef++; - tmp[t].cls = p->cls; - tmp[t].phi = p->to.val; - for (a=0; a<p->narg; a++) - if (rtype(p->arg[a]) == RTmp) { - t = p->arg[a].val; - adduse(&tmp[t], UPhi, b, p); - if (!tmp[t].phi) - tmp[t].phi = p->to.val; - } - } - for (i=b->ins; i-b->ins < b->nins; i++) { - if (!req(i->to, R)) { - assert(rtype(i->to) == RTmp); - t = i->to.val; - tmp[t].ndef++; - tmp[t].cls = i->cls; - } - for (m=0; m<2; m++) - if (rtype(i->arg[m]) == RTmp) { - t = i->arg[m].val; - adduse(&tmp[t], UIns, b, i); - } - } - if (rtype(b->jmp.arg) == RTmp) - adduse(&tmp[b->jmp.arg.val], UJmp, b); - } -} - -static void -addpred(Blk *bp, Blk *bc) -{ - uint i; - - if (!bc->pred) { - bc->pred = alloc(bc->npred * sizeof bc->pred[0]); - for (i=0; i<bc->npred; i++) - bc->pred[i] = 0; - } - for (i=0; bc->pred[i]; i++) - ; - bc->pred[i] = bp; -} - -/* fill predecessors information in blocks - */ -void -fillpreds(Fn *f) -{ - Blk *b; - - for (b=f->start; b; b=b->link) { - b->npred = 0; - b->pred = 0; - } - for (b=f->start; b; b=b->link) { - if (b->s1) - b->s1->npred++; - if (b->s2) - b->s2->npred++; - } - for (b=f->start; b; b=b->link) { - if (b->s1) - addpred(b, b->s1); - if (b->s2) - addpred(b, b->s2); - } -} - -static int -rporec(Blk *b, int x) -{ - Blk *s1, *s2; - - if (!b || b->id >= 0) - return x; - b->id = 1; - s1 = b->s1; - s2 = b->s2; - if (s1 && s2 && s1->loop > s2->loop) { - s1 = b->s2; - s2 = b->s1; - } - x = rporec(s1, x); - x = rporec(s2, x); - b->id = x; - assert(x >= 0); - return x - 1; -} - -/* fill the rpo information in blocks - */ -void -fillrpo(Fn *f) -{ - int n; - Blk *b, **p; - - for (b=f->start; b; b=b->link) - b->id = -1; - n = 1 + rporec(f->start, f->nblk-1); - f->nblk 
-= n; - f->rpo = alloc(f->nblk * sizeof f->rpo[0]); - for (p=&f->start; *p;) { - b = *p; - if (b->id == -1) { - *p = b->link; - /* todo, free block */ - } else { - b->id -= n; - f->rpo[b->id] = b; - p=&(*p)->link; - } - } -} - -/* for dominators computation, read - * "A Simple, Fast Dominance Algorithm" - * by K. Cooper, T. Harvey, and K. Kennedy. - */ - -static Blk * -inter(Blk *b1, Blk *b2) -{ - Blk *bt; - - if (b1 == 0) - return b2; - while (b1 != b2) { - if (b1->id < b2->id) { - bt = b1; - b1 = b2; - b2 = bt; - } - while (b1->id > b2->id) { - b1 = b1->idom; - assert(b1); - } - } - return b1; -} - -static void -filldom(Fn *fn) -{ - Blk *b, *d; - int ch, n; - uint p; - - for (b=fn->start; b; b=b->link) { - b->idom = 0; - b->dom = 0; - b->dlink = 0; - } - do { - ch = 0; - for (n=1; n<fn->nblk; n++) { - b = fn->rpo[n]; - d = 0; - for (p=0; p<b->npred; p++) - if (b->pred[p]->idom - || b->pred[p] == fn->start) - d = inter(d, b->pred[p]); - if (d != b->idom) { - ch++; - b->idom = d; - } - } - } while (ch); - for (b=fn->start; b; b=b->link) - if ((d=b->idom)) { - assert(d != b); - b->dlink = d->dom; - d->dom = b; - } -} - -static int -sdom(Blk *b1, Blk *b2) -{ - assert(b1 && b2); - if (b1 == b2) - return 0; - while (b2->id > b1->id) - b2 = b2->idom; - return b1 == b2; -} - -static int -dom(Blk *b1, Blk *b2) -{ - return b1 == b2 || sdom(b1, b2); -} - -static void -addfron(Blk *a, Blk *b) -{ - int n; - - for (n=0; n<a->nfron; n++) - if (a->fron[n] == b) - return; - if (!a->nfron) - a->fron = vnew(++a->nfron, sizeof a->fron[0]); - else - vgrow(&a->fron, ++a->nfron); - a->fron[a->nfron-1] = b; -} - -static void -fillfron(Fn *fn) -{ - Blk *a, *b; - - for (b=fn->start; b; b=b->link) { - if (b->s1) - for (a=b; !sdom(a, b->s1); a=a->idom) - addfron(a, b->s1); - if (b->s2) - for (a=b; !sdom(a, b->s2); a=a->idom) - addfron(a, b->s2); - } -} - -static Ref -refindex(int t, Fn *fn) -{ - return newtmp(fn->tmp[t].name, fn->tmp[t].cls, fn); -} - -static void -phiins(Fn *fn) -{ - BSet 
u[1], defs[1]; - Blk *a, *b, **blist, **be, **bp; - Ins *i; - Phi *p; - Ref r; - int t, n, k, nt; - - bsinit(u, fn->nblk); - bsinit(defs, fn->nblk); - blist = emalloc(fn->nblk * sizeof blist[0]); - be = &blist[fn->nblk]; - nt = fn->ntmp; - for (t=Tmp0; t<nt; t++) { - fn->tmp[t].visit = 0; - if (fn->tmp[t].phi != 0) - continue; - bszero(u); - k = -1; - bp = be; - for (b=fn->start; b; b=b->link) { - b->visit = 0; - r = R; - for (i=b->ins; i-b->ins < b->nins; i++) { - if (!req(r, R)) { - if (req(i->arg[0], TMP(t))) - i->arg[0] = r; - if (req(i->arg[1], TMP(t))) - i->arg[1] = r; - } - if (req(i->to, TMP(t))) { - if (!bshas(b->out, t)) { - if (fn->tmp[t].ndef == 1) - r = TMP(t); - else - r = refindex(t, fn); - i->to = r; - } else { - if (!bshas(u, b->id)) { - bsset(u, b->id); - *--bp = b; - } - if (k == -1) - k = i->cls; - assert(k == i->cls); - } - } - } - if (!req(r, R) && req(b->jmp.arg, TMP(t))) - b->jmp.arg = r; - } - bscopy(defs, u); - while (bp != be) { - fn->tmp[t].visit = t; - b = *bp++; - bsclr(u, b->id); - for (n=0; n<b->nfron; n++) { - a = b->fron[n]; - if (a->visit++ == 0) - if (bshas(a->in, t)) { - p = alloc(sizeof *p); - p->cls = k; - p->to = TMP(t); - p->link = a->phi; - a->phi = p; - if (!bshas(defs, a->id)) - if (!bshas(u, a->id)) { - bsset(u, a->id); - *--bp = a; - } - } - } - } - } - free(blist); -} - -typedef struct Name Name; -struct Name { - Ref r; - Blk *b; - Name *up; -}; - -static Name *namel; - -static Name * -nnew(Ref r, Blk *b, Name *up) -{ - Name *n; - - if (namel) { - n = namel; - namel = n->up; - } else - /* could use alloc, here - * but namel should be reset - */ - n = emalloc(sizeof *n); - n->r = r; - n->b = b; - n->up = up; - return n; -} - -static void -nfree(Name *n) -{ - n->up = namel; - namel = n; -} - -static void -rendef(Ref *r, Blk *b, Name **stk, Fn *fn) -{ - Ref r1; - int t; - - t = r->val; - if (req(*r, R) || !fn->tmp[t].visit) - return; - r1 = refindex(t, fn); - fn->tmp[r1.val].visit = t; - stk[t] = nnew(r1, b, stk[t]); - *r 
= r1; -} - -static Ref -getstk(int t, Blk *b, Name **stk) -{ - Name *n, *n1; - - n = stk[t]; - while (n && !dom(n->b, b)) { - n1 = n; - n = n->up; - nfree(n1); - } - stk[t] = n; - if (!n) { - /* uh, oh, warn */ - return CON_Z; - } else - return n->r; -} - -static void -renblk(Blk *b, Name **stk, Fn *fn) -{ - Phi *p; - Ins *i; - Blk *s, **ps, *succ[3]; - int t, m; - - for (p=b->phi; p; p=p->link) - rendef(&p->to, b, stk, fn); - for (i=b->ins; i-b->ins < b->nins; i++) { - for (m=0; m<2; m++) { - t = i->arg[m].val; - if (rtype(i->arg[m]) == RTmp) - if (fn->tmp[t].visit) - i->arg[m] = getstk(t, b, stk); - } - rendef(&i->to, b, stk, fn); - } - t = b->jmp.arg.val; - if (rtype(b->jmp.arg) == RTmp) - if (fn->tmp[t].visit) - b->jmp.arg = getstk(t, b, stk); - succ[0] = b->s1; - succ[1] = b->s2; - succ[2] = 0; - for (ps=succ; (s=*ps); ps++) - for (p=s->phi; p; p=p->link) { - t = p->to.val; - if ((t=fn->tmp[t].visit)) { - m = p->narg++; - if (m == NPred) - diag("ssa: too many phi arguments"); - p->arg[m] = getstk(t, b, stk); - p->blk[m] = b; - } - } - for (s=b->dom; s; s=s->dlink) - renblk(s, stk, fn); -} - -/* require ndef */ -void -ssa(Fn *fn) -{ - Name **stk, *n; - int d, nt; - Blk *b, *b1; - - nt = fn->ntmp; - stk = emalloc(nt * sizeof stk[0]); - d = debug['L']; - debug['L'] = 0; - filldom(fn); - if (debug['N']) { - fprintf(stderr, "\n> Dominators:\n"); - for (b1=fn->start; b1; b1=b1->link) { - if (!b1->dom) - continue; - fprintf(stderr, "%10s:", b1->name); - for (b=b1->dom; b; b=b->dlink) - fprintf(stderr, " %s", b->name); - fprintf(stderr, "\n"); - } - } - fillfron(fn); - filllive(fn); - phiins(fn); - renblk(fn->start, stk, fn); - while (nt--) - while ((n=stk[nt])) { - stk[nt] = n->up; - nfree(n); - } - debug['L'] = d; - free(stk); - if (debug['N']) { - fprintf(stderr, "\n> After SSA construction:\n"); - printfn(fn, stderr); - } -} diff --git a/lisc/test/go.sh b/lisc/test/go.sh @@ -1,116 +0,0 @@ -#!/bin/sh - -TMP=/tmp/qbe.zzzz - -DRV=$TMP.c -ASM=$TMP.s -BIN=$TMP.bin 
-OUT=$TMP.out - -cleanup() { - rm -f $DRV $ASM $BIN $OUT -} - -extract() { - WHAT="$1" - FILE="$2" - - awk " - /^# >>> $WHAT/ { - p = 1 - next - } - /^# <<</ { - if (p) - p = 0 - } - p - " $FILE \ - | sed -e 's/# //' \ - | sed -e 's/#$//' -} - -once() { - T="$1" - - if ! test -f $T - then - echo "invalid test file $T" >&2 - exit 1 - fi - - echo "$T... " - - if ! ./lisc $T -o $ASM - then - echo "[qbe fail]" - return 1 - fi - - extract driver $T > $DRV - extract output $T > $OUT - - if test -s $DRV - then - LNK="$DRV $ASM" - else - LNK="$ASM" - fi - - if ! cc -g -o $BIN $LNK - then - echo "[cc fail]" - return 1 - fi - - if test -s $OUT - then - $BIN a b c | diff - $OUT - RET=$? - REASON="output" - else - $BIN a b c - RET=$? - REASON="returned $RET" - fi - - if test $RET -ne 0 - then - echo "[$REASON fail]" - return 1 - fi - - printf "\033[1A\033[45C[ok]\n" -} - - -#trap cleanup TERM QUIT - -if test -z "$1" -then - echo "usage: test/go.sh {all, SSAFILE}" 2>&1 - exit 1 -fi - -case $1 in - "all") - F=0 - for T in test/[!_]*.ssa - do - once $T - F=`expr $F + $?` - done - if test $F -ge 1 - then - echo - echo "$F test(s) failed!" - else - echo - echo "All is fine!" - fi - ;; - *) - once $1 - exit $? - ;; -esac diff --git a/lisc/tools/abitest.sh b/lisc/tools/abitest.sh @@ -1,104 +0,0 @@ -#!/bin/sh - -OCAMLC=/usr/bin/ocamlc -QBE=`pwd`/lisc - -failure() { - echo "Failure at stage:" $1 >&2 - exit 1 -} - -cleanup() { - rm -fr $TMP -} - -init() { - cp tools/abi.ml $TMP - pushd $TMP > /dev/null - - cat > Makefile << EOM - -.PHONY: test -test: caller.o callee.o - c99 -o \$@ caller.o callee.o -%.o: %.c - c99 -c -o \$@ \$< -%.o: %.ssa - $QBE -o \$*.s \$< - c99 -c -o \$@ \$*.s - -EOM - - if ! 
$OCAMLC abi.ml -o gentest - then - popd > /dev/null - cleanup - failure "abifuzz compilation" - fi - popd > /dev/null -} - -once() { - if test -z "$3" - then - $TMP/gentest $TMP $1 $2 - else - $TMP/gentest -s $3 $TMP $1 $2 - fi - make -C $TMP test > /dev/null || failure "building" - $TMP/test || failure "runtime" -} - -usage() { - echo "usage: abitest.sh [-callssa] [-callc] [-s SEED] [-n ITERATIONS]" >&2 - exit 1 -} - -N=1 -CALLER=c -CALLEE=ssa - -while test -n "$1" -do - case "$1" in - "-callssa") - ;; - "-callc") - CALLER=ssa - CALLEE=c - ;; - "-s") - test -n "$2" || usage - shift - SEED="$1" - ;; - "-n") - test -n "$2" || usage - shift - N="$1" - ;; - *) - usage - ;; - esac - shift -done - -TMP=`mktemp -d abifuzz.XXXXXX` - -init - -if test -n "$S" -then - once $CALLER $CALLEE $SEED -else - for n in `seq $N` - do - once $CALLER $CALLEE - echo "$n" | grep "00$" - done -fi - -echo "All done." - -cleanup diff --git a/lisc/tools/regress.sh b/lisc/tools/regress.sh @@ -1,17 +0,0 @@ -#!/bin/sh - -for t in test/* -do - printf "Test $t ... 
" - - ./lisc $t >/tmp/out.0 2>&1 - ./lisc.1 $t >/tmp/out.1 2>&1 - - if diff /tmp/out.0 /tmp/out.1 > /dev/null - then - echo "OK" - else - echo "KO" - break - fi -done diff --git a/lisc/util.c b/lisc/util.c @@ -1,329 +0,0 @@ -#include "lisc.h" - -typedef struct Bitset Bitset; -typedef struct Vec Vec; - -struct Vec { - ulong mag; - size_t esz; - ulong cap; - union { - long long ll; - long double ld; - void *ptr; - } align[]; -}; - -enum { - VMin = 2, - VMag = 0xcabba9e, - NPtr = 256, -}; - -Typ typ[NTyp]; -Ins insb[NIns], *curi; - -static void *ptr[NPtr]; -static void **pool = ptr; -static int nptr = 1; - -void -diag(char *s) -{ - fputs(s, stderr); - fputc('\n', stderr); - abort(); -} - -void * -emalloc(size_t n) -{ - void *p; - - p = calloc(1, n); - if (!p) - diag("emalloc: out of memory"); - return p; -} - -void * -alloc(size_t n) -{ - void **pp; - - if (n == 0) - return 0; - if (nptr >= NPtr) { - pp = emalloc(NPtr * sizeof(void *)); - pp[0] = pool; - pool = pp; - nptr = 1; - } - return pool[nptr++] = emalloc(n); -} - -void -freeall() -{ - void **pp; - - for (;;) { - for (pp = &pool[1]; pp < &pool[nptr]; pp++) - free(*pp); - pp = pool[0]; - if (!pp) - break; - free(pool); - pool = pp; - nptr = NPtr; - } - nptr = 1; -} - -Blk * -blknew() -{ - static Blk z; - Blk *b; - - b = alloc(sizeof *b); - *b = z; - return b; -} - -void -emit(int op, int k, Ref to, Ref arg0, Ref arg1) -{ - if (curi == insb) - diag("emit: too many instructions"); - *--curi = (Ins){ - .op = op, .cls = k, - .to = to, .arg = {arg0, arg1} - }; -} - -void -emiti(Ins i) -{ - emit(i.op, i.cls, i.to, i.arg[0], i.arg[1]); -} - -void -idup(Ins **pd, Ins *s, ulong n) -{ - *pd = alloc(n * sizeof(Ins)); - memcpy(*pd, s, n * sizeof(Ins)); -} - -Ins * -icpy(Ins *d, Ins *s, ulong n) -{ - memcpy(d, s, n * sizeof(Ins)); - return d + n; -} - -void * -vnew(ulong len, size_t esz) -{ - ulong cap; - Vec *v; - - for (cap=VMin; cap<len; cap*=2) - ; - v = alloc(cap * esz + sizeof(Vec)); - v->mag = VMag; - v->cap = cap; - 
v->esz = esz; - return v + 1; -} - -void -vgrow(void *vp, ulong len) -{ - Vec *v; - void *v1; - - v = *(Vec **)vp - 1; - assert(v+1 && v->mag == VMag); - if (v->cap >= len) - return; - v1 = vnew(len, v->esz); - memcpy(v1, v+1, v->cap * v->esz); - *(Vec **)vp = v1; -} - -int -phicls(int t, Tmp *tmp /*, int c*/) -{ - if (tmp[t].phi) - return tmp[t].phi; - return t; -#if 0 - int t1; - - t1 = tmp[t].phi; - if (!t1) - t1 = t; - if (t != t1) { - t1 = phitmp(t1, tmp, c); - if (c) - tmp[t].phi = t1; - } - return t1; -#endif -} - -Ref -newtmp(char *prfx, int k, Fn *fn) -{ - static int n; - int t; - - t = fn->ntmp++; - vgrow(&fn->tmp, fn->ntmp); - sprintf(fn->tmp[t].name, "%s%d", prfx, ++n); - fn->tmp[t].cls = k; - fn->tmp[t].slot = -1; - fn->tmp[t].nuse = +1; - fn->tmp[t].ndef = +1; - return TMP(t); -} - -Ref -getcon(int64_t val, Fn *fn) -{ - int c; - - for (c=0; c<fn->ncon; c++) - if (fn->con[c].type == CBits && fn->con[c].bits.i == val) - return CON(c); - fn->ncon++; - vgrow(&fn->con, fn->ncon); - fn->con[c] = (Con){.type = CBits, .bits.i = val}; - return CON(c); -} - -void -addcon(Con *c0, Con *c1) -{ - if (c0->type == CUndef) - *c0 = *c1; - else { - if (c1->type == CAddr) { - if (c0->type == CAddr) - diag("addcon: adding two addresses"); - c0->type = CAddr; - strcpy(c0->label, c1->label); - } - c0->bits.i += c1->bits.i; - } -} - -void -bsinit(BSet *bs, uint n) -{ - n = (n + NBit-1) / NBit; - bs->nt = n; - bs->t = alloc(n * sizeof bs->t[0]); -} - -uint -bscount(BSet *bs) -{ - uint i, j, n; - - n = 0; - for (i=0; i<bs->nt; i++) - for (j=0; j<NBit; j++) - if (bs->t[i] & BIT(j)) - n++; - return n; -} - -static inline uint -bsmax(BSet *bs) -{ - return bs->nt * NBit; -} - -void -bsset(BSet *bs, uint elt) -{ - assert(elt < bsmax(bs)); - bs->t[elt/NBit] |= BIT(elt%NBit); -} - -void -bsclr(BSet *bs, uint elt) -{ - assert(elt < bsmax(bs)); - bs->t[elt/NBit] &= ~BIT(elt%NBit); -} - -#define BSOP(f, op) \ - void \ - f(BSet *a, BSet *b) \ - { \ - uint i; \ - \ - assert(a->nt == 
b->nt); \ - for (i=0; i<a->nt; i++) \ - a->t[i] op b->t[i]; \ - } - -BSOP(bscopy, =) -BSOP(bsunion, |=) -BSOP(bsinter, &=) -BSOP(bsdiff, &= ~) - -int -bsequal(BSet *a, BSet *b) -{ - uint i; - - assert(a->nt == b->nt); - for (i=0; i<a->nt; i++) - if (a->t[i] != b->t[i]) - return 0; - return 1; -} - -void -bszero(BSet *bs) -{ - memset(bs->t, 0, bs->nt * sizeof bs->t[0]); -} - -/* iterates on a bitset, use as follows - * - * for (i=0; bsiter(set, &i); i++) - * use(i); - * - */ -int -bsiter(BSet *bs, uint *elt) -{ - uint i; - - for (i=*elt;; i++) { - while (i < bsmax(bs) && !bs->t[i/NBit]) - i = (i + NBit) & -NBit; - if (i >= bsmax(bs)) - return 0; - if (bshas(bs, i)) { - *elt = i; - return 1; - } - } -} - -void -dumpts(BSet *bs, Tmp *tmp, FILE *f) -{ - uint t; - - fprintf(f, "["); - for (t=Tmp0; bsiter(bs, &t); t++) - fprintf(f, " %s", tmp[t].name); - fprintf(f, " ]\n"); -} diff --git a/minic/mcc b/minic/mcc @@ -29,7 +29,7 @@ fi ../minic/minic < $file > /tmp/minic.ssa && -../lisc/lisc < /tmp/minic.ssa > /tmp/minic.s && +../src/qbe < /tmp/minic.ssa > /tmp/minic.s && cc $flags /tmp/minic.s if test $? 
-ne 0 diff --git a/src/.gitignore b/src/.gitignore @@ -0,0 +1,5 @@ +qbe +doc +.comfile +*.o +*.out diff --git a/lisc/.tag b/src/.tag diff --git a/src/Makefile b/src/Makefile @@ -0,0 +1,17 @@ +BIN = qbe +OBJ = main.o util.o parse.o mem.o ssa.o copy.o live.o isel.o spill.o rega.o emit.o + +CFLAGS = -Wall -Wextra -std=c99 -g -pedantic + +$(BIN): $(OBJ) + $(CC) $(LDFLAGS) $(OBJ) -o $@ + +$(OBJ): all.h + +.PHONY: clean check syndoc +clean: + rm -f $(BIN) $(OBJ) +check: $(BIN) + test/go.sh all +syndoc: + unison -auto doc ssh://qcar@h/data/d/ssa-doc diff --git a/lisc/lisc.h b/src/all.h diff --git a/src/copy.c b/src/copy.c @@ -0,0 +1,159 @@ +#include "all.h" + +typedef struct RList RList; +struct RList { + int t; + RList *l; +}; + +static Ref +copyof(Ref r, Ref *cp) +{ + if (rtype(r) == RTmp) + return cp[r.val]; + else + return r; +} + +static void +update(Ref r, Ref rcp, Ref *cp, RList **w) +{ + RList *l; + + if (!req(cp[r.val], rcp)) { + cp[r.val] = rcp; + l = emalloc(sizeof *l); + l->t = r.val; + l->l = *w; + *w = l; + } +} + +static void +visitphi(Phi *p, Ref *cp, RList **w) +{ + uint a; + Ref r, r1; + + r = R; + for (a=0; a<p->narg; a++) { + r1 = copyof(p->arg[a], cp); + if (req(r1, R)) + continue; + if (req(r, R) || req(r, r1)) + r = r1; + else { + r = p->to; + break; + } + } + assert(!req(r, R)); + update(p->to, r, cp, w); +} + +static void +visitins(Ins *i, Ref *cp, RList **w) +{ + Ref r; + + if (i->op == OCopy) { + r = copyof(i->arg[0], cp); + update(i->to, r, cp, w); + } else if (!req(i->to, R)) { + assert(rtype(i->to) == RTmp); + update(i->to, i->to, cp, w); + } +} + +void +copy(Fn *fn) +{ + Blk *b; + Ref *cp, r; + RList *w, *w1; + Use *u, *u1; + Ins *i; + Phi *p, **pp; + uint a; + int t; + + w = 0; + cp = emalloc(fn->ntmp * sizeof cp[0]); + for (b=fn->start; b; b=b->link) { + for (p=b->phi; p; p=p->link) + visitphi(p, cp, &w); + for (i=b->ins; i-b->ins < b->nins; i++) + visitins(i, cp, &w); + } + while ((w1=w)) { + t = w->t; + w = w->l; + free(w1); + u = 
fn->tmp[t].use; + u1 = u + fn->tmp[t].nuse; + for (; u<u1; u++) + switch (u->type) { + default: + diag("copy: invalid use"); + case UPhi: + visitphi(u->u.phi, cp, &w); + break; + case UIns: + visitins(u->u.ins, cp, &w); + break; + case UJmp: + break; + } + } + for (b=fn->start; b; b=b->link) { + for (pp=&b->phi; (p=*pp);) { + r = cp[p->to.val]; + if (!req(r, p->to)) { + *pp = p->link; + continue; + } + for (a=0; a<p->narg; a++) + if (rtype(p->arg[a]) == RTmp) { + r = cp[p->arg[a].val]; + assert(!req(r, R)); + p->arg[a] = r; + } + pp=&p->link; + } + for (i=b->ins; i-b->ins < b->nins; i++) { + r = cp[i->to.val]; + if (!req(r, i->to)) { + *i = (Ins){.op = ONop}; + continue; + } + for (a=0; a<2; a++) + if (rtype(i->arg[a]) == RTmp) { + r = cp[i->arg[a].val]; + assert(!req(r, R)); + i->arg[a] = r; + } + } + if (rtype(b->jmp.arg) == RTmp) { + r = cp[b->jmp.arg.val]; + assert(!req(r, R)); + b->jmp.arg = r; + } + } + if (debug['C']) { + fprintf(stderr, "\n> Copy information:"); + for (t=Tmp0; t<fn->ntmp; t++) { + if (req(cp[t], R)) { + fprintf(stderr, "\n%10s not seen!", + fn->tmp[t].name); + } + else if (!req(cp[t], TMP(t))) { + fprintf(stderr, "\n%10s copy of ", + fn->tmp[t].name); + printref(cp[t], fn, stderr); + } + } + fprintf(stderr, "\n\n> After copy elimination:\n"); + printfn(fn, stderr); + } + free(cp); +} diff --git a/src/emit.c b/src/emit.c @@ -0,0 +1,666 @@ +#include "all.h" + +enum { + SLong = 0, + SWord = 1, + SShort = 2, + SByte = 3, + + Ki = -1, /* matches Kw and Kl */ + Ka = -2, /* matches all classes */ +}; + +/* Instruction format strings: + * + * if the format string starts with -, the instruction + * is assumed to be 3-address and is put in 2-address + * mode using an extra mov if necessary + * + * if the format string starts with +, the same as the + * above applies, but commutativity is also assumed + * + * %k is used to set the class of the instruction, + * it'll expand to "l", "q", "ss", "sd", depending + * on the instruction class + * %0 
designates the first argument + * %1 designates the second argument + * %= designates the result + * + * if %k is not used, a prefix to 0, 1, or = must be + * added, it can be: + * M - memory reference + * L - long (64 bits) + * W - word (32 bits) + * H - short (16 bits) + * B - byte (8 bits) + * S - single precision float + * D - double precision float + */ +static struct { + short op; + short cls; + char *asm; +} omap[] = { + { OAdd, Ka, "+add%k %1, %=" }, + { OSub, Ka, "-sub%k %1, %=" }, + { OAnd, Ki, "+and%k %1, %=" }, + { OOr, Ki, "+or%k %1, %=" }, + { OXor, Ki, "+xor%k %1, %=" }, + { OSar, Ki, "-sar%k %B1, %=" }, + { OShr, Ki, "-shr%k %B1, %=" }, + { OShl, Ki, "-shl%k %B1, %=" }, + { OMul, Ki, "+imul%k %1, %=" }, + { OMul, Ks, "+mulss %1, %=" }, /* fixme */ + { OMul, Kd, "+mulsd %1, %=" }, + { ODiv, Ka, "-div%k %1, %=" }, + { OStorel, Ka, "movq %L0, %M1" }, + { OStorew, Ka, "movl %W0, %M1" }, + { OStoreh, Ka, "movw %H0, %M1" }, + { OStoreb, Ka, "movb %B0, %M1" }, + { OStores, Ka, "movss %S0, %M1" }, + { OStored, Ka, "movsd %D0, %M1" }, + { OLoad, Ka, "mov%k %M0, %=" }, + { OLoadsw, Kl, "movslq %M0, %L=" }, + { OLoadsw, Kw, "movl %M0, %W=" }, + { OLoaduw, Ki, "movl %M0, %W=" }, + { OLoadsh, Ki, "movsw%k %M0, %=" }, + { OLoaduh, Ki, "movzw%k %M0, %=" }, + { OLoadsb, Ki, "movsb%k %M0, %=" }, + { OLoadub, Ki, "movzb%k %M0, %=" }, + { OExtsw, Kl, "movslq %W0, %L=" }, + { OExtuw, Kl, "movl %W0, %W=" }, + { OExtsh, Ki, "movsw%k %H0, %=" }, + { OExtuh, Ki, "movzw%k %H0, %=" }, + { OExtsb, Ki, "movsb%k %B0, %=" }, + { OExtub, Ki, "movzb%k %B0, %=" }, + + { OExts, Kd, "cvtss2sd %0, %=" }, /* see if factorization is possible */ + { OTruncd, Ks, "cvttsd2ss %0, %=" }, + { OFtosi, Kw, "cvttss2si %0, %=" }, + { OFtosi, Kl, "cvttsd2si %0, %=" }, + { OSitof, Ks, "cvtsi2ss %W0, %=" }, + { OSitof, Kd, "cvtsi2sd %L0, %=" }, + { OCast, Ki, "movq %D0, %L=" }, + { OCast, Ka, "movq %L0, %D=" }, + + { OAddr, Ki, "lea%k %M0, %=" }, + { OSwap, Ki, "xchg%k %0, %1" }, + { OSign, Kl, 
"cqto" }, + { OSign, Kw, "cltd" }, + { OXDiv, Ki, "div%k %0" }, + { OXIDiv, Ki, "idiv%k %0" }, + { OXCmp, Ks, "comiss %S0, %S1" }, /* fixme, Kf */ + { OXCmp, Kd, "comisd %D0, %D1" }, + { OXCmp, Ki, "cmp%k %0, %1" }, + { OXTest, Ki, "test%k %0, %1" }, + { OXSet+ICeq, Ki, "setz %B=\n\tmovzb%k %B=, %=" }, + { OXSet+ICsle, Ki, "setle %B=\n\tmovzb%k %B=, %=" }, + { OXSet+ICslt, Ki, "setl %B=\n\tmovzb%k %B=, %=" }, + { OXSet+ICsgt, Ki, "setg %B=\n\tmovzb%k %B=, %=" }, + { OXSet+ICsge, Ki, "setge %B=\n\tmovzb%k %B=, %=" }, + { OXSet+ICne, Ki, "setnz %B=\n\tmovzb%k %B=, %=" }, + { OXSet+ICXnp, Ki, "setnp %B=\n\tmovsb%k %B=, %=" }, + { OXSet+ICXp, Ki, "setp %B=\n\tmovsb%k %B=, %=" }, + { NOp, 0, 0 } +}; + +static char *rname[][4] = { + [RAX] = {"rax", "eax", "ax", "al"}, + [RBX] = {"rbx", "ebx", "bx", "bl"}, + [RCX] = {"rcx", "ecx", "cx", "cl"}, + [RDX] = {"rdx", "edx", "dx", "dl"}, + [RSI] = {"rsi", "esi", "si", "sil"}, + [RDI] = {"rdi", "edi", "di", "dil"}, + [RBP] = {"rbp", "ebp", "bp", "bpl"}, + [RSP] = {"rsp", "esp", "sp", "spl"}, + [R8 ] = {"r8" , "r8d", "r8w", "r8b"}, + [R9 ] = {"r9" , "r9d", "r9w", "r9b"}, + [R10] = {"r10", "r10d", "r10w", "r10b"}, + [R11] = {"r11", "r11d", "r11w", "r11b"}, + [R12] = {"r12", "r12d", "r12w", "r12b"}, + [R13] = {"r13", "r13d", "r13w", "r13b"}, + [R14] = {"r14", "r14d", "r14w", "r14b"}, + [R15] = {"r15", "r15d", "r15w", "r15b"}, +}; + + +static int +slot(int s, Fn *fn) +{ + struct { int i:14; } x; + + /* sign extend s using a bitfield */ + x.i = s; + assert(NAlign == 3); + if (x.i < 0) + return -4 * x.i; + else { + assert(fn->slot >= x.i); + return -4 * (fn->slot - x.i); + } +} + +static void +emitcon(Con *con, FILE *f) +{ + switch (con->type) { + default: + diag("emit: invalid constant"); + case CAddr: + fputs(con->label, f); + if (con->bits.i) + fprintf(f, "%+"PRId64, con->bits.i); + break; + case CBits: + fprintf(f, "%"PRId64, con->bits.i); + break; + } +} + +static char * +regtoa(int reg, int sz) +{ + static char buf[6]; + + if 
(reg >= XMM0) { + sprintf(buf, "xmm%d", reg-XMM0); + return buf; + } else + return rname[reg][sz]; +} + +static Ref +getarg(char c, Ins *i) +{ + switch (c) { + default: + diag("emit: 0, 1, = expected in format"); + case '0': + return i->arg[0]; + case '1': + return i->arg[1]; + case '=': + return i->to; + } +} + +static void emitins(Ins, Fn *, FILE *); + +static void +emitcopy(Ref r1, Ref r2, int k, Fn *fn, FILE *f) +{ + Ins icp; + + icp.op = OCopy; + icp.arg[0] = r2; + icp.to = r1; + icp.cls = k; + emitins(icp, fn, f); +} + +static void +emitf(char *s, Ins *i, Fn *fn, FILE *f) +{ + static char clstoa[][3] = {"l", "q", "ss", "sd"}; + char c; + int sz; + Ref ref; + Mem *m; + Con off; + + switch (*s) { + case '+': + if (req(i->arg[1], i->to)) { + ref = i->arg[0]; + i->arg[0] = i->arg[1]; + i->arg[1] = ref; + } + /* fall through */ + case '-': + if (req(i->arg[1], i->to) && !req(i->arg[0], i->to)) + diag("emit: cannot convert to 2-address"); + emitcopy(i->to, i->arg[0], i->cls, fn, f); + s++; + break; + } + + fputc('\t', f); +Next: + while ((c = *s++) != '%') + if (!c) { + fputc('\n', f); + return; + } else + fputc(c, f); + switch ((c = *s++)) { + default: + diag("emit: invalid escape"); + case '%': + fputc('%', f); + break; + case 'k': + fputs(clstoa[i->cls], f); + break; + case '0': + case '1': + case '=': + sz = KWIDE(i->cls) ? 
SLong : SWord; + s--; + /* fall through */ + case 'D': + case 'S': + Ref: + c = *s++; + ref = getarg(c, i); + switch (rtype(ref)) { + default: + diag("emit: invalid reference"); + case RTmp: + assert(isreg(ref)); + fprintf(f, "%%%s", regtoa(ref.val, sz)); + break; + case RSlot: + fprintf(f, "%d(%%rbp)", slot(ref.val, fn)); + break; + case RAMem: + Mem: + m = &fn->mem[ref.val & AMask]; + if (rtype(m->base) == RSlot) { + off.type = CBits; + off.bits.i = slot(m->base.val, fn); + addcon(&m->offset, &off); + m->base = TMP(RBP); + } + if (m->offset.type != CUndef) + emitcon(&m->offset, f); + if (req(m->base, R) && req(m->index, R)) + break; + fputc('(', f); + if (!req(m->base, R)) + fprintf(f, "%%%s", regtoa(m->base.val, SLong)); + if (!req(m->index, R)) + fprintf(f, ", %%%s, %d", + regtoa(m->index.val, SLong), + m->scale + ); + fputc(')', f); + break; + case RCon: + fputc('$', f); + emitcon(&fn->con[ref.val], f); + break; + } + break; + case 'L': + sz = SLong; + goto Ref; + case 'W': + sz = SWord; + goto Ref; + case 'H': + sz = SShort; + goto Ref; + case 'B': + sz = SByte; + goto Ref; + case 'M': + c = *s++; + ref = getarg(c, i); + switch (rtype(ref)) { + default: + diag("emit: invalid memory reference"); + case RAMem: + goto Mem; + case RSlot: + fprintf(f, "%d(%%rbp)", slot(ref.val, fn)); + break; + case RCon: + emitcon(&fn->con[ref.val], f); + fprintf(f, "(%%rip)"); + break; + case RTmp: + assert(isreg(ref)); + fprintf(f, "(%%%s)", regtoa(ref.val, SLong)); + break; + } + break; + } + goto Next; +} + +static void +emitins(Ins i, Fn *fn, FILE *f) +{ + Ref r; + int64_t val; + int o; + + switch (i.op) { + default: + Table: + /* most instructions are just pulled out of + * the table omap[], some special cases are + * detailed below */ + for (o=0;; o++) { + /* this linear search should really be a binary + * search */ + if (omap[o].op == NOp) + diag("emit: no entry found for instruction"); + if (omap[o].op == i.op) + if (omap[o].cls == i.cls + || (omap[o].cls == Ki && 
KBASE(i.cls) == 0) + || (omap[o].cls == Ka)) + break; + } + emitf(omap[o].asm, &i, fn, f); + break; + case ONop: + /* just do nothing for nops, they are inserted + * by some passes */ + break; + case OMul: + /* here, we try to use the 3-addresss form + * of multiplication when possible */ + if (rtype(i.arg[1]) == RCon) { + r = i.arg[0]; + i.arg[0] = i.arg[1]; + i.arg[1] = r; + } + if (KBASE(i.cls) == 0 /* only available for ints */ + && rtype(i.arg[0]) == RCon + && rtype(i.arg[1]) == RTmp) { + emitf("imul%k %0, %1, %=", &i, fn, f); + break; + } + goto Table; + case OSub: + /* we have to use the negation trick to handle + * some 3-address substractions */ + if (req(i.to, i.arg[1])) { + emitf("neg%k %=", &i, fn, f); + emitf("add%k %0, %=", &i, fn, f); + break; + } + goto Table; + case OCopy: + /* make sure we don't emit useless copies, + * also, we can use a trick to load 64-bits + * registers, it's detailed in my note below + * http://c9x.me/art/notes.html?09/19/2015 */ + if (req(i.to, R) || req(i.arg[0], R)) + break; + if (isreg(i.to) + && rtype(i.arg[0]) == RCon + && i.cls == Kl + && fn->con[i.arg[0].val].type == CBits + && (val = fn->con[i.arg[0].val].bits.i) >= 0 + && val <= UINT32_MAX) { + emitf("movl %W0, %W=", &i, fn, f); + } else if (!req(i.arg[0], i.to)) + emitf("mov%k %0, %=", &i, fn, f); + break; + case OCall: + /* calls simply have a weird syntax in AT&T + * assembly... 
*/ + switch (rtype(i.arg[0])) { + default: + diag("emit: invalid call instruction"); + case RCon: + fprintf(f, "\tcallq "); + emitcon(&fn->con[i.arg[0].val], f); + fprintf(f, "\n"); + break; + case RTmp: + emitf("callq *%L0", &i, fn, f); + break; + } + break; + case OSAlloc: + /* there is no good reason why this is here + * maybe we should split OSAlloc in 2 different + * instructions depending on the result + */ + emitf("subq %L0, %%rsp", &i, fn, f); + if (!req(i.to, R)) + emitcopy(i.to, TMP(RSP), Kl, fn, f); + break; + case OSwap: + if (KBASE(i.cls) == 0) + goto Table; + /* for floats, there is no swap instruction + * so we use xmm15 as a temporary + */ + emitcopy(TMP(XMM0+15), i.arg[0], i.cls, fn, f); + emitcopy(i.arg[0], i.arg[1], i.cls, fn, f); + emitcopy(i.arg[1], TMP(XMM0+15), i.cls, fn, f); + break; + } +} + +static int +cneg(int cmp) +{ + switch (cmp) { + default: diag("emit: cneg() unhandled comparison"); + case ICule: return ICugt; + case ICult: return ICuge; + case ICsle: return ICsgt; + case ICslt: return ICsge; + case ICsgt: return ICsle; + case ICsge: return ICslt; + case ICugt: return ICule; + case ICuge: return ICult; + case ICeq: return ICne; + case ICne: return ICeq; + case ICXnp: return ICXp; + case ICXp: return ICXnp; + } +} + +static int +framesz(Fn *fn) +{ + int i, o, f; + + assert(NAlign == 3); + for (i=0, o=0; i<NRClob; i++) + o ^= 1 & (fn->reg >> rclob[i]); + f = fn->slot; + f = (f + 3) & -4; + return 4*f + 8*o; +} + +void +emitfn(Fn *fn, FILE *f) +{ + static char *ctoa[] = { + [ICeq] = "z", + [ICule] = "be", + [ICult] = "b", + [ICsle] = "le", + [ICslt] = "l", + [ICsgt] = "g", + [ICsge] = "ge", + [ICugt] = "a", + [ICuge] = "ae", + [ICne] = "nz", + [ICXnp] = "np", + [ICXp] = "p" + }; + Blk *b, *s; + Ins *i, itmp; + int *r, c, fs; + + fprintf(f, + ".text\n" + ".globl %s\n" + ".type %s, @function\n" + "%s:\n" + "\tpush %%rbp\n" + "\tmov %%rsp, %%rbp\n", + fn->name, fn->name, fn->name + ); + fs = framesz(fn); + if (fs) + fprintf(f, "\tsub $%d, 
%%rsp\n", fs); + for (r=rclob; r-rclob < NRClob; r++) + if (fn->reg & BIT(*r)) { + itmp.arg[0] = TMP(*r); + emitf("pushq %L0", &itmp, fn, f); + } + + for (b=fn->start; b; b=b->link) { + fprintf(f, ".L%s:\n", b->name); + for (i=b->ins; i!=&b->ins[b->nins]; i++) + emitins(*i, fn, f); + switch (b->jmp.type) { + case JRet0: + for (r=&rclob[NRClob]; r>rclob;) + if (fn->reg & BIT(*--r)) { + itmp.arg[0] = TMP(*r); + emitf("popq %L0", &itmp, fn, f); + } + fprintf(f, + "\tleave\n" + "\tret\n" + ); + break; + case JJmp: + if (b->s1 != b->link) + fprintf(f, "\tjmp .L%s\n", b->s1->name); + break; + default: + c = b->jmp.type - JXJc; + if (0 <= c && c <= NXICmp) { + if (b->link == b->s2) { + s = b->s1; + } else if (b->link == b->s1) { + c = cneg(c); + s = b->s2; + } else + diag("emit: unhandled jump (1)"); + fprintf(f, "\tj%s .L%s\n", ctoa[c], s->name); + break; + } + diag("emit: unhandled jump (2)"); + } + } + +} + +void +emitdat(Dat *d, FILE *f) +{ + static int align; + static char *dtoa[] = { + [DAlign] = ".align", + [DB] = "\t.byte", + [DH] = "\t.value", + [DW] = "\t.long", + [DL] = "\t.quad" + }; + + switch (d->type) { + case DStart: + align = 0; + fprintf(f, ".data\n"); + break; + case DEnd: + break; + case DName: + if (!align) + fprintf(f, ".align 8\n"); + fprintf(f, + ".globl %s\n" + ".type %s, @object\n" + "%s:\n", + d->u.str, d->u.str, d->u.str + ); + break; + case DZ: + fprintf(f, "\t.fill %"PRId64",1,0\n", d->u.num); + break; + default: + if (d->type == DAlign) + align = 1; + + if (d->isstr) { + if (d->type != DB) + err("strings only supported for 'b' currently"); + fprintf(f, "\t.ascii \"%s\"\n", d->u.str); + } + else if (d->isref) { + fprintf(f, "%s %s%+"PRId64"\n", + dtoa[d->type], d->u.ref.nam, + d->u.ref.off); + } + else { + fprintf(f, "%s %"PRId64"\n", + dtoa[d->type], d->u.num); + } + break; + } +} + +typedef struct FBits FBits; + +struct FBits { + int64_t bits; + int wide; + FBits *link; +}; + +static FBits *stash; + +int +stashfp(int64_t n, int w) +{ + 
FBits **pb, *b; + int i; + + /* does a dumb de-dup of fp constants + * this should be the linker's job */ + for (pb=&stash, i=0; (b=*pb); pb=&b->link, i++) + if (n == b->bits && w == b->wide) + return i; + b = emalloc(sizeof *b); + b->bits = n; + b->wide = w; + b->link = 0; + *pb = b; + return i; +} + +void +emitfin(FILE *f) +{ + FBits *b; + int i; + + if (!stash) + return; + fprintf(f, "/* floating point constants */\n"); + fprintf(f, ".data\n.align 8\n"); + for (b=stash, i=0; b; b=b->link, i++) + if (b->wide) + fprintf(f, + ".Lfp%d:\n" + "\t.quad %"PRId64 + " /* %f */\n", + i, b->bits, + *(double *)&b->bits + ); + for (b=stash, i=0; b; b=b->link, i++) + if (!b->wide) + fprintf(f, + ".Lfp%d:\n" + "\t.long %"PRId64 + " /* %lf */\n", + i, b->bits & 0xffffffff, + *(float *)&b->bits + ); + while ((b=stash)) { + stash = b->link; + free(b); + } +} diff --git a/src/isel.c b/src/isel.c @@ -0,0 +1,1135 @@ +#include "all.h" +#include <limits.h> + +/* For x86_64, do the following: + * + * - lower calls + * - check that constants are used only in + * places allowed + * - ensure immediates always fit in 32b + * - explicit machine register contraints + * on instructions like division. + * - implement fast locals (the streak of + * constant allocX in the first basic block) + * - recognize complex addressing modes + * + * Invariant: the use counts that are used + * in sel() must be sound. This + * is not so trivial, maybe the + * dce should be moved out... 
+ */ + +typedef struct ANum ANum; +typedef struct AClass AClass; +typedef struct RAlloc RAlloc; + +struct ANum { + char n, l, r; + Ins *i; + Ref mem; +}; + +static void amatch(Addr *, Ref, ANum *, Fn *, int); + +static int +fcmptoi(int fc) +{ + switch (fc) { + default: diag("isel: fcmptoi defaulted"); + case FCle: return ICule; + case FClt: return ICult; + case FCgt: return ICugt; + case FCge: return ICuge; + case FCne: return ICne; + case FCeq: return ICeq; + case FCo: return ICXnp; + case FCuo: return ICXp; + } +} + +static int +iscmp(int op, int *pk, int *pc) +{ + int k, c; + + if (OCmpw <= op && op <= OCmpw1) { + c = op - OCmpw; + k = Kw; + } + else if (OCmpl <= op && op <= OCmpl1) { + c = op - OCmpl; + k = Kl; + } + else if (OCmps <= op && op <= OCmps1) { + c = fcmptoi(op - OCmps); + k = Ks; + } + else if (OCmpd <= op && op <= OCmpd1) { + c = fcmptoi(op - OCmpd); + k = Kd; + } + else + return 0; + if (pk) + *pk = k; + if (pc) + *pc = c; + return 1; +} + +static int +noimm(Ref r, Fn *fn) +{ + int64_t val; + + if (rtype(r) != RCon) + return 0; + switch (fn->con[r.val].type) { + default: + diag("isel: invalid constant"); + case CAddr: + /* we only support the 'small' + * code model of the ABI, this + * means that we can always + * address data with 32bits + */ + return 0; + case CBits: + val = fn->con[r.val].bits.i; + return (val < INT32_MIN || val > INT32_MAX); + } +} + +static int +rslot(Ref r, Fn *fn) +{ + if (rtype(r) != RTmp) + return -1; + return fn->tmp[r.val].slot; +} + +static int +argcls(Ins *i, int n) +{ + return opdesc[i->op].argcls[n][i->cls]; +} + +static void +fixarg(Ref *r, int k, int phi, Fn *fn) +{ + Addr a; + Ref r0, r1; + int s, n; + + r1 = r0 = *r; + s = rslot(r0, fn); + if (KBASE(k) == 1 && rtype(r0) == RCon) { + /* load floating points from memory + * slots, they can't be used as + * immediates + */ + r1 = MEM(fn->nmem); + vgrow(&fn->mem, ++fn->nmem); + memset(&a, 0, sizeof a); + a.offset.type = CAddr; + n = stashfp(fn->con[r0.val].bits.i, 
KWIDE(k)); + sprintf(a.offset.label, ".Lfp%d", n); + fn->mem[fn->nmem-1] = a; + } + else if (!phi && k == Kl && noimm(r0, fn)) { + /* load constants that do not fit in + * a 32bit signed integer into a + * long temporary + */ + r1 = newtmp("isel", Kl, fn); + emit(OCopy, Kl, r1, r0, R); + } + else if (s != -1) { + /* load fast locals' addresses into + * temporaries right before the + * instruction + */ + r1 = newtmp("isel", Kl, fn); + emit(OAddr, Kl, r1, SLOT(s), R); + } + *r = r1; +} + +static void +chuse(Ref r, int du, Fn *fn) +{ + if (rtype(r) == RTmp) + fn->tmp[r.val].nuse += du; +} + +static void +seladdr(Ref *r, ANum *an, Fn *fn) +{ + Addr a; + Ref r0, r1; + + r0 = *r; + if (rtype(r0) == RTmp) { + chuse(r0, -1, fn); + r1 = an[r0.val].mem; + if (req(r1, R)) { + amatch(&a, r0, an, fn, 1); + vgrow(&fn->mem, ++fn->nmem); + fn->mem[fn->nmem-1] = a; + r1 = MEM(fn->nmem-1); + chuse(a.base, +1, fn); + chuse(a.index, +1, fn); + if (rtype(a.base) != RTmp) + if (rtype(a.index) != RTmp) + an[r0.val].mem = r1; + } + *r = r1; + } +} + +static void +selcmp(Ref arg[2], int k, Fn *fn) +{ + Ref r; + + if (rtype(arg[0]) == RCon) { + r = arg[1]; + arg[1] = arg[0]; + arg[0] = r; + } + assert(rtype(arg[0]) != RCon); + emit(OXCmp, k, R, arg[1], arg[0]); + fixarg(&curi->arg[0], k, 0, fn); +} + +static void +sel(Ins i, ANum *an, Fn *fn) +{ + Ref r0, r1; + int x, k, kc; + int64_t val; + Ins *i0; + + if (rtype(i.to) == RTmp) + if (!isreg(i.to) && !isreg(i.arg[0]) && !isreg(i.arg[1])) + if (fn->tmp[i.to.val].nuse == 0) { + chuse(i.arg[0], -1, fn); + chuse(i.arg[1], -1, fn); + return; + } + i0 = curi; + k = i.cls; + switch (i.op) { + case ODiv: + case ORem: + case OUDiv: + case OURem: + if (i.op == ODiv || i.op == OUDiv) + r0 = TMP(RAX), r1 = TMP(RDX); + else + r0 = TMP(RDX), r1 = TMP(RAX); + emit(OCopy, k, i.to, r0, R); + emit(OCopy, k, R, r1, R); + if (rtype(i.arg[1]) == RCon) { + /* immediates not allowed for + * divisions in x86 + */ + r0 = newtmp("isel", k, fn); + } else + r0 = 
i.arg[1]; + if (i.op == ODiv || i.op == ORem) { + emit(OXIDiv, k, R, r0, R); + emit(OSign, k, TMP(RDX), TMP(RAX), R); + } else { + emit(OXDiv, k, R, r0, R); + emit(OCopy, k, TMP(RDX), CON_Z, R); + } + emit(OCopy, k, TMP(RAX), i.arg[0], R); + if (rtype(i.arg[1]) == RCon) + emit(OCopy, k, r0, i.arg[1], R); + break; + case OSar: + case OShr: + case OShl: + if (rtype(i.arg[1]) == RCon) + goto Emit; + r0 = i.arg[1]; + i.arg[1] = TMP(RCX); + emit(OCopy, Kw, R, TMP(RCX), R); + emiti(i); + emit(OCopy, Kw, TMP(RCX), r0, R); + break; + case ONop: + break; + case OStored: + case OStores: + case OStorel: + case OStorew: + case OStoreh: + case OStoreb: + if (rtype(i.arg[0]) == RCon) { + if (i.op == OStored) + i.op = OStorel; + if (i.op == OStores) + i.op = OStorew; + } + seladdr(&i.arg[1], an, fn); + goto Emit; + case_OLoad: + seladdr(&i.arg[0], an, fn); + goto Emit; + case OCall: + case OSAlloc: + case OCopy: + case OAdd: + case OSub: + case OMul: + case OAnd: + case OOr: + case OXor: + case OXTest: + case OFtosi: + case OSitof: + case OExts: + case OTruncd: + case OCast: + case_OExt: +Emit: + emiti(i); + fixarg(&curi->arg[0], argcls(curi, 0), 0, fn); + fixarg(&curi->arg[1], argcls(curi, 1), 0, fn); + break; + case OAlloc: + case OAlloc+1: + case OAlloc+2: /* == OAlloc1 */ + /* we need to make sure + * the stack remains aligned + * (rsp = 0) mod 16 + */ + if (rtype(i.arg[0]) == RCon) { + assert(fn->con[i.arg[0].val].type == CBits); + val = fn->con[i.arg[0].val].bits.i; + val = (val + 15) & ~INT64_C(15); + if (val < 0 || val > INT32_MAX) + diag("isel: alloc too large"); + emit(OSAlloc, Kl, i.to, getcon(val, fn), R); + } else { + /* r0 = (i.arg[0] + 15) & -16 */ + r0 = newtmp("isel", Kl, fn); + r1 = newtmp("isel", Kl, fn); + emit(OSAlloc, Kl, i.to, r0, R); + emit(OAnd, Kl, r0, r1, getcon(-16, fn)); + emit(OAdd, Kl, r1, i.arg[0], getcon(15, fn)); + } + break; + default: + if (isext(i.op)) + goto case_OExt; + if (isload(i.op)) + goto case_OLoad; + if (iscmp(i.op, &kc, &x)) { + if 
/* Classify the aggregate type t for argument/return passing and
 * store the result in a.
 *
 * a->size receives the size of t rounded up to a multiple of
 * max(8, 1 << t->align), a->align receives t->align unchanged.
 * a->inmem is set to 1 when t is marked dark or the rounded size
 * exceeds 16 bytes; in that case a->cls[] is left untouched.
 * Otherwise a->inmem is 0 and a->cls[0], a->cls[1] receive the
 * class of each 8-byte half: Kl, Kd, or -1 when the half holds
 * no non-padding segment.
 */
static void
aclass(AClass *a, Typ *t)
{
	int e, s, n, cls;
	uint sz, al;

	sz = t->size;
	al = 1u << t->align;

	/* the ABI requires sizes to be rounded
	 * up to the nearest multiple of 8, moreover
	 * it makes it easy to load and store structures
	 * in registers
	 */
	if (al < 8)
		al = 8;
	sz = (sz + al-1) & -al;

	a->size = sz;
	a->align = t->align;

	if (t->dark || sz > 16) {
		/* large or unaligned structures are
		 * required to be passed in memory
		 */
		a->inmem = 1;
		return;
	}

	a->inmem = 0;
	/* e walks the two 8-byte halves; s walks the segments of t
	 * and is not reset between halves, so the second half
	 * resumes where the first one stopped
	 */
	for (e=0, s=0; e<2; e++) {
		cls = -1;
		/* consume segments until 8 bytes are covered or a
		 * zero-length segment is reached
		 */
		for (n=0; n<8 && t->seg[s].len; s++) {
			if (t->seg[s].ispad) {
				/* don't change anything */
			}
			else if (t->seg[s].isflt) {
				/* a float segment only decides the class
				 * when nothing else did before
				 */
				if (cls == -1)
					cls = Kd;
			}
			else
				/* any non-float segment forces Kl */
				cls = Kl;
			n += t->seg[s].len;
		}
		/* segments must not straddle the 8-byte boundary */
		assert(n <= 8);
		a->cls[e] = cls;
	}
}
*/ + for (boff=0; sz>0; sz-=8, soff+=8, boff+=8) { + r = newtmp("abi", Kl, fn); + r1 = newtmp("abi", Kl, fn); + emit(OStorel, 0, R, r, r1); + emit(OAdd, Kl, r1, rstk, getcon(soff, fn)); + r1 = newtmp("abi", Kl, fn); + emit(OLoad, Kl, r, r1, R); + emit(OAdd, Kl, r1, rsrc, getcon(boff, fn)); + chuse(rsrc, +1, fn); + chuse(rstk, +1, fn); + } +} + +static int +retr(Ref reg[2], AClass *aret) +{ + static int retreg[2][2] = {{RAX, RDX}, {XMM0, XMM0+1}}; + int n, k, ca, nr[2]; + + nr[0] = nr[1] = 0; + ca = 0; + for (n=0; aret->cls[n]>=0 && n<2; n++) { + k = KBASE(aret->cls[n]); + reg[n] = TMP(retreg[k][nr[k]++]); + ca += 1 << (2 * k); + } + return ca; +} + +static void +selret(Blk *b, Fn *fn) +{ + int j, k, ca; + Ref r, r0, reg[2]; + AClass aret; + + j = b->jmp.type; + + if (!isret(j) || j == JRet0) + return; + + r0 = b->jmp.arg; + b->jmp.type = JRet0; + + if (j == JRetc) { + aclass(&aret, &typ[fn->retty]); + if (aret.inmem) { + assert(rtype(fn->retr) == RTmp); + emit(OCopy, Kl, TMP(RAX), fn->retr, R); + chuse(fn->retr, +1, fn); + blit(fn->retr, 0, r0, aret.size, fn); + ca = 1; + } else { + ca = retr(reg, &aret); + if (aret.size > 8) { + r = newtmp("abi", Kl, fn); + emit(OLoad, Kl, reg[1], r, R); + emit(OAdd, Kl, r, r0, getcon(8, fn)); + chuse(r0, +1, fn); + } + emit(OLoad, Kl, reg[0], r0, R); + } + } else { + k = j - JRetw; + if (KBASE(k) == 0) { + emit(OCopy, k, TMP(RAX), r0, R); + ca = 1; + } else { + emit(OCopy, k, TMP(XMM0), r0, R); + ca = 1 << 2; + } + } + + b->jmp.arg = CALL(ca); +} + +static void +seljmp(Blk *b, Fn *fn) +{ + Ref r; + int c, k; + Ins *fi; + + if (b->jmp.type == JRet0 || b->jmp.type == JJmp) + return; + assert(b->jmp.type == JJnz); + r = b->jmp.arg; + b->jmp.arg = R; + assert(!req(r, R)); + if (rtype(r) == RCon) { + b->jmp.type = JJmp; + if (req(r, CON_Z)) + b->s1 = b->s2; + b->s2 = 0; + return; + } + fi = flagi(b->ins, &b->ins[b->nins]); + if (fi && req(fi->to, r)) { + if (iscmp(fi->op, &k, &c)) { + if (rtype(fi->arg[0]) == RCon) + c = icmpop(c); + 
/* Classify the arguments or parameters in [i0, i1) and decide
 * which ones go in registers.
 *
 * ac must have one entry per instruction; entry n describes
 * instruction i0+n.  op is the opcode of plain (non-aggregate)
 * items; any other instruction is treated as an aggregate whose
 * type index is taken from its first argument.  aret, when not
 * null, is the classification of the returned aggregate; if it
 * is returned in memory one integer register is reserved for
 * the hidden argument.
 *
 * On return a->inmem is 0 for items passed in registers, 1 for
 * aggregates passed in memory and 2 for plain items passed in
 * memory.
 *
 * Returns the number of integer registers consumed in bits 4-7
 * and the number of sse registers consumed in bits 8-11.
 */
static int
classify(Ins *i0, Ins *i1, AClass *ac, int op, AClass *aret)
{
	int nint, ni, nsse, ns, n, *pn;
	AClass *a;
	Ins *i;

	/* nint and nsse count the registers still available */
	if (aret && aret->inmem)
		nint = 5; /* hidden argument */
	else
		nint = 6;
	nsse = 8;
	for (i=i0, a=ac; i<i1; i++, a++) {
		if (i->op == op) {
			/* plain item: takes one register of its kind
			 * if any is left
			 */
			if (KBASE(i->cls) == 0)
				pn = &nint;
			else
				pn = &nsse;
			if (*pn > 0) {
				--*pn;
				a->inmem = 0;
			} else
				a->inmem = 2;
			a->align = 3;
			a->size = 8;
			a->cls[0] = i->cls;
		} else {
			/* aggregate item */
			n = i->arg[0].val & AMask;
			aclass(a, &typ[n]);
			if (a->inmem)
				continue;
			/* count the registers of each kind needed by
			 * the two halves.
			 * NOTE(review): aclass() stores -1 in cls[n] for
			 * an empty half and that value is still passed
			 * to KBASE() here; depending on how KBASE treats
			 * -1 an empty half may be counted as needing a
			 * register -- confirm this is intended.
			 */
			ni = ns = 0;
			for (n=0; n<2; n++)
				if (KBASE(a->cls[n]) == 0)
					ni++;
				else
					ns++;
			/* an aggregate goes either entirely in
			 * registers or entirely in memory
			 */
			if (nint >= ni && nsse >= ns) {
				nint -= ni;
				nsse -= ns;
			} else
				a->inmem = 1;
		}
	}

	/* 6-nint also counts the hidden argument register, if any */
	return ((6-nint) << 4) | ((8-nsse) << 8);
}
ni, nf; + + assert(rtype(r) == RACall); + b = 0; + ni = r.val & 3; + nf = (r.val >> 2) & 3; + if (ni >= 1) + b |= BIT(RAX); + if (ni >= 2) + b |= BIT(RDX); + if (nf >= 1) + b |= BIT(XMM0); + if (nf >= 2) + b |= BIT(XMM1); + if (p) { + p[0] = ni; + p[1] = nf; + } + return b; +} + +bits +argregs(Ref r, int p[2]) +{ + bits b; + int j, ni, nf; + + assert(rtype(r) == RACall); + b = 0; + ni = (r.val >> 4) & 15; + nf = (r.val >> 8) & 15; + for (j=0; j<ni; j++) + b |= BIT(rsave[j]); + for (j=0; j<nf; j++) + b |= BIT(XMM0+j); + if (p) { + p[0] = ni + 1; + p[1] = nf; + } + return b | BIT(RAX); +} + +static Ref +rarg(int ty, int *ni, int *ns) +{ + if (KBASE(ty) == 0) + return TMP(rsave[(*ni)++]); + else + return TMP(XMM0 + (*ns)++); +} + +struct RAlloc { + Ins i; + RAlloc *link; +}; + +static void +selcall(Fn *fn, Ins *i0, Ins *i1, RAlloc **rap) +{ + Ins *i; + AClass *ac, *a, aret; + int ca, ni, ns; + uint stk, off; + Ref r, r1, r2, reg[2], regcp[2]; + RAlloc *ra; + + ac = alloc((i1-i0) * sizeof ac[0]); + if (!req(i1->arg[1], R)) { + assert(rtype(i1->arg[1]) == RAType); + aclass(&aret, &typ[i1->arg[1].val & AMask]); + ca = classify(i0, i1, ac, OArg, &aret); + } else + ca = classify(i0, i1, ac, OArg, 0); + + for (stk=0, a=&ac[i1-i0]; a>ac;) + if ((--a)->inmem) { + assert(a->align <= 4); + stk += a->size; + if (a->align == 4) + stk += stk & 15; + } + stk += stk & 15; + if (stk) { + r = getcon(-(int64_t)stk, fn); + emit(OSAlloc, Kl, R, r, R); + } + + if (!req(i1->arg[1], R)) { + if (aret.inmem) { + /* get the return location from eax + * it saves one callee-save reg */ + r1 = newtmp("abi", Kl, fn); + emit(OCopy, Kl, i1->to, TMP(RAX), R); + ca += 1; + } else { + if (aret.size > 8) { + r = newtmp("abi", Kl, fn); + regcp[1] = newtmp("abi", aret.cls[1], fn); + emit(OStorel, 0, R, regcp[1], r); + emit(OAdd, Kl, r, i1->to, getcon(8, fn)); + chuse(i1->to, +1, fn); + ca += 1 << (2 * KBASE(aret.cls[1])); + } + regcp[0] = newtmp("abi", aret.cls[0], fn); + emit(OStorel, 0, R, regcp[0], 
i1->to); + ca += 1 << (2 * KBASE(aret.cls[0])); + retr(reg, &aret); + if (aret.size > 8) + emit(OCopy, aret.cls[1], regcp[1], reg[1], R); + emit(OCopy, aret.cls[0], regcp[0], reg[0], R); + r1 = i1->to; + } + /* allocate return pad */ + ra = alloc(sizeof *ra); + assert(NAlign == 3); + aret.align -= 2; + if (aret.align < 0) + aret.align = 0; + ra->i.op = OAlloc + aret.align; + ra->i.cls = Kl; + ra->i.to = r1; + ra->i.arg[0] = getcon(aret.size, fn); + ra->link = (*rap); + *rap = ra; + } else { + ra = 0; + if (KBASE(i1->cls) == 0) { + emit(OCopy, i1->cls, i1->to, TMP(RAX), R); + ca += 1; + } else { + emit(OCopy, i1->cls, i1->to, TMP(XMM0), R); + ca += 1 << 2; + } + } + emit(OCall, i1->cls, R, i1->arg[0], CALL(ca)); + emit(OCopy, Kw, TMP(RAX), getcon((ca >> 8) & 15, fn), R); + + ni = ns = 0; + if (ra && aret.inmem) + emit(OCopy, Kl, rarg(Kl, &ni, &ns), ra->i.to, R); /* pass hidden argument */ + for (i=i0, a=ac; i<i1; i++, a++) { + if (a->inmem) + continue; + r1 = rarg(a->cls[0], &ni, &ns); + if (i->op == OArgc) { + if (a->size > 8) { + r2 = rarg(a->cls[1], &ni, &ns); + r = newtmp("abi", Kl, fn); + emit(OLoad, a->cls[1], r2, r, R); + emit(OAdd, Kl, r, i->arg[1], getcon(8, fn)); + chuse(i->arg[1], +1, fn); + } + emit(OLoad, a->cls[0], r1, i->arg[1], R); + } else + emit(OCopy, i->cls, r1, i->arg[0], R); + } + + if (!stk) + return; + + r = newtmp("abi", Kl, fn); + chuse(r, -1, fn); + for (i=i0, a=ac, off=0; i<i1; i++, a++) { + if (!a->inmem) + continue; + if (i->op == OArgc) { + if (a->align == 4) + off += off & 15; + blit(r, off, i->arg[1], a->size, fn); + } else { + r1 = newtmp("abi", Kl, fn); + emit(OStorel, 0, R, i->arg[0], r1); + emit(OAdd, Kl, r1, r, getcon(off, fn)); + chuse(r, +1, fn); + } + off += a->size; + } + emit(OSAlloc, Kl, r, getcon(stk, fn), R); +} + +static void +selpar(Fn *fn, Ins *i0, Ins *i1) +{ + AClass *ac, *a, aret; + Ins *i; + int ni, ns, s, al; + Ref r, r1; + + ac = alloc((i1-i0) * sizeof ac[0]); + curi = insb; + ni = ns = 0; + + if (fn->retty >= 
0) { + aclass(&aret, &typ[fn->retty]); + if (aret.inmem) { + r = newtmp("abi", Kl, fn); + *curi++ = (Ins){OCopy, r, {rarg(Kl, &ni, &ns)}, Kl}; + fn->retr = r; + } + classify(i0, i1, ac, OPar, &aret); + } else + classify(i0, i1, ac, OPar, 0); + + assert(NAlign == 3); + + s = 4; + for (i=i0, a=ac; i<i1; i++, a++) { + switch (a->inmem) { + case 1: + assert(a->align <= 4); + if (a->align == 4) + s = (s+3) & -4; + fn->tmp[i->to.val].slot = -s; /* HACK! */ + s += a->size / 4; + continue; + case 2: + *curi++ = (Ins){OLoad, i->to, {SLOT(-s)}, i->cls}; + s += 2; + continue; + } + r1 = rarg(a->cls[0], &ni, &ns); + if (i->op == OParc) { + r = newtmp("abi", Kl, fn); + *curi++ = (Ins){OCopy, r, {r1}, Kl}; + a->cls[0] = r.val; + if (a->size > 8) { + r1 = rarg(a->cls[1], &ni, &ns); + r = newtmp("abi", Kl, fn); + *curi++ = (Ins){OCopy, r, {r1}, Kl}; + a->cls[1] = r.val; + } + } else + *curi++ = (Ins){OCopy, i->to, {r1}, i->cls}; + } + for (i=i0, a=ac; i<i1; i++, a++) { + if (i->op != OParc || a->inmem) + continue; + assert(NAlign == 3); + for (al=0; a->align >> (al+2); al++) + ; + r = TMP(a->cls[0]); + r1 = i->to; + *curi++ = (Ins){OAlloc+al, r1, {getcon(a->size, fn)}, Kl}; + *curi++ = (Ins){OStorel, R, {r, r1}, 0}; + if (a->size > 8) { + r = newtmp("abi", Kl, fn); + *curi++ = (Ins){OAdd, r, {r1, getcon(8, fn)}, Kl}; + r1 = TMP(a->cls[1]); + *curi++ = (Ins){OStorel, R, {r1, r}, 0}; + } + } +} + +static int +aref(Ref r, ANum *ai) +{ + switch (rtype(r)) { + default: + diag("isel: aref defaulted"); + case RCon: + return 2; + case RTmp: + return ai[r.val].n; + } +} + +static int +ascale(Ref r, Con *con) +{ + int64_t n; + + if (rtype(r) != RCon) + return 0; + if (con[r.val].type != CBits) + return 0; + n = con[r.val].bits.i; + return n == 1 || n == 2 || n == 4 || n == 8; +} + +static void +anumber(ANum *ai, Blk *b, Con *con) +{ + /* This should be made obsolete by a proper + * reassoc pass. 
+ * + * Rules: + * + * RTmp(_) -> 0 tmp + * ( RTmp(_) -> 1 slot ) + * RCon(_) -> 2 con + * 0 * 2 -> 3 s * i (when constant is 1,2,4,8) + */ + static char add[10][10] = { + [2] [2] = 2, /* folding */ + [2] [5] = 5, [5] [2] = 5, + [2] [6] = 6, [6] [2] = 6, + [2] [7] = 7, [7] [2] = 7, + [0] [0] = 4, /* 4: b + s * i */ + [0] [3] = 4, [3] [0] = 4, + [2] [3] = 5, [3] [2] = 5, /* 5: o + s * i */ + [0] [2] = 6, [2] [0] = 6, /* 6: o + b */ + [2] [4] = 7, [4] [2] = 7, /* 7: o + b + s * i */ + [0] [5] = 7, [5] [0] = 7, + [6] [3] = 7, [3] [6] = 7, + + }; + int a, a1, a2, n1, n2, t1, t2; + Ins *i; + + for (i=b->ins; i-b->ins < b->nins; i++) { + if (rtype(i->to) == RTmp) + ai[i->to.val].i = i; + if (i->op != OAdd && i->op != OMul) + continue; + a1 = aref(i->arg[0], ai); + a2 = aref(i->arg[1], ai); + t1 = a1 != 1 && a1 != 2; + t2 = a2 != 1 && a2 != 2; + if (i->op == OAdd) { + a = add[n1 = a1][n2 = a2]; + if (t1 && a < add[0][a2]) + a = add[n1 = 0][n2 = a2]; + if (t2 && a < add[a1][0]) + a = add[n1 = a1][n2 = 0]; + if (t1 && t2 && a < add[0][0]) + a = add[n1 = 0][n2 = 0]; + } else { + n1 = n2 = a = 0; + if (ascale(i->arg[0], con) && t2) + a = 3, n1 = 2, n2 = 0; + if (t1 && ascale(i->arg[1], con)) + a = 3, n1 = 0, n2 = 2; + } + ai[i->to.val].n = a; + ai[i->to.val].l = n1; + ai[i->to.val].r = n2; + } +} + +static void +amatch(Addr *a, Ref r, ANum *ai, Fn *fn, int top) +{ + Ins *i; + int nl, nr, t, s; + Ref al, ar; + + if (top) + memset(a, 0, sizeof *a); + if (rtype(r) == RCon) { + addcon(&a->offset, &fn->con[r.val]); + return; + } + assert(rtype(r) == RTmp); + i = ai[r.val].i; + nl = ai[r.val].l; + nr = ai[r.val].r; + if (i) { + if (nl > nr) { + al = i->arg[1]; + ar = i->arg[0]; + t = nl, nl = nr, nr = t; + } else { + al = i->arg[0]; + ar = i->arg[1]; + } + } + switch (ai[r.val].n) { + default: + diag("isel: amatch defaulted"); + case 3: /* s * i */ + if (!top) { + a->index = al; + a->scale = fn->con[ar.val].bits.i; + } else + a->base = r; + break; + case 4: /* b + s * i */ + switch 
(nr) { + case 0: + if (fn->tmp[ar.val].slot != -1) { + al = i->arg[1]; + ar = i->arg[0]; + } + a->index = ar; + a->scale = 1; + break; + case 3: + amatch(a, ar, ai, fn, 0); + break; + } + r = al; + case 0: + s = fn->tmp[r.val].slot; + if (s != -1) + r = SLOT(s); + a->base = r; + break; + case 2: /* constants */ + case 5: /* o + s * i */ + case 6: /* o + b */ + case 7: /* o + b + s * i */ + amatch(a, ar, ai, fn, 0); + amatch(a, al, ai, fn, 0); + break; + } +} + +/* instruction selection + * requires use counts (as given by parsing) + */ +void +isel(Fn *fn) +{ + Blk *b, **sb; + Ins *i, *i0, *ip; + Phi *p; + uint a; + int n, al; + int64_t sz; + ANum *ainfo; + RAlloc *ral; + + for (n=0; n<fn->ntmp; n++) + fn->tmp[n].slot = -1; + fn->slot = 0; + + /* lower arguments */ + for (b=fn->start, i=b->ins; i-b->ins < b->nins; i++) + if (i->op != OPar && i->op != OParc) + break; + selpar(fn, b->ins, i); + n = b->nins - (i - b->ins) + (curi - insb); + i0 = alloc(n * sizeof(Ins)); + ip = icpy(ip = i0, insb, curi - insb); + ip = icpy(ip, i, &b->ins[b->nins] - i); + b->nins = n; + b->ins = i0; + + /* lower function calls and returns */ + ral = 0; + b = fn->start; + do { + if (!(b = b->link)) + b = fn->start; /* do it last */ + curi = &insb[NIns]; + selret(b, fn); + for (i=&b->ins[b->nins]; i!=b->ins;) { + if ((--i)->op == OCall) { + for (i0=i; i0>b->ins; i0--) + if ((i0-1)->op != OArg) + if ((i0-1)->op != OArgc) + break; + selcall(fn, i0, i, &ral); + i = i0; + continue; + } + assert(i->op != OArg && i->op != OArgc); + emiti(*i); + } + if (b == fn->start) + for (; ral; ral=ral->link) + emiti(ral->i); + b->nins = &insb[NIns] - curi; + idup(&b->ins, curi, b->nins); + } while (b != fn->start); + + if (debug['A']) { + fprintf(stderr, "\n> After call lowering:\n"); + printfn(fn, stderr); + } + + /* assign slots to fast allocs */ + b = fn->start; + assert(NAlign == 3 && "change n=4 and sz /= 4 below"); + for (al=OAlloc, n=4; al<=OAlloc1; al++, n*=2) + for (i=b->ins; i-b->ins < b->nins; 
i++) + if (i->op == al) { + if (rtype(i->arg[0]) != RCon) + break; + sz = fn->con[i->arg[0].val].bits.i; + if (sz < 0 || sz >= INT_MAX-3) + diag("isel: invalid alloc size"); + sz = (sz + n-1) & -n; + sz /= 4; + fn->tmp[i->to.val].slot = fn->slot; + fn->slot += sz; + *i = (Ins){.op = ONop}; + } + + /* process basic blocks */ + n = fn->ntmp; + ainfo = emalloc(n * sizeof ainfo[0]); + for (b=fn->start; b; b=b->link) { + curi = &insb[NIns]; + for (sb=(Blk*[3]){b->s1, b->s2, 0}; *sb; sb++) + for (p=(*sb)->phi; p; p=p->link) { + for (a=0; p->blk[a] != b; a++) + assert(a+1 < p->narg); + fixarg(&p->arg[a], p->cls, 1, fn); + } + memset(ainfo, 0, n * sizeof ainfo[0]); + anumber(ainfo, b, fn->con); + seljmp(b, fn); + for (i=&b->ins[b->nins]; i!=b->ins;) + sel(*--i, ainfo, fn); + b->nins = &insb[NIns] - curi; + idup(&b->ins, curi, b->nins); + } + free(ainfo); + + if (debug['I']) { + fprintf(stderr, "\n> After instruction selection:\n"); + printfn(fn, stderr); + } +} diff --git a/src/live.c b/src/live.c @@ -0,0 +1,174 @@ +#include "all.h" + +void +liveon(BSet *v, Blk *b, Blk *s) +{ + Phi *p; + uint a; + + bscopy(v, s->in); + for (p=s->phi; p; p=p->link) { + bsclr(v, p->to.val); + for (a=0; a<p->narg; a++) + if (p->blk[a] == b) + if (rtype(p->arg[a]) == RTmp) + bsset(v, p->arg[a].val); + } +} + +static int +phitmp(int t, Tmp *tmp) +{ + int tp; + + tp = tmp[t].phi; + return tp ? 
tp : t; +} + +static void +phifix(int t1, short *phi, Tmp *tmp) +{ + int t, t2; + + /* detect temporaries arguments + * of the same phi node that + * interfere and separate them + */ + t = phitmp(t1, tmp); + t2 = phi[t]; + if (t2 && t2 != t1) { + if (t != t1) { + tmp[t1].phi = t1; + t = t1; + } else { + tmp[t2].phi = t2; + phi[t2] = t2; + } + } + phi[t] = t1; +} + +static void +bset(Ref r, Blk *b, int *nlv, short *phi, Tmp *tmp) +{ + + if (rtype(r) != RTmp) + return; + bsset(b->gen, r.val); + phifix(r.val, phi, tmp); + if (!bshas(b->in, r.val)) { + nlv[KBASE(tmp[r.val].cls)]++; + bsset(b->in, r.val); + } +} + +/* liveness analysis + * requires rpo computation + */ +void +filllive(Fn *f) +{ + Blk *b; + Ins *i; + int k, t, m[2], n, chg, nlv[2]; + short *phi; + BSet u[1], v[1]; + Mem *ma; + + bsinit(u, f->ntmp); + bsinit(v, f->ntmp); + phi = emalloc(f->ntmp * sizeof phi[0]); + for (b=f->start; b; b=b->link) { + bsinit(b->in, f->ntmp); + bsinit(b->out, f->ntmp); + bsinit(b->gen, f->ntmp); + } + chg = 1; +Again: + for (n=f->nblk-1; n>=0; n--) { + b = f->rpo[n]; + + bscopy(u, b->out); + if (b->s1) { + liveon(v, b, b->s1); + bsunion(b->out, v); + } + if (b->s2) { + liveon(v, b, b->s2); + bsunion(b->out, v); + } + chg |= !bsequal(b->out, u); + + memset(phi, 0, f->ntmp * sizeof phi[0]); + memset(nlv, 0, sizeof nlv); + bscopy(b->in, b->out); + for (t=0; t<f->ntmp; t++) + if (bshas(b->in, t)) { + phifix(t, phi, f->tmp); + nlv[KBASE(f->tmp[t].cls)]++; + } + if (rtype(b->jmp.arg) == RACall) { + assert(bscount(b->in) == 0 && nlv[0] == 0 && nlv[1] == 0); + b->in->t[0] |= retregs(b->jmp.arg, nlv); + } else + bset(b->jmp.arg, b, nlv, phi, f->tmp); + for (k=0; k<2; k++) + b->nlive[k] = nlv[k]; + for (i=&b->ins[b->nins]; i!=b->ins;) { + if ((--i)->op == OCall && rtype(i->arg[1]) == RACall) { + b->in->t[0] &= ~retregs(i->arg[1], m); + for (k=0; k<2; k++) + nlv[k] -= m[k]; + if (nlv[0] + NISave > b->nlive[0]) + b->nlive[0] = nlv[0] + NISave; + if (nlv[1] + NFSave > b->nlive[1]) + 
b->nlive[1] = nlv[1] + NFSave; + b->in->t[0] |= argregs(i->arg[1], m); + for (k=0; k<2; k++) + nlv[k] += m[k]; + } + if (!req(i->to, R)) { + assert(rtype(i->to) == RTmp); + t = i->to.val; + if (bshas(b->in, i->to.val)) + nlv[KBASE(f->tmp[t].cls)]--; + bsset(b->gen, t); + bsclr(b->in, t); + phi[phitmp(t, f->tmp)] = 0; + } + for (k=0; k<2; k++) + switch (rtype(i->arg[k])) { + case RAMem: + ma = &f->mem[i->arg[k].val & AMask]; + bset(ma->base, b, nlv, phi, f->tmp); + bset(ma->index, b, nlv, phi, f->tmp); + break; + default: + bset(i->arg[k], b, nlv, phi, f->tmp); + break; + } + for (k=0; k<2; k++) + if (nlv[k] > b->nlive[k]) + b->nlive[k] = nlv[k]; + } + } + if (chg) { + chg = 0; + goto Again; + } + free(phi); + + if (debug['L']) { + fprintf(stderr, "\n> Liveness analysis:\n"); + for (b=f->start; b; b=b->link) { + fprintf(stderr, "\t%-10sin: ", b->name); + dumpts(b->in, f->tmp, stderr); + fprintf(stderr, "\t out: "); + dumpts(b->out, f->tmp, stderr); + fprintf(stderr, "\t gen: "); + dumpts(b->gen, f->tmp, stderr); + fprintf(stderr, "\t live: "); + fprintf(stderr, "%d %d\n", b->nlive[0], b->nlive[1]); + } + } +} diff --git a/src/main.c b/src/main.c @@ -0,0 +1,117 @@ +#include "all.h" +#include <ctype.h> +#include <getopt.h> + +char debug['Z'+1] = { + ['P'] = 0, /* parsing */ + ['A'] = 0, /* abi lowering */ + ['I'] = 0, /* instruction selection */ + ['L'] = 0, /* liveness */ + ['M'] = 0, /* memory optimization */ + ['N'] = 0, /* ssa construction */ + ['C'] = 0, /* copy elimination */ + ['S'] = 0, /* spilling */ + ['R'] = 0, /* reg. 
allocation */ +}; + +static FILE *outf; +static int dbg; + +static void +data(Dat *d) +{ + if (dbg) + return; + if (d->type == DEnd) { + fputs("/* end data */\n\n", outf); + freeall(); + } + emitdat(d, outf); +} + +static void +func(Fn *fn) +{ + int n; + + if (dbg) + fprintf(stderr, "**** Function %s ****", fn->name); + if (debug['P']) { + fprintf(stderr, "\n> After parsing:\n"); + printfn(fn, stderr); + } + fillrpo(fn); + fillpreds(fn); + filluse(fn); + memopt(fn); + ssa(fn); + filluse(fn); + copy(fn); + filluse(fn); + isel(fn); + filllive(fn); + fillcost(fn); + spill(fn); + rega(fn); + fillrpo(fn); + assert(fn->rpo[0] == fn->start); + for (n=0;; n++) + if (n == fn->nblk-1) { + fn->rpo[n]->link = 0; + break; + } else + fn->rpo[n]->link = fn->rpo[n+1]; + if (!dbg) { + emitfn(fn, outf); + fprintf(outf, "/* end function %s */\n\n", fn->name); + } else + fprintf(stderr, "\n"); + freeall(); +} + +int +main(int ac, char *av[]) +{ + FILE *inf; + char *f; + int c; + + outf = stdout; + while ((c = getopt(ac, av, "d:o:")) != -1) + switch (c) { + case 'd': + for (; *optarg; optarg++) + if (isalpha(*optarg)) { + debug[toupper(*optarg)] = 1; + dbg = 1; + } + break; + case 'o': + if (strcmp(optarg, "-") != 0) + outf = fopen(optarg, "w"); + break; + default: + fprintf(stderr, "usage: %s [-d <flags>] [-o out] {file.ssa, -}\n", av[0]); + exit(1); + } + + do { + f = av[optind]; + if (!f || strcmp(f, "-") == 0) { + inf = stdin; + f = "-"; + } else { + inf = fopen(f, "r"); + if (!inf) { + fprintf(stderr, "cannot open '%s'\n", f); + exit(1); + } + } + parse(inf, f, data, func); + } while (++optind < ac); + + if (!dbg) + emitfin(outf); + + exit(0); +} diff --git a/src/mem.c b/src/mem.c @@ -0,0 +1,81 @@ +#include "all.h" + +/* Memory optimization: + * + * - replace alloced slots used only in + * load/store operations + * Assumption: all the accesses have the + * same size (this could be wrong...) 
+ */ + +/* require use, maintains use counts */ +void +memopt(Fn *fn) +{ + Blk *b; + Ins *i, *l; + Tmp *t; + Use *u, *ue; + int a; + + b = fn->start; + for (i=b->ins; i-b->ins < b->nins; i++) { + if (OAlloc > i->op || i->op > OAlloc1) + continue; + assert(NAlign == 3); + assert(rtype(i->to) == RTmp); + t = &fn->tmp[i->to.val]; + for (u=t->use; u != &t->use[t->nuse]; u++) { + if (u->type != UIns) + goto NextIns; + l = u->u.ins; + if (!isload(l->op) + && (!isstore(l->op) || req(i->to, l->arg[0]))) + goto NextIns; + } + /* get rid of the alloc and replace uses */ + *i = (Ins){.op = ONop}; + t->ndef--; + ue = &t->use[t->nuse]; + for (u=t->use; u!=ue; u++) { + l = u->u.ins; + if (isstore(l->op)) { + if (l->op == OStores) + l->cls = Kd; + else if (l->op == OStored) + l->cls = Kd; + else if (l->op == OStorel) + l->cls = Kl; + else + l->cls = Kw; + l->op = OCopy; + l->to = l->arg[1]; + l->arg[1] = R; + t->nuse--; + t->ndef++; + } else + /* try to turn loads into copies so we + * can eliminate them later */ + switch(l->op) { + case OLoad: + l->op = OCopy; + break; + case OLoadsw: + case OLoaduw: + l->cls = Kw; + l->op = OCopy; + break; + default: + /* keep l->cls */ + a = l->op - OLoadsw; + l->op = OExtsw + a; + break; + } + } + NextIns:; + } + if (debug['M']) { + fprintf(stderr, "\n> After memory optimization:\n"); + printfn(fn, stderr); + } +} diff --git a/src/parse.c b/src/parse.c @@ -0,0 +1,1081 @@ +#include "all.h" +#include <ctype.h> +#include <stdarg.h> + +enum { + Kx = -1, /* Invalid operand */ + Km = Kl, /* Memory pointer (for x64) */ +}; + +OpDesc opdesc[NOp] = { +#define A(a,b,c,d) {[Kw]=K##a, [Kl]=K##b, [Ks]=K##c, [Kd]=K##d} + + /* NAME NM ARGCLS0 ARGCLS1 SF LF */ + [OAdd] = { "add", 2, {A(w,l,s,d), A(w,l,s,d)}, 1, 0 }, + [OSub] = { "sub", 2, {A(w,l,s,d), A(w,l,s,d)}, 1, 0 }, + [ODiv] = { "div", 2, {A(w,l,s,d), A(w,l,s,d)}, 0, 0 }, + [ORem] = { "rem", 2, {A(w,l,x,x), A(w,l,x,x)}, 0, 0 }, + [OUDiv] = { "udiv", 2, {A(w,l,s,d), A(w,l,s,d)}, 0, 0 }, + [OURem] = { 
"urem", 2, {A(w,l,x,x), A(w,l,x,x)}, 0, 0 }, + [OMul] = { "mul", 2, {A(w,l,s,d), A(w,l,s,d)}, 0, 0 }, + [OAnd] = { "and", 2, {A(w,l,s,d), A(w,l,s,d)}, 1, 0 }, + [OOr] = { "or", 2, {A(w,l,s,d), A(w,l,s,d)}, 1, 0 }, + [OXor] = { "xor", 2, {A(w,l,s,d), A(w,l,s,d)}, 1, 0 }, + [OSar] = { "sar", 1, {A(w,l,x,x), A(w,w,x,x)}, 1, 0 }, + [OShr] = { "shr", 1, {A(w,l,x,x), A(w,w,x,x)}, 1, 0 }, + [OShl] = { "shl", 1, {A(w,l,x,x), A(w,w,x,x)}, 1, 0 }, + [OStored] = { "stored", 0, {A(d,d,d,d), A(m,m,m,m)}, 0, 1 }, + [OStores] = { "stores", 0, {A(s,s,s,s), A(m,m,m,m)}, 0, 1 }, + [OStorel] = { "storel", 0, {A(l,l,l,l), A(m,m,m,m)}, 0, 1 }, + [OStorew] = { "storew", 0, {A(w,w,w,w), A(m,m,m,m)}, 0, 1 }, + [OStoreh] = { "storeh", 0, {A(w,w,w,w), A(m,m,m,m)}, 0, 1 }, + [OStoreb] = { "storeb", 0, {A(w,w,w,w), A(m,m,m,m)}, 0, 1 }, + [OLoad] = { "load", 0, {A(m,m,m,m), A(x,x,x,x)}, 0, 1 }, + [OLoadsw] = { "loadsw", 0, {A(m,m,x,x), A(x,x,x,x)}, 0, 1 }, + [OLoaduw] = { "loaduw", 0, {A(m,m,x,x), A(x,x,x,x)}, 0, 1 }, + [OLoadsh] = { "loadsh", 0, {A(m,m,x,x), A(x,x,x,x)}, 0, 1 }, + [OLoaduh] = { "loaduh", 0, {A(m,m,x,x), A(x,x,x,x)}, 0, 1 }, + [OLoadsb] = { "loadsb", 0, {A(m,m,x,x), A(x,x,x,x)}, 0, 1 }, + [OLoadub] = { "loadub", 0, {A(m,m,x,x), A(x,x,x,x)}, 0, 1 }, + [OExtsw] = { "extsw", 0, {A(w,w,w,w), A(x,x,x,x)}, 0, 1 }, + [OExtuw] = { "extuw", 0, {A(w,w,w,w), A(x,x,x,x)}, 0, 1 }, + [OExtsh] = { "extsh", 0, {A(w,w,w,w), A(x,x,x,x)}, 0, 1 }, + [OExtuh] = { "extuh", 0, {A(w,w,w,w), A(x,x,x,x)}, 0, 1 }, + [OExtsb] = { "extsb", 0, {A(w,w,w,w), A(x,x,x,x)}, 0, 1 }, + [OExtub] = { "extub", 0, {A(w,w,w,w), A(x,x,x,x)}, 0, 1 }, + [OExts] = { "exts", 0, {A(w,w,w,w), A(x,x,x,x)}, 0, 1 }, + [OTruncd] = { "truncd", 0, {A(d,d,d,d), A(x,x,x,x)}, 0, 1 }, + [OFtosi] = { "ftosi", 0, {A(s,d,x,x), A(x,x,x,x)}, 0, 1 }, + [OSitof] = { "sitof", 0, {A(x,x,w,l), A(x,x,x,x)}, 0, 1 }, + [OCast] = { "cast", 0, {A(s,d,w,l), A(x,x,x,x)}, 0, 1 }, + [OCopy] = { "copy", 1, {A(w,l,s,d), A(x,x,x,x)}, 0, 1 }, + [ONop] = { 
"nop", 0, {A(x,x,x,x), A(x,x,x,x)}, 0, 1 }, + [OSwap] = { "swap", 2, {A(w,l,s,d), A(w,l,s,d)}, 0, 0 }, + [OSign] = { "sign", 0, {A(w,l,x,x), A(x,x,x,x)}, 0, 0 }, + [OSAlloc] = { "salloc", 0, {A(x,l,x,x), A(x,x,x,x)}, 0, 0 }, + [OXDiv] = { "xdiv", 1, {A(w,l,x,x), A(x,x,x,x)}, 0, 0 }, + [OXCmp] = { "xcmp", 1, {A(w,l,s,d), A(w,l,s,d)}, 1, 0 }, + [OXTest] = { "xtest", 1, {A(w,l,x,x), A(w,l,x,x)}, 1, 0 }, + [OAddr] = { "addr", 0, {A(m,m,x,x), A(x,x,x,x)}, 0, 1 }, + [OPar] = { "parn", 0, {A(x,x,x,x), A(x,x,x,x)}, 0, 0 }, + [OParc] = { "parc", 0, {A(x,x,x,x), A(x,x,x,x)}, 0, 0 }, + [OArg] = { "arg", 0, {A(x,x,x,x), A(x,x,x,x)}, 0, 0 }, + [OArgc] = { "argc", 0, {A(x,x,x,x), A(x,x,x,x)}, 0, 0 }, + [OCall] = { "call", 0, {A(m,m,m,m), A(x,x,x,x)}, 0, 0 }, + [OXSetnp] = { "xsetnp", 0, {A(x,x,x,x), A(x,x,x,x)}, 0, 0 }, + [OXSetp] = { "xsetp", 0, {A(x,x,x,x), A(x,x,x,x)}, 0, 0 }, + [OAlloc] = { "alloc4", 1, {A(l,l,l,l), A(x,x,x,x)}, 0, 0 }, + [OAlloc+1] = { "alloc8", 1, {A(l,l,l,l), A(x,x,x,x)}, 0, 0 }, + [OAlloc+2] = { "alloc16", 1, {A(l,l,l,l), A(x,x,x,x)}, 0, 0 }, +#define X(c) \ + [OCmpw+IC##c] = { "c" #c "w", 0, {A(w,w,x,x), A(w,w,x,x)}, 1, 0 }, \ + [OCmpl+IC##c] = { "c" #c "l", 0, {A(l,l,x,x), A(l,l,x,x)}, 1, 0 }, \ + [OXSet+IC##c] = { "xset" #c, 0, {A(x,x,x,x), A(x,x,x,x)}, 0, 1 }, + ICMPS(X) +#undef X +#define X(c) \ + [OCmps+FC##c] = { "c" #c "s", 0, {A(s,s,x,x), A(s,s,x,x)}, 1, 0 }, \ + [OCmpd+FC##c] = { "c" #c "d", 0, {A(d,d,x,x), A(d,d,x,x)}, 1, 0 }, + FCMPS(X) +#undef X + +}; +#undef A + +typedef enum { + PXXX, + PLbl, + PPhi, + PIns, + PEnd, +} PState; + +enum { + TXXX = NPubOp, + TCall, + TPhi, + TJmp, + TJnz, + TRet, + TFunc, + TType, + TData, + TAlign, + TL, + TW, + TH, + TB, + TD, + TS, + TZ, + + TInt, + TFlts, + TFltd, + TTmp, + TLbl, + TGlo, + TTyp, + TStr, + + TPlus, + TEq, + TComma, + TLParen, + TRParen, + TLBrace, + TRBrace, + TNL, + TEOF, +}; + + +static FILE *inf; +static char *inpath; +static int thead; +static struct { + char chr; + double fltd; + 
float flts; + int64_t num; + char *str; +} tokval; +static int lnum; + +static Tmp *tmp; +static Con *con; +static int ntmp; +static int ncon; +static Phi **plink; +static Blk **bmap; +static Blk *curb; +static Blk **blink; +static int nblk; +static int rcls; +static int ntyp; + + +void +err(char *s, ...) +{ + char buf[100], *p, *end; + va_list ap; + + p = buf; + end = buf + sizeof(buf); + + va_start(ap, s); + p += snprintf(p, end - p, "%s:%d: ", inpath, lnum); + p += vsnprintf(p, end - p, s, ap); + va_end(ap); + + diag(buf); +} + +static int +lex() +{ + static struct { + char *str; + int tok; + } tmap[] = { + { "call", TCall }, + { "phi", TPhi }, + { "jmp", TJmp }, + { "jnz", TJnz }, + { "ret", TRet }, + { "function", TFunc }, + { "type", TType }, + { "data", TData }, + { "align", TAlign }, + { "l", TL }, + { "w", TW }, + { "h", TH }, + { "b", TB }, + { "d", TD }, + { "s", TS }, + { "z", TZ }, + { "loadw", OLoad }, /* for convenience */ + { "loadl", OLoad }, + { "loads", OLoad }, + { "loadd", OLoad }, + { "alloc1", OAlloc }, + { "alloc2", OAlloc }, + { 0, TXXX } + }; + static char tok[NString]; + int c, i; + int t; + + do + c = fgetc(inf); + while (isblank(c)); + t = TXXX; + tokval.chr = c; + switch (c) { + case EOF: + return TEOF; + case ',': + return TComma; + case '(': + return TLParen; + case ')': + return TRParen; + case '{': + return TLBrace; + case '}': + return TRBrace; + case '=': + return TEq; + case '+': + return TPlus; + case 's': + if (fscanf(inf, "_%f", &tokval.flts) != 1) + break; + return TFlts; + case 'd': + if (fscanf(inf, "_%lf", &tokval.fltd) != 1) + break; + return TFltd; + case '%': + t = TTmp; + goto Alpha; + case '@': + t = TLbl; + goto Alpha; + case '$': + t = TGlo; + goto Alpha; + case ':': + t = TTyp; + goto Alpha; + case '#': + while (fgetc(inf) != '\n') + ; + case '\n': + lnum++; + return TNL; + } + if (isdigit(c) || c == '-' || c == '+') { + ungetc(c, inf); + if (fscanf(inf, "%"SCNd64, &tokval.num) != 1) + err("invalid integer 
literal"); + return TInt; + } + if (c == '"') { + tokval.str = vnew(0, 1); + for (i=0;; i++) { + c = fgetc(inf); + vgrow(&tokval.str, i+1); + if (c == '"') + if (!i || tokval.str[i-1] != '\\') { + tokval.str[i] = 0; + return TStr; + } + tokval.str[i] = c; + } + } + if (0) +Alpha: c = fgetc(inf); + if (!isalpha(c) && c != '.' && c != '_') + err("lexing failure: invalid character %c (%d)", c, c); + i = 0; + do { + if (i >= NString-1) + err("identifier too long"); + tok[i++] = c; + c = fgetc(inf); + } while (isalpha(c) || c == '$' || c == '.' || c == '_' || isdigit(c)); + tok[i] = 0; + ungetc(c, inf); + tokval.str = tok; + if (t != TXXX) { + return t; + } + for (i=0; i<NPubOp; i++) + if (opdesc[i].name) + if (strcmp(tok, opdesc[i].name) == 0) + return i; + for (i=0; tmap[i].str; i++) + if (strcmp(tok, tmap[i].str) == 0) + return tmap[i].tok; + err("unknown keyword %s", tokval.str); + return TXXX; +} + +static int +peek() +{ + if (thead == TXXX) + thead = lex(); + return thead; +} + +static int +next() +{ + int t; + + t = peek(); + thead = TXXX; + return t; +} + +static int +nextnl() +{ + int t; + + while ((t = next()) == TNL) + ; + return t; +} + +static void +expect(int t) +{ + static char *ttoa[] = { + [TLbl] = "label", + [TComma] = ",", + [TEq] = "=", + [TNL] = "newline", + [TLParen] = "(", + [TRParen] = ")", + [TLBrace] = "{", + [TRBrace] = "}", + [TEOF] = 0, + }; + char buf[128], *s1, *s2; + int t1; + + t1 = next(); + if (t == t1) + return; + s1 = ttoa[t] ? ttoa[t] : "??"; + s2 = ttoa[t1] ? 
ttoa[t1] : "??"; + sprintf(buf, "%s expected, got %s instead", s1, s2); + err(buf); +} + +static Ref +tmpref(char *v) +{ + int t; + + for (t=Tmp0; t<ntmp; t++) + if (strcmp(v, tmp[t].name) == 0) + return TMP(t); + vgrow(&tmp, ++ntmp); + strcpy(tmp[t].name, v); + return TMP(t); +} + +static Ref +parseref() +{ + Con c; + int i; + + memset(&c, 0, sizeof c); + switch (next()) { + case TTmp: + return tmpref(tokval.str); + case TInt: + c.type = CBits; + c.bits.i = tokval.num; + goto Look; + case TFlts: + c.type = CBits; + c.bits.s = tokval.flts; + c.flt = 1; + goto Look; + case TFltd: + c.type = CBits; + c.bits.d = tokval.fltd; + c.flt = 2; + goto Look; + case TGlo: + c.type = CAddr; + strcpy(c.label, tokval.str); + Look: + for (i=0; i<ncon; i++) + if (con[i].type == c.type + && con[i].bits.i == c.bits.i + && strcmp(con[i].label, c.label) == 0) + return CON(i); + vgrow(&con, ++ncon); + con[i] = c; + return CON(i); + default: + return R; + } +} + +static int +parsecls(int *tyn) +{ + int i; + + switch (next()) { + default: + err("invalid class specifier"); + case TTyp: + for (i=0; i<ntyp; i++) + if (strcmp(tokval.str, typ[i].name) == 0) { + *tyn = i; + return 4; + } + err("undefined type"); + case TW: + return Kw; + case TL: + return Kl; + case TS: + return Ks; + case TD: + return Kd; + } +} + +static void +parserefl(int arg) +{ + int k, t, ty; + Ref r; + + expect(TLParen); + if (peek() == TRParen) { + next(); + return; + } + for (;;) { + if (curi - insb >= NIns) + err("too many instructions (1)"); + k = parsecls(&ty); + r = parseref(); + if (req(r, R)) + err("invalid reference argument"); + if (!arg && rtype(r) != RTmp) + err("invalid function parameter"); + if (k == 4) + if (arg) + *curi = (Ins){OArgc, R, {TYPE(ty), r}, Kl}; + else + *curi = (Ins){OParc, r, {TYPE(ty)}, Kl}; + else + if (arg) + *curi = (Ins){OArg, R, {r}, k}; + else + *curi = (Ins){OPar, r, {R}, k}; + curi++; + t = next(); + if (t == TRParen) + break; + if (t != TComma) + err(", or ) expected"); + } +} + 
+static Blk * +findblk(char *name) +{ + int i; + + for (i=0; i<nblk; i++) + if (strcmp(bmap[i]->name, name) == 0) + return bmap[i]; + vgrow(&bmap, ++nblk); + bmap[i] = blknew(); + strcpy(bmap[i]->name, name); + return bmap[i]; +} + +static void +closeblk() +{ + curb->nins = curi - insb; + idup(&curb->ins, insb, curb->nins); + blink = &curb->link; + curi = insb; +} + +static PState +parseline(PState ps) +{ + Ref arg[NPred] = {R}; + Blk *blk[NPred]; + Phi *phi; + Ref r; + Blk *b; + int t, op, i, k, ty; + + t = nextnl(); + if (ps == PLbl && t != TLbl && t != TRBrace) + err("label or } expected"); + switch (t) { + default: + if (isstore(t)) { + /* operations without result */ + r = R; + k = 0; + op = t; + goto DoOp; + } + err("label, instruction or jump expected"); + case TRBrace: + return PEnd; + case TTmp: + break; + case TLbl: + b = findblk(tokval.str); + if (b->jmp.type != JXXX) + err("multiple definitions of block"); + if (curb && curb->jmp.type == JXXX) { + closeblk(); + curb->jmp.type = JJmp; + curb->s1 = b; + } + *blink = b; + curb = b; + plink = &curb->phi; + expect(TNL); + return PPhi; + case TRet: + curb->jmp.type = (int[]){ + JRetw, JRetl, + JRets, JRetd, + JRetc, JRet0 + }[rcls]; + if (rcls < 5) { + r = parseref(); + if (req(r, R)) + err("return value expected"); + curb->jmp.arg = r; + } + goto Close; + case TJmp: + curb->jmp.type = JJmp; + goto Jump; + case TJnz: + curb->jmp.type = JJnz; + r = parseref(); + if (req(r, R)) + err("invalid argument for jnz jump"); + curb->jmp.arg = r; + expect(TComma); + Jump: + expect(TLbl); + curb->s1 = findblk(tokval.str); + if (curb->jmp.type != JJmp) { + expect(TComma); + expect(TLbl); + curb->s2 = findblk(tokval.str); + } + Close: + expect(TNL); + closeblk(); + return PLbl; + } + r = tmpref(tokval.str); + expect(TEq); + k = parsecls(&ty); + op = next(); +DoOp: + if (op == TPhi) { + if (ps != PPhi) + err("unexpected phi instruction"); + op = -1; + } + if (op == TCall) { + arg[0] = parseref(); + parserefl(1); + 
expect(TNL); + op = OCall; + if (k == 4) { + k = Kl; + arg[1] = TYPE(ty); + } else + arg[1] = R; + goto Ins; + } + if (k == 4) + err("size class must be w, l, s, or d"); + if (op >= NPubOp) + err("invalid instruction"); + i = 0; + if (peek() != TNL) + for (;;) { + if (i == NPred) + err("too many arguments"); + if (op == -1) { + expect(TLbl); + blk[i] = findblk(tokval.str); + } + arg[i] = parseref(); + if (req(arg[i], R)) + err("invalid instruction argument"); + i++; + t = peek(); + if (t == TNL) + break; + if (t != TComma) + err(", or end of line expected"); + next(); + } + next(); + if (op != -1) { + Ins: + if (curi - insb >= NIns) + err("too many instructions (2)"); + curi->op = op; + curi->cls = k; + curi->to = r; + curi->arg[0] = arg[0]; + curi->arg[1] = arg[1]; + curi++; + return PIns; + } else { + phi = alloc(sizeof *phi); + phi->to = r; + phi->cls = k; + memcpy(phi->arg, arg, i * sizeof arg[0]); + memcpy(phi->blk, blk, i * sizeof blk[0]); + phi->narg = i; + *plink = phi; + plink = &phi->link; + return PPhi; + } +} + +static Fn * +parsefn() +{ + PState ps; + Fn *fn; + + ntmp = Tmp0; + ncon = 1; /* first constant must be 0 */ + curb = 0; + nblk = 0; + curi = insb; + tmp = vnew(ntmp, sizeof tmp[0]); + con = vnew(ncon, sizeof con[0]); + bmap = vnew(nblk, sizeof bmap[0]); + con[0].type = CBits; + fn = alloc(sizeof *fn); + blink = &fn->start; + fn->retty = -1; + if (peek() != TGlo) + rcls = parsecls(&fn->retty); + else + rcls = 5; + if (next() != TGlo) + err("function name expected"); + strcpy(fn->name, tokval.str); + parserefl(0); + if (nextnl() != TLBrace) + err("function body must start with {"); + ps = PLbl; + do + ps = parseline(ps); + while (ps != PEnd); + if (!curb) + err("empty file"); + if (curb->jmp.type == JXXX) + err("last block misses jump"); + fn->tmp = tmp; + fn->con = con; + fn->mem = vnew(0, sizeof fn->mem[0]); + fn->ntmp = ntmp; + fn->ncon = ncon; + fn->nmem = 0; + fn->nblk = nblk; + fn->rpo = 0; + return fn; +} + +static void +parsetyp() +{ + 
Typ *ty; + int t, n, sz, al, s, a, c, flt; + + if (ntyp >= NTyp) + err("too many type definitions"); + ty = &typ[ntyp++]; + ty->align = -1; + if (nextnl() != TTyp || nextnl() != TEq) + err("type name, then = expected"); + strcpy(ty->name, tokval.str); + t = nextnl(); + if (t == TAlign) { + if (nextnl() != TInt) + err("alignment expected"); + for (al=0; tokval.num /= 2; al++) + ; + ty->align = al; + t = nextnl(); + } + if (t != TLBrace) + err("type body must start with {"); + t = nextnl(); + if (t == TInt) { + ty->dark = 1; + ty->size = tokval.num; + if (ty->align == -1) + err("dark types need alignment"); + t = nextnl(); + } else { + ty->dark = 0; + n = -1; + sz = 0; + al = 0; + for (;;) { + flt = 0; + switch (t) { + default: err("invalid size specifier %c", tokval.chr); + case TD: flt = 1; + case TL: s = 8; a = 3; break; + case TS: flt = 1; + case TW: s = 4; a = 2; break; + case TH: s = 2; a = 1; break; + case TB: s = 1; a = 0; break; + } + if (a > al) + al = a; + if ((a = sz & (s-1))) { + a = s - a; + if (++n < NSeg) { + /* padding segment */ + ty->seg[n].ispad = 1; + ty->seg[n].len = a; + } + } + t = nextnl(); + if (t == TInt) { + c = tokval.num; + t = nextnl(); + } else + c = 1; + while (c-- > 0) { + if (++n < NSeg) { + ty->seg[n].isflt = flt; + ty->seg[n].ispad = 0; + ty->seg[n].len = s; + } + sz += a + s; + } + if (t != TComma) + break; + t = nextnl(); + } + if (++n >= NSeg) + ty->dark = 1; + else + ty->seg[n].len = 0; + if (ty->align == -1) + ty->align = al; + else + al = ty->align; + a = (1 << al) - 1; + ty->size = (sz + a) & ~a; + } + if (t != TRBrace) + err("expected closing }"); +} + +static void +parsedatref(Dat *d) +{ + int t; + + d->isref = 1; + d->u.ref.nam = tokval.str; + d->u.ref.off = 0; + t = peek(); + if (t == TPlus) { + next(); + if (next() != TInt) + err("invalid token after offset in ref"); + d->u.ref.off = tokval.num; + } +} + +static void +parsedatstr(Dat *d) +{ + d->isstr = 1; + d->u.str = tokval.str; +} + +static void +parsedat(void 
cb(Dat *)) +{ + char s[NString]; + int t; + Dat d; + + d.type = DStart; + d.isstr = 0; + d.isref = 0; + cb(&d); + if (nextnl() != TGlo || nextnl() != TEq) + err("data name, then = expected"); + strcpy(s, tokval.str); + t = nextnl(); + if (t == TAlign) { + if (nextnl() != TInt) + err("alignment expected"); + d.type = DAlign; + d.u.num = tokval.num; + cb(&d); + t = nextnl(); + } + d.type = DName; + d.u.str = s; + cb(&d); + + if (t != TLBrace) + err("expected data contents in { .. }"); + for (;;) { + switch (nextnl()) { + default: err("invalid size specifier %c in data", tokval.chr); + case TRBrace: goto Done; + case TL: d.type = DL; break; + case TW: d.type = DW; break; + case TH: d.type = DH; break; + case TB: d.type = DB; break; + case TS: d.type = DW; break; + case TD: d.type = DL; break; + case TZ: d.type = DZ; break; + } + t = nextnl(); + do { + d.isref = 0; + d.isstr = 0; + memset(&d.u, 0, sizeof d.u); + if (t == TFlts) + d.u.flts = tokval.flts; + else if (t == TFltd) + d.u.fltd = tokval.fltd; + else if (t == TInt) + d.u.num = tokval.num; + else if (t == TGlo) + parsedatref(&d); + else if (t == TStr) + parsedatstr(&d); + else + err("constant literal expected"); + cb(&d); + t = nextnl(); + } while (t == TInt || t == TFlts || t == TFltd); + if (t == TRBrace) + break; + if (t != TComma) + err(", or } expected"); + } +Done: + d.type = DEnd; + cb(&d); +} + +void +parse(FILE *f, char *path, void data(Dat *), void func(Fn *)) +{ + inf = f; + inpath = path; + lnum = 1; + thead = TXXX; + ntyp = 0; + for (;;) + switch (nextnl()) { + case TFunc: + func(parsefn()); + break; + case TType: + parsetyp(); + break; + case TData: + parsedat(data); + break; + case TEOF: + return; + default: + err("top-level definition expected"); + break; + } +} + +static void +printcon(Con *c, FILE *f) +{ + switch (c->type) { + case CUndef: + break; + case CAddr: + fprintf(f, "$%s", c->label); + if (c->bits.i) + fprintf(f, "%+"PRIi64, c->bits.i); + break; + case CBits: + if (c->flt == 1) + 
fprintf(f, "s_%f", c->bits.s); + else if (c->flt == 2) + fprintf(f, "d_%lf", c->bits.d); + else + fprintf(f, "%"PRIi64, c->bits.i); + break; + } +} + +void +printref(Ref r, Fn *fn, FILE *f) +{ + int i; + Mem *m; + + switch (rtype(r)) { + case RTmp: + if (r.val < Tmp0) + fprintf(f, "R%d", r.val); + else + fprintf(f, "%%%s", fn->tmp[r.val].name); + break; + case RCon: + printcon(&fn->con[r.val], f); + break; + case RSlot: + fprintf(f, "S%d", r.val); + break; + case RACall: + fprintf(f, "%03x", r.val & AMask); + break; + case RAType: + fprintf(f, ":%s", typ[r.val & AMask].name); + break; + case RAMem: + i = 0; + m = &fn->mem[r.val & AMask]; + fputc('[', f); + if (m->offset.type != CUndef) { + printcon(&m->offset, f); + i = 1; + } + if (!req(m->base, R)) { + if (i) + fprintf(f, " + "); + printref(m->base, fn, f); + i = 1; + } + if (!req(m->index, R)) { + if (i) + fprintf(f, " + "); + fprintf(f, "%d * ", m->scale); + printref(m->index, fn, f); + } + fputc(']', f); + break; + } +} + +void +printfn(Fn *fn, FILE *f) +{ + static char *jtoa[NJmp] = { + [JRet0] = "ret", + [JRetw] = "retw", + [JRetl] = "retl", + [JRetc] = "retc", + [JRets] = "rets", + [JRetd] = "retd", + [JJnz] = "jnz", + [JXJnp] = "xjnp", + [JXJp] = "xjp", + #define X(c) [JXJc+IC##c] = "xj" #c, + ICMPS(X) + #undef X + }; + static char prcls[NOp] = { + [OArg] = 1, + [OSwap] = 1, + [OXCmp] = 1, + [OXTest] = 1, + [OXDiv] = 1, + [OXIDiv] = 1, + }; + static char ktoc[] = "wlsd"; + Blk *b; + Phi *p; + Ins *i; + uint n; + + fprintf(f, "function $%s() {\n", fn->name); + for (b=fn->start; b; b=b->link) { + fprintf(f, "@%s\n", b->name); + for (p=b->phi; p; p=p->link) { + fprintf(f, "\t"); + printref(p->to, fn, f); + fprintf(f, " =%c phi ", ktoc[p->cls]); + assert(p->narg); + for (n=0;; n++) { + fprintf(f, "@%s ", p->blk[n]->name); + printref(p->arg[n], fn, f); + if (n == p->narg-1) { + fprintf(f, "\n"); + break; + } else + fprintf(f, ", "); + } + } + for (i=b->ins; i-b->ins < b->nins; i++) { + fprintf(f, "\t"); + if 
(!req(i->to, R)) { + printref(i->to, fn, f); + fprintf(f, " =%c ", ktoc[i->cls]); + } + assert(opdesc[i->op].name); + fprintf(f, "%s", opdesc[i->op].name); + if (req(i->to, R) && prcls[i->op]) + fputc(ktoc[i->cls], f); + if (!req(i->arg[0], R)) { + fprintf(f, " "); + printref(i->arg[0], fn, f); + } + if (!req(i->arg[1], R)) { + fprintf(f, ", "); + printref(i->arg[1], fn, f); + } + fprintf(f, "\n"); + } + switch (b->jmp.type) { + case JRet0: + case JRetw: + case JRetl: + case JRets: + case JRetd: + case JRetc: + fprintf(f, "\t%s", jtoa[b->jmp.type]); + if (b->jmp.type != JRet0 || !req(b->jmp.arg, R)) { + fprintf(f, " "); + printref(b->jmp.arg, fn, f); + } + if (b->jmp.type == JRetc) + fprintf(f, ", :%s", typ[fn->retty].name); + fprintf(f, "\n"); + break; + case JJmp: + if (b->s1 != b->link) + fprintf(f, "\tjmp @%s\n", b->s1->name); + break; + default: + fprintf(f, "\t%s ", jtoa[b->jmp.type]); + if (b->jmp.type == JJnz) { + printref(b->jmp.arg, fn, f); + fprintf(f, ", "); + } + fprintf(f, "@%s, @%s\n", b->s1->name, b->s2->name); + break; + } + } + fprintf(f, "}\n"); +} diff --git a/src/rega.c b/src/rega.c @@ -0,0 +1,598 @@ +#include "all.h" + +#ifdef TEST_PMOV + #undef assert + #define assert(x) assert_test(#x, x) +#endif + +typedef struct RMap RMap; + +struct RMap { + int t[NIReg+NFReg]; + int r[NIReg+NFReg]; + BSet b[1]; + int n; +}; + +static bits regu; /* registers used */ +static Tmp *tmp; /* function temporaries */ +static Mem *mem; /* function mem references */ +static struct { + Ref src, dst; + int cls; +} *pm; /* parallel move constructed */ +static int cpm, npm; /* capacity and size of pm */ + +static int * +hint(int t) +{ + return &tmp[phicls(t, tmp)].hint.r; +} + +static void +sethint(int t, int r) +{ + bits m; + + m = tmp[phicls(t, tmp)].hint.m; + if (*hint(t) == -1) + if (!(BIT(r) & m)) + *hint(t) = r; +} + +static void +rcopy(RMap *ma, RMap *mb) +{ + memcpy(ma->t, mb->t, sizeof ma->t); + memcpy(ma->r, mb->r, sizeof ma->r); + bscopy(ma->b, mb->b); + 
ma->n = mb->n; +} + +static int +rfind(RMap *m, int t) +{ + int i; + + for (i=0; i<m->n; i++) + if (m->t[i] == t) + return m->r[i]; + return -1; +} + +static Ref +rref(RMap *m, int t) +{ + int r, s; + + r = rfind(m, t); + if (r == -1) { + s = tmp[t].slot; + assert(s != -1 && "should have spilled"); + return SLOT(s); + } else + return TMP(r); +} + +static void +radd(RMap *m, int t, int r) +{ + assert((t >= Tmp0 || t == r) && "invalid temporary"); + assert(((RAX <= r && r < RAX + NIReg) || (XMM0 <= r && r < XMM0 + NFReg)) && "invalid register"); + assert(!bshas(m->b, t) && "temporary has mapping"); + assert(!bshas(m->b, r) && "register already allocated"); + assert(m->n <= NIReg+NFReg && "too many mappings"); + bsset(m->b, t); + bsset(m->b, r); + m->t[m->n] = t; + m->r[m->n] = r; + m->n++; + regu |= BIT(r); +} + +static Ref +ralloc(RMap *m, int t) +{ + bits regs; + int r, r0, r1; + + if (t < Tmp0) { + assert(bshas(m->b, t)); + return TMP(t); + } + if (bshas(m->b, t)) { + r = rfind(m, t); + assert(r != -1); + return TMP(r); + } + r = *hint(t); + if (r == -1 || bshas(m->b, r)) { + regs = tmp[phicls(t, tmp)].hint.m; + regs |= m->b->t[0]; + switch (KBASE(tmp[t].cls)) { + case 0: + r0 = RAX; + r1 = RAX + NIReg; + break; + case 1: + r0 = XMM0; + r1 = XMM0 + NFReg; + break; + } + for (r=r0; r<r1; r++) + if (!(regs & BIT(r))) + goto Found; + for (r=r0; r<r1; r++) + if (!bshas(m->b, r)) + goto Found; + diag("rega: no more regs"); + } +Found: + radd(m, t, r); + sethint(t, r); + return TMP(r); +} + +static int +rfree(RMap *m, int t) +{ + int i, r; + + if (!bshas(m->b, t)) + return -1; + for (i=0; m->t[i] != t; i++) + assert(i+1 < m->n); + r = m->r[i]; + bsclr(m->b, t); + bsclr(m->b, r); + m->n--; + memmove(&m->t[i], &m->t[i+1], (m->n-i) * sizeof m->t[0]); + memmove(&m->r[i], &m->r[i+1], (m->n-i) * sizeof m->r[0]); + return r; +} + +static void +mdump(RMap *m) +{ + int i; + + for (i=0; i<m->n; i++) + fprintf(stderr, " (%s, R%d)", + tmp[m->t[i]].name, + m->r[i]); + 
fprintf(stderr, "\n"); +} + +static void +pmadd(Ref src, Ref dst, int k) +{ + if (npm == cpm) { + cpm = cpm * 2 + 16; + pm = realloc(pm, cpm * sizeof pm[0]); + if (!pm) + diag("pmadd: out of memory"); + } + pm[npm].src = src; + pm[npm].dst = dst; + pm[npm].cls = k; + npm++; +} + +enum PMStat { ToMove, Moving, Moved }; + +static Ref +pmrec(enum PMStat *status, int i, int *k) +{ + Ref swp, swp1; + int j, k1; + + /* note, this routine might emit + * too many large instructions: + * + * , x -- x + * x -- x -- x | + * ` x -- x + * + * if only the first move is wide + * the whole cycle will be wide, + * this is safe but not necessary + */ + + if (req(pm[i].src, pm[i].dst)) + return R; + status[i] = Moving; + assert(KBASE(*k) == KBASE(pm[i].cls)); + assert((Kw|1) == Kl && (Ks|1) == Kd); + *k |= KWIDE(pm[i].cls); /* see above */ + swp = R; + for (j=0; j<npm; j++) { + if (req(pm[j].src, pm[i].dst)) + switch (status[j]) { + case ToMove: + k1 = *k; + swp1 = pmrec(status, j, &k1); + if (!req(swp1, R)) { + assert(req(swp, R)); + swp = swp1; + *k = k1; + } + break; + case Moving: + assert(req(swp, R)); + swp = pm[i].dst; + break; + case Moved: + break; + } + } + status[i] = Moved; + if (req(swp, R)) { + *curi++ = (Ins){OCopy, pm[i].dst, {pm[i].src}, pm[i].cls}; + return R; + } else if (!req(swp, pm[i].src)) { + *curi++ = (Ins){OSwap, R, {pm[i].src, pm[i].dst}, *k}; + return swp; + } else + return R; + +} + +static void +pmgen() +{ + int i, k; + enum PMStat *status; + + status = alloc(npm * sizeof status[0]); + assert(!npm || status[npm-1] == ToMove); + curi = insb; + for (i=0; i<npm; i++) + if (status[i] == ToMove) { + k = pm[i].cls; + pmrec(status, i, &k); + } +} + +static void +move(int r, Ref to, RMap *m) +{ + int n, t, r1; + + r1 = req(to, R) ? 
-1 : rfree(m, to.val); + if (bshas(m->b, r) && r1 != r) { + /* r is used and not by to */ + for (n=0; m->r[n] != r; n++) + assert(n+1 < m->n); + t = m->t[n]; + rfree(m, t); + bsset(m->b, r); + ralloc(m, t); + bsclr(m->b, r); + } + t = req(to, R) ? r : to.val; + radd(m, t, r); +} + +static int +regcpy(Ins *i) +{ + return i->op == OCopy && isreg(i->arg[0]); +} + +static Ins * +dopm(Blk *b, Ins *i, RMap *m) +{ + RMap m0; + int n, r, r1, t, s; + Ins *i0, *i1, *ip, *ir; + bits def; + + m0 = *m; + i1 = ++i; + do { + i--; + move(i->arg[0].val, i->to, m); + } while (i != b->ins && regcpy(i-1)); + assert(m0.n <= m->n); + if (i != b->ins && (i-1)->op == OCall) { + def = retregs((i-1)->arg[1], 0); + for (r=0; r<NRSave; r++) + if (!(BIT(rsave[r]) & def)) + move(rsave[r], R, m); + } + for (npm=0, n=0; n<m->n; n++) { + t = m->t[n]; + s = tmp[t].slot; + r1 = m->r[n]; + r = rfind(&m0, t); + if (r != -1) + pmadd(TMP(r1), TMP(r), tmp[t].cls); + else if (s != -1) + pmadd(TMP(r1), SLOT(s), tmp[t].cls); + } + for (ip=i; ip<i1; ip++) { + if (!req(ip->to, R)) + rfree(m, ip->to.val); + r = ip->arg[0].val; + if (rfind(m, r) == -1) + radd(m, r, r); + } + pmgen(); +#ifdef TEST_PMOV + return 0; +#endif + n = b->nins - (i1 - i) + (curi - insb); + i0 = alloc(n * sizeof(Ins)); + ip = icpy(ip = i0, b->ins, i - b->ins); + ip = icpy(ir = ip, insb, curi - insb); + ip = icpy(ip, i1, &b->ins[b->nins] - i1); + b->nins = n; + b->ins = i0; + return ir; +} + +static int +prio(Ref r1, Ref r2) +{ + /* trivial heuristic to begin with, + * later we can use the distance to + * the definition instruction + */ + (void) r2; + return *hint(r1.val) != -1; +} + +static void +insert(Ref *r, Ref **rs, int p) +{ + int i; + + rs[i = p] = r; + while (i-- > 0 && prio(*r, *rs[i])) { + rs[i+1] = rs[i]; + rs[i] = r; + } +} + +static void +doblk(Blk *b, RMap *cur) +{ + int x, r, nr; + bits rs; + Ins *i; + Mem *m; + Ref *ra[4]; + + if (rtype(b->jmp.arg) == RTmp) + b->jmp.arg = ralloc(cur, b->jmp.arg.val); + else if 
(rtype(b->jmp.arg) == RACall) { + /* add return registers */ + rs = retregs(b->jmp.arg, 0); + for (r=0; rs; rs/=2, r++) + if (rs & 1) + radd(cur, r, r); + } + for (i=&b->ins[b->nins]; i!=b->ins;) { + switch ((--i)->op) { + case OCall: + rs = argregs(i->arg[1], 0); + for (r=0; r<NRSave; r++) + if (!(BIT(rsave[r]) & rs)) + rfree(cur, rsave[r]); + break; + case OCopy: + if (isreg(i->arg[0])) { + i = dopm(b, i, cur); + continue; + } + if (isreg(i->to)) + if (rtype(i->arg[0]) == RTmp) + sethint(i->arg[0].val, i->to.val); + /* fall through */ + default: + if (!req(i->to, R)) { + assert(rtype(i->to) == RTmp); + r = rfree(cur, i->to.val); + if (r == -1 && !isreg(i->to)) { + *i = (Ins){.op = ONop}; + continue; + } + if (i->to.val >= Tmp0) + i->to = TMP(r); + } + break; + } + for (x=0, nr=0; x<2; x++) + switch (rtype(i->arg[x])) { + case RAMem: + m = &mem[i->arg[x].val & AMask]; + if (rtype(m->base) == RTmp) + insert(&m->base, ra, nr++); + if (rtype(m->index) == RTmp) + insert(&m->index, ra, nr++); + break; + case RTmp: + insert(&i->arg[x], ra, nr++); + break; + } + for (r=0; r<nr; r++) + *ra[r] = ralloc(cur, ra[r]->val); + } +} + +/* register allocation + * depends on rpo, phi, cost, (and obviously spill) + */ +void +rega(Fn *fn) +{ + int j, n, t, r, r1, x, rl[Tmp0]; + Blk *b, *b1, *s, ***ps, *blist; + RMap *end, *beg, cur, old; + Ins *i; + Phi *p; + uint u; + Ref src, dst; + + /* 1. setup */ + regu = 0; + tmp = fn->tmp; + mem = fn->mem; + end = alloc(fn->nblk * sizeof end[0]); + beg = alloc(fn->nblk * sizeof beg[0]); + for (n=0; n<fn->nblk; n++) { + bsinit(end[n].b, fn->ntmp); + bsinit(beg[n].b, fn->ntmp); + } + bsinit(cur.b, fn->ntmp); + bsinit(old.b, fn->ntmp); + + for (t=Tmp0; t<fn->ntmp; t++) + *hint(t) = -1; + for (b=fn->start, i=b->ins; i-b->ins < b->nins; i++) + if (i->op != OCopy || !isreg(i->arg[0])) + break; + else { + assert(rtype(i->to) == RTmp); + sethint(i->to.val, i->arg[0].val); + } + + /* 2. 
assign registers following post-order */ + for (n=fn->nblk-1; n>=0; n--) { + b = fn->rpo[n]; + cur.n = 0; + bszero(cur.b); + for (x=0; x<2; x++) + for (t=Tmp0; t<fn->ntmp; t++) { + assert(bshas(b->out, t) || + !bshas(cur.b, t)); + if (bshas(b->out, t)) + if (!bshas(cur.b, t)) + if (x || (r=*hint(t)) != -1) + if (x || !bshas(cur.b, r)) + ralloc(&cur, t); + } + rcopy(&end[n], &cur); + doblk(b, &cur); + bscopy(b->in, cur.b); + for (p=b->phi; p; p=p->link) + if (rtype(p->to) == RTmp) { + bsclr(b->in, p->to.val); + /* heuristic 0: + * if the phi destination has an + * argument from a frequent block + * that was already allocated to + * 'r', use 'r' as the new hint + */ + memset(rl, 0, sizeof rl); + for (u=0; u<p->narg; u++) { + t = p->arg[u].val; + b1 = p->blk[u]; + if (rtype(p->arg[u]) == RTmp) + if ((r=rfind(&end[b1->id], t)) != -1) + rl[r] += b1->loop; + } + for (x=0, j=0; j<Tmp0; j++) + if (rl[j] > rl[x]) + x = j; + if (rl[x] >= b->loop) + *hint(p->to.val) = x; + } + if (b->npred > 1) { + /* heuristic 1: + * attempt to satisfy hints + * when it's simple and we have + * multiple predecessors + */ + rcopy(&old, &cur); + curi = &insb[NIns]; + for (j=0; j<old.n; j++) { + t = old.t[j]; + r = *hint(t); + r1 = rfind(&cur, t); + if (r != -1 && r != r1) + if (!bshas(cur.b, r)) { + rfree(&cur, t); + radd(&cur, t, r); + x = tmp[t].cls; + emit(OCopy, x, TMP(r1), TMP(r), R); + } + } + if ((j = &insb[NIns] - curi)) { + b->nins += j; + i = alloc(b->nins * sizeof(Ins)); + icpy(icpy(i, curi, j), b->ins, b->nins-j); + b->ins = i; + } + } + rcopy(&beg[n], &cur); + } + if (debug['R']) { + fprintf(stderr, "\n> Register mappings:\n"); + for (n=0; n<fn->nblk; n++) { + b = fn->rpo[n]; + fprintf(stderr, "\t%-10s beg", b->name); + mdump(&beg[n]); + fprintf(stderr, "\t end"); + mdump(&end[n]); + } + fprintf(stderr, "\n"); + } + + /* 3. 
compose glue code */ + blist = 0; + for (b=fn->start;; b=b->link) { + ps = (Blk**[3]){&b->s1, &b->s2, (Blk*[1]){0}}; + for (; (s=**ps); ps++) { + npm = 0; + for (p=s->phi; p; p=p->link) { + dst = p->to; + assert(rtype(dst)==RSlot || rtype(dst)==RTmp); + if (rtype(dst) == RTmp) { + r = rfind(&beg[s->id], dst.val); + if (r == -1) + continue; + dst = TMP(r); + } + for (u=0; p->blk[u]!=b; u++) + assert(u+1 < p->narg); + src = p->arg[u]; + if (rtype(src) == RTmp) + src = rref(&end[b->id], src.val); + pmadd(src, dst, p->cls); + } + for (t=Tmp0; t<fn->ntmp; t++) + if (bshas(s->in, t)) { + src = rref(&end[b->id], t); + dst = rref(&beg[s->id], t); + pmadd(src, dst, tmp[t].cls); + } + pmgen(); + if (curi == insb) + continue; + b1 = blknew(); + b1->loop = (b->loop+s->loop) / 2; + b1->link = blist; + blist = b1; + fn->nblk++; + sprintf(b1->name, "%s_%s", b->name, s->name); + b1->nins = curi - insb; + idup(&b1->ins, insb, b1->nins); + b1->jmp.type = JJmp; + b1->s1 = s; + **ps = b1; + } + if (!b->link) { + b->link = blist; + break; + } + } + for (b=fn->start; b; b=b->link) + b->phi = 0; + fn->reg = regu; + + if (debug['R']) { + fprintf(stderr, "\n> After register allocation:\n"); + printfn(fn, stderr); + } +} diff --git a/src/spill.c b/src/spill.c @@ -0,0 +1,507 @@ +#include "all.h" + +static void +loopmark(Blk *hd, Blk *b, Phi *p) +{ + int k, head; + uint n, a; + + head = hd->id; + if (b->id < head) + return; + for (; p; p=p->link) + for (a=0; a<p->narg; a++) + if (p->blk[a] == b) + if (rtype(p->arg[a]) == RTmp) + bsset(hd->gen, p->arg[a].val); + if (b->visit == head) + return; + b->visit = head; + b->loop *= 10; + /* aggregate looping information at + * loop headers */ + bsunion(hd->gen, b->gen); + for (k=0; k<2; k++) + if (b->nlive[k] > hd->nlive[k]) + hd->nlive[k] = b->nlive[k]; + for (n=0; n<b->npred; n++) + loopmark(hd, b->pred[n], b->phi); +} + +static void +tmpuse(Ref r, int use, int loop, Fn *fn) +{ + Mem *m; + Tmp *t; + + if (rtype(r) == RAMem) { + m = &fn->mem[r.val & 
AMask]; + tmpuse(m->base, 1, loop, fn); + tmpuse(m->index, 1, loop, fn); + } + else if (rtype(r) == RTmp && r.val >= Tmp0) { + t = &fn->tmp[r.val]; + t->nuse += use; + t->ndef += !use; + t->cost += loop; + } +} + +/* evaluate spill costs of temporaries, + * this also fills usage information + * requires rpo, preds + */ +void +fillcost(Fn *fn) +{ + int n, hd; + uint a; + Blk *b; + Ins *i; + Tmp *t; + Phi *p; + + for (b=fn->start; b; b=b->link) { + b->loop = 1; + b->visit = -1; + } + if (debug['S']) + fprintf(stderr, "\n> Loop information:\n"); + for (n=0; n<fn->nblk; n++) { + b = fn->rpo[n]; + hd = 0; + for (a=0; a<b->npred; a++) + if (b->pred[a]->id >= n) { + loopmark(b, b->pred[a], b->phi); + hd = 1; + } + if (hd && debug['S']) { + fprintf(stderr, "\t%-10s", b->name); + fprintf(stderr, " (% 3d ", b->nlive[0]); + fprintf(stderr, "% 3d) ", b->nlive[1]); + dumpts(b->gen, fn->tmp, stderr); + } + } + for (t=fn->tmp; t-fn->tmp < fn->ntmp; t++) { + t->cost = t-fn->tmp < Tmp0 ? 1e6 : 0; + t->nuse = 0; + t->ndef = 0; + } + for (b=fn->start; b; b=b->link) { + for (p=b->phi; p; p=p->link) { + /* todo, the cost computation + * for p->to is not great... 
*/ + tmpuse(p->to, 0, 0, fn); + for (a=0; a<p->narg; a++) { + n = p->blk[a]->loop; + assert(b->npred==p->narg && + "wrong cfg"); + n /= b->npred; + tmpuse(p->arg[a], 1, n, fn); + } + } + n = b->loop; + for (i=b->ins; i-b->ins < b->nins; i++) { + tmpuse(i->to, 0, n, fn); + tmpuse(i->arg[0], 1, n, fn); + tmpuse(i->arg[1], 1, n, fn); + } + tmpuse(b->jmp.arg, 1, n, fn); + } + if (debug['S']) { + fprintf(stderr, "\n> Spill costs:\n"); + for (n=Tmp0; n<fn->ntmp; n++) + fprintf(stderr, "\t%-10s %d\n", + fn->tmp[n].name, + fn->tmp[n].cost); + fprintf(stderr, "\n"); + } +} + +static BSet *fst; /* temps to prioritize in registers (for tcmp1) */ +static Tmp *tmp; /* current temporaries (for tcmpX) */ +static int ntmp; /* current # of temps (for limit) */ +static int locs; /* stack size used by locals */ +static int slot4; /* next slot of 4 bytes */ +static int slot8; /* ditto, 8 bytes */ +static BSet mask[2][1]; /* class masks */ + +static int +tcmp0(const void *pa, const void *pb) +{ + return tmp[*(int *)pb].cost - tmp[*(int *)pa].cost; +} + +static int +tcmp1(const void *pa, const void *pb) +{ + int c; + + c = bshas(fst, *(int *)pb) - bshas(fst, *(int *)pa); + return c ? 
c : tcmp0(pa, pb); +} + +static Ref +slot(int t) +{ + int s; + + if (t < Tmp0) + diag("spill: cannot spill register"); + s = tmp[t].slot; + if (s == -1) { + assert(NAlign == 3); + /* nice logic to pack stack slots + * on demand, there can be only + * one hole and slot4 points to it + * + * invariant: slot4 <= slot8 + */ + if (KWIDE(tmp[t].cls)) { + s = slot8; + if (slot4 == slot8) + slot4 += 2; + slot8 += 2; + } else { + s = slot4; + if (slot4 == slot8) { + slot8 += 2; + slot4 += 1; + } else + slot4 = slot8; + } + s += locs; + tmp[t].slot = s; + } + return SLOT(s); +} + +static void +limit(BSet *b, int k, BSet *f) +{ + static int *tarr, maxt; + int i, nt; + uint t; + + nt = bscount(b); + if (nt <= k) + return; + if (nt > maxt) { + free(tarr); + tarr = emalloc(nt * sizeof tarr[0]); + maxt = nt; + } + for (i=0, t=0; bsiter(b, &t); t++) { + bsclr(b, t); + tarr[i++] = t; + } + if (!f) + qsort(tarr, nt, sizeof tarr[0], tcmp0); + else { + fst = f; + qsort(tarr, nt, sizeof tarr[0], tcmp1); + } + for (i=0; i<k && i<nt; i++) + bsset(b, tarr[i]); + for (; i<nt; i++) + slot(tarr[i]); +} + +static void +limit2(BSet *b1, int k1, int k2, BSet *fst) +{ + BSet b2[1]; + + bsinit(b2, ntmp); /* todo, free those */ + bscopy(b2, b1); + bsinter(b1, mask[0]); + bsinter(b2, mask[1]); + limit(b1, NIReg - k1, fst); + limit(b2, NFReg - k2, fst); + bsunion(b1, b2); +} + +static void +sethint(BSet *u, bits r) +{ + uint t; + + for (t=Tmp0; bsiter(u, &t); t++) + tmp[phicls(t, tmp)].hint.m |= r; +} + +static void +reloads(BSet *u, BSet *v) +{ + uint t; + + for (t=Tmp0; bsiter(u, &t); t++) + if (!bshas(v, t)) + emit(OLoad, tmp[t].cls, TMP(t), slot(t), R); +} + +static void +store(Ref r, int s) +{ + static int kstore[] = { + [Kw] = OStorew, [Kl] = OStorel, + [Ks] = OStores, [Kd] = OStored, + }; + + if (s != -1) + emit(kstore[tmp[r.val].cls], 0, R, r, SLOT(s)); +} + +static int +regcpy(Ins *i) +{ + return i->op == OCopy && isreg(i->arg[0]); +} + +static Ins * +dopm(Blk *b, Ins *i, BSet *v) +{ + int 
n, t; + BSet u[1]; + Ins *i1; + bits r; + + bsinit(u, ntmp); /* todo, free those */ + /* consecutive copies from + * registers need to be handled + * as one large instruction + * + * fixme: there is an assumption + * that calls are always followed + * by copy instructions here, this + * might not be true if previous + * passes change + */ + i1 = ++i; + do { + i--; + t = i->to.val; + if (!req(i->to, R)) + if (bshas(v, t)) { + bsclr(v, t); + store(i->to, tmp[t].slot); + } + bsset(v, i->arg[0].val); + } while (i != b->ins && regcpy(i-1)); + bscopy(u, v); + if (i != b->ins && (i-1)->op == OCall) { + v->t[0] &= ~retregs((i-1)->arg[1], 0); + limit2(v, NISave, NFSave, 0); + for (r=0, n=0; n<NRSave; n++) + r |= BIT(rsave[n]); + v->t[0] |= argregs((i-1)->arg[1], 0); + } else { + limit2(v, 0, 0, 0); + r = v->t[0]; + } + sethint(v, r); + reloads(u, v); + do + emiti(*--i1); + while (i1 != i); + return i; +} + +/* spill code insertion + * requires spill costs, rpo, liveness + * + * Note: this will replace liveness + * information (in, out) with temporaries + * that must be in registers at block + * borders + * + * Be careful with: + * - OCopy instructions to ensure register + * constraints + */ +void +spill(Fn *fn) +{ + Blk *b, *s1, *s2, *hd, **bp; + int j, n, l, t, k, lvarg[2]; + BSet u[1], v[1], w[1]; + Ins *i; + Phi *p; + Mem *m; + bits r; + + tmp = fn->tmp; + ntmp = fn->ntmp; + bsinit(u, ntmp); + bsinit(v, ntmp); + bsinit(w, ntmp); + bsinit(mask[0], ntmp); + bsinit(mask[1], ntmp); + locs = fn->slot; + slot4 = 0; + slot8 = 0; + for (t=0; t<ntmp; t++) { + k = 0; + if (t >= XMM0 && t < XMM0 + NFReg) + k = 1; + else if (t >= Tmp0) + k = KBASE(tmp[t].cls); + bsset(mask[k], t); + } + + for (bp=&fn->rpo[fn->nblk]; bp!=fn->rpo;) { + b = *--bp; + /* invariant: all bocks with bigger rpo got + * their in,out updated. */ + + /* 1. 
find temporaries in registers at + * the end of the block (put them in v) */ + curi = 0; + s1 = b->s1; + s2 = b->s2; + hd = 0; + if (s1 && s1->id <= n) + hd = s1; + if (s2 && s2->id <= n) + if (!hd || s2->id >= hd->id) + hd = s2; + r = 0; + bszero(v); + if (hd) { + /* back-edge */ + for (k=0; k<2; k++) { + n = k == 0 ? NIReg : NFReg; + bscopy(u, b->out); + bsinter(u, mask[k]); + bscopy(w, u); + bsinter(u, hd->gen); + bsdiff(w, hd->gen); + if ((int)bscount(u) < n) { /* fixme */ + j = bscount(w); /* live through */ + l = hd->nlive[k]; + limit(w, n - (l - j), 0); + bsunion(u, w); + } else + limit(u, n, 0); + bsunion(v, u); + } + } else if (s1) { + liveon(v, b, s1); + if (s2) { + liveon(u, b, s2); + bscopy(w, u); + bsinter(w, v); + bsunion(v, u); + } + limit2(v, 0, 0, w); + } else if (rtype(b->jmp.arg) == RACall) { + /* return */ + r = retregs(b->jmp.arg, 0); + v->t[0] |= r; + } + bscopy(b->out, v); + + /* 2. process the block instructions */ + curi = &insb[NIns]; + for (i=&b->ins[b->nins]; i!=b->ins;) { + i--; + if (regcpy(i)) { + i = dopm(b, i, v); + continue; + } + bszero(w); + if (!req(i->to, R)) { + assert(rtype(i->to) == RTmp); + t = i->to.val; + if (bshas(v, t)) + bsclr(v, t); + else { + /* make sure we have a reg + * for the result */ + bsset(v, t); + bsset(w, t); + } + } + j = opdesc[i->op].nmem; + for (n=0; n<2; n++) + if (rtype(i->arg[n]) == RAMem) + j--; + for (n=0; n<2; n++) + switch (rtype(i->arg[n])) { + case RAMem: + t = i->arg[n].val; + m = &fn->mem[t & AMask]; + if (rtype(m->base) == RTmp) { + bsset(v, m->base.val); + bsset(w, m->base.val); + } + if (rtype(m->index) == RTmp) { + bsset(v, m->index.val); + bsset(w, m->index.val); + } + break; + case RTmp: + t = i->arg[n].val; + lvarg[n] = bshas(v, t); + bsset(v, t); + if (j-- <= 0) + bsset(w, t); + break; + } + bscopy(u, v); + limit2(v, 0, 0, w); + for (n=0; n<2; n++) + if (rtype(i->arg[n]) == RTmp) { + t = i->arg[n].val; + if (!bshas(v, t)) { + /* do not reload if the + * the temporary was dead + */ + 
if (!lvarg[n]) + bsclr(u, t); + i->arg[n] = slot(t); + } + } + reloads(u, v); + if (!req(i->to, R)) { + t = i->to.val; + store(i->to, tmp[t].slot); + bsclr(v, t); + } + emiti(*i); + r = v->t[0] & (BIT(Tmp0)-1); + if (r) + sethint(v, r); + } + assert(!r || b==fn->start); + + for (p=b->phi; p; p=p->link) { + assert(rtype(p->to) == RTmp); + t = p->to.val; + if (bshas(v, t)) { + bsclr(v, t); + store(p->to, tmp[t].slot); + } else if (bshas(b->in, t)) + /* only if the phi is live */ + p->to = slot(p->to.val); + } + bscopy(b->in, v); + b->nins = &insb[NIns] - curi; + idup(&b->ins, curi, b->nins); + } + + /* align the locals to a 16 byte boundary */ + assert(NAlign == 3); + slot8 += slot8 & 3; + fn->slot += slot8; + + if (debug['S']) { + fprintf(stderr, "\n> Block information:\n"); + for (b=fn->start; b; b=b->link) { + printf("\t%-10s (% 5d) ", b->name, b->loop); + dumpts(b->out, fn->tmp, stdout); + } + fprintf(stderr, "\n> After spilling:\n"); + printfn(fn, stderr); + } +} diff --git a/src/ssa.c b/src/ssa.c @@ -0,0 +1,516 @@ +#include "all.h" +#include <stdarg.h> + +static void +adduse(Tmp *tmp, int ty, Blk *b, ...) +{ + Use *u; + int n; + va_list ap; + + va_start(ap, b); + n = tmp->nuse; + vgrow(&tmp->use, ++tmp->nuse); + u = &tmp->use[n]; + u->type = ty; + u->bid = b->id; + switch (ty) { + default: + diag("ssa: adduse defaulted"); + case UPhi: + u->u.phi = va_arg(ap, Phi *); + break; + case UIns: + u->u.ins = va_arg(ap, Ins *); + break; + case UJmp: + break; + } + va_end(ap); +} + +/* fill usage, phi, and class information + */ +void +filluse(Fn *fn) +{ + Blk *b; + Phi *p; + Ins *i; + int m, t; + uint a; + Tmp *tmp; + + /* todo, is this the correct file? 
*/ + tmp = fn->tmp; + for (t=0; t<fn->ntmp; t++) { + tmp[t].ndef = 0; + tmp[t].nuse = 0; + tmp[t].phi = 0; + tmp[t].cls = 0; + if (tmp[t].use == 0) + tmp[t].use = vnew(0, sizeof(Use)); + } + for (b=fn->start; b; b=b->link) { + for (p=b->phi; p; p=p->link) { + assert(rtype(p->to) == RTmp); + t = p->to.val; + tmp[t].ndef++; + tmp[t].cls = p->cls; + tmp[t].phi = p->to.val; + for (a=0; a<p->narg; a++) + if (rtype(p->arg[a]) == RTmp) { + t = p->arg[a].val; + adduse(&tmp[t], UPhi, b, p); + if (!tmp[t].phi) + tmp[t].phi = p->to.val; + } + } + for (i=b->ins; i-b->ins < b->nins; i++) { + if (!req(i->to, R)) { + assert(rtype(i->to) == RTmp); + t = i->to.val; + tmp[t].ndef++; + tmp[t].cls = i->cls; + } + for (m=0; m<2; m++) + if (rtype(i->arg[m]) == RTmp) { + t = i->arg[m].val; + adduse(&tmp[t], UIns, b, i); + } + } + if (rtype(b->jmp.arg) == RTmp) + adduse(&tmp[b->jmp.arg.val], UJmp, b); + } +} + +static void +addpred(Blk *bp, Blk *bc) +{ + uint i; + + if (!bc->pred) { + bc->pred = alloc(bc->npred * sizeof bc->pred[0]); + for (i=0; i<bc->npred; i++) + bc->pred[i] = 0; + } + for (i=0; bc->pred[i]; i++) + ; + bc->pred[i] = bp; +} + +/* fill predecessors information in blocks + */ +void +fillpreds(Fn *f) +{ + Blk *b; + + for (b=f->start; b; b=b->link) { + b->npred = 0; + b->pred = 0; + } + for (b=f->start; b; b=b->link) { + if (b->s1) + b->s1->npred++; + if (b->s2) + b->s2->npred++; + } + for (b=f->start; b; b=b->link) { + if (b->s1) + addpred(b, b->s1); + if (b->s2) + addpred(b, b->s2); + } +} + +static int +rporec(Blk *b, int x) +{ + Blk *s1, *s2; + + if (!b || b->id >= 0) + return x; + b->id = 1; + s1 = b->s1; + s2 = b->s2; + if (s1 && s2 && s1->loop > s2->loop) { + s1 = b->s2; + s2 = b->s1; + } + x = rporec(s1, x); + x = rporec(s2, x); + b->id = x; + assert(x >= 0); + return x - 1; +} + +/* fill the rpo information in blocks + */ +void +fillrpo(Fn *f) +{ + int n; + Blk *b, **p; + + for (b=f->start; b; b=b->link) + b->id = -1; + n = 1 + rporec(f->start, f->nblk-1); + f->nblk 
-= n; + f->rpo = alloc(f->nblk * sizeof f->rpo[0]); + for (p=&f->start; *p;) { + b = *p; + if (b->id == -1) { + *p = b->link; + /* todo, free block */ + } else { + b->id -= n; + f->rpo[b->id] = b; + p=&(*p)->link; + } + } +} + +/* for dominators computation, read + * "A Simple, Fast Dominance Algorithm" + * by K. Cooper, T. Harvey, and K. Kennedy. + */ + +static Blk * +inter(Blk *b1, Blk *b2) +{ + Blk *bt; + + if (b1 == 0) + return b2; + while (b1 != b2) { + if (b1->id < b2->id) { + bt = b1; + b1 = b2; + b2 = bt; + } + while (b1->id > b2->id) { + b1 = b1->idom; + assert(b1); + } + } + return b1; +} + +static void +filldom(Fn *fn) +{ + Blk *b, *d; + int ch, n; + uint p; + + for (b=fn->start; b; b=b->link) { + b->idom = 0; + b->dom = 0; + b->dlink = 0; + } + do { + ch = 0; + for (n=1; n<fn->nblk; n++) { + b = fn->rpo[n]; + d = 0; + for (p=0; p<b->npred; p++) + if (b->pred[p]->idom + || b->pred[p] == fn->start) + d = inter(d, b->pred[p]); + if (d != b->idom) { + ch++; + b->idom = d; + } + } + } while (ch); + for (b=fn->start; b; b=b->link) + if ((d=b->idom)) { + assert(d != b); + b->dlink = d->dom; + d->dom = b; + } +} + +static int +sdom(Blk *b1, Blk *b2) +{ + assert(b1 && b2); + if (b1 == b2) + return 0; + while (b2->id > b1->id) + b2 = b2->idom; + return b1 == b2; +} + +static int +dom(Blk *b1, Blk *b2) +{ + return b1 == b2 || sdom(b1, b2); +} + +static void +addfron(Blk *a, Blk *b) +{ + int n; + + for (n=0; n<a->nfron; n++) + if (a->fron[n] == b) + return; + if (!a->nfron) + a->fron = vnew(++a->nfron, sizeof a->fron[0]); + else + vgrow(&a->fron, ++a->nfron); + a->fron[a->nfron-1] = b; +} + +static void +fillfron(Fn *fn) +{ + Blk *a, *b; + + for (b=fn->start; b; b=b->link) { + if (b->s1) + for (a=b; !sdom(a, b->s1); a=a->idom) + addfron(a, b->s1); + if (b->s2) + for (a=b; !sdom(a, b->s2); a=a->idom) + addfron(a, b->s2); + } +} + +static Ref +refindex(int t, Fn *fn) +{ + return newtmp(fn->tmp[t].name, fn->tmp[t].cls, fn); +} + +static void +phiins(Fn *fn) +{ + BSet 
u[1], defs[1]; + Blk *a, *b, **blist, **be, **bp; + Ins *i; + Phi *p; + Ref r; + int t, n, k, nt; + + bsinit(u, fn->nblk); + bsinit(defs, fn->nblk); + blist = emalloc(fn->nblk * sizeof blist[0]); + be = &blist[fn->nblk]; + nt = fn->ntmp; + for (t=Tmp0; t<nt; t++) { + fn->tmp[t].visit = 0; + if (fn->tmp[t].phi != 0) + continue; + bszero(u); + k = -1; + bp = be; + for (b=fn->start; b; b=b->link) { + b->visit = 0; + r = R; + for (i=b->ins; i-b->ins < b->nins; i++) { + if (!req(r, R)) { + if (req(i->arg[0], TMP(t))) + i->arg[0] = r; + if (req(i->arg[1], TMP(t))) + i->arg[1] = r; + } + if (req(i->to, TMP(t))) { + if (!bshas(b->out, t)) { + if (fn->tmp[t].ndef == 1) + r = TMP(t); + else + r = refindex(t, fn); + i->to = r; + } else { + if (!bshas(u, b->id)) { + bsset(u, b->id); + *--bp = b; + } + if (k == -1) + k = i->cls; + assert(k == i->cls); + } + } + } + if (!req(r, R) && req(b->jmp.arg, TMP(t))) + b->jmp.arg = r; + } + bscopy(defs, u); + while (bp != be) { + fn->tmp[t].visit = t; + b = *bp++; + bsclr(u, b->id); + for (n=0; n<b->nfron; n++) { + a = b->fron[n]; + if (a->visit++ == 0) + if (bshas(a->in, t)) { + p = alloc(sizeof *p); + p->cls = k; + p->to = TMP(t); + p->link = a->phi; + a->phi = p; + if (!bshas(defs, a->id)) + if (!bshas(u, a->id)) { + bsset(u, a->id); + *--bp = a; + } + } + } + } + } + free(blist); +} + +typedef struct Name Name; +struct Name { + Ref r; + Blk *b; + Name *up; +}; + +static Name *namel; + +static Name * +nnew(Ref r, Blk *b, Name *up) +{ + Name *n; + + if (namel) { + n = namel; + namel = n->up; + } else + /* could use alloc, here + * but namel should be reset + */ + n = emalloc(sizeof *n); + n->r = r; + n->b = b; + n->up = up; + return n; +} + +static void +nfree(Name *n) +{ + n->up = namel; + namel = n; +} + +static void +rendef(Ref *r, Blk *b, Name **stk, Fn *fn) +{ + Ref r1; + int t; + + t = r->val; + if (req(*r, R) || !fn->tmp[t].visit) + return; + r1 = refindex(t, fn); + fn->tmp[r1.val].visit = t; + stk[t] = nnew(r1, b, stk[t]); + *r 
= r1; +} + +static Ref +getstk(int t, Blk *b, Name **stk) +{ + Name *n, *n1; + + n = stk[t]; + while (n && !dom(n->b, b)) { + n1 = n; + n = n->up; + nfree(n1); + } + stk[t] = n; + if (!n) { + /* uh, oh, warn */ + return CON_Z; + } else + return n->r; +} + +static void +renblk(Blk *b, Name **stk, Fn *fn) +{ + Phi *p; + Ins *i; + Blk *s, **ps, *succ[3]; + int t, m; + + for (p=b->phi; p; p=p->link) + rendef(&p->to, b, stk, fn); + for (i=b->ins; i-b->ins < b->nins; i++) { + for (m=0; m<2; m++) { + t = i->arg[m].val; + if (rtype(i->arg[m]) == RTmp) + if (fn->tmp[t].visit) + i->arg[m] = getstk(t, b, stk); + } + rendef(&i->to, b, stk, fn); + } + t = b->jmp.arg.val; + if (rtype(b->jmp.arg) == RTmp) + if (fn->tmp[t].visit) + b->jmp.arg = getstk(t, b, stk); + succ[0] = b->s1; + succ[1] = b->s2; + succ[2] = 0; + for (ps=succ; (s=*ps); ps++) + for (p=s->phi; p; p=p->link) { + t = p->to.val; + if ((t=fn->tmp[t].visit)) { + m = p->narg++; + if (m == NPred) + diag("ssa: too many phi arguments"); + p->arg[m] = getstk(t, b, stk); + p->blk[m] = b; + } + } + for (s=b->dom; s; s=s->dlink) + renblk(s, stk, fn); +} + +/* require ndef */ +void +ssa(Fn *fn) +{ + Name **stk, *n; + int d, nt; + Blk *b, *b1; + + nt = fn->ntmp; + stk = emalloc(nt * sizeof stk[0]); + d = debug['L']; + debug['L'] = 0; + filldom(fn); + if (debug['N']) { + fprintf(stderr, "\n> Dominators:\n"); + for (b1=fn->start; b1; b1=b1->link) { + if (!b1->dom) + continue; + fprintf(stderr, "%10s:", b1->name); + for (b=b1->dom; b; b=b->dlink) + fprintf(stderr, " %s", b->name); + fprintf(stderr, "\n"); + } + } + fillfron(fn); + filllive(fn); + phiins(fn); + renblk(fn->start, stk, fn); + while (nt--) + while ((n=stk[nt])) { + stk[nt] = n->up; + nfree(n); + } + debug['L'] = d; + free(stk); + if (debug['N']) { + fprintf(stderr, "\n> After SSA construction:\n"); + printfn(fn, stderr); + } +} diff --git a/lisc/test/_alt.ssa b/src/test/_alt.ssa diff --git a/lisc/test/_dragon.ssa b/src/test/_dragon.ssa diff --git 
a/lisc/test/_fix1.ssa b/src/test/_fix1.ssa diff --git a/lisc/test/_fix2.ssa b/src/test/_fix2.ssa diff --git a/lisc/test/_fix3.ssa b/src/test/_fix3.ssa diff --git a/lisc/test/_fix4.ssa b/src/test/_fix4.ssa diff --git a/lisc/test/_live.ssa b/src/test/_live.ssa diff --git a/lisc/test/_rpo.ssa b/src/test/_rpo.ssa diff --git a/lisc/test/_spill1.ssa b/src/test/_spill1.ssa diff --git a/lisc/test/_spill2.ssa b/src/test/_spill2.ssa diff --git a/lisc/test/_spill3.ssa b/src/test/_spill3.ssa diff --git a/lisc/test/abi1.ssa b/src/test/abi1.ssa diff --git a/lisc/test/abi2.ssa b/src/test/abi2.ssa diff --git a/lisc/test/abi3.ssa b/src/test/abi3.ssa diff --git a/lisc/test/abi4.ssa b/src/test/abi4.ssa diff --git a/lisc/test/abi5.ssa b/src/test/abi5.ssa diff --git a/lisc/test/align.ssa b/src/test/align.ssa diff --git a/lisc/test/collatz.ssa b/src/test/collatz.ssa diff --git a/lisc/test/cprime.ssa b/src/test/cprime.ssa diff --git a/lisc/test/cup.ssa b/src/test/cup.ssa diff --git a/lisc/test/dark.ssa b/src/test/dark.ssa diff --git a/lisc/test/double.ssa b/src/test/double.ssa diff --git a/lisc/test/echo.ssa b/src/test/echo.ssa diff --git a/lisc/test/eucl.ssa b/src/test/eucl.ssa diff --git a/lisc/test/euclc.ssa b/src/test/euclc.ssa diff --git a/lisc/test/fpcnv.ssa b/src/test/fpcnv.ssa diff --git a/src/test/go.sh b/src/test/go.sh @@ -0,0 +1,116 @@ +#!/bin/sh + +TMP=/tmp/qbe.zzzz + +DRV=$TMP.c +ASM=$TMP.s +BIN=$TMP.bin +OUT=$TMP.out + +cleanup() { + rm -f $DRV $ASM $BIN $OUT +} + +extract() { + WHAT="$1" + FILE="$2" + + awk " + /^# >>> $WHAT/ { + p = 1 + next + } + /^# <<</ { + if (p) + p = 0 + } + p + " $FILE \ + | sed -e 's/# //' \ + | sed -e 's/#$//' +} + +once() { + T="$1" + + if ! test -f $T + then + echo "invalid test file $T" >&2 + exit 1 + fi + + echo "$T... " + + if ! ./qbe $T -o $ASM + then + echo "[qbe fail]" + return 1 + fi + + extract driver $T > $DRV + extract output $T > $OUT + + if test -s $DRV + then + LNK="$DRV $ASM" + else + LNK="$ASM" + fi + + if ! 
#trap cleanup TERM QUIT

# Entry point: "all" runs every test/*.ssa file whose name does not
# start with an underscore and reports the number of failures; any
# other argument is taken as a single test file and the exit status
# of the script is the status of that test.

if test -z "$1"
then
	# diagnostics belong on stderr
	echo "usage: test/go.sh {all, SSAFILE}" >&2
	exit 1
fi

case "$1" in
	"all")
		F=0
		for T in test/[!_]*.ssa
		do
			once "$T"
			# once returns 1 on failure, 0 on success
			F=$((F + $?))
		done
		if test "$F" -ge 1
		then
			echo
			echo "$F test(s) failed!"
		else
			echo
			echo "All is fine!"
		fi
		;;
	*)
		once "$1"
		exit $?
		;;
esac
# Defaults: one iteration, C caller and SSA callee, no fixed seed.
N=1
SEED=
CALLER=c
CALLEE=ssa

while test -n "$1"
do
	case "$1" in
	"-callssa")
		# same as the default, set explicitly so that the
		# last flag given on the command line wins
		CALLER=c
		CALLEE=ssa
		;;
	"-callc")
		CALLER=ssa
		CALLEE=c
		;;
	"-s")
		test -n "$2" || usage
		shift
		SEED="$1"
		;;
	"-n")
		test -n "$2" || usage
		shift
		N="$1"
		;;
	*)
		usage
		;;
	esac
	shift
done

TMP=`mktemp -d abifuzz.XXXXXX` || failure "mktemp"

init

# With -s, run exactly one test using the requested seed; the
# variable set by the option parser is SEED.
if test -n "$SEED"
then
	once "$CALLER" "$CALLEE" "$SEED"
else
	for n in `seq "$N"`
	do
		once "$CALLER" "$CALLEE"
		# progress indicator: print every 100th iteration
		echo "$n" | grep "00$"
	done
fi

echo "All done."

cleanup
" + + ./qbe $t >/tmp/out.0 2>&1 + ./qbe.1 $t >/tmp/out.1 2>&1 + + if diff /tmp/out.0 /tmp/out.1 > /dev/null + then + echo "OK" + else + echo "KO" + break + fi +done diff --git a/src/util.c b/src/util.c @@ -0,0 +1,329 @@ +#include "all.h" + +typedef struct Bitset Bitset; +typedef struct Vec Vec; + +struct Vec { + ulong mag; + size_t esz; + ulong cap; + union { + long long ll; + long double ld; + void *ptr; + } align[]; +}; + +enum { + VMin = 2, + VMag = 0xcabba9e, + NPtr = 256, +}; + +Typ typ[NTyp]; +Ins insb[NIns], *curi; + +static void *ptr[NPtr]; +static void **pool = ptr; +static int nptr = 1; + +void +diag(char *s) +{ + fputs(s, stderr); + fputc('\n', stderr); + abort(); +} + +void * +emalloc(size_t n) +{ + void *p; + + p = calloc(1, n); + if (!p) + diag("emalloc: out of memory"); + return p; +} + +void * +alloc(size_t n) +{ + void **pp; + + if (n == 0) + return 0; + if (nptr >= NPtr) { + pp = emalloc(NPtr * sizeof(void *)); + pp[0] = pool; + pool = pp; + nptr = 1; + } + return pool[nptr++] = emalloc(n); +} + +void +freeall() +{ + void **pp; + + for (;;) { + for (pp = &pool[1]; pp < &pool[nptr]; pp++) + free(*pp); + pp = pool[0]; + if (!pp) + break; + free(pool); + pool = pp; + nptr = NPtr; + } + nptr = 1; +} + +Blk * +blknew() +{ + static Blk z; + Blk *b; + + b = alloc(sizeof *b); + *b = z; + return b; +} + +void +emit(int op, int k, Ref to, Ref arg0, Ref arg1) +{ + if (curi == insb) + diag("emit: too many instructions"); + *--curi = (Ins){ + .op = op, .cls = k, + .to = to, .arg = {arg0, arg1} + }; +} + +void +emiti(Ins i) +{ + emit(i.op, i.cls, i.to, i.arg[0], i.arg[1]); +} + +void +idup(Ins **pd, Ins *s, ulong n) +{ + *pd = alloc(n * sizeof(Ins)); + memcpy(*pd, s, n * sizeof(Ins)); +} + +Ins * +icpy(Ins *d, Ins *s, ulong n) +{ + memcpy(d, s, n * sizeof(Ins)); + return d + n; +} + +void * +vnew(ulong len, size_t esz) +{ + ulong cap; + Vec *v; + + for (cap=VMin; cap<len; cap*=2) + ; + v = alloc(cap * esz + sizeof(Vec)); + v->mag = VMag; + v->cap = cap; + 
v->esz = esz; + return v + 1; +} + +void +vgrow(void *vp, ulong len) +{ + Vec *v; + void *v1; + + v = *(Vec **)vp - 1; + assert(v+1 && v->mag == VMag); + if (v->cap >= len) + return; + v1 = vnew(len, v->esz); + memcpy(v1, v+1, v->cap * v->esz); + *(Vec **)vp = v1; +} + +int +phicls(int t, Tmp *tmp /*, int c*/) +{ + if (tmp[t].phi) + return tmp[t].phi; + return t; +#if 0 + int t1; + + t1 = tmp[t].phi; + if (!t1) + t1 = t; + if (t != t1) { + t1 = phitmp(t1, tmp, c); + if (c) + tmp[t].phi = t1; + } + return t1; +#endif +} + +Ref +newtmp(char *prfx, int k, Fn *fn) +{ + static int n; + int t; + + t = fn->ntmp++; + vgrow(&fn->tmp, fn->ntmp); + sprintf(fn->tmp[t].name, "%s%d", prfx, ++n); + fn->tmp[t].cls = k; + fn->tmp[t].slot = -1; + fn->tmp[t].nuse = +1; + fn->tmp[t].ndef = +1; + return TMP(t); +} + +Ref +getcon(int64_t val, Fn *fn) +{ + int c; + + for (c=0; c<fn->ncon; c++) + if (fn->con[c].type == CBits && fn->con[c].bits.i == val) + return CON(c); + fn->ncon++; + vgrow(&fn->con, fn->ncon); + fn->con[c] = (Con){.type = CBits, .bits.i = val}; + return CON(c); +} + +void +addcon(Con *c0, Con *c1) +{ + if (c0->type == CUndef) + *c0 = *c1; + else { + if (c1->type == CAddr) { + if (c0->type == CAddr) + diag("addcon: adding two addresses"); + c0->type = CAddr; + strcpy(c0->label, c1->label); + } + c0->bits.i += c1->bits.i; + } +} + +void +bsinit(BSet *bs, uint n) +{ + n = (n + NBit-1) / NBit; + bs->nt = n; + bs->t = alloc(n * sizeof bs->t[0]); +} + +uint +bscount(BSet *bs) +{ + uint i, j, n; + + n = 0; + for (i=0; i<bs->nt; i++) + for (j=0; j<NBit; j++) + if (bs->t[i] & BIT(j)) + n++; + return n; +} + +static inline uint +bsmax(BSet *bs) +{ + return bs->nt * NBit; +} + +void +bsset(BSet *bs, uint elt) +{ + assert(elt < bsmax(bs)); + bs->t[elt/NBit] |= BIT(elt%NBit); +} + +void +bsclr(BSet *bs, uint elt) +{ + assert(elt < bsmax(bs)); + bs->t[elt/NBit] &= ~BIT(elt%NBit); +} + +#define BSOP(f, op) \ + void \ + f(BSet *a, BSet *b) \ + { \ + uint i; \ + \ + assert(a->nt == 
b->nt); \ + for (i=0; i<a->nt; i++) \ + a->t[i] op b->t[i]; \ + } + +BSOP(bscopy, =) +BSOP(bsunion, |=) +BSOP(bsinter, &=) +BSOP(bsdiff, &= ~) + +int +bsequal(BSet *a, BSet *b) +{ + uint i; + + assert(a->nt == b->nt); + for (i=0; i<a->nt; i++) + if (a->t[i] != b->t[i]) + return 0; + return 1; +} + +void +bszero(BSet *bs) +{ + memset(bs->t, 0, bs->nt * sizeof bs->t[0]); +} + +/* iterates on a bitset, use as follows + * + * for (i=0; bsiter(set, &i); i++) + * use(i); + * + */ +int +bsiter(BSet *bs, uint *elt) +{ + uint i; + + for (i=*elt;; i++) { + while (i < bsmax(bs) && !bs->t[i/NBit]) + i = (i + NBit) & -NBit; + if (i >= bsmax(bs)) + return 0; + if (bshas(bs, i)) { + *elt = i; + return 1; + } + } +} + +void +dumpts(BSet *bs, Tmp *tmp, FILE *f) +{ + uint t; + + fprintf(f, "["); + for (t=Tmp0; bsiter(bs, &t); t++) + fprintf(f, " %s", tmp[t].name); + fprintf(f, " ]\n"); +}