commit 62e238a6ef151d56b79e1f076a57463f2e1fb020
parent 97b58def96d47d937d86849380d8316ddb16bed8
Author: Quentin Carbonneaux <quentin.carbonneaux@yale.edu>
Date: Fri, 25 Mar 2016 14:02:43 -0400
great renaming campaign!
Diffstat:
70 files changed, 5623 insertions(+), 5622 deletions(-)
diff --git a/lisc/.gitignore b/lisc/.gitignore
@@ -1,5 +0,0 @@
-lisc
-doc
-.comfile
-*.o
-*.out
diff --git a/lisc/Makefile b/lisc/Makefile
@@ -1,17 +0,0 @@
-BIN = lisc
-OBJ = main.o util.o parse.o mem.o ssa.o copy.o live.o isel.o spill.o rega.o emit.o
-
-CFLAGS = -Wall -Wextra -std=c99 -g -pedantic
-
-$(BIN): $(OBJ)
- $(CC) $(LDFLAGS) $(OBJ) -o $@
-
-$(OBJ): lisc.h
-
-.PHONY: clean check syndoc
-clean:
- rm -f $(BIN) $(OBJ)
-check: $(BIN)
- test/go.sh all
-syndoc:
- unison -auto doc ssh://qcar@h/data/d/ssa-doc
diff --git a/lisc/copy.c b/lisc/copy.c
@@ -1,159 +0,0 @@
-#include "lisc.h"
-
-typedef struct RList RList;
-struct RList {
- int t;
- RList *l;
-};
-
-static Ref
-copyof(Ref r, Ref *cp)
-{
- if (rtype(r) == RTmp)
- return cp[r.val];
- else
- return r;
-}
-
-static void
-update(Ref r, Ref rcp, Ref *cp, RList **w)
-{
- RList *l;
-
- if (!req(cp[r.val], rcp)) {
- cp[r.val] = rcp;
- l = emalloc(sizeof *l);
- l->t = r.val;
- l->l = *w;
- *w = l;
- }
-}
-
-static void
-visitphi(Phi *p, Ref *cp, RList **w)
-{
- uint a;
- Ref r, r1;
-
- r = R;
- for (a=0; a<p->narg; a++) {
- r1 = copyof(p->arg[a], cp);
- if (req(r1, R))
- continue;
- if (req(r, R) || req(r, r1))
- r = r1;
- else {
- r = p->to;
- break;
- }
- }
- assert(!req(r, R));
- update(p->to, r, cp, w);
-}
-
-static void
-visitins(Ins *i, Ref *cp, RList **w)
-{
- Ref r;
-
- if (i->op == OCopy) {
- r = copyof(i->arg[0], cp);
- update(i->to, r, cp, w);
- } else if (!req(i->to, R)) {
- assert(rtype(i->to) == RTmp);
- update(i->to, i->to, cp, w);
- }
-}
-
-void
-copy(Fn *fn)
-{
- Blk *b;
- Ref *cp, r;
- RList *w, *w1;
- Use *u, *u1;
- Ins *i;
- Phi *p, **pp;
- uint a;
- int t;
-
- w = 0;
- cp = emalloc(fn->ntmp * sizeof cp[0]);
- for (b=fn->start; b; b=b->link) {
- for (p=b->phi; p; p=p->link)
- visitphi(p, cp, &w);
- for (i=b->ins; i-b->ins < b->nins; i++)
- visitins(i, cp, &w);
- }
- while ((w1=w)) {
- t = w->t;
- w = w->l;
- free(w1);
- u = fn->tmp[t].use;
- u1 = u + fn->tmp[t].nuse;
- for (; u<u1; u++)
- switch (u->type) {
- default:
- diag("copy: invalid use");
- case UPhi:
- visitphi(u->u.phi, cp, &w);
- break;
- case UIns:
- visitins(u->u.ins, cp, &w);
- break;
- case UJmp:
- break;
- }
- }
- for (b=fn->start; b; b=b->link) {
- for (pp=&b->phi; (p=*pp);) {
- r = cp[p->to.val];
- if (!req(r, p->to)) {
- *pp = p->link;
- continue;
- }
- for (a=0; a<p->narg; a++)
- if (rtype(p->arg[a]) == RTmp) {
- r = cp[p->arg[a].val];
- assert(!req(r, R));
- p->arg[a] = r;
- }
- pp=&p->link;
- }
- for (i=b->ins; i-b->ins < b->nins; i++) {
- r = cp[i->to.val];
- if (!req(r, i->to)) {
- *i = (Ins){.op = ONop};
- continue;
- }
- for (a=0; a<2; a++)
- if (rtype(i->arg[a]) == RTmp) {
- r = cp[i->arg[a].val];
- assert(!req(r, R));
- i->arg[a] = r;
- }
- }
- if (rtype(b->jmp.arg) == RTmp) {
- r = cp[b->jmp.arg.val];
- assert(!req(r, R));
- b->jmp.arg = r;
- }
- }
- if (debug['C']) {
- fprintf(stderr, "\n> Copy information:");
- for (t=Tmp0; t<fn->ntmp; t++) {
- if (req(cp[t], R)) {
- fprintf(stderr, "\n%10s not seen!",
- fn->tmp[t].name);
- }
- else if (!req(cp[t], TMP(t))) {
- fprintf(stderr, "\n%10s copy of ",
- fn->tmp[t].name);
- printref(cp[t], fn, stderr);
- }
- }
- fprintf(stderr, "\n\n> After copy elimination:\n");
- printfn(fn, stderr);
- }
- free(cp);
-}
diff --git a/lisc/emit.c b/lisc/emit.c
@@ -1,666 +0,0 @@
-#include "lisc.h"
-
-enum {
- SLong = 0,
- SWord = 1,
- SShort = 2,
- SByte = 3,
-
- Ki = -1, /* matches Kw and Kl */
- Ka = -2, /* matches all classes */
-};
-
-/* Instruction format strings:
- *
- * if the format string starts with -, the instruction
- * is assumed to be 3-address and is put in 2-address
- * mode using an extra mov if necessary
- *
- * if the format string starts with +, the same as the
- * above applies, but commutativity is also assumed
- *
- * %k is used to set the class of the instruction,
- * it'll expand to "l", "q", "ss", "sd", depending
- * on the instruction class
- * %0 designates the first argument
- * %1 designates the second argument
- * %= designates the result
- *
- * if %k is not used, a prefix to 0, 1, or = must be
- * added, it can be:
- * M - memory reference
- * L - long (64 bits)
- * W - word (32 bits)
- * H - short (16 bits)
- * B - byte (8 bits)
- * S - single precision float
- * D - double precision float
- */
-static struct {
- short op;
- short cls;
- char *asm;
-} omap[] = {
- { OAdd, Ka, "+add%k %1, %=" },
- { OSub, Ka, "-sub%k %1, %=" },
- { OAnd, Ki, "+and%k %1, %=" },
- { OOr, Ki, "+or%k %1, %=" },
- { OXor, Ki, "+xor%k %1, %=" },
- { OSar, Ki, "-sar%k %B1, %=" },
- { OShr, Ki, "-shr%k %B1, %=" },
- { OShl, Ki, "-shl%k %B1, %=" },
- { OMul, Ki, "+imul%k %1, %=" },
- { OMul, Ks, "+mulss %1, %=" }, /* fixme */
- { OMul, Kd, "+mulsd %1, %=" },
- { ODiv, Ka, "-div%k %1, %=" },
- { OStorel, Ka, "movq %L0, %M1" },
- { OStorew, Ka, "movl %W0, %M1" },
- { OStoreh, Ka, "movw %H0, %M1" },
- { OStoreb, Ka, "movb %B0, %M1" },
- { OStores, Ka, "movss %S0, %M1" },
- { OStored, Ka, "movsd %D0, %M1" },
- { OLoad, Ka, "mov%k %M0, %=" },
- { OLoadsw, Kl, "movslq %M0, %L=" },
- { OLoadsw, Kw, "movl %M0, %W=" },
- { OLoaduw, Ki, "movl %M0, %W=" },
- { OLoadsh, Ki, "movsw%k %M0, %=" },
- { OLoaduh, Ki, "movzw%k %M0, %=" },
- { OLoadsb, Ki, "movsb%k %M0, %=" },
- { OLoadub, Ki, "movzb%k %M0, %=" },
- { OExtsw, Kl, "movslq %W0, %L=" },
- { OExtuw, Kl, "movl %W0, %W=" },
- { OExtsh, Ki, "movsw%k %H0, %=" },
- { OExtuh, Ki, "movzw%k %H0, %=" },
- { OExtsb, Ki, "movsb%k %B0, %=" },
- { OExtub, Ki, "movzb%k %B0, %=" },
-
- { OExts, Kd, "cvtss2sd %0, %=" }, /* see if factorization is possible */
- { OTruncd, Ks, "cvttsd2ss %0, %=" },
- { OFtosi, Kw, "cvttss2si %0, %=" },
- { OFtosi, Kl, "cvttsd2si %0, %=" },
- { OSitof, Ks, "cvtsi2ss %W0, %=" },
- { OSitof, Kd, "cvtsi2sd %L0, %=" },
- { OCast, Ki, "movq %D0, %L=" },
- { OCast, Ka, "movq %L0, %D=" },
-
- { OAddr, Ki, "lea%k %M0, %=" },
- { OSwap, Ki, "xchg%k %0, %1" },
- { OSign, Kl, "cqto" },
- { OSign, Kw, "cltd" },
- { OXDiv, Ki, "div%k %0" },
- { OXIDiv, Ki, "idiv%k %0" },
- { OXCmp, Ks, "comiss %S0, %S1" }, /* fixme, Kf */
- { OXCmp, Kd, "comisd %D0, %D1" },
- { OXCmp, Ki, "cmp%k %0, %1" },
- { OXTest, Ki, "test%k %0, %1" },
- { OXSet+ICeq, Ki, "setz %B=\n\tmovzb%k %B=, %=" },
- { OXSet+ICsle, Ki, "setle %B=\n\tmovzb%k %B=, %=" },
- { OXSet+ICslt, Ki, "setl %B=\n\tmovzb%k %B=, %=" },
- { OXSet+ICsgt, Ki, "setg %B=\n\tmovzb%k %B=, %=" },
- { OXSet+ICsge, Ki, "setge %B=\n\tmovzb%k %B=, %=" },
- { OXSet+ICne, Ki, "setnz %B=\n\tmovzb%k %B=, %=" },
- { OXSet+ICXnp, Ki, "setnp %B=\n\tmovsb%k %B=, %=" },
- { OXSet+ICXp, Ki, "setp %B=\n\tmovsb%k %B=, %=" },
- { NOp, 0, 0 }
-};
-
-static char *rname[][4] = {
- [RAX] = {"rax", "eax", "ax", "al"},
- [RBX] = {"rbx", "ebx", "bx", "bl"},
- [RCX] = {"rcx", "ecx", "cx", "cl"},
- [RDX] = {"rdx", "edx", "dx", "dl"},
- [RSI] = {"rsi", "esi", "si", "sil"},
- [RDI] = {"rdi", "edi", "di", "dil"},
- [RBP] = {"rbp", "ebp", "bp", "bpl"},
- [RSP] = {"rsp", "esp", "sp", "spl"},
- [R8 ] = {"r8" , "r8d", "r8w", "r8b"},
- [R9 ] = {"r9" , "r9d", "r9w", "r9b"},
- [R10] = {"r10", "r10d", "r10w", "r10b"},
- [R11] = {"r11", "r11d", "r11w", "r11b"},
- [R12] = {"r12", "r12d", "r12w", "r12b"},
- [R13] = {"r13", "r13d", "r13w", "r13b"},
- [R14] = {"r14", "r14d", "r14w", "r14b"},
- [R15] = {"r15", "r15d", "r15w", "r15b"},
-};
-
-
-static int
-slot(int s, Fn *fn)
-{
- struct { int i:14; } x;
-
- /* sign extend s using a bitfield */
- x.i = s;
- assert(NAlign == 3);
- if (x.i < 0)
- return -4 * x.i;
- else {
- assert(fn->slot >= x.i);
- return -4 * (fn->slot - x.i);
- }
-}
-
-static void
-emitcon(Con *con, FILE *f)
-{
- switch (con->type) {
- default:
- diag("emit: invalid constant");
- case CAddr:
- fputs(con->label, f);
- if (con->bits.i)
- fprintf(f, "%+"PRId64, con->bits.i);
- break;
- case CBits:
- fprintf(f, "%"PRId64, con->bits.i);
- break;
- }
-}
-
-static char *
-regtoa(int reg, int sz)
-{
- static char buf[6];
-
- if (reg >= XMM0) {
- sprintf(buf, "xmm%d", reg-XMM0);
- return buf;
- } else
- return rname[reg][sz];
-}
-
-static Ref
-getarg(char c, Ins *i)
-{
- switch (c) {
- default:
- diag("emit: 0, 1, = expected in format");
- case '0':
- return i->arg[0];
- case '1':
- return i->arg[1];
- case '=':
- return i->to;
- }
-}
-
-static void emitins(Ins, Fn *, FILE *);
-
-static void
-emitcopy(Ref r1, Ref r2, int k, Fn *fn, FILE *f)
-{
- Ins icp;
-
- icp.op = OCopy;
- icp.arg[0] = r2;
- icp.to = r1;
- icp.cls = k;
- emitins(icp, fn, f);
-}
-
-static void
-emitf(char *s, Ins *i, Fn *fn, FILE *f)
-{
- static char clstoa[][3] = {"l", "q", "ss", "sd"};
- char c;
- int sz;
- Ref ref;
- Mem *m;
- Con off;
-
- switch (*s) {
- case '+':
- if (req(i->arg[1], i->to)) {
- ref = i->arg[0];
- i->arg[0] = i->arg[1];
- i->arg[1] = ref;
- }
- /* fall through */
- case '-':
- if (req(i->arg[1], i->to) && !req(i->arg[0], i->to))
- diag("emit: cannot convert to 2-address");
- emitcopy(i->to, i->arg[0], i->cls, fn, f);
- s++;
- break;
- }
-
- fputc('\t', f);
-Next:
- while ((c = *s++) != '%')
- if (!c) {
- fputc('\n', f);
- return;
- } else
- fputc(c, f);
- switch ((c = *s++)) {
- default:
- diag("emit: invalid escape");
- case '%':
- fputc('%', f);
- break;
- case 'k':
- fputs(clstoa[i->cls], f);
- break;
- case '0':
- case '1':
- case '=':
- sz = KWIDE(i->cls) ? SLong : SWord;
- s--;
- /* fall through */
- case 'D':
- case 'S':
- Ref:
- c = *s++;
- ref = getarg(c, i);
- switch (rtype(ref)) {
- default:
- diag("emit: invalid reference");
- case RTmp:
- assert(isreg(ref));
- fprintf(f, "%%%s", regtoa(ref.val, sz));
- break;
- case RSlot:
- fprintf(f, "%d(%%rbp)", slot(ref.val, fn));
- break;
- case RAMem:
- Mem:
- m = &fn->mem[ref.val & AMask];
- if (rtype(m->base) == RSlot) {
- off.type = CBits;
- off.bits.i = slot(m->base.val, fn);
- addcon(&m->offset, &off);
- m->base = TMP(RBP);
- }
- if (m->offset.type != CUndef)
- emitcon(&m->offset, f);
- if (req(m->base, R) && req(m->index, R))
- break;
- fputc('(', f);
- if (!req(m->base, R))
- fprintf(f, "%%%s", regtoa(m->base.val, SLong));
- if (!req(m->index, R))
- fprintf(f, ", %%%s, %d",
- regtoa(m->index.val, SLong),
- m->scale
- );
- fputc(')', f);
- break;
- case RCon:
- fputc('$', f);
- emitcon(&fn->con[ref.val], f);
- break;
- }
- break;
- case 'L':
- sz = SLong;
- goto Ref;
- case 'W':
- sz = SWord;
- goto Ref;
- case 'H':
- sz = SShort;
- goto Ref;
- case 'B':
- sz = SByte;
- goto Ref;
- case 'M':
- c = *s++;
- ref = getarg(c, i);
- switch (rtype(ref)) {
- default:
- diag("emit: invalid memory reference");
- case RAMem:
- goto Mem;
- case RSlot:
- fprintf(f, "%d(%%rbp)", slot(ref.val, fn));
- break;
- case RCon:
- emitcon(&fn->con[ref.val], f);
- fprintf(f, "(%%rip)");
- break;
- case RTmp:
- assert(isreg(ref));
- fprintf(f, "(%%%s)", regtoa(ref.val, SLong));
- break;
- }
- break;
- }
- goto Next;
-}
-
-static void
-emitins(Ins i, Fn *fn, FILE *f)
-{
- Ref r;
- int64_t val;
- int o;
-
- switch (i.op) {
- default:
- Table:
- /* most instructions are just pulled out of
- * the table omap[], some special cases are
- * detailed below */
- for (o=0;; o++) {
- /* this linear search should really be a binary
- * search */
- if (omap[o].op == NOp)
- diag("emit: no entry found for instruction");
- if (omap[o].op == i.op)
- if (omap[o].cls == i.cls
- || (omap[o].cls == Ki && KBASE(i.cls) == 0)
- || (omap[o].cls == Ka))
- break;
- }
- emitf(omap[o].asm, &i, fn, f);
- break;
- case ONop:
- /* just do nothing for nops, they are inserted
- * by some passes */
- break;
- case OMul:
- /* here, we try to use the 3-addresss form
- * of multiplication when possible */
- if (rtype(i.arg[1]) == RCon) {
- r = i.arg[0];
- i.arg[0] = i.arg[1];
- i.arg[1] = r;
- }
- if (KBASE(i.cls) == 0 /* only available for ints */
- && rtype(i.arg[0]) == RCon
- && rtype(i.arg[1]) == RTmp) {
- emitf("imul%k %0, %1, %=", &i, fn, f);
- break;
- }
- goto Table;
- case OSub:
- /* we have to use the negation trick to handle
- * some 3-address substractions */
- if (req(i.to, i.arg[1])) {
- emitf("neg%k %=", &i, fn, f);
- emitf("add%k %0, %=", &i, fn, f);
- break;
- }
- goto Table;
- case OCopy:
- /* make sure we don't emit useless copies,
- * also, we can use a trick to load 64-bits
- * registers, it's detailed in my note below
- * http://c9x.me/art/notes.html?09/19/2015 */
- if (req(i.to, R) || req(i.arg[0], R))
- break;
- if (isreg(i.to)
- && rtype(i.arg[0]) == RCon
- && i.cls == Kl
- && fn->con[i.arg[0].val].type == CBits
- && (val = fn->con[i.arg[0].val].bits.i) >= 0
- && val <= UINT32_MAX) {
- emitf("movl %W0, %W=", &i, fn, f);
- } else if (!req(i.arg[0], i.to))
- emitf("mov%k %0, %=", &i, fn, f);
- break;
- case OCall:
- /* calls simply have a weird syntax in AT&T
- * assembly... */
- switch (rtype(i.arg[0])) {
- default:
- diag("emit: invalid call instruction");
- case RCon:
- fprintf(f, "\tcallq ");
- emitcon(&fn->con[i.arg[0].val], f);
- fprintf(f, "\n");
- break;
- case RTmp:
- emitf("callq *%L0", &i, fn, f);
- break;
- }
- break;
- case OSAlloc:
- /* there is no good reason why this is here
- * maybe we should split OSAlloc in 2 different
- * instructions depending on the result
- */
- emitf("subq %L0, %%rsp", &i, fn, f);
- if (!req(i.to, R))
- emitcopy(i.to, TMP(RSP), Kl, fn, f);
- break;
- case OSwap:
- if (KBASE(i.cls) == 0)
- goto Table;
- /* for floats, there is no swap instruction
- * so we use xmm15 as a temporary
- */
- emitcopy(TMP(XMM0+15), i.arg[0], i.cls, fn, f);
- emitcopy(i.arg[0], i.arg[1], i.cls, fn, f);
- emitcopy(i.arg[1], TMP(XMM0+15), i.cls, fn, f);
- break;
- }
-}
-
-static int
-cneg(int cmp)
-{
- switch (cmp) {
- default: diag("emit: cneg() unhandled comparison");
- case ICule: return ICugt;
- case ICult: return ICuge;
- case ICsle: return ICsgt;
- case ICslt: return ICsge;
- case ICsgt: return ICsle;
- case ICsge: return ICslt;
- case ICugt: return ICule;
- case ICuge: return ICult;
- case ICeq: return ICne;
- case ICne: return ICeq;
- case ICXnp: return ICXp;
- case ICXp: return ICXnp;
- }
-}
-
-static int
-framesz(Fn *fn)
-{
- int i, o, f;
-
- assert(NAlign == 3);
- for (i=0, o=0; i<NRClob; i++)
- o ^= 1 & (fn->reg >> rclob[i]);
- f = fn->slot;
- f = (f + 3) & -4;
- return 4*f + 8*o;
-}
-
-void
-emitfn(Fn *fn, FILE *f)
-{
- static char *ctoa[] = {
- [ICeq] = "z",
- [ICule] = "be",
- [ICult] = "b",
- [ICsle] = "le",
- [ICslt] = "l",
- [ICsgt] = "g",
- [ICsge] = "ge",
- [ICugt] = "a",
- [ICuge] = "ae",
- [ICne] = "nz",
- [ICXnp] = "np",
- [ICXp] = "p"
- };
- Blk *b, *s;
- Ins *i, itmp;
- int *r, c, fs;
-
- fprintf(f,
- ".text\n"
- ".globl %s\n"
- ".type %s, @function\n"
- "%s:\n"
- "\tpush %%rbp\n"
- "\tmov %%rsp, %%rbp\n",
- fn->name, fn->name, fn->name
- );
- fs = framesz(fn);
- if (fs)
- fprintf(f, "\tsub $%d, %%rsp\n", fs);
- for (r=rclob; r-rclob < NRClob; r++)
- if (fn->reg & BIT(*r)) {
- itmp.arg[0] = TMP(*r);
- emitf("pushq %L0", &itmp, fn, f);
- }
-
- for (b=fn->start; b; b=b->link) {
- fprintf(f, ".L%s:\n", b->name);
- for (i=b->ins; i!=&b->ins[b->nins]; i++)
- emitins(*i, fn, f);
- switch (b->jmp.type) {
- case JRet0:
- for (r=&rclob[NRClob]; r>rclob;)
- if (fn->reg & BIT(*--r)) {
- itmp.arg[0] = TMP(*r);
- emitf("popq %L0", &itmp, fn, f);
- }
- fprintf(f,
- "\tleave\n"
- "\tret\n"
- );
- break;
- case JJmp:
- if (b->s1 != b->link)
- fprintf(f, "\tjmp .L%s\n", b->s1->name);
- break;
- default:
- c = b->jmp.type - JXJc;
- if (0 <= c && c <= NXICmp) {
- if (b->link == b->s2) {
- s = b->s1;
- } else if (b->link == b->s1) {
- c = cneg(c);
- s = b->s2;
- } else
- diag("emit: unhandled jump (1)");
- fprintf(f, "\tj%s .L%s\n", ctoa[c], s->name);
- break;
- }
- diag("emit: unhandled jump (2)");
- }
- }
-
-}
-
-void
-emitdat(Dat *d, FILE *f)
-{
- static int align;
- static char *dtoa[] = {
- [DAlign] = ".align",
- [DB] = "\t.byte",
- [DH] = "\t.value",
- [DW] = "\t.long",
- [DL] = "\t.quad"
- };
-
- switch (d->type) {
- case DStart:
- align = 0;
- fprintf(f, ".data\n");
- break;
- case DEnd:
- break;
- case DName:
- if (!align)
- fprintf(f, ".align 8\n");
- fprintf(f,
- ".globl %s\n"
- ".type %s, @object\n"
- "%s:\n",
- d->u.str, d->u.str, d->u.str
- );
- break;
- case DZ:
- fprintf(f, "\t.fill %"PRId64",1,0\n", d->u.num);
- break;
- default:
- if (d->type == DAlign)
- align = 1;
-
- if (d->isstr) {
- if (d->type != DB)
- err("strings only supported for 'b' currently");
- fprintf(f, "\t.ascii \"%s\"\n", d->u.str);
- }
- else if (d->isref) {
- fprintf(f, "%s %s%+"PRId64"\n",
- dtoa[d->type], d->u.ref.nam,
- d->u.ref.off);
- }
- else {
- fprintf(f, "%s %"PRId64"\n",
- dtoa[d->type], d->u.num);
- }
- break;
- }
-}
-
-typedef struct FBits FBits;
-
-struct FBits {
- int64_t bits;
- int wide;
- FBits *link;
-};
-
-static FBits *stash;
-
-int
-stashfp(int64_t n, int w)
-{
- FBits **pb, *b;
- int i;
-
- /* does a dumb de-dup of fp constants
- * this should be the linker's job */
- for (pb=&stash, i=0; (b=*pb); pb=&b->link, i++)
- if (n == b->bits && w == b->wide)
- return i;
- b = emalloc(sizeof *b);
- b->bits = n;
- b->wide = w;
- b->link = 0;
- *pb = b;
- return i;
-}
-
-void
-emitfin(FILE *f)
-{
- FBits *b;
- int i;
-
- if (!stash)
- return;
- fprintf(f, "/* floating point constants */\n");
- fprintf(f, ".data\n.align 8\n");
- for (b=stash, i=0; b; b=b->link, i++)
- if (b->wide)
- fprintf(f,
- ".Lfp%d:\n"
- "\t.quad %"PRId64
- " /* %f */\n",
- i, b->bits,
- *(double *)&b->bits
- );
- for (b=stash, i=0; b; b=b->link, i++)
- if (!b->wide)
- fprintf(f,
- ".Lfp%d:\n"
- "\t.long %"PRId64
- " /* %lf */\n",
- i, b->bits & 0xffffffff,
- *(float *)&b->bits
- );
- while ((b=stash)) {
- stash = b->link;
- free(b);
- }
-}
diff --git a/lisc/isel.c b/lisc/isel.c
@@ -1,1135 +0,0 @@
-#include "lisc.h"
-#include <limits.h>
-
-/* For x86_64, do the following:
- *
- * - lower calls
- * - check that constants are used only in
- * places allowed
- * - ensure immediates always fit in 32b
- * - explicit machine register contraints
- * on instructions like division.
- * - implement fast locals (the streak of
- * constant allocX in the first basic block)
- * - recognize complex addressing modes
- *
- * Invariant: the use counts that are used
- * in sel() must be sound. This
- * is not so trivial, maybe the
- * dce should be moved out...
- */
-
-typedef struct ANum ANum;
-typedef struct AClass AClass;
-typedef struct RAlloc RAlloc;
-
-struct ANum {
- char n, l, r;
- Ins *i;
- Ref mem;
-};
-
-static void amatch(Addr *, Ref, ANum *, Fn *, int);
-
-static int
-fcmptoi(int fc)
-{
- switch (fc) {
- default: diag("isel: fcmptoi defaulted");
- case FCle: return ICule;
- case FClt: return ICult;
- case FCgt: return ICugt;
- case FCge: return ICuge;
- case FCne: return ICne;
- case FCeq: return ICeq;
- case FCo: return ICXnp;
- case FCuo: return ICXp;
- }
-}
-
-static int
-iscmp(int op, int *pk, int *pc)
-{
- int k, c;
-
- if (OCmpw <= op && op <= OCmpw1) {
- c = op - OCmpw;
- k = Kw;
- }
- else if (OCmpl <= op && op <= OCmpl1) {
- c = op - OCmpl;
- k = Kl;
- }
- else if (OCmps <= op && op <= OCmps1) {
- c = fcmptoi(op - OCmps);
- k = Ks;
- }
- else if (OCmpd <= op && op <= OCmpd1) {
- c = fcmptoi(op - OCmpd);
- k = Kd;
- }
- else
- return 0;
- if (pk)
- *pk = k;
- if (pc)
- *pc = c;
- return 1;
-}
-
-static int
-noimm(Ref r, Fn *fn)
-{
- int64_t val;
-
- if (rtype(r) != RCon)
- return 0;
- switch (fn->con[r.val].type) {
- default:
- diag("isel: invalid constant");
- case CAddr:
- /* we only support the 'small'
- * code model of the ABI, this
- * means that we can always
- * address data with 32bits
- */
- return 0;
- case CBits:
- val = fn->con[r.val].bits.i;
- return (val < INT32_MIN || val > INT32_MAX);
- }
-}
-
-static int
-rslot(Ref r, Fn *fn)
-{
- if (rtype(r) != RTmp)
- return -1;
- return fn->tmp[r.val].slot;
-}
-
-static int
-argcls(Ins *i, int n)
-{
- return opdesc[i->op].argcls[n][i->cls];
-}
-
-static void
-fixarg(Ref *r, int k, int phi, Fn *fn)
-{
- Addr a;
- Ref r0, r1;
- int s, n;
-
- r1 = r0 = *r;
- s = rslot(r0, fn);
- if (KBASE(k) == 1 && rtype(r0) == RCon) {
- /* load floating points from memory
- * slots, they can't be used as
- * immediates
- */
- r1 = MEM(fn->nmem);
- vgrow(&fn->mem, ++fn->nmem);
- memset(&a, 0, sizeof a);
- a.offset.type = CAddr;
- n = stashfp(fn->con[r0.val].bits.i, KWIDE(k));
- sprintf(a.offset.label, ".Lfp%d", n);
- fn->mem[fn->nmem-1] = a;
- }
- else if (!phi && k == Kl && noimm(r0, fn)) {
- /* load constants that do not fit in
- * a 32bit signed integer into a
- * long temporary
- */
- r1 = newtmp("isel", Kl, fn);
- emit(OCopy, Kl, r1, r0, R);
- }
- else if (s != -1) {
- /* load fast locals' addresses into
- * temporaries right before the
- * instruction
- */
- r1 = newtmp("isel", Kl, fn);
- emit(OAddr, Kl, r1, SLOT(s), R);
- }
- *r = r1;
-}
-
-static void
-chuse(Ref r, int du, Fn *fn)
-{
- if (rtype(r) == RTmp)
- fn->tmp[r.val].nuse += du;
-}
-
-static void
-seladdr(Ref *r, ANum *an, Fn *fn)
-{
- Addr a;
- Ref r0, r1;
-
- r0 = *r;
- if (rtype(r0) == RTmp) {
- chuse(r0, -1, fn);
- r1 = an[r0.val].mem;
- if (req(r1, R)) {
- amatch(&a, r0, an, fn, 1);
- vgrow(&fn->mem, ++fn->nmem);
- fn->mem[fn->nmem-1] = a;
- r1 = MEM(fn->nmem-1);
- chuse(a.base, +1, fn);
- chuse(a.index, +1, fn);
- if (rtype(a.base) != RTmp)
- if (rtype(a.index) != RTmp)
- an[r0.val].mem = r1;
- }
- *r = r1;
- }
-}
-
-static void
-selcmp(Ref arg[2], int k, Fn *fn)
-{
- Ref r;
-
- if (rtype(arg[0]) == RCon) {
- r = arg[1];
- arg[1] = arg[0];
- arg[0] = r;
- }
- assert(rtype(arg[0]) != RCon);
- emit(OXCmp, k, R, arg[1], arg[0]);
- fixarg(&curi->arg[0], k, 0, fn);
-}
-
-static void
-sel(Ins i, ANum *an, Fn *fn)
-{
- Ref r0, r1;
- int x, k, kc;
- int64_t val;
- Ins *i0;
-
- if (rtype(i.to) == RTmp)
- if (!isreg(i.to) && !isreg(i.arg[0]) && !isreg(i.arg[1]))
- if (fn->tmp[i.to.val].nuse == 0) {
- chuse(i.arg[0], -1, fn);
- chuse(i.arg[1], -1, fn);
- return;
- }
- i0 = curi;
- k = i.cls;
- switch (i.op) {
- case ODiv:
- case ORem:
- case OUDiv:
- case OURem:
- if (i.op == ODiv || i.op == OUDiv)
- r0 = TMP(RAX), r1 = TMP(RDX);
- else
- r0 = TMP(RDX), r1 = TMP(RAX);
- emit(OCopy, k, i.to, r0, R);
- emit(OCopy, k, R, r1, R);
- if (rtype(i.arg[1]) == RCon) {
- /* immediates not allowed for
- * divisions in x86
- */
- r0 = newtmp("isel", k, fn);
- } else
- r0 = i.arg[1];
- if (i.op == ODiv || i.op == ORem) {
- emit(OXIDiv, k, R, r0, R);
- emit(OSign, k, TMP(RDX), TMP(RAX), R);
- } else {
- emit(OXDiv, k, R, r0, R);
- emit(OCopy, k, TMP(RDX), CON_Z, R);
- }
- emit(OCopy, k, TMP(RAX), i.arg[0], R);
- if (rtype(i.arg[1]) == RCon)
- emit(OCopy, k, r0, i.arg[1], R);
- break;
- case OSar:
- case OShr:
- case OShl:
- if (rtype(i.arg[1]) == RCon)
- goto Emit;
- r0 = i.arg[1];
- i.arg[1] = TMP(RCX);
- emit(OCopy, Kw, R, TMP(RCX), R);
- emiti(i);
- emit(OCopy, Kw, TMP(RCX), r0, R);
- break;
- case ONop:
- break;
- case OStored:
- case OStores:
- case OStorel:
- case OStorew:
- case OStoreh:
- case OStoreb:
- if (rtype(i.arg[0]) == RCon) {
- if (i.op == OStored)
- i.op = OStorel;
- if (i.op == OStores)
- i.op = OStorew;
- }
- seladdr(&i.arg[1], an, fn);
- goto Emit;
- case_OLoad:
- seladdr(&i.arg[0], an, fn);
- goto Emit;
- case OCall:
- case OSAlloc:
- case OCopy:
- case OAdd:
- case OSub:
- case OMul:
- case OAnd:
- case OOr:
- case OXor:
- case OXTest:
- case OFtosi:
- case OSitof:
- case OExts:
- case OTruncd:
- case OCast:
- case_OExt:
-Emit:
- emiti(i);
- fixarg(&curi->arg[0], argcls(curi, 0), 0, fn);
- fixarg(&curi->arg[1], argcls(curi, 1), 0, fn);
- break;
- case OAlloc:
- case OAlloc+1:
- case OAlloc+2: /* == OAlloc1 */
- /* we need to make sure
- * the stack remains aligned
- * (rsp = 0) mod 16
- */
- if (rtype(i.arg[0]) == RCon) {
- assert(fn->con[i.arg[0].val].type == CBits);
- val = fn->con[i.arg[0].val].bits.i;
- val = (val + 15) & ~INT64_C(15);
- if (val < 0 || val > INT32_MAX)
- diag("isel: alloc too large");
- emit(OSAlloc, Kl, i.to, getcon(val, fn), R);
- } else {
- /* r0 = (i.arg[0] + 15) & -16 */
- r0 = newtmp("isel", Kl, fn);
- r1 = newtmp("isel", Kl, fn);
- emit(OSAlloc, Kl, i.to, r0, R);
- emit(OAnd, Kl, r0, r1, getcon(-16, fn));
- emit(OAdd, Kl, r1, i.arg[0], getcon(15, fn));
- }
- break;
- default:
- if (isext(i.op))
- goto case_OExt;
- if (isload(i.op))
- goto case_OLoad;
- if (iscmp(i.op, &kc, &x)) {
- if (rtype(i.arg[0]) == RCon)
- x = icmpop(x);
- emit(OXSet+x, k, i.to, R, R);
- selcmp(i.arg, kc, fn);
- break;
- }
- diag("isel: non-exhaustive implementation");
- }
-
- while (i0 > curi && --i0)
- if (rslot(i0->arg[0], fn) != -1
- || rslot(i0->arg[1], fn) != -1)
- diag("isel: usupported address argument");
-}
-
-static Ins *
-flagi(Ins *i0, Ins *i)
-{
- while (i>i0) {
- i--;
- if (opdesc[i->op].sflag)
- return i;
- if (opdesc[i->op].lflag)
- continue;
- return 0;
- }
- return 0;
-}
-
-struct AClass {
- int inmem;
- int align;
- uint size;
- int cls[2];
-};
-
-static void
-aclass(AClass *a, Typ *t)
-{
- int e, s, n, cls;
- uint sz, al;
-
- sz = t->size;
- al = 1u << t->align;
-
- /* the ABI requires sizes to be rounded
- * up to the nearest multiple of 8, moreover
- * it makes it easy load and store structures
- * in registers
- */
- if (al < 8)
- al = 8;
- sz = (sz + al-1) & -al;
-
- a->size = sz;
- a->align = t->align;
-
- if (t->dark || sz > 16) {
- /* large or unaligned structures are
- * required to be passed in memory
- */
- a->inmem = 1;
- return;
- }
-
- a->inmem = 0;
- for (e=0, s=0; e<2; e++) {
- cls = -1;
- for (n=0; n<8 && t->seg[s].len; s++) {
- if (t->seg[s].ispad) {
- /* don't change anything */
- }
- else if (t->seg[s].isflt) {
- if (cls == -1)
- cls = Kd;
- }
- else
- cls = Kl;
- n += t->seg[s].len;
- }
- assert(n <= 8);
- a->cls[e] = cls;
- }
-}
-
-static void
-blit(Ref rstk, uint soff, Ref rsrc, uint sz, Fn *fn)
-{
- Ref r, r1;
- uint boff;
-
- /* it's an impolite blit, we might go across the end
- * of the source object a little bit... */
- for (boff=0; sz>0; sz-=8, soff+=8, boff+=8) {
- r = newtmp("abi", Kl, fn);
- r1 = newtmp("abi", Kl, fn);
- emit(OStorel, 0, R, r, r1);
- emit(OAdd, Kl, r1, rstk, getcon(soff, fn));
- r1 = newtmp("abi", Kl, fn);
- emit(OLoad, Kl, r, r1, R);
- emit(OAdd, Kl, r1, rsrc, getcon(boff, fn));
- chuse(rsrc, +1, fn);
- chuse(rstk, +1, fn);
- }
-}
-
-static int
-retr(Ref reg[2], AClass *aret)
-{
- static int retreg[2][2] = {{RAX, RDX}, {XMM0, XMM0+1}};
- int n, k, ca, nr[2];
-
- nr[0] = nr[1] = 0;
- ca = 0;
- for (n=0; aret->cls[n]>=0 && n<2; n++) {
- k = KBASE(aret->cls[n]);
- reg[n] = TMP(retreg[k][nr[k]++]);
- ca += 1 << (2 * k);
- }
- return ca;
-}
-
-static void
-selret(Blk *b, Fn *fn)
-{
- int j, k, ca;
- Ref r, r0, reg[2];
- AClass aret;
-
- j = b->jmp.type;
-
- if (!isret(j) || j == JRet0)
- return;
-
- r0 = b->jmp.arg;
- b->jmp.type = JRet0;
-
- if (j == JRetc) {
- aclass(&aret, &typ[fn->retty]);
- if (aret.inmem) {
- assert(rtype(fn->retr) == RTmp);
- emit(OCopy, Kl, TMP(RAX), fn->retr, R);
- chuse(fn->retr, +1, fn);
- blit(fn->retr, 0, r0, aret.size, fn);
- ca = 1;
- } else {
- ca = retr(reg, &aret);
- if (aret.size > 8) {
- r = newtmp("abi", Kl, fn);
- emit(OLoad, Kl, reg[1], r, R);
- emit(OAdd, Kl, r, r0, getcon(8, fn));
- chuse(r0, +1, fn);
- }
- emit(OLoad, Kl, reg[0], r0, R);
- }
- } else {
- k = j - JRetw;
- if (KBASE(k) == 0) {
- emit(OCopy, k, TMP(RAX), r0, R);
- ca = 1;
- } else {
- emit(OCopy, k, TMP(XMM0), r0, R);
- ca = 1 << 2;
- }
- }
-
- b->jmp.arg = CALL(ca);
-}
-
-static void
-seljmp(Blk *b, Fn *fn)
-{
- Ref r;
- int c, k;
- Ins *fi;
-
- if (b->jmp.type == JRet0 || b->jmp.type == JJmp)
- return;
- assert(b->jmp.type == JJnz);
- r = b->jmp.arg;
- b->jmp.arg = R;
- assert(!req(r, R));
- if (rtype(r) == RCon) {
- b->jmp.type = JJmp;
- if (req(r, CON_Z))
- b->s1 = b->s2;
- b->s2 = 0;
- return;
- }
- fi = flagi(b->ins, &b->ins[b->nins]);
- if (fi && req(fi->to, r)) {
- if (iscmp(fi->op, &k, &c)) {
- if (rtype(fi->arg[0]) == RCon)
- c = icmpop(c);
- b->jmp.type = JXJc + c;
- if (fn->tmp[r.val].nuse == 1) {
- assert(fn->tmp[r.val].ndef == 1);
- selcmp(fi->arg, k, fn);
- *fi = (Ins){.op = ONop};
- }
- return;
- }
- if (fi->op == OAnd && fn->tmp[r.val].nuse == 1
- && (rtype(fi->arg[0]) == RTmp ||
- rtype(fi->arg[1]) == RTmp)) {
- fi->op = OXTest;
- fi->to = R;
- b->jmp.type = JXJc + ICne;
- if (rtype(fi->arg[1]) == RCon) {
- r = fi->arg[1];
- fi->arg[1] = fi->arg[0];
- fi->arg[0] = r;
- }
- return;
- }
- /* since flags are not tracked in liveness,
- * the result of the flag-setting instruction
- * has to be marked as live
- */
- if (fn->tmp[r.val].nuse == 1)
- emit(OCopy, Kw, R, r, R);
- b->jmp.type = JXJc + ICne;
- return;
- }
- selcmp((Ref[2]){r, CON_Z}, Kw, fn); /* todo, add long branch if non-zero */
- b->jmp.type = JXJc + ICne;
-}
-
-static int
-classify(Ins *i0, Ins *i1, AClass *ac, int op, AClass *aret)
-{
- int nint, ni, nsse, ns, n, *pn;
- AClass *a;
- Ins *i;
-
- if (aret && aret->inmem)
- nint = 5; /* hidden argument */
- else
- nint = 6;
- nsse = 8;
- for (i=i0, a=ac; i<i1; i++, a++) {
- if (i->op == op) {
- if (KBASE(i->cls) == 0)
- pn = &nint;
- else
- pn = &nsse;
- if (*pn > 0) {
- --*pn;
- a->inmem = 0;
- } else
- a->inmem = 2;
- a->align = 3;
- a->size = 8;
- a->cls[0] = i->cls;
- } else {
- n = i->arg[0].val & AMask;
- aclass(a, &typ[n]);
- if (a->inmem)
- continue;
- ni = ns = 0;
- for (n=0; n<2; n++)
- if (KBASE(a->cls[n]) == 0)
- ni++;
- else
- ns++;
- if (nint >= ni && nsse >= ns) {
- nint -= ni;
- nsse -= ns;
- } else
- a->inmem = 1;
- }
- }
-
- return ((6-nint) << 4) | ((8-nsse) << 8);
-}
-
-int rsave[] = {
- RDI, RSI, RDX, RCX, R8, R9, R10, R11, RAX,
- XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
- XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14
-};
-int rclob[] = {RBX, R12, R13, R14, R15};
-
-MAKESURE(rsave_has_correct_size, sizeof rsave == NRSave * sizeof(int));
-MAKESURE(rclob_has_correct_size, sizeof rclob == NRClob * sizeof(int));
-
-bits
-retregs(Ref r, int p[2])
-{
- bits b;
- int ni, nf;
-
- assert(rtype(r) == RACall);
- b = 0;
- ni = r.val & 3;
- nf = (r.val >> 2) & 3;
- if (ni >= 1)
- b |= BIT(RAX);
- if (ni >= 2)
- b |= BIT(RDX);
- if (nf >= 1)
- b |= BIT(XMM0);
- if (nf >= 2)
- b |= BIT(XMM1);
- if (p) {
- p[0] = ni;
- p[1] = nf;
- }
- return b;
-}
-
-bits
-argregs(Ref r, int p[2])
-{
- bits b;
- int j, ni, nf;
-
- assert(rtype(r) == RACall);
- b = 0;
- ni = (r.val >> 4) & 15;
- nf = (r.val >> 8) & 15;
- for (j=0; j<ni; j++)
- b |= BIT(rsave[j]);
- for (j=0; j<nf; j++)
- b |= BIT(XMM0+j);
- if (p) {
- p[0] = ni + 1;
- p[1] = nf;
- }
- return b | BIT(RAX);
-}
-
-static Ref
-rarg(int ty, int *ni, int *ns)
-{
- if (KBASE(ty) == 0)
- return TMP(rsave[(*ni)++]);
- else
- return TMP(XMM0 + (*ns)++);
-}
-
-struct RAlloc {
- Ins i;
- RAlloc *link;
-};
-
-static void
-selcall(Fn *fn, Ins *i0, Ins *i1, RAlloc **rap)
-{
- Ins *i;
- AClass *ac, *a, aret;
- int ca, ni, ns;
- uint stk, off;
- Ref r, r1, r2, reg[2], regcp[2];
- RAlloc *ra;
-
- ac = alloc((i1-i0) * sizeof ac[0]);
- if (!req(i1->arg[1], R)) {
- assert(rtype(i1->arg[1]) == RAType);
- aclass(&aret, &typ[i1->arg[1].val & AMask]);
- ca = classify(i0, i1, ac, OArg, &aret);
- } else
- ca = classify(i0, i1, ac, OArg, 0);
-
- for (stk=0, a=&ac[i1-i0]; a>ac;)
- if ((--a)->inmem) {
- assert(a->align <= 4);
- stk += a->size;
- if (a->align == 4)
- stk += stk & 15;
- }
- stk += stk & 15;
- if (stk) {
- r = getcon(-(int64_t)stk, fn);
- emit(OSAlloc, Kl, R, r, R);
- }
-
- if (!req(i1->arg[1], R)) {
- if (aret.inmem) {
- /* get the return location from eax
- * it saves one callee-save reg */
- r1 = newtmp("abi", Kl, fn);
- emit(OCopy, Kl, i1->to, TMP(RAX), R);
- ca += 1;
- } else {
- if (aret.size > 8) {
- r = newtmp("abi", Kl, fn);
- regcp[1] = newtmp("abi", aret.cls[1], fn);
- emit(OStorel, 0, R, regcp[1], r);
- emit(OAdd, Kl, r, i1->to, getcon(8, fn));
- chuse(i1->to, +1, fn);
- ca += 1 << (2 * KBASE(aret.cls[1]));
- }
- regcp[0] = newtmp("abi", aret.cls[0], fn);
- emit(OStorel, 0, R, regcp[0], i1->to);
- ca += 1 << (2 * KBASE(aret.cls[0]));
- retr(reg, &aret);
- if (aret.size > 8)
- emit(OCopy, aret.cls[1], regcp[1], reg[1], R);
- emit(OCopy, aret.cls[0], regcp[0], reg[0], R);
- r1 = i1->to;
- }
- /* allocate return pad */
- ra = alloc(sizeof *ra);
- assert(NAlign == 3);
- aret.align -= 2;
- if (aret.align < 0)
- aret.align = 0;
- ra->i.op = OAlloc + aret.align;
- ra->i.cls = Kl;
- ra->i.to = r1;
- ra->i.arg[0] = getcon(aret.size, fn);
- ra->link = (*rap);
- *rap = ra;
- } else {
- ra = 0;
- if (KBASE(i1->cls) == 0) {
- emit(OCopy, i1->cls, i1->to, TMP(RAX), R);
- ca += 1;
- } else {
- emit(OCopy, i1->cls, i1->to, TMP(XMM0), R);
- ca += 1 << 2;
- }
- }
- emit(OCall, i1->cls, R, i1->arg[0], CALL(ca));
- emit(OCopy, Kw, TMP(RAX), getcon((ca >> 8) & 15, fn), R);
-
- ni = ns = 0;
- if (ra && aret.inmem)
- emit(OCopy, Kl, rarg(Kl, &ni, &ns), ra->i.to, R); /* pass hidden argument */
- for (i=i0, a=ac; i<i1; i++, a++) {
- if (a->inmem)
- continue;
- r1 = rarg(a->cls[0], &ni, &ns);
- if (i->op == OArgc) {
- if (a->size > 8) {
- r2 = rarg(a->cls[1], &ni, &ns);
- r = newtmp("abi", Kl, fn);
- emit(OLoad, a->cls[1], r2, r, R);
- emit(OAdd, Kl, r, i->arg[1], getcon(8, fn));
- chuse(i->arg[1], +1, fn);
- }
- emit(OLoad, a->cls[0], r1, i->arg[1], R);
- } else
- emit(OCopy, i->cls, r1, i->arg[0], R);
- }
-
- if (!stk)
- return;
-
- r = newtmp("abi", Kl, fn);
- chuse(r, -1, fn);
- for (i=i0, a=ac, off=0; i<i1; i++, a++) {
- if (!a->inmem)
- continue;
- if (i->op == OArgc) {
- if (a->align == 4)
- off += off & 15;
- blit(r, off, i->arg[1], a->size, fn);
- } else {
- r1 = newtmp("abi", Kl, fn);
- emit(OStorel, 0, R, i->arg[0], r1);
- emit(OAdd, Kl, r1, r, getcon(off, fn));
- chuse(r, +1, fn);
- }
- off += a->size;
- }
- emit(OSAlloc, Kl, r, getcon(stk, fn), R);
-}
-
-static void
-selpar(Fn *fn, Ins *i0, Ins *i1)
-{
- AClass *ac, *a, aret;
- Ins *i;
- int ni, ns, s, al;
- Ref r, r1;
-
- ac = alloc((i1-i0) * sizeof ac[0]);
- curi = insb;
- ni = ns = 0;
-
- if (fn->retty >= 0) {
- aclass(&aret, &typ[fn->retty]);
- if (aret.inmem) {
- r = newtmp("abi", Kl, fn);
- *curi++ = (Ins){OCopy, r, {rarg(Kl, &ni, &ns)}, Kl};
- fn->retr = r;
- }
- classify(i0, i1, ac, OPar, &aret);
- } else
- classify(i0, i1, ac, OPar, 0);
-
- assert(NAlign == 3);
-
- s = 4;
- for (i=i0, a=ac; i<i1; i++, a++) {
- switch (a->inmem) {
- case 1:
- assert(a->align <= 4);
- if (a->align == 4)
- s = (s+3) & -4;
- fn->tmp[i->to.val].slot = -s; /* HACK! */
- s += a->size / 4;
- continue;
- case 2:
- *curi++ = (Ins){OLoad, i->to, {SLOT(-s)}, i->cls};
- s += 2;
- continue;
- }
- r1 = rarg(a->cls[0], &ni, &ns);
- if (i->op == OParc) {
- r = newtmp("abi", Kl, fn);
- *curi++ = (Ins){OCopy, r, {r1}, Kl};
- a->cls[0] = r.val;
- if (a->size > 8) {
- r1 = rarg(a->cls[1], &ni, &ns);
- r = newtmp("abi", Kl, fn);
- *curi++ = (Ins){OCopy, r, {r1}, Kl};
- a->cls[1] = r.val;
- }
- } else
- *curi++ = (Ins){OCopy, i->to, {r1}, i->cls};
- }
- for (i=i0, a=ac; i<i1; i++, a++) {
- if (i->op != OParc || a->inmem)
- continue;
- assert(NAlign == 3);
- for (al=0; a->align >> (al+2); al++)
- ;
- r = TMP(a->cls[0]);
- r1 = i->to;
- *curi++ = (Ins){OAlloc+al, r1, {getcon(a->size, fn)}, Kl};
- *curi++ = (Ins){OStorel, R, {r, r1}, 0};
- if (a->size > 8) {
- r = newtmp("abi", Kl, fn);
- *curi++ = (Ins){OAdd, r, {r1, getcon(8, fn)}, Kl};
- r1 = TMP(a->cls[1]);
- *curi++ = (Ins){OStorel, R, {r1, r}, 0};
- }
- }
-}
-
-static int
-aref(Ref r, ANum *ai)
-{
- switch (rtype(r)) {
- default:
- diag("isel: aref defaulted");
- case RCon:
- return 2;
- case RTmp:
- return ai[r.val].n;
- }
-}
-
-static int
-ascale(Ref r, Con *con)
-{
- int64_t n;
-
- if (rtype(r) != RCon)
- return 0;
- if (con[r.val].type != CBits)
- return 0;
- n = con[r.val].bits.i;
- return n == 1 || n == 2 || n == 4 || n == 8;
-}
-
-static void
-anumber(ANum *ai, Blk *b, Con *con)
-{
- /* This should be made obsolete by a proper
- * reassoc pass.
- *
- * Rules:
- *
- * RTmp(_) -> 0 tmp
- * ( RTmp(_) -> 1 slot )
- * RCon(_) -> 2 con
- * 0 * 2 -> 3 s * i (when constant is 1,2,4,8)
- */
- static char add[10][10] = {
- [2] [2] = 2, /* folding */
- [2] [5] = 5, [5] [2] = 5,
- [2] [6] = 6, [6] [2] = 6,
- [2] [7] = 7, [7] [2] = 7,
- [0] [0] = 4, /* 4: b + s * i */
- [0] [3] = 4, [3] [0] = 4,
- [2] [3] = 5, [3] [2] = 5, /* 5: o + s * i */
- [0] [2] = 6, [2] [0] = 6, /* 6: o + b */
- [2] [4] = 7, [4] [2] = 7, /* 7: o + b + s * i */
- [0] [5] = 7, [5] [0] = 7,
- [6] [3] = 7, [3] [6] = 7,
-
- };
- int a, a1, a2, n1, n2, t1, t2;
- Ins *i;
-
- for (i=b->ins; i-b->ins < b->nins; i++) {
- if (rtype(i->to) == RTmp)
- ai[i->to.val].i = i;
- if (i->op != OAdd && i->op != OMul)
- continue;
- a1 = aref(i->arg[0], ai);
- a2 = aref(i->arg[1], ai);
- t1 = a1 != 1 && a1 != 2;
- t2 = a2 != 1 && a2 != 2;
- if (i->op == OAdd) {
- a = add[n1 = a1][n2 = a2];
- if (t1 && a < add[0][a2])
- a = add[n1 = 0][n2 = a2];
- if (t2 && a < add[a1][0])
- a = add[n1 = a1][n2 = 0];
- if (t1 && t2 && a < add[0][0])
- a = add[n1 = 0][n2 = 0];
- } else {
- n1 = n2 = a = 0;
- if (ascale(i->arg[0], con) && t2)
- a = 3, n1 = 2, n2 = 0;
- if (t1 && ascale(i->arg[1], con))
- a = 3, n1 = 0, n2 = 2;
- }
- ai[i->to.val].n = a;
- ai[i->to.val].l = n1;
- ai[i->to.val].r = n2;
- }
-}
-
-static void
-amatch(Addr *a, Ref r, ANum *ai, Fn *fn, int top)
-{
- Ins *i;
- int nl, nr, t, s;
- Ref al, ar;
-
- if (top)
- memset(a, 0, sizeof *a);
- if (rtype(r) == RCon) {
- addcon(&a->offset, &fn->con[r.val]);
- return;
- }
- assert(rtype(r) == RTmp);
- i = ai[r.val].i;
- nl = ai[r.val].l;
- nr = ai[r.val].r;
- if (i) {
- if (nl > nr) {
- al = i->arg[1];
- ar = i->arg[0];
- t = nl, nl = nr, nr = t;
- } else {
- al = i->arg[0];
- ar = i->arg[1];
- }
- }
- switch (ai[r.val].n) {
- default:
- diag("isel: amatch defaulted");
- case 3: /* s * i */
- if (!top) {
- a->index = al;
- a->scale = fn->con[ar.val].bits.i;
- } else
- a->base = r;
- break;
- case 4: /* b + s * i */
- switch (nr) {
- case 0:
- if (fn->tmp[ar.val].slot != -1) {
- al = i->arg[1];
- ar = i->arg[0];
- }
- a->index = ar;
- a->scale = 1;
- break;
- case 3:
- amatch(a, ar, ai, fn, 0);
- break;
- }
- r = al;
- case 0:
- s = fn->tmp[r.val].slot;
- if (s != -1)
- r = SLOT(s);
- a->base = r;
- break;
- case 2: /* constants */
- case 5: /* o + s * i */
- case 6: /* o + b */
- case 7: /* o + b + s * i */
- amatch(a, ar, ai, fn, 0);
- amatch(a, al, ai, fn, 0);
- break;
- }
-}
-
-/* instruction selection
- * requires use counts (as given by parsing)
- */
-void
-isel(Fn *fn)
-{
- Blk *b, **sb;
- Ins *i, *i0, *ip;
- Phi *p;
- uint a;
- int n, al;
- int64_t sz;
- ANum *ainfo;
- RAlloc *ral;
-
- for (n=0; n<fn->ntmp; n++)
- fn->tmp[n].slot = -1;
- fn->slot = 0;
-
- /* lower arguments */
- for (b=fn->start, i=b->ins; i-b->ins < b->nins; i++)
- if (i->op != OPar && i->op != OParc)
- break;
- selpar(fn, b->ins, i);
- n = b->nins - (i - b->ins) + (curi - insb);
- i0 = alloc(n * sizeof(Ins));
- ip = icpy(ip = i0, insb, curi - insb);
- ip = icpy(ip, i, &b->ins[b->nins] - i);
- b->nins = n;
- b->ins = i0;
-
- /* lower function calls and returns */
- ral = 0;
- b = fn->start;
- do {
- if (!(b = b->link))
- b = fn->start; /* do it last */
- curi = &insb[NIns];
- selret(b, fn);
- for (i=&b->ins[b->nins]; i!=b->ins;) {
- if ((--i)->op == OCall) {
- for (i0=i; i0>b->ins; i0--)
- if ((i0-1)->op != OArg)
- if ((i0-1)->op != OArgc)
- break;
- selcall(fn, i0, i, &ral);
- i = i0;
- continue;
- }
- assert(i->op != OArg && i->op != OArgc);
- emiti(*i);
- }
- if (b == fn->start)
- for (; ral; ral=ral->link)
- emiti(ral->i);
- b->nins = &insb[NIns] - curi;
- idup(&b->ins, curi, b->nins);
- } while (b != fn->start);
-
- if (debug['A']) {
- fprintf(stderr, "\n> After call lowering:\n");
- printfn(fn, stderr);
- }
-
- /* assign slots to fast allocs */
- b = fn->start;
- assert(NAlign == 3 && "change n=4 and sz /= 4 below");
- for (al=OAlloc, n=4; al<=OAlloc1; al++, n*=2)
- for (i=b->ins; i-b->ins < b->nins; i++)
- if (i->op == al) {
- if (rtype(i->arg[0]) != RCon)
- break;
- sz = fn->con[i->arg[0].val].bits.i;
- if (sz < 0 || sz >= INT_MAX-3)
- diag("isel: invalid alloc size");
- sz = (sz + n-1) & -n;
- sz /= 4;
- fn->tmp[i->to.val].slot = fn->slot;
- fn->slot += sz;
- *i = (Ins){.op = ONop};
- }
-
- /* process basic blocks */
- n = fn->ntmp;
- ainfo = emalloc(n * sizeof ainfo[0]);
- for (b=fn->start; b; b=b->link) {
- curi = &insb[NIns];
- for (sb=(Blk*[3]){b->s1, b->s2, 0}; *sb; sb++)
- for (p=(*sb)->phi; p; p=p->link) {
- for (a=0; p->blk[a] != b; a++)
- assert(a+1 < p->narg);
- fixarg(&p->arg[a], p->cls, 1, fn);
- }
- memset(ainfo, 0, n * sizeof ainfo[0]);
- anumber(ainfo, b, fn->con);
- seljmp(b, fn);
- for (i=&b->ins[b->nins]; i!=b->ins;)
- sel(*--i, ainfo, fn);
- b->nins = &insb[NIns] - curi;
- idup(&b->ins, curi, b->nins);
- }
- free(ainfo);
-
- if (debug['I']) {
- fprintf(stderr, "\n> After instruction selection:\n");
- printfn(fn, stderr);
- }
-}
diff --git a/lisc/live.c b/lisc/live.c
@@ -1,174 +0,0 @@
-#include "lisc.h"
-
-void
-liveon(BSet *v, Blk *b, Blk *s)
-{
- Phi *p;
- uint a;
-
- bscopy(v, s->in);
- for (p=s->phi; p; p=p->link) {
- bsclr(v, p->to.val);
- for (a=0; a<p->narg; a++)
- if (p->blk[a] == b)
- if (rtype(p->arg[a]) == RTmp)
- bsset(v, p->arg[a].val);
- }
-}
-
-static int
-phitmp(int t, Tmp *tmp)
-{
- int tp;
-
- tp = tmp[t].phi;
- return tp ? tp : t;
-}
-
-static void
-phifix(int t1, short *phi, Tmp *tmp)
-{
- int t, t2;
-
- /* detect temporaries arguments
- * of the same phi node that
- * interfere and separate them
- */
- t = phitmp(t1, tmp);
- t2 = phi[t];
- if (t2 && t2 != t1) {
- if (t != t1) {
- tmp[t1].phi = t1;
- t = t1;
- } else {
- tmp[t2].phi = t2;
- phi[t2] = t2;
- }
- }
- phi[t] = t1;
-}
-
-static void
-bset(Ref r, Blk *b, int *nlv, short *phi, Tmp *tmp)
-{
-
- if (rtype(r) != RTmp)
- return;
- bsset(b->gen, r.val);
- phifix(r.val, phi, tmp);
- if (!bshas(b->in, r.val)) {
- nlv[KBASE(tmp[r.val].cls)]++;
- bsset(b->in, r.val);
- }
-}
-
-/* liveness analysis
- * requires rpo computation
- */
-void
-filllive(Fn *f)
-{
- Blk *b;
- Ins *i;
- int k, t, m[2], n, chg, nlv[2];
- short *phi;
- BSet u[1], v[1];
- Mem *ma;
-
- bsinit(u, f->ntmp);
- bsinit(v, f->ntmp);
- phi = emalloc(f->ntmp * sizeof phi[0]);
- for (b=f->start; b; b=b->link) {
- bsinit(b->in, f->ntmp);
- bsinit(b->out, f->ntmp);
- bsinit(b->gen, f->ntmp);
- }
- chg = 1;
-Again:
- for (n=f->nblk-1; n>=0; n--) {
- b = f->rpo[n];
-
- bscopy(u, b->out);
- if (b->s1) {
- liveon(v, b, b->s1);
- bsunion(b->out, v);
- }
- if (b->s2) {
- liveon(v, b, b->s2);
- bsunion(b->out, v);
- }
- chg |= !bsequal(b->out, u);
-
- memset(phi, 0, f->ntmp * sizeof phi[0]);
- memset(nlv, 0, sizeof nlv);
- bscopy(b->in, b->out);
- for (t=0; t<f->ntmp; t++)
- if (bshas(b->in, t)) {
- phifix(t, phi, f->tmp);
- nlv[KBASE(f->tmp[t].cls)]++;
- }
- if (rtype(b->jmp.arg) == RACall) {
- assert(bscount(b->in) == 0 && nlv[0] == 0 && nlv[1] == 0);
- b->in->t[0] |= retregs(b->jmp.arg, nlv);
- } else
- bset(b->jmp.arg, b, nlv, phi, f->tmp);
- for (k=0; k<2; k++)
- b->nlive[k] = nlv[k];
- for (i=&b->ins[b->nins]; i!=b->ins;) {
- if ((--i)->op == OCall && rtype(i->arg[1]) == RACall) {
- b->in->t[0] &= ~retregs(i->arg[1], m);
- for (k=0; k<2; k++)
- nlv[k] -= m[k];
- if (nlv[0] + NISave > b->nlive[0])
- b->nlive[0] = nlv[0] + NISave;
- if (nlv[1] + NFSave > b->nlive[1])
- b->nlive[1] = nlv[1] + NFSave;
- b->in->t[0] |= argregs(i->arg[1], m);
- for (k=0; k<2; k++)
- nlv[k] += m[k];
- }
- if (!req(i->to, R)) {
- assert(rtype(i->to) == RTmp);
- t = i->to.val;
- if (bshas(b->in, i->to.val))
- nlv[KBASE(f->tmp[t].cls)]--;
- bsset(b->gen, t);
- bsclr(b->in, t);
- phi[phitmp(t, f->tmp)] = 0;
- }
- for (k=0; k<2; k++)
- switch (rtype(i->arg[k])) {
- case RAMem:
- ma = &f->mem[i->arg[k].val & AMask];
- bset(ma->base, b, nlv, phi, f->tmp);
- bset(ma->index, b, nlv, phi, f->tmp);
- break;
- default:
- bset(i->arg[k], b, nlv, phi, f->tmp);
- break;
- }
- for (k=0; k<2; k++)
- if (nlv[k] > b->nlive[k])
- b->nlive[k] = nlv[k];
- }
- }
- if (chg) {
- chg = 0;
- goto Again;
- }
- free(phi);
-
- if (debug['L']) {
- fprintf(stderr, "\n> Liveness analysis:\n");
- for (b=f->start; b; b=b->link) {
- fprintf(stderr, "\t%-10sin: ", b->name);
- dumpts(b->in, f->tmp, stderr);
- fprintf(stderr, "\t out: ");
- dumpts(b->out, f->tmp, stderr);
- fprintf(stderr, "\t gen: ");
- dumpts(b->gen, f->tmp, stderr);
- fprintf(stderr, "\t live: ");
- fprintf(stderr, "%d %d\n", b->nlive[0], b->nlive[1]);
- }
- }
-}
diff --git a/lisc/main.c b/lisc/main.c
@@ -1,117 +0,0 @@
-#include "lisc.h"
-#include <ctype.h>
-#include <getopt.h>
-
-char debug['Z'+1] = {
- ['P'] = 0, /* parsing */
- ['A'] = 0, /* abi lowering */
- ['I'] = 0, /* instruction selection */
- ['L'] = 0, /* liveness */
- ['M'] = 0, /* memory optimization */
- ['N'] = 0, /* ssa construction */
- ['C'] = 0, /* copy elimination */
- ['S'] = 0, /* spilling */
- ['R'] = 0, /* reg. allocation */
-};
-
-static FILE *outf;
-static int dbg;
-
-static void
-data(Dat *d)
-{
- if (dbg)
- return;
- if (d->type == DEnd) {
- fputs("/* end data */\n\n", outf);
- freeall();
- }
- emitdat(d, outf);
-}
-
-static void
-func(Fn *fn)
-{
- int n;
-
- if (dbg)
- fprintf(stderr, "**** Function %s ****", fn->name);
- if (debug['P']) {
- fprintf(stderr, "\n> After parsing:\n");
- printfn(fn, stderr);
- }
- fillrpo(fn);
- fillpreds(fn);
- filluse(fn);
- memopt(fn);
- ssa(fn);
- filluse(fn);
- copy(fn);
- filluse(fn);
- isel(fn);
- filllive(fn);
- fillcost(fn);
- spill(fn);
- rega(fn);
- fillrpo(fn);
- assert(fn->rpo[0] == fn->start);
- for (n=0;; n++)
- if (n == fn->nblk-1) {
- fn->rpo[n]->link = 0;
- break;
- } else
- fn->rpo[n]->link = fn->rpo[n+1];
- if (!dbg) {
- emitfn(fn, outf);
- fprintf(outf, "/* end function %s */\n\n", fn->name);
- } else
- fprintf(stderr, "\n");
- freeall();
-}
-
-int
-main(int ac, char *av[])
-{
- FILE *inf;
- char *f;
- int c;
-
- outf = stdout;
- while ((c = getopt(ac, av, "d:o:")) != -1)
- switch (c) {
- case 'd':
- for (; *optarg; optarg++)
- if (isalpha(*optarg)) {
- debug[toupper(*optarg)] = 1;
- dbg = 1;
- }
- break;
- case 'o':
- if (strcmp(optarg, "-") != 0)
- outf = fopen(optarg, "w");
- break;
- default:
- fprintf(stderr, "usage: %s [-d <flags>] [-o out] {file.ssa, -}\n", av[0]);
- exit(1);
- }
-
- do {
- f = av[optind];
- if (!f || strcmp(f, "-") == 0) {
- inf = stdin;
- f = "-";
- } else {
- inf = fopen(f, "r");
- if (!inf) {
- fprintf(stderr, "cannot open '%s'\n", f);
- exit(1);
- }
- }
- parse(inf, f, data, func);
- } while (++optind < ac);
-
- if (!dbg)
- emitfin(outf);
-
- exit(0);
-}
diff --git a/lisc/mem.c b/lisc/mem.c
@@ -1,81 +0,0 @@
-#include "lisc.h"
-
-/* Memory optimization:
- *
- * - replace alloced slots used only in
- * load/store operations
- * Assumption: all the accesses have the
- * same size (this could be wrong...)
- */
-
-/* require use, maintains use counts */
-void
-memopt(Fn *fn)
-{
- Blk *b;
- Ins *i, *l;
- Tmp *t;
- Use *u, *ue;
- int a;
-
- b = fn->start;
- for (i=b->ins; i-b->ins < b->nins; i++) {
- if (OAlloc > i->op || i->op > OAlloc1)
- continue;
- assert(NAlign == 3);
- assert(rtype(i->to) == RTmp);
- t = &fn->tmp[i->to.val];
- for (u=t->use; u != &t->use[t->nuse]; u++) {
- if (u->type != UIns)
- goto NextIns;
- l = u->u.ins;
- if (!isload(l->op)
- && (!isstore(l->op) || req(i->to, l->arg[0])))
- goto NextIns;
- }
- /* get rid of the alloc and replace uses */
- *i = (Ins){.op = ONop};
- t->ndef--;
- ue = &t->use[t->nuse];
- for (u=t->use; u!=ue; u++) {
- l = u->u.ins;
- if (isstore(l->op)) {
- if (l->op == OStores)
- l->cls = Kd;
- else if (l->op == OStored)
- l->cls = Kd;
- else if (l->op == OStorel)
- l->cls = Kl;
- else
- l->cls = Kw;
- l->op = OCopy;
- l->to = l->arg[1];
- l->arg[1] = R;
- t->nuse--;
- t->ndef++;
- } else
- /* try to turn loads into copies so we
- * can eliminate them later */
- switch(l->op) {
- case OLoad:
- l->op = OCopy;
- break;
- case OLoadsw:
- case OLoaduw:
- l->cls = Kw;
- l->op = OCopy;
- break;
- default:
- /* keep l->cls */
- a = l->op - OLoadsw;
- l->op = OExtsw + a;
- break;
- }
- }
- NextIns:;
- }
- if (debug['M']) {
- fprintf(stderr, "\n> After memory optimization:\n");
- printfn(fn, stderr);
- }
-}
diff --git a/lisc/parse.c b/lisc/parse.c
@@ -1,1081 +0,0 @@
-#include "lisc.h"
-#include <ctype.h>
-#include <stdarg.h>
-
-enum {
- Kx = -1, /* Invalid operand */
- Km = Kl, /* Memory pointer (for x64) */
-};
-
-OpDesc opdesc[NOp] = {
-#define A(a,b,c,d) {[Kw]=K##a, [Kl]=K##b, [Ks]=K##c, [Kd]=K##d}
-
- /* NAME NM ARGCLS0 ARGCLS1 SF LF */
- [OAdd] = { "add", 2, {A(w,l,s,d), A(w,l,s,d)}, 1, 0 },
- [OSub] = { "sub", 2, {A(w,l,s,d), A(w,l,s,d)}, 1, 0 },
- [ODiv] = { "div", 2, {A(w,l,s,d), A(w,l,s,d)}, 0, 0 },
- [ORem] = { "rem", 2, {A(w,l,x,x), A(w,l,x,x)}, 0, 0 },
- [OUDiv] = { "udiv", 2, {A(w,l,s,d), A(w,l,s,d)}, 0, 0 },
- [OURem] = { "urem", 2, {A(w,l,x,x), A(w,l,x,x)}, 0, 0 },
- [OMul] = { "mul", 2, {A(w,l,s,d), A(w,l,s,d)}, 0, 0 },
- [OAnd] = { "and", 2, {A(w,l,s,d), A(w,l,s,d)}, 1, 0 },
- [OOr] = { "or", 2, {A(w,l,s,d), A(w,l,s,d)}, 1, 0 },
- [OXor] = { "xor", 2, {A(w,l,s,d), A(w,l,s,d)}, 1, 0 },
- [OSar] = { "sar", 1, {A(w,l,x,x), A(w,w,x,x)}, 1, 0 },
- [OShr] = { "shr", 1, {A(w,l,x,x), A(w,w,x,x)}, 1, 0 },
- [OShl] = { "shl", 1, {A(w,l,x,x), A(w,w,x,x)}, 1, 0 },
- [OStored] = { "stored", 0, {A(d,d,d,d), A(m,m,m,m)}, 0, 1 },
- [OStores] = { "stores", 0, {A(s,s,s,s), A(m,m,m,m)}, 0, 1 },
- [OStorel] = { "storel", 0, {A(l,l,l,l), A(m,m,m,m)}, 0, 1 },
- [OStorew] = { "storew", 0, {A(w,w,w,w), A(m,m,m,m)}, 0, 1 },
- [OStoreh] = { "storeh", 0, {A(w,w,w,w), A(m,m,m,m)}, 0, 1 },
- [OStoreb] = { "storeb", 0, {A(w,w,w,w), A(m,m,m,m)}, 0, 1 },
- [OLoad] = { "load", 0, {A(m,m,m,m), A(x,x,x,x)}, 0, 1 },
- [OLoadsw] = { "loadsw", 0, {A(m,m,x,x), A(x,x,x,x)}, 0, 1 },
- [OLoaduw] = { "loaduw", 0, {A(m,m,x,x), A(x,x,x,x)}, 0, 1 },
- [OLoadsh] = { "loadsh", 0, {A(m,m,x,x), A(x,x,x,x)}, 0, 1 },
- [OLoaduh] = { "loaduh", 0, {A(m,m,x,x), A(x,x,x,x)}, 0, 1 },
- [OLoadsb] = { "loadsb", 0, {A(m,m,x,x), A(x,x,x,x)}, 0, 1 },
- [OLoadub] = { "loadub", 0, {A(m,m,x,x), A(x,x,x,x)}, 0, 1 },
- [OExtsw] = { "extsw", 0, {A(w,w,w,w), A(x,x,x,x)}, 0, 1 },
- [OExtuw] = { "extuw", 0, {A(w,w,w,w), A(x,x,x,x)}, 0, 1 },
- [OExtsh] = { "extsh", 0, {A(w,w,w,w), A(x,x,x,x)}, 0, 1 },
- [OExtuh] = { "extuh", 0, {A(w,w,w,w), A(x,x,x,x)}, 0, 1 },
- [OExtsb] = { "extsb", 0, {A(w,w,w,w), A(x,x,x,x)}, 0, 1 },
- [OExtub] = { "extub", 0, {A(w,w,w,w), A(x,x,x,x)}, 0, 1 },
- [OExts] = { "exts", 0, {A(w,w,w,w), A(x,x,x,x)}, 0, 1 },
- [OTruncd] = { "truncd", 0, {A(d,d,d,d), A(x,x,x,x)}, 0, 1 },
- [OFtosi] = { "ftosi", 0, {A(s,d,x,x), A(x,x,x,x)}, 0, 1 },
- [OSitof] = { "sitof", 0, {A(x,x,w,l), A(x,x,x,x)}, 0, 1 },
- [OCast] = { "cast", 0, {A(s,d,w,l), A(x,x,x,x)}, 0, 1 },
- [OCopy] = { "copy", 1, {A(w,l,s,d), A(x,x,x,x)}, 0, 1 },
- [ONop] = { "nop", 0, {A(x,x,x,x), A(x,x,x,x)}, 0, 1 },
- [OSwap] = { "swap", 2, {A(w,l,s,d), A(w,l,s,d)}, 0, 0 },
- [OSign] = { "sign", 0, {A(w,l,x,x), A(x,x,x,x)}, 0, 0 },
- [OSAlloc] = { "salloc", 0, {A(x,l,x,x), A(x,x,x,x)}, 0, 0 },
- [OXDiv] = { "xdiv", 1, {A(w,l,x,x), A(x,x,x,x)}, 0, 0 },
- [OXCmp] = { "xcmp", 1, {A(w,l,s,d), A(w,l,s,d)}, 1, 0 },
- [OXTest] = { "xtest", 1, {A(w,l,x,x), A(w,l,x,x)}, 1, 0 },
- [OAddr] = { "addr", 0, {A(m,m,x,x), A(x,x,x,x)}, 0, 1 },
- [OPar] = { "parn", 0, {A(x,x,x,x), A(x,x,x,x)}, 0, 0 },
- [OParc] = { "parc", 0, {A(x,x,x,x), A(x,x,x,x)}, 0, 0 },
- [OArg] = { "arg", 0, {A(x,x,x,x), A(x,x,x,x)}, 0, 0 },
- [OArgc] = { "argc", 0, {A(x,x,x,x), A(x,x,x,x)}, 0, 0 },
- [OCall] = { "call", 0, {A(m,m,m,m), A(x,x,x,x)}, 0, 0 },
- [OXSetnp] = { "xsetnp", 0, {A(x,x,x,x), A(x,x,x,x)}, 0, 0 },
- [OXSetp] = { "xsetp", 0, {A(x,x,x,x), A(x,x,x,x)}, 0, 0 },
- [OAlloc] = { "alloc4", 1, {A(l,l,l,l), A(x,x,x,x)}, 0, 0 },
- [OAlloc+1] = { "alloc8", 1, {A(l,l,l,l), A(x,x,x,x)}, 0, 0 },
- [OAlloc+2] = { "alloc16", 1, {A(l,l,l,l), A(x,x,x,x)}, 0, 0 },
-#define X(c) \
- [OCmpw+IC##c] = { "c" #c "w", 0, {A(w,w,x,x), A(w,w,x,x)}, 1, 0 }, \
- [OCmpl+IC##c] = { "c" #c "l", 0, {A(l,l,x,x), A(l,l,x,x)}, 1, 0 }, \
- [OXSet+IC##c] = { "xset" #c, 0, {A(x,x,x,x), A(x,x,x,x)}, 0, 1 },
- ICMPS(X)
-#undef X
-#define X(c) \
- [OCmps+FC##c] = { "c" #c "s", 0, {A(s,s,x,x), A(s,s,x,x)}, 1, 0 }, \
- [OCmpd+FC##c] = { "c" #c "d", 0, {A(d,d,x,x), A(d,d,x,x)}, 1, 0 },
- FCMPS(X)
-#undef X
-
-};
-#undef A
-
-typedef enum {
- PXXX,
- PLbl,
- PPhi,
- PIns,
- PEnd,
-} PState;
-
-enum {
- TXXX = NPubOp,
- TCall,
- TPhi,
- TJmp,
- TJnz,
- TRet,
- TFunc,
- TType,
- TData,
- TAlign,
- TL,
- TW,
- TH,
- TB,
- TD,
- TS,
- TZ,
-
- TInt,
- TFlts,
- TFltd,
- TTmp,
- TLbl,
- TGlo,
- TTyp,
- TStr,
-
- TPlus,
- TEq,
- TComma,
- TLParen,
- TRParen,
- TLBrace,
- TRBrace,
- TNL,
- TEOF,
-};
-
-
-static FILE *inf;
-static char *inpath;
-static int thead;
-static struct {
- char chr;
- double fltd;
- float flts;
- int64_t num;
- char *str;
-} tokval;
-static int lnum;
-
-static Tmp *tmp;
-static Con *con;
-static int ntmp;
-static int ncon;
-static Phi **plink;
-static Blk **bmap;
-static Blk *curb;
-static Blk **blink;
-static int nblk;
-static int rcls;
-static int ntyp;
-
-
-void
-err(char *s, ...)
-{
- char buf[100], *p, *end;
- va_list ap;
-
- p = buf;
- end = buf + sizeof(buf);
-
- va_start(ap, s);
- p += snprintf(p, end - p, "%s:%d: ", inpath, lnum);
- p += vsnprintf(p, end - p, s, ap);
- va_end(ap);
-
- diag(buf);
-}
-
-static int
-lex()
-{
- static struct {
- char *str;
- int tok;
- } tmap[] = {
- { "call", TCall },
- { "phi", TPhi },
- { "jmp", TJmp },
- { "jnz", TJnz },
- { "ret", TRet },
- { "function", TFunc },
- { "type", TType },
- { "data", TData },
- { "align", TAlign },
- { "l", TL },
- { "w", TW },
- { "h", TH },
- { "b", TB },
- { "d", TD },
- { "s", TS },
- { "z", TZ },
- { "loadw", OLoad }, /* for convenience */
- { "loadl", OLoad },
- { "loads", OLoad },
- { "loadd", OLoad },
- { "alloc1", OAlloc },
- { "alloc2", OAlloc },
- { 0, TXXX }
- };
- static char tok[NString];
- int c, i;
- int t;
-
- do
- c = fgetc(inf);
- while (isblank(c));
- t = TXXX;
- tokval.chr = c;
- switch (c) {
- case EOF:
- return TEOF;
- case ',':
- return TComma;
- case '(':
- return TLParen;
- case ')':
- return TRParen;
- case '{':
- return TLBrace;
- case '}':
- return TRBrace;
- case '=':
- return TEq;
- case '+':
- return TPlus;
- case 's':
- if (fscanf(inf, "_%f", &tokval.flts) != 1)
- break;
- return TFlts;
- case 'd':
- if (fscanf(inf, "_%lf", &tokval.fltd) != 1)
- break;
- return TFltd;
- case '%':
- t = TTmp;
- goto Alpha;
- case '@':
- t = TLbl;
- goto Alpha;
- case '$':
- t = TGlo;
- goto Alpha;
- case ':':
- t = TTyp;
- goto Alpha;
- case '#':
- while (fgetc(inf) != '\n')
- ;
- case '\n':
- lnum++;
- return TNL;
- }
- if (isdigit(c) || c == '-' || c == '+') {
- ungetc(c, inf);
- if (fscanf(inf, "%"SCNd64, &tokval.num) != 1)
- err("invalid integer literal");
- return TInt;
- }
- if (c == '"') {
- tokval.str = vnew(0, 1);
- for (i=0;; i++) {
- c = fgetc(inf);
- vgrow(&tokval.str, i+1);
- if (c == '"')
- if (!i || tokval.str[i-1] != '\\') {
- tokval.str[i] = 0;
- return TStr;
- }
- tokval.str[i] = c;
- }
- }
- if (0)
-Alpha: c = fgetc(inf);
- if (!isalpha(c) && c != '.' && c != '_')
- err("lexing failure: invalid character %c (%d)", c, c);
- i = 0;
- do {
- if (i >= NString-1)
- err("identifier too long");
- tok[i++] = c;
- c = fgetc(inf);
- } while (isalpha(c) || c == '$' || c == '.' || c == '_' || isdigit(c));
- tok[i] = 0;
- ungetc(c, inf);
- tokval.str = tok;
- if (t != TXXX) {
- return t;
- }
- for (i=0; i<NPubOp; i++)
- if (opdesc[i].name)
- if (strcmp(tok, opdesc[i].name) == 0)
- return i;
- for (i=0; tmap[i].str; i++)
- if (strcmp(tok, tmap[i].str) == 0)
- return tmap[i].tok;
- err("unknown keyword %s", tokval.str);
- return TXXX;
-}
-
-static int
-peek()
-{
- if (thead == TXXX)
- thead = lex();
- return thead;
-}
-
-static int
-next()
-{
- int t;
-
- t = peek();
- thead = TXXX;
- return t;
-}
-
-static int
-nextnl()
-{
- int t;
-
- while ((t = next()) == TNL)
- ;
- return t;
-}
-
-static void
-expect(int t)
-{
- static char *ttoa[] = {
- [TLbl] = "label",
- [TComma] = ",",
- [TEq] = "=",
- [TNL] = "newline",
- [TLParen] = "(",
- [TRParen] = ")",
- [TLBrace] = "{",
- [TRBrace] = "}",
- [TEOF] = 0,
- };
- char buf[128], *s1, *s2;
- int t1;
-
- t1 = next();
- if (t == t1)
- return;
- s1 = ttoa[t] ? ttoa[t] : "??";
- s2 = ttoa[t1] ? ttoa[t1] : "??";
- sprintf(buf, "%s expected, got %s instead", s1, s2);
- err(buf);
-}
-
-static Ref
-tmpref(char *v)
-{
- int t;
-
- for (t=Tmp0; t<ntmp; t++)
- if (strcmp(v, tmp[t].name) == 0)
- return TMP(t);
- vgrow(&tmp, ++ntmp);
- strcpy(tmp[t].name, v);
- return TMP(t);
-}
-
-static Ref
-parseref()
-{
- Con c;
- int i;
-
- memset(&c, 0, sizeof c);
- switch (next()) {
- case TTmp:
- return tmpref(tokval.str);
- case TInt:
- c.type = CBits;
- c.bits.i = tokval.num;
- goto Look;
- case TFlts:
- c.type = CBits;
- c.bits.s = tokval.flts;
- c.flt = 1;
- goto Look;
- case TFltd:
- c.type = CBits;
- c.bits.d = tokval.fltd;
- c.flt = 2;
- goto Look;
- case TGlo:
- c.type = CAddr;
- strcpy(c.label, tokval.str);
- Look:
- for (i=0; i<ncon; i++)
- if (con[i].type == c.type
- && con[i].bits.i == c.bits.i
- && strcmp(con[i].label, c.label) == 0)
- return CON(i);
- vgrow(&con, ++ncon);
- con[i] = c;
- return CON(i);
- default:
- return R;
- }
-}
-
-static int
-parsecls(int *tyn)
-{
- int i;
-
- switch (next()) {
- default:
- err("invalid class specifier");
- case TTyp:
- for (i=0; i<ntyp; i++)
- if (strcmp(tokval.str, typ[i].name) == 0) {
- *tyn = i;
- return 4;
- }
- err("undefined type");
- case TW:
- return Kw;
- case TL:
- return Kl;
- case TS:
- return Ks;
- case TD:
- return Kd;
- }
-}
-
-static void
-parserefl(int arg)
-{
- int k, t, ty;
- Ref r;
-
- expect(TLParen);
- if (peek() == TRParen) {
- next();
- return;
- }
- for (;;) {
- if (curi - insb >= NIns)
- err("too many instructions (1)");
- k = parsecls(&ty);
- r = parseref();
- if (req(r, R))
- err("invalid reference argument");
- if (!arg && rtype(r) != RTmp)
- err("invalid function parameter");
- if (k == 4)
- if (arg)
- *curi = (Ins){OArgc, R, {TYPE(ty), r}, Kl};
- else
- *curi = (Ins){OParc, r, {TYPE(ty)}, Kl};
- else
- if (arg)
- *curi = (Ins){OArg, R, {r}, k};
- else
- *curi = (Ins){OPar, r, {R}, k};
- curi++;
- t = next();
- if (t == TRParen)
- break;
- if (t != TComma)
- err(", or ) expected");
- }
-}
-
-static Blk *
-findblk(char *name)
-{
- int i;
-
- for (i=0; i<nblk; i++)
- if (strcmp(bmap[i]->name, name) == 0)
- return bmap[i];
- vgrow(&bmap, ++nblk);
- bmap[i] = blknew();
- strcpy(bmap[i]->name, name);
- return bmap[i];
-}
-
-static void
-closeblk()
-{
- curb->nins = curi - insb;
- idup(&curb->ins, insb, curb->nins);
- blink = &curb->link;
- curi = insb;
-}
-
-static PState
-parseline(PState ps)
-{
- Ref arg[NPred] = {R};
- Blk *blk[NPred];
- Phi *phi;
- Ref r;
- Blk *b;
- int t, op, i, k, ty;
-
- t = nextnl();
- if (ps == PLbl && t != TLbl && t != TRBrace)
- err("label or } expected");
- switch (t) {
- default:
- if (isstore(t)) {
- /* operations without result */
- r = R;
- k = 0;
- op = t;
- goto DoOp;
- }
- err("label, instruction or jump expected");
- case TRBrace:
- return PEnd;
- case TTmp:
- break;
- case TLbl:
- b = findblk(tokval.str);
- if (b->jmp.type != JXXX)
- err("multiple definitions of block");
- if (curb && curb->jmp.type == JXXX) {
- closeblk();
- curb->jmp.type = JJmp;
- curb->s1 = b;
- }
- *blink = b;
- curb = b;
- plink = &curb->phi;
- expect(TNL);
- return PPhi;
- case TRet:
- curb->jmp.type = (int[]){
- JRetw, JRetl,
- JRets, JRetd,
- JRetc, JRet0
- }[rcls];
- if (rcls < 5) {
- r = parseref();
- if (req(r, R))
- err("return value expected");
- curb->jmp.arg = r;
- }
- goto Close;
- case TJmp:
- curb->jmp.type = JJmp;
- goto Jump;
- case TJnz:
- curb->jmp.type = JJnz;
- r = parseref();
- if (req(r, R))
- err("invalid argument for jnz jump");
- curb->jmp.arg = r;
- expect(TComma);
- Jump:
- expect(TLbl);
- curb->s1 = findblk(tokval.str);
- if (curb->jmp.type != JJmp) {
- expect(TComma);
- expect(TLbl);
- curb->s2 = findblk(tokval.str);
- }
- Close:
- expect(TNL);
- closeblk();
- return PLbl;
- }
- r = tmpref(tokval.str);
- expect(TEq);
- k = parsecls(&ty);
- op = next();
-DoOp:
- if (op == TPhi) {
- if (ps != PPhi)
- err("unexpected phi instruction");
- op = -1;
- }
- if (op == TCall) {
- arg[0] = parseref();
- parserefl(1);
- expect(TNL);
- op = OCall;
- if (k == 4) {
- k = Kl;
- arg[1] = TYPE(ty);
- } else
- arg[1] = R;
- goto Ins;
- }
- if (k == 4)
- err("size class must be w, l, s, or d");
- if (op >= NPubOp)
- err("invalid instruction");
- i = 0;
- if (peek() != TNL)
- for (;;) {
- if (i == NPred)
- err("too many arguments");
- if (op == -1) {
- expect(TLbl);
- blk[i] = findblk(tokval.str);
- }
- arg[i] = parseref();
- if (req(arg[i], R))
- err("invalid instruction argument");
- i++;
- t = peek();
- if (t == TNL)
- break;
- if (t != TComma)
- err(", or end of line expected");
- next();
- }
- next();
- if (op != -1) {
- Ins:
- if (curi - insb >= NIns)
- err("too many instructions (2)");
- curi->op = op;
- curi->cls = k;
- curi->to = r;
- curi->arg[0] = arg[0];
- curi->arg[1] = arg[1];
- curi++;
- return PIns;
- } else {
- phi = alloc(sizeof *phi);
- phi->to = r;
- phi->cls = k;
- memcpy(phi->arg, arg, i * sizeof arg[0]);
- memcpy(phi->blk, blk, i * sizeof blk[0]);
- phi->narg = i;
- *plink = phi;
- plink = &phi->link;
- return PPhi;
- }
-}
-
-static Fn *
-parsefn()
-{
- PState ps;
- Fn *fn;
-
- ntmp = Tmp0;
- ncon = 1; /* first constant must be 0 */
- curb = 0;
- nblk = 0;
- curi = insb;
- tmp = vnew(ntmp, sizeof tmp[0]);
- con = vnew(ncon, sizeof con[0]);
- bmap = vnew(nblk, sizeof bmap[0]);
- con[0].type = CBits;
- fn = alloc(sizeof *fn);
- blink = &fn->start;
- fn->retty = -1;
- if (peek() != TGlo)
- rcls = parsecls(&fn->retty);
- else
- rcls = 5;
- if (next() != TGlo)
- err("function name expected");
- strcpy(fn->name, tokval.str);
- parserefl(0);
- if (nextnl() != TLBrace)
- err("function body must start with {");
- ps = PLbl;
- do
- ps = parseline(ps);
- while (ps != PEnd);
- if (!curb)
- err("empty file");
- if (curb->jmp.type == JXXX)
- err("last block misses jump");
- fn->tmp = tmp;
- fn->con = con;
- fn->mem = vnew(0, sizeof fn->mem[0]);
- fn->ntmp = ntmp;
- fn->ncon = ncon;
- fn->nmem = 0;
- fn->nblk = nblk;
- fn->rpo = 0;
- return fn;
-}
-
-static void
-parsetyp()
-{
- Typ *ty;
- int t, n, sz, al, s, a, c, flt;
-
- if (ntyp >= NTyp)
- err("too many type definitions");
- ty = &typ[ntyp++];
- ty->align = -1;
- if (nextnl() != TTyp || nextnl() != TEq)
- err("type name, then = expected");
- strcpy(ty->name, tokval.str);
- t = nextnl();
- if (t == TAlign) {
- if (nextnl() != TInt)
- err("alignment expected");
- for (al=0; tokval.num /= 2; al++)
- ;
- ty->align = al;
- t = nextnl();
- }
- if (t != TLBrace)
- err("type body must start with {");
- t = nextnl();
- if (t == TInt) {
- ty->dark = 1;
- ty->size = tokval.num;
- if (ty->align == -1)
- err("dark types need alignment");
- t = nextnl();
- } else {
- ty->dark = 0;
- n = -1;
- sz = 0;
- al = 0;
- for (;;) {
- flt = 0;
- switch (t) {
- default: err("invalid size specifier %c", tokval.chr);
- case TD: flt = 1;
- case TL: s = 8; a = 3; break;
- case TS: flt = 1;
- case TW: s = 4; a = 2; break;
- case TH: s = 2; a = 1; break;
- case TB: s = 1; a = 0; break;
- }
- if (a > al)
- al = a;
- if ((a = sz & (s-1))) {
- a = s - a;
- if (++n < NSeg) {
- /* padding segment */
- ty->seg[n].ispad = 1;
- ty->seg[n].len = a;
- }
- }
- t = nextnl();
- if (t == TInt) {
- c = tokval.num;
- t = nextnl();
- } else
- c = 1;
- while (c-- > 0) {
- if (++n < NSeg) {
- ty->seg[n].isflt = flt;
- ty->seg[n].ispad = 0;
- ty->seg[n].len = s;
- }
- sz += a + s;
- }
- if (t != TComma)
- break;
- t = nextnl();
- }
- if (++n >= NSeg)
- ty->dark = 1;
- else
- ty->seg[n].len = 0;
- if (ty->align == -1)
- ty->align = al;
- else
- al = ty->align;
- a = (1 << al) - 1;
- ty->size = (sz + a) & ~a;
- }
- if (t != TRBrace)
- err("expected closing }");
-}
-
-static void
-parsedatref(Dat *d)
-{
- int t;
-
- d->isref = 1;
- d->u.ref.nam = tokval.str;
- d->u.ref.off = 0;
- t = peek();
- if (t == TPlus) {
- next();
- if (next() != TInt)
- err("invalid token after offset in ref");
- d->u.ref.off = tokval.num;
- }
-}
-
-static void
-parsedatstr(Dat *d)
-{
- d->isstr = 1;
- d->u.str = tokval.str;
-}
-
-static void
-parsedat(void cb(Dat *))
-{
- char s[NString];
- int t;
- Dat d;
-
- d.type = DStart;
- d.isstr = 0;
- d.isref = 0;
- cb(&d);
- if (nextnl() != TGlo || nextnl() != TEq)
- err("data name, then = expected");
- strcpy(s, tokval.str);
- t = nextnl();
- if (t == TAlign) {
- if (nextnl() != TInt)
- err("alignment expected");
- d.type = DAlign;
- d.u.num = tokval.num;
- cb(&d);
- t = nextnl();
- }
- d.type = DName;
- d.u.str = s;
- cb(&d);
-
- if (t != TLBrace)
- err("expected data contents in { .. }");
- for (;;) {
- switch (nextnl()) {
- default: err("invalid size specifier %c in data", tokval.chr);
- case TRBrace: goto Done;
- case TL: d.type = DL; break;
- case TW: d.type = DW; break;
- case TH: d.type = DH; break;
- case TB: d.type = DB; break;
- case TS: d.type = DW; break;
- case TD: d.type = DL; break;
- case TZ: d.type = DZ; break;
- }
- t = nextnl();
- do {
- d.isref = 0;
- d.isstr = 0;
- memset(&d.u, 0, sizeof d.u);
- if (t == TFlts)
- d.u.flts = tokval.flts;
- else if (t == TFltd)
- d.u.fltd = tokval.fltd;
- else if (t == TInt)
- d.u.num = tokval.num;
- else if (t == TGlo)
- parsedatref(&d);
- else if (t == TStr)
- parsedatstr(&d);
- else
- err("constant literal expected");
- cb(&d);
- t = nextnl();
- } while (t == TInt || t == TFlts || t == TFltd);
- if (t == TRBrace)
- break;
- if (t != TComma)
- err(", or } expected");
- }
-Done:
- d.type = DEnd;
- cb(&d);
-}
-
-void
-parse(FILE *f, char *path, void data(Dat *), void func(Fn *))
-{
- inf = f;
- inpath = path;
- lnum = 1;
- thead = TXXX;
- ntyp = 0;
- for (;;)
- switch (nextnl()) {
- case TFunc:
- func(parsefn());
- break;
- case TType:
- parsetyp();
- break;
- case TData:
- parsedat(data);
- break;
- case TEOF:
- return;
- default:
- err("top-level definition expected");
- break;
- }
-}
-
-static void
-printcon(Con *c, FILE *f)
-{
- switch (c->type) {
- case CUndef:
- break;
- case CAddr:
- fprintf(f, "$%s", c->label);
- if (c->bits.i)
- fprintf(f, "%+"PRIi64, c->bits.i);
- break;
- case CBits:
- if (c->flt == 1)
- fprintf(f, "s_%f", c->bits.s);
- else if (c->flt == 2)
- fprintf(f, "d_%lf", c->bits.d);
- else
- fprintf(f, "%"PRIi64, c->bits.i);
- break;
- }
-}
-
-void
-printref(Ref r, Fn *fn, FILE *f)
-{
- int i;
- Mem *m;
-
- switch (rtype(r)) {
- case RTmp:
- if (r.val < Tmp0)
- fprintf(f, "R%d", r.val);
- else
- fprintf(f, "%%%s", fn->tmp[r.val].name);
- break;
- case RCon:
- printcon(&fn->con[r.val], f);
- break;
- case RSlot:
- fprintf(f, "S%d", r.val);
- break;
- case RACall:
- fprintf(f, "%03x", r.val & AMask);
- break;
- case RAType:
- fprintf(f, ":%s", typ[r.val & AMask].name);
- break;
- case RAMem:
- i = 0;
- m = &fn->mem[r.val & AMask];
- fputc('[', f);
- if (m->offset.type != CUndef) {
- printcon(&m->offset, f);
- i = 1;
- }
- if (!req(m->base, R)) {
- if (i)
- fprintf(f, " + ");
- printref(m->base, fn, f);
- i = 1;
- }
- if (!req(m->index, R)) {
- if (i)
- fprintf(f, " + ");
- fprintf(f, "%d * ", m->scale);
- printref(m->index, fn, f);
- }
- fputc(']', f);
- break;
- }
-}
-
-void
-printfn(Fn *fn, FILE *f)
-{
- static char *jtoa[NJmp] = {
- [JRet0] = "ret",
- [JRetw] = "retw",
- [JRetl] = "retl",
- [JRetc] = "retc",
- [JRets] = "rets",
- [JRetd] = "retd",
- [JJnz] = "jnz",
- [JXJnp] = "xjnp",
- [JXJp] = "xjp",
- #define X(c) [JXJc+IC##c] = "xj" #c,
- ICMPS(X)
- #undef X
- };
- static char prcls[NOp] = {
- [OArg] = 1,
- [OSwap] = 1,
- [OXCmp] = 1,
- [OXTest] = 1,
- [OXDiv] = 1,
- [OXIDiv] = 1,
- };
- static char ktoc[] = "wlsd";
- Blk *b;
- Phi *p;
- Ins *i;
- uint n;
-
- fprintf(f, "function $%s() {\n", fn->name);
- for (b=fn->start; b; b=b->link) {
- fprintf(f, "@%s\n", b->name);
- for (p=b->phi; p; p=p->link) {
- fprintf(f, "\t");
- printref(p->to, fn, f);
- fprintf(f, " =%c phi ", ktoc[p->cls]);
- assert(p->narg);
- for (n=0;; n++) {
- fprintf(f, "@%s ", p->blk[n]->name);
- printref(p->arg[n], fn, f);
- if (n == p->narg-1) {
- fprintf(f, "\n");
- break;
- } else
- fprintf(f, ", ");
- }
- }
- for (i=b->ins; i-b->ins < b->nins; i++) {
- fprintf(f, "\t");
- if (!req(i->to, R)) {
- printref(i->to, fn, f);
- fprintf(f, " =%c ", ktoc[i->cls]);
- }
- assert(opdesc[i->op].name);
- fprintf(f, "%s", opdesc[i->op].name);
- if (req(i->to, R) && prcls[i->op])
- fputc(ktoc[i->cls], f);
- if (!req(i->arg[0], R)) {
- fprintf(f, " ");
- printref(i->arg[0], fn, f);
- }
- if (!req(i->arg[1], R)) {
- fprintf(f, ", ");
- printref(i->arg[1], fn, f);
- }
- fprintf(f, "\n");
- }
- switch (b->jmp.type) {
- case JRet0:
- case JRetw:
- case JRetl:
- case JRets:
- case JRetd:
- case JRetc:
- fprintf(f, "\t%s", jtoa[b->jmp.type]);
- if (b->jmp.type != JRet0 || !req(b->jmp.arg, R)) {
- fprintf(f, " ");
- printref(b->jmp.arg, fn, f);
- }
- if (b->jmp.type == JRetc)
- fprintf(f, ", :%s", typ[fn->retty].name);
- fprintf(f, "\n");
- break;
- case JJmp:
- if (b->s1 != b->link)
- fprintf(f, "\tjmp @%s\n", b->s1->name);
- break;
- default:
- fprintf(f, "\t%s ", jtoa[b->jmp.type]);
- if (b->jmp.type == JJnz) {
- printref(b->jmp.arg, fn, f);
- fprintf(f, ", ");
- }
- fprintf(f, "@%s, @%s\n", b->s1->name, b->s2->name);
- break;
- }
- }
- fprintf(f, "}\n");
-}
diff --git a/lisc/rega.c b/lisc/rega.c
@@ -1,597 +0,0 @@
-#include "lisc.h"
-#ifdef TEST_PMOV
- #undef assert
- #define assert(x) assert_test(#x, x)
-#endif
-
-typedef struct RMap RMap;
-
-struct RMap {
- int t[NIReg+NFReg];
- int r[NIReg+NFReg];
- BSet b[1];
- int n;
-};
-
-static bits regu; /* registers used */
-static Tmp *tmp; /* function temporaries */
-static Mem *mem; /* function mem references */
-static struct {
- Ref src, dst;
- int cls;
-} *pm; /* parallel move constructed */
-static int cpm, npm; /* capacity and size of pm */
-
-static int *
-hint(int t)
-{
- return &tmp[phicls(t, tmp)].hint.r;
-}
-
-static void
-sethint(int t, int r)
-{
- bits m;
-
- m = tmp[phicls(t, tmp)].hint.m;
- if (*hint(t) == -1)
- if (!(BIT(r) & m))
- *hint(t) = r;
-}
-
-static void
-rcopy(RMap *ma, RMap *mb)
-{
- memcpy(ma->t, mb->t, sizeof ma->t);
- memcpy(ma->r, mb->r, sizeof ma->r);
- bscopy(ma->b, mb->b);
- ma->n = mb->n;
-}
-
-static int
-rfind(RMap *m, int t)
-{
- int i;
-
- for (i=0; i<m->n; i++)
- if (m->t[i] == t)
- return m->r[i];
- return -1;
-}
-
-static Ref
-rref(RMap *m, int t)
-{
- int r, s;
-
- r = rfind(m, t);
- if (r == -1) {
- s = tmp[t].slot;
- assert(s != -1 && "should have spilled");
- return SLOT(s);
- } else
- return TMP(r);
-}
-
-static void
-radd(RMap *m, int t, int r)
-{
- assert((t >= Tmp0 || t == r) && "invalid temporary");
- assert(((RAX <= r && r < RAX + NIReg) || (XMM0 <= r && r < XMM0 + NFReg)) && "invalid register");
- assert(!bshas(m->b, t) && "temporary has mapping");
- assert(!bshas(m->b, r) && "register already allocated");
- assert(m->n <= NIReg+NFReg && "too many mappings");
- bsset(m->b, t);
- bsset(m->b, r);
- m->t[m->n] = t;
- m->r[m->n] = r;
- m->n++;
- regu |= BIT(r);
-}
-
-static Ref
-ralloc(RMap *m, int t)
-{
- bits regs;
- int r, r0, r1;
-
- if (t < Tmp0) {
- assert(bshas(m->b, t));
- return TMP(t);
- }
- if (bshas(m->b, t)) {
- r = rfind(m, t);
- assert(r != -1);
- return TMP(r);
- }
- r = *hint(t);
- if (r == -1 || bshas(m->b, r)) {
- regs = tmp[phicls(t, tmp)].hint.m;
- regs |= m->b->t[0];
- switch (KBASE(tmp[t].cls)) {
- case 0:
- r0 = RAX;
- r1 = RAX + NIReg;
- break;
- case 1:
- r0 = XMM0;
- r1 = XMM0 + NFReg;
- break;
- }
- for (r=r0; r<r1; r++)
- if (!(regs & BIT(r)))
- goto Found;
- for (r=r0; r<r1; r++)
- if (!bshas(m->b, r))
- goto Found;
- diag("rega: no more regs");
- }
-Found:
- radd(m, t, r);
- sethint(t, r);
- return TMP(r);
-}
-
-static int
-rfree(RMap *m, int t)
-{
- int i, r;
-
- if (!bshas(m->b, t))
- return -1;
- for (i=0; m->t[i] != t; i++)
- assert(i+1 < m->n);
- r = m->r[i];
- bsclr(m->b, t);
- bsclr(m->b, r);
- m->n--;
- memmove(&m->t[i], &m->t[i+1], (m->n-i) * sizeof m->t[0]);
- memmove(&m->r[i], &m->r[i+1], (m->n-i) * sizeof m->r[0]);
- return r;
-}
-
-static void
-mdump(RMap *m)
-{
- int i;
-
- for (i=0; i<m->n; i++)
- fprintf(stderr, " (%s, R%d)",
- tmp[m->t[i]].name,
- m->r[i]);
- fprintf(stderr, "\n");
-}
-
-static void
-pmadd(Ref src, Ref dst, int k)
-{
- if (npm == cpm) {
- cpm = cpm * 2 + 16;
- pm = realloc(pm, cpm * sizeof pm[0]);
- if (!pm)
- diag("pmadd: out of memory");
- }
- pm[npm].src = src;
- pm[npm].dst = dst;
- pm[npm].cls = k;
- npm++;
-}
-
-enum PMStat { ToMove, Moving, Moved };
-
-static Ref
-pmrec(enum PMStat *status, int i, int *k)
-{
- Ref swp, swp1;
- int j, k1;
-
- /* note, this routine might emit
- * too many large instructions:
- *
- * , x -- x
- * x -- x -- x |
- * ` x -- x
- *
- * if only the first move is wide
- * the whole cycle will be wide,
- * this is safe but not necessary
- */
-
- if (req(pm[i].src, pm[i].dst))
- return R;
- status[i] = Moving;
- assert(KBASE(*k) == KBASE(pm[i].cls));
- assert((Kw|1) == Kl && (Ks|1) == Kd);
- *k |= KWIDE(pm[i].cls); /* see above */
- swp = R;
- for (j=0; j<npm; j++) {
- if (req(pm[j].src, pm[i].dst))
- switch (status[j]) {
- case ToMove:
- k1 = *k;
- swp1 = pmrec(status, j, &k1);
- if (!req(swp1, R)) {
- assert(req(swp, R));
- swp = swp1;
- *k = k1;
- }
- break;
- case Moving:
- assert(req(swp, R));
- swp = pm[i].dst;
- break;
- case Moved:
- break;
- }
- }
- status[i] = Moved;
- if (req(swp, R)) {
- *curi++ = (Ins){OCopy, pm[i].dst, {pm[i].src}, pm[i].cls};
- return R;
- } else if (!req(swp, pm[i].src)) {
- *curi++ = (Ins){OSwap, R, {pm[i].src, pm[i].dst}, *k};
- return swp;
- } else
- return R;
-
-}
-
-static void
-pmgen()
-{
- int i, k;
- enum PMStat *status;
-
- status = alloc(npm * sizeof status[0]);
- assert(!npm || status[npm-1] == ToMove);
- curi = insb;
- for (i=0; i<npm; i++)
- if (status[i] == ToMove) {
- k = pm[i].cls;
- pmrec(status, i, &k);
- }
-}
-
-static void
-move(int r, Ref to, RMap *m)
-{
- int n, t, r1;
-
- r1 = req(to, R) ? -1 : rfree(m, to.val);
- if (bshas(m->b, r) && r1 != r) {
- /* r is used and not by to */
- for (n=0; m->r[n] != r; n++)
- assert(n+1 < m->n);
- t = m->t[n];
- rfree(m, t);
- bsset(m->b, r);
- ralloc(m, t);
- bsclr(m->b, r);
- }
- t = req(to, R) ? r : to.val;
- radd(m, t, r);
-}
-
-static int
-regcpy(Ins *i)
-{
- return i->op == OCopy && isreg(i->arg[0]);
-}
-
-static Ins *
-dopm(Blk *b, Ins *i, RMap *m)
-{
- RMap m0;
- int n, r, r1, t, s;
- Ins *i0, *i1, *ip, *ir;
- bits def;
-
- m0 = *m;
- i1 = ++i;
- do {
- i--;
- move(i->arg[0].val, i->to, m);
- } while (i != b->ins && regcpy(i-1));
- assert(m0.n <= m->n);
- if (i != b->ins && (i-1)->op == OCall) {
- def = retregs((i-1)->arg[1], 0);
- for (r=0; r<NRSave; r++)
- if (!(BIT(rsave[r]) & def))
- move(rsave[r], R, m);
- }
- for (npm=0, n=0; n<m->n; n++) {
- t = m->t[n];
- s = tmp[t].slot;
- r1 = m->r[n];
- r = rfind(&m0, t);
- if (r != -1)
- pmadd(TMP(r1), TMP(r), tmp[t].cls);
- else if (s != -1)
- pmadd(TMP(r1), SLOT(s), tmp[t].cls);
- }
- for (ip=i; ip<i1; ip++) {
- if (!req(ip->to, R))
- rfree(m, ip->to.val);
- r = ip->arg[0].val;
- if (rfind(m, r) == -1)
- radd(m, r, r);
- }
- pmgen();
-#ifdef TEST_PMOV
- return 0;
-#endif
- n = b->nins - (i1 - i) + (curi - insb);
- i0 = alloc(n * sizeof(Ins));
- ip = icpy(ip = i0, b->ins, i - b->ins);
- ip = icpy(ir = ip, insb, curi - insb);
- ip = icpy(ip, i1, &b->ins[b->nins] - i1);
- b->nins = n;
- b->ins = i0;
- return ir;
-}
-
-static int
-prio(Ref r1, Ref r2)
-{
- /* trivial heuristic to begin with,
- * later we can use the distance to
- * the definition instruction
- */
- (void) r2;
- return *hint(r1.val) != -1;
-}
-
-static void
-insert(Ref *r, Ref **rs, int p)
-{
- int i;
-
- rs[i = p] = r;
- while (i-- > 0 && prio(*r, *rs[i])) {
- rs[i+1] = rs[i];
- rs[i] = r;
- }
-}
-
-static void
-doblk(Blk *b, RMap *cur)
-{
- int x, r, nr;
- bits rs;
- Ins *i;
- Mem *m;
- Ref *ra[4];
-
- if (rtype(b->jmp.arg) == RTmp)
- b->jmp.arg = ralloc(cur, b->jmp.arg.val);
- else if (rtype(b->jmp.arg) == RACall) {
- /* add return registers */
- rs = retregs(b->jmp.arg, 0);
- for (r=0; rs; rs/=2, r++)
- if (rs & 1)
- radd(cur, r, r);
- }
- for (i=&b->ins[b->nins]; i!=b->ins;) {
- switch ((--i)->op) {
- case OCall:
- rs = argregs(i->arg[1], 0);
- for (r=0; r<NRSave; r++)
- if (!(BIT(rsave[r]) & rs))
- rfree(cur, rsave[r]);
- break;
- case OCopy:
- if (isreg(i->arg[0])) {
- i = dopm(b, i, cur);
- continue;
- }
- if (isreg(i->to))
- if (rtype(i->arg[0]) == RTmp)
- sethint(i->arg[0].val, i->to.val);
- /* fall through */
- default:
- if (!req(i->to, R)) {
- assert(rtype(i->to) == RTmp);
- r = rfree(cur, i->to.val);
- if (r == -1 && !isreg(i->to)) {
- *i = (Ins){.op = ONop};
- continue;
- }
- if (i->to.val >= Tmp0)
- i->to = TMP(r);
- }
- break;
- }
- for (x=0, nr=0; x<2; x++)
- switch (rtype(i->arg[x])) {
- case RAMem:
- m = &mem[i->arg[x].val & AMask];
- if (rtype(m->base) == RTmp)
- insert(&m->base, ra, nr++);
- if (rtype(m->index) == RTmp)
- insert(&m->index, ra, nr++);
- break;
- case RTmp:
- insert(&i->arg[x], ra, nr++);
- break;
- }
- for (r=0; r<nr; r++)
- *ra[r] = ralloc(cur, ra[r]->val);
- }
-}
-
-/* register allocation
- * depends on rpo, phi, cost, (and obviously spill)
- */
-void
-rega(Fn *fn)
-{
- int j, n, t, r, r1, x, rl[Tmp0];
- Blk *b, *b1, *s, ***ps, *blist;
- RMap *end, *beg, cur, old;
- Ins *i;
- Phi *p;
- uint u;
- Ref src, dst;
-
- /* 1. setup */
- regu = 0;
- tmp = fn->tmp;
- mem = fn->mem;
- end = alloc(fn->nblk * sizeof end[0]);
- beg = alloc(fn->nblk * sizeof beg[0]);
- for (n=0; n<fn->nblk; n++) {
- bsinit(end[n].b, fn->ntmp);
- bsinit(beg[n].b, fn->ntmp);
- }
- bsinit(cur.b, fn->ntmp);
- bsinit(old.b, fn->ntmp);
-
- for (t=Tmp0; t<fn->ntmp; t++)
- *hint(t) = -1;
- for (b=fn->start, i=b->ins; i-b->ins < b->nins; i++)
- if (i->op != OCopy || !isreg(i->arg[0]))
- break;
- else {
- assert(rtype(i->to) == RTmp);
- sethint(i->to.val, i->arg[0].val);
- }
-
- /* 2. assign registers following post-order */
- for (n=fn->nblk-1; n>=0; n--) {
- b = fn->rpo[n];
- cur.n = 0;
- bszero(cur.b);
- for (x=0; x<2; x++)
- for (t=Tmp0; t<fn->ntmp; t++) {
- assert(bshas(b->out, t) ||
- !bshas(cur.b, t));
- if (bshas(b->out, t))
- if (!bshas(cur.b, t))
- if (x || (r=*hint(t)) != -1)
- if (x || !bshas(cur.b, r))
- ralloc(&cur, t);
- }
- rcopy(&end[n], &cur);
- doblk(b, &cur);
- bscopy(b->in, cur.b);
- for (p=b->phi; p; p=p->link)
- if (rtype(p->to) == RTmp) {
- bsclr(b->in, p->to.val);
- /* heuristic 0:
- * if the phi destination has an
- * argument from a frequent block
- * that was already allocated to
- * 'r', use 'r' as the new hint
- */
- memset(rl, 0, sizeof rl);
- for (u=0; u<p->narg; u++) {
- t = p->arg[u].val;
- b1 = p->blk[u];
- if (rtype(p->arg[u]) == RTmp)
- if ((r=rfind(&end[b1->id], t)) != -1)
- rl[r] += b1->loop;
- }
- for (x=0, j=0; j<Tmp0; j++)
- if (rl[j] > rl[x])
- x = j;
- if (rl[x] >= b->loop)
- *hint(p->to.val) = x;
- }
- if (b->npred > 1) {
- /* heuristic 1:
- * attempt to satisfy hints
- * when it's simple and we have
- * multiple predecessors
- */
- rcopy(&old, &cur);
- curi = &insb[NIns];
- for (j=0; j<old.n; j++) {
- t = old.t[j];
- r = *hint(t);
- r1 = rfind(&cur, t);
- if (r != -1 && r != r1)
- if (!bshas(cur.b, r)) {
- rfree(&cur, t);
- radd(&cur, t, r);
- x = tmp[t].cls;
- emit(OCopy, x, TMP(r1), TMP(r), R);
- }
- }
- if ((j = &insb[NIns] - curi)) {
- b->nins += j;
- i = alloc(b->nins * sizeof(Ins));
- icpy(icpy(i, curi, j), b->ins, b->nins-j);
- b->ins = i;
- }
- }
- rcopy(&beg[n], &cur);
- }
- if (debug['R']) {
- fprintf(stderr, "\n> Register mappings:\n");
- for (n=0; n<fn->nblk; n++) {
- b = fn->rpo[n];
- fprintf(stderr, "\t%-10s beg", b->name);
- mdump(&beg[n]);
- fprintf(stderr, "\t end");
- mdump(&end[n]);
- }
- fprintf(stderr, "\n");
- }
-
- /* 3. compose glue code */
- blist = 0;
- for (b=fn->start;; b=b->link) {
- ps = (Blk**[3]){&b->s1, &b->s2, (Blk*[1]){0}};
- for (; (s=**ps); ps++) {
- npm = 0;
- for (p=s->phi; p; p=p->link) {
- dst = p->to;
- assert(rtype(dst)==RSlot || rtype(dst)==RTmp);
- if (rtype(dst) == RTmp) {
- r = rfind(&beg[s->id], dst.val);
- if (r == -1)
- continue;
- dst = TMP(r);
- }
- for (u=0; p->blk[u]!=b; u++)
- assert(u+1 < p->narg);
- src = p->arg[u];
- if (rtype(src) == RTmp)
- src = rref(&end[b->id], src.val);
- pmadd(src, dst, p->cls);
- }
- for (t=Tmp0; t<fn->ntmp; t++)
- if (bshas(s->in, t)) {
- src = rref(&end[b->id], t);
- dst = rref(&beg[s->id], t);
- pmadd(src, dst, tmp[t].cls);
- }
- pmgen();
- if (curi == insb)
- continue;
- b1 = blknew();
- b1->loop = (b->loop+s->loop) / 2;
- b1->link = blist;
- blist = b1;
- fn->nblk++;
- sprintf(b1->name, "%s_%s", b->name, s->name);
- b1->nins = curi - insb;
- idup(&b1->ins, insb, b1->nins);
- b1->jmp.type = JJmp;
- b1->s1 = s;
- **ps = b1;
- }
- if (!b->link) {
- b->link = blist;
- break;
- }
- }
- for (b=fn->start; b; b=b->link)
- b->phi = 0;
- fn->reg = regu;
-
- if (debug['R']) {
- fprintf(stderr, "\n> After register allocation:\n");
- printfn(fn, stderr);
- }
-}
diff --git a/lisc/spill.c b/lisc/spill.c
@@ -1,507 +0,0 @@
-#include "lisc.h"
-
-static void
-loopmark(Blk *hd, Blk *b, Phi *p)
-{
- int k, head;
- uint n, a;
-
- head = hd->id;
- if (b->id < head)
- return;
- for (; p; p=p->link)
- for (a=0; a<p->narg; a++)
- if (p->blk[a] == b)
- if (rtype(p->arg[a]) == RTmp)
- bsset(hd->gen, p->arg[a].val);
- if (b->visit == head)
- return;
- b->visit = head;
- b->loop *= 10;
- /* aggregate looping information at
- * loop headers */
- bsunion(hd->gen, b->gen);
- for (k=0; k<2; k++)
- if (b->nlive[k] > hd->nlive[k])
- hd->nlive[k] = b->nlive[k];
- for (n=0; n<b->npred; n++)
- loopmark(hd, b->pred[n], b->phi);
-}
-
-static void
-tmpuse(Ref r, int use, int loop, Fn *fn)
-{
- Mem *m;
- Tmp *t;
-
- if (rtype(r) == RAMem) {
- m = &fn->mem[r.val & AMask];
- tmpuse(m->base, 1, loop, fn);
- tmpuse(m->index, 1, loop, fn);
- }
- else if (rtype(r) == RTmp && r.val >= Tmp0) {
- t = &fn->tmp[r.val];
- t->nuse += use;
- t->ndef += !use;
- t->cost += loop;
- }
-}
-
-/* evaluate spill costs of temporaries,
- * this also fills usage information
- * requires rpo, preds
- */
-void
-fillcost(Fn *fn)
-{
- int n, hd;
- uint a;
- Blk *b;
- Ins *i;
- Tmp *t;
- Phi *p;
-
- for (b=fn->start; b; b=b->link) {
- b->loop = 1;
- b->visit = -1;
- }
- if (debug['S'])
- fprintf(stderr, "\n> Loop information:\n");
- for (n=0; n<fn->nblk; n++) {
- b = fn->rpo[n];
- hd = 0;
- for (a=0; a<b->npred; a++)
- if (b->pred[a]->id >= n) {
- loopmark(b, b->pred[a], b->phi);
- hd = 1;
- }
- if (hd && debug['S']) {
- fprintf(stderr, "\t%-10s", b->name);
- fprintf(stderr, " (% 3d ", b->nlive[0]);
- fprintf(stderr, "% 3d) ", b->nlive[1]);
- dumpts(b->gen, fn->tmp, stderr);
- }
- }
- for (t=fn->tmp; t-fn->tmp < fn->ntmp; t++) {
- t->cost = t-fn->tmp < Tmp0 ? 1e6 : 0;
- t->nuse = 0;
- t->ndef = 0;
- }
- for (b=fn->start; b; b=b->link) {
- for (p=b->phi; p; p=p->link) {
- /* todo, the cost computation
- * for p->to is not great... */
- tmpuse(p->to, 0, 0, fn);
- for (a=0; a<p->narg; a++) {
- n = p->blk[a]->loop;
- assert(b->npred==p->narg &&
- "wrong cfg");
- n /= b->npred;
- tmpuse(p->arg[a], 1, n, fn);
- }
- }
- n = b->loop;
- for (i=b->ins; i-b->ins < b->nins; i++) {
- tmpuse(i->to, 0, n, fn);
- tmpuse(i->arg[0], 1, n, fn);
- tmpuse(i->arg[1], 1, n, fn);
- }
- tmpuse(b->jmp.arg, 1, n, fn);
- }
- if (debug['S']) {
- fprintf(stderr, "\n> Spill costs:\n");
- for (n=Tmp0; n<fn->ntmp; n++)
- fprintf(stderr, "\t%-10s %d\n",
- fn->tmp[n].name,
- fn->tmp[n].cost);
- fprintf(stderr, "\n");
- }
-}
-
-static BSet *fst; /* temps to prioritize in registers (for tcmp1) */
-static Tmp *tmp; /* current temporaries (for tcmpX) */
-static int ntmp; /* current # of temps (for limit) */
-static int locs; /* stack size used by locals */
-static int slot4; /* next slot of 4 bytes */
-static int slot8; /* ditto, 8 bytes */
-static BSet mask[2][1]; /* class masks */
-
-static int
-tcmp0(const void *pa, const void *pb)
-{
- return tmp[*(int *)pb].cost - tmp[*(int *)pa].cost;
-}
-
-static int
-tcmp1(const void *pa, const void *pb)
-{
- int c;
-
- c = bshas(fst, *(int *)pb) - bshas(fst, *(int *)pa);
- return c ? c : tcmp0(pa, pb);
-}
-
-static Ref
-slot(int t)
-{
- int s;
-
- if (t < Tmp0)
- diag("spill: cannot spill register");
- s = tmp[t].slot;
- if (s == -1) {
- assert(NAlign == 3);
- /* nice logic to pack stack slots
- * on demand, there can be only
- * one hole and slot4 points to it
- *
- * invariant: slot4 <= slot8
- */
- if (KWIDE(tmp[t].cls)) {
- s = slot8;
- if (slot4 == slot8)
- slot4 += 2;
- slot8 += 2;
- } else {
- s = slot4;
- if (slot4 == slot8) {
- slot8 += 2;
- slot4 += 1;
- } else
- slot4 = slot8;
- }
- s += locs;
- tmp[t].slot = s;
- }
- return SLOT(s);
-}
-
-static void
-limit(BSet *b, int k, BSet *f)
-{
- static int *tarr, maxt;
- int i, nt;
- uint t;
-
- nt = bscount(b);
- if (nt <= k)
- return;
- if (nt > maxt) {
- free(tarr);
- tarr = emalloc(nt * sizeof tarr[0]);
- maxt = nt;
- }
- for (i=0, t=0; bsiter(b, &t); t++) {
- bsclr(b, t);
- tarr[i++] = t;
- }
- if (!f)
- qsort(tarr, nt, sizeof tarr[0], tcmp0);
- else {
- fst = f;
- qsort(tarr, nt, sizeof tarr[0], tcmp1);
- }
- for (i=0; i<k && i<nt; i++)
- bsset(b, tarr[i]);
- for (; i<nt; i++)
- slot(tarr[i]);
-}
-
-static void
-limit2(BSet *b1, int k1, int k2, BSet *fst)
-{
- BSet b2[1];
-
- bsinit(b2, ntmp); /* todo, free those */
- bscopy(b2, b1);
- bsinter(b1, mask[0]);
- bsinter(b2, mask[1]);
- limit(b1, NIReg - k1, fst);
- limit(b2, NFReg - k2, fst);
- bsunion(b1, b2);
-}
-
-static void
-sethint(BSet *u, bits r)
-{
- uint t;
-
- for (t=Tmp0; bsiter(u, &t); t++)
- tmp[phicls(t, tmp)].hint.m |= r;
-}
-
-static void
-reloads(BSet *u, BSet *v)
-{
- uint t;
-
- for (t=Tmp0; bsiter(u, &t); t++)
- if (!bshas(v, t))
- emit(OLoad, tmp[t].cls, TMP(t), slot(t), R);
-}
-
-static void
-store(Ref r, int s)
-{
- static int kstore[] = {
- [Kw] = OStorew, [Kl] = OStorel,
- [Ks] = OStores, [Kd] = OStored,
- };
-
- if (s != -1)
- emit(kstore[tmp[r.val].cls], 0, R, r, SLOT(s));
-}
-
-static int
-regcpy(Ins *i)
-{
- return i->op == OCopy && isreg(i->arg[0]);
-}
-
-static Ins *
-dopm(Blk *b, Ins *i, BSet *v)
-{
- int n, t;
- BSet u[1];
- Ins *i1;
- bits r;
-
- bsinit(u, ntmp); /* todo, free those */
- /* consecutive copies from
- * registers need to be handled
- * as one large instruction
- *
- * fixme: there is an assumption
- * that calls are always followed
- * by copy instructions here, this
- * might not be true if previous
- * passes change
- */
- i1 = ++i;
- do {
- i--;
- t = i->to.val;
- if (!req(i->to, R))
- if (bshas(v, t)) {
- bsclr(v, t);
- store(i->to, tmp[t].slot);
- }
- bsset(v, i->arg[0].val);
- } while (i != b->ins && regcpy(i-1));
- bscopy(u, v);
- if (i != b->ins && (i-1)->op == OCall) {
- v->t[0] &= ~retregs((i-1)->arg[1], 0);
- limit2(v, NISave, NFSave, 0);
- for (r=0, n=0; n<NRSave; n++)
- r |= BIT(rsave[n]);
- v->t[0] |= argregs((i-1)->arg[1], 0);
- } else {
- limit2(v, 0, 0, 0);
- r = v->t[0];
- }
- sethint(v, r);
- reloads(u, v);
- do
- emiti(*--i1);
- while (i1 != i);
- return i;
-}
-
-/* spill code insertion
- * requires spill costs, rpo, liveness
- *
- * Note: this will replace liveness
- * information (in, out) with temporaries
- * that must be in registers at block
- * borders
- *
- * Be careful with:
- * - OCopy instructions to ensure register
- * constraints
- */
-void
-spill(Fn *fn)
-{
- Blk *b, *s1, *s2, *hd, **bp;
- int j, n, l, t, k, lvarg[2];
- BSet u[1], v[1], w[1];
- Ins *i;
- Phi *p;
- Mem *m;
- bits r;
-
- tmp = fn->tmp;
- ntmp = fn->ntmp;
- bsinit(u, ntmp);
- bsinit(v, ntmp);
- bsinit(w, ntmp);
- bsinit(mask[0], ntmp);
- bsinit(mask[1], ntmp);
- locs = fn->slot;
- slot4 = 0;
- slot8 = 0;
- for (t=0; t<ntmp; t++) {
- k = 0;
- if (t >= XMM0 && t < XMM0 + NFReg)
- k = 1;
- else if (t >= Tmp0)
- k = KBASE(tmp[t].cls);
- bsset(mask[k], t);
- }
-
- for (bp=&fn->rpo[fn->nblk]; bp!=fn->rpo;) {
- b = *--bp;
- /* invariant: all bocks with bigger rpo got
- * their in,out updated. */
-
- /* 1. find temporaries in registers at
- * the end of the block (put them in v) */
- curi = 0;
- s1 = b->s1;
- s2 = b->s2;
- hd = 0;
- if (s1 && s1->id <= n)
- hd = s1;
- if (s2 && s2->id <= n)
- if (!hd || s2->id >= hd->id)
- hd = s2;
- r = 0;
- bszero(v);
- if (hd) {
- /* back-edge */
- for (k=0; k<2; k++) {
- n = k == 0 ? NIReg : NFReg;
- bscopy(u, b->out);
- bsinter(u, mask[k]);
- bscopy(w, u);
- bsinter(u, hd->gen);
- bsdiff(w, hd->gen);
- if ((int)bscount(u) < n) { /* fixme */
- j = bscount(w); /* live through */
- l = hd->nlive[k];
- limit(w, n - (l - j), 0);
- bsunion(u, w);
- } else
- limit(u, n, 0);
- bsunion(v, u);
- }
- } else if (s1) {
- liveon(v, b, s1);
- if (s2) {
- liveon(u, b, s2);
- bscopy(w, u);
- bsinter(w, v);
- bsunion(v, u);
- }
- limit2(v, 0, 0, w);
- } else if (rtype(b->jmp.arg) == RACall) {
- /* return */
- r = retregs(b->jmp.arg, 0);
- v->t[0] |= r;
- }
- bscopy(b->out, v);
-
- /* 2. process the block instructions */
- curi = &insb[NIns];
- for (i=&b->ins[b->nins]; i!=b->ins;) {
- i--;
- if (regcpy(i)) {
- i = dopm(b, i, v);
- continue;
- }
- bszero(w);
- if (!req(i->to, R)) {
- assert(rtype(i->to) == RTmp);
- t = i->to.val;
- if (bshas(v, t))
- bsclr(v, t);
- else {
- /* make sure we have a reg
- * for the result */
- bsset(v, t);
- bsset(w, t);
- }
- }
- j = opdesc[i->op].nmem;
- for (n=0; n<2; n++)
- if (rtype(i->arg[n]) == RAMem)
- j--;
- for (n=0; n<2; n++)
- switch (rtype(i->arg[n])) {
- case RAMem:
- t = i->arg[n].val;
- m = &fn->mem[t & AMask];
- if (rtype(m->base) == RTmp) {
- bsset(v, m->base.val);
- bsset(w, m->base.val);
- }
- if (rtype(m->index) == RTmp) {
- bsset(v, m->index.val);
- bsset(w, m->index.val);
- }
- break;
- case RTmp:
- t = i->arg[n].val;
- lvarg[n] = bshas(v, t);
- bsset(v, t);
- if (j-- <= 0)
- bsset(w, t);
- break;
- }
- bscopy(u, v);
- limit2(v, 0, 0, w);
- for (n=0; n<2; n++)
- if (rtype(i->arg[n]) == RTmp) {
- t = i->arg[n].val;
- if (!bshas(v, t)) {
- /* do not reload if the
- * the temporary was dead
- */
- if (!lvarg[n])
- bsclr(u, t);
- i->arg[n] = slot(t);
- }
- }
- reloads(u, v);
- if (!req(i->to, R)) {
- t = i->to.val;
- store(i->to, tmp[t].slot);
- bsclr(v, t);
- }
- emiti(*i);
- r = v->t[0] & (BIT(Tmp0)-1);
- if (r)
- sethint(v, r);
- }
- assert(!r || b==fn->start);
-
- for (p=b->phi; p; p=p->link) {
- assert(rtype(p->to) == RTmp);
- t = p->to.val;
- if (bshas(v, t)) {
- bsclr(v, t);
- store(p->to, tmp[t].slot);
- } else if (bshas(b->in, t))
- /* only if the phi is live */
- p->to = slot(p->to.val);
- }
- bscopy(b->in, v);
- b->nins = &insb[NIns] - curi;
- idup(&b->ins, curi, b->nins);
- }
-
- /* align the locals to a 16 byte boundary */
- assert(NAlign == 3);
- slot8 += slot8 & 3;
- fn->slot += slot8;
-
- if (debug['S']) {
- fprintf(stderr, "\n> Block information:\n");
- for (b=fn->start; b; b=b->link) {
- printf("\t%-10s (% 5d) ", b->name, b->loop);
- dumpts(b->out, fn->tmp, stdout);
- }
- fprintf(stderr, "\n> After spilling:\n");
- printfn(fn, stderr);
- }
-}
diff --git a/lisc/ssa.c b/lisc/ssa.c
@@ -1,516 +0,0 @@
-#include "lisc.h"
-#include <stdarg.h>
-
-static void
-adduse(Tmp *tmp, int ty, Blk *b, ...)
-{
- Use *u;
- int n;
- va_list ap;
-
- va_start(ap, b);
- n = tmp->nuse;
- vgrow(&tmp->use, ++tmp->nuse);
- u = &tmp->use[n];
- u->type = ty;
- u->bid = b->id;
- switch (ty) {
- default:
- diag("ssa: adduse defaulted");
- case UPhi:
- u->u.phi = va_arg(ap, Phi *);
- break;
- case UIns:
- u->u.ins = va_arg(ap, Ins *);
- break;
- case UJmp:
- break;
- }
- va_end(ap);
-}
-
-/* fill usage, phi, and class information
- */
-void
-filluse(Fn *fn)
-{
- Blk *b;
- Phi *p;
- Ins *i;
- int m, t;
- uint a;
- Tmp *tmp;
-
- /* todo, is this the correct file? */
- tmp = fn->tmp;
- for (t=0; t<fn->ntmp; t++) {
- tmp[t].ndef = 0;
- tmp[t].nuse = 0;
- tmp[t].phi = 0;
- tmp[t].cls = 0;
- if (tmp[t].use == 0)
- tmp[t].use = vnew(0, sizeof(Use));
- }
- for (b=fn->start; b; b=b->link) {
- for (p=b->phi; p; p=p->link) {
- assert(rtype(p->to) == RTmp);
- t = p->to.val;
- tmp[t].ndef++;
- tmp[t].cls = p->cls;
- tmp[t].phi = p->to.val;
- for (a=0; a<p->narg; a++)
- if (rtype(p->arg[a]) == RTmp) {
- t = p->arg[a].val;
- adduse(&tmp[t], UPhi, b, p);
- if (!tmp[t].phi)
- tmp[t].phi = p->to.val;
- }
- }
- for (i=b->ins; i-b->ins < b->nins; i++) {
- if (!req(i->to, R)) {
- assert(rtype(i->to) == RTmp);
- t = i->to.val;
- tmp[t].ndef++;
- tmp[t].cls = i->cls;
- }
- for (m=0; m<2; m++)
- if (rtype(i->arg[m]) == RTmp) {
- t = i->arg[m].val;
- adduse(&tmp[t], UIns, b, i);
- }
- }
- if (rtype(b->jmp.arg) == RTmp)
- adduse(&tmp[b->jmp.arg.val], UJmp, b);
- }
-}
-
-static void
-addpred(Blk *bp, Blk *bc)
-{
- uint i;
-
- if (!bc->pred) {
- bc->pred = alloc(bc->npred * sizeof bc->pred[0]);
- for (i=0; i<bc->npred; i++)
- bc->pred[i] = 0;
- }
- for (i=0; bc->pred[i]; i++)
- ;
- bc->pred[i] = bp;
-}
-
-/* fill predecessors information in blocks
- */
-void
-fillpreds(Fn *f)
-{
- Blk *b;
-
- for (b=f->start; b; b=b->link) {
- b->npred = 0;
- b->pred = 0;
- }
- for (b=f->start; b; b=b->link) {
- if (b->s1)
- b->s1->npred++;
- if (b->s2)
- b->s2->npred++;
- }
- for (b=f->start; b; b=b->link) {
- if (b->s1)
- addpred(b, b->s1);
- if (b->s2)
- addpred(b, b->s2);
- }
-}
-
-static int
-rporec(Blk *b, int x)
-{
- Blk *s1, *s2;
-
- if (!b || b->id >= 0)
- return x;
- b->id = 1;
- s1 = b->s1;
- s2 = b->s2;
- if (s1 && s2 && s1->loop > s2->loop) {
- s1 = b->s2;
- s2 = b->s1;
- }
- x = rporec(s1, x);
- x = rporec(s2, x);
- b->id = x;
- assert(x >= 0);
- return x - 1;
-}
-
-/* fill the rpo information in blocks
- */
-void
-fillrpo(Fn *f)
-{
- int n;
- Blk *b, **p;
-
- for (b=f->start; b; b=b->link)
- b->id = -1;
- n = 1 + rporec(f->start, f->nblk-1);
- f->nblk -= n;
- f->rpo = alloc(f->nblk * sizeof f->rpo[0]);
- for (p=&f->start; *p;) {
- b = *p;
- if (b->id == -1) {
- *p = b->link;
- /* todo, free block */
- } else {
- b->id -= n;
- f->rpo[b->id] = b;
- p=&(*p)->link;
- }
- }
-}
-
-/* for dominators computation, read
- * "A Simple, Fast Dominance Algorithm"
- * by K. Cooper, T. Harvey, and K. Kennedy.
- */
-
-static Blk *
-inter(Blk *b1, Blk *b2)
-{
- Blk *bt;
-
- if (b1 == 0)
- return b2;
- while (b1 != b2) {
- if (b1->id < b2->id) {
- bt = b1;
- b1 = b2;
- b2 = bt;
- }
- while (b1->id > b2->id) {
- b1 = b1->idom;
- assert(b1);
- }
- }
- return b1;
-}
-
-static void
-filldom(Fn *fn)
-{
- Blk *b, *d;
- int ch, n;
- uint p;
-
- for (b=fn->start; b; b=b->link) {
- b->idom = 0;
- b->dom = 0;
- b->dlink = 0;
- }
- do {
- ch = 0;
- for (n=1; n<fn->nblk; n++) {
- b = fn->rpo[n];
- d = 0;
- for (p=0; p<b->npred; p++)
- if (b->pred[p]->idom
- || b->pred[p] == fn->start)
- d = inter(d, b->pred[p]);
- if (d != b->idom) {
- ch++;
- b->idom = d;
- }
- }
- } while (ch);
- for (b=fn->start; b; b=b->link)
- if ((d=b->idom)) {
- assert(d != b);
- b->dlink = d->dom;
- d->dom = b;
- }
-}
-
-static int
-sdom(Blk *b1, Blk *b2)
-{
- assert(b1 && b2);
- if (b1 == b2)
- return 0;
- while (b2->id > b1->id)
- b2 = b2->idom;
- return b1 == b2;
-}
-
-static int
-dom(Blk *b1, Blk *b2)
-{
- return b1 == b2 || sdom(b1, b2);
-}
-
-static void
-addfron(Blk *a, Blk *b)
-{
- int n;
-
- for (n=0; n<a->nfron; n++)
- if (a->fron[n] == b)
- return;
- if (!a->nfron)
- a->fron = vnew(++a->nfron, sizeof a->fron[0]);
- else
- vgrow(&a->fron, ++a->nfron);
- a->fron[a->nfron-1] = b;
-}
-
-static void
-fillfron(Fn *fn)
-{
- Blk *a, *b;
-
- for (b=fn->start; b; b=b->link) {
- if (b->s1)
- for (a=b; !sdom(a, b->s1); a=a->idom)
- addfron(a, b->s1);
- if (b->s2)
- for (a=b; !sdom(a, b->s2); a=a->idom)
- addfron(a, b->s2);
- }
-}
-
-static Ref
-refindex(int t, Fn *fn)
-{
- return newtmp(fn->tmp[t].name, fn->tmp[t].cls, fn);
-}
-
-static void
-phiins(Fn *fn)
-{
- BSet u[1], defs[1];
- Blk *a, *b, **blist, **be, **bp;
- Ins *i;
- Phi *p;
- Ref r;
- int t, n, k, nt;
-
- bsinit(u, fn->nblk);
- bsinit(defs, fn->nblk);
- blist = emalloc(fn->nblk * sizeof blist[0]);
- be = &blist[fn->nblk];
- nt = fn->ntmp;
- for (t=Tmp0; t<nt; t++) {
- fn->tmp[t].visit = 0;
- if (fn->tmp[t].phi != 0)
- continue;
- bszero(u);
- k = -1;
- bp = be;
- for (b=fn->start; b; b=b->link) {
- b->visit = 0;
- r = R;
- for (i=b->ins; i-b->ins < b->nins; i++) {
- if (!req(r, R)) {
- if (req(i->arg[0], TMP(t)))
- i->arg[0] = r;
- if (req(i->arg[1], TMP(t)))
- i->arg[1] = r;
- }
- if (req(i->to, TMP(t))) {
- if (!bshas(b->out, t)) {
- if (fn->tmp[t].ndef == 1)
- r = TMP(t);
- else
- r = refindex(t, fn);
- i->to = r;
- } else {
- if (!bshas(u, b->id)) {
- bsset(u, b->id);
- *--bp = b;
- }
- if (k == -1)
- k = i->cls;
- assert(k == i->cls);
- }
- }
- }
- if (!req(r, R) && req(b->jmp.arg, TMP(t)))
- b->jmp.arg = r;
- }
- bscopy(defs, u);
- while (bp != be) {
- fn->tmp[t].visit = t;
- b = *bp++;
- bsclr(u, b->id);
- for (n=0; n<b->nfron; n++) {
- a = b->fron[n];
- if (a->visit++ == 0)
- if (bshas(a->in, t)) {
- p = alloc(sizeof *p);
- p->cls = k;
- p->to = TMP(t);
- p->link = a->phi;
- a->phi = p;
- if (!bshas(defs, a->id))
- if (!bshas(u, a->id)) {
- bsset(u, a->id);
- *--bp = a;
- }
- }
- }
- }
- }
- free(blist);
-}
-
-typedef struct Name Name;
-struct Name {
- Ref r;
- Blk *b;
- Name *up;
-};
-
-static Name *namel;
-
-static Name *
-nnew(Ref r, Blk *b, Name *up)
-{
- Name *n;
-
- if (namel) {
- n = namel;
- namel = n->up;
- } else
- /* could use alloc, here
- * but namel should be reset
- */
- n = emalloc(sizeof *n);
- n->r = r;
- n->b = b;
- n->up = up;
- return n;
-}
-
-static void
-nfree(Name *n)
-{
- n->up = namel;
- namel = n;
-}
-
-static void
-rendef(Ref *r, Blk *b, Name **stk, Fn *fn)
-{
- Ref r1;
- int t;
-
- t = r->val;
- if (req(*r, R) || !fn->tmp[t].visit)
- return;
- r1 = refindex(t, fn);
- fn->tmp[r1.val].visit = t;
- stk[t] = nnew(r1, b, stk[t]);
- *r = r1;
-}
-
-static Ref
-getstk(int t, Blk *b, Name **stk)
-{
- Name *n, *n1;
-
- n = stk[t];
- while (n && !dom(n->b, b)) {
- n1 = n;
- n = n->up;
- nfree(n1);
- }
- stk[t] = n;
- if (!n) {
- /* uh, oh, warn */
- return CON_Z;
- } else
- return n->r;
-}
-
-static void
-renblk(Blk *b, Name **stk, Fn *fn)
-{
- Phi *p;
- Ins *i;
- Blk *s, **ps, *succ[3];
- int t, m;
-
- for (p=b->phi; p; p=p->link)
- rendef(&p->to, b, stk, fn);
- for (i=b->ins; i-b->ins < b->nins; i++) {
- for (m=0; m<2; m++) {
- t = i->arg[m].val;
- if (rtype(i->arg[m]) == RTmp)
- if (fn->tmp[t].visit)
- i->arg[m] = getstk(t, b, stk);
- }
- rendef(&i->to, b, stk, fn);
- }
- t = b->jmp.arg.val;
- if (rtype(b->jmp.arg) == RTmp)
- if (fn->tmp[t].visit)
- b->jmp.arg = getstk(t, b, stk);
- succ[0] = b->s1;
- succ[1] = b->s2;
- succ[2] = 0;
- for (ps=succ; (s=*ps); ps++)
- for (p=s->phi; p; p=p->link) {
- t = p->to.val;
- if ((t=fn->tmp[t].visit)) {
- m = p->narg++;
- if (m == NPred)
- diag("ssa: too many phi arguments");
- p->arg[m] = getstk(t, b, stk);
- p->blk[m] = b;
- }
- }
- for (s=b->dom; s; s=s->dlink)
- renblk(s, stk, fn);
-}
-
-/* require ndef */
-void
-ssa(Fn *fn)
-{
- Name **stk, *n;
- int d, nt;
- Blk *b, *b1;
-
- nt = fn->ntmp;
- stk = emalloc(nt * sizeof stk[0]);
- d = debug['L'];
- debug['L'] = 0;
- filldom(fn);
- if (debug['N']) {
- fprintf(stderr, "\n> Dominators:\n");
- for (b1=fn->start; b1; b1=b1->link) {
- if (!b1->dom)
- continue;
- fprintf(stderr, "%10s:", b1->name);
- for (b=b1->dom; b; b=b->dlink)
- fprintf(stderr, " %s", b->name);
- fprintf(stderr, "\n");
- }
- }
- fillfron(fn);
- filllive(fn);
- phiins(fn);
- renblk(fn->start, stk, fn);
- while (nt--)
- while ((n=stk[nt])) {
- stk[nt] = n->up;
- nfree(n);
- }
- debug['L'] = d;
- free(stk);
- if (debug['N']) {
- fprintf(stderr, "\n> After SSA construction:\n");
- printfn(fn, stderr);
- }
-}
diff --git a/lisc/test/go.sh b/lisc/test/go.sh
@@ -1,116 +0,0 @@
-#!/bin/sh
-
-TMP=/tmp/qbe.zzzz
-
-DRV=$TMP.c
-ASM=$TMP.s
-BIN=$TMP.bin
-OUT=$TMP.out
-
-cleanup() {
- rm -f $DRV $ASM $BIN $OUT
-}
-
-extract() {
- WHAT="$1"
- FILE="$2"
-
- awk "
- /^# >>> $WHAT/ {
- p = 1
- next
- }
- /^# <<</ {
- if (p)
- p = 0
- }
- p
- " $FILE \
- | sed -e 's/# //' \
- | sed -e 's/#$//'
-}
-
-once() {
- T="$1"
-
- if ! test -f $T
- then
- echo "invalid test file $T" >&2
- exit 1
- fi
-
- echo "$T... "
-
- if ! ./lisc $T -o $ASM
- then
- echo "[qbe fail]"
- return 1
- fi
-
- extract driver $T > $DRV
- extract output $T > $OUT
-
- if test -s $DRV
- then
- LNK="$DRV $ASM"
- else
- LNK="$ASM"
- fi
-
- if ! cc -g -o $BIN $LNK
- then
- echo "[cc fail]"
- return 1
- fi
-
- if test -s $OUT
- then
- $BIN a b c | diff - $OUT
- RET=$?
- REASON="output"
- else
- $BIN a b c
- RET=$?
- REASON="returned $RET"
- fi
-
- if test $RET -ne 0
- then
- echo "[$REASON fail]"
- return 1
- fi
-
- printf "\033[1A\033[45C[ok]\n"
-}
-
-
-#trap cleanup TERM QUIT
-
-if test -z "$1"
-then
- echo "usage: test/go.sh {all, SSAFILE}" 2>&1
- exit 1
-fi
-
-case $1 in
- "all")
- F=0
- for T in test/[!_]*.ssa
- do
- once $T
- F=`expr $F + $?`
- done
- if test $F -ge 1
- then
- echo
- echo "$F test(s) failed!"
- else
- echo
- echo "All is fine!"
- fi
- ;;
- *)
- once $1
- exit $?
- ;;
-esac
diff --git a/lisc/tools/abitest.sh b/lisc/tools/abitest.sh
@@ -1,104 +0,0 @@
-#!/bin/sh
-
-OCAMLC=/usr/bin/ocamlc
-QBE=`pwd`/lisc
-
-failure() {
- echo "Failure at stage:" $1 >&2
- exit 1
-}
-
-cleanup() {
- rm -fr $TMP
-}
-
-init() {
- cp tools/abi.ml $TMP
- pushd $TMP > /dev/null
-
- cat > Makefile << EOM
-
-.PHONY: test
-test: caller.o callee.o
- c99 -o \$@ caller.o callee.o
-%.o: %.c
- c99 -c -o \$@ \$<
-%.o: %.ssa
- $QBE -o \$*.s \$<
- c99 -c -o \$@ \$*.s
-
-EOM
-
- if ! $OCAMLC abi.ml -o gentest
- then
- popd > /dev/null
- cleanup
- failure "abifuzz compilation"
- fi
- popd > /dev/null
-}
-
-once() {
- if test -z "$3"
- then
- $TMP/gentest $TMP $1 $2
- else
- $TMP/gentest -s $3 $TMP $1 $2
- fi
- make -C $TMP test > /dev/null || failure "building"
- $TMP/test || failure "runtime"
-}
-
-usage() {
- echo "usage: abitest.sh [-callssa] [-callc] [-s SEED] [-n ITERATIONS]" >&2
- exit 1
-}
-
-N=1
-CALLER=c
-CALLEE=ssa
-
-while test -n "$1"
-do
- case "$1" in
- "-callssa")
- ;;
- "-callc")
- CALLER=ssa
- CALLEE=c
- ;;
- "-s")
- test -n "$2" || usage
- shift
- SEED="$1"
- ;;
- "-n")
- test -n "$2" || usage
- shift
- N="$1"
- ;;
- *)
- usage
- ;;
- esac
- shift
-done
-
-TMP=`mktemp -d abifuzz.XXXXXX`
-
-init
-
-if test -n "$S"
-then
- once $CALLER $CALLEE $SEED
-else
- for n in `seq $N`
- do
- once $CALLER $CALLEE
- echo "$n" | grep "00$"
- done
-fi
-
-echo "All done."
-
-cleanup
diff --git a/lisc/tools/regress.sh b/lisc/tools/regress.sh
@@ -1,17 +0,0 @@
-#!/bin/sh
-
-for t in test/*
-do
- printf "Test $t ... "
-
- ./lisc $t >/tmp/out.0 2>&1
- ./lisc.1 $t >/tmp/out.1 2>&1
-
- if diff /tmp/out.0 /tmp/out.1 > /dev/null
- then
- echo "OK"
- else
- echo "KO"
- break
- fi
-done
diff --git a/lisc/util.c b/lisc/util.c
@@ -1,329 +0,0 @@
-#include "lisc.h"
-
-typedef struct Bitset Bitset;
-typedef struct Vec Vec;
-
-struct Vec {
- ulong mag;
- size_t esz;
- ulong cap;
- union {
- long long ll;
- long double ld;
- void *ptr;
- } align[];
-};
-
-enum {
- VMin = 2,
- VMag = 0xcabba9e,
- NPtr = 256,
-};
-
-Typ typ[NTyp];
-Ins insb[NIns], *curi;
-
-static void *ptr[NPtr];
-static void **pool = ptr;
-static int nptr = 1;
-
-void
-diag(char *s)
-{
- fputs(s, stderr);
- fputc('\n', stderr);
- abort();
-}
-
-void *
-emalloc(size_t n)
-{
- void *p;
-
- p = calloc(1, n);
- if (!p)
- diag("emalloc: out of memory");
- return p;
-}
-
-void *
-alloc(size_t n)
-{
- void **pp;
-
- if (n == 0)
- return 0;
- if (nptr >= NPtr) {
- pp = emalloc(NPtr * sizeof(void *));
- pp[0] = pool;
- pool = pp;
- nptr = 1;
- }
- return pool[nptr++] = emalloc(n);
-}
-
-void
-freeall()
-{
- void **pp;
-
- for (;;) {
- for (pp = &pool[1]; pp < &pool[nptr]; pp++)
- free(*pp);
- pp = pool[0];
- if (!pp)
- break;
- free(pool);
- pool = pp;
- nptr = NPtr;
- }
- nptr = 1;
-}
-
-Blk *
-blknew()
-{
- static Blk z;
- Blk *b;
-
- b = alloc(sizeof *b);
- *b = z;
- return b;
-}
-
-void
-emit(int op, int k, Ref to, Ref arg0, Ref arg1)
-{
- if (curi == insb)
- diag("emit: too many instructions");
- *--curi = (Ins){
- .op = op, .cls = k,
- .to = to, .arg = {arg0, arg1}
- };
-}
-
-void
-emiti(Ins i)
-{
- emit(i.op, i.cls, i.to, i.arg[0], i.arg[1]);
-}
-
-void
-idup(Ins **pd, Ins *s, ulong n)
-{
- *pd = alloc(n * sizeof(Ins));
- memcpy(*pd, s, n * sizeof(Ins));
-}
-
-Ins *
-icpy(Ins *d, Ins *s, ulong n)
-{
- memcpy(d, s, n * sizeof(Ins));
- return d + n;
-}
-
-void *
-vnew(ulong len, size_t esz)
-{
- ulong cap;
- Vec *v;
-
- for (cap=VMin; cap<len; cap*=2)
- ;
- v = alloc(cap * esz + sizeof(Vec));
- v->mag = VMag;
- v->cap = cap;
- v->esz = esz;
- return v + 1;
-}
-
-void
-vgrow(void *vp, ulong len)
-{
- Vec *v;
- void *v1;
-
- v = *(Vec **)vp - 1;
- assert(v+1 && v->mag == VMag);
- if (v->cap >= len)
- return;
- v1 = vnew(len, v->esz);
- memcpy(v1, v+1, v->cap * v->esz);
- *(Vec **)vp = v1;
-}
-
-int
-phicls(int t, Tmp *tmp /*, int c*/)
-{
- if (tmp[t].phi)
- return tmp[t].phi;
- return t;
-#if 0
- int t1;
-
- t1 = tmp[t].phi;
- if (!t1)
- t1 = t;
- if (t != t1) {
- t1 = phitmp(t1, tmp, c);
- if (c)
- tmp[t].phi = t1;
- }
- return t1;
-#endif
-}
-
-Ref
-newtmp(char *prfx, int k, Fn *fn)
-{
- static int n;
- int t;
-
- t = fn->ntmp++;
- vgrow(&fn->tmp, fn->ntmp);
- sprintf(fn->tmp[t].name, "%s%d", prfx, ++n);
- fn->tmp[t].cls = k;
- fn->tmp[t].slot = -1;
- fn->tmp[t].nuse = +1;
- fn->tmp[t].ndef = +1;
- return TMP(t);
-}
-
-Ref
-getcon(int64_t val, Fn *fn)
-{
- int c;
-
- for (c=0; c<fn->ncon; c++)
- if (fn->con[c].type == CBits && fn->con[c].bits.i == val)
- return CON(c);
- fn->ncon++;
- vgrow(&fn->con, fn->ncon);
- fn->con[c] = (Con){.type = CBits, .bits.i = val};
- return CON(c);
-}
-
-void
-addcon(Con *c0, Con *c1)
-{
- if (c0->type == CUndef)
- *c0 = *c1;
- else {
- if (c1->type == CAddr) {
- if (c0->type == CAddr)
- diag("addcon: adding two addresses");
- c0->type = CAddr;
- strcpy(c0->label, c1->label);
- }
- c0->bits.i += c1->bits.i;
- }
-}
-
-void
-bsinit(BSet *bs, uint n)
-{
- n = (n + NBit-1) / NBit;
- bs->nt = n;
- bs->t = alloc(n * sizeof bs->t[0]);
-}
-
-uint
-bscount(BSet *bs)
-{
- uint i, j, n;
-
- n = 0;
- for (i=0; i<bs->nt; i++)
- for (j=0; j<NBit; j++)
- if (bs->t[i] & BIT(j))
- n++;
- return n;
-}
-
-static inline uint
-bsmax(BSet *bs)
-{
- return bs->nt * NBit;
-}
-
-void
-bsset(BSet *bs, uint elt)
-{
- assert(elt < bsmax(bs));
- bs->t[elt/NBit] |= BIT(elt%NBit);
-}
-
-void
-bsclr(BSet *bs, uint elt)
-{
- assert(elt < bsmax(bs));
- bs->t[elt/NBit] &= ~BIT(elt%NBit);
-}
-
-#define BSOP(f, op) \
- void \
- f(BSet *a, BSet *b) \
- { \
- uint i; \
- \
- assert(a->nt == b->nt); \
- for (i=0; i<a->nt; i++) \
- a->t[i] op b->t[i]; \
- }
-
-BSOP(bscopy, =)
-BSOP(bsunion, |=)
-BSOP(bsinter, &=)
-BSOP(bsdiff, &= ~)
-
-int
-bsequal(BSet *a, BSet *b)
-{
- uint i;
-
- assert(a->nt == b->nt);
- for (i=0; i<a->nt; i++)
- if (a->t[i] != b->t[i])
- return 0;
- return 1;
-}
-
-void
-bszero(BSet *bs)
-{
- memset(bs->t, 0, bs->nt * sizeof bs->t[0]);
-}
-
-/* iterates on a bitset, use as follows
- *
- * for (i=0; bsiter(set, &i); i++)
- * use(i);
- *
- */
-int
-bsiter(BSet *bs, uint *elt)
-{
- uint i;
-
- for (i=*elt;; i++) {
- while (i < bsmax(bs) && !bs->t[i/NBit])
- i = (i + NBit) & -NBit;
- if (i >= bsmax(bs))
- return 0;
- if (bshas(bs, i)) {
- *elt = i;
- return 1;
- }
- }
-}
-
-void
-dumpts(BSet *bs, Tmp *tmp, FILE *f)
-{
- uint t;
-
- fprintf(f, "[");
- for (t=Tmp0; bsiter(bs, &t); t++)
- fprintf(f, " %s", tmp[t].name);
- fprintf(f, " ]\n");
-}
diff --git a/minic/mcc b/minic/mcc
@@ -29,7 +29,7 @@ fi
../minic/minic < $file > /tmp/minic.ssa &&
-../lisc/lisc < /tmp/minic.ssa > /tmp/minic.s &&
+../src/qbe < /tmp/minic.ssa > /tmp/minic.s &&
cc $flags /tmp/minic.s
if test $? -ne 0
diff --git a/src/.gitignore b/src/.gitignore
@@ -0,0 +1,5 @@
+qbe
+doc
+.comfile
+*.o
+*.out
diff --git a/lisc/.tag b/src/.tag
diff --git a/src/Makefile b/src/Makefile
@@ -0,0 +1,17 @@
+BIN = qbe
+OBJ = main.o util.o parse.o mem.o ssa.o copy.o live.o isel.o spill.o rega.o emit.o
+
+CFLAGS = -Wall -Wextra -std=c99 -g -pedantic
+
+$(BIN): $(OBJ)
+ $(CC) $(LDFLAGS) $(OBJ) -o $@
+
+$(OBJ): all.h
+
+.PHONY: clean check syndoc
+clean:
+ rm -f $(BIN) $(OBJ)
+check: $(BIN)
+ test/go.sh all
+syndoc:
+ unison -auto doc ssh://qcar@h/data/d/ssa-doc
diff --git a/lisc/lisc.h b/src/all.h
diff --git a/src/copy.c b/src/copy.c
@@ -0,0 +1,159 @@
+#include "all.h"
+
+typedef struct RList RList;
+struct RList {
+ int t;
+ RList *l;
+};
+
+static Ref /* copyof: look r up in the copy table cp when it is a temporary */
+copyof(Ref r, Ref *cp)
+{
+ if (rtype(r) == RTmp)
+ return cp[r.val]; /* cp is indexed by temporary number */
+ else
+ return r; /* any other kind of reference is returned unchanged */
+}
+
+static void /* update: store rcp as the copy of r and queue r if the entry changed */
+update(Ref r, Ref rcp, Ref *cp, RList **w)
+{
+ RList *l;
+
+ if (!req(cp[r.val], rcp)) { /* nothing to do when the entry is already rcp */
+ cp[r.val] = rcp;
+ l = emalloc(sizeof *l);
+ l->t = r.val; /* remember which temporary changed */
+ l->l = *w; /* push the new node at the head of the list w points to */
+ *w = l;
+ }
+}
+
+static void /* visitphi: derive the copy-table entry of p->to from the phi arguments */
+visitphi(Phi *p, Ref *cp, RList **w)
+{
+ uint a;
+ Ref r, r1;
+
+ r = R;
+ for (a=0; a<p->narg; a++) {
+ r1 = copyof(p->arg[a], cp);
+ if (req(r1, R)) /* skip arguments whose copy is still R */
+ continue;
+ if (req(r, R) || req(r, r1))
+ r = r1; /* first usable argument, or one that agrees with the others */
+ else {
+ r = p->to; /* arguments disagree: the phi result is its own copy */
+ break;
+ }
+ }
+ assert(!req(r, R)); /* at least one argument must have produced a copy */
+ update(p->to, r, cp, w);
+}
+
+static void /* visitins: update the copy-table entry for the result of instruction i */
+visitins(Ins *i, Ref *cp, RList **w)
+{
+ Ref r;
+
+ if (i->op == OCopy) {
+ r = copyof(i->arg[0], cp); /* the result takes over the copy of its source */
+ update(i->to, r, cp, w);
+ } else if (!req(i->to, R)) { /* any other instruction that has a result */
+ assert(rtype(i->to) == RTmp);
+ update(i->to, i->to, cp, w); /* such a result is recorded as its own copy */
+ }
+}
+
+void /* copy: copy elimination over fn; fills a copy table, then rewrites phis, instructions and jumps */
+copy(Fn *fn)
+{
+ Blk *b;
+ Ref *cp, r; /* cp[t] is the reference temporary t is a copy of */
+ RList *w, *w1; /* w is the list of temporaries whose entry changed */
+ Use *u, *u1;
+ Ins *i;
+ Phi *p, **pp;
+ uint a;
+ int t;
+
+ w = 0;
+ cp = emalloc(fn->ntmp * sizeof cp[0]);
+ for (b=fn->start; b; b=b->link) { /* first pass: visit every phi and instruction once */
+ for (p=b->phi; p; p=p->link)
+ visitphi(p, cp, &w);
+ for (i=b->ins; i-b->ins < b->nins; i++)
+ visitins(i, cp, &w);
+ }
+ while ((w1=w)) { /* pop changed temporaries until the list is empty */
+ t = w->t;
+ w = w->l;
+ free(w1);
+ u = fn->tmp[t].use;
+ u1 = u + fn->tmp[t].nuse;
+ for (; u<u1; u++) /* revisit every recorded use of t */
+ switch (u->type) {
+ default:
+ diag("copy: invalid use");
+ case UPhi:
+ visitphi(u->u.phi, cp, &w);
+ break;
+ case UIns:
+ visitins(u->u.ins, cp, &w);
+ break;
+ case UJmp: /* jump uses define nothing, so there is nothing to update */
+ break;
+ }
+ }
+ for (b=fn->start; b; b=b->link) { /* rewrite pass */
+ for (pp=&b->phi; (p=*pp);) {
+ r = cp[p->to.val];
+ if (!req(r, p->to)) { /* the phi is a copy of something else: unlink it */
+ *pp = p->link;
+ continue;
+ }
+ for (a=0; a<p->narg; a++)
+ if (rtype(p->arg[a]) == RTmp) {
+ r = cp[p->arg[a].val];
+ assert(!req(r, R));
+ p->arg[a] = r; /* replace the argument by its copy */
+ }
+ pp=&p->link;
+ }
+ for (i=b->ins; i-b->ins < b->nins; i++) {
+ r = cp[i->to.val];
+ if (!req(r, i->to)) { /* table entry differs from the result: turn i into a nop */
+ *i = (Ins){.op = ONop};
+ continue;
+ }
+ for (a=0; a<2; a++)
+ if (rtype(i->arg[a]) == RTmp) {
+ r = cp[i->arg[a].val];
+ assert(!req(r, R));
+ i->arg[a] = r; /* replace the argument by its copy */
+ }
+ }
+ if (rtype(b->jmp.arg) == RTmp) { /* same replacement for the jump argument */
+ r = cp[b->jmp.arg.val];
+ assert(!req(r, R));
+ b->jmp.arg = r;
+ }
+ }
+ if (debug['C']) { /* optional dump of the table and of the rewritten function */
+ fprintf(stderr, "\n> Copy information:");
+ for (t=Tmp0; t<fn->ntmp; t++) {
+ if (req(cp[t], R)) {
+ fprintf(stderr, "\n%10s not seen!",
+ fn->tmp[t].name);
+ }
+ else if (!req(cp[t], TMP(t))) { /* only list temporaries that are copies of something else */
+ fprintf(stderr, "\n%10s copy of ",
+ fn->tmp[t].name);
+ printref(cp[t], fn, stderr);
+ }
+ }
+ fprintf(stderr, "\n\n> After copy elimination:\n");
+ printfn(fn, stderr);
+ }
+ free(cp);
+}
diff --git a/src/emit.c b/src/emit.c
@@ -0,0 +1,666 @@
+#include "all.h"
+
+enum {
+ SLong = 0,
+ SWord = 1,
+ SShort = 2,
+ SByte = 3,
+
+ Ki = -1, /* matches Kw and Kl */
+ Ka = -2, /* matches all classes */
+};
+
+/* Instruction format strings:
+ *
+ * if the format string starts with -, the instruction
+ * is assumed to be 3-address and is put in 2-address
+ * mode using an extra mov if necessary
+ *
+ * if the format string starts with +, the same as the
+ * above applies, but commutativity is also assumed
+ *
+ * %k is used to set the class of the instruction,
+ * it'll expand to "l", "q", "ss", "sd", depending
+ * on the instruction class
+ * %0 designates the first argument
+ * %1 designates the second argument
+ * %= designates the result
+ *
+ * if %k is not used, a prefix to 0, 1, or = must be
+ * added, it can be:
+ * M - memory reference
+ * L - long (64 bits)
+ * W - word (32 bits)
+ * H - short (16 bits)
+ * B - byte (8 bits)
+ * S - single precision float
+ * D - double precision float
+ */
+static struct {
+ short op;
+ short cls;
+ char *asm;
+} omap[] = {
+ { OAdd, Ka, "+add%k %1, %=" },
+ { OSub, Ka, "-sub%k %1, %=" },
+ { OAnd, Ki, "+and%k %1, %=" },
+ { OOr, Ki, "+or%k %1, %=" },
+ { OXor, Ki, "+xor%k %1, %=" },
+ { OSar, Ki, "-sar%k %B1, %=" },
+ { OShr, Ki, "-shr%k %B1, %=" },
+ { OShl, Ki, "-shl%k %B1, %=" },
+ { OMul, Ki, "+imul%k %1, %=" },
+ { OMul, Ks, "+mulss %1, %=" }, /* fixme */
+ { OMul, Kd, "+mulsd %1, %=" },
+ { ODiv, Ka, "-div%k %1, %=" },
+ { OStorel, Ka, "movq %L0, %M1" },
+ { OStorew, Ka, "movl %W0, %M1" },
+ { OStoreh, Ka, "movw %H0, %M1" },
+ { OStoreb, Ka, "movb %B0, %M1" },
+ { OStores, Ka, "movss %S0, %M1" },
+ { OStored, Ka, "movsd %D0, %M1" },
+ { OLoad, Ka, "mov%k %M0, %=" },
+ { OLoadsw, Kl, "movslq %M0, %L=" },
+ { OLoadsw, Kw, "movl %M0, %W=" },
+ { OLoaduw, Ki, "movl %M0, %W=" },
+ { OLoadsh, Ki, "movsw%k %M0, %=" },
+ { OLoaduh, Ki, "movzw%k %M0, %=" },
+ { OLoadsb, Ki, "movsb%k %M0, %=" },
+ { OLoadub, Ki, "movzb%k %M0, %=" },
+ { OExtsw, Kl, "movslq %W0, %L=" },
+ { OExtuw, Kl, "movl %W0, %W=" },
+ { OExtsh, Ki, "movsw%k %H0, %=" },
+ { OExtuh, Ki, "movzw%k %H0, %=" },
+ { OExtsb, Ki, "movsb%k %B0, %=" },
+ { OExtub, Ki, "movzb%k %B0, %=" },
+
+ { OExts, Kd, "cvtss2sd %0, %=" }, /* see if factorization is possible */
+ { OTruncd, Ks, "cvttsd2ss %0, %=" },
+ { OFtosi, Kw, "cvttss2si %0, %=" },
+ { OFtosi, Kl, "cvttsd2si %0, %=" },
+ { OSitof, Ks, "cvtsi2ss %W0, %=" },
+ { OSitof, Kd, "cvtsi2sd %L0, %=" },
+ { OCast, Ki, "movq %D0, %L=" },
+ { OCast, Ka, "movq %L0, %D=" },
+
+ { OAddr, Ki, "lea%k %M0, %=" },
+ { OSwap, Ki, "xchg%k %0, %1" },
+ { OSign, Kl, "cqto" },
+ { OSign, Kw, "cltd" },
+ { OXDiv, Ki, "div%k %0" },
+ { OXIDiv, Ki, "idiv%k %0" },
+ { OXCmp, Ks, "comiss %S0, %S1" }, /* fixme, Kf */
+ { OXCmp, Kd, "comisd %D0, %D1" },
+ { OXCmp, Ki, "cmp%k %0, %1" },
+ { OXTest, Ki, "test%k %0, %1" },
+ { OXSet+ICeq, Ki, "setz %B=\n\tmovzb%k %B=, %=" },
+ { OXSet+ICsle, Ki, "setle %B=\n\tmovzb%k %B=, %=" },
+ { OXSet+ICslt, Ki, "setl %B=\n\tmovzb%k %B=, %=" },
+ { OXSet+ICsgt, Ki, "setg %B=\n\tmovzb%k %B=, %=" },
+ { OXSet+ICsge, Ki, "setge %B=\n\tmovzb%k %B=, %=" },
+ { OXSet+ICne, Ki, "setnz %B=\n\tmovzb%k %B=, %=" },
+ { OXSet+ICXnp, Ki, "setnp %B=\n\tmovsb%k %B=, %=" },
+ { OXSet+ICXp, Ki, "setp %B=\n\tmovsb%k %B=, %=" },
+ { NOp, 0, 0 }
+};
+
+static char *rname[][4] = {
+ [RAX] = {"rax", "eax", "ax", "al"},
+ [RBX] = {"rbx", "ebx", "bx", "bl"},
+ [RCX] = {"rcx", "ecx", "cx", "cl"},
+ [RDX] = {"rdx", "edx", "dx", "dl"},
+ [RSI] = {"rsi", "esi", "si", "sil"},
+ [RDI] = {"rdi", "edi", "di", "dil"},
+ [RBP] = {"rbp", "ebp", "bp", "bpl"},
+ [RSP] = {"rsp", "esp", "sp", "spl"},
+ [R8 ] = {"r8" , "r8d", "r8w", "r8b"},
+ [R9 ] = {"r9" , "r9d", "r9w", "r9b"},
+ [R10] = {"r10", "r10d", "r10w", "r10b"},
+ [R11] = {"r11", "r11d", "r11w", "r11b"},
+ [R12] = {"r12", "r12d", "r12w", "r12b"},
+ [R13] = {"r13", "r13d", "r13w", "r13b"},
+ [R14] = {"r14", "r14d", "r14w", "r14b"},
+ [R15] = {"r15", "r15d", "r15w", "r15b"},
+};
+
+
+static int /* slot: byte displacement for slot number s; callers print it in front of (%rbp) */
+slot(int s, Fn *fn)
+{
+ struct { int i:14; } x; /* 14-bit signed field used for the sign extension below */
+
+ /* sign extend s using a bitfield */
+ x.i = s;
+ assert(NAlign == 3);
+ if (x.i < 0)
+ return -4 * x.i; /* negative slot numbers give a positive displacement */
+ else {
+ assert(fn->slot >= x.i);
+ return -4 * (fn->slot - x.i); /* others are counted down from fn->slot, 4 bytes per unit */
+ }
+}
+
+static void /* emitcon: print the constant con to f */
+emitcon(Con *con, FILE *f)
+{
+ switch (con->type) {
+ default:
+ diag("emit: invalid constant");
+ case CAddr: /* label, followed by a signed offset when it is non-zero */
+ fputs(con->label, f);
+ if (con->bits.i)
+ fprintf(f, "%+"PRId64, con->bits.i); /* the + flag always prints the sign */
+ break;
+ case CBits: /* plain integer value */
+ fprintf(f, "%"PRId64, con->bits.i);
+ break;
+ }
+}
+
+static char * /* regtoa: assembly name of register reg; sz selects the rname[] column */
+regtoa(int reg, int sz)
+{
+ static char buf[6]; /* holds "xmm15" plus the terminator; overwritten by each xmm call */
+
+ if (reg >= XMM0) {
+ sprintf(buf, "xmm%d", reg-XMM0); /* sz is not used for xmm registers */
+ return buf;
+ } else
+ return rname[reg][sz];
+}
+
+static Ref /* getarg: map the selector character c of a format string to an operand of i */
+getarg(char c, Ins *i)
+{
+ switch (c) {
+ default:
+ diag("emit: 0, 1, = expected in format");
+ case '0':
+ return i->arg[0]; /* first argument */
+ case '1':
+ return i->arg[1]; /* second argument */
+ case '=':
+ return i->to; /* result */
+ }
+}
+
+static void emitins(Ins, Fn *, FILE *);
+
+static void /* emitcopy: build an OCopy of class k from r2 into r1 and hand it to emitins() */
+emitcopy(Ref r1, Ref r2, int k, Fn *fn, FILE *f)
+{
+ Ins icp; /* arg[1] is left unset */
+
+ icp.op = OCopy;
+ icp.arg[0] = r2; /* source */
+ icp.to = r1; /* destination */
+ icp.cls = k;
+ emitins(icp, fn, f);
+}
+
+static void /* emitf: print one assembly line for i by expanding the format string s */
+emitf(char *s, Ins *i, Fn *fn, FILE *f)
+{
+ static char clstoa[][3] = {"l", "q", "ss", "sd"}; /* %k suffixes, indexed by i->cls */
+ char c;
+ int sz; /* NOTE(review): unassigned when %D or %S is the first operand escape, yet passed to regtoa() */
+ Ref ref;
+ Mem *m;
+ Con off;
+
+ switch (*s) {
+ case '+': /* commutative: swap the arguments when arg[1] is the result */
+ if (req(i->arg[1], i->to)) {
+ ref = i->arg[0];
+ i->arg[0] = i->arg[1];
+ i->arg[1] = ref;
+ }
+ /* fall through */
+ case '-': /* 2-address form: copy arg[0] into the result before the instruction */
+ if (req(i->arg[1], i->to) && !req(i->arg[0], i->to))
+ diag("emit: cannot convert to 2-address");
+ emitcopy(i->to, i->arg[0], i->cls, fn, f);
+ s++; /* skip the + or - marker */
+ break;
+ }
+
+ fputc('\t', f);
+Next:
+ while ((c = *s++) != '%') /* copy literal text up to the next escape */
+ if (!c) { /* end of format: finish the line */
+ fputc('\n', f);
+ return;
+ } else
+ fputc(c, f);
+ switch ((c = *s++)) {
+ default:
+ diag("emit: invalid escape");
+ case '%':
+ fputc('%', f);
+ break;
+ case 'k':
+ fputs(clstoa[i->cls], f);
+ break;
+ case '0':
+ case '1':
+ case '=':
+ sz = KWIDE(i->cls) ? SLong : SWord; /* no size prefix: size follows the instruction class */
+ s--; /* step back so the selector is read again below */
+ /* fall through */
+ case 'D':
+ case 'S':
+ Ref:
+ c = *s++;
+ ref = getarg(c, i);
+ switch (rtype(ref)) {
+ default:
+ diag("emit: invalid reference");
+ case RTmp:
+ assert(isreg(ref));
+ fprintf(f, "%%%s", regtoa(ref.val, sz));
+ break;
+ case RSlot:
+ fprintf(f, "%d(%%rbp)", slot(ref.val, fn));
+ break;
+ case RAMem:
+ Mem: /* also reached by goto from the 'M' case */
+ m = &fn->mem[ref.val & AMask];
+ if (rtype(m->base) == RSlot) { /* fold a slot base into the offset; this rewrites m in place */
+ off.type = CBits;
+ off.bits.i = slot(m->base.val, fn);
+ addcon(&m->offset, &off);
+ m->base = TMP(RBP);
+ }
+ if (m->offset.type != CUndef)
+ emitcon(&m->offset, f);
+ if (req(m->base, R) && req(m->index, R)) /* offset only, no parenthesized part */
+ break;
+ fputc('(', f);
+ if (!req(m->base, R))
+ fprintf(f, "%%%s", regtoa(m->base.val, SLong));
+ if (!req(m->index, R))
+ fprintf(f, ", %%%s, %d",
+ regtoa(m->index.val, SLong),
+ m->scale
+ );
+ fputc(')', f);
+ break;
+ case RCon: /* constants are printed with a $ prefix here */
+ fputc('$', f);
+ emitcon(&fn->con[ref.val], f);
+ break;
+ }
+ break;
+ case 'L':
+ sz = SLong;
+ goto Ref;
+ case 'W':
+ sz = SWord;
+ goto Ref;
+ case 'H':
+ sz = SShort;
+ goto Ref;
+ case 'B':
+ sz = SByte;
+ goto Ref;
+ case 'M': /* operand used as a memory reference */
+ c = *s++;
+ ref = getarg(c, i);
+ switch (rtype(ref)) {
+ default:
+ diag("emit: invalid memory reference");
+ case RAMem:
+ goto Mem;
+ case RSlot:
+ fprintf(f, "%d(%%rbp)", slot(ref.val, fn));
+ break;
+ case RCon: /* constant followed by (%rip) */
+ emitcon(&fn->con[ref.val], f);
+ fprintf(f, "(%%rip)");
+ break;
+ case RTmp: /* register in parentheses */
+ assert(isreg(ref));
+ fprintf(f, "(%%%s)", regtoa(ref.val, SLong));
+ break;
+ }
+ break;
+ }
+ goto Next;
+}
+
+static void /* emitins: print the assembly for one instruction; i is a private copy and may be modified */
+emitins(Ins i, Fn *fn, FILE *f)
+{
+ Ref r;
+ int64_t val;
+ int o;
+
+ switch (i.op) {
+ default:
+ Table:
+ /* most instructions are just pulled out of
+ * the table omap[]; some special cases are
+ * detailed below */
+ for (o=0;; o++) {
+ /* this linear search should really be a binary
+ * search */
+ if (omap[o].op == NOp) /* NOp marks the end of the table */
+ diag("emit: no entry found for instruction");
+ if (omap[o].op == i.op)
+ if (omap[o].cls == i.cls
+ || (omap[o].cls == Ki && KBASE(i.cls) == 0)
+ || (omap[o].cls == Ka))
+ break;
+ }
+ emitf(omap[o].asm, &i, fn, f);
+ break;
+ case ONop:
+ /* just do nothing for nops, they are inserted
+ * by some passes */
+ break;
+ case OMul:
+ /* here, we try to use the 3-address form
+ * of multiplication when possible */
+ if (rtype(i.arg[1]) == RCon) { /* move a constant second argument to the front */
+ r = i.arg[0];
+ i.arg[0] = i.arg[1];
+ i.arg[1] = r;
+ }
+ if (KBASE(i.cls) == 0 /* only available for ints */
+ && rtype(i.arg[0]) == RCon
+ && rtype(i.arg[1]) == RTmp) {
+ emitf("imul%k %0, %1, %=", &i, fn, f);
+ break;
+ }
+ goto Table;
+ case OSub:
+ /* we have to use the negation trick to handle
+ * some 3-address subtractions */
+ if (req(i.to, i.arg[1])) { /* result is also the second argument: negate it, then add arg[0] */
+ emitf("neg%k %=", &i, fn, f);
+ emitf("add%k %0, %=", &i, fn, f);
+ break;
+ }
+ goto Table;
+ case OCopy:
+ /* make sure we don't emit useless copies;
+ * also, we can use a trick to load 64-bit
+ * registers, it's detailed in my note below
+ * http://c9x.me/art/notes.html?09/19/2015 */
+ if (req(i.to, R) || req(i.arg[0], R))
+ break;
+ if (isreg(i.to)
+ && rtype(i.arg[0]) == RCon
+ && i.cls == Kl
+ && fn->con[i.arg[0].val].type == CBits
+ && (val = fn->con[i.arg[0].val].bits.i) >= 0
+ && val <= UINT32_MAX) {
+ emitf("movl %W0, %W=", &i, fn, f);
+ } else if (!req(i.arg[0], i.to)) /* a copy onto itself prints nothing */
+ emitf("mov%k %0, %=", &i, fn, f);
+ break;
+ case OCall:
+ /* calls simply have a weird syntax in AT&T
+ * assembly... */
+ switch (rtype(i.arg[0])) {
+ default:
+ diag("emit: invalid call instruction");
+ case RCon:
+ fprintf(f, "\tcallq ");
+ emitcon(&fn->con[i.arg[0].val], f);
+ fprintf(f, "\n");
+ break;
+ case RTmp:
+ emitf("callq *%L0", &i, fn, f);
+ break;
+ }
+ break;
+ case OSAlloc:
+ /* there is no good reason why this is here;
+ * maybe we should split OSAlloc into 2 different
+ * instructions depending on the result
+ */
+ emitf("subq %L0, %%rsp", &i, fn, f);
+ if (!req(i.to, R)) /* a result was requested: it receives the new %rsp */
+ emitcopy(i.to, TMP(RSP), Kl, fn, f);
+ break;
+ case OSwap:
+ if (KBASE(i.cls) == 0)
+ goto Table;
+ /* for floats, there is no swap instruction
+ * so we use xmm15 as a temporary
+ */
+ emitcopy(TMP(XMM0+15), i.arg[0], i.cls, fn, f);
+ emitcopy(i.arg[0], i.arg[1], i.cls, fn, f);
+ emitcopy(i.arg[1], TMP(XMM0+15), i.cls, fn, f);
+ break;
+ }
+}
+
+static int /* cneg: return the comparison code that is the negation of cmp */
+cneg(int cmp)
+{
+ switch (cmp) {
+ default: diag("emit: cneg() unhandled comparison");
+ case ICule: return ICugt;
+ case ICult: return ICuge;
+ case ICsle: return ICsgt;
+ case ICslt: return ICsge;
+ case ICsgt: return ICsle;
+ case ICsge: return ICslt;
+ case ICugt: return ICule;
+ case ICuge: return ICult;
+ case ICeq: return ICne;
+ case ICne: return ICeq;
+ case ICXnp: return ICXp; /* the two ICX codes negate each other */
+ case ICXp: return ICXnp;
+ }
+}
+
+static int /* framesz: number of bytes emitfn() subtracts from %rsp in the prologue */
+framesz(Fn *fn)
+{
+ int i, o, f;
+
+ assert(NAlign == 3);
+ for (i=0, o=0; i<NRClob; i++)
+ o ^= 1 & (fn->reg >> rclob[i]); /* o ends up 1 when an odd number of rclob[] registers is set in fn->reg */
+ f = fn->slot;
+ f = (f + 3) & -4; /* round the slot count up to a multiple of 4 */
+ return 4*f + 8*o; /* presumably the extra 8 keeps the stack 16-byte aligned after the pushes - confirm */
+}
+
+void /* emitfn: print the assembly of function fn to f */
+emitfn(Fn *fn, FILE *f)
+{
+ static char *ctoa[] = { /* condition suffix appended to "j" for each comparison code */
+ [ICeq] = "z",
+ [ICule] = "be",
+ [ICult] = "b",
+ [ICsle] = "le",
+ [ICslt] = "l",
+ [ICsgt] = "g",
+ [ICsge] = "ge",
+ [ICugt] = "a",
+ [ICuge] = "ae",
+ [ICne] = "nz",
+ [ICXnp] = "np",
+ [ICXp] = "p"
+ };
+ Blk *b, *s;
+ Ins *i, itmp; /* itmp only ever has arg[0] set, which is all "%L0" reads */
+ int *r, c, fs;
+
+ fprintf(f,
+ ".text\n"
+ ".globl %s\n"
+ ".type %s, @function\n"
+ "%s:\n"
+ "\tpush %%rbp\n"
+ "\tmov %%rsp, %%rbp\n",
+ fn->name, fn->name, fn->name
+ );
+ fs = framesz(fn);
+ if (fs) /* no sub is printed for an empty frame */
+ fprintf(f, "\tsub $%d, %%rsp\n", fs);
+ for (r=rclob; r-rclob < NRClob; r++) /* push the rclob[] registers set in fn->reg, in table order */
+ if (fn->reg & BIT(*r)) {
+ itmp.arg[0] = TMP(*r);
+ emitf("pushq %L0", &itmp, fn, f);
+ }
+
+ for (b=fn->start; b; b=b->link) {
+ fprintf(f, ".L%s:\n", b->name); /* every block gets a .L label */
+ for (i=b->ins; i!=&b->ins[b->nins]; i++)
+ emitins(*i, fn, f);
+ switch (b->jmp.type) {
+ case JRet0:
+ for (r=&rclob[NRClob]; r>rclob;) /* pop in the reverse of the push order */
+ if (fn->reg & BIT(*--r)) {
+ itmp.arg[0] = TMP(*r);
+ emitf("popq %L0", &itmp, fn, f);
+ }
+ fprintf(f,
+ "\tleave\n"
+ "\tret\n"
+ );
+ break;
+ case JJmp:
+ if (b->s1 != b->link) /* no jmp needed when the target is the next block in the list */
+ fprintf(f, "\tjmp .L%s\n", b->s1->name);
+ break;
+ default:
+ c = b->jmp.type - JXJc; /* comparison code carried by the jump type */
+ if (0 <= c && c <= NXICmp) {
+ if (b->link == b->s2) { /* s2 follows in the list: jump to s1 on c */
+ s = b->s1;
+ } else if (b->link == b->s1) { /* s1 follows in the list: jump to s2 on the negated code */
+ c = cneg(c);
+ s = b->s2;
+ } else
+ diag("emit: unhandled jump (1)");
+ fprintf(f, "\tj%s .L%s\n", ctoa[c], s->name);
+ break;
+ }
+ diag("emit: unhandled jump (2)");
+ }
+ }
+
+}
+
+void /* emitdat: print the assembly for one data-definition item d */
+emitdat(Dat *d, FILE *f)
+{
+ static int align; /* 1 once a DAlign item was seen since the last DStart; kept across calls */
+ static char *dtoa[] = { /* directive printed for each item type */
+ [DAlign] = ".align",
+ [DB] = "\t.byte",
+ [DH] = "\t.value",
+ [DW] = "\t.long",
+ [DL] = "\t.quad"
+ };
+
+ switch (d->type) {
+ case DStart:
+ align = 0;
+ fprintf(f, ".data\n");
+ break;
+ case DEnd:
+ break;
+ case DName:
+ if (!align) /* no DAlign item seen: print a default .align 8 */
+ fprintf(f, ".align 8\n");
+ fprintf(f,
+ ".globl %s\n"
+ ".type %s, @object\n"
+ "%s:\n",
+ d->u.str, d->u.str, d->u.str
+ );
+ break;
+ case DZ:
+ fprintf(f, "\t.fill %"PRId64",1,0\n", d->u.num); /* d->u.num bytes of value 0 */
+ break;
+ default: /* remaining item types; dtoa[] holds their directives */
+ if (d->type == DAlign)
+ align = 1;
+
+ if (d->isstr) {
+ if (d->type != DB)
+ err("strings only supported for 'b' currently");
+ fprintf(f, "\t.ascii \"%s\"\n", d->u.str);
+ }
+ else if (d->isref) { /* symbol name followed by a signed offset */
+ fprintf(f, "%s %s%+"PRId64"\n",
+ dtoa[d->type], d->u.ref.nam,
+ d->u.ref.off);
+ }
+ else { /* plain number */
+ fprintf(f, "%s %"PRId64"\n",
+ dtoa[d->type], d->u.num);
+ }
+ break;
+ }
+}
+
+typedef struct FBits FBits;
+
+struct FBits {
+ int64_t bits;
+ int wide;
+ FBits *link;
+};
+
+static FBits *stash;
+
+int /* stashfp: record the constant bits n (w is its wide flag) and return its index in the list */
+stashfp(int64_t n, int w)
+{
+ FBits **pb, *b;
+ int i;
+
+ /* does a dumb de-dup of fp constants;
+ * this should be the linker's job */
+ for (pb=&stash, i=0; (b=*pb); pb=&b->link, i++)
+ if (n == b->bits && w == b->wide)
+ return i; /* already stashed: reuse its index */
+ b = emalloc(sizeof *b);
+ b->bits = n;
+ b->wide = w;
+ b->link = 0;
+ *pb = b; /* append at the tail so earlier indices stay valid */
+ return i;
+}
+
+void /* emitfin: print every stashed constant under a .Lfp<index> label, then free the list */
+emitfin(FILE *f)
+{
+ FBits *b;
+ int i;
+
+ if (!stash) /* nothing was stashed: print nothing */
+ return;
+ fprintf(f, "/* floating point constants */\n");
+ fprintf(f, ".data\n.align 8\n");
+ for (b=stash, i=0; b; b=b->link, i++) /* wide entries first; i still counts every entry */
+ if (b->wide)
+ fprintf(f,
+ ".Lfp%d:\n"
+ "\t.quad %"PRId64
+ " /* %f */\n",
+ i, b->bits,
+ *(double *)&b->bits /* NOTE(review): type punning through a pointer cast */
+ );
+ for (b=stash, i=0; b; b=b->link, i++) /* then the non-wide entries, same index numbering */
+ if (!b->wide)
+ fprintf(f,
+ ".Lfp%d:\n"
+ "\t.long %"PRId64
+ " /* %lf */\n",
+ i, b->bits & 0xffffffff,
+ *(float *)&b->bits /* NOTE(review): reads the first 4 bytes of bits as a float; assumes little-endian - confirm */
+ );
+ while ((b=stash)) { /* release the list and leave stash empty */
+ stash = b->link;
+ free(b);
+ }
+}
diff --git a/src/isel.c b/src/isel.c
@@ -0,0 +1,1135 @@
+#include "all.h"
+#include <limits.h>
+
+/* For x86_64, do the following:
+ *
+ * - lower calls
+ * - check that constants are used only in
+ * places allowed
+ * - ensure immediates always fit in 32b
+ * - explicit machine register constraints
+ * on instructions like division.
+ * - implement fast locals (the streak of
+ * constant allocX in the first basic block)
+ * - recognize complex addressing modes
+ *
+ * Invariant: the use counts that are used
+ * in sel() must be sound. This
+ * is not so trivial, maybe the
+ * dce should be moved out...
+ */
+
+typedef struct ANum ANum;
+typedef struct AClass AClass;
+typedef struct RAlloc RAlloc;
+
+/* addressing information kept for each temporary,
+ * filled by anumber() and read by amatch() and
+ * seladdr()
+ */
+struct ANum {
+	char n, l, r;	/* number of the temporary, and numbers picked for the two arguments of its definition */
+	Ins *i;	/* instruction of the current block defining the temporary */
+	Ref mem;	/* memory reference cached by seladdr(), R when none */
+};
+
+/* defined after its first user, seladdr() */
+static void amatch(Addr *, Ref, ANum *, Fn *, int);
+
+/* Translate a floating point comparison code (FC*)
+ * into an integer condition code (IC*). An unknown
+ * code is reported with diag().
+ */
+static int
+fcmptoi(int fc)
+{
+	switch (fc) {
+	case FCeq: return ICeq;
+	case FCne: return ICne;
+	case FClt: return ICult;
+	case FCle: return ICule;
+	case FCgt: return ICugt;
+	case FCge: return ICuge;
+	case FCo: return ICXnp;
+	case FCuo: return ICXp;
+	default:
+		break;
+	}
+	diag("isel: fcmptoi defaulted");
+	return ICule;
+}
+
+/* Tell if op is a comparison. When it is, return 1
+ * and store the class of the compared operands in
+ * *pk and the integer condition code in *pc (either
+ * pointer may be null). Return 0 otherwise, leaving
+ * *pk and *pc untouched.
+ */
+static int
+iscmp(int op, int *pk, int *pc)
+{
+	int cls, cond;
+
+	if (op >= OCmpw && op <= OCmpw1) {
+		cls = Kw;
+		cond = op - OCmpw;
+	} else if (op >= OCmpl && op <= OCmpl1) {
+		cls = Kl;
+		cond = op - OCmpl;
+	} else if (op >= OCmps && op <= OCmps1) {
+		/* fp conditions are mapped to integer ones */
+		cls = Ks;
+		cond = fcmptoi(op - OCmps);
+	} else if (op >= OCmpd && op <= OCmpd1) {
+		cls = Kd;
+		cond = fcmptoi(op - OCmpd);
+	} else
+		return 0;
+
+	if (pk)
+		*pk = cls;
+	if (pc)
+		*pc = cond;
+	return 1;
+}
+
+/* Return non-zero when r is a constant that cannot
+ * be used as a 32 bit signed immediate.
+ */
+static int
+noimm(Ref r, Fn *fn)
+{
+	Con *c;
+	int64_t v;
+
+	if (rtype(r) != RCon)
+		return 0;
+	c = &fn->con[r.val];
+	if (c->type == CBits) {
+		v = c->bits.i;
+		return (v < INT32_MIN || v > INT32_MAX);
+	}
+	if (c->type != CAddr)
+		diag("isel: invalid constant");
+	/* only the 'small' code model of the
+	 * ABI is supported, so data can always
+	 * be addressed with 32 bits
+	 */
+	return 0;
+}
+
+/* Slot assigned to the temporary r, or -1 when r
+ * is not a temporary.
+ */
+static int
+rslot(Ref r, Fn *fn)
+{
+	return rtype(r) == RTmp ? fn->tmp[r.val].slot : -1;
+}
+
+/* Class expected for argument n of instruction i,
+ * as given by the opdesc table.
+ */
+static int
+argcls(Ins *i, int n)
+{
+	OpDesc *od;
+
+	od = &opdesc[i->op];
+	return od->argcls[n][i->cls];
+}
+
+/* Rewrite the argument *r so that it is valid for
+ * an operand of class k. phi is non-zero when the
+ * argument belongs to a phi node, in which case
+ * large integer constants are left alone. At most
+ * one of the three rewrites below is applied.
+ */
+static void
+fixarg(Ref *r, int k, int phi, Fn *fn)
+{
+	Addr a;
+	Ref r0, r1;
+	int s, n;
+
+	r1 = r0 = *r;
+	s = rslot(r0, fn);
+	if (KBASE(k) == 1 && rtype(r0) == RCon) {
+		/* load floating points from memory
+		 * slots, they can't be used as
+		 * immediates
+		 */
+		r1 = MEM(fn->nmem);
+		vgrow(&fn->mem, ++fn->nmem);
+		memset(&a, 0, sizeof a);
+		a.offset.type = CAddr;
+		/* the constant is emitted later under the label .Lfp<n> */
+		n = stashfp(fn->con[r0.val].bits.i, KWIDE(k));
+		sprintf(a.offset.label, ".Lfp%d", n);
+		fn->mem[fn->nmem-1] = a;
+	}
+	else if (!phi && k == Kl && noimm(r0, fn)) {
+		/* load constants that do not fit in
+		 * a 32bit signed integer into a
+		 * long temporary
+		 */
+		r1 = newtmp("isel", Kl, fn);
+		emit(OCopy, Kl, r1, r0, R);
+	}
+	else if (s != -1) {
+		/* load fast locals' addresses into
+		 * temporaries right before the
+		 * instruction
+		 */
+		r1 = newtmp("isel", Kl, fn);
+		emit(OAddr, Kl, r1, SLOT(s), R);
+	}
+	*r = r1;
+}
+
+/* Add du to the use count of r when r is a
+ * temporary; other references are ignored.
+ */
+static void
+chuse(Ref r, int du, Fn *fn)
+{
+	if (rtype(r) != RTmp)
+		return;
+	fn->tmp[r.val].nuse += du;
+}
+
+/* Replace the address argument *r, when it is a
+ * temporary, by a memory reference built with
+ * amatch(). Use counts are adjusted: the temporary
+ * loses one use, the base and index of the new
+ * address gain one each. The memory reference is
+ * cached in an[] only when neither its base nor its
+ * index is a temporary.
+ */
+static void
+seladdr(Ref *r, ANum *an, Fn *fn)
+{
+	Addr addr;
+	ANum *info;
+	Ref tmp, m;
+
+	tmp = *r;
+	if (rtype(tmp) != RTmp)
+		return;
+	chuse(tmp, -1, fn);
+	info = &an[tmp.val];
+	m = info->mem;
+	if (req(m, R)) {
+		amatch(&addr, tmp, an, fn, 1);
+		vgrow(&fn->mem, ++fn->nmem);
+		fn->mem[fn->nmem-1] = addr;
+		m = MEM(fn->nmem-1);
+		chuse(addr.base, +1, fn);
+		chuse(addr.index, +1, fn);
+		if (rtype(addr.base) != RTmp && rtype(addr.index) != RTmp)
+			info->mem = m;
+	}
+	*r = m;
+}
+
+/* Emit the comparison of arg[0] and arg[1] in class
+ * k. When arg[0] is a constant the two entries of
+ * arg are exchanged in place, so the caller sees
+ * the swap and must flip the condition itself.
+ */
+static void
+selcmp(Ref arg[2], int k, Fn *fn)
+{
+	Ref swp;
+
+	if (rtype(arg[0]) == RCon) {
+		swp = arg[0];
+		arg[0] = arg[1];
+		arg[1] = swp;
+	}
+	/* two constants are not supported */
+	assert(rtype(arg[0]) != RCon);
+	emit(OXCmp, k, R, arg[1], arg[0]);
+	fixarg(&curi->arg[0], k, 0, fn);
+}
+
+/* Select machine instructions for i. Instructions
+ * are emitted backwards (curi moves towards the
+ * start of the buffer), so inside each case the
+ * emit() calls appear in reverse execution order.
+ * an[] holds the addressing information of the
+ * current block.
+ */
+static void
+sel(Ins i, ANum *an, Fn *fn)
+{
+	Ref r0, r1;
+	int x, k, kc;
+	int64_t val;
+	Ins *i0;
+
+	/* drop instructions whose result is never used
+	 * and that do not touch machine registers; their
+	 * arguments lose one use each
+	 */
+	if (rtype(i.to) == RTmp)
+	if (!isreg(i.to) && !isreg(i.arg[0]) && !isreg(i.arg[1]))
+	if (fn->tmp[i.to.val].nuse == 0) {
+		chuse(i.arg[0], -1, fn);
+		chuse(i.arg[1], -1, fn);
+		return;
+	}
+	/* remember where emission started for the check at the end */
+	i0 = curi;
+	k = i.cls;
+	switch (i.op) {
+	case ODiv:
+	case ORem:
+	case OUDiv:
+	case OURem:
+		/* r0 is the register holding the result,
+		 * r1 the other register of the RAX/RDX pair
+		 */
+		if (i.op == ODiv || i.op == OUDiv)
+			r0 = TMP(RAX), r1 = TMP(RDX);
+		else
+			r0 = TMP(RDX), r1 = TMP(RAX);
+		emit(OCopy, k, i.to, r0, R);
+		emit(OCopy, k, R, r1, R);
+		if (rtype(i.arg[1]) == RCon) {
+			/* immediates not allowed for
+			 * divisions in x86
+			 */
+			r0 = newtmp("isel", k, fn);
+		} else
+			r0 = i.arg[1];
+		if (i.op == ODiv || i.op == ORem) {
+			emit(OXIDiv, k, R, r0, R);
+			emit(OSign, k, TMP(RDX), TMP(RAX), R);
+		} else {
+			emit(OXDiv, k, R, r0, R);
+			emit(OCopy, k, TMP(RDX), CON_Z, R);
+		}
+		emit(OCopy, k, TMP(RAX), i.arg[0], R);
+		if (rtype(i.arg[1]) == RCon)
+			emit(OCopy, k, r0, i.arg[1], R);
+		break;
+	case OSar:
+	case OShr:
+	case OShl:
+		/* a non-constant shift count goes through RCX */
+		if (rtype(i.arg[1]) == RCon)
+			goto Emit;
+		r0 = i.arg[1];
+		i.arg[1] = TMP(RCX);
+		emit(OCopy, Kw, R, TMP(RCX), R);
+		emiti(i);
+		emit(OCopy, Kw, TMP(RCX), r0, R);
+		break;
+	case ONop:
+		break;
+	case OStored:
+	case OStores:
+	case OStorel:
+	case OStorew:
+	case OStoreh:
+	case OStoreb:
+		/* a constant floating point value is stored
+		 * with the integer store of the same width
+		 */
+		if (rtype(i.arg[0]) == RCon) {
+			if (i.op == OStored)
+				i.op = OStorel;
+			if (i.op == OStores)
+				i.op = OStorew;
+		}
+		seladdr(&i.arg[1], an, fn);
+		goto Emit;
+	/* plain label (not a case), reached from the default case */
+	case_OLoad:
+		seladdr(&i.arg[0], an, fn);
+		goto Emit;
+	case OCall:
+	case OSAlloc:
+	case OCopy:
+	case OAdd:
+	case OSub:
+	case OMul:
+	case OAnd:
+	case OOr:
+	case OXor:
+	case OXTest:
+	case OFtosi:
+	case OSitof:
+	case OExts:
+	case OTruncd:
+	case OCast:
+	/* plain label (not a case), reached from the default case */
+	case_OExt:
+Emit:
+		emiti(i);
+		fixarg(&curi->arg[0], argcls(curi, 0), 0, fn);
+		fixarg(&curi->arg[1], argcls(curi, 1), 0, fn);
+		break;
+	case OAlloc:
+	case OAlloc+1:
+	case OAlloc+2: /* == OAlloc1 */
+		/* we need to make sure
+		 * the stack remains aligned
+		 * (rsp = 0) mod 16
+		 */
+		if (rtype(i.arg[0]) == RCon) {
+			assert(fn->con[i.arg[0].val].type == CBits);
+			val = fn->con[i.arg[0].val].bits.i;
+			val = (val + 15)  & ~INT64_C(15);
+			if (val < 0 || val > INT32_MAX)
+				diag("isel: alloc too large");
+			emit(OSAlloc, Kl, i.to, getcon(val, fn), R);
+		} else {
+			/* r0 = (i.arg[0] + 15) & -16 */
+			r0 = newtmp("isel", Kl, fn);
+			r1 = newtmp("isel", Kl, fn);
+			emit(OSAlloc, Kl, i.to, r0, R);
+			emit(OAnd, Kl, r0, r1, getcon(-16, fn));
+			emit(OAdd, Kl, r1, i.arg[0], getcon(15, fn));
+		}
+		break;
+	default:
+		if (isext(i.op))
+			goto case_OExt;
+		if (isload(i.op))
+			goto case_OLoad;
+		if (iscmp(i.op, &kc, &x)) {
+			/* selcmp() swaps the operands when the
+			 * first one is a constant, so flip the
+			 * condition accordingly
+			 */
+			if (rtype(i.arg[0]) == RCon)
+				x = icmpop(x);
+			emit(OXSet+x, k, i.to, R, R);
+			selcmp(i.arg, kc, fn);
+			break;
+		}
+		diag("isel: non-exhaustive implementation");
+	}
+
+	/* sanity check: none of the instructions just
+	 * emitted may still refer to a fast local
+	 */
+	while (i0 > curi && --i0)
+		if (rslot(i0->arg[0], fn) != -1
+		||  rslot(i0->arg[1], fn) != -1)
+			diag("isel: usupported address argument");
+}
+
+/* Scan backwards from i (excluded) down to i0 and
+ * return the first instruction whose opdesc entry
+ * has sflag set. Instructions with lflag set are
+ * skipped; any other instruction stops the search
+ * and 0 is returned.
+ */
+static Ins *
+flagi(Ins *i0, Ins *i)
+{
+	for (; i>i0;) {
+		--i;
+		if (opdesc[i->op].sflag)
+			return i;
+		if (!opdesc[i->op].lflag)
+			break;
+	}
+	return 0;
+}
+
+/* classification of one argument or return value */
+struct AClass {
+	int inmem;	/* 0: in registers, 1: aggregate in memory, 2: scalar on the stack (set by classify()) */
+	int align;	/* log2 of the alignment */
+	uint size;	/* size in bytes; aclass() rounds it up */
+	int cls[2];	/* class of each 8 byte part of an aggregate, -1 when there is none */
+};
+
+/* Classify the aggregate type t into *a: compute its
+ * rounded size, decide if it lives in memory, and if
+ * not, the class (Kl, Kd or -1) of each of its two
+ * 8 byte parts. cls[] is left unset when inmem is 1.
+ */
+static void
+aclass(AClass *a, Typ *t)
+{
+	int e, s, n, cls;
+	uint sz, al;
+
+	sz = t->size;
+	al = 1u << t->align;
+
+	/* the ABI requires sizes to be rounded
+	 * up to the nearest multiple of 8, moreover
+	 * it makes it easy to load and store structures
+	 * in registers
+	 */
+	if (al < 8)
+		al = 8;
+	sz = (sz + al-1) & -al;
+
+	a->size = sz;
+	a->align = t->align;
+
+	if (t->dark || sz > 16) {
+		/* large or unaligned structures are
+		 * required to be passed in memory
+		 */
+		a->inmem = 1;
+		return;
+	}
+
+	a->inmem = 0;
+	/* walk the segments of the type, 8 bytes at a
+	 * time; s is not reset between the two parts
+	 */
+	for (e=0, s=0; e<2; e++) {
+		cls = -1;
+		for (n=0; n<8 && t->seg[s].len; s++) {
+			if (t->seg[s].ispad) {
+				/* don't change anything */
+			}
+			else if (t->seg[s].isflt) {
+				if (cls == -1)
+					cls = Kd;
+			}
+			else
+				/* any integer segment makes the part integer */
+				cls = Kl;
+			n += t->seg[s].len;
+		}
+		assert(n <= 8);
+		a->cls[e] = cls;
+	}
+}
+
+/* Emit the copy of sz bytes from the address rsrc
+ * to the address rstk + soff, 8 bytes at a time.
+ * NOTE(review): sz is unsigned and decremented by 8,
+ * so it presumably has to be a multiple of 8 (the
+ * sizes computed by aclass() are) -- confirm for
+ * other callers.
+ */
+static void
+blit(Ref rstk, uint soff, Ref rsrc, uint sz, Fn *fn)
+{
+	Ref r, r1;
+	uint boff;
+
+	/* it's an impolite blit, we might go across the end
+	 * of the source object a little bit... */
+	for (boff=0; sz>0; sz-=8, soff+=8, boff+=8) {
+		/* emitted in reverse: address, load, address, store */
+		r = newtmp("abi", Kl, fn);
+		r1 = newtmp("abi", Kl, fn);
+		emit(OStorel, 0, R, r, r1);
+		emit(OAdd, Kl, r1, rstk, getcon(soff, fn));
+		r1 = newtmp("abi", Kl, fn);
+		emit(OLoad, Kl, r, r1, R);
+		emit(OAdd, Kl, r1, rsrc, getcon(boff, fn));
+		/* each chunk uses both base addresses once more */
+		chuse(rsrc, +1, fn);
+		chuse(rstk, +1, fn);
+	}
+}
+
+/* Pick the return registers of an aggregate that is
+ * returned in registers. reg[n] receives the register
+ * holding the n-th 8 byte part: RAX then RDX for the
+ * integer parts, XMM0 then XMM0+1 for the floating
+ * point ones. The result encodes the register counts
+ * the way retregs() decodes them: number of integer
+ * registers in bits 0-1, of sse registers in bits 2-3.
+ */
+static int
+retr(Ref reg[2], AClass *aret)
+{
+	static int retreg[2][2] = {{RAX, RDX}, {XMM0, XMM0+1}};
+	int n, k, ca, nr[2];
+
+	nr[0] = nr[1] = 0;
+	ca = 0;
+	/* check the bound before indexing: cls[] has
+	 * only two entries and both may be valid
+	 */
+	for (n=0; n<2 && aret->cls[n]>=0; n++) {
+		k = KBASE(aret->cls[n]);
+		reg[n] = TMP(retreg[k][nr[k]++]);
+		ca += 1 << (2 * k);
+	}
+	return ca;
+}
+
+/* Lower the return jump of block b, if any. The
+ * returned value is moved to the return registers
+ * (or copied to the memory designated by fn->retr
+ * for aggregates returned in memory), the jump
+ * becomes JRet0 and its argument a CALL reference
+ * describing the registers used.
+ */
+static void
+selret(Blk *b, Fn *fn)
+{
+	int j, k, ca;
+	Ref r, r0, reg[2];
+	AClass aret;
+
+	j = b->jmp.type;
+
+	if (!isret(j) || j == JRet0)
+		return;
+
+	r0 = b->jmp.arg;
+	b->jmp.type = JRet0;
+
+	if (j == JRetc) {
+		/* aggregate return, r0 is its address */
+		aclass(&aret, &typ[fn->retty]);
+		if (aret.inmem) {
+			assert(rtype(fn->retr) == RTmp);
+			/* the address of the result is returned in RAX */
+			emit(OCopy, Kl, TMP(RAX), fn->retr, R);
+			chuse(fn->retr, +1, fn);
+			blit(fn->retr, 0, r0, aret.size, fn);
+			ca = 1;
+		} else {
+			ca = retr(reg, &aret);
+			if (aret.size > 8) {
+				/* second 8 byte part, loaded from r0 + 8 */
+				r = newtmp("abi", Kl, fn);
+				emit(OLoad, Kl, reg[1], r, R);
+				emit(OAdd, Kl, r, r0, getcon(8, fn));
+				chuse(r0, +1, fn);
+			}
+			emit(OLoad, Kl, reg[0], r0, R);
+		}
+	} else {
+		/* scalar return, the class is given by the jump type */
+		k = j - JRetw;
+		if (KBASE(k) == 0) {
+			emit(OCopy, k, TMP(RAX), r0, R);
+			ca = 1;
+		} else {
+			emit(OCopy, k, TMP(XMM0), r0, R);
+			ca = 1 << 2;
+		}
+	}
+
+	b->jmp.arg = CALL(ca);
+}
+
+/* Lower the conditional jump ending block b. A
+ * jump on a constant becomes an unconditional jump.
+ * Otherwise the jump becomes JXJc + condition,
+ * reusing when possible the flags set by the last
+ * flag-setting instruction of the block; failing
+ * that, the argument is compared against zero.
+ */
+static void
+seljmp(Blk *b, Fn *fn)
+{
+	Ref r;
+	int c, k;
+	Ins *fi;
+
+	if (b->jmp.type == JRet0 || b->jmp.type == JJmp)
+		return;
+	assert(b->jmp.type == JJnz);
+	r = b->jmp.arg;
+	b->jmp.arg = R;
+	assert(!req(r, R));
+	if (rtype(r) == RCon) {
+		/* the branch is known: keep the only
+		 * reachable successor in s1
+		 */
+		b->jmp.type = JJmp;
+		if (req(r, CON_Z))
+			b->s1 = b->s2;
+		b->s2 = 0;
+		return;
+	}
+	fi = flagi(b->ins, &b->ins[b->nins]);
+	if (fi && req(fi->to, r)) {
+		if (iscmp(fi->op, &k, &c)) {
+			/* selcmp() swaps a leading constant, flip the condition */
+			if (rtype(fi->arg[0]) == RCon)
+				c = icmpop(c);
+			b->jmp.type = JXJc + c;
+			if (fn->tmp[r.val].nuse == 1) {
+				/* the jump is the only user: emit the bare
+				 * comparison and kill the original one
+				 */
+				assert(fn->tmp[r.val].ndef == 1);
+				selcmp(fi->arg, k, fn);
+				*fi = (Ins){.op = ONop};
+			}
+			return;
+		}
+		if (fi->op == OAnd && fn->tmp[r.val].nuse == 1
+		&& (rtype(fi->arg[0]) == RTmp ||
+		    rtype(fi->arg[1]) == RTmp)) {
+			/* an and only used by the jump becomes a test */
+			fi->op = OXTest;
+			fi->to = R;
+			b->jmp.type = JXJc + ICne;
+			if (rtype(fi->arg[1]) == RCon) {
+				r = fi->arg[1];
+				fi->arg[1] = fi->arg[0];
+				fi->arg[0] = r;
+			}
+			return;
+		}
+		/* since flags are not tracked in liveness,
+		 * the result of the flag-setting instruction
+		 * has to be marked as live
+		 */
+		if (fn->tmp[r.val].nuse == 1)
+			emit(OCopy, Kw, R, r, R);
+		b->jmp.type = JXJc + ICne;
+		return;
+	}
+	selcmp((Ref[2]){r, CON_Z}, Kw, fn); /* todo, add long branch if non-zero */
+	b->jmp.type = JXJc + ICne;
+}
+
+/* Classify the arguments or parameters in [i0, i1)
+ * and fill ac[] accordingly. op is the opcode of the
+ * scalar form (OArg or OPar); any other instruction
+ * of the range carries an aggregate whose type index
+ * is in its first argument. aret, when non-null,
+ * describes an aggregate return value: if it is
+ * returned in memory, one integer register is
+ * reserved for the hidden pointer argument.
+ *
+ * On return, inmem is 0 for entries that go in
+ * registers, 1 for aggregates in memory and 2 for
+ * scalars passed on the stack. The result encodes
+ * the registers used as argregs() decodes them:
+ * integer count in bits 4-7, sse count in bits 8-11.
+ */
+static int
+classify(Ins *i0, Ins *i1, AClass *ac, int op, AClass *aret)
+{
+	int nint, ni, nsse, ns, n, np, *pn;
+	AClass *a;
+	Ins *i;
+
+	if (aret && aret->inmem)
+		nint = 5; /* hidden argument */
+	else
+		nint = 6;
+	nsse = 8;
+	for (i=i0, a=ac; i<i1; i++, a++) {
+		if (i->op == op) {
+			if (KBASE(i->cls) == 0)
+				pn = &nint;
+			else
+				pn = &nsse;
+			if (*pn > 0) {
+				--*pn;
+				a->inmem = 0;
+			} else
+				a->inmem = 2;
+			a->align = 3;
+			a->size = 8;
+			a->cls[0] = i->cls;
+		} else {
+			n = i->arg[0].val & AMask;
+			aclass(a, &typ[n]);
+			if (a->inmem)
+				continue;
+			/* count the registers exactly like
+			 * selcall() and selpar() take them:
+			 * one for the first 8 bytes, another
+			 * one only if the aggregate is larger;
+			 * cls[1] is -1 for small aggregates
+			 * and must not be counted
+			 */
+			np = a->size > 8 ? 2 : 1;
+			ni = ns = 0;
+			for (n=0; n<np; n++)
+				if (KBASE(a->cls[n]) == 0)
+					ni++;
+				else
+					ns++;
+			/* an aggregate is never split between
+			 * registers and memory
+			 */
+			if (nint >= ni && nsse >= ns) {
+				nint -= ni;
+				nsse -= ns;
+			} else
+				a->inmem = 1;
+		}
+	}
+
+	return ((6-nint) << 4) | ((8-nsse) << 8);
+}
+
+/* rarg() and argregs() index the start of this table
+ * to get the integer argument registers, in order.
+ * NOTE(review): judging by the name, the table lists
+ * the caller-save registers -- confirm.
+ */
+int rsave[] = {
+	RDI, RSI, RDX, RCX, R8, R9, R10, R11, RAX,
+	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+	XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14
+};
+/* NOTE(review): presumably the callee-save registers -- confirm */
+int rclob[] = {RBX, R12, R13, R14, R15};
+
+/* the tables must stay in sync with NRSave and NRClob */
+MAKESURE(rsave_has_correct_size, sizeof rsave == NRSave * sizeof(int));
+MAKESURE(rclob_has_correct_size, sizeof rclob == NRClob * sizeof(int));
+
+/* Decode the return registers described by the
+ * call reference r. The result is the set of those
+ * registers; when p is non-null, p[0] and p[1]
+ * receive the number of integer and sse registers.
+ */
+bits
+retregs(Ref r, int p[2])
+{
+	bits set;
+	int nint, nsse;
+
+	assert(rtype(r) == RACall);
+	nint = r.val & 3;
+	nsse = (r.val >> 2) & 3;
+	if (p) {
+		p[0] = nint;
+		p[1] = nsse;
+	}
+	set = 0;
+	if (nint >= 1) {
+		set |= BIT(RAX);
+		if (nint >= 2)
+			set |= BIT(RDX);
+	}
+	if (nsse >= 1) {
+		set |= BIT(XMM0);
+		if (nsse >= 2)
+			set |= BIT(XMM1);
+	}
+	return set;
+}
+
+/* Decode the argument registers described by the
+ * call reference r. The result is the set of those
+ * registers, RAX always included; when p is
+ * non-null, p[0] receives the number of integer
+ * registers (counting RAX) and p[1] the number of
+ * sse registers.
+ */
+bits
+argregs(Ref r, int p[2])
+{
+	bits set;
+	int n, nint, nsse;
+
+	assert(rtype(r) == RACall);
+	nint = (r.val >> 4) & 15;
+	nsse = (r.val >> 8) & 15;
+	if (p) {
+		p[0] = nint + 1;
+		p[1] = nsse;
+	}
+	set = BIT(RAX);
+	for (n=nint; n-- > 0;)
+		set |= BIT(rsave[n]);
+	for (n=nsse; n-- > 0;)
+		set |= BIT(XMM0+n);
+	return set;
+}
+
+/* Return the next free argument register for a
+ * value of class ty and advance the matching
+ * counter (*ni for integers, *ns for sse).
+ */
+static Ref
+rarg(int ty, int *ni, int *ns)
+{
+	if (KBASE(ty) != 0)
+		return TMP(XMM0 + (*ns)++);
+	return TMP(rsave[(*ni)++]);
+}
+
+/* list of the allocations made by selcall() for
+ * aggregate return values; isel() emits them in
+ * the start block
+ */
+struct RAlloc {
+	Ins i;	/* the alloc instruction */
+	RAlloc *link;	/* next allocation */
+};
+
+/* Lower the call i1 whose arguments are the
+ * instructions [i0, i1). Emission goes backwards,
+ * so the code below handles, in this order: the
+ * release of the stack space, the retrieval of the
+ * result, the call itself, the register arguments
+ * and finally the stack arguments. Allocations for
+ * aggregate results are pushed on *rap.
+ */
+static void
+selcall(Fn *fn, Ins *i0, Ins *i1, RAlloc **rap)
+{
+	Ins *i;
+	AClass *ac, *a, aret;
+	int ca, ni, ns;
+	uint stk, off;
+	Ref r, r1, r2, reg[2], regcp[2];
+	RAlloc *ra;
+
+	ac = alloc((i1-i0) * sizeof ac[0]);
+	/* a non-empty second argument holds the aggregate return type */
+	if (!req(i1->arg[1], R)) {
+		assert(rtype(i1->arg[1]) == RAType);
+		aclass(&aret, &typ[i1->arg[1].val & AMask]);
+		ca = classify(i0, i1, ac, OArg, &aret);
+	} else
+		ca = classify(i0, i1, ac, OArg, 0);
+
+	/* compute the stack space needed by the
+	 * arguments passed in memory, padding for
+	 * 16 byte aligned ones and for the total
+	 */
+	for (stk=0, a=&ac[i1-i0]; a>ac;)
+		if ((--a)->inmem) {
+			assert(a->align <= 4);
+			stk += a->size;
+			if (a->align == 4)
+				stk += stk & 15;
+		}
+	stk += stk & 15;
+	if (stk) {
+		/* give the stack space back after the call */
+		r = getcon(-(int64_t)stk, fn);
+		emit(OSAlloc, Kl, R, r, R);
+	}
+
+	if (!req(i1->arg[1], R)) {
+		if (aret.inmem) {
+			/* get the return location from eax
+			 * it saves one callee-save reg */
+			r1 = newtmp("abi", Kl, fn);
+			emit(OCopy, Kl, i1->to, TMP(RAX), R);
+			ca += 1;
+		} else {
+			/* store the return registers into the
+			 * memory allocated for the result
+			 */
+			if (aret.size > 8) {
+				r = newtmp("abi", Kl, fn);
+				regcp[1] = newtmp("abi", aret.cls[1], fn);
+				emit(OStorel, 0, R, regcp[1], r);
+				emit(OAdd, Kl, r, i1->to, getcon(8, fn));
+				chuse(i1->to, +1, fn);
+				ca += 1 << (2 * KBASE(aret.cls[1]));
+			}
+			regcp[0] = newtmp("abi", aret.cls[0], fn);
+			emit(OStorel, 0, R, regcp[0], i1->to);
+			ca += 1 << (2 * KBASE(aret.cls[0]));
+			retr(reg, &aret);
+			if (aret.size > 8)
+				emit(OCopy, aret.cls[1], regcp[1], reg[1], R);
+			emit(OCopy, aret.cls[0], regcp[0], reg[0], R);
+			r1 = i1->to;
+		}
+		/* allocate return pad */
+		ra = alloc(sizeof *ra);
+		assert(NAlign == 3);
+		/* pick the alloc opcode from the alignment */
+		aret.align -= 2;
+		if (aret.align < 0)
+			aret.align = 0;
+		ra->i.op = OAlloc + aret.align;
+		ra->i.cls = Kl;
+		ra->i.to = r1;
+		ra->i.arg[0] = getcon(aret.size, fn);
+		ra->link = (*rap);
+		*rap = ra;
+	} else {
+		/* scalar result in RAX or XMM0 */
+		ra = 0;
+		if (KBASE(i1->cls) == 0) {
+			emit(OCopy, i1->cls, i1->to, TMP(RAX), R);
+			ca += 1;
+		} else {
+			emit(OCopy, i1->cls, i1->to, TMP(XMM0), R);
+			ca += 1 << 2;
+		}
+	}
+	emit(OCall, i1->cls, R, i1->arg[0], CALL(ca));
+	/* RAX receives the number of sse registers used by the arguments */
+	emit(OCopy, Kw, TMP(RAX), getcon((ca >> 8) & 15, fn), R);
+
+	ni = ns = 0;
+	if (ra && aret.inmem)
+		emit(OCopy, Kl, rarg(Kl, &ni, &ns), ra->i.to, R); /* pass hidden argument */
+	/* arguments passed in registers */
+	for (i=i0, a=ac; i<i1; i++, a++) {
+		if (a->inmem)
+			continue;
+		r1 = rarg(a->cls[0], &ni, &ns);
+		if (i->op == OArgc) {
+			/* aggregate: load its one or two parts */
+			if (a->size > 8) {
+				r2 = rarg(a->cls[1], &ni, &ns);
+				r = newtmp("abi", Kl, fn);
+				emit(OLoad, a->cls[1], r2, r, R);
+				emit(OAdd, Kl, r, i->arg[1], getcon(8, fn));
+				chuse(i->arg[1], +1, fn);
+			}
+			emit(OLoad, a->cls[0], r1, i->arg[1], R);
+		} else
+			emit(OCopy, i->cls, r1, i->arg[0], R);
+	}
+
+	if (!stk)
+		return;
+
+	/* arguments passed in memory; r is the address
+	 * of the stack area, defined by the OSAlloc
+	 * emitted at the end
+	 */
+	r = newtmp("abi", Kl, fn);
+	chuse(r, -1, fn);
+	for (i=i0, a=ac, off=0; i<i1; i++, a++) {
+		if (!a->inmem)
+			continue;
+		if (i->op == OArgc) {
+			if (a->align == 4)
+				off += off & 15;
+			blit(r, off, i->arg[1], a->size, fn);
+		} else {
+			r1 = newtmp("abi", Kl, fn);
+			emit(OStorel, 0, R, i->arg[0], r1);
+			emit(OAdd, Kl, r1, r, getcon(off, fn));
+			chuse(r, +1, fn);
+		}
+		off += a->size;
+	}
+	emit(OSAlloc, Kl, r, getcon(stk, fn), R);
+}
+
+/* Lower the parameters [i0, i1) of the function.
+ * Unlike the rest of the file, the instructions are
+ * written forwards, from the start of insb; the
+ * caller retrieves them from [insb, curi).
+ */
+static void
+selpar(Fn *fn, Ins *i0, Ins *i1)
+{
+	AClass *ac, *a, aret;
+	Ins *i;
+	int ni, ns, s, al;
+	Ref r, r1;
+
+	ac = alloc((i1-i0) * sizeof ac[0]);
+	curi = insb;
+	ni = ns = 0;
+
+	if (fn->retty >= 0) {
+		aclass(&aret, &typ[fn->retty]);
+		if (aret.inmem) {
+			/* save the hidden pointer to the return value */
+			r = newtmp("abi", Kl, fn);
+			*curi++ = (Ins){OCopy, r, {rarg(Kl, &ni, &ns)}, Kl};
+			fn->retr = r;
+		}
+		classify(i0, i1, ac, OPar, &aret);
+	} else
+		classify(i0, i1, ac, OPar, 0);
+
+	assert(NAlign == 3);
+
+	/* s is the offset, in 4 byte units, of the
+	 * next parameter passed on the stack
+	 */
+	s = 4;
+	for (i=i0, a=ac; i<i1; i++, a++) {
+		switch (a->inmem) {
+		case 1:
+			/* aggregate in memory: use it in place
+			 * through a negative slot number
+			 */
+			assert(a->align <= 4);
+			if (a->align == 4)
+				s = (s+3) & -4;
+			fn->tmp[i->to.val].slot = -s; /* HACK! */
+			s += a->size / 4;
+			continue;
+		case 2:
+			/* scalar on the stack: load it */
+			*curi++ = (Ins){OLoad, i->to, {SLOT(-s)}, i->cls};
+			s += 2;
+			continue;
+		}
+		r1 = rarg(a->cls[0], &ni, &ns);
+		if (i->op == OParc) {
+			/* aggregate in registers: copy them to
+			 * temporaries; cls[] is reused to remember
+			 * the temporaries for the loop below
+			 */
+			r = newtmp("abi", Kl, fn);
+			*curi++ = (Ins){OCopy, r, {r1}, Kl};
+			a->cls[0] = r.val;
+			if (a->size > 8) {
+				r1 = rarg(a->cls[1], &ni, &ns);
+				r = newtmp("abi", Kl, fn);
+				*curi++ = (Ins){OCopy, r, {r1}, Kl};
+				a->cls[1] = r.val;
+			}
+		} else
+			*curi++ = (Ins){OCopy, i->to, {r1}, i->cls};
+	}
+	/* allocate memory for the aggregates received
+	 * in registers and store them there
+	 */
+	for (i=i0, a=ac; i<i1; i++, a++) {
+		if (i->op != OParc || a->inmem)
+			continue;
+		assert(NAlign == 3);
+		/* pick the alloc opcode from the alignment */
+		for (al=0; a->align >> (al+2); al++)
+			;
+		r = TMP(a->cls[0]);
+		r1 = i->to;
+		*curi++ = (Ins){OAlloc+al, r1, {getcon(a->size, fn)}, Kl};
+		*curi++ = (Ins){OStorel, R, {r, r1}, 0};
+		if (a->size > 8) {
+			r = newtmp("abi", Kl, fn);
+			*curi++ = (Ins){OAdd, r, {r1, getcon(8, fn)}, Kl};
+			r1 = TMP(a->cls[1]);
+			*curi++ = (Ins){OStorel, R, {r1, r}, 0};
+		}
+	}
+}
+
+/* Addressing number of the reference r: 2 for a
+ * constant, the number recorded in ai[] for a
+ * temporary. Other references are reported with
+ * diag().
+ */
+static int
+aref(Ref r, ANum *ai)
+{
+	int ty;
+
+	ty = rtype(r);
+	if (ty == RTmp)
+		return ai[r.val].n;
+	if (ty != RCon)
+		diag("isel: aref defaulted");
+	return 2;
+}
+
+/* Return 1 when r is a numeric constant usable as
+ * the scale of an address (1, 2, 4 or 8), else 0.
+ */
+static int
+ascale(Ref r, Con *con)
+{
+	if (rtype(r) != RCon || con[r.val].type != CBits)
+		return 0;
+	switch (con[r.val].bits.i) {
+	case 1:
+	case 2:
+	case 4:
+	case 8:
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+/* Number the temporaries defined in block b by the
+ * shape of the address they compute, and record in
+ * ai[] their defining instruction. Only additions
+ * and multiplications get a non-zero number; the
+ * numbers picked for their two arguments are saved
+ * in the l and r fields for amatch().
+ */
+static void
+anumber(ANum *ai, Blk *b, Con *con)
+{
+	/* This should be made obsolete by a proper
+	 * reassoc pass.
+	 *
+	 * Rules:
+	 *
+	 *   RTmp(_) -> 0    tmp
+	 *   ( RTmp(_) -> 1    slot )
+	 *   RCon(_) -> 2    con
+	 *   0 * 2   -> 3    s * i (when constant is 1,2,4,8)
+	 */
+	/* add[x][y] is the number of the sum of an
+	 * argument numbered x and one numbered y,
+	 * 0 when the sum is not a known shape
+	 */
+	static char add[10][10] = {
+		[2] [2] = 2,              /* folding */
+		[2] [5] = 5, [5] [2] = 5,
+		[2] [6] = 6, [6] [2] = 6,
+		[2] [7] = 7, [7] [2] = 7,
+		[0] [0] = 4,              /* 4: b + s * i */
+		[0] [3] = 4, [3] [0] = 4,
+		[2] [3] = 5, [3] [2] = 5, /* 5: o + s * i */
+		[0] [2] = 6, [2] [0] = 6, /* 6: o + b */
+		[2] [4] = 7, [4] [2] = 7, /* 7: o + b + s * i */
+		[0] [5] = 7, [5] [0] = 7,
+		[6] [3] = 7, [3] [6] = 7,
+
+	};
+	int a, a1, a2, n1, n2, t1, t2;
+	Ins *i;
+
+	for (i=b->ins; i-b->ins < b->nins; i++) {
+		if (rtype(i->to) == RTmp)
+			ai[i->to.val].i = i;
+		if (i->op != OAdd && i->op != OMul)
+			continue;
+		a1 = aref(i->arg[0], ai);
+		a2 = aref(i->arg[1], ai);
+		/* t1/t2: the argument may also be treated
+		 * as a plain temporary (number 0)
+		 */
+		t1 = a1 != 1 && a1 != 2;
+		t2 = a2 != 1 && a2 != 2;
+		if (i->op == OAdd) {
+			/* keep the largest number among the
+			 * combinations allowed by t1 and t2
+			 */
+			a = add[n1 = a1][n2 = a2];
+			if (t1 && a < add[0][a2])
+				a = add[n1 = 0][n2 = a2];
+			if (t2 && a < add[a1][0])
+				a = add[n1 = a1][n2 = 0];
+			if (t1 && t2 && a < add[0][0])
+				a = add[n1 = 0][n2 = 0];
+		} else {
+			/* a multiplication is only interesting
+			 * when one side is a valid scale
+			 */
+			n1 = n2 = a = 0;
+			if (ascale(i->arg[0], con) && t2)
+				a = 3, n1 = 2, n2 = 0;
+			if (t1 && ascale(i->arg[1], con))
+				a = 3, n1 = 0, n2 = 2;
+		}
+		ai[i->to.val].n = a;
+		ai[i->to.val].l = n1;
+		ai[i->to.val].r = n2;
+	}
+}
+
+/* Build in *a the address computed by r, following
+ * the numbering made by anumber(). top is non-zero
+ * for the outermost call only, which clears *a
+ * first; recursive calls accumulate into *a.
+ */
+static void
+amatch(Addr *a, Ref r, ANum *ai, Fn *fn, int top)
+{
+	Ins *i;
+	int nl, nr, t, s;
+	Ref al, ar;
+
+	if (top)
+		memset(a, 0, sizeof *a);
+	if (rtype(r) == RCon) {
+		addcon(&a->offset, &fn->con[r.val]);
+		return;
+	}
+	assert(rtype(r) == RTmp);
+	i = ai[r.val].i;
+	nl = ai[r.val].l;
+	nr = ai[r.val].r;
+	/* order the arguments of the definition so
+	 * that al has the smaller number (nl <= nr);
+	 * al and ar are only set when r is defined
+	 * in the current block
+	 */
+	if (i) {
+		if (nl > nr) {
+			al = i->arg[1];
+			ar = i->arg[0];
+			t = nl, nl = nr, nr = t;
+		} else {
+			al = i->arg[0];
+			ar = i->arg[1];
+		}
+	}
+	switch (ai[r.val].n) {
+	default:
+		diag("isel: amatch defaulted");
+	case 3: /* s * i */
+		/* at the top level there is nothing to add
+		 * the product to, keep r as a plain base
+		 */
+		if (!top) {
+			a->index = al;
+			a->scale = fn->con[ar.val].bits.i;
+		} else
+			a->base = r;
+		break;
+	case 4: /* b + s * i */
+		switch (nr) {
+		case 0:
+			/* two temporaries: a fast local
+			 * has to be the base
+			 */
+			if (fn->tmp[ar.val].slot != -1) {
+				al = i->arg[1];
+				ar = i->arg[0];
+			}
+			a->index = ar;
+			a->scale = 1;
+			break;
+		case 3:
+			amatch(a, ar, ai, fn, 0);
+			break;
+		}
+		r = al;
+		/* fall through, al is the base */
+	case 0:
+		s = fn->tmp[r.val].slot;
+		if (s != -1)
+			r = SLOT(s);
+		a->base = r;
+		break;
+	case 2: /* constants */
+	case 5: /* o + s * i */
+	case 6: /* o + b */
+	case 7: /* o + b + s * i */
+		amatch(a, ar, ai, fn, 0);
+		amatch(a, al, ai, fn, 0);
+		break;
+	}
+}
+
+/* instruction selection
+ * requires use counts (as given by parsing)
+ */
+/* Entry point of instruction selection for fn.
+ * The passes are, in order: lowering of the
+ * parameters, lowering of calls and returns,
+ * assignment of slots to the constant allocs of
+ * the start block, then selection proper on every
+ * block. Each block is rebuilt backwards in insb.
+ */
+void
+isel(Fn *fn)
+{
+	Blk *b, **sb;
+	Ins *i, *i0, *ip;
+	Phi *p;
+	uint a;
+	int n, al;
+	int64_t sz;
+	ANum *ainfo;
+	RAlloc *ral;
+
+	for (n=0; n<fn->ntmp; n++)
+		fn->tmp[n].slot = -1;
+	fn->slot = 0;
+
+	/* lower arguments */
+	for (b=fn->start, i=b->ins; i-b->ins < b->nins; i++)
+		if (i->op != OPar && i->op != OParc)
+			break;
+	selpar(fn, b->ins, i);
+	/* new start block: the code made by selpar()
+	 * followed by what comes after the parameters
+	 */
+	n = b->nins - (i - b->ins) + (curi - insb);
+	i0 = alloc(n * sizeof(Ins));
+	ip = icpy(ip = i0, insb, curi - insb);
+	ip = icpy(ip, i, &b->ins[b->nins] - i);
+	b->nins = n;
+	b->ins = i0;
+
+	/* lower function calls and returns */
+	ral = 0;
+	b = fn->start;
+	do {
+		if (!(b = b->link))
+			b = fn->start; /* do it last */
+		curi = &insb[NIns];
+		selret(b, fn);
+		for (i=&b->ins[b->nins]; i!=b->ins;) {
+			if ((--i)->op == OCall) {
+				/* find the first argument of the call */
+				for (i0=i; i0>b->ins; i0--)
+					if ((i0-1)->op != OArg)
+					if ((i0-1)->op != OArgc)
+						break;
+				selcall(fn, i0, i, &ral);
+				i = i0;
+				continue;
+			}
+			assert(i->op != OArg && i->op != OArgc);
+			emiti(*i);
+		}
+		/* the start block comes last, it receives
+		 * the allocations collected by selcall()
+		 */
+		if (b == fn->start)
+			for (; ral; ral=ral->link)
+				emiti(ral->i);
+		b->nins = &insb[NIns] - curi;
+		idup(&b->ins, curi, b->nins);
+	} while (b != fn->start);
+
+	if (debug['A']) {
+		fprintf(stderr, "\n> After call lowering:\n");
+		printfn(fn, stderr);
+	}
+
+	/* assign slots to fast allocs */
+	b = fn->start;
+	assert(NAlign == 3 && "change n=4 and sz /= 4 below");
+	/* one pass per alloc kind, n is its alignment
+	 * in bytes; a pass stops at the first alloc of
+	 * its kind whose size is not constant
+	 */
+	for (al=OAlloc, n=4; al<=OAlloc1; al++, n*=2)
+		for (i=b->ins; i-b->ins < b->nins; i++)
+			if (i->op == al) {
+				if (rtype(i->arg[0]) != RCon)
+					break;
+				sz = fn->con[i->arg[0].val].bits.i;
+				if (sz < 0 || sz >= INT_MAX-3)
+					diag("isel: invalid alloc size");
+				/* round up to the alignment, count in 4 byte units */
+				sz = (sz + n-1) & -n;
+				sz /= 4;
+				fn->tmp[i->to.val].slot = fn->slot;
+				fn->slot += sz;
+				*i = (Ins){.op = ONop};
+			}
+
+	/* process basic blocks */
+	n = fn->ntmp;
+	ainfo = emalloc(n * sizeof ainfo[0]);
+	for (b=fn->start; b; b=b->link) {
+		curi = &insb[NIns];
+		/* fix the phi arguments flowing out of b */
+		for (sb=(Blk*[3]){b->s1, b->s2, 0}; *sb; sb++)
+			for (p=(*sb)->phi; p; p=p->link) {
+				for (a=0; p->blk[a] != b; a++)
+					assert(a+1 < p->narg);
+				fixarg(&p->arg[a], p->cls, 1, fn);
+			}
+		memset(ainfo, 0, n * sizeof ainfo[0]);
+		anumber(ainfo, b, fn->con);
+		seljmp(b, fn);
+		for (i=&b->ins[b->nins]; i!=b->ins;)
+			sel(*--i, ainfo, fn);
+		b->nins = &insb[NIns] - curi;
+		idup(&b->ins, curi, b->nins);
+	}
+	free(ainfo);
+
+	if (debug['I']) {
+		fprintf(stderr, "\n> After instruction selection:\n");
+		printfn(fn, stderr);
+	}
+}
diff --git a/src/live.c b/src/live.c
@@ -0,0 +1,174 @@
+#include "all.h"
+
+/* Compute in v the temporaries live on the edge
+ * from b to its successor s: the live-in set of s
+ * without the temporaries defined by its phis,
+ * plus the phi arguments coming from b.
+ */
+void
+liveon(BSet *v, Blk *b, Blk *s)
+{
+	Phi *ph;
+	uint n;
+
+	bscopy(v, s->in);
+	ph = s->phi;
+	while (ph) {
+		bsclr(v, ph->to.val);
+		for (n=0; n<ph->narg; n++) {
+			if (ph->blk[n] != b)
+				continue;
+			if (rtype(ph->arg[n]) != RTmp)
+				continue;
+			bsset(v, ph->arg[n].val);
+		}
+		ph = ph->link;
+	}
+}
+
+/* Representative of the phi class of temporary t:
+ * its phi field when non-zero, t itself otherwise.
+ */
+static int
+phitmp(int t, Tmp *tmp)
+{
+	if (tmp[t].phi)
+		return tmp[t].phi;
+	return t;
+}
+
+/* Record that temporary t1 is live. phi[] maps the
+ * representative of a phi class to the member of
+ * that class currently live, 0 when there is none.
+ */
+static void
+phifix(int t1, short *phi, Tmp *tmp)
+{
+	int t, t2;
+
+	/* detect temporaries arguments
+	 * of the same phi node that
+	 * interfere and separate them
+	 */
+	t = phitmp(t1, tmp);
+	t2 = phi[t];
+	if (t2 && t2 != t1) {
+		/* another member of the class is live:
+		 * make the one that is not the
+		 * representative its own class
+		 */
+		if (t != t1) {
+			tmp[t1].phi = t1;
+			t = t1;
+		} else {
+			tmp[t2].phi = t2;
+			phi[t2] = t2;
+		}
+	}
+	phi[t] = t1;
+}
+
+/* Mark the use of r in block b: when r is a
+ * temporary, add it to the gen set, update the phi
+ * classes, and make it live (counting it in nlv[]
+ * by register class) unless it is live already.
+ */
+static void
+bset(Ref r, Blk *b, int *nlv, short *phi, Tmp *tmp)
+{
+	int t;
+
+	if (rtype(r) != RTmp)
+		return;
+	t = r.val;
+	bsset(b->gen, t);
+	phifix(t, phi, tmp);
+	if (bshas(b->in, t))
+		return;
+	nlv[KBASE(tmp[t].cls)]++;
+	bsset(b->in, t);
+}
+
+/* liveness analysis
+ * requires rpo computation
+ */
+/* Fill the in, out and gen sets and the nlive
+ * counts of every block of f. The blocks are
+ * visited in reverse rpo order, over and over,
+ * until no out set changes.
+ */
+void
+filllive(Fn *f)
+{
+	Blk *b;
+	Ins *i;
+	int k, t, m[2], n, chg, nlv[2];
+	short *phi;
+	BSet u[1], v[1];
+	Mem *ma;
+
+	bsinit(u, f->ntmp);
+	bsinit(v, f->ntmp);
+	phi = emalloc(f->ntmp * sizeof phi[0]);
+	for (b=f->start; b; b=b->link) {
+		bsinit(b->in, f->ntmp);
+		bsinit(b->out, f->ntmp);
+		bsinit(b->gen, f->ntmp);
+	}
+	chg = 1;
+Again:
+	for (n=f->nblk-1; n>=0; n--) {
+		b = f->rpo[n];
+
+		/* out = union of what is live on the
+		 * edges to the successors; u keeps the
+		 * old value to detect changes
+		 */
+		bscopy(u, b->out);
+		if (b->s1) {
+			liveon(v, b, b->s1);
+			bsunion(b->out, v);
+		}
+		if (b->s2) {
+			liveon(v, b, b->s2);
+			bsunion(b->out, v);
+		}
+		chg |= !bsequal(b->out, u);
+
+		/* walk the block backwards, b->in is the
+		 * current live set and nlv[] counts its
+		 * members by register class
+		 */
+		memset(phi, 0, f->ntmp * sizeof phi[0]);
+		memset(nlv, 0, sizeof nlv);
+		bscopy(b->in, b->out);
+		for (t=0; t<f->ntmp; t++)
+			if (bshas(b->in, t)) {
+				phifix(t, phi, f->tmp);
+				nlv[KBASE(f->tmp[t].cls)]++;
+			}
+		if (rtype(b->jmp.arg) == RACall) {
+			/* a lowered return: the return registers are live */
+			assert(bscount(b->in) == 0 && nlv[0] == 0 && nlv[1] == 0);
+			b->in->t[0] |= retregs(b->jmp.arg, nlv);
+		} else
+			bset(b->jmp.arg, b, nlv, phi, f->tmp);
+		for (k=0; k<2; k++)
+			b->nlive[k] = nlv[k];
+		for (i=&b->ins[b->nins]; i!=b->ins;) {
+			if ((--i)->op == OCall && rtype(i->arg[1]) == RACall) {
+				/* the call defines the return registers,
+				 * and uses the argument registers; NISave
+				 * and NFSave are added to the counts when
+				 * updating the maxima at the call
+				 */
+				b->in->t[0] &= ~retregs(i->arg[1], m);
+				for (k=0; k<2; k++)
+					nlv[k] -= m[k];
+				if (nlv[0] + NISave > b->nlive[0])
+					b->nlive[0] = nlv[0] + NISave;
+				if (nlv[1] + NFSave > b->nlive[1])
+					b->nlive[1] = nlv[1] + NFSave;
+				b->in->t[0] |= argregs(i->arg[1], m);
+				for (k=0; k<2; k++)
+					nlv[k] += m[k];
+			}
+			if (!req(i->to, R)) {
+				/* a definition kills the temporary */
+				assert(rtype(i->to) == RTmp);
+				t = i->to.val;
+				if (bshas(b->in, i->to.val))
+					nlv[KBASE(f->tmp[t].cls)]--;
+				bsset(b->gen, t);
+				bsclr(b->in, t);
+				phi[phitmp(t, f->tmp)] = 0;
+			}
+			for (k=0; k<2; k++)
+				switch (rtype(i->arg[k])) {
+				case RAMem:
+					/* a memory reference uses its base and index */
+					ma = &f->mem[i->arg[k].val & AMask];
+					bset(ma->base, b, nlv, phi, f->tmp);
+					bset(ma->index, b, nlv, phi, f->tmp);
+					break;
+				default:
+					bset(i->arg[k], b, nlv, phi, f->tmp);
+					break;
+				}
+			/* nlive is the maximum seen over the block */
+			for (k=0; k<2; k++)
+				if (nlv[k] > b->nlive[k])
+					b->nlive[k] = nlv[k];
+		}
+	}
+	if (chg) {
+		chg = 0;
+		goto Again;
+	}
+	free(phi);
+
+	if (debug['L']) {
+		fprintf(stderr, "\n> Liveness analysis:\n");
+		for (b=f->start; b; b=b->link) {
+			fprintf(stderr, "\t%-10sin:   ", b->name);
+			dumpts(b->in, f->tmp, stderr);
+			fprintf(stderr, "\t          out:  ");
+			dumpts(b->out, f->tmp, stderr);
+			fprintf(stderr, "\t          gen:  ");
+			dumpts(b->gen, f->tmp, stderr);
+			fprintf(stderr, "\t          live: ");
+			fprintf(stderr, "%d %d\n", b->nlive[0], b->nlive[1]);
+		}
+	}
+}
diff --git a/src/main.c b/src/main.c
@@ -0,0 +1,117 @@
+#include "all.h"
+#include <ctype.h>
+#include <getopt.h>
+
+/* debug flags, indexed by an upper case letter;
+ * main() sets the flags named by the -d option
+ */
+char debug['Z'+1] = {
+	['P'] = 0, /* parsing */
+	['A'] = 0, /* abi lowering */
+	['I'] = 0, /* instruction selection */
+	['L'] = 0, /* liveness */
+	['M'] = 0, /* memory optimization */
+	['N'] = 0, /* ssa construction */
+	['C'] = 0, /* copy elimination */
+	['S'] = 0, /* spilling */
+	['R'] = 0, /* reg. allocation */
+};
+
+static FILE *outf;	/* where the assembly is written */
+static int dbg;	/* set when any debug flag is on; no assembly is emitted then */
+
+/* Callback given to parse() for data items: emit
+ * the item, unless debugging is on. The end of a
+ * data definition also writes a marker comment and
+ * calls freeall().
+ */
+static void
+data(Dat *d)
+{
+	if (!dbg) {
+		if (d->type == DEnd) {
+			fputs("/* end data */\n\n", outf);
+			freeall();
+		}
+		emitdat(d, outf);
+	}
+}
+
+/* Callback given to parse() for functions: run
+ * all the passes on fn, relink its blocks in rpo
+ * order and emit the assembly (nothing is emitted
+ * when debugging is on).
+ */
+static void
+func(Fn *fn)
+{
+	int n;
+
+	if (dbg)
+		fprintf(stderr, "**** Function %s ****", fn->name);
+	if (debug['P']) {
+		fprintf(stderr, "\n> After parsing:\n");
+		printfn(fn, stderr);
+	}
+
+	/* the compilation pipeline */
+	fillrpo(fn);
+	fillpreds(fn);
+	filluse(fn);
+	memopt(fn);
+	ssa(fn);
+	filluse(fn);
+	copy(fn);
+	filluse(fn);
+	isel(fn);
+	filllive(fn);
+	fillcost(fn);
+	spill(fn);
+	rega(fn);
+	fillrpo(fn);
+	assert(fn->rpo[0] == fn->start);
+
+	/* chain the blocks in rpo order, the
+	 * last one terminates the list
+	 */
+	n = 0;
+	while (n != fn->nblk-1) {
+		fn->rpo[n]->link = fn->rpo[n+1];
+		n++;
+	}
+	fn->rpo[n]->link = 0;
+
+	if (dbg)
+		fprintf(stderr, "\n");
+	else {
+		emitfn(fn, outf);
+		fprintf(outf, "/* end function %s */\n\n", fn->name);
+	}
+	freeall();
+}
+
+/* Command line driver.
+ *
+ *   -d <flags>  turn on the debug flags named by the
+ *               letters of <flags> (case insensitive,
+ *               other characters are ignored)
+ *   -o <file>   write the assembly to <file>, "-"
+ *               meaning the standard output
+ *
+ * The remaining arguments are the input files; "-"
+ * or no file at all means the standard input. The
+ * exit status is 1 on usage errors and when a file
+ * cannot be opened.
+ */
+int
+main(int ac, char *av[])
+{
+	FILE *inf;
+	char *f;
+	int c;
+
+	outf = stdout;
+	while ((c = getopt(ac, av, "d:o:")) != -1)
+		switch (c) {
+		case 'd':
+			/* the ctype functions are undefined on
+			 * negative values, hence the casts
+			 */
+			for (; *optarg; optarg++)
+				if (isalpha((unsigned char)*optarg)) {
+					debug[toupper((unsigned char)*optarg)] = 1;
+					dbg = 1;
+				}
+			break;
+		case 'o':
+			if (strcmp(optarg, "-") != 0) {
+				outf = fopen(optarg, "w");
+				/* fail now rather than crash on
+				 * the first write to a null stream
+				 */
+				if (!outf) {
+					fprintf(stderr, "cannot open '%s'\n", optarg);
+					exit(1);
+				}
+			}
+			break;
+		default:
+			fprintf(stderr, "usage: %s [-d <flags>] [-o out] {file.ssa, -}\n", av[0]);
+			exit(1);
+		}
+
+	do {
+		/* av[ac] is null, so a missing file reads stdin */
+		f = av[optind];
+		if (!f || strcmp(f, "-") == 0) {
+			inf = stdin;
+			f = "-";
+		} else {
+			inf = fopen(f, "r");
+			if (!inf) {
+				fprintf(stderr, "cannot open '%s'\n", f);
+				exit(1);
+			}
+		}
+		parse(inf, f, data, func);
+	} while (++optind < ac);
+
+	if (!dbg)
+		emitfin(outf);
+
+	exit(0);
+}
diff --git a/src/mem.c b/src/mem.c
@@ -0,0 +1,81 @@
+#include "all.h"
+
+/* Memory optimization:
+ *
+ * - replace alloced slots used only in
+ * load/store operations
+ * Assumption: all the accesses have the
+ * same size (this could be wrong...)
+ */
+
+/* require use, maintains use counts */
+/* Promote to temporaries the slots allocated in the
+ * start block that are only used as the address of
+ * loads and stores: the alloc is removed, stores to
+ * the slot become copies into the temporary and
+ * loads become copies or extensions of it.
+ */
+void
+memopt(Fn *fn)
+{
+	Blk *b;
+	Ins *i, *l;
+	Tmp *t;
+	Use *u, *ue;
+	int a;
+
+	b = fn->start;
+	for (i=b->ins; i-b->ins < b->nins; i++) {
+		if (OAlloc > i->op || i->op > OAlloc1)
+			continue;
+		assert(NAlign == 3);
+		assert(rtype(i->to) == RTmp);
+		t = &fn->tmp[i->to.val];
+		/* give up if the slot is used by a phi or
+		 * a jump, by an instruction that is neither
+		 * a load nor a store, or if its address is
+		 * the value being stored
+		 */
+		for (u=t->use; u != &t->use[t->nuse]; u++) {
+			if (u->type != UIns)
+				goto NextIns;
+			l = u->u.ins;
+			if (!isload(l->op)
+			&& (!isstore(l->op) || req(i->to, l->arg[0])))
+				goto NextIns;
+		}
+		/* get rid of the alloc and replace uses */
+		*i = (Ins){.op = ONop};
+		t->ndef--;
+		/* ue is taken before nuse gets modified below */
+		ue = &t->use[t->nuse];
+		for (u=t->use; u!=ue; u++) {
+			l = u->u.ins;
+			if (isstore(l->op)) {
+				/* the class of the copy is the class
+				 * of the value stored: a stores value
+				 * is single precision, not double
+				 */
+				if (l->op == OStores)
+					l->cls = Ks;
+				else if (l->op == OStored)
+					l->cls = Kd;
+				else if (l->op == OStorel)
+					l->cls = Kl;
+				else
+					l->cls = Kw;
+				l->op = OCopy;
+				l->to = l->arg[1];
+				l->arg[1] = R;
+				/* the use became a definition */
+				t->nuse--;
+				t->ndef++;
+			} else
+				/* try to turn loads into copies so we
+				 * can eliminate them later */
+				switch(l->op) {
+				case OLoad:
+					l->op = OCopy;
+					break;
+				case OLoadsw:
+				case OLoaduw:
+					l->cls = Kw;
+					l->op = OCopy;
+					break;
+				default:
+					/* keep l->cls */
+					/* sub-word load: the matching extension */
+					a = l->op - OLoadsw;
+					l->op = OExtsw + a;
+					break;
+				}
+		}
+	NextIns:;
+	}
+	if (debug['M']) {
+		fprintf(stderr, "\n> After memory optimization:\n");
+		printfn(fn, stderr);
+	}
+}
diff --git a/src/parse.c b/src/parse.c
@@ -0,0 +1,1081 @@
+#include "all.h"
+#include <ctype.h>
+#include <stdarg.h>
+
+/* Extra operand classes used by the A() macro of the opdesc table. */
+enum {
+ Kx = -1, /* Invalid operand */
+ Km = Kl, /* Memory pointer (for x64) */
+};
+
+/* Description of every operation, indexed by opcode: its name,
+ * the NM column, the class expected for each of the two arguments
+ * given the class of the result, and the SF and LF columns.
+ * The name is what lex() matches and what printfn() prints, so
+ * any opcode that can be printed needs an entry here.
+ */
+OpDesc opdesc[NOp] = {
+#define A(a,b,c,d) {[Kw]=K##a, [Kl]=K##b, [Ks]=K##c, [Kd]=K##d}
+
+	/* NAME NM ARGCLS0 ARGCLS1 SF LF */
+	[OAdd] = { "add", 2, {A(w,l,s,d), A(w,l,s,d)}, 1, 0 },
+	[OSub] = { "sub", 2, {A(w,l,s,d), A(w,l,s,d)}, 1, 0 },
+	[ODiv] = { "div", 2, {A(w,l,s,d), A(w,l,s,d)}, 0, 0 },
+	[ORem] = { "rem", 2, {A(w,l,x,x), A(w,l,x,x)}, 0, 0 },
+	[OUDiv] = { "udiv", 2, {A(w,l,s,d), A(w,l,s,d)}, 0, 0 },
+	[OURem] = { "urem", 2, {A(w,l,x,x), A(w,l,x,x)}, 0, 0 },
+	[OMul] = { "mul", 2, {A(w,l,s,d), A(w,l,s,d)}, 0, 0 },
+	[OAnd] = { "and", 2, {A(w,l,s,d), A(w,l,s,d)}, 1, 0 },
+	[OOr] = { "or", 2, {A(w,l,s,d), A(w,l,s,d)}, 1, 0 },
+	[OXor] = { "xor", 2, {A(w,l,s,d), A(w,l,s,d)}, 1, 0 },
+	[OSar] = { "sar", 1, {A(w,l,x,x), A(w,w,x,x)}, 1, 0 },
+	[OShr] = { "shr", 1, {A(w,l,x,x), A(w,w,x,x)}, 1, 0 },
+	[OShl] = { "shl", 1, {A(w,l,x,x), A(w,w,x,x)}, 1, 0 },
+	[OStored] = { "stored", 0, {A(d,d,d,d), A(m,m,m,m)}, 0, 1 },
+	[OStores] = { "stores", 0, {A(s,s,s,s), A(m,m,m,m)}, 0, 1 },
+	[OStorel] = { "storel", 0, {A(l,l,l,l), A(m,m,m,m)}, 0, 1 },
+	[OStorew] = { "storew", 0, {A(w,w,w,w), A(m,m,m,m)}, 0, 1 },
+	[OStoreh] = { "storeh", 0, {A(w,w,w,w), A(m,m,m,m)}, 0, 1 },
+	[OStoreb] = { "storeb", 0, {A(w,w,w,w), A(m,m,m,m)}, 0, 1 },
+	[OLoad] = { "load", 0, {A(m,m,m,m), A(x,x,x,x)}, 0, 1 },
+	[OLoadsw] = { "loadsw", 0, {A(m,m,x,x), A(x,x,x,x)}, 0, 1 },
+	[OLoaduw] = { "loaduw", 0, {A(m,m,x,x), A(x,x,x,x)}, 0, 1 },
+	[OLoadsh] = { "loadsh", 0, {A(m,m,x,x), A(x,x,x,x)}, 0, 1 },
+	[OLoaduh] = { "loaduh", 0, {A(m,m,x,x), A(x,x,x,x)}, 0, 1 },
+	[OLoadsb] = { "loadsb", 0, {A(m,m,x,x), A(x,x,x,x)}, 0, 1 },
+	[OLoadub] = { "loadub", 0, {A(m,m,x,x), A(x,x,x,x)}, 0, 1 },
+	[OExtsw] = { "extsw", 0, {A(w,w,w,w), A(x,x,x,x)}, 0, 1 },
+	[OExtuw] = { "extuw", 0, {A(w,w,w,w), A(x,x,x,x)}, 0, 1 },
+	[OExtsh] = { "extsh", 0, {A(w,w,w,w), A(x,x,x,x)}, 0, 1 },
+	[OExtuh] = { "extuh", 0, {A(w,w,w,w), A(x,x,x,x)}, 0, 1 },
+	[OExtsb] = { "extsb", 0, {A(w,w,w,w), A(x,x,x,x)}, 0, 1 },
+	[OExtub] = { "extub", 0, {A(w,w,w,w), A(x,x,x,x)}, 0, 1 },
+	/* NOTE(review): exts lists a w argument like the integer
+	 * extensions above; confirm this is intended */
+	[OExts] = { "exts", 0, {A(w,w,w,w), A(x,x,x,x)}, 0, 1 },
+	[OTruncd] = { "truncd", 0, {A(d,d,d,d), A(x,x,x,x)}, 0, 1 },
+	[OFtosi] = { "ftosi", 0, {A(s,d,x,x), A(x,x,x,x)}, 0, 1 },
+	[OSitof] = { "sitof", 0, {A(x,x,w,l), A(x,x,x,x)}, 0, 1 },
+	[OCast] = { "cast", 0, {A(s,d,w,l), A(x,x,x,x)}, 0, 1 },
+	[OCopy] = { "copy", 1, {A(w,l,s,d), A(x,x,x,x)}, 0, 1 },
+	[ONop] = { "nop", 0, {A(x,x,x,x), A(x,x,x,x)}, 0, 1 },
+	[OSwap] = { "swap", 2, {A(w,l,s,d), A(w,l,s,d)}, 0, 0 },
+	[OSign] = { "sign", 0, {A(w,l,x,x), A(x,x,x,x)}, 0, 0 },
+	[OSAlloc] = { "salloc", 0, {A(x,l,x,x), A(x,x,x,x)}, 0, 0 },
+	/* printfn() lists OXIDiv in its class-suffix table and
+	 * asserts on a missing name, so it needs an entry too */
+	[OXIDiv] = { "xidiv", 1, {A(w,l,x,x), A(x,x,x,x)}, 0, 0 },
+	[OXDiv] = { "xdiv", 1, {A(w,l,x,x), A(x,x,x,x)}, 0, 0 },
+	[OXCmp] = { "xcmp", 1, {A(w,l,s,d), A(w,l,s,d)}, 1, 0 },
+	[OXTest] = { "xtest", 1, {A(w,l,x,x), A(w,l,x,x)}, 1, 0 },
+	[OAddr] = { "addr", 0, {A(m,m,x,x), A(x,x,x,x)}, 0, 1 },
+	[OPar] = { "parn", 0, {A(x,x,x,x), A(x,x,x,x)}, 0, 0 },
+	[OParc] = { "parc", 0, {A(x,x,x,x), A(x,x,x,x)}, 0, 0 },
+	[OArg] = { "arg", 0, {A(x,x,x,x), A(x,x,x,x)}, 0, 0 },
+	[OArgc] = { "argc", 0, {A(x,x,x,x), A(x,x,x,x)}, 0, 0 },
+	[OCall] = { "call", 0, {A(m,m,m,m), A(x,x,x,x)}, 0, 0 },
+	[OXSetnp] = { "xsetnp", 0, {A(x,x,x,x), A(x,x,x,x)}, 0, 0 },
+	[OXSetp] = { "xsetp", 0, {A(x,x,x,x), A(x,x,x,x)}, 0, 0 },
+	[OAlloc] = { "alloc4", 1, {A(l,l,l,l), A(x,x,x,x)}, 0, 0 },
+	[OAlloc+1] = { "alloc8", 1, {A(l,l,l,l), A(x,x,x,x)}, 0, 0 },
+	[OAlloc+2] = { "alloc16", 1, {A(l,l,l,l), A(x,x,x,x)}, 0, 0 },
+#define X(c) \
+	[OCmpw+IC##c] = { "c" #c "w", 0, {A(w,w,x,x), A(w,w,x,x)}, 1, 0 }, \
+	[OCmpl+IC##c] = { "c" #c "l", 0, {A(l,l,x,x), A(l,l,x,x)}, 1, 0 }, \
+	[OXSet+IC##c] = { "xset" #c, 0, {A(x,x,x,x), A(x,x,x,x)}, 0, 1 },
+	ICMPS(X)
+#undef X
+#define X(c) \
+	[OCmps+FC##c] = { "c" #c "s", 0, {A(s,s,x,x), A(s,s,x,x)}, 1, 0 }, \
+	[OCmpd+FC##c] = { "c" #c "d", 0, {A(d,d,x,x), A(d,d,x,x)}, 1, 0 },
+	FCMPS(X)
+#undef X
+
+};
+#undef A
+
+/* State of the function body parser, threaded through parseline(). */
+typedef enum {
+ PXXX,
+ PLbl, /* a label (or the closing brace) must come next */
+ PPhi, /* just after a label, phi instructions are allowed */
+ PIns, /* after a regular instruction */
+ PEnd, /* the closing brace of the body was read */
+} PState;
+
+/* Token kinds returned by lex(). They start at NPubOp because
+ * lex() returns the opcode itself for operation names found in
+ * opdesc, so both share one numbering.
+ */
+enum {
+ TXXX = NPubOp, /* no token; also marks an empty lookahead */
+ TCall,
+ TPhi,
+ TJmp,
+ TJnz,
+ TRet,
+ TFunc,
+ TType,
+ TData,
+ TAlign,
+ TL,
+ TW,
+ TH,
+ TB,
+ TD,
+ TS,
+ TZ,
+
+ TInt, /* integer literal, value in tokval.num */
+ TFlts, /* s_ literal, value in tokval.flts */
+ TFltd, /* d_ literal, value in tokval.fltd */
+ TTmp, /* %name */
+ TLbl, /* @name */
+ TGlo, /* $name */
+ TTyp, /* :name */
+ TStr, /* quoted string, text in tokval.str */
+
+ TPlus,
+ TEq,
+ TComma,
+ TLParen,
+ TRParen,
+ TLBrace,
+ TRBrace,
+ TNL,
+ TEOF,
+};
+
+
+static FILE *inf; /* stream read by lex() */
+static char *inpath; /* input name, printed by err() */
+static int thead; /* one token of lookahead, TXXX when empty */
+static struct {
+ char chr;
+ double fltd;
+ float flts;
+ int64_t num;
+ char *str;
+} tokval; /* payload of the last token lexed */
+static int lnum; /* current line, printed by err() */
+
+static Tmp *tmp; /* temporaries of the function being parsed */
+static Con *con; /* constants of the function being parsed */
+static int ntmp;
+static int ncon;
+static Phi **plink; /* where the next phi of curb gets linked */
+static Blk **bmap; /* blocks created so far, searched by findblk() */
+static Blk *curb; /* block being filled */
+static Blk **blink; /* where the next block gets linked */
+static int nblk;
+static int rcls; /* return class of the function, 5 when it has none */
+static int ntyp; /* number of entries used in typ[] */
+
+
+/* Report a parse error: formats "<inpath>:<lnum>: " followed by the
+ * printf-style message s and hands the result to diag().
+ * The message is truncated to fit the local buffer.
+ */
+void
+err(char *s, ...)
+{
+	char buf[100];
+	va_list ap;
+	int n;
+
+	n = snprintf(buf, sizeof buf, "%s:%d: ", inpath, lnum);
+	if (n < 0) {
+		/* encoding error, start from an empty prefix */
+		buf[0] = 0;
+		n = 0;
+	}
+	/* snprintf returns the length it wanted to write, which can
+	 * exceed the buffer; only append when room is left, else the
+	 * remaining size would wrap around to a huge value */
+	if ((size_t)n < sizeof buf) {
+		va_start(ap, s);
+		vsnprintf(buf + n, sizeof buf - n, s, ap);
+		va_end(ap);
+	}
+
+	diag(buf);
+}
+
+/* Read one token from inf and return its kind.
+ *
+ * Operation names found in opdesc are returned as their opcode,
+ * everything else as one of the T* constants. The payload of the
+ * token (character, number, float or text) is left in tokval.
+ * Blanks are skipped; a '#' comment extends to the end of the line
+ * and is returned as a newline token.
+ */
+static int
+lex()
+{
+	static struct {
+		char *str;
+		int tok;
+	} tmap[] = {
+		{ "call", TCall },
+		{ "phi", TPhi },
+		{ "jmp", TJmp },
+		{ "jnz", TJnz },
+		{ "ret", TRet },
+		{ "function", TFunc },
+		{ "type", TType },
+		{ "data", TData },
+		{ "align", TAlign },
+		{ "l", TL },
+		{ "w", TW },
+		{ "h", TH },
+		{ "b", TB },
+		{ "d", TD },
+		{ "s", TS },
+		{ "z", TZ },
+		{ "loadw", OLoad }, /* for convenience */
+		{ "loadl", OLoad },
+		{ "loads", OLoad },
+		{ "loadd", OLoad },
+		{ "alloc1", OAlloc },
+		{ "alloc2", OAlloc },
+		{ 0, TXXX }
+	};
+	static char tok[NString];
+	int c, i;
+	int t;
+
+	do
+		c = fgetc(inf);
+	while (isblank(c));
+	t = TXXX;
+	tokval.chr = c;
+	switch (c) {
+	case EOF:
+		return TEOF;
+	case ',':
+		return TComma;
+	case '(':
+		return TLParen;
+	case ')':
+		return TRParen;
+	case '{':
+		return TLBrace;
+	case '}':
+		return TRBrace;
+	case '=':
+		return TEq;
+	case '+':
+		return TPlus;
+	case 's':
+		/* s_<float>, otherwise an identifier starting with s */
+		if (fscanf(inf, "_%f", &tokval.flts) != 1)
+			break;
+		return TFlts;
+	case 'd':
+		/* d_<float>, otherwise an identifier starting with d */
+		if (fscanf(inf, "_%lf", &tokval.fltd) != 1)
+			break;
+		return TFltd;
+	case '%':
+		t = TTmp;
+		goto Alpha;
+	case '@':
+		t = TLbl;
+		goto Alpha;
+	case '$':
+		t = TGlo;
+		goto Alpha;
+	case ':':
+		t = TTyp;
+		goto Alpha;
+	case '#':
+		/* skip the comment; also stop at end of file so a
+		 * comment on an unterminated last line cannot spin */
+		do
+			c = fgetc(inf);
+		while (c != '\n' && c != EOF);
+		/* fall through */
+	case '\n':
+		lnum++;
+		return TNL;
+	}
+	if (isdigit(c) || c == '-' || c == '+') {
+		ungetc(c, inf);
+		if (fscanf(inf, "%"SCNd64, &tokval.num) != 1)
+			err("invalid integer literal");
+		return TInt;
+	}
+	if (c == '"') {
+		tokval.str = vnew(0, 1);
+		for (i=0;; i++) {
+			c = fgetc(inf);
+			/* without this check a missing closing quote
+			 * would grow the string forever */
+			if (c == EOF)
+				err("unterminated string");
+			vgrow(&tokval.str, i+1);
+			/* a quote ends the string unless the previous
+			 * character is a backslash */
+			if (c == '"')
+			if (!i || tokval.str[i-1] != '\\') {
+				tokval.str[i] = 0;
+				return TStr;
+			}
+			tokval.str[i] = c;
+		}
+	}
+	/* sigil tokens jump here to drop the sigil and read the
+	 * first character of the name; keywords arrive with it in c */
+	if (0)
+Alpha:		c = fgetc(inf);
+	if (!isalpha(c) && c != '.' && c != '_')
+		err("lexing failure: invalid character %c (%d)", c, c);
+	i = 0;
+	do {
+		if (i >= NString-1)
+			err("identifier too long");
+		tok[i++] = c;
+		c = fgetc(inf);
+	} while (isalpha(c) || c == '$' || c == '.' || c == '_' || isdigit(c));
+	tok[i] = 0;
+	ungetc(c, inf);
+	tokval.str = tok;
+	if (t != TXXX) {
+		return t;
+	}
+	/* bare word: operation name first, then keyword */
+	for (i=0; i<NPubOp; i++)
+		if (opdesc[i].name)
+		if (strcmp(tok, opdesc[i].name) == 0)
+			return i;
+	for (i=0; tmap[i].str; i++)
+		if (strcmp(tok, tmap[i].str) == 0)
+			return tmap[i].tok;
+	err("unknown keyword %s", tokval.str);
+	return TXXX;
+}
+
+/* Return the next token without consuming it. */
+static int
+peek()
+{
+	if (thead != TXXX)
+		return thead;
+	thead = lex();
+	return thead;
+}
+
+/* Consume the next token and return it. */
+static int
+next()
+{
+	int tok = peek();
+
+	/* empty the lookahead so the following call lexes anew */
+	thead = TXXX;
+	return tok;
+}
+
+/* Consume tokens up to and including the first one that is not a
+ * newline, and return that token.
+ */
+static int
+nextnl()
+{
+	int tok;
+
+	do
+		tok = next();
+	while (tok == TNL);
+	return tok;
+}
+
+/* Consume the next token and report an error through err() unless
+ * it is t. Tokens without an entry in ttoa are shown as "??".
+ */
+static void
+expect(int t)
+{
+	static char *ttoa[] = {
+		[TLbl] = "label",
+		[TComma] = ",",
+		[TEq] = "=",
+		[TNL] = "newline",
+		[TLParen] = "(",
+		[TRParen] = ")",
+		[TLBrace] = "{",
+		[TRBrace] = "}",
+		[TEOF] = 0,
+	};
+	char *s1, *s2;
+	int t1;
+
+	t1 = next();
+	if (t == t1)
+		return;
+	s1 = ttoa[t] ? ttoa[t] : "??";
+	s2 = ttoa[t1] ? ttoa[t1] : "??";
+	/* hand the names over as arguments, never as the format */
+	err("%s expected, got %s instead", s1, s2);
+}
+
+/* Return the reference of the temporary named v, appending a new
+ * entry to tmp[] when the name has not been seen yet.
+ */
+static Ref
+tmpref(char *v)
+{
+	int t;
+
+	t = Tmp0;
+	while (t < ntmp) {
+		if (!strcmp(v, tmp[t].name))
+			return TMP(t);
+		t++;
+	}
+	/* not found: t is now the index of the new slot */
+	ntmp++;
+	vgrow(&tmp, ntmp);
+	strcpy(tmp[t].name, v);
+	return TMP(t);
+}
+
+/* Parse one operand. A temporary yields its reference; an integer,
+ * a float or a global yields a reference to an entry of con[],
+ * reusing an entry that has the same type, bits and label.
+ * Any other token is consumed and R is returned.
+ */
+static Ref
+parseref()
+{
+	Con c;
+	int n;
+
+	memset(&c, 0, sizeof c);
+	switch (next()) {
+	case TTmp:
+		return tmpref(tokval.str);
+	case TInt:
+		c.type = CBits;
+		c.bits.i = tokval.num;
+		break;
+	case TFlts:
+		c.type = CBits;
+		c.bits.s = tokval.flts;
+		c.flt = 1;
+		break;
+	case TFltd:
+		c.type = CBits;
+		c.bits.d = tokval.fltd;
+		c.flt = 2;
+		break;
+	case TGlo:
+		c.type = CAddr;
+		strcpy(c.label, tokval.str);
+		break;
+	default:
+		return R;
+	}
+	/* look the constant up before adding it */
+	for (n=0; n<ncon; n++) {
+		if (con[n].type != c.type)
+			continue;
+		if (con[n].bits.i != c.bits.i)
+			continue;
+		if (strcmp(con[n].label, c.label) == 0)
+			return CON(n);
+	}
+	ncon++;
+	vgrow(&con, ncon);
+	con[n] = c;
+	return CON(n);
+}
+
+/* Parse a class specifier. Returns Kw, Kl, Ks or Kd for the base
+ * classes; for a named type it stores the index of the type in
+ * typ[] into *tyn and returns 4. Anything else is reported with
+ * err().
+ */
+static int
+parsecls(int *tyn)
+{
+ int i;
+
+ switch (next()) {
+ default:
+ err("invalid class specifier");
+ /* NOTE(review): the cases below are entered by fallthrough
+ * if err() ever returns; presumably it does not */
+ case TTyp:
+ for (i=0; i<ntyp; i++)
+ if (strcmp(tokval.str, typ[i].name) == 0) {
+ *tyn = i;
+ return 4;
+ }
+ err("undefined type");
+ case TW:
+ return Kw;
+ case TL:
+ return Kl;
+ case TS:
+ return Ks;
+ case TD:
+ return Kd;
+ }
+}
+
+/* Parse a parenthesized, comma separated list of "class ref" pairs
+ * and append one instruction per element at curi: OArg/OArgc when
+ * arg is non-zero (call arguments), OPar/OParc otherwise (function
+ * parameters, which must be temporaries).
+ */
+static void
+parserefl(int arg)
+{
+	int cls, tok, ty;
+	Ref r;
+
+	expect(TLParen);
+	if (peek() == TRParen) {
+		/* empty list */
+		next();
+		return;
+	}
+	for (;;) {
+		if (curi - insb >= NIns)
+			err("too many instructions (1)");
+		cls = parsecls(&ty);
+		r = parseref();
+		if (req(r, R))
+			err("invalid reference argument");
+		if (!arg && rtype(r) != RTmp)
+			err("invalid function parameter");
+		if (cls == 4) {
+			/* aggregate passed by type */
+			if (arg)
+				*curi = (Ins){OArgc, R, {TYPE(ty), r}, Kl};
+			else
+				*curi = (Ins){OParc, r, {TYPE(ty)}, Kl};
+		} else {
+			if (arg)
+				*curi = (Ins){OArg, R, {r}, cls};
+			else
+				*curi = (Ins){OPar, r, {R}, cls};
+		}
+		curi++;
+		tok = next();
+		if (tok == TRParen)
+			return;
+		if (tok != TComma)
+			err(", or ) expected");
+	}
+}
+
+/* Return the block called name, creating it and recording it in
+ * bmap[] the first time the name is seen.
+ */
+static Blk *
+findblk(char *name)
+{
+	Blk *b;
+	int n;
+
+	for (n=0; n<nblk; n++) {
+		b = bmap[n];
+		if (!strcmp(b->name, name))
+			return b;
+	}
+	nblk++;
+	vgrow(&bmap, nblk);
+	bmap[n] = blknew();
+	strcpy(bmap[n]->name, name);
+	return bmap[n];
+}
+
+/* Finish the current block: copy the instructions accumulated in
+ * insb into it, rewind the buffer and make the next block link
+ * after it.
+ */
+static void
+closeblk()
+{
+	Blk *b = curb;
+
+	b->nins = curi - insb;
+	idup(&b->ins, insb, b->nins);
+	curi = insb;
+	blink = &b->link;
+}
+
+/* Parse one line of a function body and return the next parser
+ * state. ps is the state reached by the previous line: labels are
+ * required in state PLbl and phis are only accepted in state PPhi.
+ * Instructions are appended at curi, phis to the list of the
+ * current block, and jumps close the current block.
+ */
+static PState
+parseline(PState ps)
+{
+ Ref arg[NPred] = {R};
+ Blk *blk[NPred];
+ Phi *phi;
+ Ref r;
+ Blk *b;
+ int t, op, i, k, ty;
+
+ t = nextnl();
+ if (ps == PLbl && t != TLbl && t != TRBrace)
+ err("label or } expected");
+ switch (t) {
+ default:
+ if (isstore(t)) {
+ /* operations without result */
+ r = R;
+ k = 0;
+ op = t;
+ goto DoOp;
+ }
+ err("label, instruction or jump expected");
+ case TRBrace:
+ return PEnd;
+ case TTmp:
+ /* "%tmp =cls op ...", handled after the switch */
+ break;
+ case TLbl:
+ b = findblk(tokval.str);
+ if (b->jmp.type != JXXX)
+ err("multiple definitions of block");
+ /* a block left without a jump falls through to this one */
+ if (curb && curb->jmp.type == JXXX) {
+ closeblk();
+ curb->jmp.type = JJmp;
+ curb->s1 = b;
+ }
+ *blink = b;
+ curb = b;
+ plink = &curb->phi;
+ expect(TNL);
+ return PPhi;
+ case TRet:
+ /* the jump type is picked by the return class, 5 meaning
+ * that the function returns nothing */
+ curb->jmp.type = (int[]){
+ JRetw, JRetl,
+ JRets, JRetd,
+ JRetc, JRet0
+ }[rcls];
+ if (rcls < 5) {
+ r = parseref();
+ if (req(r, R))
+ err("return value expected");
+ curb->jmp.arg = r;
+ }
+ goto Close;
+ case TJmp:
+ curb->jmp.type = JJmp;
+ goto Jump;
+ case TJnz:
+ curb->jmp.type = JJnz;
+ r = parseref();
+ if (req(r, R))
+ err("invalid argument for jnz jump");
+ curb->jmp.arg = r;
+ expect(TComma);
+ Jump:
+ expect(TLbl);
+ curb->s1 = findblk(tokval.str);
+ /* conditional jumps take a second target */
+ if (curb->jmp.type != JJmp) {
+ expect(TComma);
+ expect(TLbl);
+ curb->s2 = findblk(tokval.str);
+ }
+ Close:
+ expect(TNL);
+ closeblk();
+ return PLbl;
+ }
+ r = tmpref(tokval.str);
+ expect(TEq);
+ k = parsecls(&ty);
+ op = next();
+DoOp:
+ /* from here on op == -1 stands for a phi */
+ if (op == TPhi) {
+ if (ps != PPhi)
+ err("unexpected phi instruction");
+ op = -1;
+ }
+ if (op == TCall) {
+ arg[0] = parseref();
+ parserefl(1);
+ expect(TNL);
+ op = OCall;
+ /* a call returning an aggregate carries its type in
+ * the second argument */
+ if (k == 4) {
+ k = Kl;
+ arg[1] = TYPE(ty);
+ } else
+ arg[1] = R;
+ goto Ins;
+ }
+ if (k == 4)
+ err("size class must be w, l, s, or d");
+ if (op >= NPubOp)
+ err("invalid instruction");
+ i = 0;
+ if (peek() != TNL)
+ for (;;) {
+ if (i == NPred)
+ err("too many arguments");
+ /* each phi argument is preceded by its source block */
+ if (op == -1) {
+ expect(TLbl);
+ blk[i] = findblk(tokval.str);
+ }
+ arg[i] = parseref();
+ if (req(arg[i], R))
+ err("invalid instruction argument");
+ i++;
+ t = peek();
+ if (t == TNL)
+ break;
+ if (t != TComma)
+ err(", or end of line expected");
+ next();
+ }
+ next();
+ if (op != -1) {
+ Ins:
+ if (curi - insb >= NIns)
+ err("too many instructions (2)");
+ curi->op = op;
+ curi->cls = k;
+ curi->to = r;
+ curi->arg[0] = arg[0];
+ curi->arg[1] = arg[1];
+ curi++;
+ return PIns;
+ } else {
+ phi = alloc(sizeof *phi);
+ phi->to = r;
+ phi->cls = k;
+ memcpy(phi->arg, arg, i * sizeof arg[0]);
+ memcpy(phi->blk, blk, i * sizeof blk[0]);
+ phi->narg = i;
+ *plink = phi;
+ plink = &phi->link;
+ return PPhi;
+ }
+}
+
+/* Parse a function definition, the "function" keyword having been
+ * consumed already: an optional return class, the $name, the
+ * parameter list and the body between braces. Returns a newly
+ * allocated Fn that owns the temporaries and constants collected
+ * while parsing.
+ */
+static Fn *
+parsefn()
+{
+ PState ps;
+ Fn *fn;
+
+ /* reset the per-function parser state */
+ ntmp = Tmp0;
+ ncon = 1; /* first constant must be 0 */
+ curb = 0;
+ nblk = 0;
+ curi = insb;
+ tmp = vnew(ntmp, sizeof tmp[0]);
+ con = vnew(ncon, sizeof con[0]);
+ bmap = vnew(nblk, sizeof bmap[0]);
+ con[0].type = CBits;
+ fn = alloc(sizeof *fn);
+ blink = &fn->start;
+ fn->retty = -1;
+ /* no class before the name means no return value (rcls 5) */
+ if (peek() != TGlo)
+ rcls = parsecls(&fn->retty);
+ else
+ rcls = 5;
+ if (next() != TGlo)
+ err("function name expected");
+ strcpy(fn->name, tokval.str);
+ parserefl(0);
+ if (nextnl() != TLBrace)
+ err("function body must start with {");
+ ps = PLbl;
+ do
+ ps = parseline(ps);
+ while (ps != PEnd);
+ if (!curb)
+ err("empty file");
+ if (curb->jmp.type == JXXX)
+ err("last block misses jump");
+ fn->tmp = tmp;
+ fn->con = con;
+ fn->mem = vnew(0, sizeof fn->mem[0]);
+ fn->ntmp = ntmp;
+ fn->ncon = ncon;
+ fn->nmem = 0;
+ fn->nblk = nblk;
+ fn->rpo = 0;
+ return fn;
+}
+
+/* Parse a type definition, the "type" keyword having been consumed
+ * already, and store it in the next free entry of typ[]:
+ *
+ *     :name = [align N] { SIZE }             (dark type)
+ *     :name = [align N] { item [count], ... }
+ *
+ * where item is one of d, l, s, w, h, b. For the second form the
+ * segments (with padding segments inserted for alignment), the
+ * size and, unless given, the alignment are computed here. A type
+ * with too many segments for seg[] is marked dark.
+ */
+static void
+parsetyp()
+{
+	Typ *ty;
+	int t, n, sz, al, s, a, c, flt;
+
+	if (ntyp >= NTyp)
+		err("too many type definitions");
+	ty = &typ[ntyp++];
+	ty->align = -1;
+	if (nextnl() != TTyp || nextnl() != TEq)
+		err("type name, then = expected");
+	strcpy(ty->name, tokval.str);
+	t = nextnl();
+	if (t == TAlign) {
+		if (nextnl() != TInt)
+			err("alignment expected");
+		/* the alignment is stored as a power of two */
+		for (al=0; tokval.num /= 2; al++)
+			;
+		ty->align = al;
+		t = nextnl();
+	}
+	if (t != TLBrace)
+		err("type body must start with {");
+	t = nextnl();
+	if (t == TInt) {
+		ty->dark = 1;
+		ty->size = tokval.num;
+		if (ty->align == -1)
+			err("dark types need alignment");
+		t = nextnl();
+	} else {
+		ty->dark = 0;
+		n = -1;
+		sz = 0;
+		al = 0;
+		for (;;) {
+			flt = 0;
+			switch (t) {
+			default: err("invalid size specifier %c", tokval.chr);
+			case TD: flt = 1;
+			case TL: s = 8; a = 3; break;
+			case TS: flt = 1;
+			case TW: s = 4; a = 2; break;
+			case TH: s = 2; a = 1; break;
+			case TB: s = 1; a = 0; break;
+			}
+			if (a > al)
+				al = a;
+			/* from here on a is the number of padding
+			 * bytes needed to align the item on s */
+			if ((a = sz & (s-1))) {
+				a = s - a;
+				if (++n < NSeg) {
+					/* padding segment */
+					ty->seg[n].ispad = 1;
+					ty->seg[n].len = a;
+				}
+			}
+			t = nextnl();
+			if (t == TInt) {
+				c = tokval.num;
+				t = nextnl();
+			} else
+				c = 1;
+			/* the padding precedes the first element
+			 * only, count it once and not per element */
+			sz += a;
+			while (c-- > 0) {
+				if (++n < NSeg) {
+					ty->seg[n].isflt = flt;
+					ty->seg[n].ispad = 0;
+					ty->seg[n].len = s;
+				}
+				sz += s;
+			}
+			if (t != TComma)
+				break;
+			t = nextnl();
+		}
+		/* terminate the segment list, or give up on
+		 * describing the layout when it does not fit */
+		if (++n >= NSeg)
+			ty->dark = 1;
+		else
+			ty->seg[n].len = 0;
+		if (ty->align == -1)
+			ty->align = al;
+		else
+			al = ty->align;
+		/* round the size up to the alignment */
+		a = (1 << al) - 1;
+		ty->size = (sz + a) & ~a;
+	}
+	if (t != TRBrace)
+		err("expected closing }");
+}
+
+/* Fill d with a reference to the global whose name was just lexed,
+ * followed by an optional "+ offset".
+ */
+static void
+parsedatref(Dat *d)
+{
+	d->isref = 1;
+	d->u.ref.nam = tokval.str;
+	d->u.ref.off = 0;
+	if (peek() != TPlus)
+		return;
+	next();
+	if (next() != TInt)
+		err("invalid token after offset in ref");
+	d->u.ref.off = tokval.num;
+}
+
+/* Fill d with the string literal that was just lexed. */
+static void
+parsedatstr(Dat *d)
+{
+	d->u.str = tokval.str;
+	d->isstr = 1;
+}
+
+/* Parse a data definition, the "data" keyword having been consumed
+ * already, and feed it to cb as a sequence of Dat records: DStart,
+ * an optional DAlign, DName, one record per item and finally DEnd.
+ * The same Dat object is reused for every call.
+ */
+static void
+parsedat(void cb(Dat *))
+{
+ char s[NString];
+ int t;
+ Dat d;
+
+ d.type = DStart;
+ d.isstr = 0;
+ d.isref = 0;
+ cb(&d);
+ if (nextnl() != TGlo || nextnl() != TEq)
+ err("data name, then = expected");
+ /* keep a copy of the name, later tokens reuse the buffer */
+ strcpy(s, tokval.str);
+ t = nextnl();
+ if (t == TAlign) {
+ if (nextnl() != TInt)
+ err("alignment expected");
+ d.type = DAlign;
+ d.u.num = tokval.num;
+ cb(&d);
+ t = nextnl();
+ }
+ d.type = DName;
+ d.u.str = s;
+ cb(&d);
+
+ if (t != TLBrace)
+ err("expected data contents in { .. }");
+ for (;;) {
+ /* a size specifier, then one or more values */
+ switch (nextnl()) {
+ default: err("invalid size specifier %c in data", tokval.chr);
+ case TRBrace: goto Done;
+ case TL: d.type = DL; break;
+ case TW: d.type = DW; break;
+ case TH: d.type = DH; break;
+ case TB: d.type = DB; break;
+ case TS: d.type = DW; break;
+ case TD: d.type = DL; break;
+ case TZ: d.type = DZ; break;
+ }
+ t = nextnl();
+ do {
+ d.isref = 0;
+ d.isstr = 0;
+ memset(&d.u, 0, sizeof d.u);
+ if (t == TFlts)
+ d.u.flts = tokval.flts;
+ else if (t == TFltd)
+ d.u.fltd = tokval.fltd;
+ else if (t == TInt)
+ d.u.num = tokval.num;
+ else if (t == TGlo)
+ parsedatref(&d);
+ else if (t == TStr)
+ parsedatstr(&d);
+ else
+ err("constant literal expected");
+ cb(&d);
+ t = nextnl();
+ /* only numeric literals continue the same item */
+ } while (t == TInt || t == TFlts || t == TFltd);
+ if (t == TRBrace)
+ break;
+ if (t != TComma)
+ err(", or } expected");
+ }
+Done:
+ d.type = DEnd;
+ cb(&d);
+}
+
+/* Parse the whole of f; path is only used in error messages.
+ * func is called with every function parsed and data with the
+ * records of every data definition. Type definitions are stored
+ * in typ[]. Returns at end of file.
+ */
+void
+parse(FILE *f, char *path, void data(Dat *), void func(Fn *))
+{
+	int tok;
+
+	inf = f;
+	inpath = path;
+	lnum = 1;
+	thead = TXXX;
+	ntyp = 0;
+	for (;;) {
+		tok = nextnl();
+		if (tok == TEOF)
+			return;
+		if (tok == TFunc)
+			func(parsefn());
+		else if (tok == TType)
+			parsetyp();
+		else if (tok == TData)
+			parsedat(data);
+		else
+			err("top-level definition expected");
+	}
+}
+
+/* Print the constant c on f: "$label" with an optional signed
+ * offset for addresses, an s_ or d_ literal for floats and a plain
+ * integer otherwise. Undefined constants print nothing.
+ */
+static void
+printcon(Con *c, FILE *f)
+{
+	if (c->type == CAddr) {
+		fprintf(f, "$%s", c->label);
+		if (c->bits.i)
+			fprintf(f, "%+"PRIi64, c->bits.i);
+		return;
+	}
+	if (c->type != CBits)
+		return;
+	switch (c->flt) {
+	case 1:
+		fprintf(f, "s_%f", c->bits.s);
+		break;
+	case 2:
+		fprintf(f, "d_%lf", c->bits.d);
+		break;
+	default:
+		fprintf(f, "%"PRIi64, c->bits.i);
+		break;
+	}
+}
+
+/* Print the reference r of function fn on f. */
+void
+printref(Ref r, Fn *fn, FILE *f)
+{
+	int sep;
+	Mem *m;
+
+	switch (rtype(r)) {
+	case RTmp:
+		/* indices below Tmp0 are printed as registers */
+		if (r.val >= Tmp0)
+			fprintf(f, "%%%s", fn->tmp[r.val].name);
+		else
+			fprintf(f, "R%d", r.val);
+		break;
+	case RCon:
+		printcon(&fn->con[r.val], f);
+		break;
+	case RSlot:
+		fprintf(f, "S%d", r.val);
+		break;
+	case RACall:
+		fprintf(f, "%03x", r.val & AMask);
+		break;
+	case RAType:
+		fprintf(f, ":%s", typ[r.val & AMask].name);
+		break;
+	case RAMem:
+		/* [offset + base + scale * index], absent parts
+		 * being left out */
+		m = &fn->mem[r.val & AMask];
+		sep = 0;
+		fputc('[', f);
+		if (m->offset.type != CUndef) {
+			printcon(&m->offset, f);
+			sep = 1;
+		}
+		if (!req(m->base, R)) {
+			if (sep)
+				fprintf(f, " + ");
+			printref(m->base, fn, f);
+			sep = 1;
+		}
+		if (!req(m->index, R)) {
+			if (sep)
+				fprintf(f, " + ");
+			fprintf(f, "%d * ", m->scale);
+			printref(m->index, fn, f);
+		}
+		fputc(']', f);
+		break;
+	}
+}
+
+/* Print the function fn on f in textual form: every block with
+ * its label, phis, instructions and final jump. The parameter
+ * list is always printed empty.
+ */
+void
+printfn(Fn *fn, FILE *f)
+{
+ /* names of the jumps, indexed by jump type */
+ static char *jtoa[NJmp] = {
+ [JRet0] = "ret",
+ [JRetw] = "retw",
+ [JRetl] = "retl",
+ [JRetc] = "retc",
+ [JRets] = "rets",
+ [JRetd] = "retd",
+ [JJnz] = "jnz",
+ [JXJnp] = "xjnp",
+ [JXJp] = "xjp",
+ #define X(c) [JXJc+IC##c] = "xj" #c,
+ ICMPS(X)
+ #undef X
+ };
+ /* operations that get their class appended to their name
+ * when they have no result */
+ static char prcls[NOp] = {
+ [OArg] = 1,
+ [OSwap] = 1,
+ [OXCmp] = 1,
+ [OXTest] = 1,
+ [OXDiv] = 1,
+ [OXIDiv] = 1,
+ };
+ static char ktoc[] = "wlsd";
+ Blk *b;
+ Phi *p;
+ Ins *i;
+ uint n;
+
+ fprintf(f, "function $%s() {\n", fn->name);
+ for (b=fn->start; b; b=b->link) {
+ fprintf(f, "@%s\n", b->name);
+ for (p=b->phi; p; p=p->link) {
+ fprintf(f, "\t");
+ printref(p->to, fn, f);
+ fprintf(f, " =%c phi ", ktoc[p->cls]);
+ assert(p->narg);
+ for (n=0;; n++) {
+ fprintf(f, "@%s ", p->blk[n]->name);
+ printref(p->arg[n], fn, f);
+ if (n == p->narg-1) {
+ fprintf(f, "\n");
+ break;
+ } else
+ fprintf(f, ", ");
+ }
+ }
+ for (i=b->ins; i-b->ins < b->nins; i++) {
+ fprintf(f, "\t");
+ if (!req(i->to, R)) {
+ printref(i->to, fn, f);
+ fprintf(f, " =%c ", ktoc[i->cls]);
+ }
+ /* every operation printed must be named in opdesc */
+ assert(opdesc[i->op].name);
+ fprintf(f, "%s", opdesc[i->op].name);
+ if (req(i->to, R) && prcls[i->op])
+ fputc(ktoc[i->cls], f);
+ if (!req(i->arg[0], R)) {
+ fprintf(f, " ");
+ printref(i->arg[0], fn, f);
+ }
+ if (!req(i->arg[1], R)) {
+ fprintf(f, ", ");
+ printref(i->arg[1], fn, f);
+ }
+ fprintf(f, "\n");
+ }
+ switch (b->jmp.type) {
+ case JRet0:
+ case JRetw:
+ case JRetl:
+ case JRets:
+ case JRetd:
+ case JRetc:
+ fprintf(f, "\t%s", jtoa[b->jmp.type]);
+ if (b->jmp.type != JRet0 || !req(b->jmp.arg, R)) {
+ fprintf(f, " ");
+ printref(b->jmp.arg, fn, f);
+ }
+ if (b->jmp.type == JRetc)
+ fprintf(f, ", :%s", typ[fn->retty].name);
+ fprintf(f, "\n");
+ break;
+ case JJmp:
+ /* a jump to the block printed next is left implicit */
+ if (b->s1 != b->link)
+ fprintf(f, "\tjmp @%s\n", b->s1->name);
+ break;
+ default:
+ fprintf(f, "\t%s ", jtoa[b->jmp.type]);
+ if (b->jmp.type == JJnz) {
+ printref(b->jmp.arg, fn, f);
+ fprintf(f, ", ");
+ }
+ fprintf(f, "@%s, @%s\n", b->s1->name, b->s2->name);
+ break;
+ }
+ }
+ fprintf(f, "}\n");
+}
diff --git a/src/rega.c b/src/rega.c
@@ -0,0 +1,598 @@
+#include "all.h"
+
+#ifdef TEST_PMOV
+ #undef assert
+ #define assert(x) assert_test(#x, x)
+#endif
+
+typedef struct RMap RMap;
+
+/* A set of temporary to register assignments. */
+struct RMap {
+ int t[NIReg+NFReg]; /* t[i] is the temporary of mapping i */
+ int r[NIReg+NFReg]; /* r[i] is the register of mapping i */
+ BSet b[1]; /* temporaries and registers that are mapped */
+ int n; /* number of mappings */
+};
+
+static bits regu; /* registers used */
+static Tmp *tmp; /* function temporaries */
+static Mem *mem; /* function mem references */
+static struct {
+ Ref src, dst;
+ int cls;
+} *pm; /* parallel move constructed */
+static int cpm, npm; /* capacity and size of pm */
+
+/* Return a pointer to the register hint of t, which is stored on
+ * the temporary that phicls() returns for t.
+ */
+static int *
+hint(int t)
+{
+	int rep;
+
+	rep = phicls(t, tmp);
+	return &tmp[rep].hint.r;
+}
+
+/* Make r the register hint of t, unless t has a hint already or r
+ * belongs to the hint mask hint.m of t.
+ */
+static void
+sethint(int t, int r)
+{
+	bits mask;
+	int *h;
+
+	mask = tmp[phicls(t, tmp)].hint.m;
+	h = hint(t);
+	if (*h != -1)
+		return;
+	if (BIT(r) & mask)
+		return;
+	*h = r;
+}
+
+/* Copy the register map mb into ma. */
+static void
+rcopy(RMap *ma, RMap *mb)
+{
+	ma->n = mb->n;
+	bscopy(ma->b, mb->b);
+	memcpy(ma->r, mb->r, sizeof ma->r);
+	memcpy(ma->t, mb->t, sizeof ma->t);
+}
+
+/* Return the register that m assigns to t, or -1 if t has none. */
+static int
+rfind(RMap *m, int t)
+{
+	int k;
+
+	k = 0;
+	while (k < m->n) {
+		if (m->t[k] == t)
+			return m->r[k];
+		k++;
+	}
+	return -1;
+}
+
+/* Return where t lives according to m: its register if it has
+ * one, its spill slot otherwise.
+ */
+static Ref
+rref(RMap *m, int t)
+{
+	int r, s;
+
+	r = rfind(m, t);
+	if (r != -1)
+		return TMP(r);
+	s = tmp[t].slot;
+	assert(s != -1 && "should have spilled");
+	return SLOT(s);
+}
+
+/* Add the mapping of temporary t to register r in m and record r
+ * in the set of used registers. Neither t nor r may be mapped
+ * already; t is either a regular temporary or the register itself.
+ */
+static void
+radd(RMap *m, int t, int r)
+{
+	assert((t >= Tmp0 || t == r) && "invalid temporary");
+	assert(((RAX <= r && r < RAX + NIReg) || (XMM0 <= r && r < XMM0 + NFReg)) && "invalid register");
+	assert(!bshas(m->b, t) && "temporary has mapping");
+	assert(!bshas(m->b, r) && "register already allocated");
+	/* slot m->n is written below, so it has to be strictly
+	 * inside the NIReg+NFReg entries of t[] and r[] */
+	assert(m->n < NIReg+NFReg && "too many mappings");
+	bsset(m->b, t);
+	bsset(m->b, r);
+	m->t[m->n] = t;
+	m->r[m->n] = r;
+	m->n++;
+	regu |= BIT(r);
+}
+
+/* Give t a register in m and return it as a reference. A register
+ * is its own mapping and a temporary already mapped keeps its
+ * register. Otherwise the hint of t is used when it is free, else
+ * the first free register of the class of t, avoiding those in
+ * the hint mask of t when possible.
+ */
+static Ref
+ralloc(RMap *m, int t)
+{
+ bits regs;
+ int r, r0, r1;
+
+ if (t < Tmp0) {
+ assert(bshas(m->b, t));
+ return TMP(t);
+ }
+ if (bshas(m->b, t)) {
+ r = rfind(m, t);
+ assert(r != -1);
+ return TMP(r);
+ }
+ r = *hint(t);
+ if (r == -1 || bshas(m->b, r)) {
+ /* regs is the set of registers to stay away from */
+ regs = tmp[phicls(t, tmp)].hint.m;
+ regs |= m->b->t[0];
+ /* pick the range of registers matching the class */
+ switch (KBASE(tmp[t].cls)) {
+ case 0:
+ r0 = RAX;
+ r1 = RAX + NIReg;
+ break;
+ case 1:
+ r0 = XMM0;
+ r1 = XMM0 + NFReg;
+ break;
+ }
+ for (r=r0; r<r1; r++)
+ if (!(regs & BIT(r)))
+ goto Found;
+ /* second chance, ignoring the hint mask */
+ for (r=r0; r<r1; r++)
+ if (!bshas(m->b, r))
+ goto Found;
+ diag("rega: no more regs");
+ }
+Found:
+ radd(m, t, r);
+ sethint(t, r);
+ return TMP(r);
+}
+
+/* Remove the mapping of t from m. Returns the register t was
+ * mapped to, or -1 if t had no mapping.
+ */
+static int
+rfree(RMap *m, int t)
+{
+ int i, r;
+
+ if (!bshas(m->b, t))
+ return -1;
+ /* t is in the bitset, so it must be found in the array */
+ for (i=0; m->t[i] != t; i++)
+ assert(i+1 < m->n);
+ r = m->r[i];
+ bsclr(m->b, t);
+ bsclr(m->b, r);
+ m->n--;
+ /* close the gap left in both arrays */
+ memmove(&m->t[i], &m->t[i+1], (m->n-i) * sizeof m->t[0]);
+ memmove(&m->r[i], &m->r[i+1], (m->n-i) * sizeof m->r[0]);
+ return r;
+}
+
+/* Print the mappings of m on stderr as a line of
+ * (temporary, register) pairs.
+ */
+static void
+mdump(RMap *m)
+{
+	int k, t;
+
+	for (k=0; k<m->n; k++) {
+		t = m->t[k];
+		fprintf(stderr, " (%s, R%d)", tmp[t].name, m->r[k]);
+	}
+	fprintf(stderr, "\n");
+}
+
+/* Append the move of src to dst, of class k, to the parallel move
+ * under construction, growing the pm array when it is full.
+ */
+static void
+pmadd(Ref src, Ref dst, int k)
+{
+	int n;
+
+	if (npm == cpm) {
+		cpm = cpm * 2 + 16;
+		pm = realloc(pm, cpm * sizeof pm[0]);
+		if (!pm)
+			diag("pmadd: out of memory");
+	}
+	n = npm++;
+	pm[n].cls = k;
+	pm[n].dst = dst;
+	pm[n].src = src;
+}
+
+/* Progress of one move of the parallel move during pmgen(). */
+enum PMStat { ToMove, Moving, Moved };
+
+/* Emit at curi the code for move i of the parallel move, after
+ * having recursively emitted the pending moves that read its
+ * destination. status holds the progress of every move and *k the
+ * class used for swaps, widened along the way. When the moves
+ * visited form a cycle, swaps are emitted instead of copies and
+ * the reference that closes the cycle is returned until the move
+ * reading it is reached; R is returned otherwise.
+ */
+static Ref
+pmrec(enum PMStat *status, int i, int *k)
+{
+ Ref swp, swp1;
+ int j, k1;
+
+ /* note, this routine might emit
+ * too many large instructions:
+ *
+ * , x -- x
+ * x -- x -- x |
+ * ` x -- x
+ *
+ * if only the first move is wide
+ * the whole cycle will be wide,
+ * this is safe but not necessary
+ */
+
+ /* a move onto itself needs no code */
+ if (req(pm[i].src, pm[i].dst))
+ return R;
+ status[i] = Moving;
+ assert(KBASE(*k) == KBASE(pm[i].cls));
+ assert((Kw|1) == Kl && (Ks|1) == Kd);
+ *k |= KWIDE(pm[i].cls); /* see above */
+ swp = R;
+ for (j=0; j<npm; j++) {
+ /* move j reads what move i is about to overwrite */
+ if (req(pm[j].src, pm[i].dst))
+ switch (status[j]) {
+ case ToMove:
+ k1 = *k;
+ swp1 = pmrec(status, j, &k1);
+ if (!req(swp1, R)) {
+ assert(req(swp, R));
+ swp = swp1;
+ *k = k1;
+ }
+ break;
+ case Moving:
+ /* back on a move in progress: a cycle */
+ assert(req(swp, R));
+ swp = pm[i].dst;
+ break;
+ case Moved:
+ break;
+ }
+ }
+ status[i] = Moved;
+ if (req(swp, R)) {
+ *curi++ = (Ins){OCopy, pm[i].dst, {pm[i].src}, pm[i].cls};
+ return R;
+ } else if (!req(swp, pm[i].src)) {
+ *curi++ = (Ins){OSwap, R, {pm[i].src, pm[i].dst}, *k};
+ return swp;
+ } else
+ return R;
+
+}
+
+/* Emit, from the start of insb, the instructions that carry out
+ * the parallel move stored in pm[0..npm).
+ */
+static void
+pmgen()
+{
+ int i, k;
+ enum PMStat *status;
+
+ status = alloc(npm * sizeof status[0]);
+ /* the statuses are expected to start as ToMove, which is 0;
+ * presumably alloc() returns zeroed memory */
+ assert(!npm || status[npm-1] == ToMove);
+ curi = insb;
+ for (i=0; i<npm; i++)
+ if (status[i] == ToMove) {
+ k = pm[i].cls;
+ pmrec(status, i, &k);
+ }
+}
+
+/* Update m so that register r holds to, or holds itself when to
+ * is R. Any other temporary that was using r is moved to another
+ * register.
+ */
+static void
+move(int r, Ref to, RMap *m)
+{
+ int n, t, r1;
+
+ r1 = req(to, R) ? -1 : rfree(m, to.val);
+ if (bshas(m->b, r) && r1 != r) {
+ /* r is used and not by to */
+ for (n=0; m->r[n] != r; n++)
+ assert(n+1 < m->n);
+ t = m->t[n];
+ rfree(m, t);
+ /* keep r marked while reallocating t so that
+ * ralloc() cannot hand r out again */
+ bsset(m->b, r);
+ ralloc(m, t);
+ bsclr(m->b, r);
+ }
+ t = req(to, R) ? r : to.val;
+ radd(m, t, r);
+}
+
+/* Return 1 when i is a copy whose source is a register, else 0. */
+static int
+regcpy(Ins *i)
+{
+	if (i->op != OCopy)
+		return 0;
+	return isreg(i->arg[0]) != 0;
+}
+
+/* Handle the run of copies from registers that ends at i in block
+ * b, together with the call that may precede the run. The run is
+ * treated as one parallel move: m is updated to the mapping that
+ * holds before the run, and the run is replaced in b->ins by the
+ * instructions generated by pmgen(). Returns a pointer to the
+ * first generated instruction in the new instruction array.
+ */
+static Ins *
+dopm(Blk *b, Ins *i, RMap *m)
+{
+ RMap m0;
+ int n, r, r1, t, s;
+ Ins *i0, *i1, *ip, *ir;
+ bits def;
+
+ m0 = *m;
+ i1 = ++i;
+ /* walk the run backwards, i ends on its first copy */
+ do {
+ i--;
+ move(i->arg[0].val, i->to, m);
+ } while (i != b->ins && regcpy(i-1));
+ assert(m0.n <= m->n);
+ if (i != b->ins && (i-1)->op == OCall) {
+ /* reserve the registers in rsave that do not hold
+ * a return value of the call */
+ def = retregs((i-1)->arg[1], 0);
+ for (r=0; r<NRSave; r++)
+ if (!(BIT(rsave[r]) & def))
+ move(rsave[r], R, m);
+ }
+ /* build the moves that go from the new mapping to the
+ * locations expected after the run (m0 or a spill slot) */
+ for (npm=0, n=0; n<m->n; n++) {
+ t = m->t[n];
+ s = tmp[t].slot;
+ r1 = m->r[n];
+ r = rfind(&m0, t);
+ if (r != -1)
+ pmadd(TMP(r1), TMP(r), tmp[t].cls);
+ else if (s != -1)
+ pmadd(TMP(r1), SLOT(s), tmp[t].cls);
+ }
+ /* before the run the destinations are dead and the source
+ * registers are live */
+ for (ip=i; ip<i1; ip++) {
+ if (!req(ip->to, R))
+ rfree(m, ip->to.val);
+ r = ip->arg[0].val;
+ if (rfind(m, r) == -1)
+ radd(m, r, r);
+ }
+ pmgen();
+#ifdef TEST_PMOV
+ return 0;
+#endif
+ /* splice the generated code in place of the run */
+ n = b->nins - (i1 - i) + (curi - insb);
+ i0 = alloc(n * sizeof(Ins));
+ ip = icpy(ip = i0, b->ins, i - b->ins);
+ ip = icpy(ir = ip, insb, curi - insb);
+ ip = icpy(ip, i1, &b->ins[b->nins] - i1);
+ b->nins = n;
+ b->ins = i0;
+ return ir;
+}
+
+/* Return non-zero when r1 should be allocated before r2. For now
+ * this only tells whether r1 has a register hint; r2 is ignored.
+ */
+static int
+prio(Ref r1, Ref r2)
+{
+	int *h;
+
+	/* trivial heuristic to begin with,
+	 * later we can use the distance to
+	 * the definition instruction
+	 */
+	(void) r2;
+	h = hint(r1.val);
+	return *h != -1;
+}
+
+/* Insert r in the array rs, which holds p entries already, moving
+ * it towards the front for as long as prio() ranks it before the
+ * entry that precedes it.
+ */
+static void
+insert(Ref *r, Ref **rs, int p)
+{
+ int i;
+
+ rs[i = p] = r;
+ while (i-- > 0 && prio(*r, *rs[i])) {
+ rs[i+1] = rs[i];
+ rs[i] = r;
+ }
+}
+
+/* Allocate registers in block b, walking its instructions from
+ * the last one to the first. cur is the mapping at the end of the
+ * block on entry and the mapping at its beginning on return.
+ * Instructions whose result is neither mapped nor a register are
+ * turned into nops.
+ */
+static void
+doblk(Blk *b, RMap *cur)
+{
+ int x, r, nr;
+ bits rs;
+ Ins *i;
+ Mem *m;
+ Ref *ra[4];
+
+ if (rtype(b->jmp.arg) == RTmp)
+ b->jmp.arg = ralloc(cur, b->jmp.arg.val);
+ else if (rtype(b->jmp.arg) == RACall) {
+ /* add return registers */
+ rs = retregs(b->jmp.arg, 0);
+ for (r=0; rs; rs/=2, r++)
+ if (rs & 1)
+ radd(cur, r, r);
+ }
+ for (i=&b->ins[b->nins]; i!=b->ins;) {
+ switch ((--i)->op) {
+ case OCall:
+ /* before the call only its argument registers
+ * remain mapped among those in rsave */
+ rs = argregs(i->arg[1], 0);
+ for (r=0; r<NRSave; r++)
+ if (!(BIT(rsave[r]) & rs))
+ rfree(cur, rsave[r]);
+ break;
+ case OCopy:
+ if (isreg(i->arg[0])) {
+ /* dopm() rewrites b->ins and returns the
+ * position to resume from */
+ i = dopm(b, i, cur);
+ continue;
+ }
+ if (isreg(i->to))
+ if (rtype(i->arg[0]) == RTmp)
+ sethint(i->arg[0].val, i->to.val);
+ /* fall through */
+ default:
+ if (!req(i->to, R)) {
+ assert(rtype(i->to) == RTmp);
+ r = rfree(cur, i->to.val);
+ if (r == -1 && !isreg(i->to)) {
+ *i = (Ins){.op = ONop};
+ continue;
+ }
+ if (i->to.val >= Tmp0)
+ i->to = TMP(r);
+ }
+ break;
+ }
+ /* collect the temporaries used by the arguments, ordered
+ * by prio(), then allocate them */
+ for (x=0, nr=0; x<2; x++)
+ switch (rtype(i->arg[x])) {
+ case RAMem:
+ m = &mem[i->arg[x].val & AMask];
+ if (rtype(m->base) == RTmp)
+ insert(&m->base, ra, nr++);
+ if (rtype(m->index) == RTmp)
+ insert(&m->index, ra, nr++);
+ break;
+ case RTmp:
+ insert(&i->arg[x], ra, nr++);
+ break;
+ }
+ for (r=0; r<nr; r++)
+ *ra[r] = ralloc(cur, ra[r]->val);
+ }
+}
+
+/* register allocation
+ * depends on rpo, phi, cost, (and obviously spill)
+ *
+ * Works in three steps: set up the maps and the hints, assign
+ * registers block by block while recording the mapping at the
+ * beginning and at the end of every block, then add on the edges
+ * the glue blocks that reconcile those mappings and implement the
+ * phis. On return the phis are gone and fn->reg holds the set of
+ * registers used.
+ */
+void
+rega(Fn *fn)
+{
+ int j, n, t, r, r1, x, rl[Tmp0];
+ Blk *b, *b1, *s, ***ps, *blist;
+ RMap *end, *beg, cur, old;
+ Ins *i;
+ Phi *p;
+ uint u;
+ Ref src, dst;
+
+ /* 1. setup */
+ regu = 0;
+ tmp = fn->tmp;
+ mem = fn->mem;
+ end = alloc(fn->nblk * sizeof end[0]);
+ beg = alloc(fn->nblk * sizeof beg[0]);
+ for (n=0; n<fn->nblk; n++) {
+ bsinit(end[n].b, fn->ntmp);
+ bsinit(beg[n].b, fn->ntmp);
+ }
+ bsinit(cur.b, fn->ntmp);
+ bsinit(old.b, fn->ntmp);
+
+ for (t=Tmp0; t<fn->ntmp; t++)
+ *hint(t) = -1;
+ /* the copies from registers that open the first block give
+ * their destination a hint */
+ for (b=fn->start, i=b->ins; i-b->ins < b->nins; i++)
+ if (i->op != OCopy || !isreg(i->arg[0]))
+ break;
+ else {
+ assert(rtype(i->to) == RTmp);
+ sethint(i->to.val, i->arg[0].val);
+ }
+
+ /* 2. assign registers following post-order */
+ for (n=fn->nblk-1; n>=0; n--) {
+ b = fn->rpo[n];
+ cur.n = 0;
+ bszero(cur.b);
+ /* allocate the temporaries in b->out in two rounds:
+ * first those whose hint is free, then the others */
+ for (x=0; x<2; x++)
+ for (t=Tmp0; t<fn->ntmp; t++) {
+ assert(bshas(b->out, t) ||
+ !bshas(cur.b, t));
+ if (bshas(b->out, t))
+ if (!bshas(cur.b, t))
+ if (x || (r=*hint(t)) != -1)
+ if (x || !bshas(cur.b, r))
+ ralloc(&cur, t);
+ }
+ rcopy(&end[n], &cur);
+ doblk(b, &cur);
+ bscopy(b->in, cur.b);
+ for (p=b->phi; p; p=p->link)
+ if (rtype(p->to) == RTmp) {
+ bsclr(b->in, p->to.val);
+ /* heuristic 0:
+ * if the phi destination has an
+ * argument from a frequent block
+ * that was already allocated to
+ * 'r', use 'r' as the new hint
+ */
+ memset(rl, 0, sizeof rl);
+ for (u=0; u<p->narg; u++) {
+ t = p->arg[u].val;
+ b1 = p->blk[u];
+ if (rtype(p->arg[u]) == RTmp)
+ if ((r=rfind(&end[b1->id], t)) != -1)
+ rl[r] += b1->loop;
+ }
+ for (x=0, j=0; j<Tmp0; j++)
+ if (rl[j] > rl[x])
+ x = j;
+ if (rl[x] >= b->loop)
+ *hint(p->to.val) = x;
+ }
+ if (b->npred > 1) {
+ /* heuristic 1:
+ * attempt to satisfy hints
+ * when it's simple and we have
+ * multiple predecessors
+ */
+ rcopy(&old, &cur);
+ /* emit() fills insb from its end */
+ curi = &insb[NIns];
+ for (j=0; j<old.n; j++) {
+ t = old.t[j];
+ r = *hint(t);
+ r1 = rfind(&cur, t);
+ if (r != -1 && r != r1)
+ if (!bshas(cur.b, r)) {
+ rfree(&cur, t);
+ radd(&cur, t, r);
+ x = tmp[t].cls;
+ emit(OCopy, x, TMP(r1), TMP(r), R);
+ }
+ }
+ /* put the copies emitted in front of the block */
+ if ((j = &insb[NIns] - curi)) {
+ b->nins += j;
+ i = alloc(b->nins * sizeof(Ins));
+ icpy(icpy(i, curi, j), b->ins, b->nins-j);
+ b->ins = i;
+ }
+ }
+ rcopy(&beg[n], &cur);
+ }
+ if (debug['R']) {
+ fprintf(stderr, "\n> Register mappings:\n");
+ for (n=0; n<fn->nblk; n++) {
+ b = fn->rpo[n];
+ fprintf(stderr, "\t%-10s beg", b->name);
+ mdump(&beg[n]);
+ fprintf(stderr, "\t end");
+ mdump(&end[n]);
+ }
+ fprintf(stderr, "\n");
+ }
+
+ /* 3. compose glue code */
+ blist = 0;
+ for (b=fn->start;; b=b->link) {
+ /* visit both successor slots of b, the null entry
+ * stops the loop */
+ ps = (Blk**[3]){&b->s1, &b->s2, (Blk*[1]){0}};
+ for (; (s=**ps); ps++) {
+ npm = 0;
+ /* moves implementing the phis of s for the edge
+ * coming from b */
+ for (p=s->phi; p; p=p->link) {
+ dst = p->to;
+ assert(rtype(dst)==RSlot || rtype(dst)==RTmp);
+ if (rtype(dst) == RTmp) {
+ r = rfind(&beg[s->id], dst.val);
+ if (r == -1)
+ continue;
+ dst = TMP(r);
+ }
+ for (u=0; p->blk[u]!=b; u++)
+ assert(u+1 < p->narg);
+ src = p->arg[u];
+ if (rtype(src) == RTmp)
+ src = rref(&end[b->id], src.val);
+ pmadd(src, dst, p->cls);
+ }
+ /* moves taking the temporaries in s->in from
+ * their place at the end of b to their place at
+ * the beginning of s */
+ for (t=Tmp0; t<fn->ntmp; t++)
+ if (bshas(s->in, t)) {
+ src = rref(&end[b->id], t);
+ dst = rref(&beg[s->id], t);
+ pmadd(src, dst, tmp[t].cls);
+ }
+ pmgen();
+ if (curi == insb)
+ continue;
+ /* some code is needed: put it in a new block
+ * inserted on the edge from b to s */
+ b1 = blknew();
+ b1->loop = (b->loop+s->loop) / 2;
+ b1->link = blist;
+ blist = b1;
+ fn->nblk++;
+ sprintf(b1->name, "%s_%s", b->name, s->name);
+ b1->nins = curi - insb;
+ idup(&b1->ins, insb, b1->nins);
+ b1->jmp.type = JJmp;
+ b1->s1 = s;
+ **ps = b1;
+ }
+ /* the new blocks are linked after the last block */
+ if (!b->link) {
+ b->link = blist;
+ break;
+ }
+ }
+ for (b=fn->start; b; b=b->link)
+ b->phi = 0;
+ fn->reg = regu;
+
+ if (debug['R']) {
+ fprintf(stderr, "\n> After register allocation:\n");
+ printfn(fn, stderr);
+ }
+}
diff --git a/src/spill.c b/src/spill.c
@@ -0,0 +1,507 @@
+#include "all.h"
+
+static void /* walk backwards from b, folding loop information into the header hd */
+loopmark(Blk *hd, Blk *b, Phi *p) /* p: phi list of the block b was reached from */
+{
+ int k, head;
+ uint n, a;
+
+ head = hd->id;
+ if (b->id < head) /* b has a smaller id than the header: stop the walk */
+ return;
+ for (; p; p=p->link) /* temporaries flowing from b into the phis p go to hd->gen */
+ for (a=0; a<p->narg; a++)
+ if (p->blk[a] == b)
+ if (rtype(p->arg[a]) == RTmp)
+ bsset(hd->gen, p->arg[a].val);
+ if (b->visit == head) /* b was already handled for this header */
+ return;
+ b->visit = head;
+ b->loop *= 10; /* weight grows tenfold for each header that reaches b */
+ /* aggregate looping information at
+ * loop headers */
+ bsunion(hd->gen, b->gen);
+ for (k=0; k<2; k++) /* keep the per-class maximum of nlive in the header */
+ if (b->nlive[k] > hd->nlive[k])
+ hd->nlive[k] = b->nlive[k];
+ for (n=0; n<b->npred; n++) /* continue through every predecessor of b */
+ loopmark(hd, b->pred[n], b->phi);
+}
+
+static void /* account one use (use != 0) or one definition (use == 0) of r */
+tmpuse(Ref r, int use, int loop, Fn *fn) /* loop: weight added to the cost of the temporary */
+{
+ Mem *m;
+ Tmp *t;
+
+ if (rtype(r) == RAMem) { /* memory operand: its base and index count as uses */
+ m = &fn->mem[r.val & AMask];
+ tmpuse(m->base, 1, loop, fn);
+ tmpuse(m->index, 1, loop, fn);
+ }
+ else if (rtype(r) == RTmp && r.val >= Tmp0) { /* temporaries below Tmp0 are left untouched */
+ t = &fn->tmp[r.val];
+ t->nuse += use;
+ t->ndef += !use;
+ t->cost += loop;
+ }
+}
+
+/* evaluate spill costs of temporaries,
+ * this also fills usage information
+ * requires rpo, preds
+ *
+ * every block starts with loop = 1; loopmark()
+ * scales it for blocks found behind a back-edge,
+ * then each use/def adds the block weight to
+ * the cost of the temporary (see tmpuse)
+ */
+void
+fillcost(Fn *fn)
+{
+ int n, hd;
+ uint a;
+ Blk *b;
+ Ins *i;
+ Tmp *t;
+ Phi *p;
+
+ for (b=fn->start; b; b=b->link) { /* reset loop weights and visit marks */
+ b->loop = 1;
+ b->visit = -1;
+ }
+ if (debug['S'])
+ fprintf(stderr, "\n> Loop information:\n");
+ for (n=0; n<fn->nblk; n++) {
+ b = fn->rpo[n];
+ hd = 0;
+ for (a=0; a<b->npred; a++)
+ if (b->pred[a]->id >= n) { /* predecessor not before b in rpo: handled as a back-edge */
+ loopmark(b, b->pred[a], b->phi);
+ hd = 1;
+ }
+ if (hd && debug['S']) { /* dump nlive and gen of each loop header found */
+ fprintf(stderr, "\t%-10s", b->name);
+ fprintf(stderr, " (% 3d ", b->nlive[0]);
+ fprintf(stderr, "% 3d) ", b->nlive[1]);
+ dumpts(b->gen, fn->tmp, stderr);
+ }
+ }
+ for (t=fn->tmp; t-fn->tmp < fn->ntmp; t++) {
+ t->cost = t-fn->tmp < Tmp0 ? 1e6 : 0; /* entries below Tmp0 get a huge initial cost */
+ t->nuse = 0;
+ t->ndef = 0;
+ }
+ for (b=fn->start; b; b=b->link) {
+ for (p=b->phi; p; p=p->link) {
+ /* todo, the cost computation
+ * for p->to is not great... */
+ tmpuse(p->to, 0, 0, fn);
+ for (a=0; a<p->narg; a++) {
+ n = p->blk[a]->loop;
+ assert(b->npred==p->narg &&
+ "wrong cfg");
+ n /= b->npred; /* weight of the source block shared among predecessors */
+ tmpuse(p->arg[a], 1, n, fn);
+ }
+ }
+ n = b->loop;
+ for (i=b->ins; i-b->ins < b->nins; i++) {
+ tmpuse(i->to, 0, n, fn);
+ tmpuse(i->arg[0], 1, n, fn);
+ tmpuse(i->arg[1], 1, n, fn);
+ }
+ tmpuse(b->jmp.arg, 1, n, fn);
+ }
+ if (debug['S']) {
+ fprintf(stderr, "\n> Spill costs:\n");
+ for (n=Tmp0; n<fn->ntmp; n++)
+ fprintf(stderr, "\t%-10s %d\n",
+ fn->tmp[n].name,
+ fn->tmp[n].cost);
+ fprintf(stderr, "\n");
+ }
+}
+
+static BSet *fst; /* temps to prioritize in registers (for tcmp1) */
+static Tmp *tmp; /* current temporaries (for tcmpX) */
+static int ntmp; /* current # of temps (for limit) */
+static int locs; /* stack size used by locals */
+static int slot4; /* next slot of 4 bytes */
+static int slot8; /* ditto, 8 bytes */
+static BSet mask[2][1]; /* class masks */
+
+static int
+tcmp0(const void *pa, const void *pb)
+{
+	const int *ta, *tb;
+
+	/* qsort comparator on temporary indices:
+	 * the more costly temporary sorts first */
+	ta = pa;
+	tb = pb;
+	return tmp[*tb].cost - tmp[*ta].cost;
+}
+
+static int
+tcmp1(const void *pa, const void *pb)
+{
+	int d;
+
+	/* members of fst sort before the others,
+	 * ties are broken by cost (tcmp0) */
+	d = bshas(fst, *(int *)pb) - bshas(fst, *(int *)pa);
+	if (d != 0)
+		return d;
+	return tcmp0(pa, pb);
+}
+
+static Ref /* returns the stack slot of temporary t, allocating it on first request */
+slot(int t)
+{
+ int s;
+
+ if (t < Tmp0) /* indices below Tmp0 are registers */
+ diag("spill: cannot spill register");
+ s = tmp[t].slot;
+ if (s == -1) { /* no slot assigned yet */
+ assert(NAlign == 3);
+ /* nice logic to pack stack slots
+ * on demand, there can be only
+ * one hole and slot4 points to it
+ *
+ * invariant: slot4 <= slot8
+ */
+ if (KWIDE(tmp[t].cls)) { /* wide class: take two units at slot8 */
+ s = slot8;
+ if (slot4 == slot8) /* no hole: keep slot4 in step */
+ slot4 += 2;
+ slot8 += 2;
+ } else { /* narrow class: take one unit at slot4 */
+ s = slot4;
+ if (slot4 == slot8) { /* no hole: open a new pair, leaving one unit free */
+ slot8 += 2;
+ slot4 += 1;
+ } else /* the hole was just filled */
+ slot4 = slot8;
+ }
+ s += locs; /* spill slots come after the locals */
+ tmp[t].slot = s;
+ }
+ return SLOT(s);
+}
+
+static void /* shrink b to at most k temporaries, giving a slot to those removed */
+limit(BSet *b, int k, BSet *f) /* f: optional set of temporaries to keep in priority */
+{
+ static int *tarr, maxt; /* scratch array reused across calls, grown on demand */
+ int i, nt;
+ uint t;
+
+ nt = bscount(b);
+ if (nt <= k) /* already small enough */
+ return;
+ if (nt > maxt) {
+ free(tarr);
+ tarr = emalloc(nt * sizeof tarr[0]);
+ maxt = nt;
+ }
+ for (i=0, t=0; bsiter(b, &t); t++) { /* move the members of b into tarr */
+ bsclr(b, t);
+ tarr[i++] = t;
+ }
+ if (!f)
+ qsort(tarr, nt, sizeof tarr[0], tcmp0); /* by decreasing cost */
+ else {
+ fst = f;
+ qsort(tarr, nt, sizeof tarr[0], tcmp1); /* members of f first, then by cost */
+ }
+ for (i=0; i<k && i<nt; i++) /* the first k stay in the set */
+ bsset(b, tarr[i]);
+ for (; i<nt; i++) /* the rest get a stack slot */
+ slot(tarr[i]);
+}
+
+static void /* apply limit() separately to the two register classes of b1 */
+limit2(BSet *b1, int k1, int k2, BSet *fst) /* k1, k2: registers to hold back in each class */
+{
+ BSet b2[1];
+
+ bsinit(b2, ntmp); /* todo, free those */
+ bscopy(b2, b1);
+ bsinter(b1, mask[0]); /* b1 keeps the members selected by mask[0] */
+ bsinter(b2, mask[1]); /* b2 keeps the members selected by mask[1] */
+ limit(b1, NIReg - k1, fst);
+ limit(b2, NFReg - k2, fst);
+ bsunion(b1, b2); /* merge both classes back into b1 */
+}
+
+static void
+sethint(BSet *u, bits r)
+{
+	uint t;
+
+	/* or the mask r into the hint of every
+	 * temporary of u (through its phi class) */
+	t = Tmp0;
+	while (bsiter(u, &t)) {
+		tmp[phicls(t, tmp)].hint.m |= r;
+		t++;
+	}
+}
+
+static void
+reloads(BSet *u, BSet *v)
+{
+	uint t;
+
+	/* emit a load from its slot for each
+	 * temporary that is in u but not in v */
+	t = Tmp0;
+	while (bsiter(u, &t)) {
+		if (!bshas(v, t))
+			emit(OLoad, tmp[t].cls, TMP(t), slot(t), R);
+		t++;
+	}
+}
+
+static void
+store(Ref r, int s)
+{
+	/* store opcode for each class */
+	static int kstore[] = {
+		[Kw] = OStorew, [Kl] = OStorel,
+		[Ks] = OStores, [Kd] = OStored,
+	};
+
+	/* -1 means the temporary has no slot,
+	 * nothing has to be saved then */
+	if (s == -1)
+		return;
+	emit(kstore[tmp[r.val].cls], 0, R, r, SLOT(s));
+}
+
+static int
+regcpy(Ins *i)
+{
+	/* true for a copy whose source is a register */
+	if (i->op != OCopy)
+		return 0;
+	return isreg(i->arg[0]) != 0;
+}
+
+static Ins * /* handle the run of register copies ending at i; returns its first instruction */
+dopm(Blk *b, Ins *i, BSet *v) /* v: temporaries in registers after the run, updated in place */
+{
+ int n, t;
+ BSet u[1];
+ Ins *i1;
+ bits r;
+
+ bsinit(u, ntmp); /* todo, free those */
+ /* consecutive copies from
+ * registers need to be handled
+ * as one large instruction
+ *
+ * fixme: there is an assumption
+ * that calls are always followed
+ * by copy instructions here, this
+ * might not be true if previous
+ * passes change
+ */
+ i1 = ++i; /* i1: one past the last copy of the run */
+ do {
+ i--;
+ t = i->to.val;
+ if (!req(i->to, R))
+ if (bshas(v, t)) { /* the target is needed below: define it and save it to its slot, if any */
+ bsclr(v, t);
+ store(i->to, tmp[t].slot);
+ }
+ bsset(v, i->arg[0].val); /* the source register is needed before the copy */
+ } while (i != b->ins && regcpy(i-1));
+ bscopy(u, v); /* u: what is wanted in registers before limiting */
+ if (i != b->ins && (i-1)->op == OCall) { /* the run directly follows a call */
+ v->t[0] &= ~retregs((i-1)->arg[1], 0);
+ limit2(v, NISave, NFSave, 0);
+ for (r=0, n=0; n<NRSave; n++) /* r: mask built from the rsave[] registers */
+ r |= BIT(rsave[n]);
+ v->t[0] |= argregs((i-1)->arg[1], 0);
+ } else {
+ limit2(v, 0, 0, 0);
+ r = v->t[0];
+ }
+ sethint(v, r);
+ reloads(u, v); /* reload what limit2 removed from v */
+ do /* emit the whole run unchanged */
+ emiti(*--i1);
+ while (i1 != i);
+ return i;
+}
+
+/* spill code insertion
+ * requires spill costs, rpo, liveness
+ *
+ * Note: this will replace liveness
+ * information (in, out) with temporaries
+ * that must be in registers at block
+ * borders
+ *
+ * Be careful with:
+ * - OCopy instructions to ensure register
+ *   constraints
+ *
+ * Blocks are processed from the last one in
+ * rpo to the first one, and the instructions
+ * of each block from the last to the first;
+ * the new instruction stream is built with
+ * emit() in insb and copied back to the block.
+ */
+void
+spill(Fn *fn)
+{
+	Blk *b, *s1, *s2, *hd, **bp;
+	int j, n, l, t, k, lvarg[2];
+	BSet u[1], v[1], w[1];
+	Ins *i;
+	Phi *p;
+	Mem *m;
+	bits r;
+
+	tmp = fn->tmp;
+	ntmp = fn->ntmp;
+	bsinit(u, ntmp);
+	bsinit(v, ntmp);
+	bsinit(w, ntmp);
+	bsinit(mask[0], ntmp);
+	bsinit(mask[1], ntmp);
+	locs = fn->slot;
+	slot4 = 0;
+	slot8 = 0;
+	/* build the two class masks: mask[1] gets the
+	 * XMM registers and the temporaries of base
+	 * class 1, mask[0] gets everything else */
+	for (t=0; t<ntmp; t++) {
+		k = 0;
+		if (t >= XMM0 && t < XMM0 + NFReg)
+			k = 1;
+		else if (t >= Tmp0)
+			k = KBASE(tmp[t].cls);
+		bsset(mask[k], t);
+	}
+
+	for (bp=&fn->rpo[fn->nblk]; bp!=fn->rpo;) {
+		b = *--bp;
+		/* invariant: all blocks with bigger rpo got
+		 * their in,out updated. */
+
+		/* 1. find temporaries in registers at
+		 * the end of the block (put them in v) */
+		curi = 0;
+		s1 = b->s1;
+		s2 = b->s2;
+		hd = 0;
+		/* a successor that does not come after b
+		 * in rpo is the target of a back-edge;
+		 * b->id is the rpo index of b, the scratch
+		 * variable n must not be used here since
+		 * it is clobbered below */
+		if (s1 && s1->id <= b->id)
+			hd = s1;
+		if (s2 && s2->id <= b->id)
+		if (!hd || s2->id >= hd->id)
+			hd = s2;
+		r = 0;
+		bszero(v);
+		if (hd) {
+			/* back-edge */
+			for (k=0; k<2; k++) {
+				n = k == 0 ? NIReg : NFReg;
+				bscopy(u, b->out);
+				bsinter(u, mask[k]);
+				bscopy(w, u);
+				bsinter(u, hd->gen);
+				bsdiff(w, hd->gen);
+				if ((int)bscount(u) < n) { /* fixme */
+					j = bscount(w); /* live through */
+					l = hd->nlive[k];
+					limit(w, n - (l - j), 0);
+					bsunion(u, w);
+				} else
+					limit(u, n, 0);
+				bsunion(v, u);
+			}
+		} else if (s1) {
+			liveon(v, b, s1);
+			if (s2) {
+				liveon(u, b, s2);
+				/* w: live on both edges, those
+				 * are kept in priority */
+				bscopy(w, u);
+				bsinter(w, v);
+				bsunion(v, u);
+			}
+			limit2(v, 0, 0, w);
+		} else if (rtype(b->jmp.arg) == RACall) {
+			/* return */
+			r = retregs(b->jmp.arg, 0);
+			v->t[0] |= r;
+		}
+		bscopy(b->out, v);
+
+		/* 2. process the block instructions */
+		curi = &insb[NIns];
+		for (i=&b->ins[b->nins]; i!=b->ins;) {
+			i--;
+			if (regcpy(i)) {
+				i = dopm(b, i, v);
+				continue;
+			}
+			/* w collects what must be in a
+			 * register for this instruction */
+			bszero(w);
+			if (!req(i->to, R)) {
+				assert(rtype(i->to) == RTmp);
+				t = i->to.val;
+				if (bshas(v, t))
+					bsclr(v, t);
+				else {
+					/* make sure we have a reg
+					 * for the result */
+					bsset(v, t);
+					bsset(w, t);
+				}
+			}
+			/* j: number of arguments that may
+			 * still be turned into slots */
+			j = opdesc[i->op].nmem;
+			for (n=0; n<2; n++)
+				if (rtype(i->arg[n]) == RAMem)
+					j--;
+			for (n=0; n<2; n++)
+				switch (rtype(i->arg[n])) {
+				case RAMem:
+					t = i->arg[n].val;
+					m = &fn->mem[t & AMask];
+					if (rtype(m->base) == RTmp) {
+						bsset(v, m->base.val);
+						bsset(w, m->base.val);
+					}
+					if (rtype(m->index) == RTmp) {
+						bsset(v, m->index.val);
+						bsset(w, m->index.val);
+					}
+					break;
+				case RTmp:
+					t = i->arg[n].val;
+					/* remember if the argument was
+					 * wanted after the instruction */
+					lvarg[n] = bshas(v, t);
+					bsset(v, t);
+					if (j-- <= 0)
+						bsset(w, t);
+					break;
+				}
+			bscopy(u, v);
+			limit2(v, 0, 0, w);
+			for (n=0; n<2; n++)
+				if (rtype(i->arg[n]) == RTmp) {
+					t = i->arg[n].val;
+					if (!bshas(v, t)) {
+						/* do not reload if the
+						 * temporary was dead
+						 */
+						if (!lvarg[n])
+							bsclr(u, t);
+						i->arg[n] = slot(t);
+					}
+				}
+			reloads(u, v);
+			if (!req(i->to, R)) {
+				t = i->to.val;
+				store(i->to, tmp[t].slot);
+				bsclr(v, t);
+			}
+			emiti(*i);
+			r = v->t[0] & (BIT(Tmp0)-1);
+			if (r)
+				sethint(v, r);
+		}
+		assert(!r || b==fn->start);
+
+		for (p=b->phi; p; p=p->link) {
+			assert(rtype(p->to) == RTmp);
+			t = p->to.val;
+			if (bshas(v, t)) {
+				bsclr(v, t);
+				store(p->to, tmp[t].slot);
+			} else if (bshas(b->in, t))
+				/* only if the phi is live */
+				p->to = slot(p->to.val);
+		}
+		bscopy(b->in, v);
+		b->nins = &insb[NIns] - curi;
+		idup(&b->ins, curi, b->nins);
+	}
+
+	/* align the locals to a 16 byte boundary */
+	assert(NAlign == 3);
+	slot8 += slot8 & 3;
+	fn->slot += slot8;
+
+	if (debug['S']) {
+		/* all the debug output goes to stderr */
+		fprintf(stderr, "\n> Block information:\n");
+		for (b=fn->start; b; b=b->link) {
+			fprintf(stderr, "\t%-10s (% 5d) ", b->name, b->loop);
+			dumpts(b->out, fn->tmp, stderr);
+		}
+		fprintf(stderr, "\n> After spilling:\n");
+		printfn(fn, stderr);
+	}
+}
diff --git a/src/ssa.c b/src/ssa.c
@@ -0,0 +1,516 @@
+#include "all.h"
+#include <stdarg.h>
+
+static void /* append one use record of kind ty, located in block b, to tmp */
+adduse(Tmp *tmp, int ty, Blk *b, ...) /* extra argument: Phi * for UPhi, Ins * for UIns, none for UJmp */
+{
+ Use *u;
+ int n;
+ va_list ap;
+
+ va_start(ap, b);
+ n = tmp->nuse;
+ vgrow(&tmp->use, ++tmp->nuse); /* make room for the new record */
+ u = &tmp->use[n];
+ u->type = ty;
+ u->bid = b->id;
+ switch (ty) {
+ default:
+ diag("ssa: adduse defaulted"); /* diag() aborts, so this never falls through */
+ case UPhi:
+ u->u.phi = va_arg(ap, Phi *);
+ break;
+ case UIns:
+ u->u.ins = va_arg(ap, Ins *);
+ break;
+ case UJmp:
+ break;
+ }
+ va_end(ap);
+}
+
+/* fill usage, phi, and class information
+ *
+ * for every temporary this recomputes ndef,
+ * nuse, the use list, cls and the phi field
+ */
+void
+filluse(Fn *fn)
+{
+ Blk *b;
+ Phi *p;
+ Ins *i;
+ int m, t;
+ uint a;
+ Tmp *tmp;
+
+ /* todo, is this the correct file? */
+ tmp = fn->tmp;
+ for (t=0; t<fn->ntmp; t++) { /* reset the counters, create missing use vectors */
+ tmp[t].ndef = 0;
+ tmp[t].nuse = 0;
+ tmp[t].phi = 0;
+ tmp[t].cls = 0;
+ if (tmp[t].use == 0)
+ tmp[t].use = vnew(0, sizeof(Use));
+ }
+ for (b=fn->start; b; b=b->link) {
+ for (p=b->phi; p; p=p->link) {
+ assert(rtype(p->to) == RTmp);
+ t = p->to.val;
+ tmp[t].ndef++;
+ tmp[t].cls = p->cls;
+ tmp[t].phi = p->to.val; /* a phi result is tagged with itself */
+ for (a=0; a<p->narg; a++)
+ if (rtype(p->arg[a]) == RTmp) {
+ t = p->arg[a].val;
+ adduse(&tmp[t], UPhi, b, p);
+ if (!tmp[t].phi) /* an untagged argument is tagged with the phi result */
+ tmp[t].phi = p->to.val;
+ }
+ }
+ for (i=b->ins; i-b->ins < b->nins; i++) {
+ if (!req(i->to, R)) {
+ assert(rtype(i->to) == RTmp);
+ t = i->to.val;
+ tmp[t].ndef++;
+ tmp[t].cls = i->cls;
+ }
+ for (m=0; m<2; m++)
+ if (rtype(i->arg[m]) == RTmp) {
+ t = i->arg[m].val;
+ adduse(&tmp[t], UIns, b, i);
+ }
+ }
+ if (rtype(b->jmp.arg) == RTmp) /* the jump argument counts as a use too */
+ adduse(&tmp[b->jmp.arg.val], UJmp, b);
+ }
+}
+
+static void /* record bp in the predecessor array of bc; bc->npred must already be final */
+addpred(Blk *bp, Blk *bc)
+{
+ uint i;
+
+ if (!bc->pred) { /* first predecessor: allocate and clear the array */
+ bc->pred = alloc(bc->npred * sizeof bc->pred[0]);
+ for (i=0; i<bc->npred; i++)
+ bc->pred[i] = 0;
+ }
+ for (i=0; bc->pred[i]; i++) /* find the first free entry */
+ ;
+ bc->pred[i] = bp;
+}
+
+/* fill predecessors information in blocks
+ *
+ * three passes over the block list: reset,
+ * count the predecessors, then store them
+ */
+void
+fillpreds(Fn *f)
+{
+ Blk *b;
+
+ for (b=f->start; b; b=b->link) {
+ b->npred = 0;
+ b->pred = 0;
+ }
+ for (b=f->start; b; b=b->link) { /* counting pass */
+ if (b->s1)
+ b->s1->npred++;
+ if (b->s2)
+ b->s2->npred++;
+ }
+ for (b=f->start; b; b=b->link) { /* filling pass, arrays are sized by the counts above */
+ if (b->s1)
+ addpred(b, b->s1);
+ if (b->s2)
+ addpred(b, b->s2);
+ }
+}
+
+static int /* depth-first numbering from b; x is the next id to hand out, the new next id is returned */
+rporec(Blk *b, int x)
+{
+ Blk *s1, *s2;
+
+ if (!b || b->id >= 0) /* no block, or block already reached */
+ return x;
+ b->id = 1; /* temporary non-negative mark while the successors are explored */
+ s1 = b->s1;
+ s2 = b->s2;
+ if (s1 && s2 && s1->loop > s2->loop) { /* explore the successor with the smaller loop value first */
+ s1 = b->s2;
+ s2 = b->s1;
+ }
+ x = rporec(s1, x);
+ x = rporec(s2, x);
+ b->id = x; /* ids are handed out in decreasing order, after the successors */
+ assert(x >= 0);
+ return x - 1;
+}
+
+/* fill the rpo information in blocks
+ *
+ * blocks not reached from f->start are
+ * unlinked from the block list and nblk
+ * is lowered accordingly
+ */
+void
+fillrpo(Fn *f)
+{
+ int n;
+ Blk *b, **p;
+
+ for (b=f->start; b; b=b->link) /* -1 marks a block as not reached */
+ b->id = -1;
+ n = 1 + rporec(f->start, f->nblk-1); /* n: number of blocks that were not reached */
+ f->nblk -= n;
+ f->rpo = alloc(f->nblk * sizeof f->rpo[0]);
+ for (p=&f->start; *p;) {
+ b = *p;
+ if (b->id == -1) { /* drop the unreached block from the list */
+ *p = b->link;
+ /* todo, free block */
+ } else {
+ b->id -= n; /* shift the ids so that they start at 0 */
+ f->rpo[b->id] = b;
+ p=&(*p)->link;
+ }
+ }
+}
+
+/* for dominators computation, read
+ * "A Simple, Fast Dominance Algorithm"
+ * by K. Cooper, T. Harvey, and K. Kennedy.
+ */
+
+static Blk * /* meet of b1 and b2 along the idom links; b1 may be 0, b2 is then returned */
+inter(Blk *b1, Blk *b2)
+{
+ Blk *bt;
+
+ if (b1 == 0)
+ return b2;
+ while (b1 != b2) {
+ if (b1->id < b2->id) { /* make b1 the block with the larger id */
+ bt = b1;
+ b1 = b2;
+ b2 = bt;
+ }
+ while (b1->id > b2->id) { /* climb from b1 until its id is no larger than the one of b2 */
+ b1 = b1->idom;
+ assert(b1);
+ }
+ }
+ return b1;
+}
+
+static void /* compute idom for every block, then the dom/dlink children lists */
+filldom(Fn *fn)
+{
+ Blk *b, *d;
+ int ch, n;
+ uint p;
+
+ for (b=fn->start; b; b=b->link) {
+ b->idom = 0;
+ b->dom = 0;
+ b->dlink = 0;
+ }
+ do { /* iterate until no idom changes */
+ ch = 0;
+ for (n=1; n<fn->nblk; n++) { /* rpo[0] is skipped, its idom stays 0 */
+ b = fn->rpo[n];
+ d = 0;
+ for (p=0; p<b->npred; p++) /* only predecessors that already have an idom, or the start block */
+ if (b->pred[p]->idom
+ || b->pred[p] == fn->start)
+ d = inter(d, b->pred[p]);
+ if (d != b->idom) {
+ ch++;
+ b->idom = d;
+ }
+ }
+ } while (ch);
+ for (b=fn->start; b; b=b->link)
+ if ((d=b->idom)) { /* push b on the children list of its idom */
+ assert(d != b);
+ b->dlink = d->dom;
+ d->dom = b;
+ }
+}
+
+static int /* non-zero when b1 is found on the idom chain of b2 and b1 != b2 */
+sdom(Blk *b1, Blk *b2)
+{
+ assert(b1 && b2);
+ if (b1 == b2) /* a block is never reported for itself */
+ return 0;
+ while (b2->id > b1->id) /* climb the idom links while the id is larger than the one of b1 */
+ b2 = b2->idom;
+ return b1 == b2;
+}
+
+static int
+dom(Blk *b1, Blk *b2)
+{
+	/* like sdom(), but a block is also
+	 * accepted for itself */
+	if (b1 == b2)
+		return 1;
+	return sdom(b1, b2);
+}
+
+static void /* add b to the fron vector of a, unless it is already there */
+addfron(Blk *a, Blk *b)
+{
+ int n;
+
+ for (n=0; n<a->nfron; n++) /* duplicates are not stored */
+ if (a->fron[n] == b)
+ return;
+ if (!a->nfron) /* first entry: create the vector */
+ a->fron = vnew(++a->nfron, sizeof a->fron[0]);
+ else
+ vgrow(&a->fron, ++a->nfron);
+ a->fron[a->nfron-1] = b;
+}
+
+static void /* fill the fron vectors of all blocks, requires idom */
+fillfron(Fn *fn)
+{
+ Blk *a, *b;
+
+ for (b=fn->start; b; b=b->link) {
+ if (b->s1) /* climb from b along idom until a block that sdom() accepts for s1 */
+ for (a=b; !sdom(a, b->s1); a=a->idom)
+ addfron(a, b->s1);
+ if (b->s2) /* same walk for the second successor */
+ for (a=b; !sdom(a, b->s2); a=a->idom)
+ addfron(a, b->s2);
+ }
+}
+
+static Ref
+refindex(int t, Fn *fn)
+{
+	Tmp *old;
+
+	/* fresh temporary with the same name
+	 * prefix and class as temporary t */
+	old = &fn->tmp[t];
+	return newtmp(old->name, old->cls, fn);
+}
+
+static void /* insert empty phis (narg is filled by renblk) using fron and liveness */
+phiins(Fn *fn)
+{
+ BSet u[1], defs[1]; /* u: blocks currently on the work list, defs: blocks defining t */
+ Blk *a, *b, **blist, **be, **bp;
+ Ins *i;
+ Phi *p;
+ Ref r;
+ int t, n, k, nt;
+
+ bsinit(u, fn->nblk);
+ bsinit(defs, fn->nblk);
+ blist = emalloc(fn->nblk * sizeof blist[0]); /* work list, filled from the end (be) downwards */
+ be = &blist[fn->nblk];
+ nt = fn->ntmp; /* temporaries created below are not processed */
+ for (t=Tmp0; t<nt; t++) {
+ fn->tmp[t].visit = 0;
+ if (fn->tmp[t].phi != 0) /* temporaries with the phi field set are skipped */
+ continue;
+ bszero(u);
+ k = -1; /* class of the definitions of t, -1 until one is seen */
+ bp = be;
+ for (b=fn->start; b; b=b->link) {
+ b->visit = 0;
+ r = R; /* current block-local replacement of t, if any */
+ for (i=b->ins; i-b->ins < b->nins; i++) {
+ if (!req(r, R)) { /* uses after a local definition read the replacement */
+ if (req(i->arg[0], TMP(t)))
+ i->arg[0] = r;
+ if (req(i->arg[1], TMP(t)))
+ i->arg[1] = r;
+ }
+ if (req(i->to, TMP(t))) {
+ if (!bshas(b->out, t)) { /* t is not live out of b: rename inside the block only */
+ if (fn->tmp[t].ndef == 1)
+ r = TMP(t);
+ else
+ r = refindex(t, fn);
+ i->to = r;
+ } else { /* t is live out of b: b is a defining block */
+ if (!bshas(u, b->id)) {
+ bsset(u, b->id);
+ *--bp = b;
+ }
+ if (k == -1)
+ k = i->cls;
+ assert(k == i->cls); /* all such definitions must have the same class */
+ }
+ }
+ }
+ if (!req(r, R) && req(b->jmp.arg, TMP(t)))
+ b->jmp.arg = r;
+ }
+ bscopy(defs, u);
+ while (bp != be) { /* process the work list */
+ fn->tmp[t].visit = t; /* marks t as a temporary to rename in renblk */
+ b = *bp++;
+ bsclr(u, b->id);
+ for (n=0; n<b->nfron; n++) {
+ a = b->fron[n];
+ if (a->visit++ == 0) /* at most one phi for t per block */
+ if (bshas(a->in, t)) { /* and only where t is live on entry */
+ p = alloc(sizeof *p);
+ p->cls = k;
+ p->to = TMP(t);
+ p->link = a->phi;
+ a->phi = p;
+ if (!bshas(defs, a->id)) /* the phi is a new definition: queue a unless already known */
+ if (!bshas(u, a->id)) {
+ bsset(u, a->id);
+ *--bp = a;
+ }
+ }
+ }
+ }
+ }
+ free(blist);
+}
+
+typedef struct Name Name; /* one entry of a per-temporary renaming stack */
+struct Name {
+ Ref r; /* the new name */
+ Blk *b; /* block where this name was created */
+ Name *up; /* entry below on the stack, or next entry on the free list */
+};
+
+static Name *namel; /* free list of Name entries, see nnew and nfree */
+
+static Name * /* returns a Name entry initialised with (r, b, up) */
+nnew(Ref r, Blk *b, Name *up)
+{
+ Name *n;
+
+ if (namel) { /* reuse an entry from the free list when there is one */
+ n = namel;
+ namel = n->up;
+ } else
+ /* could use alloc, here
+ * but namel should be reset
+ */
+ n = emalloc(sizeof *n);
+ n->r = r;
+ n->b = b;
+ n->up = up;
+ return n;
+}
+
+static void /* put n back on the free list; the memory is not released */
+nfree(Name *n)
+{
+ n->up = namel;
+ namel = n;
+}
+
+static void /* give the definition *r made in block b a fresh name and push it on stk */
+rendef(Ref *r, Blk *b, Name **stk, Fn *fn)
+{
+ Ref r1;
+ int t;
+
+ t = r->val;
+ if (req(*r, R) || !fn->tmp[t].visit) /* nothing defined, or temporary not selected for renaming */
+ return;
+ r1 = refindex(t, fn);
+ fn->tmp[r1.val].visit = t; /* the new temporary remembers the one it renames */
+ stk[t] = nnew(r1, b, stk[t]);
+ *r = r1;
+}
+
+static Ref /* current name of temporary t as seen from block b */
+getstk(int t, Blk *b, Name **stk)
+{
+ Name *n, *n1;
+
+ n = stk[t];
+ while (n && !dom(n->b, b)) { /* pop names created in blocks that do not dominate b */
+ n1 = n;
+ n = n->up;
+ nfree(n1);
+ }
+ stk[t] = n;
+ if (!n) { /* no name left: CON_Z is used instead */
+ /* uh, oh, warn */
+ return CON_Z;
+ } else
+ return n->r;
+}
+
+static void /* rename b, then recurse on the blocks of its dom/dlink list */
+renblk(Blk *b, Name **stk, Fn *fn)
+{
+ Phi *p;
+ Ins *i;
+ Blk *s, **ps, *succ[3];
+ int t, m;
+
+ for (p=b->phi; p; p=p->link) /* phi results are definitions */
+ rendef(&p->to, b, stk, fn);
+ for (i=b->ins; i-b->ins < b->nins; i++) {
+ for (m=0; m<2; m++) { /* arguments are renamed before the result */
+ t = i->arg[m].val;
+ if (rtype(i->arg[m]) == RTmp)
+ if (fn->tmp[t].visit)
+ i->arg[m] = getstk(t, b, stk);
+ }
+ rendef(&i->to, b, stk, fn);
+ }
+ t = b->jmp.arg.val;
+ if (rtype(b->jmp.arg) == RTmp)
+ if (fn->tmp[t].visit)
+ b->jmp.arg = getstk(t, b, stk);
+ succ[0] = b->s1;
+ succ[1] = b->s2;
+ succ[2] = 0; /* terminator, the loop below also stops at a null s1 */
+ for (ps=succ; (s=*ps); ps++)
+ for (p=s->phi; p; p=p->link) { /* add the argument coming from b to the phis of each successor */
+ t = p->to.val;
+ if ((t=fn->tmp[t].visit)) { /* t becomes the temporary this phi stands for */
+ m = p->narg++;
+ if (m == NPred)
+ diag("ssa: too many phi arguments");
+ p->arg[m] = getstk(t, b, stk);
+ p->blk[m] = b;
+ }
+ }
+ for (s=b->dom; s; s=s->dlink)
+ renblk(s, stk, fn);
+}
+
+/* require ndef
+ *
+ * runs filldom, fillfron, filllive, phiins
+ * and renblk in that order
+ */
+void
+ssa(Fn *fn)
+{
+ Name **stk, *n;
+ int d, nt;
+ Blk *b, *b1;
+
+ nt = fn->ntmp;
+ stk = emalloc(nt * sizeof stk[0]); /* one renaming stack per original temporary */
+ d = debug['L']; /* the 'L' debug flag is cleared during this pass and restored below */
+ debug['L'] = 0;
+ filldom(fn);
+ if (debug['N']) {
+ fprintf(stderr, "\n> Dominators:\n");
+ for (b1=fn->start; b1; b1=b1->link) {
+ if (!b1->dom)
+ continue;
+ fprintf(stderr, "%10s:", b1->name);
+ for (b=b1->dom; b; b=b->dlink)
+ fprintf(stderr, " %s", b->name);
+ fprintf(stderr, "\n");
+ }
+ }
+ fillfron(fn);
+ filllive(fn);
+ phiins(fn);
+ renblk(fn->start, stk, fn);
+ while (nt--) /* hand the remaining stack entries back to the free list */
+ while ((n=stk[nt])) {
+ stk[nt] = n->up;
+ nfree(n);
+ }
+ debug['L'] = d;
+ free(stk);
+ if (debug['N']) {
+ fprintf(stderr, "\n> After SSA construction:\n");
+ printfn(fn, stderr);
+ }
+}
diff --git a/lisc/test/_alt.ssa b/src/test/_alt.ssa
diff --git a/lisc/test/_dragon.ssa b/src/test/_dragon.ssa
diff --git a/lisc/test/_fix1.ssa b/src/test/_fix1.ssa
diff --git a/lisc/test/_fix2.ssa b/src/test/_fix2.ssa
diff --git a/lisc/test/_fix3.ssa b/src/test/_fix3.ssa
diff --git a/lisc/test/_fix4.ssa b/src/test/_fix4.ssa
diff --git a/lisc/test/_live.ssa b/src/test/_live.ssa
diff --git a/lisc/test/_rpo.ssa b/src/test/_rpo.ssa
diff --git a/lisc/test/_spill1.ssa b/src/test/_spill1.ssa
diff --git a/lisc/test/_spill2.ssa b/src/test/_spill2.ssa
diff --git a/lisc/test/_spill3.ssa b/src/test/_spill3.ssa
diff --git a/lisc/test/abi1.ssa b/src/test/abi1.ssa
diff --git a/lisc/test/abi2.ssa b/src/test/abi2.ssa
diff --git a/lisc/test/abi3.ssa b/src/test/abi3.ssa
diff --git a/lisc/test/abi4.ssa b/src/test/abi4.ssa
diff --git a/lisc/test/abi5.ssa b/src/test/abi5.ssa
diff --git a/lisc/test/align.ssa b/src/test/align.ssa
diff --git a/lisc/test/collatz.ssa b/src/test/collatz.ssa
diff --git a/lisc/test/cprime.ssa b/src/test/cprime.ssa
diff --git a/lisc/test/cup.ssa b/src/test/cup.ssa
diff --git a/lisc/test/dark.ssa b/src/test/dark.ssa
diff --git a/lisc/test/double.ssa b/src/test/double.ssa
diff --git a/lisc/test/echo.ssa b/src/test/echo.ssa
diff --git a/lisc/test/eucl.ssa b/src/test/eucl.ssa
diff --git a/lisc/test/euclc.ssa b/src/test/euclc.ssa
diff --git a/lisc/test/fpcnv.ssa b/src/test/fpcnv.ssa
diff --git a/src/test/go.sh b/src/test/go.sh
@@ -0,0 +1,116 @@
+#!/bin/sh
+
+TMP=/tmp/qbe.zzzz # common prefix of the scratch files below
+
+DRV=$TMP.c # driver extracted from the test file
+ASM=$TMP.s # assembly written by qbe
+BIN=$TMP.bin # linked test program
+OUT=$TMP.out # expected output extracted from the test file
+
+cleanup() { # remove the scratch files
+ rm -f $DRV $ASM $BIN $OUT
+}
+
+extract() { # print the section of a test file delimited by "# >>> WHAT" and "# <<<"
+ WHAT="$1" # section name, e.g. driver or output
+ FILE="$2" # test file to read
+
+ awk "
+ /^# >>> $WHAT/ {
+ p = 1
+ next
+ }
+ /^# <<</ {
+ if (p)
+ p = 0
+ }
+ p
+ " $FILE \
+ | sed -e 's/# //' \
+ | sed -e 's/#$//' # the two sed commands strip the comment marker
+}
+
+once() { # compile and run one test file, returns 1 on failure
+ T="$1"
+
+ if ! test -f $T
+ then
+ echo "invalid test file $T" >&2
+ exit 1
+ fi
+
+ echo "$T... "
+
+ if ! ./qbe $T -o $ASM # compile the test to assembly
+ then
+ echo "[qbe fail]"
+ return 1
+ fi
+
+ extract driver $T > $DRV
+ extract output $T > $OUT
+
+ if test -s $DRV # link the driver too when the test has one
+ then
+ LNK="$DRV $ASM"
+ else
+ LNK="$ASM"
+ fi
+
+ if ! cc -g -o $BIN $LNK
+ then
+ echo "[cc fail]"
+ return 1
+ fi
+
+ if test -s $OUT # with an expected output, compare; otherwise check the exit status
+ then
+ $BIN a b c | diff - $OUT
+ RET=$?
+ REASON="output"
+ else
+ $BIN a b c
+ RET=$?
+ REASON="returned $RET"
+ fi
+
+ if test $RET -ne 0
+ then
+ echo "[$REASON fail]"
+ return 1
+ fi
+
+ printf "\033[1A\033[45C[ok]\n" # cursor up one line, right 45 columns: [ok] lands on the test line
+}
+
+
+#trap cleanup TERM QUIT
+
+# entry point: "all" runs every test whose name does not
+# start with an underscore, anything else is a single test
+if test -z "$1"
+then
+	# the usage message belongs on stderr
+	echo "usage: test/go.sh {all, SSAFILE}" >&2
+	exit 1
+fi
+
+case $1 in
+	"all")
+		F=0
+		for T in test/[!_]*.ssa
+		do
+			once $T
+			F=`expr $F + $?`
+		done
+		if test $F -ge 1
+		then
+			echo
+			echo "$F test(s) failed!"
+			# report the failure to the caller (e.g. make check)
+			exit 1
+		else
+			echo
+			echo "All is fine!"
+		fi
+		;;
+	*)
+		once $1
+		exit $?
+		;;
+esac
diff --git a/lisc/test/loop.ssa b/src/test/loop.ssa
diff --git a/lisc/test/mandel.ssa b/src/test/mandel.ssa
diff --git a/lisc/test/max.ssa b/src/test/max.ssa
diff --git a/lisc/test/prime.ssa b/src/test/prime.ssa
diff --git a/lisc/test/puts10.ssa b/src/test/puts10.ssa
diff --git a/lisc/test/sum.ssa b/src/test/sum.ssa
diff --git a/lisc/tools/abi.ml b/src/tools/abi.ml
diff --git a/src/tools/abitest.sh b/src/tools/abitest.sh
@@ -0,0 +1,104 @@
+#!/bin/sh
+
+OCAMLC=/usr/bin/ocamlc # compiler used to build the test generator
+QBE=`pwd`/qbe # absolute path, the generated Makefile runs from another directory
+
+failure() { # report the failing stage on stderr and stop the script
+ echo "Failure at stage:" $1 >&2
+ exit 1
+}
+
+cleanup() { # remove the scratch directory
+ rm -fr $TMP
+}
+
+init() { # populate $TMP with the generator source, a Makefile and the built generator
+ cp tools/abi.ml $TMP
+ pushd $TMP > /dev/null # NOTE(review): pushd/popd are not POSIX sh builtins although the script starts with #!/bin/sh
+
+ cat > Makefile << EOM
+
+.PHONY: test
+test: caller.o callee.o
+ c99 -o \$@ caller.o callee.o
+%.o: %.c
+ c99 -c -o \$@ \$<
+%.o: %.ssa
+ $QBE -o \$*.s \$<
+ c99 -c -o \$@ \$*.s
+
+EOM
+
+ if ! $OCAMLC abi.ml -o gentest # build the generator, give up if that fails
+ then
+ popd > /dev/null
+ cleanup
+ failure "abifuzz compilation"
+ fi
+ popd > /dev/null
+}
+
+once() { # generate, build and run one test: $1 and $2 go to gentest, $3 is an optional seed
+ if test -z "$3"
+ then
+ $TMP/gentest $TMP $1 $2
+ else
+ $TMP/gentest -s $3 $TMP $1 $2 # the seed is passed with -s
+ fi
+ make -C $TMP test > /dev/null || failure "building"
+ $TMP/test || failure "runtime"
+}
+
+usage() { # print the command line synopsis on stderr and stop
+ echo "usage: abitest.sh [-callssa] [-callc] [-s SEED] [-n ITERATIONS]" >&2
+ exit 1
+}
+
+# defaults: one iteration, C caller and ssa callee, no seed
+N=1
+CALLER=c
+CALLEE=ssa
+SEED=
+
+while test -n "$1"
+do
+	case "$1" in
+	"-callssa")
+		# set explicitly so that the last flag wins
+		CALLER=c
+		CALLEE=ssa
+		;;
+	"-callc")
+		CALLER=ssa
+		CALLEE=c
+		;;
+	"-s")
+		test -n "$2" || usage
+		shift
+		SEED="$1"
+		;;
+	"-n")
+		test -n "$2" || usage
+		shift
+		N="$1"
+		;;
+	*)
+		usage
+		;;
+	esac
+	shift
+done
+
+TMP=`mktemp -d abifuzz.XXXXXX`
+
+init
+
+# with a seed, run that single test; the variable set
+# by -s is SEED, so that is the one to check here
+if test -n "$SEED"
+then
+	once $CALLER $CALLEE $SEED
+else
+	for n in `seq $N`
+	do
+		once $CALLER $CALLEE
+		# progress: print the counter every 100 iterations
+		echo "$n" | grep "00$"
+	done
+fi
+
+echo "All done."
+
+cleanup
diff --git a/lisc/tools/fptox.c b/src/tools/fptox.c
diff --git a/lisc/tools/pmov.c b/src/tools/pmov.c
diff --git a/src/tools/regress.sh b/src/tools/regress.sh
@@ -0,0 +1,17 @@
+#!/bin/sh
+
+# compare the output of ./qbe and ./qbe.1 on every
+# file of test/, stop at the first difference
+for t in test/*
+do
+	# the file name is data, not a format string
+	printf "Test %s ... " "$t"
+
+	./qbe $t >/tmp/out.0 2>&1
+	./qbe.1 $t >/tmp/out.1 2>&1
+
+	if diff /tmp/out.0 /tmp/out.1 > /dev/null
+	then
+		echo "OK"
+	else
+		echo "KO"
+		break
+	fi
+done
diff --git a/src/util.c b/src/util.c
@@ -0,0 +1,329 @@
+#include "all.h"
+
+typedef struct Bitset Bitset;
+typedef struct Vec Vec;
+
+struct Vec { /* header stored in front of the elements of a vector, see vnew */
+ ulong mag; /* set to VMag by vnew, checked by vgrow */
+ size_t esz; /* size of one element in bytes */
+ ulong cap; /* number of elements that fit */
+ union {
+ long long ll;
+ long double ld;
+ void *ptr;
+ } align[]; /* the elements start here */
+};
+
+enum {
+ VMin = 2, /* smallest vector capacity */
+ VMag = 0xcabba9e, /* magic value stored in Vec.mag */
+ NPtr = 256, /* entries per pool chunk, entry 0 links to the previous chunk */
+};
+
+Typ typ[NTyp];
+Ins insb[NIns], *curi;
+
+static void *ptr[NPtr];
+static void **pool = ptr;
+static int nptr = 1;
+
+void
+diag(char *s)
+{
+	/* print the message on its own line
+	 * on stderr, then abort the program */
+	fprintf(stderr, "%s\n", s);
+	abort();
+}
+
+void * /* returns n bytes of zeroed memory, never null */
+emalloc(size_t n)
+{
+ void *p;
+
+ p = calloc(1, n);
+ if (!p) /* allocation failures are fatal */
+ diag("emalloc: out of memory");
+ return p;
+}
+
+void * /* like emalloc, but the pointer is recorded in the pool and released by freeall */
+alloc(size_t n)
+{
+ void **pp;
+
+ if (n == 0) /* an empty request yields a null pointer */
+ return 0;
+ if (nptr >= NPtr) { /* current chunk is full: start a new one linked to it */
+ pp = emalloc(NPtr * sizeof(void *));
+ pp[0] = pool;
+ pool = pp;
+ nptr = 1;
+ }
+ return pool[nptr++] = emalloc(n);
+}
+
+void /* free everything obtained from alloc and reset the pool */
+freeall()
+{
+ void **pp;
+
+ for (;;) {
+ for (pp = &pool[1]; pp < &pool[nptr]; pp++) /* free the pointers recorded in this chunk */
+ free(*pp);
+ pp = pool[0]; /* link to the previous chunk */
+ if (!pp) /* the last chunk has no link and is not freed itself */
+ break;
+ free(pool);
+ pool = pp;
+ nptr = NPtr; /* previous chunks are full */
+ }
+ nptr = 1;
+}
+
+Blk * /* returns a new pool-allocated block with all fields zero */
+blknew()
+{
+ static Blk z; /* zero-initialised template */
+ Blk *b;
+
+ b = alloc(sizeof *b);
+ *b = z;
+ return b;
+}
+
+void /* store a new instruction just below curi in insb */
+emit(int op, int k, Ref to, Ref arg0, Ref arg1)
+{
+ if (curi == insb) /* the buffer is filled from its end down to insb */
+ diag("emit: too many instructions");
+ *--curi = (Ins){
+ .op = op, .cls = k,
+ .to = to, .arg = {arg0, arg1}
+ };
+}
+
+void /* emit() taking the fields from an existing instruction */
+emiti(Ins i)
+{
+ emit(i.op, i.cls, i.to, i.arg[0], i.arg[1]);
+}
+
+/* store in *pd a pool-allocated copy of the
+ * n instructions starting at s; when n is 0,
+ * *pd is set to a null pointer
+ */
+void
+idup(Ins **pd, Ins *s, ulong n)
+{
+	*pd = alloc(n * sizeof(Ins));
+	/* alloc(0) returns 0 and memcpy must not be
+	 * given a null pointer, even for a size of 0 */
+	if (n)
+		memcpy(*pd, s, n * sizeof(Ins));
+}
+
+/* copy n instructions from s to d and return
+ * the position right after the copy in d
+ */
+Ins *
+icpy(Ins *d, Ins *s, ulong n)
+{
+	/* s can be the null ins array of an empty
+	 * block, memcpy must not see it even when
+	 * the size is 0 */
+	if (n)
+		memcpy(d, s, n * sizeof(Ins));
+	return d + n;
+}
+
+void * /* returns a pool-allocated vector with room for at least len elements of size esz */
+vnew(ulong len, size_t esz)
+{
+ ulong cap;
+ Vec *v;
+
+ for (cap=VMin; cap<len; cap*=2) /* capacity: VMin doubled until it reaches len */
+ ;
+ v = alloc(cap * esz + sizeof(Vec));
+ v->mag = VMag;
+ v->cap = cap;
+ v->esz = esz;
+ return v + 1; /* callers get the address right after the header */
+}
+
+void /* make sure the vector *vp holds at least len elements; vp is the address of the vector pointer */
+vgrow(void *vp, ulong len)
+{
+ Vec *v;
+ void *v1;
+
+ v = *(Vec **)vp - 1; /* step back to the header */
+ assert(v+1 && v->mag == VMag);
+ if (v->cap >= len) /* already large enough */
+ return;
+ v1 = vnew(len, v->esz);
+ memcpy(v1, v+1, v->cap * v->esz); /* the old storage is left to the pool */
+ *(Vec **)vp = v1;
+}
+
+int /* returns tmp[t].phi when it is set, t itself otherwise */
+phicls(int t, Tmp *tmp /*, int c*/)
+{
+ if (tmp[t].phi)
+ return tmp[t].phi;
+ return t;
+#if 0
+ int t1;
+
+ t1 = tmp[t].phi;
+ if (!t1)
+ t1 = t;
+ if (t != t1) {
+ t1 = phitmp(t1, tmp, c);
+ if (c)
+ tmp[t].phi = t1;
+ }
+ return t1;
+#endif
+}
+
+Ref /* append a temporary of class k to fn, named prfx followed by a counter */
+newtmp(char *prfx, int k, Fn *fn)
+{
+ static int n; /* counter shared by all functions, never reset here */
+ int t;
+
+ t = fn->ntmp++;
+ vgrow(&fn->tmp, fn->ntmp);
+ sprintf(fn->tmp[t].name, "%s%d", prfx, ++n); /* NOTE(review): unbounded write, assumes the name field is large enough */
+ fn->tmp[t].cls = k;
+ fn->tmp[t].slot = -1; /* no stack slot yet */
+ fn->tmp[t].nuse = +1;
+ fn->tmp[t].ndef = +1;
+ return TMP(t);
+}
+
+Ref /* returns the CBits constant of fn with value val, adding it when missing */
+getcon(int64_t val, Fn *fn)
+{
+ int c;
+
+ for (c=0; c<fn->ncon; c++) /* reuse an existing constant when possible */
+ if (fn->con[c].type == CBits && fn->con[c].bits.i == val)
+ return CON(c);
+ fn->ncon++; /* here c is the index of the new entry */
+ vgrow(&fn->con, fn->ncon);
+ fn->con[c] = (Con){.type = CBits, .bits.i = val};
+ return CON(c);
+}
+
+void /* add the constant c1 to c0, the result is left in c0 */
+addcon(Con *c0, Con *c1)
+{
+ if (c0->type == CUndef) /* an undefined c0 simply becomes c1 */
+ *c0 = *c1;
+ else {
+ if (c1->type == CAddr) { /* at most one of the two may be an address */
+ if (c0->type == CAddr)
+ diag("addcon: adding two addresses");
+ c0->type = CAddr;
+ strcpy(c0->label, c1->label);
+ }
+ c0->bits.i += c1->bits.i;
+ }
+}
+
+void /* set up bs as an empty set able to hold the elements 0..n-1 */
+bsinit(BSet *bs, uint n)
+{
+ n = (n + NBit-1) / NBit; /* number of words, rounded up */
+ bs->nt = n;
+ bs->t = alloc(n * sizeof bs->t[0]); /* pool memory comes zeroed from emalloc */
+}
+
+uint /* returns the number of elements of bs */
+bscount(BSet *bs)
+{
+ uint i, j, n;
+
+ n = 0;
+ for (i=0; i<bs->nt; i++) /* test each of the NBit bits of each word */
+ for (j=0; j<NBit; j++)
+ if (bs->t[i] & BIT(j))
+ n++;
+ return n;
+}
+
+static inline uint /* one past the largest element bs can hold */
+bsmax(BSet *bs)
+{
+ return bs->nt * NBit;
+}
+
+void /* add elt to bs */
+bsset(BSet *bs, uint elt)
+{
+ assert(elt < bsmax(bs));
+ bs->t[elt/NBit] |= BIT(elt%NBit); /* word elt/NBit, bit elt%NBit */
+}
+
+void /* remove elt from bs */
+bsclr(BSet *bs, uint elt)
+{
+ assert(elt < bsmax(bs));
+ bs->t[elt/NBit] &= ~BIT(elt%NBit); /* word elt/NBit, bit elt%NBit */
+}
+
+#define BSOP(f, op) \
+ void \
+ f(BSet *a, BSet *b) \
+ { \
+ uint i; \
+ \
+ assert(a->nt == b->nt); \
+ for (i=0; i<a->nt; i++) \
+ a->t[i] op b->t[i]; \
+ } /* BSOP defines f(a, b): a is updated word by word from b, both must have the same size */
+
+BSOP(bscopy, =) /* a becomes a copy of b */
+BSOP(bsunion, |=) /* a becomes a union b */
+BSOP(bsinter, &=) /* a becomes a intersected with b */
+BSOP(bsdiff, &= ~) /* the elements of b are removed from a */
+
+int
+bsequal(BSet *a, BSet *b)
+{
+	uint w;
+
+	/* both sets must have the same size, they are
+	 * equal when no word differs */
+	assert(a->nt == b->nt);
+	w = 0;
+	while (w < a->nt && a->t[w] == b->t[w])
+		w++;
+	return w == a->nt;
+}
+
+void /* make bs empty, its size is unchanged */
+bszero(BSet *bs)
+{
+ memset(bs->t, 0, bs->nt * sizeof bs->t[0]);
+}
+
+/* iterates on a bitset, use as follows
+ *
+ * for (i=0; bsiter(set, &i); i++)
+ * use(i);
+ *
+ * the search starts at *elt included; when an
+ * element is found it is stored in *elt and 1
+ * is returned, otherwise 0 is returned and
+ * *elt is left unchanged
+ */
+int
+bsiter(BSet *bs, uint *elt)
+{
+ uint i;
+
+ for (i=*elt;; i++) {
+ while (i < bsmax(bs) && !bs->t[i/NBit]) /* skip empty words in one step */
+ i = (i + NBit) & -NBit; /* first bit of the next word */
+ if (i >= bsmax(bs))
+ return 0;
+ if (bshas(bs, i)) {
+ *elt = i;
+ return 1;
+ }
+ }
+}
+
+void /* print on f the names of the temporaries of bs, between brackets on one line */
+dumpts(BSet *bs, Tmp *tmp, FILE *f)
+{
+ uint t;
+
+ fprintf(f, "[");
+ for (t=Tmp0; bsiter(bs, &t); t++) /* elements below Tmp0 are not printed */
+ fprintf(f, " %s", tmp[t].name);
+ fprintf(f, " ]\n");
+}