commit 49a4593c335126ba279f47328824abfef379725e
parent 9d1c38d69547d835f7228651e71e8a7d744c456d
Author: Quentin Carbonneaux <quentin@c9x.me>
Date: Sat, 8 Apr 2017 21:06:33 -0400
prepare for multi-target
This big diff does multiple changes to allow
the addition of new targets to qbe. The
changes are listed below in decreasing order
of impact.
1. Add a new Target structure.
To add support for a given target, one has to
implement all the members of the Target
structure. All the source files were changed
to use this interface where needed.
2. Single out amd64-specific code.
In this commit, the amd64 target T_amd64_sysv
is the only target available, it is implemented
in the amd64/ directory. All the non-static
items in this directory are prefixed with either
amd64_ or amd64_sysv (for items that are
specific to the System V ABI).
3. Centralize Ops information.
There is now a file 'ops.h' that must be used to
store all the available operations together with
their metadata. The various targets will only
select what they need; but it is beneficial that
there is only *one* place to change to add a new
instruction.
One good side effect of this change is that any
operation 'xyz' in the IL now has a corresponding
'Oxyz' in the code.
4. Misc fixes.
One notable change is that instruction selection
now generates generic comparison operations and
the lowering to the target's comparisons is done
in the emitter.
GAS directives for data are the same for many
targets, so data emission was extracted in a
file 'gas.c'.
5. Modularize the Makefile.
The Makefile now has a list of C files that
are target-independent (SRC), and one list
of C files per target. Each target can also
use its own 'all.h' header (for example to
define registers).
Diffstat:
M | Makefile | | | 28 | ++++++++++++++++++++-------- |
M | all.h | | | 334 | +++++++++++++++++++++++++------------------------------------------------------ |
A | amd64/all.h | | | 70 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | amd64/emit.c | | | 561 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | amd64/isel.c | | | 603 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | amd64/sysv.c | | | 701 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | amd64/targ.c | | | 30 | ++++++++++++++++++++++++++++++ |
M | cfg.c | | | 4 | ++-- |
D | emit.c | | | 696 | ------------------------------------------------------------------------------- |
M | fold.c | | | 62 | +++++++++++++++++++++++++++++++------------------------------- |
A | gas.c | | | 122 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
D | isel.c | | | 649 | ------------------------------------------------------------------------------- |
M | live.c | | | 30 | +++++++++++++++++++----------- |
M | main.c | | | 68 | ++++++++++++++++++++++++++++++++++++++++++++++++++------------------ |
M | mem.c | | | 4 | ++-- |
A | ops.h | | | 167 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
M | parse.c | | | 144 | +++++++++++++++++-------------------------------------------------------------- |
M | rega.c | | | 38 | ++++++++++++++++++++------------------ |
M | spill.c | | | 32 | ++++++++++++++++---------------- |
D | sysv.c | | | 718 | ------------------------------------------------------------------------------- |
M | util.c | | | 90 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
21 files changed, 2641 insertions(+), 2510 deletions(-)
diff --git a/Makefile b/Makefile
@@ -1,11 +1,15 @@
BIN = qbe
-ABI = sysv
V = @
OBJDIR = obj
-SRC = main.c util.c parse.c cfg.c mem.c ssa.c alias.c load.c copy.c fold.c live.c $(ABI).c isel.c spill.c rega.c emit.c
-OBJ = $(SRC:%.c=$(OBJDIR)/%.o)
+SRC = main.c util.c parse.c cfg.c mem.c ssa.c alias.c load.c copy.c \
+ fold.c live.c spill.c rega.c gas.c
+AMD64SRC = amd64/targ.c amd64/sysv.c amd64/isel.c amd64/emit.c
+SRCALL = $(SRC) $(AMD64SRC)
+
+AMD64OBJ = $(AMD64SRC:%.c=$(OBJDIR)/%.o)
+OBJ = $(SRC:%.c=$(OBJDIR)/%.o) $(AMD64OBJ)
CFLAGS += -Wall -Wextra -std=c99 -g -pedantic
@@ -19,15 +23,23 @@ $(OBJDIR)/%.o: %.c $(OBJDIR)/timestamp
$(OBJDIR)/timestamp:
@mkdir -p $(OBJDIR)
+ @mkdir -p $(OBJDIR)/amd64
@touch $@
-$(OBJ): all.h
+$(OBJ): all.h ops.h
+$(AMD64OBJ): amd64/all.h
obj/main.o: config.h
config.h:
- @case `uname` in \
- *Darwin*) echo "#define Defaultasm Gasmacho" ;; \
- *) echo "#define Defaultasm Gaself" ;; \
+ @case `uname` in \
+ *Darwin*) \
+ echo "#define Defasm Gasmacho"; \
+ echo "#define Deftgt T_amd64_sysv"; \
+ ;; \
+ *) \
+ echo "#define Defasm Gaself"; \
+ echo "#define Deftgt T_amd64_sysv"; \
+ ;; \
esac > $@
install: $(OBJDIR)/$(BIN)
@@ -47,7 +59,7 @@ check: $(OBJDIR)/$(BIN)
tools/unit.sh all
80:
- @for F in $(SRC); \
+ @for F in $(SRCALL); \
do \
awk "{ \
gsub(/\\t/, \" \"); \
diff --git a/all.h b/all.h
@@ -8,13 +8,14 @@
#define MAKESURE(what, x) typedef char make_sure_##what[(x)?1:-1]
#define die(...) die_(__FILE__, __VA_ARGS__)
+typedef unsigned char uchar;
typedef unsigned int uint;
typedef unsigned long ulong;
typedef unsigned long long bits;
typedef struct BSet BSet;
typedef struct Ref Ref;
-typedef struct OpDesc OpDesc;
+typedef struct Op Op;
typedef struct Ins Ins;
typedef struct Phi Phi;
typedef struct Blk Blk;
@@ -27,6 +28,7 @@ typedef struct Fn Fn;
typedef struct Typ Typ;
typedef struct Seg Seg;
typedef struct Dat Dat;
+typedef struct Target Target;
enum {
NString = 32,
@@ -38,61 +40,29 @@ enum {
NBit = CHAR_BIT * sizeof(bits),
};
-#define BIT(n) ((bits)1 << (n))
-
-enum Reg {
- RXX,
-
- RAX, /* caller-save */
- RCX,
- RDX,
- RSI,
- RDI,
- R8,
- R9,
- R10,
- R11,
-
- RBX, /* callee-save */
- R12,
- R13,
- R14,
- R15,
-
- RBP, /* globally live */
- RSP,
-#define RGLOB (BIT(RBP)|BIT(RSP))
-
- XMM0, /* sse */
- XMM1,
- XMM2,
- XMM3,
- XMM4,
- XMM5,
- XMM6,
- XMM7,
- XMM8,
- XMM9,
- XMM10,
- XMM11,
- XMM12,
- XMM13,
- XMM14,
- XMM15,
-
- Tmp0, /* first non-reg temporary */
-
- NRGlob = 2,
- NIReg = R15 - RAX + 1 + NRGlob,
- NFReg = XMM14 - XMM0 + 1, /* XMM15 is reserved */
- NISave = R11 - RAX + 1,
- NFSave = NFReg,
- NRSave = NISave + NFSave,
- NRClob = R15 - RBX + 1,
+struct Target {
+ int gpr0; /* first general purpose reg */
+ int ngpr;
+ int fpr0; /* first floating point reg */
+ int nfpr;
+ bits rglob; /* globally live regs (e.g., sp, fp) */
+ int nrglob;
+ int *rsave; /* caller-save */
+ int nrsave[2];
+ bits (*retregs)(Ref, int[2]);
+ bits (*argregs)(Ref, int[2]);
+ int (*memargs)(int);
+ void (*abi)(Fn *);
+ void (*isel)(Fn *);
+ void (*emitfn)(Fn *, FILE *);
};
-MAKESURE(NBit_is_enough, NBit >= (int)Tmp0);
+#define BIT(n) ((bits)1 << (n))
+enum {
+ RXX = 0,
+ Tmp0 = NBit, /* first non-reg temporary */
+};
struct BSet {
uint nt;
@@ -139,51 +109,81 @@ static inline int isreg(Ref r)
return rtype(r) == RTmp && r.val < Tmp0;
}
-enum ICmp {
-#define ICMPS(X) \
- X(ule) \
- X(ult) \
- X(sle) \
- X(slt) \
- X(sgt) \
- X(sge) \
- X(ugt) \
- X(uge) \
- X(eq) \
- X(ne) /* make sure icmpop() below works! */
-
-#define X(c) IC##c,
- ICMPS(X)
-#undef X
- NICmp,
+enum CmpI {
+ Cieq,
+ Cine,
+ Cisge,
+ Cisgt,
+ Cisle,
+ Cislt,
+ Ciuge,
+ Ciugt,
+ Ciule,
+ Ciult,
+ NCmpI,
+};
- ICxnp = NICmp, /* x64 specific */
- ICxp,
- NXICmp
+enum CmpF {
+ Cfeq,
+ Cfge,
+ Cfgt,
+ Cfle,
+ Cflt,
+ Cfne,
+ Cfo,
+ Cfuo,
+ NCmpF,
+ NCmp = NCmpI + NCmpF,
};
-static inline int icmpop(int c)
-{
- return c >= ICeq ? c : ICuge - c;
-}
+enum O {
+ Oxxx,
+#define O(op, x, y) O##op,
+ #include "ops.h"
+ NOp,
+};
-enum FCmp {
-#define FCMPS(X) \
- X(le) \
- X(lt) \
- X(gt) \
- X(ge) \
- X(ne) \
- X(eq) \
- X(o) \
- X(uo)
-
-#define X(c) FC##c,
- FCMPS(X)
+enum J {
+ Jxxx,
+#define JMPS(X) \
+ X(ret0) X(retw) X(retl) X(rets) \
+ X(retd) X(retc) X(jmp) X(jnz) \
+ X(jfieq) X(jfine) X(jfisge) X(jfisgt) \
+ X(jfisle) X(jfislt) X(jfiuge) X(jfiugt) \
+ X(jfiule) X(jfiult) X(jffeq) X(jffge) \
+ X(jffgt) X(jffle) X(jfflt) X(jffne) \
+ X(jffo) X(jffuo)
+#define X(j) J##j,
+ JMPS(X)
#undef X
- NFCmp
+ NJmp
+};
+
+enum {
+ Ocmpw = Oceqw,
+ Ocmpw1 = Ocultw,
+ Ocmpl = Oceql,
+ Ocmpl1 = Ocultl,
+ Ocmps = Oceqs,
+ Ocmps1 = Ocuos,
+ Ocmpd = Oceqd,
+ Ocmpd1 = Ocuod,
+ Oalloc = Oalloc4,
+ Oalloc1 = Oalloc16,
+ Oflag = Oflagieq,
+ Oflag1 = Oflagfuo,
+ NPubOp = Onop,
+ Jjf = Jjfieq,
+ Jjf1 = Jjffuo,
};
+/* Opcode/jump range predicates. The argument is parenthesized so
+ * that any expression (e.g. one using | or ?:) can be passed; it
+ * is evaluated twice, so it must not have side effects. */
+#define isstore(o) (Ostoreb <= (o) && (o) <= Ostored)
+#define isload(o) (Oloadsb <= (o) && (o) <= Oload)
+#define isext(o) (Oextsb <= (o) && (o) <= Oextuw)
+#define ispar(o) (Opar <= (o) && (o) <= Opare)
+#define isarg(o) (Oarg <= (o) && (o) <= Oarge)
+#define isret(j) (Jret0 <= (j) && (j) <= Jretc)
+
enum Class {
Kx = -1, /* "top" class (see usecheck() and clsmerge()) */
Kw,
@@ -195,124 +195,10 @@ enum Class {
#define KWIDE(k) ((k)&1)
#define KBASE(k) ((k)>>1)
-enum Op {
- Oxxx,
-
- /* public instructions */
- Oadd,
- Osub,
- Odiv,
- Orem,
- Oudiv,
- Ourem,
- Omul,
- Oand,
- Oor,
- Oxor,
- Osar,
- Oshr,
- Oshl,
- Ocmpw,
- Ocmpw1 = Ocmpw + NICmp-1,
- Ocmpl,
- Ocmpl1 = Ocmpl + NICmp-1,
- Ocmps,
- Ocmps1 = Ocmps + NFCmp-1,
- Ocmpd,
- Ocmpd1 = Ocmpd + NFCmp-1,
-
- Ostoreb,
- Ostoreh,
- Ostorew,
- Ostorel,
- Ostores,
- Ostored,
-#define isstore(o) (Ostoreb <= o && o <= Ostored)
- Oloadsb, /* must match Oext and Tmp.width */
- Oloadub,
- Oloadsh,
- Oloaduh,
- Oloadsw,
- Oloaduw,
- Oload,
-#define isload(o) (Oloadsb <= o && o <= Oload)
- Oextsb,
- Oextub,
- Oextsh,
- Oextuh,
- Oextsw,
- Oextuw,
-#define isext(o) (Oextsb <= o && o <= Oextuw)
-
- Oexts,
- Otruncd,
- Ostosi,
- Odtosi,
- Oswtof,
- Osltof,
- Ocast,
-
- Oalloc,
- Oalloc1 = Oalloc + NAlign-1,
-
- Ovastart,
- Ovaarg,
-
- Ocopy,
- NPubOp,
-
- /* function instructions */
- Opar = NPubOp,
- Oparc,
- Opare,
-#define ispar(o) (Opar <= o && o <= Opare)
- Oarg,
- Oargc,
- Oarge,
-#define isarg(o) (Oarg <= o && o <= Oarge)
- Ocall,
- Ovacall,
-
- /* reserved instructions */
- Onop,
- Oaddr,
- Oswap,
- Osign,
- Osalloc,
- Oxidiv,
- Oxdiv,
- Oxcmp,
- Oxset,
- Oxsetnp = Oxset + ICxnp,
- Oxsetp = Oxset + ICxp,
- Oxtest,
- NOp
-};
-
-enum Jmp {
- Jxxx,
- Jret0,
- Jretw,
- Jretl,
- Jrets,
- Jretd,
- Jretc,
-#define isret(j) (Jret0 <= j && j <= Jretc)
- Jjmp,
- Jjnz,
- Jxjc,
- Jxjnp = Jxjc + ICxnp,
- Jxjp = Jxjc + ICxp,
- NJmp
-};
-
-struct OpDesc {
+struct Op {
char *name;
- int nmem;
short argcls[2][4];
- uint sflag:1; /* sets the zero flag */
- uint lflag:1; /* leaves flags */
- uint cfold:1; /* can fold */
+ int canfold;
};
struct Ins {
@@ -437,7 +323,7 @@ struct Con {
typedef struct Addr Addr;
-struct Addr { /* x64 addressing */
+struct Addr { /* amd64 addressing */
Con offset;
Ref base;
Ref index;
@@ -508,8 +394,8 @@ struct Dat {
char export;
};
-
/* main.c */
+extern Target T;
extern char debug['Z'+1];
/* util.c */
@@ -524,6 +410,8 @@ void die_(char *, char *, ...) __attribute__((noreturn));
void *emalloc(size_t);
void *alloc(size_t);
void freeall(void);
+int argcls(Ins *, int);
+int iscmp(int, int *, int *);
void emit(int, int, Ref, Ref, Ref);
void emiti(Ins);
void idup(Ins **, Ins *, ulong);
@@ -531,12 +419,15 @@ Ins *icpy(Ins *, Ins *, ulong);
void *vnew(ulong, size_t, Pool);
void vfree(void *);
void vgrow(void *, ulong);
+int cmpop(int);
+int cmpneg(int);
int clsmerge(short *, short);
int phicls(int, Tmp *);
Ref newtmp(char *, int, Fn *);
void chuse(Ref, int, Fn *);
Ref getcon(int64_t, Fn *);
void addcon(Con *, Con *);
+void blit(Ref, uint, Ref, uint, Fn *);
void dumpts(BSet *, Tmp *, FILE *);
void bsinit(BSet *, uint);
@@ -559,7 +450,7 @@ bshas(BSet *bs, uint elt)
}
/* parse.c */
-extern OpDesc opdesc[NOp];
+extern Op optab[NOp];
void parse(FILE *, char *, void (Dat *), void (Fn *));
void printfn(Fn *, FILE *);
void printref(Ref, Fn *, FILE *);
@@ -611,16 +502,6 @@ void fold(Fn *);
void liveon(BSet *, Blk *, Blk *);
void filllive(Fn *);
-/* abi: sysv.c */
-extern int rsave[/* NRSave */];
-extern int rclob[/* NRClob */];
-bits retregs(Ref, int[2]);
-bits argregs(Ref, int[2]);
-void abi(Fn *);
-
-/* isel.c */
-void isel(Fn *);
-
/* spill.c */
void fillcost(Fn *);
void spill(Fn *);
@@ -628,10 +509,9 @@ void spill(Fn *);
/* rega.c */
void rega(Fn *);
-/* emit.c */
-extern char *locprefix;
-extern char *symprefix;
-void emitfn(Fn *, FILE *);
-void emitdat(Dat *, FILE *);
-int stashfp(int64_t, int);
-void emitfin(FILE *);
+/* gas.c */
+extern char *gasloc;
+extern char *gassym;
+void gasemitdat(Dat *, FILE *);
+int gasstashfp(int64_t, int);
+void gasemitfin(FILE *);
diff --git a/amd64/all.h b/amd64/all.h
@@ -0,0 +1,70 @@
+#include "../all.h"
+
+typedef struct Amd64Op Amd64Op;
+
+enum Amd64Reg {
+ RAX = RXX+1, /* caller-save */
+ RCX,
+ RDX,
+ RSI,
+ RDI,
+ R8,
+ R9,
+ R10,
+ R11,
+
+ RBX, /* callee-save */
+ R12,
+ R13,
+ R14,
+ R15,
+
+ RBP, /* globally live */
+ RSP,
+
+ XMM0, /* sse */
+ XMM1,
+ XMM2,
+ XMM3,
+ XMM4,
+ XMM5,
+ XMM6,
+ XMM7,
+ XMM8,
+ XMM9,
+ XMM10,
+ XMM11,
+ XMM12,
+ XMM13,
+ XMM14,
+ XMM15,
+
+ NFPR = XMM14 - XMM0 + 1, /* reserve XMM15 */
+ NGPR = RSP - RAX + 1,
+ NGPS = R11 - RAX + 1,
+ NFPS = NFPR,
+ NCLR = R15 - RBX + 1,
+};
+MAKESURE(reg_not_tmp, XMM15 < (int)Tmp0);
+
+/* per-opcode amd64 metadata; amd64_op[] is indexed by opcode */
+struct Amd64Op {
+ char nmem; /* NOTE(review): presumably the number of memory operands allowed -- confirm in targ.c */
+ char zflag; /* the instruction sets the zero flag */
+ char lflag; /* the instruction leaves the flags untouched */
+};
+
+/* targ.c */
+extern Amd64Op amd64_op[];
+
+/* sysv.c (abi) */
+extern int amd64_sysv_rsave[];
+extern int amd64_sysv_rclob[];
+bits amd64_sysv_retregs(Ref, int[2]);
+bits amd64_sysv_argregs(Ref, int[2]);
+void amd64_sysv_abi(Fn *);
+
+/* isel.c */
+void amd64_isel(Fn *);
+
+/* emit.c */
+void amd64_emitfn(Fn *, FILE *);
diff --git a/amd64/emit.c b/amd64/emit.c
@@ -0,0 +1,561 @@
+#include "all.h"
+
+
+#define CMP(X) \
+ X(Ciule, "be") \
+ X(Ciult, "b") \
+ X(Cisle, "le") \
+ X(Cislt, "l") \
+ X(Cisgt, "g") \
+ X(Cisge, "ge") \
+ X(Ciugt, "a") \
+ X(Ciuge, "ae") \
+ X(Cieq, "z") \
+ X(Cine, "nz") \
+ X(NCmpI+Cfle, "be") \
+ X(NCmpI+Cflt, "b") \
+ X(NCmpI+Cfgt, "a") \
+ X(NCmpI+Cfge, "ae") \
+ X(NCmpI+Cfeq, "z") \
+ X(NCmpI+Cfne, "nz") \
+ X(NCmpI+Cfo, "np") \
+ X(NCmpI+Cfuo, "p")
+
+enum {
+ SLong = 0,
+ SWord = 1,
+ SShort = 2,
+ SByte = 3,
+
+ Ki = -1, /* matches Kw and Kl */
+ Ka = -2, /* matches all classes */
+};
+
+/* Instruction format strings:
+ *
+ * if the format string starts with -, the instruction
+ * is assumed to be 3-address and is put in 2-address
+ * mode using an extra mov if necessary
+ *
+ * if the format string starts with +, the same as the
+ * above applies, but commutativity is also assumed
+ *
+ * %k is used to set the class of the instruction,
+ * it'll expand to "l", "q", "ss", "sd", depending
+ * on the instruction class
+ * %0 designates the first argument
+ * %1 designates the second argument
+ * %= designates the result
+ *
+ * if %k is not used, a prefix to 0, 1, or = must be
+ * added, it can be:
+ * M - memory reference
+ * L - long (64 bits)
+ * W - word (32 bits)
+ * H - short (16 bits)
+ * B - byte (8 bits)
+ * S - single precision float
+ * D - double precision float
+ */
+static struct {
+ short op;
+ short cls;
+ char *asm;
+} omap[] = {
+ { Oadd, Ka, "+add%k %1, %=" },
+ { Osub, Ka, "-sub%k %1, %=" },
+ { Oand, Ki, "+and%k %1, %=" },
+ { Oor, Ki, "+or%k %1, %=" },
+ { Oxor, Ki, "+xor%k %1, %=" },
+ { Osar, Ki, "-sar%k %B1, %=" },
+ { Oshr, Ki, "-shr%k %B1, %=" },
+ { Oshl, Ki, "-shl%k %B1, %=" },
+ { Omul, Ki, "+imul%k %1, %=" },
+ { Omul, Ks, "+mulss %1, %=" },
+ { Omul, Kd, "+mulsd %1, %=" },
+ { Odiv, Ka, "-div%k %1, %=" },
+ { Ostorel, Ka, "movq %L0, %M1" },
+ { Ostorew, Ka, "movl %W0, %M1" },
+ { Ostoreh, Ka, "movw %H0, %M1" },
+ { Ostoreb, Ka, "movb %B0, %M1" },
+ { Ostores, Ka, "movss %S0, %M1" },
+ { Ostored, Ka, "movsd %D0, %M1" },
+ { Oload, Ka, "mov%k %M0, %=" },
+ { Oloadsw, Kl, "movslq %M0, %L=" },
+ { Oloadsw, Kw, "movl %M0, %W=" },
+ { Oloaduw, Ki, "movl %M0, %W=" },
+ { Oloadsh, Ki, "movsw%k %M0, %=" },
+ { Oloaduh, Ki, "movzw%k %M0, %=" },
+ { Oloadsb, Ki, "movsb%k %M0, %=" },
+ { Oloadub, Ki, "movzb%k %M0, %=" },
+ { Oextsw, Kl, "movslq %W0, %L=" },
+ { Oextuw, Kl, "movl %W0, %W=" },
+ { Oextsh, Ki, "movsw%k %H0, %=" },
+ { Oextuh, Ki, "movzw%k %H0, %=" },
+ { Oextsb, Ki, "movsb%k %B0, %=" },
+ { Oextub, Ki, "movzb%k %B0, %=" },
+
+ { Oexts, Kd, "cvtss2sd %0, %=" },
+ { Otruncd, Ks, "cvttsd2ss %0, %=" },
+ { Ostosi, Ki, "cvttss2si%k %0, %=" },
+ { Odtosi, Ki, "cvttsd2si%k %0, %=" },
+ { Oswtof, Ka, "cvtsi2%k %W0, %=" },
+ { Osltof, Ka, "cvtsi2%k %L0, %=" },
+ { Ocast, Ki, "movq %D0, %L=" },
+ { Ocast, Ka, "movq %L0, %D=" },
+
+ { Oaddr, Ki, "lea%k %M0, %=" },
+ { Oswap, Ki, "xchg%k %0, %1" },
+ { Osign, Kl, "cqto" },
+ { Osign, Kw, "cltd" },
+ { Oxdiv, Ki, "div%k %0" },
+ { Oxidiv, Ki, "idiv%k %0" },
+ { Oxcmp, Ks, "comiss %S0, %S1" },
+ { Oxcmp, Kd, "comisd %D0, %D1" },
+ { Oxcmp, Ki, "cmp%k %0, %1" },
+ { Oxtest, Ki, "test%k %0, %1" },
+#define X(c, s) \
+ { Oflag+c, Ki, "set" s " %B=\n\tmovzb%k %B=, %=" },
+ CMP(X)
+#undef X
+ { NOp, 0, 0 }
+};
+
+static char *rname[][4] = {
+ [RAX] = {"rax", "eax", "ax", "al"},
+ [RBX] = {"rbx", "ebx", "bx", "bl"},
+ [RCX] = {"rcx", "ecx", "cx", "cl"},
+ [RDX] = {"rdx", "edx", "dx", "dl"},
+ [RSI] = {"rsi", "esi", "si", "sil"},
+ [RDI] = {"rdi", "edi", "di", "dil"},
+ [RBP] = {"rbp", "ebp", "bp", "bpl"},
+ [RSP] = {"rsp", "esp", "sp", "spl"},
+ [R8 ] = {"r8" , "r8d", "r8w", "r8b"},
+ [R9 ] = {"r9" , "r9d", "r9w", "r9b"},
+ [R10] = {"r10", "r10d", "r10w", "r10b"},
+ [R11] = {"r11", "r11d", "r11w", "r11b"},
+ [R12] = {"r12", "r12d", "r12w", "r12b"},
+ [R13] = {"r13", "r13d", "r13w", "r13b"},
+ [R14] = {"r14", "r14d", "r14w", "r14b"},
+ [R15] = {"r15", "r15d", "r15w", "r15b"},
+};
+
+
+/* Translate the slot number s into an offset relative to %rbp.
+ * s is first sign-extended from 29 bits. Negative slot numbers
+ * give positive offsets; the others are placed below %rbp, and
+ * an extra 176 bytes are skipped in variadic functions (the size
+ * of the register save area written by amd64_emitfn()).
+ */
+static int
+slot(int s, Fn *fn)
+{
+	struct { int i:29; } ext;
+	int idx, off;
+
+	/* the bitfield assignment performs the sign extension */
+	ext.i = s;
+	idx = ext.i;
+	assert(idx <= fn->slot);
+	/* specific to NAlign == 3 */
+	if (idx < 0)
+		return -4 * idx;
+	off = -4 * (fn->slot - idx);
+	if (fn->vararg)
+		off += -176;
+	return off;
+}
+
+/* Print the constant con to f. Bit constants are printed as a
+ * signed decimal number; address constants are printed as their
+ * label, prefixed with gasloc (local) or gassym (global), and
+ * followed by a signed offset when it is non-zero. Any other
+ * constant type is a fatal error.
+ */
+static void
+emitcon(Con *con, FILE *f)
+{
+	char *pfx;
+
+	if (con->type == CBits) {
+		fprintf(f, "%"PRId64, con->bits.i);
+		return;
+	}
+	if (con->type != CAddr)
+		die("unreachable");
+	pfx = con->local ? gasloc : gassym;
+	fprintf(f, "%s%s", pfx, con->label);
+	if (con->bits.i)
+		fprintf(f, "%+"PRId64, con->bits.i);
+}
+
+/* Return the assembly name of register reg. For registers below
+ * XMM0 the name is looked up in rname[] using the size index sz;
+ * for the others, "xmmN" is formatted into a static buffer that
+ * is overwritten by the next such call.
+ */
+static char *
+regtoa(int reg, int sz)
+{
+	static char buf[6];
+
+	if (reg < XMM0)
+		return rname[reg][sz];
+	sprintf(buf, "xmm%d", reg-XMM0);
+	return buf;
+}
+
+/* Map a format-string operand letter to the matching operand of
+ * i: '0' and '1' are the two arguments, '=' is the result. Any
+ * other letter is a fatal error.
+ */
+static Ref
+getarg(char c, Ins *i)
+{
+	if (c == '0')
+		return i->arg[0];
+	if (c == '1')
+		return i->arg[1];
+	if (c == '=')
+		return i->to;
+	die("invalid arg letter %c", c);
+}
+
+static void emitins(Ins, Fn *, FILE *);
+
+/* Emit a copy of class k from r2 into r1 by building an Ocopy
+ * instruction and handing it to emitins().
+ */
+static void
+emitcopy(Ref r1, Ref r2, int k, Fn *fn, FILE *f)
+{
+	Ins cp = {.op = Ocopy, .cls = k, .to = r1, .arg = {r2}};
+
+	emitins(cp, fn, f);
+}
+
+/* Print one line of assembly for the instruction i, following
+ * the format string s (the format is described in the comment
+ * above omap[]).
+ *
+ * A leading '+' or '-' makes the instruction 2-address: the
+ * first argument is copied into the result with emitcopy()
+ * before the line is printed. With '+', the two arguments are
+ * swapped beforehand if the second one is the result.
+ *
+ * Side effects: i may be modified (argument swap), and memory
+ * references whose base is a stack slot are rewritten in place
+ * in fn->mem to use %rbp as base.
+ */
+static void
+emitf(char *s, Ins *i, Fn *fn, FILE *f)
+{
+ static char clstoa[][3] = {"l", "q", "ss", "sd"};
+ char c;
+ int sz;
+ Ref ref;
+ Mem *m;
+ Con off;
+
+ switch (*s) {
+ case '+':
+ if (req(i->arg[1], i->to)) {
+ ref = i->arg[0];
+ i->arg[0] = i->arg[1];
+ i->arg[1] = ref;
+ }
+ /* fall through */
+ case '-':
+ assert((!req(i->arg[1], i->to) || req(i->arg[0], i->to)) &&
+ "cannot convert to 2-address");
+ emitcopy(i->to, i->arg[0], i->cls, fn, f);
+ s++;
+ break;
+ }
+
+ fputc('\t', f);
+Next:
+ /* copy literal characters up to the next '%'; the end of
+ * the format string terminates the line */
+ while ((c = *s++) != '%')
+ if (!c) {
+ fputc('\n', f);
+ return;
+ } else
+ fputc(c, f);
+ switch ((c = *s++)) {
+ case '%':
+ fputc('%', f);
+ break;
+ case 'k':
+ fputs(clstoa[i->cls], f);
+ break;
+ case '0':
+ case '1':
+ case '=':
+ /* no size prefix: the size follows the instruction class */
+ sz = KWIDE(i->cls) ? SLong : SWord;
+ s--;
+ goto Ref;
+ case 'D':
+ case 'S':
+ sz = SLong; /* does not matter for floats */
+ Ref:
+ c = *s++;
+ ref = getarg(c, i);
+ switch (rtype(ref)) {
+ case RTmp:
+ assert(isreg(ref));
+ fprintf(f, "%%%s", regtoa(ref.val, sz));
+ break;
+ case RSlot:
+ fprintf(f, "%d(%%rbp)", slot(ref.val, fn));
+ break;
+ case RMem:
+ Mem:
+ m = &fn->mem[ref.val];
+ /* fold a slot base into the offset and use %rbp instead */
+ if (rtype(m->base) == RSlot) {
+ off.type = CBits;
+ off.bits.i = slot(m->base.val, fn);
+ addcon(&m->offset, &off);
+ m->base = TMP(RBP);
+ }
+ if (m->offset.type != CUndef)
+ emitcon(&m->offset, f);
+ fputc('(', f);
+ /* a missing base is printed as a %rip-relative reference */
+ if (req(m->base, R))
+ fprintf(f, "%%rip");
+ else
+ fprintf(f, "%%%s", regtoa(m->base.val, SLong));
+ if (!req(m->index, R))
+ fprintf(f, ", %%%s, %d",
+ regtoa(m->index.val, SLong),
+ m->scale
+ );
+ fputc(')', f);
+ break;
+ case RCon:
+ fputc('$', f);
+ emitcon(&fn->con[ref.val], f);
+ break;
+ default:
+ die("unreachable");
+ }
+ break;
+ case 'L':
+ sz = SLong;
+ goto Ref;
+ case 'W':
+ sz = SWord;
+ goto Ref;
+ case 'H':
+ sz = SShort;
+ goto Ref;
+ case 'B':
+ sz = SByte;
+ goto Ref;
+ case 'M':
+ /* the operand is printed as a memory reference */
+ c = *s++;
+ ref = getarg(c, i);
+ switch (rtype(ref)) {
+ case RMem:
+ goto Mem;
+ case RSlot:
+ fprintf(f, "%d(%%rbp)", slot(ref.val, fn));
+ break;
+ case RCon:
+ emitcon(&fn->con[ref.val], f);
+ fprintf(f, "(%%rip)");
+ break;
+ case RTmp:
+ assert(isreg(ref));
+ fprintf(f, "(%%%s)", regtoa(ref.val, SLong));
+ break;
+ default:
+ die("unreachable");
+ }
+ break;
+ default:
+ die("invalid format specifier %%%c", c);
+ }
+ goto Next;
+}
+
+/* Emit the assembly for the instruction i of function fn to f.
+ *
+ * Most opcodes are looked up in omap[] (first entry matching
+ * both the opcode and the class) and printed with emitf(). The
+ * opcodes handled explicitly below either need no output (Onop),
+ * have a better encoding in some cases (Omul, Ocopy), or cannot
+ * be expressed by a single table entry (Osub, Ocall, Osalloc,
+ * float Oswap). Dies if no table entry matches.
+ */
+static void
+emitins(Ins i, Fn *fn, FILE *f)
+{
+	Ref r;
+	int64_t val;
+	int o;
+
+	switch (i.op) {
+	default:
+	Table:
+		/* most instructions are just pulled out of
+		 * the table omap[], some special cases are
+		 * detailed below */
+		for (o=0;; o++) {
+			/* this linear search should really be a binary
+			 * search */
+			if (omap[o].op == NOp)
+				/* the class is printed as its letter,
+				 * hence %c and not %d */
+				die("no match for %s(%c)",
+					optab[i.op].name, "wlsd"[i.cls]);
+			if (omap[o].op == i.op)
+			if (omap[o].cls == i.cls
+			|| (omap[o].cls == Ki && KBASE(i.cls) == 0)
+			|| (omap[o].cls == Ka))
+				break;
+		}
+		emitf(omap[o].asm, &i, fn, f);
+		break;
+	case Onop:
+		/* just do nothing for nops, they are inserted
+		 * by some passes */
+		break;
+	case Omul:
+		/* here, we try to use the 3-address form
+		 * of multiplication when possible */
+		if (rtype(i.arg[1]) == RCon) {
+			r = i.arg[0];
+			i.arg[0] = i.arg[1];
+			i.arg[1] = r;
+		}
+		if (KBASE(i.cls) == 0 /* only available for ints */
+		&& rtype(i.arg[0]) == RCon
+		&& rtype(i.arg[1]) == RTmp) {
+			emitf("imul%k %0, %1, %=", &i, fn, f);
+			break;
+		}
+		goto Table;
+	case Osub:
+		/* we have to use the negation trick to handle
+		 * some 3-address subtractions: when the result
+		 * is the second argument, compute -arg1 + arg0;
+		 * if the result is also the first argument
+		 * (x = x - x), the trick would yield -2x, so
+		 * the regular table entry is used instead
+		 * NOTE(review): neg%k only makes sense for the
+		 * integer classes -- confirm float subtractions
+		 * never reach this path */
+		if (req(i.to, i.arg[1]) && !req(i.arg[0], i.to)) {
+			emitf("neg%k %=", &i, fn, f);
+			emitf("add%k %0, %=", &i, fn, f);
+			break;
+		}
+		goto Table;
+	case Ocopy:
+		/* make sure we don't emit useless copies,
+		 * also, we can use a trick to load 64-bits
+		 * registers, it's detailed in my note below
+		 * http://c9x.me/art/notes.html?09/19/2015 */
+		if (req(i.to, R) || req(i.arg[0], R))
+			break;
+		if (isreg(i.to)
+		&& rtype(i.arg[0]) == RCon
+		&& i.cls == Kl
+		&& fn->con[i.arg[0].val].type == CBits
+		&& (val = fn->con[i.arg[0].val].bits.i) >= 0
+		&& val <= UINT32_MAX) {
+			emitf("movl %W0, %W=", &i, fn, f);
+		} else if (isreg(i.to)
+		&& rtype(i.arg[0]) == RCon
+		&& fn->con[i.arg[0].val].type == CAddr) {
+			emitf("lea%k %M0, %=", &i, fn, f);
+		} else if (!req(i.arg[0], i.to))
+			emitf("mov%k %0, %=", &i, fn, f);
+		break;
+	case Ocall:
+		/* calls simply have a weird syntax in AT&T
+		 * assembly... */
+		switch (rtype(i.arg[0])) {
+		case RCon:
+			fprintf(f, "\tcallq ");
+			emitcon(&fn->con[i.arg[0].val], f);
+			fprintf(f, "\n");
+			break;
+		case RTmp:
+			emitf("callq *%L0", &i, fn, f);
+			break;
+		default:
+			die("invalid call argument");
+		}
+		break;
+	case Osalloc:
+		/* there is no good reason why this is here
+		 * maybe we should split Osalloc in 2 different
+		 * instructions depending on the result
+		 */
+		emitf("subq %L0, %%rsp", &i, fn, f);
+		if (!req(i.to, R))
+			emitcopy(i.to, TMP(RSP), Kl, fn, f);
+		break;
+	case Oswap:
+		if (KBASE(i.cls) == 0)
+			goto Table;
+		/* for floats, there is no swap instruction
+		 * so we use xmm15 as a temporary
+		 */
+		emitcopy(TMP(XMM0+15), i.arg[0], i.cls, fn, f);
+		emitcopy(i.arg[0], i.arg[1], i.cls, fn, f);
+		emitcopy(i.arg[1], TMP(XMM0+15), i.cls, fn, f);
+		break;
+	}
+}
+
+/* Compute the number of bytes subtracted from %rsp in the
+ * prologue: the slots of fn rounded up to a multiple of 4 slots
+ * (4 bytes each), 8 extra bytes when an odd number of the
+ * registers in amd64_sysv_rclob[] is marked in fn->reg, and 176
+ * bytes for the register save area of variadic functions.
+ */
+static int
+framesz(Fn *fn)
+{
+	int *r, odd, nslot;
+
+	/* parity of the number of rclob registers used by fn */
+	odd = 0;
+	for (r=amd64_sysv_rclob; r<&amd64_sysv_rclob[NCLR]; r++)
+		odd ^= 1 & (fn->reg >> *r);
+	/* specific to NAlign == 3 */
+	nslot = fn->slot;
+	nslot = (nslot + 3) & -4;
+	return 4*nslot + 8*odd + 176*fn->vararg;
+}
+
+/* Emit the assembly of the whole function fn to f.
+ *
+ * The prologue sets up %rbp, reserves framesz(fn) bytes, saves
+ * the argument registers in the 176-byte area below %rbp for
+ * variadic functions, and pushes the registers of
+ * amd64_sysv_rclob[] that are marked in fn->reg. Blocks are then
+ * emitted in link order; a label is printed only when it may be
+ * needed (the previous block did not fall through, or the block
+ * has several predecessors). Block labels are made unique across
+ * functions by the static counter id0.
+ *
+ * Dies on any jump that is not Jret0, Jjmp, or one of the NCmp
+ * conditional jumps starting at Jjf.
+ */
+void
+amd64_emitfn(Fn *fn, FILE *f)
+{
+	static char *ctoa[] = {
+	#define X(c, s) [c] = s,
+		CMP(X)
+	#undef X
+	};
+	static int id0;
+	Blk *b, *s;
+	Ins *i, itmp;
+	int *r, c, fs, o, n, lbl;
+
+	fprintf(f, ".text\n");
+	if (fn->export)
+		fprintf(f, ".globl %s%s\n", gassym, fn->name);
+	fprintf(f,
+		"%s%s:\n"
+		"\tpushq %%rbp\n"
+		"\tmovq %%rsp, %%rbp\n",
+		gassym, fn->name
+	);
+	fs = framesz(fn);
+	if (fs)
+		fprintf(f, "\tsub $%d, %%rsp\n", fs);
+	if (fn->vararg) {
+		/* 6 integer registers (8 bytes each) followed by
+		 * 8 sse registers (16 bytes each): 176 bytes */
+		o = -176;
+		for (r=amd64_sysv_rsave; r<&amd64_sysv_rsave[6]; r++, o+=8)
+			fprintf(f, "\tmovq %%%s, %d(%%rbp)\n", rname[*r][0], o);
+		for (n=0; n<8; ++n, o+=16)
+			fprintf(f, "\tmovaps %%xmm%d, %d(%%rbp)\n", n, o);
+	}
+	for (r=amd64_sysv_rclob; r<&amd64_sysv_rclob[NCLR]; r++)
+		if (fn->reg & BIT(*r)) {
+			itmp.arg[0] = TMP(*r);
+			emitf("pushq %L0", &itmp, fn, f);
+		}
+
+	for (lbl=0, b=fn->start; b; b=b->link) {
+		if (lbl || b->npred > 1)
+			fprintf(f, "%sbb%d:\n", gasloc, id0+b->id);
+		for (i=b->ins; i!=&b->ins[b->nins]; i++)
+			emitins(*i, fn, f);
+		lbl = 1;
+		switch (b->jmp.type) {
+		case Jret0:
+			/* pop in the reverse order of the pushes */
+			for (r=&amd64_sysv_rclob[NCLR]; r>amd64_sysv_rclob;)
+				if (fn->reg & BIT(*--r)) {
+					itmp.arg[0] = TMP(*r);
+					emitf("popq %L0", &itmp, fn, f);
+				}
+			fprintf(f,
+				"\tleave\n"
+				"\tret\n"
+			);
+			break;
+		case Jjmp:
+		Jmp:
+			if (b->s1 != b->link)
+				fprintf(f, "\tjmp %sbb%d\n",
+					gasloc, id0+b->s1->id);
+			else
+				lbl = 0;
+			break;
+		default:
+			/* valid comparison codes are 0..NCmp-1, this
+			 * is also the range of indices of ctoa[] */
+			c = b->jmp.type - Jjf;
+			if (0 <= c && c < NCmp) {
+				/* jump to s2 on the negated condition,
+				 * unless s2 is the next block, in
+				 * which case the successors are
+				 * swapped and the condition kept */
+				if (b->link == b->s2) {
+					s = b->s1;
+					b->s1 = b->s2;
+					b->s2 = s;
+				} else
+					c = cmpneg(c);
+				fprintf(f, "\tj%s %sbb%d\n", ctoa[c],
+					gasloc, id0+b->s2->id);
+				goto Jmp;
+			}
+			die("unhandled jump %d", b->jmp.type);
+		}
+	}
+	id0 += fn->nblk;
+}
diff --git a/amd64/isel.c b/amd64/isel.c
@@ -0,0 +1,603 @@
+#include "all.h"
+#include <limits.h>
+
+/* For x86_64, do the following:
+ *
+ * - check that constants are used only in
+ * places allowed
+ * - ensure immediates always fit in 32b
+ * - expose machine register constraints
+ * on instructions like division.
+ * - implement fast locals (the streak of
+ * constant allocX in the first basic block)
+ * - recognize complex addressing modes
+ *
+ * Invariant: the use counts that are used
+ * in sel() must be sound. This
+ * is not so trivial, maybe the
+ * dce should be moved out...
+ */
+
+typedef struct ANum ANum;
+
+struct ANum {
+ char n, l, r;
+ Ins *i;
+};
+
+static void amatch(Addr *, Ref, ANum *, Fn *, int);
+
+/* Return non-zero when r is a bit constant that does not fit
+ * in a signed 32-bit immediate. References that are not
+ * constants, and address constants, always return 0; any other
+ * constant type is a fatal error.
+ */
+static int
+noimm(Ref r, Fn *fn)
+{
+	Con *c;
+	int64_t val;
+
+	if (rtype(r) != RCon)
+		return 0;
+	c = &fn->con[r.val];
+	/* we only support the 'small' code model of
+	 * the ABI, this means that we can always
+	 * address data with 32bits
+	 */
+	if (c->type == CAddr)
+		return 0;
+	if (c->type != CBits)
+		die("invalid constant");
+	val = c->bits.i;
+	return (val < INT32_MIN || val > INT32_MAX);
+}
+
+/* Return the slot recorded for the temporary r, or -1 when r
+ * is not a temporary.
+ */
+static int
+rslot(Ref r, Fn *fn)
+{
+	return rtype(r) == RTmp ? fn->tmp[r.val].slot : -1;
+}
+
+/* Rewrite the instruction argument *r, used with class k, into
+ * a form the target accepts. Exactly one of the cases below
+ * applies, tested in this order:
+ * - floating point constants are replaced by a memory
+ *   reference to a local label obtained from gasstashfp();
+ * - when cpy is zero and k is Kl, constants rejected by
+ *   noimm() are copied into a fresh temporary;
+ * - temporaries that have a slot are replaced by a fresh
+ *   temporary holding the slot address (Oaddr);
+ * - memory references without a base get their offset
+ *   loaded into a fresh temporary used as base.
+ * New instructions are created with emit(), and fn->mem or
+ * fn->con may grow.
+ */
+static void
+fixarg(Ref *r, int k, int cpy, Fn *fn)
+{
+ Addr a, *m;
+ Ref r0, r1;
+ int s, n;
+
+ r1 = r0 = *r;
+ s = rslot(r0, fn);
+ if (KBASE(k) == 1 && rtype(r0) == RCon) {
+ /* load floating points from memory
+ * slots, they can't be used as
+ * immediates
+ */
+ r1 = MEM(fn->nmem);
+ vgrow(&fn->mem, ++fn->nmem);
+ memset(&a, 0, sizeof a);
+ a.offset.type = CAddr;
+ a.offset.local = 1;
+ n = gasstashfp(fn->con[r0.val].bits.i, KWIDE(k));
+ sprintf(a.offset.label, "fp%d", n);
+ fn->mem[fn->nmem-1] = a;
+ }
+ else if (!cpy && k == Kl && noimm(r0, fn)) {
+ /* load constants that do not fit in
+ * a 32bit signed integer into a
+ * long temporary
+ */
+ r1 = newtmp("isel", Kl, fn);
+ emit(Ocopy, Kl, r1, r0, R);
+ }
+ else if (s != -1) {
+ /* load fast locals' addresses into
+ * temporaries right before the
+ * instruction
+ */
+ r1 = newtmp("isel", Kl, fn);
+ emit(Oaddr, Kl, r1, SLOT(s), R);
+ }
+ else if (rtype(r0) == RMem) {
+ /* apple asm fix */
+ m = &fn->mem[r0.val];
+ if (req(m->base, R)) {
+ /* move the offset to a new constant and
+ * use its address as the base; *r itself
+ * is left unchanged in this case */
+ n = fn->ncon;
+ vgrow(&fn->con, ++fn->ncon);
+ fn->con[n] = m->offset;
+ m->offset.type = CUndef;
+ r0 = newtmp("isel", Kl, fn);
+ emit(Oaddr, Kl, r0, CON(n), R);
+ m->base = r0;
+ }
+ }
+ *r = r1;
+}
+
+/* Try to replace the temporary *r by a memory reference.
+ * amatch() fills an Addr for the temporary; if its base is the
+ * temporary itself nothing is changed. Otherwise the Addr is
+ * appended to fn->mem, the use counts of the old temporary and
+ * of the new base and index are updated with chuse(), and *r
+ * becomes the new MEM reference. References that are not
+ * temporaries are left untouched.
+ */
+static void
+seladdr(Ref *r, ANum *an, Fn *fn)
+{
+ Addr a;
+ Ref r0;
+
+ r0 = *r;
+ if (rtype(r0) == RTmp) {
+ amatch(&a, r0, an, fn, 1);
+ if (req(a.base, r0))
+ return;
+ if (a.offset.type == CAddr)
+ if (!req(a.base, R)) {
+ /* apple asm fix */
+ /* an address offset with both a base and an
+ * index is given up on; with a base only,
+ * the base is moved to the index position */
+ if (!req(a.index, R))
+ return;
+ else {
+ a.index = a.base;
+ a.scale = 1;
+ a.base = R;
+ }
+ }
+ chuse(r0, -1, fn);
+ vgrow(&fn->mem, ++fn->nmem);
+ fn->mem[fn->nmem-1] = a;
+ chuse(a.base, +1, fn);
+ chuse(a.index, +1, fn);
+ *r = MEM(fn->nmem-1);
+ }
+}
+
+/* Emit an Oxcmp of class k comparing the two references in arg.
+ * When arg[0] is a constant the two entries of arg are swapped
+ * (the caller's array is modified) and 1 is returned so that
+ * the caller can adjust its comparison code (the callers in
+ * this file use cmpop() for that); otherwise 0 is returned.
+ * If arg[0] is still a constant after the swap, it is copied
+ * into a fresh temporary (k must be Kl in that case). Both
+ * arguments of the new instruction then go through fixarg().
+ */
+static int
+selcmp(Ref arg[2], int k, Fn *fn)
+{
+ int swap;
+ Ref r, *iarg;
+
+ swap = rtype(arg[0]) == RCon;
+ if (swap) {
+ r = arg[1];
+ arg[1] = arg[0];
+ arg[0] = r;
+ }
+ emit(Oxcmp, k, R, arg[1], arg[0]);
+ /* keep a pointer to the arguments of the Oxcmp, the
+ * calls below may emit more instructions */
+ iarg = curi->arg;
+ if (rtype(arg[0]) == RCon) {
+ assert(k == Kl);
+ iarg[1] = newtmp("isel", k, fn);
+ emit(Ocopy, k, iarg[1], arg[0], R);
+ }
+ fixarg(&iarg[0], k, 0, fn);
+ fixarg(&iarg[1], k, 0, fn);
+ return swap;
+}
+
+/* Select amd64 instructions for the IL instruction i. The
+ * replacement instructions are created with emit()/emiti() and
+ * their arguments are legalized with fixarg(); an is passed on
+ * to seladdr() for address matching. Dies on an opcode that is
+ * not handled.
+ * NOTE(review): emit() appears to add instructions before the
+ * previously emitted ones (curi moves down), so the sequences
+ * below read bottom-up -- confirm in util.c.
+ */
+static void
+sel(Ins i, ANum *an, Fn *fn)
+{
+ Ref r0, r1, *iarg;
+ int x, k, kc;
+ int64_t sz;
+ Ins *i0, *i1;
+
+ /* drop instructions whose result is never used and that
+ * do not involve machine registers, releasing the uses
+ * of their arguments */
+ if (rtype(i.to) == RTmp)
+ if (!isreg(i.to) && !isreg(i.arg[0]) && !isreg(i.arg[1]))
+ if (fn->tmp[i.to.val].nuse == 0) {
+ chuse(i.arg[0], -1, fn);
+ chuse(i.arg[1], -1, fn);
+ return;
+ }
+ i0 = curi;
+ k = i.cls;
+ switch (i.op) {
+ case Odiv:
+ case Orem:
+ case Oudiv:
+ case Ourem:
+ /* r0 holds the wanted result (RAX for divisions,
+ * RDX for remainders), r1 the other register */
+ if (i.op == Odiv || i.op == Oudiv)
+ r0 = TMP(RAX), r1 = TMP(RDX);
+ else
+ r0 = TMP(RDX), r1 = TMP(RAX);
+ emit(Ocopy, k, i.to, r0, R);
+ emit(Ocopy, k, R, r1, R);
+ if (rtype(i.arg[1]) == RCon) {
+ /* immediates not allowed for
+ * divisions in x86
+ */
+ r0 = newtmp("isel", k, fn);
+ } else
+ r0 = i.arg[1];
+ if (fn->tmp[r0.val].slot != -1)
+ err("unlikely argument %%%s in %s",
+ fn->tmp[r0.val].name, optab[i.op].name);
+ if (i.op == Odiv || i.op == Orem) {
+ emit(Oxidiv, k, R, r0, R);
+ emit(Osign, k, TMP(RDX), TMP(RAX), R);
+ } else {
+ emit(Oxdiv, k, R, r0, R);
+ emit(Ocopy, k, TMP(RDX), CON_Z, R);
+ }
+ emit(Ocopy, k, TMP(RAX), i.arg[0], R);
+ fixarg(&curi->arg[0], k, 0, fn);
+ if (rtype(i.arg[1]) == RCon)
+ emit(Ocopy, k, r0, i.arg[1], R);
+ break;
+ case Osar:
+ case Oshr:
+ case Oshl:
+ /* a shift amount that is not a constant is
+ * passed in RCX */
+ if (rtype(i.arg[1]) == RCon)
+ goto Emit;
+ r0 = i.arg[1];
+ i.arg[1] = TMP(RCX);
+ emit(Ocopy, Kw, R, TMP(RCX), R);
+ emiti(i);
+ emit(Ocopy, Kw, TMP(RCX), r0, R);
+ break;
+ case Onop:
+ break;
+ case Ostored:
+ case Ostores:
+ case Ostorel:
+ case Ostorew:
+ case Ostoreh:
+ case Ostoreb:
+ /* stores of floating point constants are turned
+ * into integer stores of the same width */
+ if (rtype(i.arg[0]) == RCon) {
+ if (i.op == Ostored)
+ i.op = Ostorel;
+ if (i.op == Ostores)
+ i.op = Ostorew;
+ }
+ seladdr(&i.arg[1], an, fn);
+ goto Emit;
+ case_Oload:
+ seladdr(&i.arg[0], an, fn);
+ goto Emit;
+ case Ocall:
+ case Osalloc:
+ case Ocopy:
+ case Oadd:
+ case Osub:
+ case Omul:
+ case Oand:
+ case Oor:
+ case Oxor:
+ case Oxtest:
+ case Ostosi:
+ case Odtosi:
+ case Oswtof:
+ case Osltof:
+ case Oexts:
+ case Otruncd:
+ case Ocast:
+ case_OExt:
+Emit:
+ emiti(i);
+ iarg = curi->arg; /* fixarg() can change curi */
+ fixarg(&iarg[0], argcls(&i, 0), 0, fn);
+ fixarg(&iarg[1], argcls(&i, 1), 0, fn);
+ break;
+ case Oalloc:
+ case Oalloc+1:
+ case Oalloc+2: /* == Oalloc1 */
+ /* we need to make sure
+ * the stack remains aligned
+ * (rsp = 0) mod 16
+ */
+ if (rtype(i.arg[0]) == RCon) {
+ sz = fn->con[i.arg[0].val].bits.i;
+ if (sz < 0 || sz >= INT_MAX-15)
+ err("invalid alloc size %"PRId64, sz);
+ sz = (sz + 15) & -16;
+ emit(Osalloc, Kl, i.to, getcon(sz, fn), R);
+ } else {
+ /* r0 = (i.arg[0] + 15) & -16 */
+ r0 = newtmp("isel", Kl, fn);
+ r1 = newtmp("isel", Kl, fn);
+ emit(Osalloc, Kl, i.to, r0, R);
+ emit(Oand, Kl, r0, r1, getcon(-16, fn));
+ emit(Oadd, Kl, r1, i.arg[0], getcon(15, fn));
+ if (fn->tmp[i.arg[0].val].slot != -1)
+ err("unlikely argument %%%s in %s",
+ fn->tmp[i.arg[0].val].name, optab[i.op].name);
+ }
+ break;
+ default:
+ /* opcodes that form ranges are recognized here
+ * rather than with case labels */
+ if (isext(i.op))
+ goto case_OExt;
+ if (isload(i.op))
+ goto case_Oload;
+ if (iscmp(i.op, &kc, &x)) {
+ /* comparisons become an Oxcmp followed by
+ * an Oflag instruction; the flag code is
+ * changed with cmpop() when selcmp()
+ * swapped the arguments */
+ emit(Oflag+x, k, i.to, R, R);
+ i1 = curi;
+ if (selcmp(i.arg, kc, fn))
+ i1->op = Oflag + cmpop(x);
+ break;
+ }
+ die("unknown instruction %s", optab[i.op].name);
+ }
+
+ /* check that no instruction emitted above still uses a
+ * temporary that has a slot */
+ while (i0 > curi && --i0) {
+ assert(rslot(i0->arg[0], fn) == -1);
+ assert(rslot(i0->arg[1], fn) == -1);
+ }
+}
+
+/* Walk backward from i (exclusive) down to i0 looking for the
+ * instruction whose flags a following jump could reuse.
+ * Returns the first instruction met whose amd64_op entry has
+ * zflag set; instructions with lflag set are stepped over; any
+ * other instruction stops the search. Returns 0 when nothing
+ * usable is found.
+ * NOTE(review): zflag/lflag presumably mean "sets the zero flag"
+ * and "leaves flags untouched" -- confirm against ops.h.
+ */
+static Ins *
+flagi(Ins *i0, Ins *i)
+{
+	Ins *p;
+
+	for (p = i; p > i0;) {
+		--p;
+		if (amd64_op[p->op].zflag)
+			return p;
+		if (!amd64_op[p->op].lflag)
+			break;
+	}
+	return 0;
+}
+
+/* Lower the jump that terminates block b.
+ * Jret0 and Jjmp are left alone; a Jjnz is turned into a
+ * flag-based jump (Jjf + condition), either by reusing the
+ * instruction of b that already sets the flags or by emitting
+ * a comparison of the jump argument against zero.
+ */
+static void
+seljmp(Blk *b, Fn *fn)
+{
+ Ref r;
+ int c, k;
+ Ins *fi;
+ Tmp *t;
+
+ if (b->jmp.type == Jret0 || b->jmp.type == Jjmp)
+ return;
+ assert(b->jmp.type == Jjnz);
+ r = b->jmp.arg;
+ t = &fn->tmp[r.val];
+ b->jmp.arg = R;
+ assert(!req(r, R) && rtype(r) != RCon);
+ /* both successors identical: the condition is irrelevant,
+ * drop one use of r and jump unconditionally
+ */
+ if (b->s1 == b->s2) {
+ chuse(r, -1, fn);
+ b->jmp.type = Jjmp;
+ b->s2 = 0;
+ return;
+ }
+ fi = flagi(b->ins, &b->ins[b->nins]);
+ if (!fi || !req(fi->to, r)) {
+ /* no reusable flag-setting instruction defines r:
+ * compare r with zero and jump if not equal
+ */
+ selcmp((Ref[2]){r, CON_Z}, Kw, fn); /* todo, long jnz */
+ b->jmp.type = Jjf + Cine;
+ }
+ else if (iscmp(fi->op, &k, &c)) {
+ /* r comes from a comparison: jump on its condition;
+ * when the jump is the only use, re-select the compare
+ * right before the jump and erase the original
+ */
+ if (t->nuse == 1) {
+ if (selcmp(fi->arg, k, fn))
+ c = cmpop(c);
+ *fi = (Ins){.op = Onop};
+ }
+ b->jmp.type = Jjf + c;
+ }
+ else if (fi->op == Oand && t->nuse == 1
+ && (rtype(fi->arg[0]) == RTmp ||
+ rtype(fi->arg[1]) == RTmp)) {
+ /* an 'and' only used by the jump becomes an xtest
+ * without result; a constant operand is moved to
+ * the first argument slot
+ */
+ fi->op = Oxtest;
+ fi->to = R;
+ b->jmp.type = Jjf + Cine;
+ if (rtype(fi->arg[1]) == RCon) {
+ r = fi->arg[1];
+ fi->arg[1] = fi->arg[0];
+ fi->arg[0] = r;
+ }
+ }
+ else {
+ /* since flags are not tracked in liveness,
+ * the result of the flag-setting instruction
+ * has to be marked as live
+ */
+ if (t->nuse == 1)
+ emit(Ocopy, Kw, R, r, R);
+ b->jmp.type = Jjf + Cine;
+ }
+}
+
+/* Address number of an operand: constants always have number 2,
+ * temporaries have the number recorded for them in ai[].
+ * Any other kind of reference is a fatal error.
+ */
+static int
+aref(Ref r, ANum *ai)
+{
+	int ty;
+
+	ty = rtype(r);
+	if (ty == RCon)
+		return 2;
+	if (ty == RTmp)
+		return ai[r.val].n;
+	die("constant or temporary expected");
+}
+
+/* Tell whether r is a plain integer constant (CBits) equal to
+ * 1, 2, 4 or 8. Returns 1 if so, 0 otherwise.
+ */
+static int
+ascale(Ref r, Con *con)
+{
+	Con *c;
+
+	if (rtype(r) != RCon)
+		return 0;
+	c = &con[r.val];
+	if (c->type != CBits)
+		return 0;
+	switch (c->bits.i) {
+	case 1:
+	case 2:
+	case 4:
+	case 8:
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+/* Number the temporaries defined in block b by the shape of the
+ * address expression that computes them.
+ * For every instruction defining a temporary, ai[tmp].i records
+ * the defining instruction; for add and mul instructions,
+ * ai[tmp].n receives the shape number of the result and
+ * ai[tmp].l / ai[tmp].r the shape numbers chosen for the first
+ * and second argument. con is the constant table used to
+ * recognize valid scales.
+ */
+static void
+anumber(ANum *ai, Blk *b, Con *con)
+{
+ /* This should be made obsolete by a proper
+ * reassoc pass.
+ *
+ * Rules:
+ *
+ * RTmp(_) -> 0 tmp
+ * ( RTmp(_) -> 1 slot )
+ * RCon(_) -> 2 con
+ * 0 * 2 -> 3 s * i (when constant is 1,2,4,8)
+ */
+ /* add[x][y] is the shape of the sum of a shape-x and a
+ * shape-y operand; 0 means the shapes do not combine
+ */
+ static char add[10][10] = {
+ [2] [2] = 2, /* folding */
+ [2] [5] = 5, [5] [2] = 5,
+ [2] [6] = 6, [6] [2] = 6,
+ [2] [7] = 7, [7] [2] = 7,
+ [0] [0] = 4, /* 4: b + s * i */
+ [0] [3] = 4, [3] [0] = 4,
+ [2] [3] = 5, [3] [2] = 5, /* 5: o + s * i */
+ [0] [2] = 6, [2] [0] = 6, /* 6: o + b */
+ [2] [4] = 7, [4] [2] = 7, /* 7: o + b + s * i */
+ [0] [5] = 7, [5] [0] = 7,
+ [6] [3] = 7, [3] [6] = 7,
+
+ };
+ int a, a1, a2, n1, n2, t1, t2;
+ Ins *i;
+
+ for (i=b->ins; i-b->ins < b->nins; i++) {
+ if (rtype(i->to) == RTmp)
+ ai[i->to.val].i = i;
+ if (i->op != Oadd && i->op != Omul)
+ continue;
+ a1 = aref(i->arg[0], ai);
+ a2 = aref(i->arg[1], ai);
+ /* t1/t2: the argument is neither a slot nor a constant,
+ * so it may also be used as a plain temporary (shape 0)
+ */
+ t1 = a1 != 1 && a1 != 2;
+ t2 = a2 != 1 && a2 != 2;
+ if (i->op == Oadd) {
+ /* keep the combination with the largest number among:
+ * both shapes as they are, either one demoted to a
+ * plain temporary, or both demoted
+ */
+ a = add[n1 = a1][n2 = a2];
+ if (t1 && a < add[0][a2])
+ a = add[n1 = 0][n2 = a2];
+ if (t2 && a < add[a1][0])
+ a = add[n1 = a1][n2 = 0];
+ if (t1 && t2 && a < add[0][0])
+ a = add[n1 = 0][n2 = 0];
+ } else {
+ /* mul: shape 3 (s * i) when one argument is a valid
+ * scale and the other can be used as a temporary
+ */
+ n1 = n2 = a = 0;
+ if (ascale(i->arg[0], con) && t2)
+ a = 3, n1 = 2, n2 = 0;
+ if (t1 && ascale(i->arg[1], con))
+ a = 3, n1 = 0, n2 = 2;
+ }
+ ai[i->to.val].n = a;
+ ai[i->to.val].l = n1;
+ ai[i->to.val].r = n2;
+ }
+}
+
+/* Build the addressing mode *a (offset, base, index, scale) for
+ * reference r by following the numbering stored in ai[].
+ * top is non-zero for the outermost call, which clears *a first;
+ * recursive calls pass 0 and accumulate into the same *a.
+ */
+static void
+amatch(Addr *a, Ref r, ANum *ai, Fn *fn, int top)
+{
+ Ins *i;
+ int nl, nr, t, s;
+ Ref al, ar;
+
+ if (top)
+ memset(a, 0, sizeof *a);
+ /* constants are folded into the offset */
+ if (rtype(r) == RCon) {
+ addcon(&a->offset, &fn->con[r.val]);
+ return;
+ }
+ assert(rtype(r) == RTmp);
+ i = ai[r.val].i;
+ nl = ai[r.val].l;
+ nr = ai[r.val].r;
+ /* order the arguments of the defining instruction so that
+ * al has the smaller shape number and ar the larger one
+ */
+ if (i) {
+ if (nl > nr) {
+ al = i->arg[1];
+ ar = i->arg[0];
+ t = nl, nl = nr, nr = t;
+ } else {
+ al = i->arg[0];
+ ar = i->arg[1];
+ }
+ }
+ switch (ai[r.val].n) {
+ case 3: /* s * i */
+ /* a product standing alone as the whole address is
+ * kept as a base; it only becomes index*scale when
+ * it is part of a larger expression
+ */
+ if (!top) {
+ a->index = al;
+ a->scale = fn->con[ar.val].bits.i;
+ } else
+ a->base = r;
+ break;
+ case 4: /* b + s * i */
+ switch (nr) {
+ case 0:
+ /* two plain temporaries: if the second one has a
+ * stack slot, make it the base and index with the
+ * other one
+ */
+ if (fn->tmp[ar.val].slot != -1) {
+ al = i->arg[1];
+ ar = i->arg[0];
+ }
+ a->index = ar;
+ a->scale = 1;
+ break;
+ case 3:
+ amatch(a, ar, ai, fn, 0);
+ break;
+ }
+ r = al;
+ /* fall through: al is the base */
+ case 0:
+ /* plain temporary: use its stack slot if it has one */
+ s = fn->tmp[r.val].slot;
+ if (s != -1)
+ r = SLOT(s);
+ a->base = r;
+ break;
+ case 2: /* constants */
+ case 5: /* o + s * i */
+ case 6: /* o + b */
+ case 7: /* o + b + s * i */
+ amatch(a, ar, ai, fn, 0);
+ amatch(a, al, ai, fn, 0);
+ break;
+ default:
+ die("unreachable");
+ }
+}
+
+/* instruction selection
+ * requires use counts (as given by parsing)
+ *
+ * Runs in two steps: constant-size allocs of the start block
+ * are first turned into fixed stack slots, then every block is
+ * rewritten back to front (phi arguments of the successors,
+ * the jump, then the instructions) into the insb buffer and
+ * copied back into the block.
+ */
+void
+amd64_isel(Fn *fn)
+{
+ Blk *b, **sb;
+ Ins *i;
+ Phi *p;
+ uint a;
+ int n, al;
+ int64_t sz;
+ ANum *ainfo;
+
+ /* assign slots to fast allocs */
+ b = fn->start;
+ /* specific to NAlign == 3 */ /* or change n=4 and sz /= 4 below */
+ /* one pass per alloc alignment (n = 4, 8, 16 bytes); sizes are
+ * rounded up to n and slots are counted in units of 4 bytes;
+ * a pass stops at the first alloc of its alignment whose size
+ * is not a constant
+ */
+ for (al=Oalloc, n=4; al<=Oalloc1; al++, n*=2)
+ for (i=b->ins; i-b->ins < b->nins; i++)
+ if (i->op == al) {
+ if (rtype(i->arg[0]) != RCon)
+ break;
+ sz = fn->con[i->arg[0].val].bits.i;
+ if (sz < 0 || sz >= INT_MAX-15)
+ err("invalid alloc size %"PRId64, sz);
+ sz = (sz + n-1) & -n;
+ sz /= 4;
+ fn->tmp[i->to.val].slot = fn->slot;
+ fn->slot += sz;
+ *i = (Ins){.op = Onop};
+ }
+
+ /* process basic blocks */
+ n = fn->ntmp;
+ ainfo = emalloc(n * sizeof ainfo[0]);
+ for (b=fn->start; b; b=b->link) {
+ curi = &insb[NIns];
+ /* fix the phi arguments flowing out of b into each of
+ * its successors; a is the index of b among the phi's
+ * predecessor blocks
+ */
+ for (sb=(Blk*[3]){b->s1, b->s2, 0}; *sb; sb++)
+ for (p=(*sb)->phi; p; p=p->link) {
+ for (a=0; p->blk[a] != b; a++)
+ assert(a+1 < p->narg);
+ fixarg(&p->arg[a], p->cls, 1, fn);
+ }
+ /* ainfo only covers the n temporaries that existed before
+ * selection started
+ */
+ memset(ainfo, 0, n * sizeof ainfo[0]);
+ anumber(ainfo, b, fn->con);
+ seljmp(b, fn);
+ for (i=&b->ins[b->nins]; i!=b->ins;)
+ sel(*--i, ainfo, fn);
+ /* replace the block's instructions by the selected ones */
+ b->nins = &insb[NIns] - curi;
+ idup(&b->ins, curi, b->nins);
+ }
+ free(ainfo);
+
+ if (debug['I']) {
+ fprintf(stderr, "\n> After instruction selection:\n");
+ printfn(fn, stderr);
+ }
+}
diff --git a/amd64/sysv.c b/amd64/sysv.c
@@ -0,0 +1,701 @@
+#include "all.h"
+
+typedef struct AClass AClass;
+typedef struct RAlloc RAlloc;
+
+/* ABI classification of one argument, parameter or return value */
+struct AClass {
+ int inmem; /* 0: in registers, 1: aggregate in memory, 2: scalar on the stack */
+ int align; /* log2 of the alignment in bytes */
+ uint size; /* size in bytes (aggregates: rounded up to 8 or more) */
+ int cls[2]; /* class of each 8-byte part passed in a register */
+ Ref ref[2]; /* temporaries used to move each 8-byte part */
+};
+
+/* list of alloc instructions (return pads of calls) that are
+ * emitted in the start block once all blocks are lowered
+ */
+struct RAlloc {
+ Ins i;
+ RAlloc *link;
+};
+
+/* Fill a->cls[] for aggregate type t by walking the segment
+ * list of each of its union alternatives.
+ * *pe is the index of the 8-byte part being classified (at most
+ * two are handled) and *pn the number of bytes already consumed
+ * in that part; both are shared with recursive calls made for
+ * nested types.
+ */
+static void
+classify(AClass *a, Typ *t, int *pn, int *pe)
+{
+ Seg *seg;
+ int n, s, *cls;
+
+ for (n=0; n<t->nunion; n++) {
+ seg = t->seg[n];
+ for (s=0; *pe<2; (*pe)++) {
+ cls = &a->cls[*pe];
+ /* consume segments until the current part is full */
+ for (; *pn<8; s++) {
+ switch (seg[s].type) {
+ case SEnd:
+ goto Done;
+ case SPad:
+ /* don't change anything */
+ break;
+ case SFlt:
+ /* floats only classify a part that has no
+ * class yet; an integer class is kept
+ */
+ if (*cls == Kx)
+ *cls = Kd;
+ break;
+ case SInt:
+ *cls = Kl;
+ break;
+ case STyp:
+ /* nested type: seg[s].len is its index in typ[];
+ * the recursive call advances *pn and *pe itself
+ */
+ classify(a, &typ[seg[s].len], pn, pe);
+ continue;
+ }
+ *pn += seg[s].len;
+ }
+ Done:
+ assert(*pn <= 8);
+ *pn = 0;
+ }
+ }
+}
+
+/* Classify aggregate type t into *a: record its padded size and
+ * its alignment, decide whether it lives in memory and, if not,
+ * compute the class of each of its 8-byte parts.
+ * a->cls[] is left untouched when the type is passed in memory.
+ */
+static void
+typclass(AClass *a, Typ *t)
+{
+	uint size, align;
+	int off, part;
+
+	/* the ABI requires sizes to be rounded
+	 * up to the nearest multiple of 8, moreover
+	 * it makes it easy to load and store structures
+	 * in registers
+	 */
+	align = 1u << t->align;
+	if (align < 8)
+		align = 8;
+	size = t->size;
+	size = (size + align-1) & -align;
+
+	a->align = t->align;
+	a->size = size;
+
+	/* large or unaligned structures are
+	 * required to be passed in memory
+	 */
+	if (t->dark || size == 0 || size > 16) {
+		a->inmem = 1;
+		return;
+	}
+
+	a->inmem = 0;
+	a->cls[0] = a->cls[1] = Kx;
+	off = 0;
+	part = 0;
+	classify(a, t, &off, &part);
+}
+
+/* Pick the return registers of an aggregate classified by aret.
+ * reg[n] receives the register holding the n-th 8-byte part:
+ * integer parts use RAX then RDX, the other parts XMM0 then
+ * XMM0+1.
+ * Returns the register counts encoded like the low bits of an
+ * RCall value: 1 is added per integer register, 4 (1 << 2) per
+ * sse register.
+ */
+static int
+retr(Ref reg[2], AClass *aret)
+{
+	static int retreg[2][2] = {{RAX, RDX}, {XMM0, XMM0+1}};
+	int used[2] = {0, 0};
+	int ca, base;
+	uint part;
+
+	ca = 0;
+	for (part = 0; part*8 < aret->size; part++) {
+		base = KBASE(aret->cls[part]);
+		reg[part] = TMP(retreg[base][used[base]]);
+		used[base]++;
+		ca += 1 << (2 * base);
+	}
+	return ca;
+}
+
+/* Lower the return jump of block b.
+ * Typed returns (everything but Jret0) become a Jret0 whose
+ * argument is an RCall value describing the registers that hold
+ * the result; the copies/loads that fill those registers are
+ * emitted here.
+ * NOTE(review): emit() appears to build the block back to front,
+ * so the emit calls below read bottom-up -- confirm in util.c.
+ */
+static void
+selret(Blk *b, Fn *fn)
+{
+ int j, k, ca;
+ Ref r, r0, reg[2];
+ AClass aret;
+
+ j = b->jmp.type;
+
+ if (!isret(j) || j == Jret0)
+ return;
+
+ r0 = b->jmp.arg;
+ b->jmp.type = Jret0;
+
+ if (j == Jretc) {
+ /* aggregate return, r0 is the address of the aggregate */
+ typclass(&aret, &typ[fn->retty]);
+ if (aret.inmem) {
+ /* copy the aggregate to the caller-provided location
+ * kept in fn->retr and return that address in RAX
+ */
+ assert(rtype(fn->retr) == RTmp);
+ emit(Ocopy, Kl, TMP(RAX), fn->retr, R);
+ blit(fn->retr, 0, r0, aret.size, fn);
+ ca = 1;
+ } else {
+ /* load each 8-byte part into its return register */
+ ca = retr(reg, &aret);
+ if (aret.size > 8) {
+ r = newtmp("abi", Kl, fn);
+ emit(Oload, Kl, reg[1], r, R);
+ emit(Oadd, Kl, r, r0, getcon(8, fn));
+ }
+ emit(Oload, Kl, reg[0], r0, R);
+ }
+ } else {
+ /* scalar return: k is the class of the returned value */
+ k = j - Jretw;
+ if (KBASE(k) == 0) {
+ emit(Ocopy, k, TMP(RAX), r0, R);
+ ca = 1;
+ } else {
+ emit(Ocopy, k, TMP(XMM0), r0, R);
+ ca = 1 << 2;
+ }
+ }
+
+ b->jmp.arg = CALL(ca);
+}
+
+/* Classify the arguments (op == Oarg) or parameters
+ * (op == Opar) in [i0, i1), filling one AClass of ac[] per
+ * instruction.
+ * aret, when non-null, is the class of the aggregate return
+ * value; if it is returned in memory one integer register is
+ * reserved for the hidden argument.
+ * *env is set to the environment reference when an env
+ * argument/parameter is present; its ac[] entry is not written.
+ * Returns the number of integer and sse registers used, placed
+ * at bits 4 and 8 as in an RCall value (the hidden argument is
+ * included in the integer count).
+ */
+static int
+argsclass(Ins *i0, Ins *i1, AClass *ac, int op, AClass *aret, Ref *env)
+{
+ int nint, ni, nsse, ns, n, *pn;
+ AClass *a;
+ Ins *i;
+
+ /* nint/nsse count the registers still available */
+ if (aret && aret->inmem)
+ nint = 5; /* hidden argument */
+ else
+ nint = 6;
+ nsse = 8;
+ for (i=i0, a=ac; i<i1; i++, a++)
+ /* map par/parc/pare onto arg/argc/arge */
+ switch (i->op - op + Oarg) {
+ case Oarg:
+ /* scalar: takes one register of its base class,
+ * or goes on the stack (inmem == 2) if none is left
+ */
+ if (KBASE(i->cls) == 0)
+ pn = &nint;
+ else
+ pn = &nsse;
+ if (*pn > 0) {
+ --*pn;
+ a->inmem = 0;
+ } else
+ a->inmem = 2;
+ a->align = 3;
+ a->size = 8;
+ a->cls[0] = i->cls;
+ break;
+ case Oargc:
+ /* aggregate: i->arg[0] holds the type index */
+ n = i->arg[0].val;
+ typclass(a, &typ[n]);
+ if (a->inmem)
+ continue;
+ /* it is passed in registers only if all its parts
+ * fit in the remaining registers
+ */
+ ni = ns = 0;
+ for (n=0; (uint)n*8<a->size; n++)
+ if (KBASE(a->cls[n]) == 0)
+ ni++;
+ else
+ ns++;
+ if (nint >= ni && nsse >= ns) {
+ nint -= ni;
+ nsse -= ns;
+ } else
+ a->inmem = 1;
+ break;
+ case Oarge:
+ if (op == Opar)
+ *env = i->to;
+ else
+ *env = i->arg[0];
+ break;
+ }
+
+ return ((6-nint) << 4) | ((8-nsse) << 8);
+}
+
+/* Register list exported as the target's .rsave member,
+ * terminated by -1.
+ * The order of the first entries matters: integer arguments
+ * are assigned by indexing this array from 0 (see rarg and
+ * amd64_sysv_argregs).
+ */
+int amd64_sysv_rsave[] = {
+ RDI, RSI, RDX, RCX, R8, R9, R10, R11, RAX,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, -1
+};
+/* NOTE(review): presumably the callee-saved registers the
+ * function must preserve; terminated by -1
+ */
+int amd64_sysv_rclob[] = {RBX, R12, R13, R14, R15, -1};
+
+/* compile-time check that both arrays have the sizes implied
+ * by NGPS, NFPS and NCLR (plus the -1 terminator)
+ */
+MAKESURE(sysv_arrays_ok,
+ sizeof amd64_sysv_rsave == (NGPS+NFPS+1) * sizeof(int) &&
+ sizeof amd64_sysv_rclob == (NCLR+1) * sizeof(int)
+);
+
+/* layout of call's second argument (RCall)
+ *
+ * 29 12 8 4 3 0
+ * |0...00|x|xxxx|xxxx|xx|xx| range
+ * | | | | ` gp regs returned (0..2)
+ * | | | ` sse regs returned (0..2)
+ * | | ` gp regs passed (0..6)
+ * | ` sse regs passed (0..8)
+ * ` 1 if rax is used to pass data (0..1)
+ */
+
+/* Decode the registers used to return a value from the RCall
+ * reference r.
+ * Returns the set of return registers as a bit mask (RAX, RDX,
+ * XMM0, XMM1); if p is non-null, p[0] and p[1] receive the
+ * number of integer and sse registers.
+ */
+bits
+amd64_sysv_retregs(Ref r, int p[2])
+{
+	bits set;
+	int ngp, nsse;
+
+	assert(rtype(r) == RCall);
+	ngp = r.val & 3;
+	nsse = (r.val >> 2) & 3;
+	set = 0;
+	if (ngp >= 1) {
+		set |= BIT(RAX);
+		if (ngp >= 2)
+			set |= BIT(RDX);
+	}
+	if (nsse >= 1) {
+		set |= BIT(XMM0);
+		if (nsse >= 2)
+			set |= BIT(XMM1);
+	}
+	if (p) {
+		p[0] = ngp;
+		p[1] = nsse;
+	}
+	return set;
+}
+
+/* Decode the registers used to pass arguments from the RCall
+ * reference r.
+ * Returns the set of argument registers as a bit mask, RAX
+ * included when the call passes data in it; if p is non-null,
+ * p[0] receives the number of integer registers (RAX counted)
+ * and p[1] the number of sse registers.
+ */
+bits
+amd64_sysv_argregs(Ref r, int p[2])
+{
+	bits set;
+	int k, ngp, nsse, userax;
+
+	assert(rtype(r) == RCall);
+	ngp = (r.val >> 4) & 15;
+	nsse = (r.val >> 8) & 15;
+	userax = (r.val >> 12) & 1;
+	set = 0;
+	for (k = ngp; k-- > 0;)
+		set |= BIT(amd64_sysv_rsave[k]);
+	for (k = nsse; k-- > 0;)
+		set |= BIT(XMM0+k);
+	if (userax)
+		set |= BIT(RAX);
+	if (p) {
+		p[0] = ngp + userax;
+		p[1] = nsse;
+	}
+	return set;
+}
+
+/* Return the next free argument register for a value of class
+ * ty and advance the matching counter: *ni indexes
+ * amd64_sysv_rsave for integer classes, *ns counts from XMM0
+ * for the other classes.
+ */
+static Ref
+rarg(int ty, int *ni, int *ns)
+{
+	int n;
+
+	if (KBASE(ty) != 0) {
+		n = *ns;
+		*ns = n + 1;
+		return TMP(XMM0 + n);
+	}
+	n = *ni;
+	*ni = n + 1;
+	return TMP(amd64_sysv_rsave[n]);
+}
+
+/* Lower one call to the System V calling convention.
+ * i1 is the call instruction (Ocall or Ovacall) and [i0, i1)
+ * are its argument instructions. When the call returns an
+ * aggregate, the alloc instruction of its return pad is pushed
+ * on the list *rap instead of being emitted here.
+ *
+ * The code is emitted back to front, so the steps below appear
+ * in the reverse of their execution order: release of the
+ * argument area, retrieval of the result, the call itself,
+ * setup of RAX (env or number of sse registers of a variadic
+ * call), copies into the argument registers, stores of the
+ * stack arguments and, executed first, allocation of the
+ * argument area.
+ */
+static void
+selcall(Fn *fn, Ins *i0, Ins *i1, RAlloc **rap)
+{
+	Ins *i;
+	AClass *ac, *a, aret;
+	int ca, ni, ns, al, varc, envc;
+	uint stk, off;
+	Ref r, r1, r2, reg[2], env;
+	RAlloc *ra;
+
+	env = R;
+	ac = alloc((i1-i0) * sizeof ac[0]);
+
+	/* a non-empty second argument is the type of the returned
+	 * aggregate
+	 */
+	if (!req(i1->arg[1], R)) {
+		assert(rtype(i1->arg[1]) == RType);
+		typclass(&aret, &typ[i1->arg[1].val]);
+		ca = argsclass(i0, i1, ac, Oarg, &aret, &env);
+	} else
+		ca = argsclass(i0, i1, ac, Oarg, 0, &env);
+
+	/* size of the stack argument area, kept 16-byte aligned */
+	for (stk=0, a=&ac[i1-i0]; a>ac;)
+		if ((--a)->inmem) {
+			if (a->align > 4)
+				err("sysv abi requires alignments of 16 or less");
+			stk += a->size;
+			if (a->align == 4)
+				stk += stk & 15;
+		}
+	stk += stk & 15;
+	if (stk) {
+		/* pop the argument area after the call */
+		r = getcon(-(int64_t)stk, fn);
+		emit(Osalloc, Kl, R, r, R);
+	}
+
+	if (!req(i1->arg[1], R)) {
+		if (aret.inmem) {
+			/* get the return location from eax
+			 * it saves one callee-save reg */
+			r1 = newtmp("abi", Kl, fn);
+			emit(Ocopy, Kl, i1->to, TMP(RAX), R);
+			ca += 1;
+		} else {
+			/* store the return registers into the return pad */
+			if (aret.size > 8) {
+				r = newtmp("abi", Kl, fn);
+				aret.ref[1] = newtmp("abi", aret.cls[1], fn);
+				emit(Ostorel, 0, R, aret.ref[1], r);
+				emit(Oadd, Kl, r, i1->to, getcon(8, fn));
+			}
+			aret.ref[0] = newtmp("abi", aret.cls[0], fn);
+			emit(Ostorel, 0, R, aret.ref[0], i1->to);
+			ca += retr(reg, &aret);
+			if (aret.size > 8)
+				emit(Ocopy, aret.cls[1], aret.ref[1], reg[1], R);
+			emit(Ocopy, aret.cls[0], aret.ref[0], reg[0], R);
+			r1 = i1->to;
+		}
+		/* allocate return pad */
+		ra = alloc(sizeof *ra);
+		/* specific to NAlign == 3 */
+		al = aret.align >= 2 ? aret.align - 2 : 0;
+		ra->i = (Ins){Oalloc+al, r1, {getcon(aret.size, fn)}, Kl};
+		ra->link = (*rap);
+		*rap = ra;
+	} else {
+		/* scalar result in RAX or XMM0 */
+		ra = 0;
+		if (KBASE(i1->cls) == 0) {
+			emit(Ocopy, i1->cls, i1->to, TMP(RAX), R);
+			ca += 1;
+		} else {
+			emit(Ocopy, i1->cls, i1->to, TMP(XMM0), R);
+			ca += 1 << 2;
+		}
+	}
+	envc = !req(R, env);
+	varc = i1->op == Ovacall;
+	if (varc && envc)
+		err("sysv abi does not support variadic env calls");
+	/* bit 12 of the call descriptor: RAX carries data */
+	ca |= (varc | envc) << 12;
+	emit(Ocall, i1->cls, R, i1->arg[0], CALL(ca));
+	if (envc)
+		emit(Ocopy, Kl, TMP(RAX), env, R);
+	if (varc)
+		emit(Ocopy, Kw, TMP(RAX), getcon((ca >> 8) & 15, fn), R);
+
+	ni = ns = 0;
+	if (ra && aret.inmem)
+		emit(Ocopy, Kl, rarg(Kl, &ni, &ns), ra->i.to, R); /* pass hidden argument */
+	for (i=i0, a=ac; i<i1; i++, a++) {
+		/* the env argument travels in RAX (copy emitted above)
+		 * and argsclass() does not classify it: it must not
+		 * take an argument register, otherwise every following
+		 * integer argument lands one register too far
+		 */
+		if (i->op == Oarge || a->inmem)
+			continue;
+		r1 = rarg(a->cls[0], &ni, &ns);
+		if (i->op == Oargc) {
+			/* aggregate in registers: load its 8-byte parts */
+			if (a->size > 8) {
+				r2 = rarg(a->cls[1], &ni, &ns);
+				r = newtmp("abi", Kl, fn);
+				emit(Oload, a->cls[1], r2, r, R);
+				emit(Oadd, Kl, r, i->arg[1], getcon(8, fn));
+			}
+			emit(Oload, a->cls[0], r1, i->arg[1], R);
+		} else
+			emit(Ocopy, i->cls, r1, i->arg[0], R);
+	}
+
+	if (!stk)
+		return;
+
+	/* r is the base of the argument area; fill it with the
+	 * arguments passed in memory
+	 */
+	r = newtmp("abi", Kl, fn);
+	for (i=i0, a=ac, off=0; i<i1; i++, a++) {
+		if (!a->inmem)
+			continue;
+		if (i->op == Oargc) {
+			if (a->align == 4)
+				off += off & 15;
+			blit(r, off, i->arg[1], a->size, fn);
+		} else {
+			r1 = newtmp("abi", Kl, fn);
+			emit(Ostorel, 0, R, i->arg[0], r1);
+			emit(Oadd, Kl, r1, r, getcon(off, fn));
+		}
+		off += a->size;
+	}
+	emit(Osalloc, Kl, r, getcon(stk, fn), R);
+}
+
+/* Lower the parameter instructions [i0, i1) of the start block
+ * to the System V calling convention. The resulting
+ * instructions are left in the insb buffer (curi is reset
+ * first).
+ * Aggregates received in registers get a stack allocation that
+ * is filled from those registers; parameters received in memory
+ * are mapped to negative slots.
+ * Returns the register usage computed by argsclass() combined
+ * with the byte offset of the end of the stack parameters,
+ * stored from bit 12 up.
+ */
+static int
+selpar(Fn *fn, Ins *i0, Ins *i1)
+{
+	AClass *ac, *a, aret;
+	Ins *i;
+	int ni, ns, s, al, fa;
+	Ref r, env;
+
+	env = R;
+	ac = alloc((i1-i0) * sizeof ac[0]);
+	curi = &insb[NIns];
+	ni = ns = 0;
+
+	if (fn->retty >= 0) {
+		typclass(&aret, &typ[fn->retty]);
+		fa = argsclass(i0, i1, ac, Opar, &aret, &env);
+	} else
+		fa = argsclass(i0, i1, ac, Opar, 0, &env);
+
+	/* aggregates received in registers: allocate room for them
+	 * and store the temporaries that will receive the registers
+	 */
+	for (i=i0, a=ac; i<i1; i++, a++) {
+		if (i->op != Oparc || a->inmem)
+			continue;
+		if (a->size > 8) {
+			r = newtmp("abi", Kl, fn);
+			a->ref[1] = newtmp("abi", Kl, fn);
+			emit(Ostorel, 0, R, a->ref[1], r);
+			emit(Oadd, Kl, r, i->to, getcon(8, fn));
+		}
+		a->ref[0] = newtmp("abi", Kl, fn);
+		emit(Ostorel, 0, R, a->ref[0], i->to);
+		/* specific to NAlign == 3 */
+		al = a->align >= 2 ? a->align - 2 : 0;
+		emit(Oalloc+al, Kl, i->to, getcon(a->size, fn), R);
+	}
+
+	/* the hidden argument (address of the returned aggregate)
+	 * comes in the first integer register
+	 */
+	if (fn->retty >= 0 && aret.inmem) {
+		r = newtmp("abi", Kl, fn);
+		emit(Ocopy, Kl, r, rarg(Kl, &ni, &ns), R);
+		fn->retr = r;
+	}
+
+	/* s is the slot offset of the next stack parameter,
+	 * in units of 4 bytes
+	 */
+	for (i=i0, a=ac, s=4; i<i1; i++, a++) {
+		switch (a->inmem) {
+		case 1:
+			if (a->align > 4)
+				err("sysv abi requires alignments of 16 or less");
+			if (a->align == 4)
+				s = (s+3) & -4;
+			fn->tmp[i->to.val].slot = -s;
+			s += a->size / 4;
+			continue;
+		case 2:
+			emit(Oload, i->cls, i->to, SLOT(-s), R);
+			s += 2;
+			continue;
+		}
+		/* the env parameter is received in RAX (copy emitted
+		 * below) and argsclass() does not classify it: it must
+		 * not take an argument register nor get a second
+		 * definition from one
+		 */
+		if (i->op == Opare)
+			continue;
+		r = rarg(a->cls[0], &ni, &ns);
+		if (i->op == Oparc) {
+			emit(Ocopy, Kl, a->ref[0], r, R);
+			if (a->size > 8) {
+				r = rarg(a->cls[1], &ni, &ns);
+				emit(Ocopy, Kl, a->ref[1], r, R);
+			}
+		} else
+			emit(Ocopy, i->cls, i->to, r, R);
+	}
+
+	if (!req(R, env))
+		emit(Ocopy, Kl, env, TMP(RAX), R);
+
+	return fa | (s*4)<<12;
+}
+
+/* Create a new block made of the instructions currently
+ * accumulated in the insb buffer, empty the buffer, and link
+ * the new block right after b in the block list.
+ * The new block inherits b's loop value and is named after b
+ * with b's incremented visit counter as suffix; its (non-zero)
+ * visit mark is set to the same value.
+ * The jump and successors of the new block are left for the
+ * caller to fill in.
+ */
+static Blk *
+split(Fn *fn, Blk *b)
+{
+ Blk *bn;
+
+ ++fn->nblk;
+ bn = blknew();
+ bn->nins = &insb[NIns] - curi;
+ idup(&bn->ins, curi, bn->nins);
+ curi = &insb[NIns];
+ bn->visit = ++b->visit;
+ snprintf(bn->name, NString, "%s.%d", b->name, b->visit);
+ bn->loop = b->loop;
+ bn->link = b->link;
+ b->link = bn;
+ return bn;
+}
+
+/* In every phi of block b, replace the predecessor bp by bp1.
+ * bp is expected to be listed in each phi (checked by assert).
+ */
+static void
+chpred(Blk *b, Blk *bp, Blk *bp1)
+{
+	Phi *phi;
+	uint n;
+
+	phi = b->phi;
+	while (phi) {
+		n = 0;
+		while (phi->blk[n] != bp) {
+			assert(n+1<phi->narg);
+			n++;
+		}
+		phi->blk[n] = bp1;
+		phi = phi->link;
+	}
+}
+
+/* Lower the vaarg instruction i of block b.
+ * The block is cut at i: the instructions already lowered
+ * (those following i) move to a new block @b0, and two new
+ * blocks @breg and @bstk fetch the address of the argument from
+ * the register save area or from the stack area of the variable
+ * argument list ap. b keeps the instructions preceding i and
+ * ends with the test choosing between the two.
+ * The instructions are emitted back to front, so each group of
+ * emit calls below reads bottom-up with respect to the sketch.
+ */
+static void
+selvaarg(Fn *fn, Blk *b, Ins *i)
+{
+ Ref loc, lreg, lstk, nr, r0, r1, c4, c8, c16, c, ap;
+ Blk *b0, *bstk, *breg;
+ int isint;
+
+ c4 = getcon(4, fn);
+ c8 = getcon(8, fn);
+ c16 = getcon(16, fn);
+ ap = i->arg[0];
+ isint = KBASE(i->cls) == 0;
+
+ /* @b [...]
+ r0 =l add ap, (0 or 4)
+ nr =l loadsw r0
+ r1 =w cultw nr, (48 or 176)
+ jnz r1, @breg, @bstk
+ @breg
+ r0 =l add ap, 16
+ r1 =l loadl r0
+ lreg =l add r1, nr
+ r0 =w add nr, (8 or 16)
+ r1 =l add ap, (0 or 4)
+ storew r0, r1
+ @bstk
+ r0 =l add ap, 8
+ lstk =l loadl r0
+ r1 =l add lstk, 8
+ storel r1, r0
+ @b0
+ %loc =l phi @breg %lreg, @bstk %lstk
+ i->to =(i->cls) load %loc
+ */
+
+ /* @b0: final load, plus everything lowered after i; it takes
+ * over the jump and successors of b, whose phis are updated
+ */
+ loc = newtmp("abi", Kl, fn);
+ emit(Oload, i->cls, i->to, loc, R);
+ b0 = split(fn, b);
+ b0->jmp = b->jmp;
+ b0->s1 = b->s1;
+ b0->s2 = b->s2;
+ if (b->s1)
+ chpred(b->s1, b, b0);
+ if (b->s2 && b->s2 != b->s1)
+ chpred(b->s2, b, b0);
+
+ /* @breg: the argument is in the register save area */
+ lreg = newtmp("abi", Kl, fn);
+ nr = newtmp("abi", Kl, fn);
+ r0 = newtmp("abi", Kw, fn);
+ r1 = newtmp("abi", Kl, fn);
+ emit(Ostorew, Kw, R, r0, r1);
+ emit(Oadd, Kl, r1, ap, isint ? CON_Z : c4);
+ emit(Oadd, Kw, r0, nr, isint ? c8 : c16);
+ r0 = newtmp("abi", Kl, fn);
+ r1 = newtmp("abi", Kl, fn);
+ emit(Oadd, Kl, lreg, r1, nr);
+ emit(Oload, Kl, r1, r0, R);
+ emit(Oadd, Kl, r0, ap, c16);
+ breg = split(fn, b);
+ breg->jmp.type = Jjmp;
+ breg->s1 = b0;
+
+ /* @bstk: the argument is in the stack area */
+ lstk = newtmp("abi", Kl, fn);
+ r0 = newtmp("abi", Kl, fn);
+ r1 = newtmp("abi", Kl, fn);
+ emit(Ostorel, Kw, R, r1, r0);
+ emit(Oadd, Kl, r1, lstk, c8);
+ emit(Oload, Kl, lstk, r0, R);
+ emit(Oadd, Kl, r0, ap, c8);
+ bstk = split(fn, b);
+ bstk->jmp.type = Jjmp;
+ bstk->s1 = b0;
+
+ /* merge both addresses in @b0 */
+ b0->phi = alloc(sizeof *b0->phi);
+ *b0->phi = (Phi){
+ .cls = Kl, .to = loc,
+ .narg = 2,
+ .blk = {bstk, breg},
+ .arg = {lstk, lreg},
+ };
+ /* end of @b: load the current offset (at ap+0 for integers,
+ * ap+4 otherwise) and branch on whether it is still below the
+ * limit of the save area (48 or 176)
+ */
+ r0 = newtmp("abi", Kl, fn);
+ r1 = newtmp("abi", Kw, fn);
+ b->jmp.type = Jjnz;
+ b->jmp.arg = r1;
+ b->s1 = breg;
+ b->s2 = bstk;
+ c = getcon(isint ? 48 : 176, fn);
+ emit(Ocmpw+Ciult, Kw, r1, nr, c);
+ emit(Oloadsw, Kl, nr, r0, R);
+ emit(Oadd, Kl, r0, ap, isint ? CON_Z : c4);
+}
+
+/* Lower vastart: initialize the variable argument list at ap.
+ * fa is the value returned by selpar(); it provides the number
+ * of integer and sse registers taken by named parameters and
+ * the offset of the first unnamed stack argument.
+ * Four fields are written (the emits read bottom-up):
+ *   ap+0   w  gp: 8 bytes per integer register already used
+ *   ap+4   w  fp: 48 + 16 bytes per sse register already used
+ *   ap+8   l  rbp + sp, address of the next stack argument
+ *   ap+16  l  rbp - 176, base of the register save area
+ * NOTE(review): this matches the System V va_list layout
+ * (gp_offset, fp_offset, overflow_arg_area, reg_save_area) --
+ * confirm against the ABI document.
+ */
+static void
+selvastart(Fn *fn, int fa, Ref ap)
+{
+ Ref r0, r1;
+ int gp, fp, sp;
+
+ gp = ((fa >> 4) & 15) * 8;
+ fp = 48 + ((fa >> 8) & 15) * 16;
+ sp = fa >> 12;
+ r0 = newtmp("abi", Kl, fn);
+ r1 = newtmp("abi", Kl, fn);
+ emit(Ostorel, Kw, R, r1, r0);
+ emit(Oadd, Kl, r1, TMP(RBP), getcon(-176, fn));
+ emit(Oadd, Kl, r0, ap, getcon(16, fn));
+ r0 = newtmp("abi", Kl, fn);
+ r1 = newtmp("abi", Kl, fn);
+ emit(Ostorel, Kw, R, r1, r0);
+ emit(Oadd, Kl, r1, TMP(RBP), getcon(sp, fn));
+ emit(Oadd, Kl, r0, ap, getcon(8, fn));
+ r0 = newtmp("abi", Kl, fn);
+ emit(Ostorew, Kw, R, getcon(fp, fn), r0);
+ emit(Oadd, Kl, r0, ap, getcon(4, fn));
+ emit(Ostorew, Kw, R, getcon(gp, fn), ap);
+}
+
+/* ABI lowering entry point for the amd64 System V target.
+ * Parameters of the start block are lowered first, then each
+ * block has its return jump, calls and vararg instructions
+ * rewritten. The start block is processed last so that the
+ * return pad allocations collected while lowering calls can be
+ * added to it.
+ */
+void
+amd64_sysv_abi(Fn *fn)
+{
+ Blk *b;
+ Ins *i, *i0, *ip;
+ RAlloc *ral;
+ int n, fa;
+
+ /* visit is used below to skip the blocks created by split() */
+ for (b=fn->start; b; b=b->link)
+ b->visit = 0;
+
+ /* lower parameters */
+ for (b=fn->start, i=b->ins; i-b->ins<b->nins; i++)
+ if (!ispar(i->op))
+ break;
+ fa = selpar(fn, b->ins, i);
+ /* new start block: lowered parameters followed by the
+ * remaining instructions
+ */
+ n = b->nins - (i - b->ins) + (&insb[NIns] - curi);
+ i0 = alloc(n * sizeof(Ins));
+ ip = icpy(ip = i0, curi, &insb[NIns] - curi);
+ ip = icpy(ip, i, &b->ins[b->nins] - i);
+ b->nins = n;
+ b->ins = i0;
+
+ /* lower calls, returns, and vararg instructions */
+ ral = 0;
+ b = fn->start;
+ do {
+ if (!(b = b->link))
+ b = fn->start; /* do it last */
+ if (b->visit)
+ continue;
+ curi = &insb[NIns];
+ selret(b, fn);
+ /* walk the instructions backward */
+ for (i=&b->ins[b->nins]; i!=b->ins;)
+ switch ((--i)->op) {
+ default:
+ emiti(*i);
+ break;
+ case Ocall:
+ case Ovacall:
+ /* i0: first of the argument instructions that
+ * immediately precede the call
+ */
+ for (i0=i; i0>b->ins; i0--)
+ if (!isarg((i0-1)->op))
+ break;
+ selcall(fn, i0, i, &ral);
+ i = i0;
+ break;
+ case Ovastart:
+ selvastart(fn, fa, i->arg[0]);
+ break;
+ case Ovaarg:
+ selvaarg(fn, b, i);
+ break;
+ case Oarg:
+ case Oargc:
+ /* arguments are consumed together with their call */
+ die("unreachable");
+ }
+ /* emit the return pad allocations in the start block */
+ if (b == fn->start)
+ for (; ral; ral=ral->link)
+ emiti(ral->i);
+ b->nins = &insb[NIns] - curi;
+ idup(&b->ins, curi, b->nins);
+ } while (b != fn->start);
+
+ if (debug['A']) {
+ fprintf(stderr, "\n> After ABI lowering:\n");
+ printfn(fn, stderr);
+ }
+}
diff --git a/amd64/targ.c b/amd64/targ.c
@@ -0,0 +1,30 @@
+#include "all.h"
+
+/* Per-operation amd64 information, indexed by opcode.
+ * The table is generated from ../ops.h: O() expands to the
+ * designator of the entry and X() to its three fields
+ * (nm, zf, lf).
+ * NOTE(review): the fields are read elsewhere as .nmem, .zflag
+ * and .lflag; the field order is presumably the one declared
+ * for Amd64Op in amd64/all.h -- confirm there.
+ */
+Amd64Op amd64_op[NOp] = {
+#define O(op, t, x) [O##op] =
+#define X(nm, zf, lf) { nm, zf, lf, },
+ #include "../ops.h"
+};
+
+/* Target hook: return the nmem field of the amd64_op entry
+ * of operation op.
+ */
+static int
+amd64_memargs(int op)
+{
+	Amd64Op *info;
+
+	info = &amd64_op[op];
+	return info->nmem;
+}
+
+/* Target description of amd64 with the System V ABI: register
+ * ranges and sets, followed by the hooks implemented in the
+ * amd64/ directory.
+ */
+Target T_amd64_sysv = {
+ .gpr0 = RAX, /* first integer register and their count */
+ .ngpr = NGPR,
+ .fpr0 = XMM0, /* first sse register and their count */
+ .nfpr = NFPR,
+ .rglob = BIT(RBP) | BIT(RSP), /* the two registers counted by nrglob */
+ .nrglob = 2,
+ .rsave = amd64_sysv_rsave,
+ .nrsave = {NGPS, NFPS}, /* integer and sse entries of rsave */
+ .retregs = amd64_sysv_retregs,
+ .argregs = amd64_sysv_argregs,
+ .memargs = amd64_memargs,
+ .abi = amd64_sysv_abi,
+ .isel = amd64_isel,
+ .emitfn = amd64_emitfn,
+};
diff --git a/cfg.c b/cfg.c
@@ -312,8 +312,8 @@ simpljmp(Fn *fn)
uffind(&b->s1, uf);
if (b->s2)
uffind(&b->s2, uf);
- c = b->jmp.type - Jxjc;
- if (0 <= c && c <= NXICmp)
+ c = b->jmp.type - Jjf;
+ if (0 <= c && c <= NCmp)
if (b->s1 == b->s2) {
b->jmp.type = Jjmp;
b->s2 = 0;
diff --git a/emit.c b/emit.c
@@ -1,696 +0,0 @@
-#include "all.h"
-
-char *locprefix, *symprefix;
-
-enum {
- SLong = 0,
- SWord = 1,
- SShort = 2,
- SByte = 3,
-
- Ki = -1, /* matches Kw and Kl */
- Ka = -2, /* matches all classes */
-};
-
-/* Instruction format strings:
- *
- * if the format string starts with -, the instruction
- * is assumed to be 3-address and is put in 2-address
- * mode using an extra mov if necessary
- *
- * if the format string starts with +, the same as the
- * above applies, but commutativity is also assumed
- *
- * %k is used to set the class of the instruction,
- * it'll expand to "l", "q", "ss", "sd", depending
- * on the instruction class
- * %0 designates the first argument
- * %1 designates the second argument
- * %= designates the result
- *
- * if %k is not used, a prefix to 0, 1, or = must be
- * added, it can be:
- * M - memory reference
- * L - long (64 bits)
- * W - word (32 bits)
- * H - short (16 bits)
- * B - byte (8 bits)
- * S - single precision float
- * D - double precision float
- */
-static struct {
- short op;
- short cls;
- char *asm;
-} omap[] = {
- { Oadd, Ka, "+add%k %1, %=" },
- { Osub, Ka, "-sub%k %1, %=" },
- { Oand, Ki, "+and%k %1, %=" },
- { Oor, Ki, "+or%k %1, %=" },
- { Oxor, Ki, "+xor%k %1, %=" },
- { Osar, Ki, "-sar%k %B1, %=" },
- { Oshr, Ki, "-shr%k %B1, %=" },
- { Oshl, Ki, "-shl%k %B1, %=" },
- { Omul, Ki, "+imul%k %1, %=" },
- { Omul, Ks, "+mulss %1, %=" },
- { Omul, Kd, "+mulsd %1, %=" },
- { Odiv, Ka, "-div%k %1, %=" },
- { Ostorel, Ka, "movq %L0, %M1" },
- { Ostorew, Ka, "movl %W0, %M1" },
- { Ostoreh, Ka, "movw %H0, %M1" },
- { Ostoreb, Ka, "movb %B0, %M1" },
- { Ostores, Ka, "movss %S0, %M1" },
- { Ostored, Ka, "movsd %D0, %M1" },
- { Oload, Ka, "mov%k %M0, %=" },
- { Oloadsw, Kl, "movslq %M0, %L=" },
- { Oloadsw, Kw, "movl %M0, %W=" },
- { Oloaduw, Ki, "movl %M0, %W=" },
- { Oloadsh, Ki, "movsw%k %M0, %=" },
- { Oloaduh, Ki, "movzw%k %M0, %=" },
- { Oloadsb, Ki, "movsb%k %M0, %=" },
- { Oloadub, Ki, "movzb%k %M0, %=" },
- { Oextsw, Kl, "movslq %W0, %L=" },
- { Oextuw, Kl, "movl %W0, %W=" },
- { Oextsh, Ki, "movsw%k %H0, %=" },
- { Oextuh, Ki, "movzw%k %H0, %=" },
- { Oextsb, Ki, "movsb%k %B0, %=" },
- { Oextub, Ki, "movzb%k %B0, %=" },
-
- { Oexts, Kd, "cvtss2sd %0, %=" },
- { Otruncd, Ks, "cvttsd2ss %0, %=" },
- { Ostosi, Ki, "cvttss2si%k %0, %=" },
- { Odtosi, Ki, "cvttsd2si%k %0, %=" },
- { Oswtof, Ka, "cvtsi2%k %W0, %=" },
- { Osltof, Ka, "cvtsi2%k %L0, %=" },
- { Ocast, Ki, "movq %D0, %L=" },
- { Ocast, Ka, "movq %L0, %D=" },
-
- { Oaddr, Ki, "lea%k %M0, %=" },
- { Oswap, Ki, "xchg%k %0, %1" },
- { Osign, Kl, "cqto" },
- { Osign, Kw, "cltd" },
- { Oxdiv, Ki, "div%k %0" },
- { Oxidiv, Ki, "idiv%k %0" },
- { Oxcmp, Ks, "comiss %S0, %S1" },
- { Oxcmp, Kd, "comisd %D0, %D1" },
- { Oxcmp, Ki, "cmp%k %0, %1" },
- { Oxtest, Ki, "test%k %0, %1" },
- { Oxset+ICule, Ki, "setbe %B=\n\tmovzb%k %B=, %=" },
- { Oxset+ICult, Ki, "setb %B=\n\tmovzb%k %B=, %=" },
- { Oxset+ICsle, Ki, "setle %B=\n\tmovzb%k %B=, %=" },
- { Oxset+ICslt, Ki, "setl %B=\n\tmovzb%k %B=, %=" },
- { Oxset+ICsgt, Ki, "setg %B=\n\tmovzb%k %B=, %=" },
- { Oxset+ICsge, Ki, "setge %B=\n\tmovzb%k %B=, %=" },
- { Oxset+ICugt, Ki, "seta %B=\n\tmovzb%k %B=, %=" },
- { Oxset+ICuge, Ki, "setae %B=\n\tmovzb%k %B=, %=" },
- { Oxset+ICeq, Ki, "setz %B=\n\tmovzb%k %B=, %=" },
- { Oxset+ICne, Ki, "setnz %B=\n\tmovzb%k %B=, %=" },
- { Oxset+ICxnp, Ki, "setnp %B=\n\tmovsb%k %B=, %=" },
- { Oxset+ICxp, Ki, "setp %B=\n\tmovsb%k %B=, %=" },
- { NOp, 0, 0 }
-};
-
-static char *rname[][4] = {
- [RAX] = {"rax", "eax", "ax", "al"},
- [RBX] = {"rbx", "ebx", "bx", "bl"},
- [RCX] = {"rcx", "ecx", "cx", "cl"},
- [RDX] = {"rdx", "edx", "dx", "dl"},
- [RSI] = {"rsi", "esi", "si", "sil"},
- [RDI] = {"rdi", "edi", "di", "dil"},
- [RBP] = {"rbp", "ebp", "bp", "bpl"},
- [RSP] = {"rsp", "esp", "sp", "spl"},
- [R8 ] = {"r8" , "r8d", "r8w", "r8b"},
- [R9 ] = {"r9" , "r9d", "r9w", "r9b"},
- [R10] = {"r10", "r10d", "r10w", "r10b"},
- [R11] = {"r11", "r11d", "r11w", "r11b"},
- [R12] = {"r12", "r12d", "r12w", "r12b"},
- [R13] = {"r13", "r13d", "r13w", "r13b"},
- [R14] = {"r14", "r14d", "r14w", "r14b"},
- [R15] = {"r15", "r15d", "r15w", "r15b"},
-};
-
-
-static int
-slot(int s, Fn *fn)
-{
- struct { int i:29; } x;
-
- /* sign extend s using a bitfield */
- x.i = s;
- assert(x.i <= fn->slot);
- /* specific to NAlign == 3 */
- if (x.i < 0)
- return -4 * x.i;
- else if (fn->vararg)
- return -176 + -4 * (fn->slot - x.i);
- else
- return -4 * (fn->slot - x.i);
-}
-
-static void
-emitcon(Con *con, FILE *f)
-{
- switch (con->type) {
- case CAddr:
- if (con->local)
- fprintf(f, "%s%s", locprefix, con->label);
- else
- fprintf(f, "%s%s", symprefix, con->label);
- if (con->bits.i)
- fprintf(f, "%+"PRId64, con->bits.i);
- break;
- case CBits:
- fprintf(f, "%"PRId64, con->bits.i);
- break;
- default:
- die("unreachable");
- }
-}
-
-static char *
-regtoa(int reg, int sz)
-{
- static char buf[6];
-
- if (reg >= XMM0) {
- sprintf(buf, "xmm%d", reg-XMM0);
- return buf;
- } else
- return rname[reg][sz];
-}
-
-static Ref
-getarg(char c, Ins *i)
-{
- switch (c) {
- case '0':
- return i->arg[0];
- case '1':
- return i->arg[1];
- case '=':
- return i->to;
- default:
- die("invalid arg letter %c", c);
- }
-}
-
-static void emitins(Ins, Fn *, FILE *);
-
-static void
-emitcopy(Ref r1, Ref r2, int k, Fn *fn, FILE *f)
-{
- Ins icp;
-
- icp.op = Ocopy;
- icp.arg[0] = r2;
- icp.to = r1;
- icp.cls = k;
- emitins(icp, fn, f);
-}
-
-static void
-emitf(char *s, Ins *i, Fn *fn, FILE *f)
-{
- static char clstoa[][3] = {"l", "q", "ss", "sd"};
- char c;
- int sz;
- Ref ref;
- Mem *m;
- Con off;
-
- switch (*s) {
- case '+':
- if (req(i->arg[1], i->to)) {
- ref = i->arg[0];
- i->arg[0] = i->arg[1];
- i->arg[1] = ref;
- }
- /* fall through */
- case '-':
- assert((!req(i->arg[1], i->to) || req(i->arg[0], i->to)) &&
- "cannot convert to 2-address");
- emitcopy(i->to, i->arg[0], i->cls, fn, f);
- s++;
- break;
- }
-
- fputc('\t', f);
-Next:
- while ((c = *s++) != '%')
- if (!c) {
- fputc('\n', f);
- return;
- } else
- fputc(c, f);
- switch ((c = *s++)) {
- case '%':
- fputc('%', f);
- break;
- case 'k':
- fputs(clstoa[i->cls], f);
- break;
- case '0':
- case '1':
- case '=':
- sz = KWIDE(i->cls) ? SLong : SWord;
- s--;
- goto Ref;
- case 'D':
- case 'S':
- sz = SLong; /* does not matter for floats */
- Ref:
- c = *s++;
- ref = getarg(c, i);
- switch (rtype(ref)) {
- case RTmp:
- assert(isreg(ref));
- fprintf(f, "%%%s", regtoa(ref.val, sz));
- break;
- case RSlot:
- fprintf(f, "%d(%%rbp)", slot(ref.val, fn));
- break;
- case RMem:
- Mem:
- m = &fn->mem[ref.val];
- if (rtype(m->base) == RSlot) {
- off.type = CBits;
- off.bits.i = slot(m->base.val, fn);
- addcon(&m->offset, &off);
- m->base = TMP(RBP);
- }
- if (m->offset.type != CUndef)
- emitcon(&m->offset, f);
- fputc('(', f);
- if (req(m->base, R))
- fprintf(f, "%%rip");
- else
- fprintf(f, "%%%s", regtoa(m->base.val, SLong));
- if (!req(m->index, R))
- fprintf(f, ", %%%s, %d",
- regtoa(m->index.val, SLong),
- m->scale
- );
- fputc(')', f);
- break;
- case RCon:
- fputc('$', f);
- emitcon(&fn->con[ref.val], f);
- break;
- default:
- die("unreachable");
- }
- break;
- case 'L':
- sz = SLong;
- goto Ref;
- case 'W':
- sz = SWord;
- goto Ref;
- case 'H':
- sz = SShort;
- goto Ref;
- case 'B':
- sz = SByte;
- goto Ref;
- case 'M':
- c = *s++;
- ref = getarg(c, i);
- switch (rtype(ref)) {
- case RMem:
- goto Mem;
- case RSlot:
- fprintf(f, "%d(%%rbp)", slot(ref.val, fn));
- break;
- case RCon:
- emitcon(&fn->con[ref.val], f);
- fprintf(f, "(%%rip)");
- break;
- case RTmp:
- assert(isreg(ref));
- fprintf(f, "(%%%s)", regtoa(ref.val, SLong));
- break;
- default:
- die("unreachable");
- }
- break;
- default:
- die("invalid format specifier %%%c", c);
- }
- goto Next;
-}
-
-static void
-emitins(Ins i, Fn *fn, FILE *f)
-{
- Ref r;
- int64_t val;
- int o;
-
- switch (i.op) {
- default:
- Table:
- /* most instructions are just pulled out of
- * the table omap[], some special cases are
- * detailed below */
- for (o=0;; o++) {
- /* this linear search should really be a binary
- * search */
- if (omap[o].op == NOp)
- die("no match for %s(%d)", opdesc[i.op].name, i.cls);
- if (omap[o].op == i.op)
- if (omap[o].cls == i.cls
- || (omap[o].cls == Ki && KBASE(i.cls) == 0)
- || (omap[o].cls == Ka))
- break;
- }
- emitf(omap[o].asm, &i, fn, f);
- break;
- case Onop:
- /* just do nothing for nops, they are inserted
- * by some passes */
- break;
- case Omul:
- /* here, we try to use the 3-addresss form
- * of multiplication when possible */
- if (rtype(i.arg[1]) == RCon) {
- r = i.arg[0];
- i.arg[0] = i.arg[1];
- i.arg[1] = r;
- }
- if (KBASE(i.cls) == 0 /* only available for ints */
- && rtype(i.arg[0]) == RCon
- && rtype(i.arg[1]) == RTmp) {
- emitf("imul%k %0, %1, %=", &i, fn, f);
- break;
- }
- goto Table;
- case Osub:
- /* we have to use the negation trick to handle
- * some 3-address substractions */
- if (req(i.to, i.arg[1])) {
- emitf("neg%k %=", &i, fn, f);
- emitf("add%k %0, %=", &i, fn, f);
- break;
- }
- goto Table;
- case Ocopy:
- /* make sure we don't emit useless copies,
- * also, we can use a trick to load 64-bits
- * registers, it's detailed in my note below
- * http://c9x.me/art/notes.html?09/19/2015 */
- if (req(i.to, R) || req(i.arg[0], R))
- break;
- if (isreg(i.to)
- && rtype(i.arg[0]) == RCon
- && i.cls == Kl
- && fn->con[i.arg[0].val].type == CBits
- && (val = fn->con[i.arg[0].val].bits.i) >= 0
- && val <= UINT32_MAX) {
- emitf("movl %W0, %W=", &i, fn, f);
- } else if (isreg(i.to)
- && rtype(i.arg[0]) == RCon
- && fn->con[i.arg[0].val].type == CAddr) {
- emitf("lea%k %M0, %=", &i, fn, f);
- } else if (!req(i.arg[0], i.to))
- emitf("mov%k %0, %=", &i, fn, f);
- break;
- case Ocall:
- /* calls simply have a weird syntax in AT&T
- * assembly... */
- switch (rtype(i.arg[0])) {
- case RCon:
- fprintf(f, "\tcallq ");
- emitcon(&fn->con[i.arg[0].val], f);
- fprintf(f, "\n");
- break;
- case RTmp:
- emitf("callq *%L0", &i, fn, f);
- break;
- default:
- die("invalid call argument");
- }
- break;
- case Osalloc:
- /* there is no good reason why this is here
- * maybe we should split Osalloc in 2 different
- * instructions depending on the result
- */
- emitf("subq %L0, %%rsp", &i, fn, f);
- if (!req(i.to, R))
- emitcopy(i.to, TMP(RSP), Kl, fn, f);
- break;
- case Oswap:
- if (KBASE(i.cls) == 0)
- goto Table;
- /* for floats, there is no swap instruction
- * so we use xmm15 as a temporary
- */
- emitcopy(TMP(XMM0+15), i.arg[0], i.cls, fn, f);
- emitcopy(i.arg[0], i.arg[1], i.cls, fn, f);
- emitcopy(i.arg[1], TMP(XMM0+15), i.cls, fn, f);
- break;
- }
-}
-
-static int
-cneg(int cmp)
-{
- switch (cmp) {
- default: die("invalid int comparison %d", cmp);
- case ICule: return ICugt;
- case ICult: return ICuge;
- case ICsle: return ICsgt;
- case ICslt: return ICsge;
- case ICsgt: return ICsle;
- case ICsge: return ICslt;
- case ICugt: return ICule;
- case ICuge: return ICult;
- case ICeq: return ICne;
- case ICne: return ICeq;
- case ICxnp: return ICxp;
- case ICxp: return ICxnp;
- }
-}
-
-static int
-framesz(Fn *fn)
-{
- int i, o, f;
-
- /* specific to NAlign == 3 */
- for (i=0, o=0; i<NRClob; i++)
- o ^= 1 & (fn->reg >> rclob[i]);
- f = fn->slot;
- f = (f + 3) & -4;
- return 4*f + 8*o + 176*fn->vararg;
-}
-
-void
-emitfn(Fn *fn, FILE *f)
-{
- static char *ctoa[] = {
- [ICeq] = "z",
- [ICule] = "be",
- [ICult] = "b",
- [ICsle] = "le",
- [ICslt] = "l",
- [ICsgt] = "g",
- [ICsge] = "ge",
- [ICugt] = "a",
- [ICuge] = "ae",
- [ICne] = "nz",
- [ICxnp] = "np",
- [ICxp] = "p"
- };
- static int id0;
- Blk *b, *s;
- Ins *i, itmp;
- int *r, c, fs, o, n, lbl;
-
- fprintf(f, ".text\n");
- if (fn->export)
- fprintf(f, ".globl %s%s\n", symprefix, fn->name);
- fprintf(f,
- "%s%s:\n"
- "\tpushq %%rbp\n"
- "\tmovq %%rsp, %%rbp\n",
- symprefix, fn->name
- );
- fs = framesz(fn);
- if (fs)
- fprintf(f, "\tsub $%d, %%rsp\n", fs);
- if (fn->vararg) {
- o = -176;
- for (r=rsave; r-rsave<6; ++r, o+=8)
- fprintf(f, "\tmovq %%%s, %d(%%rbp)\n", rname[*r][0], o);
- for (n=0; n<8; ++n, o+=16)
- fprintf(f, "\tmovaps %%xmm%d, %d(%%rbp)\n", n, o);
- }
- for (r=rclob; r-rclob < NRClob; r++)
- if (fn->reg & BIT(*r)) {
- itmp.arg[0] = TMP(*r);
- emitf("pushq %L0", &itmp, fn, f);
- }
-
- for (lbl=0, b=fn->start; b; b=b->link) {
- if (lbl || b->npred > 1)
- fprintf(f, "%sbb%d:\n", locprefix, id0+b->id);
- for (i=b->ins; i!=&b->ins[b->nins]; i++)
- emitins(*i, fn, f);
- lbl = 1;
- switch (b->jmp.type) {
- case Jret0:
- for (r=&rclob[NRClob]; r>rclob;)
- if (fn->reg & BIT(*--r)) {
- itmp.arg[0] = TMP(*r);
- emitf("popq %L0", &itmp, fn, f);
- }
- fprintf(f,
- "\tleave\n"
- "\tret\n"
- );
- break;
- case Jjmp:
- Jmp:
- if (b->s1 != b->link)
- fprintf(f, "\tjmp %sbb%d\n",
- locprefix, id0+b->s1->id);
- else
- lbl = 0;
- break;
- default:
- c = b->jmp.type - Jxjc;
- if (0 <= c && c <= NXICmp) {
- if (b->link == b->s2) {
- s = b->s1;
- b->s1 = b->s2;
- b->s2 = s;
- } else
- c = cneg(c);
- fprintf(f, "\tj%s %sbb%d\n", ctoa[c],
- locprefix, id0+b->s2->id);
- goto Jmp;
- }
- die("unhandled jump %d", b->jmp.type);
- }
- }
- id0 += fn->nblk;
-}
-
-void
-emitdat(Dat *d, FILE *f)
-{
- static int align;
- static char *dtoa[] = {
- [DAlign] = ".align",
- [DB] = "\t.byte",
- [DH] = "\t.value",
- [DW] = "\t.long",
- [DL] = "\t.quad"
- };
-
- switch (d->type) {
- case DStart:
- align = 0;
- fprintf(f, ".data\n");
- break;
- case DEnd:
- break;
- case DName:
- if (!align)
- fprintf(f, ".align 8\n");
- if (d->export)
- fprintf(f, ".globl %s%s\n", symprefix, d->u.str);
- fprintf(f, "%s%s:\n", symprefix, d->u.str);
- break;
- case DZ:
- fprintf(f, "\t.fill %"PRId64",1,0\n", d->u.num);
- break;
- default:
- if (d->type == DAlign)
- align = 1;
-
- if (d->isstr) {
- if (d->type != DB)
- err("strings only supported for 'b' currently");
- fprintf(f, "\t.ascii \"%s\"\n", d->u.str);
- }
- else if (d->isref) {
- fprintf(f, "%s %s%+"PRId64"\n",
- dtoa[d->type], d->u.ref.nam,
- d->u.ref.off);
- }
- else {
- fprintf(f, "%s %"PRId64"\n",
- dtoa[d->type], d->u.num);
- }
- break;
- }
-}
-
-typedef struct FBits FBits;
-
-struct FBits {
- union {
- int64_t n;
- float f;
- double d;
- } bits;
- int wide;
- FBits *link;
-};
-
-static FBits *stash;
-
-int
-stashfp(int64_t n, int w)
-{
- FBits **pb, *b;
- int i;
-
- /* does a dumb de-dup of fp constants
- * this should be the linker's job */
- for (pb=&stash, i=0; (b=*pb); pb=&b->link, i++)
- if (n == b->bits.n && w == b->wide)
- return i;
- b = emalloc(sizeof *b);
- b->bits.n = n;
- b->wide = w;
- b->link = 0;
- *pb = b;
- return i;
-}
-
-void
-emitfin(FILE *f)
-{
- FBits *b;
- int i;
-
- if (!stash)
- return;
- fprintf(f, "/* floating point constants */\n");
- fprintf(f, ".data\n.align 8\n");
- for (b=stash, i=0; b; b=b->link, i++)
- if (b->wide)
- fprintf(f,
- "%sfp%d:\n"
- "\t.quad %"PRId64
- " /* %f */\n",
- locprefix, i, b->bits.n,
- b->bits.d
- );
- for (b=stash, i=0; b; b=b->link, i++)
- if (!b->wide)
- fprintf(f,
- "%sfp%d:\n"
- "\t.long %"PRId64
- " /* %lf */\n",
- locprefix, i, b->bits.n & 0xffffffff,
- b->bits.f
- );
- while ((b=stash)) {
- stash = b->link;
- free(b);
- }
-}
diff --git a/fold.c b/fold.c
@@ -100,7 +100,7 @@ visitins(Ins *i, Fn *fn)
if (rtype(i->to) != RTmp)
return;
- if (opdesc[i->op].cfold) {
+ if (optab[i->op].canfold) {
l = latval(i->arg[0]);
if (!req(i->arg[1], R))
r = latval(i->arg[1]);
@@ -114,7 +114,7 @@ visitins(Ins *i, Fn *fn)
v = opfold(i->op, i->cls, &fn->con[l], &fn->con[r], fn);
} else
v = Bot;
- /* fprintf(stderr, "\nvisiting %s (%p)", opdesc[i->op].name, (void *)i); */
+ /* fprintf(stderr, "\nvisiting %s (%p)", optab[i->op].name, (void *)i); */
update(i->to.val, v, fn);
}
@@ -360,7 +360,7 @@ foldint(Con *res, int op, int w, Con *cl, Con *cr)
else if (cl->type == CAddr || cr->type == CAddr) {
if (Ocmpl <= op && op <= Ocmpl1)
return 1;
- err("invalid address operand for '%s'", opdesc[op].name);
+ err("invalid address operand for '%s'", optab[op].name);
}
switch (op) {
case Oadd: x = l.u + r.u; break;
@@ -397,42 +397,42 @@ foldint(Con *res, int op, int w, Con *cl, Con *cr)
} else
op -= Ocmpl - Ocmpw;
switch (op - Ocmpw) {
- case ICule: x = l.u <= r.u; break;
- case ICult: x = l.u < r.u; break;
- case ICsle: x = l.s <= r.s; break;
- case ICslt: x = l.s < r.s; break;
- case ICsgt: x = l.s > r.s; break;
- case ICsge: x = l.s >= r.s; break;
- case ICugt: x = l.u > r.u; break;
- case ICuge: x = l.u >= r.u; break;
- case ICeq: x = l.u == r.u; break;
- case ICne: x = l.u != r.u; break;
+ case Ciule: x = l.u <= r.u; break;
+ case Ciult: x = l.u < r.u; break;
+ case Cisle: x = l.s <= r.s; break;
+ case Cislt: x = l.s < r.s; break;
+ case Cisgt: x = l.s > r.s; break;
+ case Cisge: x = l.s >= r.s; break;
+ case Ciugt: x = l.u > r.u; break;
+ case Ciuge: x = l.u >= r.u; break;
+ case Cieq: x = l.u == r.u; break;
+ case Cine: x = l.u != r.u; break;
default: die("unreachable");
}
}
else if (Ocmps <= op && op <= Ocmps1) {
switch (op - Ocmps) {
- case FCle: x = l.fs <= r.fs; break;
- case FClt: x = l.fs < r.fs; break;
- case FCgt: x = l.fs > r.fs; break;
- case FCge: x = l.fs >= r.fs; break;
- case FCne: x = l.fs != r.fs; break;
- case FCeq: x = l.fs == r.fs; break;
- case FCo: x = l.fs < r.fs || l.fs >= r.fs; break;
- case FCuo: x = !(l.fs < r.fs || l.fs >= r.fs); break;
+ case Cfle: x = l.fs <= r.fs; break;
+ case Cflt: x = l.fs < r.fs; break;
+ case Cfgt: x = l.fs > r.fs; break;
+ case Cfge: x = l.fs >= r.fs; break;
+ case Cfne: x = l.fs != r.fs; break;
+ case Cfeq: x = l.fs == r.fs; break;
+ case Cfo: x = l.fs < r.fs || l.fs >= r.fs; break;
+ case Cfuo: x = !(l.fs < r.fs || l.fs >= r.fs); break;
default: die("unreachable");
}
}
else if (Ocmpd <= op && op <= Ocmpd1) {
switch (op - Ocmpd) {
- case FCle: x = l.fd <= r.fd; break;
- case FClt: x = l.fd < r.fd; break;
- case FCgt: x = l.fd > r.fd; break;
- case FCge: x = l.fd >= r.fd; break;
- case FCne: x = l.fd != r.fd; break;
- case FCeq: x = l.fd == r.fd; break;
- case FCo: x = l.fd < r.fd || l.fd >= r.fd; break;
- case FCuo: x = !(l.fd < r.fd || l.fd >= r.fd); break;
+ case Cfle: x = l.fd <= r.fd; break;
+ case Cflt: x = l.fd < r.fd; break;
+ case Cfgt: x = l.fd > r.fd; break;
+ case Cfge: x = l.fd >= r.fd; break;
+ case Cfne: x = l.fd != r.fd; break;
+ case Cfeq: x = l.fd == r.fd; break;
+ case Cfo: x = l.fd < r.fd || l.fd >= r.fd; break;
+ case Cfuo: x = !(l.fd < r.fd || l.fd >= r.fd); break;
default: die("unreachable");
}
}
@@ -453,7 +453,7 @@ foldflt(Con *res, int op, int w, Con *cl, Con *cr)
double xd, ld, rd;
if (cl->type != CBits || cr->type != CBits)
- err("invalid address operand for '%s'", opdesc[op].name);
+ err("invalid address operand for '%s'", optab[op].name);
if (w) {
ld = cl->bits.d;
rd = cr->bits.d;
@@ -495,7 +495,7 @@ opfold(int op, int cls, Con *cl, Con *cr, Fn *fn)
if ((op == Odiv || op == Oudiv
|| op == Orem || op == Ourem) && czero(cr, KWIDE(cls)))
- err("null divisor in '%s'", opdesc[op].name);
+ err("null divisor in '%s'", optab[op].name);
if (cls == Kw || cls == Kl) {
if (foldint(&c, op, cls == Kl, cl, cr))
return Bot;
diff --git a/gas.c b/gas.c
@@ -0,0 +1,122 @@
+#include "all.h"
+
+
+char *gasloc, *gassym;
+
+void
+gasemitdat(Dat *d, FILE *f)
+{
+ static int align;
+ static char *dtoa[] = {
+ [DAlign] = ".align",
+ [DB] = "\t.byte",
+ [DH] = "\t.short",
+ [DW] = "\t.int",
+ [DL] = "\t.quad"
+ };
+
+ switch (d->type) {
+ case DStart:
+ align = 0;
+ fprintf(f, ".data\n");
+ break;
+ case DEnd:
+ break;
+ case DName:
+ if (!align)
+ fprintf(f, ".align 8\n");
+ if (d->export)
+ fprintf(f, ".globl %s%s\n", gassym, d->u.str);
+ fprintf(f, "%s%s:\n", gassym, d->u.str);
+ break;
+ case DZ:
+ fprintf(f, "\t.fill %"PRId64",1,0\n", d->u.num);
+ break;
+ default:
+ if (d->type == DAlign)
+ align = 1;
+
+ if (d->isstr) {
+ if (d->type != DB)
+ err("strings only supported for 'b' currently");
+ fprintf(f, "\t.ascii \"%s\"\n", d->u.str);
+ }
+ else if (d->isref) {
+ fprintf(f, "%s %s%+"PRId64"\n",
+ dtoa[d->type], d->u.ref.nam,
+ d->u.ref.off);
+ }
+ else {
+ fprintf(f, "%s %"PRId64"\n",
+ dtoa[d->type], d->u.num);
+ }
+ break;
+ }
+}
+
+typedef struct FBits FBits;
+
+struct FBits {
+ union {
+ int64_t n;
+ float f;
+ double d;
+ } bits;
+ int wide;
+ FBits *link;
+};
+
+static FBits *stash;
+
+int
+gasstashfp(int64_t n, int w)
+{
+ FBits **pb, *b;
+ int i;
+
+ /* does a dumb de-dup of fp constants
+ * this should be the linker's job */
+ for (pb=&stash, i=0; (b=*pb); pb=&b->link, i++)
+ if (n == b->bits.n && w == b->wide)
+ return i;
+ b = emalloc(sizeof *b);
+ b->bits.n = n;
+ b->wide = w;
+ b->link = 0;
+ *pb = b;
+ return i;
+}
+
+void
+gasemitfin(FILE *f)
+{
+ FBits *b;
+ int i;
+
+ if (!stash)
+ return;
+ fprintf(f, "/* floating point constants */\n");
+ fprintf(f, ".data\n.align 8\n");
+ for (b=stash, i=0; b; b=b->link, i++)
+ if (b->wide)
+ fprintf(f,
+ "%sfp%d:\n"
+ "\t.quad %"PRId64
+ " /* %f */\n",
+ gasloc, i, b->bits.n,
+ b->bits.d
+ );
+ for (b=stash, i=0; b; b=b->link, i++)
+ if (!b->wide)
+ fprintf(f,
+ "%sfp%d:\n"
+ "\t.long %"PRId64
+ " /* %lf */\n",
+ gasloc, i, b->bits.n & 0xffffffff,
+ b->bits.f
+ );
+ while ((b=stash)) {
+ stash = b->link;
+ free(b);
+ }
+}
diff --git a/isel.c b/isel.c
@@ -1,649 +0,0 @@
-#include "all.h"
-#include <limits.h>
-
-/* For x86_64, do the following:
- *
- * - check that constants are used only in
- * places allowed
- * - ensure immediates always fit in 32b
- * - expose machine register contraints
- * on instructions like division.
- * - implement fast locals (the streak of
- * constant allocX in the first basic block)
- * - recognize complex addressing modes
- *
- * Invariant: the use counts that are used
- * in sel() must be sound. This
- * is not so trivial, maybe the
- * dce should be moved out...
- */
-
-typedef struct ANum ANum;
-
-struct ANum {
- char n, l, r;
- Ins *i;
-};
-
-static void amatch(Addr *, Ref, ANum *, Fn *, int);
-
-static int
-fcmptoi(int fc)
-{
- switch (fc) {
- default: die("invalid fp comparison %d", fc);
- case FCle: return ICule;
- case FClt: return ICult;
- case FCgt: return ICugt;
- case FCge: return ICuge;
- case FCne: return ICne;
- case FCeq: return ICeq;
- case FCo: return ICxnp;
- case FCuo: return ICxp;
- }
-}
-
-static int
-iscmp(int op, int *pk, int *pc)
-{
- if (Ocmpw <= op && op <= Ocmpw1) {
- *pc = op - Ocmpw;
- *pk = Kw;
- }
- else if (Ocmpl <= op && op <= Ocmpl1) {
- *pc = op - Ocmpl;
- *pk = Kl;
- }
- else if (Ocmps <= op && op <= Ocmps1) {
- *pc = fcmptoi(op - Ocmps);
- *pk = Ks;
- }
- else if (Ocmpd <= op && op <= Ocmpd1) {
- *pc = fcmptoi(op - Ocmpd);
- *pk = Kd;
- }
- else
- return 0;
- return 1;
-}
-
-static int
-noimm(Ref r, Fn *fn)
-{
- int64_t val;
-
- if (rtype(r) != RCon)
- return 0;
- switch (fn->con[r.val].type) {
- case CAddr:
- /* we only support the 'small'
- * code model of the ABI, this
- * means that we can always
- * address data with 32bits
- */
- return 0;
- case CBits:
- val = fn->con[r.val].bits.i;
- return (val < INT32_MIN || val > INT32_MAX);
- default:
- die("invalid constant");
- }
-}
-
-static int
-rslot(Ref r, Fn *fn)
-{
- if (rtype(r) != RTmp)
- return -1;
- return fn->tmp[r.val].slot;
-}
-
-static int
-argcls(Ins *i, int n)
-{
- return opdesc[i->op].argcls[n][i->cls];
-}
-
-static void
-fixarg(Ref *r, int k, int phi, Fn *fn)
-{
- Addr a, *m;
- Ref r0, r1;
- int s, n;
-
- r1 = r0 = *r;
- s = rslot(r0, fn);
- if (KBASE(k) == 1 && rtype(r0) == RCon) {
- /* load floating points from memory
- * slots, they can't be used as
- * immediates
- */
- r1 = MEM(fn->nmem);
- vgrow(&fn->mem, ++fn->nmem);
- memset(&a, 0, sizeof a);
- a.offset.type = CAddr;
- a.offset.local = 1;
- n = stashfp(fn->con[r0.val].bits.i, KWIDE(k));
- sprintf(a.offset.label, "fp%d", n);
- fn->mem[fn->nmem-1] = a;
- }
- else if (!phi && k == Kl && noimm(r0, fn)) {
- /* load constants that do not fit in
- * a 32bit signed integer into a
- * long temporary
- */
- r1 = newtmp("isel", Kl, fn);
- emit(Ocopy, Kl, r1, r0, R);
- }
- else if (s != -1) {
- /* load fast locals' addresses into
- * temporaries right before the
- * instruction
- */
- r1 = newtmp("isel", Kl, fn);
- emit(Oaddr, Kl, r1, SLOT(s), R);
- }
- else if (rtype(r0) == RMem) {
- /* apple asm fix */
- m = &fn->mem[r0.val];
- if (req(m->base, R)) {
- n = fn->ncon;
- vgrow(&fn->con, ++fn->ncon);
- fn->con[n] = m->offset;
- m->offset.type = CUndef;
- r0 = newtmp("isel", Kl, fn);
- emit(Oaddr, Kl, r0, CON(n), R);
- m->base = r0;
- }
- }
- *r = r1;
-}
-
-static void
-seladdr(Ref *r, ANum *an, Fn *fn)
-{
- Addr a;
- Ref r0;
-
- r0 = *r;
- if (rtype(r0) == RTmp) {
- amatch(&a, r0, an, fn, 1);
- if (req(a.base, r0))
- return;
- if (a.offset.type == CAddr)
- if (!req(a.base, R)) {
- /* apple asm fix */
- if (!req(a.index, R))
- return;
- else {
- a.index = a.base;
- a.scale = 1;
- a.base = R;
- }
- }
- chuse(r0, -1, fn);
- vgrow(&fn->mem, ++fn->nmem);
- fn->mem[fn->nmem-1] = a;
- chuse(a.base, +1, fn);
- chuse(a.index, +1, fn);
- *r = MEM(fn->nmem-1);
- }
-}
-
-static int
-selcmp(Ref arg[2], int k, Fn *fn)
-{
- int swap;
- Ref r, *iarg;
-
- swap = rtype(arg[0]) == RCon;
- if (swap) {
- r = arg[1];
- arg[1] = arg[0];
- arg[0] = r;
- }
- emit(Oxcmp, k, R, arg[1], arg[0]);
- iarg = curi->arg;
- if (rtype(arg[0]) == RCon) {
- assert(k == Kl);
- iarg[1] = newtmp("isel", k, fn);
- emit(Ocopy, k, iarg[1], arg[0], R);
- }
- fixarg(&iarg[0], k, 0, fn);
- fixarg(&iarg[1], k, 0, fn);
- return swap;
-}
-
-static void
-sel(Ins i, ANum *an, Fn *fn)
-{
- Ref r0, r1, *iarg;
- int x, k, kc;
- int64_t sz;
- Ins *i0, *i1;
-
- if (rtype(i.to) == RTmp)
- if (!isreg(i.to) && !isreg(i.arg[0]) && !isreg(i.arg[1]))
- if (fn->tmp[i.to.val].nuse == 0) {
- chuse(i.arg[0], -1, fn);
- chuse(i.arg[1], -1, fn);
- return;
- }
- i0 = curi;
- k = i.cls;
- switch (i.op) {
- case Odiv:
- case Orem:
- case Oudiv:
- case Ourem:
- if (i.op == Odiv || i.op == Oudiv)
- r0 = TMP(RAX), r1 = TMP(RDX);
- else
- r0 = TMP(RDX), r1 = TMP(RAX);
- emit(Ocopy, k, i.to, r0, R);
- emit(Ocopy, k, R, r1, R);
- if (rtype(i.arg[1]) == RCon) {
- /* immediates not allowed for
- * divisions in x86
- */
- r0 = newtmp("isel", k, fn);
- } else
- r0 = i.arg[1];
- if (fn->tmp[r0.val].slot != -1)
- err("unlikely argument %%%s in %s",
- fn->tmp[r0.val].name, opdesc[i.op].name);
- if (i.op == Odiv || i.op == Orem) {
- emit(Oxidiv, k, R, r0, R);
- emit(Osign, k, TMP(RDX), TMP(RAX), R);
- } else {
- emit(Oxdiv, k, R, r0, R);
- emit(Ocopy, k, TMP(RDX), CON_Z, R);
- }
- emit(Ocopy, k, TMP(RAX), i.arg[0], R);
- fixarg(&curi->arg[0], k, 0, fn);
- if (rtype(i.arg[1]) == RCon)
- emit(Ocopy, k, r0, i.arg[1], R);
- break;
- case Osar:
- case Oshr:
- case Oshl:
- if (rtype(i.arg[1]) == RCon)
- goto Emit;
- r0 = i.arg[1];
- i.arg[1] = TMP(RCX);
- emit(Ocopy, Kw, R, TMP(RCX), R);
- emiti(i);
- emit(Ocopy, Kw, TMP(RCX), r0, R);
- break;
- case Onop:
- break;
- case Ostored:
- case Ostores:
- case Ostorel:
- case Ostorew:
- case Ostoreh:
- case Ostoreb:
- if (rtype(i.arg[0]) == RCon) {
- if (i.op == Ostored)
- i.op = Ostorel;
- if (i.op == Ostores)
- i.op = Ostorew;
- }
- seladdr(&i.arg[1], an, fn);
- goto Emit;
- case_Oload:
- seladdr(&i.arg[0], an, fn);
- goto Emit;
- case Ocall:
- case Osalloc:
- case Ocopy:
- case Oadd:
- case Osub:
- case Omul:
- case Oand:
- case Oor:
- case Oxor:
- case Oxtest:
- case Ostosi:
- case Odtosi:
- case Oswtof:
- case Osltof:
- case Oexts:
- case Otruncd:
- case Ocast:
- case_OExt:
-Emit:
- emiti(i);
- iarg = curi->arg; /* fixarg() can change curi */
- fixarg(&iarg[0], argcls(&i, 0), 0, fn);
- fixarg(&iarg[1], argcls(&i, 1), 0, fn);
- break;
- case Oalloc:
- case Oalloc+1:
- case Oalloc+2: /* == Oalloc1 */
- /* we need to make sure
- * the stack remains aligned
- * (rsp = 0) mod 16
- */
- if (rtype(i.arg[0]) == RCon) {
- sz = fn->con[i.arg[0].val].bits.i;
- if (sz < 0 || sz >= INT_MAX-15)
- err("invalid alloc size %"PRId64, sz);
- sz = (sz + 15) & -16;
- emit(Osalloc, Kl, i.to, getcon(sz, fn), R);
- } else {
- /* r0 = (i.arg[0] + 15) & -16 */
- r0 = newtmp("isel", Kl, fn);
- r1 = newtmp("isel", Kl, fn);
- emit(Osalloc, Kl, i.to, r0, R);
- emit(Oand, Kl, r0, r1, getcon(-16, fn));
- emit(Oadd, Kl, r1, i.arg[0], getcon(15, fn));
- if (fn->tmp[i.arg[0].val].slot != -1)
- err("unlikely argument %%%s in %s",
- fn->tmp[i.arg[0].val].name, opdesc[i.op].name);
- }
- break;
- default:
- if (isext(i.op))
- goto case_OExt;
- if (isload(i.op))
- goto case_Oload;
- if (iscmp(i.op, &kc, &x)) {
- emit(Oxset+x, k, i.to, R, R);
- i1 = curi;
- if (selcmp(i.arg, kc, fn))
- i1->op = Oxset + icmpop(x);
- break;
- }
- die("unknown instruction %s", opdesc[i.op].name);
- }
-
- while (i0 > curi && --i0) {
- assert(rslot(i0->arg[0], fn) == -1);
- assert(rslot(i0->arg[1], fn) == -1);
- }
-}
-
-static Ins *
-flagi(Ins *i0, Ins *i)
-{
- while (i>i0) {
- i--;
- if (opdesc[i->op].sflag)
- return i;
- if (opdesc[i->op].lflag)
- continue;
- return 0;
- }
- return 0;
-}
-
-static void
-seljmp(Blk *b, Fn *fn)
-{
- Ref r;
- int c, k;
- Ins *fi;
- Tmp *t;
-
- if (b->jmp.type == Jret0 || b->jmp.type == Jjmp)
- return;
- assert(b->jmp.type == Jjnz);
- r = b->jmp.arg;
- t = &fn->tmp[r.val];
- b->jmp.arg = R;
- assert(!req(r, R) && rtype(r) != RCon);
- if (b->s1 == b->s2) {
- chuse(r, -1, fn);
- b->jmp.type = Jjmp;
- b->s2 = 0;
- return;
- }
- fi = flagi(b->ins, &b->ins[b->nins]);
- if (!fi || !req(fi->to, r)) {
- selcmp((Ref[2]){r, CON_Z}, Kw, fn); /* todo, long jnz */
- b->jmp.type = Jxjc + ICne;
- }
- else if (iscmp(fi->op, &k, &c)) {
- if (t->nuse == 1) {
- if (selcmp(fi->arg, k, fn))
- c = icmpop(c);
- *fi = (Ins){.op = Onop};
- }
- b->jmp.type = Jxjc + c;
- }
- else if (fi->op == Oand && t->nuse == 1
- && (rtype(fi->arg[0]) == RTmp ||
- rtype(fi->arg[1]) == RTmp)) {
- fi->op = Oxtest;
- fi->to = R;
- b->jmp.type = Jxjc + ICne;
- if (rtype(fi->arg[1]) == RCon) {
- r = fi->arg[1];
- fi->arg[1] = fi->arg[0];
- fi->arg[0] = r;
- }
- }
- else {
- /* since flags are not tracked in liveness,
- * the result of the flag-setting instruction
- * has to be marked as live
- */
- if (t->nuse == 1)
- emit(Ocopy, Kw, R, r, R);
- b->jmp.type = Jxjc + ICne;
- }
-}
-
-static int
-aref(Ref r, ANum *ai)
-{
- switch (rtype(r)) {
- case RCon:
- return 2;
- case RTmp:
- return ai[r.val].n;
- default:
- die("constant or temporary expected");
- }
-}
-
-static int
-ascale(Ref r, Con *con)
-{
- int64_t n;
-
- if (rtype(r) != RCon)
- return 0;
- if (con[r.val].type != CBits)
- return 0;
- n = con[r.val].bits.i;
- return n == 1 || n == 2 || n == 4 || n == 8;
-}
-
-static void
-anumber(ANum *ai, Blk *b, Con *con)
-{
- /* This should be made obsolete by a proper
- * reassoc pass.
- *
- * Rules:
- *
- * RTmp(_) -> 0 tmp
- * ( RTmp(_) -> 1 slot )
- * RCon(_) -> 2 con
- * 0 * 2 -> 3 s * i (when constant is 1,2,4,8)
- */
- static char add[10][10] = {
- [2] [2] = 2, /* folding */
- [2] [5] = 5, [5] [2] = 5,
- [2] [6] = 6, [6] [2] = 6,
- [2] [7] = 7, [7] [2] = 7,
- [0] [0] = 4, /* 4: b + s * i */
- [0] [3] = 4, [3] [0] = 4,
- [2] [3] = 5, [3] [2] = 5, /* 5: o + s * i */
- [0] [2] = 6, [2] [0] = 6, /* 6: o + b */
- [2] [4] = 7, [4] [2] = 7, /* 7: o + b + s * i */
- [0] [5] = 7, [5] [0] = 7,
- [6] [3] = 7, [3] [6] = 7,
-
- };
- int a, a1, a2, n1, n2, t1, t2;
- Ins *i;
-
- for (i=b->ins; i-b->ins < b->nins; i++) {
- if (rtype(i->to) == RTmp)
- ai[i->to.val].i = i;
- if (i->op != Oadd && i->op != Omul)
- continue;
- a1 = aref(i->arg[0], ai);
- a2 = aref(i->arg[1], ai);
- t1 = a1 != 1 && a1 != 2;
- t2 = a2 != 1 && a2 != 2;
- if (i->op == Oadd) {
- a = add[n1 = a1][n2 = a2];
- if (t1 && a < add[0][a2])
- a = add[n1 = 0][n2 = a2];
- if (t2 && a < add[a1][0])
- a = add[n1 = a1][n2 = 0];
- if (t1 && t2 && a < add[0][0])
- a = add[n1 = 0][n2 = 0];
- } else {
- n1 = n2 = a = 0;
- if (ascale(i->arg[0], con) && t2)
- a = 3, n1 = 2, n2 = 0;
- if (t1 && ascale(i->arg[1], con))
- a = 3, n1 = 0, n2 = 2;
- }
- ai[i->to.val].n = a;
- ai[i->to.val].l = n1;
- ai[i->to.val].r = n2;
- }
-}
-
-static void
-amatch(Addr *a, Ref r, ANum *ai, Fn *fn, int top)
-{
- Ins *i;
- int nl, nr, t, s;
- Ref al, ar;
-
- if (top)
- memset(a, 0, sizeof *a);
- if (rtype(r) == RCon) {
- addcon(&a->offset, &fn->con[r.val]);
- return;
- }
- assert(rtype(r) == RTmp);
- i = ai[r.val].i;
- nl = ai[r.val].l;
- nr = ai[r.val].r;
- if (i) {
- if (nl > nr) {
- al = i->arg[1];
- ar = i->arg[0];
- t = nl, nl = nr, nr = t;
- } else {
- al = i->arg[0];
- ar = i->arg[1];
- }
- }
- switch (ai[r.val].n) {
- case 3: /* s * i */
- if (!top) {
- a->index = al;
- a->scale = fn->con[ar.val].bits.i;
- } else
- a->base = r;
- break;
- case 4: /* b + s * i */
- switch (nr) {
- case 0:
- if (fn->tmp[ar.val].slot != -1) {
- al = i->arg[1];
- ar = i->arg[0];
- }
- a->index = ar;
- a->scale = 1;
- break;
- case 3:
- amatch(a, ar, ai, fn, 0);
- break;
- }
- r = al;
- case 0:
- s = fn->tmp[r.val].slot;
- if (s != -1)
- r = SLOT(s);
- a->base = r;
- break;
- case 2: /* constants */
- case 5: /* o + s * i */
- case 6: /* o + b */
- case 7: /* o + b + s * i */
- amatch(a, ar, ai, fn, 0);
- amatch(a, al, ai, fn, 0);
- break;
- default:
- die("unreachable");
- }
-}
-
-/* instruction selection
- * requires use counts (as given by parsing)
- */
-void
-isel(Fn *fn)
-{
- Blk *b, **sb;
- Ins *i;
- Phi *p;
- uint a;
- int n, al;
- int64_t sz;
- ANum *ainfo;
-
- /* assign slots to fast allocs */
- b = fn->start;
- /* specific to NAlign == 3 */ /* or change n=4 and sz /= 4 below */
- for (al=Oalloc, n=4; al<=Oalloc1; al++, n*=2)
- for (i=b->ins; i-b->ins < b->nins; i++)
- if (i->op == al) {
- if (rtype(i->arg[0]) != RCon)
- break;
- sz = fn->con[i->arg[0].val].bits.i;
- if (sz < 0 || sz >= INT_MAX-15)
- err("invalid alloc size %"PRId64, sz);
- sz = (sz + n-1) & -n;
- sz /= 4;
- fn->tmp[i->to.val].slot = fn->slot;
- fn->slot += sz;
- *i = (Ins){.op = Onop};
- }
-
- /* process basic blocks */
- n = fn->ntmp;
- ainfo = emalloc(n * sizeof ainfo[0]);
- for (b=fn->start; b; b=b->link) {
- curi = &insb[NIns];
- for (sb=(Blk*[3]){b->s1, b->s2, 0}; *sb; sb++)
- for (p=(*sb)->phi; p; p=p->link) {
- for (a=0; p->blk[a] != b; a++)
- assert(a+1 < p->narg);
- fixarg(&p->arg[a], p->cls, 1, fn);
- }
- memset(ainfo, 0, n * sizeof ainfo[0]);
- anumber(ainfo, b, fn->con);
- seljmp(b, fn);
- for (i=&b->ins[b->nins]; i!=b->ins;)
- sel(*--i, ainfo, fn);
- b->nins = &insb[NIns] - curi;
- idup(&b->ins, curi, b->nins);
- }
- free(ainfo);
-
- if (debug['I']) {
- fprintf(stderr, "\n> After instruction selection:\n");
- printfn(fn, stderr);
- }
-}
diff --git a/live.c b/live.c
@@ -104,31 +104,39 @@ Again:
memset(phi, 0, f->ntmp * sizeof phi[0]);
memset(nlv, 0, sizeof nlv);
- b->out->t[0] |= RGLOB;
+ b->out->t[0] |= T.rglob;
bscopy(b->in, b->out);
for (t=0; bsiter(b->in, &t); t++) {
phifix(t, phi, f->tmp);
nlv[KBASE(f->tmp[t].cls)]++;
}
if (rtype(b->jmp.arg) == RCall) {
- assert(bscount(b->in) == NRGlob && nlv[0] == NRGlob && nlv[1] == 0);
- b->in->t[0] |= retregs(b->jmp.arg, nlv);
+ assert((int)bscount(b->in) == T.nrglob &&
+ nlv[0] == T.nrglob &&
+ nlv[1] == 0);
+ b->in->t[0] |= T.retregs(b->jmp.arg, nlv);
} else
bset(b->jmp.arg, b, nlv, phi, f->tmp);
for (k=0; k<2; k++)
b->nlive[k] = nlv[k];
for (i=&b->ins[b->nins]; i!=b->ins;) {
if ((--i)->op == Ocall && rtype(i->arg[1]) == RCall) {
- b->in->t[0] &= ~retregs(i->arg[1], m);
- for (k=0; k<2; k++)
+ b->in->t[0] &= ~T.retregs(i->arg[1], m);
+ for (k=0; k<2; k++) {
nlv[k] -= m[k];
- if (nlv[0] + NISave > b->nlive[0])
- b->nlive[0] = nlv[0] + NISave;
- if (nlv[1] + NFSave > b->nlive[1])
- b->nlive[1] = nlv[1] + NFSave;
- b->in->t[0] |= argregs(i->arg[1], m);
- for (k=0; k<2; k++)
+ /* caller-save registers are used
+ * by the callee, in that sense,
+ * right in the middle of the call,
+ * they are live: */
+ nlv[k] += T.nrsave[k];
+ if (nlv[k] > b->nlive[k])
+ b->nlive[k] = nlv[k];
+ }
+ b->in->t[0] |= T.argregs(i->arg[1], m);
+ for (k=0; k<2; k++) {
+ nlv[k] -= T.nrsave[k];
nlv[k] += m[k];
+ }
}
if (!req(i->to, R)) {
assert(rtype(i->to) == RTmp);
diff --git a/main.c b/main.c
@@ -3,6 +3,18 @@
#include <ctype.h>
#include <getopt.h>
+Target T;
+
+extern Target T_amd64_sysv;
+
+static struct TMap {
+ char *name;
+ Target *T;
+} tmap[] = {
+ { "amd64_sysv", &T_amd64_sysv },
+ { 0, 0 }
+};
+
enum Asm {
Gasmacho,
Gaself,
@@ -33,7 +45,7 @@ data(Dat *d)
fputs("/* end data */\n\n", outf);
freeall();
}
- emitdat(d, outf);
+ gasemitdat(d, outf);
}
static void
@@ -62,10 +74,10 @@ func(Fn *fn)
copy(fn);
filluse(fn);
fold(fn);
- abi(fn);
+ T.abi(fn);
fillpreds(fn);
filluse(fn);
- isel(fn);
+ T.isel(fn);
fillrpo(fn);
filllive(fn);
fillcost(fn);
@@ -83,7 +95,7 @@ func(Fn *fn)
} else
fn->rpo[n]->link = fn->rpo[n+1];
if (!dbg) {
- emitfn(fn, outf);
+ T.emitfn(fn, outf);
fprintf(outf, "/* end function %s */\n\n", fn->name);
} else
fprintf(stderr, "\n");
@@ -93,13 +105,15 @@ func(Fn *fn)
int
main(int ac, char *av[])
{
- FILE *inf;
- char *f;
+ struct TMap *tm;
+ FILE *inf, *hf;
+ char *f, *sep;
int c, asm;
- asm = Defaultasm;
+ asm = Defasm;
+ T = Deftgt;
outf = stdout;
- while ((c = getopt(ac, av, "hd:o:G:")) != -1)
+ while ((c = getopt(ac, av, "hd:o:G:t:")) != -1)
switch (c) {
case 'd':
for (; *optarg; optarg++)
@@ -112,6 +126,18 @@ main(int ac, char *av[])
if (strcmp(optarg, "-") != 0)
outf = fopen(optarg, "w");
break;
+ case 't':
+ for (tm=tmap;; tm++) {
+ if (!tm->name) {
+ fprintf(stderr, "unknown target '%s'\n", optarg);
+ exit(1);
+ }
+ if (strcmp(optarg, tm->name) == 0) {
+ T = *tm->T;
+ break;
+ }
+ }
+ break;
case 'G':
if (strcmp(optarg, "e") == 0)
asm = Gaself;
@@ -124,22 +150,28 @@ main(int ac, char *av[])
break;
case 'h':
default:
- fprintf(stderr, "%s [OPTIONS] {file.ssa, -}\n", av[0]);
- fprintf(stderr, "\t%-10s prints this help\n", "-h");
- fprintf(stderr, "\t%-10s output to file\n", "-o file");
- fprintf(stderr, "\t%-10s generate gas (e) or osx (m) asm\n", "-G {e,m}");
- fprintf(stderr, "\t%-10s dump debug information\n", "-d <flags>");
+ hf = c != 'h' ? stderr : stdout;
+ fprintf(hf, "%s [OPTIONS] {file.ssa, -}\n", av[0]);
+ fprintf(hf, "\t%-11s prints this help\n", "-h");
+ fprintf(hf, "\t%-11s output to file\n", "-o file");
+ fprintf(hf, "\t%-11s generate for a target among:\n", "-t <target>");
+ fprintf(hf, "\t%-11s ", "");
+ for (tm=tmap, sep=""; tm->name; tm++, sep=", ")
+ fprintf(hf, "%s%s", sep, tm->name);
+ fprintf(hf, "\n");
+ fprintf(hf, "\t%-11s generate gas (e) or osx (m) asm\n", "-G {e,m}");
+ fprintf(hf, "\t%-11s dump debug information\n", "-d <flags>");
exit(c != 'h');
}
switch (asm) {
case Gaself:
- locprefix = ".L";
- symprefix = "";
+ gasloc = ".L";
+ gassym = "";
break;
case Gasmacho:
- locprefix = "L";
- symprefix = "_";
+ gasloc = "L";
+ gassym = "_";
break;
}
@@ -159,7 +191,7 @@ main(int ac, char *av[])
} while (++optind < ac);
if (!dbg)
- emitfin(outf);
+ gasemitfin(outf);
exit(0);
}
diff --git a/mem.c b/mem.c
@@ -34,9 +34,9 @@ memopt(Fn *fn)
if (isstore(l->op))
if (req(i->to, l->arg[1]) && !req(i->to, l->arg[0]))
if (s == -1 || s == storesz(l))
- if (k == -1 || k == opdesc[l->op].argcls[0][0]) {
+ if (k == -1 || k == optab[l->op].argcls[0][0]) {
s = storesz(l);
- k = opdesc[l->op].argcls[0][0];
+ k = optab[l->op].argcls[0][0];
continue;
}
goto Skip;
diff --git a/ops.h b/ops.h
@@ -0,0 +1,167 @@
+#ifndef X /* amd64 */
+ #define X(NMemArgs, SetsZeroFlag, LeavesFlags)
+#endif
+
+#define T(a,b,c,d,e,f,g,h) { \
+ {[Kw]=K##a, [Kl]=K##b, [Ks]=K##c, [Kd]=K##d}, \
+ {[Kw]=K##e, [Kl]=K##f, [Ks]=K##g, [Kd]=K##h} \
+}
+
+
+/*********************/
+/* PUBLIC OPERATIONS */
+/*********************/
+
+/* Arithmetic and Bits */
+O(add, T(w,l,s,d, w,l,s,d), 1) X(2, 1, 0)
+O(sub, T(w,l,s,d, w,l,s,d), 1) X(2, 1, 0)
+O(div, T(w,l,s,d, w,l,s,d), 1) X(0, 0, 0)
+O(rem, T(w,l,e,e, w,l,e,e), 1) X(0, 0, 0)
+O(udiv, T(w,l,e,e, w,l,e,e), 1) X(0, 0, 0)
+O(urem, T(w,l,e,e, w,l,e,e), 1) X(0, 0, 0)
+O(mul, T(w,l,s,d, w,l,s,d), 1) X(2, 0, 0)
+O(and, T(w,l,e,e, w,l,e,e), 1) X(2, 1, 0)
+O(or, T(w,l,e,e, w,l,e,e), 1) X(2, 1, 0)
+O(xor, T(w,l,e,e, w,l,e,e), 1) X(2, 1, 0)
+O(sar, T(w,l,e,e, w,w,e,e), 1) X(1, 1, 0)
+O(shr, T(w,l,e,e, w,w,e,e), 1) X(1, 1, 0)
+O(shl, T(w,l,e,e, w,w,e,e), 1) X(1, 1, 0)
+
+/* Comparisons */
+O(ceqw, T(w,w,e,e, w,w,e,e), 1) X(0, 1, 0)
+O(cnew, T(w,w,e,e, w,w,e,e), 1) X(0, 1, 0)
+O(csgew, T(w,w,e,e, w,w,e,e), 1) X(0, 1, 0)
+O(csgtw, T(w,w,e,e, w,w,e,e), 1) X(0, 1, 0)
+O(cslew, T(w,w,e,e, w,w,e,e), 1) X(0, 1, 0)
+O(csltw, T(w,w,e,e, w,w,e,e), 1) X(0, 1, 0)
+O(cugew, T(w,w,e,e, w,w,e,e), 1) X(0, 1, 0)
+O(cugtw, T(w,w,e,e, w,w,e,e), 1) X(0, 1, 0)
+O(culew, T(w,w,e,e, w,w,e,e), 1) X(0, 1, 0)
+O(cultw, T(w,w,e,e, w,w,e,e), 1) X(0, 1, 0)
+
+O(ceql, T(l,l,e,e, l,l,e,e), 1) X(0, 1, 0)
+O(cnel, T(l,l,e,e, l,l,e,e), 1) X(0, 1, 0)
+O(csgel, T(l,l,e,e, l,l,e,e), 1) X(0, 1, 0)
+O(csgtl, T(l,l,e,e, l,l,e,e), 1) X(0, 1, 0)
+O(cslel, T(l,l,e,e, l,l,e,e), 1) X(0, 1, 0)
+O(csltl, T(l,l,e,e, l,l,e,e), 1) X(0, 1, 0)
+O(cugel, T(l,l,e,e, l,l,e,e), 1) X(0, 1, 0)
+O(cugtl, T(l,l,e,e, l,l,e,e), 1) X(0, 1, 0)
+O(culel, T(l,l,e,e, l,l,e,e), 1) X(0, 1, 0)
+O(cultl, T(l,l,e,e, l,l,e,e), 1) X(0, 1, 0)
+
+O(ceqs, T(s,s,e,e, s,s,e,e), 1) X(0, 1, 0)
+O(cges, T(s,s,e,e, s,s,e,e), 1) X(0, 1, 0)
+O(cgts, T(s,s,e,e, s,s,e,e), 1) X(0, 1, 0)
+O(cles, T(s,s,e,e, s,s,e,e), 1) X(0, 1, 0)
+O(clts, T(s,s,e,e, s,s,e,e), 1) X(0, 1, 0)
+O(cnes, T(s,s,e,e, s,s,e,e), 1) X(0, 1, 0)
+O(cos, T(s,s,e,e, s,s,e,e), 1) X(0, 1, 0)
+O(cuos, T(s,s,e,e, s,s,e,e), 1) X(0, 1, 0)
+
+O(ceqd, T(d,d,e,e, d,d,e,e), 1) X(0, 1, 0)
+O(cged, T(d,d,e,e, d,d,e,e), 1) X(0, 1, 0)
+O(cgtd, T(d,d,e,e, d,d,e,e), 1) X(0, 1, 0)
+O(cled, T(d,d,e,e, d,d,e,e), 1) X(0, 1, 0)
+O(cltd, T(d,d,e,e, d,d,e,e), 1) X(0, 1, 0)
+O(cned, T(d,d,e,e, d,d,e,e), 1) X(0, 1, 0)
+O(cod, T(d,d,e,e, d,d,e,e), 1) X(0, 1, 0)
+O(cuod, T(d,d,e,e, d,d,e,e), 1) X(0, 1, 0)
+
+/* Memory */
+O(storeb, T(w,e,e,e, m,e,e,e), 0) X(0, 0, 1)
+O(storeh, T(w,e,e,e, m,e,e,e), 0) X(0, 0, 1)
+O(storew, T(w,e,e,e, m,e,e,e), 0) X(0, 0, 1)
+O(storel, T(l,e,e,e, m,e,e,e), 0) X(0, 0, 1)
+O(stores, T(s,e,e,e, m,e,e,e), 0) X(0, 0, 1)
+O(stored, T(d,e,e,e, m,e,e,e), 0) X(0, 0, 1)
+
+O(loadsb, T(m,m,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(loadub, T(m,m,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(loadsh, T(m,m,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(loaduh, T(m,m,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(loadsw, T(m,m,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(loaduw, T(m,m,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(load, T(m,m,m,m, x,x,x,x), 0) X(0, 0, 1)
+
+/* Extensions and Truncations */
+O(extsb, T(w,w,e,e, x,x,e,e), 1) X(0, 0, 1)
+O(extub, T(w,w,e,e, x,x,e,e), 1) X(0, 0, 1)
+O(extsh, T(w,w,e,e, x,x,e,e), 1) X(0, 0, 1)
+O(extuh, T(w,w,e,e, x,x,e,e), 1) X(0, 0, 1)
+O(extsw, T(e,w,e,e, e,x,e,e), 1) X(0, 0, 1)
+O(extuw, T(e,w,e,e, e,x,e,e), 1) X(0, 0, 1)
+
+O(exts, T(e,e,e,s, e,e,e,x), 1) X(0, 0, 1)
+O(truncd, T(e,e,d,e, e,e,x,e), 1) X(0, 0, 1)
+O(stosi, T(s,s,e,e, x,x,e,e), 1) X(0, 0, 1)
+O(dtosi, T(d,d,e,e, x,x,e,e), 1) X(0, 0, 1)
+O(swtof, T(e,e,w,w, e,e,x,x), 1) X(0, 0, 1)
+O(sltof, T(e,e,l,l, e,e,x,x), 1) X(0, 0, 1)
+O(cast, T(s,d,w,l, x,x,x,x), 1) X(0, 0, 1)
+
+/* Stack Allocation */
+O(alloc4, T(e,l,e,e, e,x,e,e), 0) X(0, 0, 0)
+O(alloc8, T(e,l,e,e, e,x,e,e), 0) X(0, 0, 0)
+O(alloc16, T(e,l,e,e, e,x,e,e), 0) X(0, 0, 0)
+
+/* Variadic Function Helpers */
+O(vaarg, T(m,m,m,m, x,x,x,x), 0) X(0, 0, 0)
+O(vastart, T(m,e,e,e, x,e,e,e), 0) X(0, 0, 0)
+
+O(copy, T(w,l,s,d, x,x,x,x), 0) X(0, 0, 1)
+
+
+/****************************************/
+/* INTERNAL OPERATIONS (keep nop first) */
+/****************************************/
+
+/* Miscellaneous and Architecture-Specific Operations */
+O(nop, T(x,x,x,x, x,x,x,x), 0) X(0, 0, 1)
+O(addr, T(m,m,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(swap, T(w,l,s,d, w,l,s,d), 0) X(1, 0, 0)
+O(sign, T(w,l,e,e, x,x,e,e), 0) X(0, 0, 0)
+O(salloc, T(e,l,e,e, e,x,e,e), 0) X(0, 0, 0)
+O(xidiv, T(w,l,e,e, x,x,e,e), 0) X(1, 0, 0)
+O(xdiv, T(w,l,e,e, x,x,e,e), 0) X(1, 0, 0)
+O(xcmp, T(w,l,s,d, w,l,s,d), 0) X(1, 1, 0)
+O(xtest, T(w,l,e,e, w,l,e,e), 0) X(1, 1, 0)
+
+/* Arguments, Parameters, and Calls */
+O(par, T(x,x,x,x, x,x,x,x), 0) X(0, 0, 0)
+O(parc, T(e,x,e,e, e,x,e,e), 0) X(0, 0, 0)
+O(pare, T(x,x,x,x, x,x,x,x), 0) X(0, 0, 0)
+O(arg, T(w,l,s,d, x,x,x,x), 0) X(0, 0, 0)
+O(argc, T(e,x,e,e, e,l,e,e), 0) X(0, 0, 0)
+O(arge, T(w,l,s,d, x,x,x,x), 0) X(0, 0, 0)
+O(call, T(m,m,m,m, x,x,x,x), 0) X(0, 0, 0)
+O(vacall, T(m,m,m,m, x,x,x,x), 0) X(0, 0, 0)
+
+/* Flags Setting */
+O(flagieq, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(flagine, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(flagisge, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(flagisgt, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(flagisle, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(flagislt, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(flagiuge, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(flagiugt, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(flagiule, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(flagiult, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(flagfeq, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(flagfge, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(flagfgt, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(flagfle, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(flagflt, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(flagfne, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(flagfo, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(flagfuo, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
+
+
+#undef T
+#undef X
+#undef O
+
+/*
+| column -t -o ' '
+*/
diff --git a/parse.c b/parse.c
@@ -4,91 +4,13 @@
enum {
Ke = -2, /* Erroneous mode */
- Km = Kl, /* Memory pointer (for x64) */
+ Km = Kl, /* Memory pointer */
};
-OpDesc opdesc[NOp] = {
-#define A(a,b,c,d) {[Kw]=K##a, [Kl]=K##b, [Ks]=K##c, [Kd]=K##d}
-
- /* NAME NM ARGCLS0 ARGCLS1 SF LF FLD*/
- [Oadd] = { "add", 2, {A(w,l,s,d), A(w,l,s,d)}, 1, 0, 1 },
- [Osub] = { "sub", 2, {A(w,l,s,d), A(w,l,s,d)}, 1, 0, 1 },
- [Odiv] = { "div", 2, {A(w,l,s,d), A(w,l,s,d)}, 0, 0, 1 },
- [Orem] = { "rem", 2, {A(w,l,e,e), A(w,l,e,e)}, 0, 0, 1 },
- [Oudiv] = { "udiv", 2, {A(w,l,e,e), A(w,l,e,e)}, 0, 0, 1 },
- [Ourem] = { "urem", 2, {A(w,l,e,e), A(w,l,e,e)}, 0, 0, 1 },
- [Omul] = { "mul", 2, {A(w,l,s,d), A(w,l,s,d)}, 0, 0, 1 },
- [Oand] = { "and", 2, {A(w,l,e,e), A(w,l,e,e)}, 1, 0, 1 },
- [Oor] = { "or", 2, {A(w,l,e,e), A(w,l,e,e)}, 1, 0, 1 },
- [Oxor] = { "xor", 2, {A(w,l,e,e), A(w,l,e,e)}, 1, 0, 1 },
- [Osar] = { "sar", 1, {A(w,l,e,e), A(w,w,e,e)}, 1, 0, 1 },
- [Oshr] = { "shr", 1, {A(w,l,e,e), A(w,w,e,e)}, 1, 0, 1 },
- [Oshl] = { "shl", 1, {A(w,l,e,e), A(w,w,e,e)}, 1, 0, 1 },
- [Ostored] = { "stored", 0, {A(d,e,e,e), A(m,e,e,e)}, 0, 1, 0 },
- [Ostores] = { "stores", 0, {A(s,e,e,e), A(m,e,e,e)}, 0, 1, 0 },
- [Ostorel] = { "storel", 0, {A(l,e,e,e), A(m,e,e,e)}, 0, 1, 0 },
- [Ostorew] = { "storew", 0, {A(w,e,e,e), A(m,e,e,e)}, 0, 1, 0 },
- [Ostoreh] = { "storeh", 0, {A(w,e,e,e), A(m,e,e,e)}, 0, 1, 0 },
- [Ostoreb] = { "storeb", 0, {A(w,e,e,e), A(m,e,e,e)}, 0, 1, 0 },
- [Oload] = { "load", 0, {A(m,m,m,m), A(x,x,x,x)}, 0, 1, 0 },
- [Oloadsw] = { "loadsw", 0, {A(m,m,e,e), A(x,x,e,e)}, 0, 1, 0 },
- [Oloaduw] = { "loaduw", 0, {A(m,m,e,e), A(x,x,e,e)}, 0, 1, 0 },
- [Oloadsh] = { "loadsh", 0, {A(m,m,e,e), A(x,x,e,e)}, 0, 1, 0 },
- [Oloaduh] = { "loaduh", 0, {A(m,m,e,e), A(x,x,e,e)}, 0, 1, 0 },
- [Oloadsb] = { "loadsb", 0, {A(m,m,e,e), A(x,x,e,e)}, 0, 1, 0 },
- [Oloadub] = { "loadub", 0, {A(m,m,e,e), A(x,x,e,e)}, 0, 1, 0 },
- [Oextsw] = { "extsw", 0, {A(e,w,e,e), A(e,x,e,e)}, 0, 1, 1 },
- [Oextuw] = { "extuw", 0, {A(e,w,e,e), A(e,x,e,e)}, 0, 1, 1 },
- [Oextsh] = { "extsh", 0, {A(w,w,e,e), A(x,x,e,e)}, 0, 1, 1 },
- [Oextuh] = { "extuh", 0, {A(w,w,e,e), A(x,x,e,e)}, 0, 1, 1 },
- [Oextsb] = { "extsb", 0, {A(w,w,e,e), A(x,x,e,e)}, 0, 1, 1 },
- [Oextub] = { "extub", 0, {A(w,w,e,e), A(x,x,e,e)}, 0, 1, 1 },
- [Oexts] = { "exts", 0, {A(e,e,e,s), A(e,e,e,x)}, 0, 1, 1 },
- [Otruncd] = { "truncd", 0, {A(e,e,d,e), A(e,e,x,e)}, 0, 1, 1 },
- [Ostosi] = { "stosi", 0, {A(s,s,e,e), A(x,x,e,e)}, 0, 1, 1 },
- [Odtosi] = { "dtosi", 0, {A(d,d,e,e), A(x,x,e,e)}, 0, 1, 1 },
- [Oswtof] = { "swtof", 0, {A(e,e,w,w), A(e,e,x,x)}, 0, 1, 1 },
- [Osltof] = { "sltof", 0, {A(e,e,l,l), A(e,e,x,x)}, 0, 1, 1 },
- [Ocast] = { "cast", 0, {A(s,d,w,l), A(x,x,x,x)}, 0, 1, 1 },
- [Ocopy] = { "copy", 1, {A(w,l,s,d), A(x,x,x,x)}, 0, 1, 0 },
- [Onop] = { "nop", 0, {A(x,x,x,x), A(x,x,x,x)}, 0, 1, 0 },
- [Oswap] = { "swap", 2, {A(w,l,s,d), A(w,l,s,d)}, 0, 0, 0 },
- [Osign] = { "sign", 0, {A(w,l,e,e), A(x,x,e,e)}, 0, 0, 0 },
- [Osalloc] = { "salloc", 0, {A(e,l,e,e), A(e,x,e,e)}, 0, 0, 0 },
- [Oxidiv] = { "xidiv", 1, {A(w,l,e,e), A(x,x,e,e)}, 0, 0, 0 },
- [Oxdiv] = { "xdiv", 1, {A(w,l,e,e), A(x,x,e,e)}, 0, 0, 0 },
- [Oxcmp] = { "xcmp", 1, {A(w,l,s,d), A(w,l,s,d)}, 1, 0, 0 },
- [Oxtest] = { "xtest", 1, {A(w,l,e,e), A(w,l,e,e)}, 1, 0, 0 },
- [Oaddr] = { "addr", 0, {A(m,m,e,e), A(x,x,e,e)}, 0, 1, 0 },
- [Opar] = { "par", 0, {A(x,x,x,x), A(x,x,x,x)}, 0, 0, 0 },
- [Opare] = { "pare", 0, {A(x,x,x,x), A(x,x,x,x)}, 0, 0, 0 },
- [Oparc] = { "parc", 0, {A(e,x,e,e), A(e,x,e,e)}, 0, 0, 0 },
- [Oarg] = { "arg", 0, {A(w,l,s,d), A(x,x,x,x)}, 0, 0, 0 },
- [Oarge] = { "arge", 0, {A(w,l,s,d), A(x,x,x,x)}, 0, 0, 0 },
- [Oargc] = { "argc", 0, {A(e,x,e,e), A(e,l,e,e)}, 0, 0, 0 },
- [Ocall] = { "call", 0, {A(m,m,m,m), A(x,x,x,x)}, 0, 0, 0 },
- [Ovacall] = { "vacall", 0, {A(m,m,m,m), A(x,x,x,x)}, 0, 0, 0 },
- [Oxsetnp] = { "xsetnp", 0, {A(x,x,e,e), A(x,x,e,e)}, 0, 0, 0 },
- [Oxsetp] = { "xsetp", 0, {A(x,x,e,e), A(x,x,e,e)}, 0, 0, 0 },
- [Oalloc] = { "alloc4", 1, {A(e,l,e,e), A(e,x,e,e)}, 0, 0, 0 },
- [Oalloc+1] = { "alloc8", 1, {A(e,l,e,e), A(e,x,e,e)}, 0, 0, 0 },
- [Oalloc+2] = { "alloc16", 1, {A(e,l,e,e), A(e,x,e,e)}, 0, 0, 0 },
- [Ovaarg] = { "vaarg", 0, {A(m,m,m,m), A(x,x,x,x)}, 0, 0, 0 },
- [Ovastart] = { "vastart", 0, {A(m,e,e,e), A(x,e,e,e)}, 0, 0, 0 },
-#define X(c) \
- [Ocmpw+IC##c] = { "c" #c "w", 0, {A(w,w,e,e), A(w,w,e,e)}, 1, 0, 1 }, \
- [Ocmpl+IC##c] = { "c" #c "l", 0, {A(l,l,e,e), A(l,l,e,e)}, 1, 0, 1 }, \
- [Oxset+IC##c] = { "xset" #c, 0, {A(x,x,e,e), A(x,x,e,e)}, 0, 1, 0 },
- ICMPS(X)
-#undef X
-#define X(c) \
- [Ocmps+FC##c] = { "c" #c "s", 0, {A(s,s,e,e), A(s,s,e,e)}, 1, 0, 1 }, \
- [Ocmpd+FC##c] = { "c" #c "d", 0, {A(d,d,e,e), A(d,d,e,e)}, 1, 0, 1 },
- FCMPS(X)
-#undef X
-
+Op optab[NOp] = { /* operation table, generated from ops.h: every
+ * O(op, t, cf) entry listed there becomes the designated
+ * initializer of slot O<op>, made of the operation name as a
+ * string followed by t and cf unchanged.  Other parts of this
+ * file read the name and argcls members of these entries.
+ * NOTE(review): the O macro is not #undef'd here; ops.h appears
+ * to #undef O, T and X itself at its end -- confirm.
+ */
+#define O(op, t, cf) [O##op]={#op, t, cf},
+ #include "ops.h"
};
-#undef A
typedef enum {
PXXX,
@@ -242,8 +164,8 @@ lexinit()
if (done)
return;
for (i=0; i<NPubOp; ++i)
- if (opdesc[i].name)
- kwmap[i] = opdesc[i].name;
+ if (optab[i].name)
+ kwmap[i] = optab[i].name;
assert(Ntok <= CHAR_MAX);
for (i=0; i<Ntok; ++i)
if (kwmap[i]) {
@@ -810,26 +732,26 @@ typecheck(Fn *fn)
}
for (i=b->ins; i-b->ins < b->nins; i++)
for (n=0; n<2; n++) {
- k = opdesc[i->op].argcls[n][i->cls];
+ k = optab[i->op].argcls[n][i->cls];
r = i->arg[n];
t = &fn->tmp[r.val];
if (k == Ke)
err("invalid instruction type in %s",
- opdesc[i->op].name);
+ optab[i->op].name);
if (rtype(r) == RType)
continue;
if (rtype(r) != -1 && k == Kx)
err("no %s operand expected in %s",
n == 1 ? "second" : "first",
- opdesc[i->op].name);
+ optab[i->op].name);
if (rtype(r) == -1 && k != Kx)
err("missing %s operand in %s",
n == 1 ? "second" : "first",
- opdesc[i->op].name);
+ optab[i->op].name);
if (!usecheck(r, k, fn))
err("invalid type for %s operand %%%s in %s",
n == 1 ? "second" : "first",
- t->name, opdesc[i->op].name);
+ t->name, optab[i->op].name);
}
r = b->jmp.arg;
if (isret(b->jmp.type)) {
@@ -866,7 +788,10 @@ parsefn(int export)
curf->tmp = vnew(curf->ntmp, sizeof curf->tmp[0], Pfn);
curf->con = vnew(curf->ncon, sizeof curf->con[0], Pfn);
for (i=0; i<Tmp0; ++i)
- newtmp(0, i < XMM0 ? Kl : Kd, curf);
+ if (T.fpr0 <= i && i < T.fpr0 + T.nfpr)
+ newtmp(0, Kd, curf);
+ else
+ newtmp(0, Kl, curf);
curf->con[0].type = CBits;
curf->export = export;
blink = &curf->start;
@@ -1228,29 +1153,12 @@ printref(Ref r, Fn *fn, FILE *f)
void
printfn(Fn *fn, FILE *f)
{
+ static char ktoc[] = "wlsd";
static char *jtoa[NJmp] = {
- [Jret0] = "ret",
- [Jretw] = "retw",
- [Jretl] = "retl",
- [Jretc] = "retc",
- [Jrets] = "rets",
- [Jretd] = "retd",
- [Jjnz] = "jnz",
- [Jxjnp] = "xjnp",
- [Jxjp] = "xjp",
- #define X(c) [Jxjc+IC##c] = "xj" #c,
- ICMPS(X)
+ #define X(j) [J##j] = #j,
+ JMPS(X)
#undef X
};
- static char prcls[NOp] = {
- [Oarg] = 1,
- [Oswap] = 1,
- [Oxcmp] = 1,
- [Oxtest] = 1,
- [Oxdiv] = 1,
- [Oxidiv] = 1,
- };
- static char ktoc[] = "wlsd";
Blk *b;
Phi *p;
Ins *i;
@@ -1282,10 +1190,18 @@ printfn(Fn *fn, FILE *f)
printref(i->to, fn, f);
fprintf(f, " =%c ", ktoc[i->cls]);
}
- assert(opdesc[i->op].name);
- fprintf(f, "%s", opdesc[i->op].name);
- if (req(i->to, R) && prcls[i->op])
- fputc(ktoc[i->cls], f);
+ assert(optab[i->op].name);
+ fprintf(f, "%s", optab[i->op].name);
+ if (req(i->to, R))
+ switch (i->op) {
+ case Oarg:
+ case Oswap:
+ case Oxcmp:
+ case Oxtest:
+ case Oxdiv:
+ case Oxidiv:
+ fputc(ktoc[i->cls], f);
+ }
if (!req(i->arg[0], R)) {
fprintf(f, " ");
printref(i->arg[0], fn, f);
diff --git a/rega.c b/rega.c
@@ -8,8 +8,8 @@
typedef struct RMap RMap;
struct RMap {
- int t[NIReg+NFReg];
- int r[NIReg+NFReg];
+ int t[Tmp0];
+ int r[Tmp0];
BSet b[1];
int n;
};
@@ -78,10 +78,12 @@ static void
radd(RMap *m, int t, int r)
{
assert((t >= Tmp0 || t == r) && "invalid temporary");
- assert(((RAX <= r && r < RAX + NIReg) || (XMM0 <= r && r < XMM0 + NFReg)) && "invalid register");
+ assert(((T.gpr0 <= r && r < T.gpr0 + T.ngpr)
+ || (T.fpr0 <= r && r < T.fpr0 + T.nfpr))
+ && "invalid register");
assert(!bshas(m->b, t) && "temporary has mapping");
assert(!bshas(m->b, r) && "register already allocated");
- assert(m->n <= NIReg+NFReg && "too many mappings");
+ assert(m->n <= T.ngpr+T.nfpr && "too many mappings");
bsset(m->b, t);
bsset(m->b, r);
m->t[m->n] = t;
@@ -110,11 +112,11 @@ ralloc(RMap *m, int t)
regs = tmp[phicls(t, tmp)].hint.m;
regs |= m->b->t[0];
if (KBASE(tmp[t].cls) == 0) {
- r0 = RAX;
- r1 = RAX + NIReg;
+ r0 = T.gpr0;
+ r1 = r0 + T.ngpr;
} else {
- r0 = XMM0;
- r1 = XMM0 + NFReg;
+ r0 = T.fpr0;
+ r1 = r0 + T.nfpr;
}
for (r=r0; r<r1; r++)
if (!(regs & BIT(r)))
@@ -135,7 +137,7 @@ rfree(RMap *m, int t)
{
int i, r;
- assert(t >= Tmp0 || !(BIT(t) & RGLOB));
+ assert(t >= Tmp0 || !(BIT(t) & T.rglob));
if (!bshas(m->b, t))
return -1;
for (i=0; m->t[i] != t; i++)
@@ -295,10 +297,10 @@ dopm(Blk *b, Ins *i, RMap *m)
} while (i != b->ins && regcpy(i-1));
assert(m0.n <= m->n);
if (i != b->ins && (i-1)->op == Ocall) {
- def = retregs((i-1)->arg[1], 0);
- for (r=0; r<NRSave; r++)
- if (!(BIT(rsave[r]) & def))
- move(rsave[r], R, m);
+ def = T.retregs((i-1)->arg[1], 0) | T.rglob;
+ for (r=0; T.rsave[r]>=0; r++)
+ if (!(BIT(T.rsave[r]) & def))
+ move(T.rsave[r], R, m);
}
for (npm=0, n=0; n<m->n; n++) {
t = m->t[n];
@@ -370,10 +372,10 @@ doblk(Blk *b, RMap *cur)
for (i=&b->ins[b->nins]; i!=b->ins;) {
switch ((--i)->op) {
case Ocall:
- rs = argregs(i->arg[1], 0);
- for (r=0; r<NRSave; r++)
- if (!(BIT(rsave[r]) & rs))
- rfree(cur, rsave[r]);
+ rs = T.argregs(i->arg[1], 0) | T.rglob;
+ for (r=0; T.rsave[r]>=0; r++)
+ if (!(BIT(T.rsave[r]) & rs))
+ rfree(cur, T.rsave[r]);
break;
case Ocopy:
if (isreg(i->arg[0])) {
@@ -388,7 +390,7 @@ doblk(Blk *b, RMap *cur)
if (!req(i->to, R)) {
assert(rtype(i->to) == RTmp);
r = i->to.val;
- if (r >= Tmp0 || !(BIT(r) & RGLOB))
+ if (r >= Tmp0 || !(BIT(r) & T.rglob))
r = rfree(cur, r);
if (r == -1) {
assert(!isreg(i->to));
diff --git a/spill.c b/spill.c
@@ -196,8 +196,8 @@ limit2(BSet *b1, int k1, int k2, BSet *fst)
bscopy(b2, b1);
bsinter(b1, mask[0]);
bsinter(b2, mask[1]);
- limit(b1, NIReg - k1, fst);
- limit(b2, NFReg - k2, fst);
+ limit(b1, T.ngpr - k1, fst);
+ limit(b2, T.nfpr - k2, fst);
bsunion(b1, b2);
}
@@ -265,11 +265,11 @@ dopm(Blk *b, Ins *i, BSet *v)
} while (i != b->ins && regcpy(i-1));
bscopy(u, v);
if (i != b->ins && (i-1)->op == Ocall) {
- v->t[0] &= ~retregs((i-1)->arg[1], 0);
- limit2(v, NISave, NFSave, 0);
- for (r=0, n=0; n<NRSave; n++)
- r |= BIT(rsave[n]);
- v->t[0] |= argregs((i-1)->arg[1], 0);
+ v->t[0] &= ~T.retregs((i-1)->arg[1], 0);
+ limit2(v, T.nrsave[0], T.nrsave[1], 0);
+ for (n=0, r=0; T.rsave[n]>=0; n++)
+ r |= BIT(T.rsave[n]);
+ v->t[0] |= T.argregs((i-1)->arg[1], 0);
} else {
limit2(v, 0, 0, 0);
r = v->t[0];
@@ -318,9 +318,9 @@ spill(Fn *fn)
slot8 = 0;
for (t=0; t<ntmp; t++) {
k = 0;
- if (t >= XMM0 && t < XMM0 + NFReg)
+ if (t >= T.fpr0 && t < T.fpr0 + T.nfpr)
k = 1;
- else if (t >= Tmp0)
+ if (t >= Tmp0)
k = KBASE(tmp[t].cls);
bsset(mask[k], t);
}
@@ -344,9 +344,9 @@ spill(Fn *fn)
if (hd) {
/* back-edge */
bszero(v);
- hd->gen->t[0] |= RGLOB; /* don't spill registers */
+ hd->gen->t[0] |= T.rglob; /* don't spill registers */
for (k=0; k<2; k++) {
- n = k == 0 ? NIReg : NFReg;
+ n = k == 0 ? T.ngpr : T.nfpr;
bscopy(u, b->out);
bsinter(u, mask[k]);
bscopy(w, u);
@@ -373,7 +373,7 @@ spill(Fn *fn)
} else {
bscopy(v, b->out);
if (rtype(b->jmp.arg) == RCall)
- v->t[0] |= retregs(b->jmp.arg, 0);
+ v->t[0] |= T.retregs(b->jmp.arg, 0);
}
for (t=Tmp0; bsiter(b->out, &t); t++)
if (!bshas(v, t))
@@ -381,7 +381,7 @@ spill(Fn *fn)
bscopy(b->out, v);
/* 2. process the block instructions */
- r = v->t[0] & (BIT(Tmp0)-1);
+ r = v->t[0];
curi = &insb[NIns];
for (i=&b->ins[b->nins]; i!=b->ins;) {
i--;
@@ -402,7 +402,7 @@ spill(Fn *fn)
bsset(w, t);
}
}
- j = opdesc[i->op].nmem;
+ j = T.memargs(i->op);
for (n=0; n<2; n++)
if (rtype(i->arg[n]) == RMem)
j--;
@@ -449,11 +449,11 @@ spill(Fn *fn)
bsclr(v, t);
}
emiti(*i);
- r = v->t[0] & (BIT(Tmp0)-1);
+ r = v->t[0]; /* Tmp0 is NBit */
if (r)
sethint(v, r);
}
- assert(r == RGLOB || b == fn->start);
+ assert(r == T.rglob || b == fn->start);
for (p=b->phi; p; p=p->link) {
assert(rtype(p->to) == RTmp);
diff --git a/sysv.c b/sysv.c
@@ -1,718 +0,0 @@
-#include "all.h"
-
-typedef struct AClass AClass;
-typedef struct RAlloc RAlloc;
-
-struct AClass {
- int inmem;
- int align;
- uint size;
- int cls[2];
- Ref ref[2];
-};
-
-struct RAlloc {
- Ins i;
- RAlloc *link;
-};
-
-static void
-classify(AClass *a, Typ *t, int *pn, int *pe)
-{
- Seg *seg;
- int n, s, *cls;
-
- for (n=0; n<t->nunion; n++) {
- seg = t->seg[n];
- for (s=0; *pe<2; (*pe)++) {
- cls = &a->cls[*pe];
- for (; *pn<8; s++) {
- switch (seg[s].type) {
- case SEnd:
- goto Done;
- case SPad:
- /* don't change anything */
- break;
- case SFlt:
- if (*cls == Kx)
- *cls = Kd;
- break;
- case SInt:
- *cls = Kl;
- break;
- case STyp:
- classify(a, &typ[seg[s].len], pn, pe);
- continue;
- }
- *pn += seg[s].len;
- }
- Done:
- assert(*pn <= 8);
- *pn = 0;
- }
- }
-}
-
-static void
-typclass(AClass *a, Typ *t)
-{
- int e, n;
- uint sz, al;
-
- sz = t->size;
- al = 1u << t->align;
-
- /* the ABI requires sizes to be rounded
- * up to the nearest multiple of 8, moreover
- * it makes it easy load and store structures
- * in registers
- */
- if (al < 8)
- al = 8;
- sz = (sz + al-1) & -al;
-
- a->size = sz;
- a->align = t->align;
-
- if (t->dark || sz > 16 || sz == 0) {
- /* large or unaligned structures are
- * required to be passed in memory
- */
- a->inmem = 1;
- return;
- }
-
- a->cls[0] = Kx;
- a->cls[1] = Kx;
- a->inmem = 0;
- n = 0;
- e = 0;
- classify(a, t, &n, &e);
-}
-
-static void
-blit(Ref rstk, uint soff, Ref rsrc, uint sz, Fn *fn)
-{
- Ref r, r1;
- uint boff;
-
- /* it's an impolite blit, we might go across the end
- * of the source object a little bit... */
- for (boff=0; sz>0; sz-=8, soff+=8, boff+=8) {
- r = newtmp("abi", Kl, fn);
- r1 = newtmp("abi", Kl, fn);
- emit(Ostorel, 0, R, r, r1);
- emit(Oadd, Kl, r1, rstk, getcon(soff, fn));
- r1 = newtmp("abi", Kl, fn);
- emit(Oload, Kl, r, r1, R);
- emit(Oadd, Kl, r1, rsrc, getcon(boff, fn));
- }
-}
-
-static int
-retr(Ref reg[2], AClass *aret)
-{
- static int retreg[2][2] = {{RAX, RDX}, {XMM0, XMM0+1}};
- int n, k, ca, nr[2];
-
- nr[0] = nr[1] = 0;
- ca = 0;
- for (n=0; (uint)n*8<aret->size; n++) {
- k = KBASE(aret->cls[n]);
- reg[n] = TMP(retreg[k][nr[k]++]);
- ca += 1 << (2 * k);
- }
- return ca;
-}
-
-static void
-selret(Blk *b, Fn *fn)
-{
- int j, k, ca;
- Ref r, r0, reg[2];
- AClass aret;
-
- j = b->jmp.type;
-
- if (!isret(j) || j == Jret0)
- return;
-
- r0 = b->jmp.arg;
- b->jmp.type = Jret0;
-
- if (j == Jretc) {
- typclass(&aret, &typ[fn->retty]);
- if (aret.inmem) {
- assert(rtype(fn->retr) == RTmp);
- emit(Ocopy, Kl, TMP(RAX), fn->retr, R);
- blit(fn->retr, 0, r0, aret.size, fn);
- ca = 1;
- } else {
- ca = retr(reg, &aret);
- if (aret.size > 8) {
- r = newtmp("abi", Kl, fn);
- emit(Oload, Kl, reg[1], r, R);
- emit(Oadd, Kl, r, r0, getcon(8, fn));
- }
- emit(Oload, Kl, reg[0], r0, R);
- }
- } else {
- k = j - Jretw;
- if (KBASE(k) == 0) {
- emit(Ocopy, k, TMP(RAX), r0, R);
- ca = 1;
- } else {
- emit(Ocopy, k, TMP(XMM0), r0, R);
- ca = 1 << 2;
- }
- }
-
- b->jmp.arg = CALL(ca);
-}
-
-static int
-argsclass(Ins *i0, Ins *i1, AClass *ac, int op, AClass *aret, Ref *env)
-{
- int nint, ni, nsse, ns, n, *pn;
- AClass *a;
- Ins *i;
-
- if (aret && aret->inmem)
- nint = 5; /* hidden argument */
- else
- nint = 6;
- nsse = 8;
- for (i=i0, a=ac; i<i1; i++, a++)
- switch (i->op - op + Oarg) {
- case Oarg:
- if (KBASE(i->cls) == 0)
- pn = &nint;
- else
- pn = &nsse;
- if (*pn > 0) {
- --*pn;
- a->inmem = 0;
- } else
- a->inmem = 2;
- a->align = 3;
- a->size = 8;
- a->cls[0] = i->cls;
- break;
- case Oargc:
- n = i->arg[0].val;
- typclass(a, &typ[n]);
- if (a->inmem)
- continue;
- ni = ns = 0;
- for (n=0; (uint)n*8<a->size; n++)
- if (KBASE(a->cls[n]) == 0)
- ni++;
- else
- ns++;
- if (nint >= ni && nsse >= ns) {
- nint -= ni;
- nsse -= ns;
- } else
- a->inmem = 1;
- break;
- case Oarge:
- if (op == Opar)
- *env = i->to;
- else
- *env = i->arg[0];
- break;
- }
-
- return ((6-nint) << 4) | ((8-nsse) << 8);
-}
-
-int rsave[] = {
- RDI, RSI, RDX, RCX, R8, R9, R10, R11, RAX,
- XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
- XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14
-};
-int rclob[] = {RBX, R12, R13, R14, R15};
-
-MAKESURE(rsave_has_correct_size, sizeof rsave == NRSave * sizeof(int));
-MAKESURE(rclob_has_correct_size, sizeof rclob == NRClob * sizeof(int));
-
-/* layout of call's second argument (RCall)
- *
- * 29 12 8 4 3 0
- * |0...00|x|xxxx|xxxx|xx|xx| range
- * | | | | ` gp regs returned (0..2)
- * | | | ` sse regs returned (0..2)
- * | | ` gp regs passed (0..6)
- * | ` sse regs passed (0..8)
- * ` 1 if rax is used to pass data (0..1)
- */
-
-bits
-retregs(Ref r, int p[2])
-{
- bits b;
- int ni, nf;
-
- assert(rtype(r) == RCall);
- b = 0;
- ni = r.val & 3;
- nf = (r.val >> 2) & 3;
- if (ni >= 1)
- b |= BIT(RAX);
- if (ni >= 2)
- b |= BIT(RDX);
- if (nf >= 1)
- b |= BIT(XMM0);
- if (nf >= 2)
- b |= BIT(XMM1);
- if (p) {
- p[0] = ni;
- p[1] = nf;
- }
- return b;
-}
-
-bits
-argregs(Ref r, int p[2])
-{
- bits b;
- int j, ni, nf, ra;
-
- assert(rtype(r) == RCall);
- b = 0;
- ni = (r.val >> 4) & 15;
- nf = (r.val >> 8) & 15;
- ra = (r.val >> 12) & 1;
- for (j=0; j<ni; j++)
- b |= BIT(rsave[j]);
- for (j=0; j<nf; j++)
- b |= BIT(XMM0+j);
- if (p) {
- p[0] = ni + ra;
- p[1] = nf;
- }
- return b | (ra ? BIT(RAX) : 0);
-}
-
-static Ref
-rarg(int ty, int *ni, int *ns)
-{
- if (KBASE(ty) == 0)
- return TMP(rsave[(*ni)++]);
- else
- return TMP(XMM0 + (*ns)++);
-}
-
-static void
-selcall(Fn *fn, Ins *i0, Ins *i1, RAlloc **rap)
-{
- Ins *i;
- AClass *ac, *a, aret;
- int ca, ni, ns, al, varc, envc;
- uint stk, off;
- Ref r, r1, r2, reg[2], env;
- RAlloc *ra;
-
- env = R;
- ac = alloc((i1-i0) * sizeof ac[0]);
-
- if (!req(i1->arg[1], R)) {
- assert(rtype(i1->arg[1]) == RType);
- typclass(&aret, &typ[i1->arg[1].val]);
- ca = argsclass(i0, i1, ac, Oarg, &aret, &env);
- } else
- ca = argsclass(i0, i1, ac, Oarg, 0, &env);
-
- for (stk=0, a=&ac[i1-i0]; a>ac;)
- if ((--a)->inmem) {
- if (a->align > 4)
- err("sysv abi requires alignments of 16 or less");
- stk += a->size;
- if (a->align == 4)
- stk += stk & 15;
- }
- stk += stk & 15;
- if (stk) {
- r = getcon(-(int64_t)stk, fn);
- emit(Osalloc, Kl, R, r, R);
- }
-
- if (!req(i1->arg[1], R)) {
- if (aret.inmem) {
- /* get the return location from eax
- * it saves one callee-save reg */
- r1 = newtmp("abi", Kl, fn);
- emit(Ocopy, Kl, i1->to, TMP(RAX), R);
- ca += 1;
- } else {
- if (aret.size > 8) {
- r = newtmp("abi", Kl, fn);
- aret.ref[1] = newtmp("abi", aret.cls[1], fn);
- emit(Ostorel, 0, R, aret.ref[1], r);
- emit(Oadd, Kl, r, i1->to, getcon(8, fn));
- }
- aret.ref[0] = newtmp("abi", aret.cls[0], fn);
- emit(Ostorel, 0, R, aret.ref[0], i1->to);
- ca += retr(reg, &aret);
- if (aret.size > 8)
- emit(Ocopy, aret.cls[1], aret.ref[1], reg[1], R);
- emit(Ocopy, aret.cls[0], aret.ref[0], reg[0], R);
- r1 = i1->to;
- }
- /* allocate return pad */
- ra = alloc(sizeof *ra);
- /* specific to NAlign == 3 */
- al = aret.align >= 2 ? aret.align - 2 : 0;
- ra->i = (Ins){Oalloc+al, r1, {getcon(aret.size, fn)}, Kl};
- ra->link = (*rap);
- *rap = ra;
- } else {
- ra = 0;
- if (KBASE(i1->cls) == 0) {
- emit(Ocopy, i1->cls, i1->to, TMP(RAX), R);
- ca += 1;
- } else {
- emit(Ocopy, i1->cls, i1->to, TMP(XMM0), R);
- ca += 1 << 2;
- }
- }
- envc = !req(R, env);
- varc = i1->op == Ovacall;
- if (varc && envc)
- err("sysv abi does not support variadic env calls");
- ca |= (varc | envc) << 12;
- emit(Ocall, i1->cls, R, i1->arg[0], CALL(ca));
- if (envc)
- emit(Ocopy, Kl, TMP(RAX), env, R);
- if (varc)
- emit(Ocopy, Kw, TMP(RAX), getcon((ca >> 8) & 15, fn), R);
-
- ni = ns = 0;
- if (ra && aret.inmem)
- emit(Ocopy, Kl, rarg(Kl, &ni, &ns), ra->i.to, R); /* pass hidden argument */
- for (i=i0, a=ac; i<i1; i++, a++) {
- if (a->inmem)
- continue;
- r1 = rarg(a->cls[0], &ni, &ns);
- if (i->op == Oargc) {
- if (a->size > 8) {
- r2 = rarg(a->cls[1], &ni, &ns);
- r = newtmp("abi", Kl, fn);
- emit(Oload, a->cls[1], r2, r, R);
- emit(Oadd, Kl, r, i->arg[1], getcon(8, fn));
- }
- emit(Oload, a->cls[0], r1, i->arg[1], R);
- } else
- emit(Ocopy, i->cls, r1, i->arg[0], R);
- }
-
- if (!stk)
- return;
-
- r = newtmp("abi", Kl, fn);
- for (i=i0, a=ac, off=0; i<i1; i++, a++) {
- if (!a->inmem)
- continue;
- if (i->op == Oargc) {
- if (a->align == 4)
- off += off & 15;
- blit(r, off, i->arg[1], a->size, fn);
- } else {
- r1 = newtmp("abi", Kl, fn);
- emit(Ostorel, 0, R, i->arg[0], r1);
- emit(Oadd, Kl, r1, r, getcon(off, fn));
- }
- off += a->size;
- }
- emit(Osalloc, Kl, r, getcon(stk, fn), R);
-}
-
-static int
-selpar(Fn *fn, Ins *i0, Ins *i1)
-{
- AClass *ac, *a, aret;
- Ins *i;
- int ni, ns, s, al, fa;
- Ref r, env;
-
- env = R;
- ac = alloc((i1-i0) * sizeof ac[0]);
- curi = &insb[NIns];
- ni = ns = 0;
-
- if (fn->retty >= 0) {
- typclass(&aret, &typ[fn->retty]);
- fa = argsclass(i0, i1, ac, Opar, &aret, &env);
- } else
- fa = argsclass(i0, i1, ac, Opar, 0, &env);
-
- for (i=i0, a=ac; i<i1; i++, a++) {
- if (i->op != Oparc || a->inmem)
- continue;
- if (a->size > 8) {
- r = newtmp("abi", Kl, fn);
- a->ref[1] = newtmp("abi", Kl, fn);
- emit(Ostorel, 0, R, a->ref[1], r);
- emit(Oadd, Kl, r, i->to, getcon(8, fn));
- }
- a->ref[0] = newtmp("abi", Kl, fn);
- emit(Ostorel, 0, R, a->ref[0], i->to);
- /* specific to NAlign == 3 */
- al = a->align >= 2 ? a->align - 2 : 0;
- emit(Oalloc+al, Kl, i->to, getcon(a->size, fn), R);
- }
-
- if (fn->retty >= 0 && aret.inmem) {
- r = newtmp("abi", Kl, fn);
- emit(Ocopy, Kl, r, rarg(Kl, &ni, &ns), R);
- fn->retr = r;
- }
-
- for (i=i0, a=ac, s=4; i<i1; i++, a++) {
- switch (a->inmem) {
- case 1:
- if (a->align > 4)
- err("sysv abi requires alignments of 16 or less");
- if (a->align == 4)
- s = (s+3) & -4;
- fn->tmp[i->to.val].slot = -s;
- s += a->size / 4;
- continue;
- case 2:
- emit(Oload, i->cls, i->to, SLOT(-s), R);
- s += 2;
- continue;
- }
- r = rarg(a->cls[0], &ni, &ns);
- if (i->op == Oparc) {
- emit(Ocopy, Kl, a->ref[0], r, R);
- if (a->size > 8) {
- r = rarg(a->cls[1], &ni, &ns);
- emit(Ocopy, Kl, a->ref[1], r, R);
- }
- } else
- emit(Ocopy, i->cls, i->to, r, R);
- }
-
- if (!req(R, env))
- emit(Ocopy, Kl, env, TMP(RAX), R);
-
- return fa | (s*4)<<12;
-}
-
-static Blk *
-split(Fn *fn, Blk *b)
-{
- Blk *bn;
-
- ++fn->nblk;
- bn = blknew();
- bn->nins = &insb[NIns] - curi;
- idup(&bn->ins, curi, bn->nins);
- curi = &insb[NIns];
- bn->visit = ++b->visit;
- snprintf(bn->name, NString, "%s.%d", b->name, b->visit);
- bn->loop = b->loop;
- bn->link = b->link;
- b->link = bn;
- return bn;
-}
-
-static void
-chpred(Blk *b, Blk *bp, Blk *bp1)
-{
- Phi *p;
- uint a;
-
- for (p=b->phi; p; p=p->link) {
- for (a=0; p->blk[a]!=bp; a++)
- assert(a+1<p->narg);
- p->blk[a] = bp1;
- }
-}
-
-void
-selvaarg(Fn *fn, Blk *b, Ins *i)
-{
- Ref loc, lreg, lstk, nr, r0, r1, c4, c8, c16, c, ap;
- Blk *b0, *bstk, *breg;
- int isint;
-
- c4 = getcon(4, fn);
- c8 = getcon(8, fn);
- c16 = getcon(16, fn);
- ap = i->arg[0];
- isint = KBASE(i->cls) == 0;
-
- /* @b [...]
- r0 =l add ap, (0 or 4)
- nr =l loadsw r0
- r1 =w cultw nr, (48 or 176)
- jnz r1, @breg, @bstk
- @breg
- r0 =l add ap, 16
- r1 =l loadl r0
- lreg =l add r1, nr
- r0 =w add nr, (8 or 16)
- r1 =l add ap, (0 or 4)
- storew r0, r1
- @bstk
- r0 =l add ap, 8
- lstk =l loadl r0
- r1 =l add lstk, 8
- storel r1, r0
- @b0
- %loc =l phi @breg %lreg, @bstk %lstk
- i->to =(i->cls) load %loc
- */
-
- loc = newtmp("abi", Kl, fn);
- emit(Oload, i->cls, i->to, loc, R);
- b0 = split(fn, b);
- b0->jmp = b->jmp;
- b0->s1 = b->s1;
- b0->s2 = b->s2;
- if (b->s1)
- chpred(b->s1, b, b0);
- if (b->s2 && b->s2 != b->s1)
- chpred(b->s2, b, b0);
-
- lreg = newtmp("abi", Kl, fn);
- nr = newtmp("abi", Kl, fn);
- r0 = newtmp("abi", Kw, fn);
- r1 = newtmp("abi", Kl, fn);
- emit(Ostorew, Kw, R, r0, r1);
- emit(Oadd, Kl, r1, ap, isint ? CON_Z : c4);
- emit(Oadd, Kw, r0, nr, isint ? c8 : c16);
- r0 = newtmp("abi", Kl, fn);
- r1 = newtmp("abi", Kl, fn);
- emit(Oadd, Kl, lreg, r1, nr);
- emit(Oload, Kl, r1, r0, R);
- emit(Oadd, Kl, r0, ap, c16);
- breg = split(fn, b);
- breg->jmp.type = Jjmp;
- breg->s1 = b0;
-
- lstk = newtmp("abi", Kl, fn);
- r0 = newtmp("abi", Kl, fn);
- r1 = newtmp("abi", Kl, fn);
- emit(Ostorel, Kw, R, r1, r0);
- emit(Oadd, Kl, r1, lstk, c8);
- emit(Oload, Kl, lstk, r0, R);
- emit(Oadd, Kl, r0, ap, c8);
- bstk = split(fn, b);
- bstk->jmp.type = Jjmp;
- bstk->s1 = b0;
-
- b0->phi = alloc(sizeof *b0->phi);
- *b0->phi = (Phi){
- .cls = Kl, .to = loc,
- .narg = 2,
- .blk = {bstk, breg},
- .arg = {lstk, lreg},
- };
- r0 = newtmp("abi", Kl, fn);
- r1 = newtmp("abi", Kw, fn);
- b->jmp.type = Jjnz;
- b->jmp.arg = r1;
- b->s1 = breg;
- b->s2 = bstk;
- c = getcon(isint ? 48 : 176, fn);
- emit(Ocmpw+ICult, Kw, r1, nr, c);
- emit(Oloadsw, Kl, nr, r0, R);
- emit(Oadd, Kl, r0, ap, isint ? CON_Z : c4);
-}
-
-void
-selvastart(Fn *fn, int fa, Ref ap)
-{
- Ref r0, r1;
- int gp, fp, sp;
-
- gp = ((fa >> 4) & 15) * 8;
- fp = 48 + ((fa >> 8) & 15) * 16;
- sp = fa >> 12;
- r0 = newtmp("abi", Kl, fn);
- r1 = newtmp("abi", Kl, fn);
- emit(Ostorel, Kw, R, r1, r0);
- emit(Oadd, Kl, r1, TMP(RBP), getcon(-176, fn));
- emit(Oadd, Kl, r0, ap, getcon(16, fn));
- r0 = newtmp("abi", Kl, fn);
- r1 = newtmp("abi", Kl, fn);
- emit(Ostorel, Kw, R, r1, r0);
- emit(Oadd, Kl, r1, TMP(RBP), getcon(sp, fn));
- emit(Oadd, Kl, r0, ap, getcon(8, fn));
- r0 = newtmp("abi", Kl, fn);
- emit(Ostorew, Kw, R, getcon(fp, fn), r0);
- emit(Oadd, Kl, r0, ap, getcon(4, fn));
- emit(Ostorew, Kw, R, getcon(gp, fn), ap);
-}
-
-void
-abi(Fn *fn)
-{
- Blk *b;
- Ins *i, *i0, *ip;
- RAlloc *ral;
- int n, fa;
-
- for (b=fn->start; b; b=b->link)
- b->visit = 0;
-
- /* lower parameters */
- for (b=fn->start, i=b->ins; i-b->ins<b->nins; i++)
- if (!ispar(i->op))
- break;
- fa = selpar(fn, b->ins, i);
- n = b->nins - (i - b->ins) + (&insb[NIns] - curi);
- i0 = alloc(n * sizeof(Ins));
- ip = icpy(ip = i0, curi, &insb[NIns] - curi);
- ip = icpy(ip, i, &b->ins[b->nins] - i);
- b->nins = n;
- b->ins = i0;
-
- /* lower calls, returns, and vararg instructions */
- ral = 0;
- b = fn->start;
- do {
- if (!(b = b->link))
- b = fn->start; /* do it last */
- if (b->visit)
- continue;
- curi = &insb[NIns];
- selret(b, fn);
- for (i=&b->ins[b->nins]; i!=b->ins;)
- switch ((--i)->op) {
- default:
- emiti(*i);
- break;
- case Ocall:
- case Ovacall:
- for (i0=i; i0>b->ins; i0--)
- if (!isarg((i0-1)->op))
- break;
- selcall(fn, i0, i, &ral);
- i = i0;
- break;
- case Ovastart:
- selvastart(fn, fa, i->arg[0]);
- break;
- case Ovaarg:
- selvaarg(fn, b, i);
- break;
- case Oarg:
- case Oargc:
- die("unreachable");
- }
- if (b == fn->start)
- for (; ral; ral=ral->link)
- emiti(ral->i);
- b->nins = &insb[NIns] - curi;
- idup(&b->ins, curi, b->nins);
- } while (b != fn->start);
-
- if (debug['A']) {
- fprintf(stderr, "\n> After ABI lowering:\n");
- printfn(fn, stderr);
- }
-}
diff --git a/util.c b/util.c
@@ -87,6 +87,36 @@ freeall()
nptr = 1;
}
+/* Decode a comparison opcode.
+ * When op falls in one of the four comparison opcode ranges,
+ * store the comparison code in *pc (codes from the Ocmps and
+ * Ocmpd ranges are offset by NCmpI), store the matching
+ * class (Kw, Kl, Ks or Kd) in *pk, and return 1.
+ * Otherwise return 0 and leave *pk and *pc untouched.
+ */
+int
+iscmp(int op, int *pk, int *pc)
+{
+	struct { int lo, hi, cls, base; } *r, rng[] = {
+		{ Ocmpw, Ocmpw1, Kw, 0 },
+		{ Ocmpl, Ocmpl1, Kl, 0 },
+		{ Ocmps, Ocmps1, Ks, NCmpI },
+		{ Ocmpd, Ocmpd1, Kd, NCmpI },
+	};
+	int n;
+
+	/* ranges are tried in table order, first hit wins */
+	for (r=rng, n=sizeof rng / sizeof rng[0]; n>0; r++, n--)
+		if (r->lo <= op && op <= r->hi) {
+			*pc = r->base + op - r->lo;
+			*pk = r->cls;
+			return 1;
+		}
+	return 0;
+}
+
+/* Look up, in optab, the class recorded for argument
+ * number n of instruction i, given the instruction's
+ * opcode and its class.
+ */
+int
+argcls(Ins *i, int n)
+{
+	int op, k;
+
+	op = i->op;
+	k = i->cls;
+	return optab[op].argcls[n][k];
+}
+
void
emit(int op, int k, Ref to, Ref arg0, Ref arg1)
{
@@ -165,6 +195,42 @@ vgrow(void *vp, ulong len)
*(Vec **)vp = v1;
}
+static int cmptab[][2] ={
+/* Comparison rewriting table, indexed by comparison code;
+ * the Cf* codes are stored at index NCmpI+code.
+ *   column 0 (negation): the code listed as the opposite
+ *     outcome of the comparison, e.g. Ciule -> Ciugt
+ *   column 1 (swap): the code to use once the two operands
+ *     are exchanged, e.g. Ciule -> Ciuge
+ * cmpneg() returns column 0 and cmpop() returns column 1.
+ */
+[Ciule] = {Ciugt, Ciuge},
+[Ciult] = {Ciuge, Ciugt},
+[Ciugt] = {Ciule, Ciult},
+[Ciuge] = {Ciult, Ciule},
+[Cisle] = {Cisgt, Cisge},
+[Cislt] = {Cisge, Cisgt},
+[Cisgt] = {Cisle, Cislt},
+[Cisge] = {Cislt, Cisle},
+[Cieq] = {Cine, Cieq},
+[Cine] = {Cieq, Cine},
+[NCmpI+Cfle] = {NCmpI+Cfgt, NCmpI+Cfge},
+[NCmpI+Cflt] = {NCmpI+Cfge, NCmpI+Cfgt},
+[NCmpI+Cfgt] = {NCmpI+Cfle, NCmpI+Cflt},
+[NCmpI+Cfge] = {NCmpI+Cflt, NCmpI+Cfle},
+[NCmpI+Cfeq] = {NCmpI+Cfne, NCmpI+Cfeq},
+[NCmpI+Cfne] = {NCmpI+Cfeq, NCmpI+Cfne},
+[NCmpI+Cfo] = {NCmpI+Cfuo, NCmpI+Cfo},
+[NCmpI+Cfuo] = {NCmpI+Cfo, NCmpI+Cfuo},
+};
+
+/* Return the negation of comparison code c, taken from
+ * the first column of cmptab; c must be in [0, NCmp).
+ */
+int
+cmpneg(int c)
+{
+	int neg;
+
+	assert(0 <= c && c < NCmp);
+	neg = cmptab[c][0];
+	return neg;
+}
+
+/* Return the comparison code equivalent to c once its two
+ * operands are exchanged, taken from the second column of
+ * cmptab; c must be in [0, NCmp).
+ */
+int
+cmpop(int c)
+{
+	int swapped;
+
+	assert(0 <= c && c < NCmp);
+	swapped = cmptab[c][1];
+	return swapped;
+}
+
int
clsmerge(short *pk, short k)
{
@@ -257,6 +323,30 @@ addcon(Con *c0, Con *c1)
}
+/* Emit the instructions copying sz bytes from address rsrc
+ * to address rdst+doff, in function fn.
+ * The copy proceeds with the widest chunk that still fits:
+ * 8-byte chunks first, then 4, 2 and 1 bytes for the tail,
+ * so exactly sz bytes are transferred.  Source offsets start
+ * at 0, destination offsets at doff.
+ * NOTE(review): for each chunk the calls to emit() are made
+ * in the order store, add, load, add (consumer before
+ * producer); this presumably relies on emit() filling the
+ * instruction buffer backwards -- confirm against emit().
+ */
void
+blit(Ref rdst, uint doff, Ref rsrc, uint sz, Fn *fn)
+{
+	/* store op, load op, load class, chunk size in bytes;
+	 * rows must be sorted by decreasing size and end with
+	 * size 1 so that the outer loop always terminates
+	 */
+	struct { int st, ld, cls, size; } *p, tbl[] = {
+		{ Ostorel, Oload,   Kl, 8 },
+		{ Ostorew, Oload,   Kw, 4 }, /* size was 8: the row never matched */
+		{ Ostoreh, Oloaduh, Kw, 2 },
+		{ Ostoreb, Oloadub, Kw, 1 }
+	};
+	Ref r, r1;
+	uint boff, s;
+
+	for (boff=0, p=tbl; sz; p++)
+		for (s=p->size; sz>=s; sz-=s, doff+=s, boff+=s) {
+			r = newtmp("blt", Kl, fn);
+			r1 = newtmp("blt", Kl, fn);
+			/* store the chunk held in r at rdst+doff */
+			emit(p->st, 0, R, r, r1);
+			emit(Oadd, Kl, r1, rdst, getcon(doff, fn));
+			r1 = newtmp("blt", Kl, fn);
+			/* load the chunk from rsrc+boff into r */
+			emit(p->ld, p->cls, r, r1, R);
+			emit(Oadd, Kl, r1, rsrc, getcon(boff, fn));
+		}
+}
+
+void
bsinit(BSet *bs, uint n)
{
n = (n + NBit-1) / NBit;