commit 4e93eeaa3b63b6ae50954a29662cc3ea6be48b23
parent 8e040d58615e49a63fb50dda5dc695e96a54a7bc
Author: Michael Forney <mforney@mforney.org>
Date: Sat, 12 Feb 2022 02:27:50 -0800
add rv64 backend
It is mostly complete, but still has a few ABI bugs when passing
floats in structs, or when structs are passed partly in register,
and partly on stack.
Diffstat:
M | Makefile | | | 14 | ++++++++++++-- |
M | all.h | | | 2 | +- |
M | doc/il.txt | | | 1 | + |
A | doc/rv64.txt | | | 20 | ++++++++++++++++++++ |
M | main.c | | | 2 | ++ |
M | ops.h | | | 266 | +++++++++++++++++++++++++++++++++++++++++-------------------------------------- |
A | rv64/abi.c | | | 584 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | rv64/all.h | | | 49 | +++++++++++++++++++++++++++++++++++++++++++++++++ |
A | rv64/emit.c | | | 499 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | rv64/isel.c | | | 278 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | rv64/targ.c | | | 53 | +++++++++++++++++++++++++++++++++++++++++++++++++++++ |
M | test/dark.ssa | | | 2 | +- |
M | tools/test.sh | | | 24 | ++++++++++++++++++++++++ |
13 files changed, 1661 insertions(+), 133 deletions(-)
diff --git a/Makefile b/Makefile
@@ -7,11 +7,13 @@ SRC = main.c util.c parse.c cfg.c mem.c ssa.c alias.c load.c copy.c \
fold.c live.c spill.c rega.c gas.c
AMD64SRC = amd64/targ.c amd64/sysv.c amd64/isel.c amd64/emit.c
ARM64SRC = arm64/targ.c arm64/abi.c arm64/isel.c arm64/emit.c
-SRCALL = $(SRC) $(AMD64SRC) $(ARM64SRC)
+RV64SRC = rv64/targ.c rv64/abi.c rv64/isel.c rv64/emit.c
+SRCALL = $(SRC) $(AMD64SRC) $(ARM64SRC) $(RV64SRC)
AMD64OBJ = $(AMD64SRC:%.c=$(OBJDIR)/%.o)
ARM64OBJ = $(ARM64SRC:%.c=$(OBJDIR)/%.o)
-OBJ = $(SRC:%.c=$(OBJDIR)/%.o) $(AMD64OBJ) $(ARM64OBJ)
+RV64OBJ = $(RV64SRC:%.c=$(OBJDIR)/%.o)
+OBJ = $(SRC:%.c=$(OBJDIR)/%.o) $(AMD64OBJ) $(ARM64OBJ) $(RV64OBJ)
CFLAGS += -Wall -Wextra -std=c99 -g -pedantic
@@ -27,11 +29,13 @@ $(OBJDIR)/timestamp:
@mkdir -p $(OBJDIR)
@mkdir -p $(OBJDIR)/amd64
@mkdir -p $(OBJDIR)/arm64
+ @mkdir -p $(OBJDIR)/rv64
@touch $@
$(OBJ): all.h ops.h
$(AMD64OBJ): amd64/all.h
$(ARM64OBJ): arm64/all.h
+$(RV64OBJ): rv64/all.h
$(OBJDIR)/main.o: config.h
config.h:
@@ -46,6 +50,9 @@ config.h:
*aarch64*) \
echo "#define Deftgt T_arm64"; \
;; \
+ *riscv64*) \
+ echo "#define Deftgt T_rv64"; \
+ ;; \
*) \
echo "#define Deftgt T_amd64_sysv";\
;; \
@@ -72,6 +79,9 @@ check: $(OBJDIR)/$(BIN)
check-arm64: $(OBJDIR)/$(BIN)
TARGET=arm64 tools/test.sh all
+check-rv64: $(OBJDIR)/$(BIN)
+ TARGET=rv64 tools/test.sh all
+
src:
@echo $(SRCALL)
diff --git a/all.h b/all.h
@@ -179,7 +179,7 @@ enum {
#define isarg(o) INRANGE(o, Oarg, Oargv)
#define isret(j) INRANGE(j, Jret0, Jretc)
-enum Class {
+enum {
Kx = -1, /* "top" class (see usecheck() and clsmerge()) */
Kw,
Kl,
diff --git a/doc/il.txt b/doc/il.txt
@@ -856,6 +856,7 @@ alignment required by all the targets.
type :valist = align 8 { 24 } # For amd64_sysv
type :valist = align 8 { 32 } # For arm64
+ type :valist = align 8 { 8 } # For rv64
The following example defines a variadic function adding
its first three arguments.
diff --git a/doc/rv64.txt b/doc/rv64.txt
@@ -0,0 +1,20 @@
+=========
+RISC-V 64
+=========
+
+- Known issues
+--------------
+
+Passing and returning structs that contain floats does not follow the ABI yet.
+
+- Possible improvements
+-----------------------
+
+rv64_isel() could fuse a comparison used only by a jnz into a single b{lt,ge}[u] branch.
+
+- Helpful links
+---------------
+
+RISC-V spec: https://github.com/riscv/riscv-isa-manual/releases/latest/download/riscv-spec.pdf
+ASM manual: https://github.com/riscv-non-isa/riscv-asm-manual/blob/master/riscv-asm.md
+psABI: https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc
diff --git a/main.c b/main.c
@@ -7,6 +7,7 @@ Target T;
extern Target T_amd64_sysv;
extern Target T_arm64;
+extern Target T_rv64;
static struct TMap {
char *name;
@@ -14,6 +15,7 @@ static struct TMap {
} tmap[] = {
{ "amd64_sysv", &T_amd64_sysv },
{ "arm64", &T_arm64 },
+ { "rv64", &T_rv64 },
{ 0, 0 }
};
diff --git a/ops.h b/ops.h
@@ -2,6 +2,11 @@
#define X(NMemArgs, SetsZeroFlag, LeavesFlags)
#endif
+#ifndef V /* riscv64 */
+ #define V(Imm)
+#endif
+
+
#define T(a,b,c,d,e,f,g,h) { \
{[Kw]=K##a, [Kl]=K##b, [Ks]=K##c, [Kd]=K##d}, \
{[Kw]=K##e, [Kl]=K##f, [Ks]=K##g, [Kd]=K##h} \
@@ -13,108 +18,108 @@
/*********************/
/* Arithmetic and Bits */
-O(add, T(w,l,s,d, w,l,s,d), 1) X(2, 1, 0)
-O(sub, T(w,l,s,d, w,l,s,d), 1) X(2, 1, 0)
-O(neg, T(w,l,s,d, x,x,x,x), 1) X(1, 1, 0)
-O(div, T(w,l,s,d, w,l,s,d), 1) X(0, 0, 0)
-O(rem, T(w,l,e,e, w,l,e,e), 1) X(0, 0, 0)
-O(udiv, T(w,l,e,e, w,l,e,e), 1) X(0, 0, 0)
-O(urem, T(w,l,e,e, w,l,e,e), 1) X(0, 0, 0)
-O(mul, T(w,l,s,d, w,l,s,d), 1) X(2, 0, 0)
-O(and, T(w,l,e,e, w,l,e,e), 1) X(2, 1, 0)
-O(or, T(w,l,e,e, w,l,e,e), 1) X(2, 1, 0)
-O(xor, T(w,l,e,e, w,l,e,e), 1) X(2, 1, 0)
-O(sar, T(w,l,e,e, w,w,e,e), 1) X(1, 1, 0)
-O(shr, T(w,l,e,e, w,w,e,e), 1) X(1, 1, 0)
-O(shl, T(w,l,e,e, w,w,e,e), 1) X(1, 1, 0)
+O(add, T(w,l,s,d, w,l,s,d), 1) X(2, 1, 0) V(1)
+O(sub, T(w,l,s,d, w,l,s,d), 1) X(2, 1, 0) V(0)
+O(neg, T(w,l,s,d, x,x,x,x), 1) X(1, 1, 0) V(0)
+O(div, T(w,l,s,d, w,l,s,d), 1) X(0, 0, 0) V(0)
+O(rem, T(w,l,e,e, w,l,e,e), 1) X(0, 0, 0) V(0)
+O(udiv, T(w,l,e,e, w,l,e,e), 1) X(0, 0, 0) V(0)
+O(urem, T(w,l,e,e, w,l,e,e), 1) X(0, 0, 0) V(0)
+O(mul, T(w,l,s,d, w,l,s,d), 1) X(2, 0, 0) V(0)
+O(and, T(w,l,e,e, w,l,e,e), 1) X(2, 1, 0) V(1)
+O(or, T(w,l,e,e, w,l,e,e), 1) X(2, 1, 0) V(1)
+O(xor, T(w,l,e,e, w,l,e,e), 1) X(2, 1, 0) V(1)
+O(sar, T(w,l,e,e, w,w,e,e), 1) X(1, 1, 0) V(1)
+O(shr, T(w,l,e,e, w,w,e,e), 1) X(1, 1, 0) V(1)
+O(shl, T(w,l,e,e, w,w,e,e), 1) X(1, 1, 0) V(1)
/* Comparisons */
-O(ceqw, T(w,w,e,e, w,w,e,e), 1) X(0, 1, 0)
-O(cnew, T(w,w,e,e, w,w,e,e), 1) X(0, 1, 0)
-O(csgew, T(w,w,e,e, w,w,e,e), 1) X(0, 1, 0)
-O(csgtw, T(w,w,e,e, w,w,e,e), 1) X(0, 1, 0)
-O(cslew, T(w,w,e,e, w,w,e,e), 1) X(0, 1, 0)
-O(csltw, T(w,w,e,e, w,w,e,e), 1) X(0, 1, 0)
-O(cugew, T(w,w,e,e, w,w,e,e), 1) X(0, 1, 0)
-O(cugtw, T(w,w,e,e, w,w,e,e), 1) X(0, 1, 0)
-O(culew, T(w,w,e,e, w,w,e,e), 1) X(0, 1, 0)
-O(cultw, T(w,w,e,e, w,w,e,e), 1) X(0, 1, 0)
-
-O(ceql, T(l,l,e,e, l,l,e,e), 1) X(0, 1, 0)
-O(cnel, T(l,l,e,e, l,l,e,e), 1) X(0, 1, 0)
-O(csgel, T(l,l,e,e, l,l,e,e), 1) X(0, 1, 0)
-O(csgtl, T(l,l,e,e, l,l,e,e), 1) X(0, 1, 0)
-O(cslel, T(l,l,e,e, l,l,e,e), 1) X(0, 1, 0)
-O(csltl, T(l,l,e,e, l,l,e,e), 1) X(0, 1, 0)
-O(cugel, T(l,l,e,e, l,l,e,e), 1) X(0, 1, 0)
-O(cugtl, T(l,l,e,e, l,l,e,e), 1) X(0, 1, 0)
-O(culel, T(l,l,e,e, l,l,e,e), 1) X(0, 1, 0)
-O(cultl, T(l,l,e,e, l,l,e,e), 1) X(0, 1, 0)
-
-O(ceqs, T(s,s,e,e, s,s,e,e), 1) X(0, 1, 0)
-O(cges, T(s,s,e,e, s,s,e,e), 1) X(0, 1, 0)
-O(cgts, T(s,s,e,e, s,s,e,e), 1) X(0, 1, 0)
-O(cles, T(s,s,e,e, s,s,e,e), 1) X(0, 1, 0)
-O(clts, T(s,s,e,e, s,s,e,e), 1) X(0, 1, 0)
-O(cnes, T(s,s,e,e, s,s,e,e), 1) X(0, 1, 0)
-O(cos, T(s,s,e,e, s,s,e,e), 1) X(0, 1, 0)
-O(cuos, T(s,s,e,e, s,s,e,e), 1) X(0, 1, 0)
-
-O(ceqd, T(d,d,e,e, d,d,e,e), 1) X(0, 1, 0)
-O(cged, T(d,d,e,e, d,d,e,e), 1) X(0, 1, 0)
-O(cgtd, T(d,d,e,e, d,d,e,e), 1) X(0, 1, 0)
-O(cled, T(d,d,e,e, d,d,e,e), 1) X(0, 1, 0)
-O(cltd, T(d,d,e,e, d,d,e,e), 1) X(0, 1, 0)
-O(cned, T(d,d,e,e, d,d,e,e), 1) X(0, 1, 0)
-O(cod, T(d,d,e,e, d,d,e,e), 1) X(0, 1, 0)
-O(cuod, T(d,d,e,e, d,d,e,e), 1) X(0, 1, 0)
+O(ceqw, T(w,w,e,e, w,w,e,e), 1) X(0, 1, 0) V(0)
+O(cnew, T(w,w,e,e, w,w,e,e), 1) X(0, 1, 0) V(0)
+O(csgew, T(w,w,e,e, w,w,e,e), 1) X(0, 1, 0) V(0)
+O(csgtw, T(w,w,e,e, w,w,e,e), 1) X(0, 1, 0) V(0)
+O(cslew, T(w,w,e,e, w,w,e,e), 1) X(0, 1, 0) V(0)
+O(csltw, T(w,w,e,e, w,w,e,e), 1) X(0, 1, 0) V(1)
+O(cugew, T(w,w,e,e, w,w,e,e), 1) X(0, 1, 0) V(0)
+O(cugtw, T(w,w,e,e, w,w,e,e), 1) X(0, 1, 0) V(0)
+O(culew, T(w,w,e,e, w,w,e,e), 1) X(0, 1, 0) V(0)
+O(cultw, T(w,w,e,e, w,w,e,e), 1) X(0, 1, 0) V(1)
+
+O(ceql, T(l,l,e,e, l,l,e,e), 1) X(0, 1, 0) V(0)
+O(cnel, T(l,l,e,e, l,l,e,e), 1) X(0, 1, 0) V(0)
+O(csgel, T(l,l,e,e, l,l,e,e), 1) X(0, 1, 0) V(0)
+O(csgtl, T(l,l,e,e, l,l,e,e), 1) X(0, 1, 0) V(0)
+O(cslel, T(l,l,e,e, l,l,e,e), 1) X(0, 1, 0) V(0)
+O(csltl, T(l,l,e,e, l,l,e,e), 1) X(0, 1, 0) V(1)
+O(cugel, T(l,l,e,e, l,l,e,e), 1) X(0, 1, 0) V(0)
+O(cugtl, T(l,l,e,e, l,l,e,e), 1) X(0, 1, 0) V(0)
+O(culel, T(l,l,e,e, l,l,e,e), 1) X(0, 1, 0) V(0)
+O(cultl, T(l,l,e,e, l,l,e,e), 1) X(0, 1, 0) V(1)
+
+O(ceqs, T(s,s,e,e, s,s,e,e), 1) X(0, 1, 0) V(0)
+O(cges, T(s,s,e,e, s,s,e,e), 1) X(0, 1, 0) V(0)
+O(cgts, T(s,s,e,e, s,s,e,e), 1) X(0, 1, 0) V(0)
+O(cles, T(s,s,e,e, s,s,e,e), 1) X(0, 1, 0) V(0)
+O(clts, T(s,s,e,e, s,s,e,e), 1) X(0, 1, 0) V(0)
+O(cnes, T(s,s,e,e, s,s,e,e), 1) X(0, 1, 0) V(0)
+O(cos, T(s,s,e,e, s,s,e,e), 1) X(0, 1, 0) V(0)
+O(cuos, T(s,s,e,e, s,s,e,e), 1) X(0, 1, 0) V(0)
+
+O(ceqd, T(d,d,e,e, d,d,e,e), 1) X(0, 1, 0) V(0)
+O(cged, T(d,d,e,e, d,d,e,e), 1) X(0, 1, 0) V(0)
+O(cgtd, T(d,d,e,e, d,d,e,e), 1) X(0, 1, 0) V(0)
+O(cled, T(d,d,e,e, d,d,e,e), 1) X(0, 1, 0) V(0)
+O(cltd, T(d,d,e,e, d,d,e,e), 1) X(0, 1, 0) V(0)
+O(cned, T(d,d,e,e, d,d,e,e), 1) X(0, 1, 0) V(0)
+O(cod, T(d,d,e,e, d,d,e,e), 1) X(0, 1, 0) V(0)
+O(cuod, T(d,d,e,e, d,d,e,e), 1) X(0, 1, 0) V(0)
/* Memory */
-O(storeb, T(w,e,e,e, m,e,e,e), 0) X(0, 0, 1)
-O(storeh, T(w,e,e,e, m,e,e,e), 0) X(0, 0, 1)
-O(storew, T(w,e,e,e, m,e,e,e), 0) X(0, 0, 1)
-O(storel, T(l,e,e,e, m,e,e,e), 0) X(0, 0, 1)
-O(stores, T(s,e,e,e, m,e,e,e), 0) X(0, 0, 1)
-O(stored, T(d,e,e,e, m,e,e,e), 0) X(0, 0, 1)
-
-O(loadsb, T(m,m,e,e, x,x,e,e), 0) X(0, 0, 1)
-O(loadub, T(m,m,e,e, x,x,e,e), 0) X(0, 0, 1)
-O(loadsh, T(m,m,e,e, x,x,e,e), 0) X(0, 0, 1)
-O(loaduh, T(m,m,e,e, x,x,e,e), 0) X(0, 0, 1)
-O(loadsw, T(m,m,e,e, x,x,e,e), 0) X(0, 0, 1)
-O(loaduw, T(m,m,e,e, x,x,e,e), 0) X(0, 0, 1)
-O(load, T(m,m,m,m, x,x,x,x), 0) X(0, 0, 1)
+O(storeb, T(w,e,e,e, m,e,e,e), 0) X(0, 0, 1) V(0)
+O(storeh, T(w,e,e,e, m,e,e,e), 0) X(0, 0, 1) V(0)
+O(storew, T(w,e,e,e, m,e,e,e), 0) X(0, 0, 1) V(0)
+O(storel, T(l,e,e,e, m,e,e,e), 0) X(0, 0, 1) V(0)
+O(stores, T(s,e,e,e, m,e,e,e), 0) X(0, 0, 1) V(0)
+O(stored, T(d,e,e,e, m,e,e,e), 0) X(0, 0, 1) V(0)
+
+O(loadsb, T(m,m,e,e, x,x,e,e), 0) X(0, 0, 1) V(0)
+O(loadub, T(m,m,e,e, x,x,e,e), 0) X(0, 0, 1) V(0)
+O(loadsh, T(m,m,e,e, x,x,e,e), 0) X(0, 0, 1) V(0)
+O(loaduh, T(m,m,e,e, x,x,e,e), 0) X(0, 0, 1) V(0)
+O(loadsw, T(m,m,e,e, x,x,e,e), 0) X(0, 0, 1) V(0)
+O(loaduw, T(m,m,e,e, x,x,e,e), 0) X(0, 0, 1) V(0)
+O(load, T(m,m,m,m, x,x,x,x), 0) X(0, 0, 1) V(0)
/* Extensions and Truncations */
-O(extsb, T(w,w,e,e, x,x,e,e), 1) X(0, 0, 1)
-O(extub, T(w,w,e,e, x,x,e,e), 1) X(0, 0, 1)
-O(extsh, T(w,w,e,e, x,x,e,e), 1) X(0, 0, 1)
-O(extuh, T(w,w,e,e, x,x,e,e), 1) X(0, 0, 1)
-O(extsw, T(e,w,e,e, e,x,e,e), 1) X(0, 0, 1)
-O(extuw, T(e,w,e,e, e,x,e,e), 1) X(0, 0, 1)
-
-O(exts, T(e,e,e,s, e,e,e,x), 1) X(0, 0, 1)
-O(truncd, T(e,e,d,e, e,e,x,e), 1) X(0, 0, 1)
-O(stosi, T(s,s,e,e, x,x,e,e), 1) X(0, 0, 1)
-O(stoui, T(s,s,e,e, x,x,e,e), 1) X(0, 0, 1)
-O(dtosi, T(d,d,e,e, x,x,e,e), 1) X(0, 0, 1)
-O(dtoui, T(d,d,e,e, x,x,e,e), 1) X(0, 0, 1)
-O(swtof, T(e,e,w,w, e,e,x,x), 1) X(0, 0, 1)
-O(uwtof, T(e,e,w,w, e,e,x,x), 1) X(0, 0, 1)
-O(sltof, T(e,e,l,l, e,e,x,x), 1) X(0, 0, 1)
-O(ultof, T(e,e,l,l, e,e,x,x), 1) X(0, 0, 1)
-O(cast, T(s,d,w,l, x,x,x,x), 1) X(0, 0, 1)
+O(extsb, T(w,w,e,e, x,x,e,e), 1) X(0, 0, 1) V(0)
+O(extub, T(w,w,e,e, x,x,e,e), 1) X(0, 0, 1) V(0)
+O(extsh, T(w,w,e,e, x,x,e,e), 1) X(0, 0, 1) V(0)
+O(extuh, T(w,w,e,e, x,x,e,e), 1) X(0, 0, 1) V(0)
+O(extsw, T(e,w,e,e, e,x,e,e), 1) X(0, 0, 1) V(0)
+O(extuw, T(e,w,e,e, e,x,e,e), 1) X(0, 0, 1) V(0)
+
+O(exts, T(e,e,e,s, e,e,e,x), 1) X(0, 0, 1) V(0)
+O(truncd, T(e,e,d,e, e,e,x,e), 1) X(0, 0, 1) V(0)
+O(stosi, T(s,s,e,e, x,x,e,e), 1) X(0, 0, 1) V(0)
+O(stoui, T(s,s,e,e, x,x,e,e), 1) X(0, 0, 1) V(0)
+O(dtosi, T(d,d,e,e, x,x,e,e), 1) X(0, 0, 1) V(0)
+O(dtoui, T(d,d,e,e, x,x,e,e), 1) X(0, 0, 1) V(0)
+O(swtof, T(e,e,w,w, e,e,x,x), 1) X(0, 0, 1) V(0)
+O(uwtof, T(e,e,w,w, e,e,x,x), 1) X(0, 0, 1) V(0)
+O(sltof, T(e,e,l,l, e,e,x,x), 1) X(0, 0, 1) V(0)
+O(ultof, T(e,e,l,l, e,e,x,x), 1) X(0, 0, 1) V(0)
+O(cast, T(s,d,w,l, x,x,x,x), 1) X(0, 0, 1) V(0)
/* Stack Allocation */
-O(alloc4, T(e,l,e,e, e,x,e,e), 0) X(0, 0, 0)
-O(alloc8, T(e,l,e,e, e,x,e,e), 0) X(0, 0, 0)
-O(alloc16, T(e,l,e,e, e,x,e,e), 0) X(0, 0, 0)
+O(alloc4, T(e,l,e,e, e,x,e,e), 0) X(0, 0, 0) V(0)
+O(alloc8, T(e,l,e,e, e,x,e,e), 0) X(0, 0, 0) V(0)
+O(alloc16, T(e,l,e,e, e,x,e,e), 0) X(0, 0, 0) V(0)
/* Variadic Function Helpers */
-O(vaarg, T(m,m,m,m, x,x,x,x), 0) X(0, 0, 0)
-O(vastart, T(m,e,e,e, x,e,e,e), 0) X(0, 0, 0)
+O(vaarg, T(m,m,m,m, x,x,x,x), 0) X(0, 0, 0) V(0)
+O(vastart, T(m,e,e,e, x,e,e,e), 0) X(0, 0, 0) V(0)
-O(copy, T(w,l,s,d, x,x,x,x), 0) X(0, 0, 1)
+O(copy, T(w,l,s,d, x,x,x,x), 0) X(0, 0, 1) V(0)
/****************************************/
@@ -122,52 +127,55 @@ O(copy, T(w,l,s,d, x,x,x,x), 0) X(0, 0, 1)
/****************************************/
/* Miscellaneous and Architecture-Specific Operations */
-O(nop, T(x,x,x,x, x,x,x,x), 0) X(0, 0, 1)
-O(addr, T(m,m,e,e, x,x,e,e), 0) X(0, 0, 1)
-O(swap, T(w,l,s,d, w,l,s,d), 0) X(1, 0, 0)
-O(sign, T(w,l,e,e, x,x,e,e), 0) X(0, 0, 0)
-O(salloc, T(e,l,e,e, e,x,e,e), 0) X(0, 0, 0)
-O(xidiv, T(w,l,e,e, x,x,e,e), 0) X(1, 0, 0)
-O(xdiv, T(w,l,e,e, x,x,e,e), 0) X(1, 0, 0)
-O(xcmp, T(w,l,s,d, w,l,s,d), 0) X(1, 1, 0)
-O(xtest, T(w,l,e,e, w,l,e,e), 0) X(1, 1, 0)
-O(acmp, T(w,l,e,e, w,l,e,e), 0) X(0, 0, 0)
-O(acmn, T(w,l,e,e, w,l,e,e), 0) X(0, 0, 0)
-O(afcmp, T(e,e,s,d, e,e,s,d), 0) X(0, 0, 0)
+O(nop, T(x,x,x,x, x,x,x,x), 0) X(0, 0, 1) V(0)
+O(addr, T(m,m,e,e, x,x,e,e), 0) X(0, 0, 1) V(0)
+O(swap, T(w,l,s,d, w,l,s,d), 0) X(1, 0, 0) V(0)
+O(sign, T(w,l,e,e, x,x,e,e), 0) X(0, 0, 0) V(0)
+O(salloc, T(e,l,e,e, e,x,e,e), 0) X(0, 0, 0) V(0)
+O(xidiv, T(w,l,e,e, x,x,e,e), 0) X(1, 0, 0) V(0)
+O(xdiv, T(w,l,e,e, x,x,e,e), 0) X(1, 0, 0) V(0)
+O(xcmp, T(w,l,s,d, w,l,s,d), 0) X(1, 1, 0) V(0)
+O(xtest, T(w,l,e,e, w,l,e,e), 0) X(1, 1, 0) V(0)
+O(acmp, T(w,l,e,e, w,l,e,e), 0) X(0, 0, 0) V(0)
+O(acmn, T(w,l,e,e, w,l,e,e), 0) X(0, 0, 0) V(0)
+O(afcmp, T(e,e,s,d, e,e,s,d), 0) X(0, 0, 0) V(0)
+O(reqz, T(w,l,e,e, x,x,e,e), 0) X(0, 0, 0) V(0)
+O(rnez, T(w,l,e,e, x,x,e,e), 0) X(0, 0, 0) V(0)
/* Arguments, Parameters, and Calls */
-O(par, T(x,x,x,x, x,x,x,x), 0) X(0, 0, 0)
-O(parc, T(e,x,e,e, e,x,e,e), 0) X(0, 0, 0)
-O(pare, T(e,x,e,e, e,x,e,e), 0) X(0, 0, 0)
-O(arg, T(w,l,s,d, x,x,x,x), 0) X(0, 0, 0)
-O(argc, T(e,x,e,e, e,l,e,e), 0) X(0, 0, 0)
-O(arge, T(e,l,e,e, e,x,e,e), 0) X(0, 0, 0)
-O(argv, T(x,x,x,x, x,x,x,x), 0) X(0, 0, 0)
-O(call, T(m,m,m,m, x,x,x,x), 0) X(0, 0, 0)
+O(par, T(x,x,x,x, x,x,x,x), 0) X(0, 0, 0) V(0)
+O(parc, T(e,x,e,e, e,x,e,e), 0) X(0, 0, 0) V(0)
+O(pare, T(e,x,e,e, e,x,e,e), 0) X(0, 0, 0) V(0)
+O(arg, T(w,l,s,d, x,x,x,x), 0) X(0, 0, 0) V(0)
+O(argc, T(e,x,e,e, e,l,e,e), 0) X(0, 0, 0) V(0)
+O(arge, T(e,l,e,e, e,x,e,e), 0) X(0, 0, 0) V(0)
+O(argv, T(x,x,x,x, x,x,x,x), 0) X(0, 0, 0) V(0)
+O(call, T(m,m,m,m, x,x,x,x), 0) X(0, 0, 0) V(0)
/* Flags Setting */
-O(flagieq, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
-O(flagine, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
-O(flagisge, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
-O(flagisgt, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
-O(flagisle, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
-O(flagislt, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
-O(flagiuge, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
-O(flagiugt, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
-O(flagiule, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
-O(flagiult, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
-O(flagfeq, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
-O(flagfge, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
-O(flagfgt, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
-O(flagfle, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
-O(flagflt, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
-O(flagfne, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
-O(flagfo, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
-O(flagfuo, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1)
+O(flagieq, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1) V(0)
+O(flagine, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1) V(0)
+O(flagisge, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1) V(0)
+O(flagisgt, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1) V(0)
+O(flagisle, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1) V(0)
+O(flagislt, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1) V(0)
+O(flagiuge, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1) V(0)
+O(flagiugt, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1) V(0)
+O(flagiule, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1) V(0)
+O(flagiult, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1) V(0)
+O(flagfeq, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1) V(0)
+O(flagfge, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1) V(0)
+O(flagfgt, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1) V(0)
+O(flagfle, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1) V(0)
+O(flagflt, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1) V(0)
+O(flagfne, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1) V(0)
+O(flagfo, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1) V(0)
+O(flagfuo, T(x,x,e,e, x,x,e,e), 0) X(0, 0, 1) V(0)
#undef T
#undef X
+#undef V
#undef O
/*
diff --git a/rv64/abi.c b/rv64/abi.c
@@ -0,0 +1,584 @@
+#include "all.h"
+
+typedef struct Class Class;
+typedef struct Insl Insl;
+typedef struct Params Params;
+/*
+ * Bit flags kept in Class.class; they record how an argument,
+ * parameter or return value is passed.
+ */
+enum {
+ Cptr = 1, /* replaced by a pointer */
+ Cstk1 = 2, /* pass first XLEN on the stack */
+ Cstk2 = 4, /* pass second XLEN on the stack */
+ Cstk = Cstk1 | Cstk2, /* mask: some part is on the stack */
+ Cfpint = 8, /* float passed like integer */
+};
+/*
+ * Classification of one argument, parameter or return value,
+ * filled in by typclass() and argsclass().
+ */
+struct Class {
+ char class; /* combination of Cptr, Cstk1, Cstk2 and Cfpint */
+ uint size; /* bytes: struct size rounded up to 8, or 8 for scalars and pointers */
+ Typ *t; /* aggregate type, set by typclass() */
+ uchar nreg; /* entries of reg[] and cls[] filled by typclass() */
+ uchar ngp; /* integer registers used */
+ uchar nfp; /* floating-point registers used */
+ int reg[2]; /* register assigned to each piece */
+ int cls[2]; /* class of each piece */
+};
+/*
+ * Node of a singly linked list of instructions; stkblob()
+ * pushes the stack allocations it creates onto such a list.
+ */
+struct Insl {
+ Ins i;
+ Insl *link; /* next node, 0 at the end of the list */
+};
+/*
+ * Summary of parameter passing, returned by selpar() and
+ * consumed by selvastart().
+ */
+struct Params {
+ int ngp; /* bits 4..7 of the argsclass() result */
+ int nfp; /* bits 8..11 of the argsclass() result */
+ int stk; /* stack offset for varargs */
+};
+/* Argument registers, in the order argsclass() hands them out. */
+static int gpreg[] = { A0, A1, A2, A3, A4, A5, A6, A7};
+static int fpreg[] = {FA0, FA1, FA2, FA3, FA4, FA5, FA6, FA7};
+
+/* layout of call's second argument (RCall)
+ *
+ *  29   12   8    4  2  0
+ *  |0.00|xxxx|xxxx|xx|xx|                  range
+ *        |    |    |  ` gp regs returned (0..2)
+ *        |    |    ` fp regs returned (0..2)
+ *        |    ` gp regs passed (0..8)
+ *        ` fp regs passed (0..8)
+ */
+
+/* Decode the return-register fields of an RCall reference.
+ *
+ * Bits 0..1 of r.val give the number of integer return
+ * registers and bits 2..3 the number of floating-point return
+ * registers.  When p is non-null the two counts are written to
+ * p[0] and p[1].  The returned mask has one bit set for every
+ * counted register, counting up from A0 and from FA0.
+ */
+bits
+rv64_retregs(Ref r, int p[2])
+{
+	bits mask;
+	int gpcnt, fpcnt, n;
+
+	assert(rtype(r) == RCall);
+	gpcnt = r.val & 3;
+	fpcnt = (r.val >> 2) & 3;
+	if (p) {
+		p[0] = gpcnt;
+		p[1] = fpcnt;
+	}
+	mask = 0;
+	for (n=0; n<gpcnt; n++)
+		mask |= BIT(A0+n);
+	for (n=0; n<fpcnt; n++)
+		mask |= BIT(FA0+n);
+	return mask;
+}
+
+/* Decode the argument-register fields of an RCall reference.
+ *
+ * Bits 4..7 of r.val give the number of integer argument
+ * registers and bits 8..11 the number of floating-point
+ * argument registers.  When p is non-null the two counts are
+ * written to p[0] and p[1].  The returned mask has one bit set
+ * for every counted register, counting up from A0 and from FA0.
+ */
+bits
+rv64_argregs(Ref r, int p[2])
+{
+	bits b;
+	int ngp, nfp;
+
+	assert(rtype(r) == RCall);
+	ngp = (r.val >> 4) & 15;
+	nfp = (r.val >> 8) & 15;
+	if (p) {
+		p[0] = ngp;
+		p[1] = nfp;
+	}
+	b = 0;
+	while (ngp--)
+		b |= BIT(A0+ngp);
+	while (nfp--)
+		b |= BIT(FA0+nfp);
+	return b;
+}
+/*
+ * Classify the aggregate type t for passing or returning and
+ * record the result in *c.  gp points at the next free integer
+ * argument register; it is a local copy, so the caller's cursor
+ * is not advanced here.  fp is accepted but not used yet (see
+ * the float TODO below).
+ */
+static void
+typclass(Class *c, Typ *t, int *gp, int *fp)
+{
+ uint64_t sz;
+ uint n;
+
+ sz = (t->size + 7) & ~7; /* size rounded up to a multiple of 8 */
+ c->t = t;
+ c->class = 0;
+ c->ngp = 0;
+ c->nfp = 0;
+
+ if (t->align > 4) /* presumably a log2 value, 4 meaning 16 bytes - confirm */
+ err("alignments larger than 16 are not supported");
+
+ if (t->dark || sz > 16 || sz == 0) {
+ /* large structs are replaced by a
+ * pointer to some caller-allocated
+ * memory */
+ c->class |= Cptr;
+ c->size = 8;
+ return; /* nreg, reg[] and cls[] are left unset on this path */
+ }
+
+ c->size = sz;
+
+ /* TODO: float */
+
+ for (n=0; n<sz/8; n++, c->ngp++) { /* one Kl integer register per 8 bytes */
+ c->reg[n] = *gp++;
+ c->cls[n] = Kl;
+ }
+
+ c->nreg = n;
+}
+/*
+ * Create nreg fresh temporaries with the classes in cls[],
+ * return them in tmp[], and emit for each one a store of the
+ * temporary to mem at a running offset (8 bytes for wide
+ * classes, 4 otherwise).
+ *
+ * The store is emitted before the add that computes its
+ * address; NOTE(review): this matches a buffer filled from its
+ * end, as rv64_abi() suggests - confirm against emit().
+ */
+static void
+sttmps(Ref tmp[], int cls[], uint nreg, Ref mem, Fn *fn)
+{
+ static int st[] = {
+ [Kw] = Ostorew, [Kl] = Ostorel,
+ [Ks] = Ostores, [Kd] = Ostored
+ }; /* store opcode for each class */
+ uint n;
+ uint64_t off;
+ Ref r;
+
+ assert(nreg <= 4);
+ off = 0;
+ for (n=0; n<nreg; n++) {
+ tmp[n] = newtmp("abi", cls[n], fn);
+ r = newtmp("abi", Kl, fn); /* address mem + off */
+ emit(st[cls[n]], 0, R, tmp[n], r);
+ emit(Oadd, Kl, r, mem, getcon(off, fn));
+ off += KWIDE(cls[n]) ? 8 : 4;
+ }
+}
+/*
+ * Emit, for each of the n pieces, a load of class cls[i] from
+ * mem at a running offset (8 bytes for wide classes, 4
+ * otherwise) into the register reg[i].  As in sttmps(), the
+ * load is emitted before the add that computes its address.
+ */
+static void
+ldregs(int reg[], int cls[], int n, Ref mem, Fn *fn)
+{
+ int i;
+ uint64_t off;
+ Ref r;
+
+ off = 0;
+ for (i=0; i<n; i++) {
+ r = newtmp("abi", Kl, fn); /* address mem + off */
+ emit(Oload, cls[i], TMP(reg[i]), r, R);
+ emit(Oadd, Kl, r, mem, getcon(off, fn));
+ off += KWIDE(cls[i]) ? 8 : 4;
+ }
+}
+/*
+ * Lower the return jump of block b.  A value-returning jump is
+ * turned into Jret0 and its value is moved to where it is
+ * returned: an aggregate is either copied to the memory
+ * addressed by fn->retr (Cptr) or loaded into registers, and a
+ * scalar is copied into A0 or FA0 depending on KBASE of its
+ * class.  jmp.arg becomes a CALL() reference whose bits 0..1
+ * and 2..3 count the gp and fp return registers.
+ */
+static void
+selret(Blk *b, Fn *fn)
+{
+ int j, k, cty;
+ Ref r;
+ Class cr;
+
+ j = b->jmp.type;
+
+ if (!isret(j) || j == Jret0) /* nothing to move */
+ return;
+
+ r = b->jmp.arg;
+ b->jmp.type = Jret0;
+
+ if (j == Jretc) {
+ typclass(&cr, &typ[fn->retty], gpreg, fpreg);
+ cty = (cr.nfp << 2) | cr.ngp;
+ if (cr.class & Cptr) {
+ assert(rtype(fn->retr) == RTmp);
+ blit(fn->retr, 0, r, cr.t->size, fn);
+ } else {
+ ldregs(cr.reg, cr.cls, cr.nreg, r, fn);
+ }
+ } else {
+ k = j - Jretw; /* class of the returned scalar */
+ if (KBASE(k) == 0) {
+ emit(Ocopy, k, TMP(A0), r, R);
+ cty = 1;
+ } else {
+ emit(Ocopy, k, TMP(FA0), r, R);
+ cty = 1 << 2;
+ }
+ }
+
+ b->jmp.arg = CALL(cty);
+}
+/*
+ * Classify the argument or parameter instructions in [i0, i1),
+ * filling one Class of carg per instruction.  Registers are
+ * taken in order from gpreg and fpreg; when retptr is non-zero
+ * the first integer register is skipped.  An env
+ * argument/parameter is reported through *env.
+ *
+ * Returns the number of integer registers consumed shifted
+ * left by 4, or-ed with the number of floating-point registers
+ * consumed shifted left by 8.
+ */
+static int
+argsclass(Ins *i0, Ins *i1, Class *carg, Ref *env, int retptr)
+{
+ int ngp, nfp, *gp, *fp, vararg;
+ Class *c;
+ Ins *i;
+
+ gp = gpreg;
+ fp = fpreg;
+ ngp = 8; /* integer registers still free */
+ nfp = 8; /* floating-point registers still free */
+ vararg = 0;
+ if (retptr) {
+ gp++;
+ ngp--;
+ }
+ for (i=i0, c=carg; i<i1; i++, c++) {
+ switch (i->op) {
+ case Opar:
+ case Oarg:
+ c->cls[0] = i->cls;
+ c->size = 8;
+ /* variadic float args are passed in int regs */
+ if (!vararg && KBASE(i->cls) == 1 && nfp > 0) {
+ nfp--;
+ c->reg[0] = *fp++;
+ } else if (ngp > 0) {
+ if (KBASE(i->cls) == 1)
+ c->class |= Cfpint;
+ ngp--;
+ c->reg[0] = *gp++;
+ } else {
+ c->class |= Cstk1;
+ }
+ break;
+ case Oargv:
+ /* subsequent arguments are variadic */
+ vararg = 1;
+ break;
+ case Oparc:
+ case Oargc:
+ typclass(c, &typ[i->arg[0].val], gp, fp);
+ if (c->class & Cptr) { /* the pointer takes one integer register */
+ c->ngp = 1;
+ c->reg[0] = *gp;
+ c->cls[0] = Kl;
+ }
+ if (c->ngp <= ngp && c->nfp <= nfp) { /* fits in registers as classified */
+ ngp -= c->ngp;
+ nfp -= c->nfp;
+ gp += c->ngp;
+ fp += c->nfp;
+ break;
+ }
+ c->ngp += c->nfp; /* otherwise try integer registers only */
+ c->nfp = 0;
+ if (c->ngp <= ngp) {
+ ngp -= c->ngp;
+ gp += c->ngp;
+ break;
+ }
+ c->class |= Cstk1;
+ if (c->ngp - 1 > ngp)
+ c->class |= Cstk2;
+ break;
+ case Opare:
+ *env = i->to;
+ break;
+ case Oarge:
+ *env = i->arg[0];
+ break;
+ }
+ }
+ return (gp-gpreg) << 4 | (fp-fpreg) << 8;
+}
+/*
+ * Push onto the list *ilp an allocation instruction that
+ * defines r as the address of a stack area for the aggregate
+ * described by c.  The opcode is Oalloc plus an index derived
+ * from the type's alignment; the size is the type's own size
+ * for Cptr classes and c->size otherwise.
+ */
+static void
+stkblob(Ref r, Class *c, Fn *fn, Insl **ilp)
+{
+ Insl *il;
+ int al;
+ uint64_t sz;
+
+ il = alloc(sizeof *il);
+ al = c->t->align - 2; /* NAlign == 3 */
+ if (al < 0) /* alignments below index 2 use the first Oalloc variant */
+ al = 0;
+ sz = c->class & Cptr ? c->t->size : c->size;
+ il->i = (Ins){Oalloc+al, Kl, r, {getcon(sz, fn)}};
+ il->link = *ilp;
+ *ilp = il;
+}
+/*
+ * Lower one call.  [i0, i1) are the argument instructions and
+ * i1 is the call itself; stack allocations needed for
+ * aggregates are pushed onto *ilp.
+ *
+ * The emit calls below are written from the end of the
+ * sequence towards its start (stack release, result copies,
+ * the call, register moves, stack stores, stack reservation);
+ * NOTE(review): this relies on the instruction buffer being
+ * filled from its end, as rv64_abi() suggests - confirm.
+ *
+ * Calls with an env argument are not implemented and die.
+ */
+static void
+selcall(Fn *fn, Ins *i0, Ins *i1, Insl **ilp)
+{
+ Ins *i;
+ Class *ca, *c, cr;
+ int k, cty, envc;
+ uint n;
+ uint64_t stk, off;
+ Ref r, r1, env, tmp[2];
+
+ env = R;
+ ca = alloc((i1-i0) * sizeof ca[0]); /* one Class per argument instruction */
+ cr.class = 0;
+
+ if (!req(i1->arg[1], R)) /* the call returns an aggregate */
+ typclass(&cr, &typ[i1->arg[1].val], gpreg, fpreg);
+
+ cty = argsclass(i0, i1, ca, &env, cr.class & Cptr);
+ stk = 0; /* bytes of arguments passed on the stack */
+ for (i=i0, c=ca; i<i1; i++, c++) {
+ if (i->op == Oargv)
+ continue;
+ if (c->class & Cptr) { /* pass a pointer to a stack copy instead */
+ i->arg[0] = newtmp("abi", Kl, fn);
+ stkblob(i->arg[0], c, fn, ilp);
+ i->op = Oarg;
+ }
+ if (c->class & Cstk1)
+ stk += 8;
+ if (c->class & Cstk2)
+ stk += 8;
+ }
+ if (stk)
+ emit(Osalloc, Kl, R, getcon(-stk, fn), R);
+
+ if (!req(i1->arg[1], R)) {
+ stkblob(i1->to, &cr, fn, ilp);
+ cty |= (cr.nfp << 2) | cr.ngp;
+ if (cr.class & Cptr) {
+ cty |= 1;
+ emit(Ocopy, Kw, R, TMP(A0), R);
+ } else {
+ sttmps(tmp, cr.cls, cr.nreg, i1->to, fn);
+ for (n=0; n<cr.nreg; n++) {
+ r = TMP(cr.reg[n]);
+ emit(Ocopy, cr.cls[n], tmp[n], r, R);
+ }
+ }
+ } else if (KBASE(i1->cls) == 0) {
+ emit(Ocopy, i1->cls, i1->to, TMP(A0), R);
+ cty |= 1;
+ } else {
+ emit(Ocopy, i1->cls, i1->to, TMP(FA0), R);
+ cty |= 1 << 2;
+ }
+
+ envc = !req(R, env);
+ if (envc)
+ die("todo (rv64 abi): env calls");
+ emit(Ocall, 0, R, i1->arg[0], CALL(cty));
+
+ if (cr.class & Cptr)
+ /* struct return argument */
+ emit(Ocopy, Kl, TMP(A0), i1->to, R);
+
+ /* move arguments into registers */
+ for (i=i0, c=ca; i<i1; i++, c++) {
+ if (i->op == Oargv || c->class & Cstk1)
+ continue;
+ if (i->op == Oargc) {
+ ldregs(c->reg, c->cls, c->nreg, i->arg[1], fn);
+ } else if (c->class & Cfpint) {
+ k = KWIDE(*c->cls) ? Kl : Kw;
+ r = newtmp("abi", k, fn);
+ emit(Ocopy, k, TMP(c->reg[0]), r, R);
+ c->reg[0] = r.val; /* the cast in the next loop targets this temporary */
+ } else {
+ emit(Ocopy, *c->cls, TMP(*c->reg), i->arg[0], R);
+ }
+ }
+
+ for (i=i0, c=ca; i<i1; i++, c++) {
+ if (c->class & Cfpint)
+ emit(Ocast, KWIDE(*c->cls) ? Kl : Kw, TMP(*c->reg), i->arg[0], R);
+ if (c->class & Cptr) /* copy the aggregate into its stack blob */
+ blit(i->arg[0], 0, i->arg[1], c->t->size, fn);
+ }
+
+ if (!stk)
+ return;
+
+ r = newtmp("abi", Kl, fn); /* base address of the stack argument area */
+ for (i=i0, c=ca, off=0; i<i1; i++, c++) {
+ if (i->op == Oargv || (c->class & Cstk) == 0)
+ continue;
+ if (i->op != Oargc) {
+ r1 = newtmp("abi", Kl, fn);
+ /* w arguments are stored sign-extended
+ * to 64-bits
+ *
+ * s arguments can just be stored with
+ * Ostores into the first 32-bits in the
+ * stack position since the ABI says the
+ * upper bits are undefined
+ */
+ emit(i->cls == Kw ? Ostorel : Ostorew+i->cls, 0, R, i->arg[0], r1);
+ if (i->cls == Kw) {
+ /* TODO: we only need this sign extension
+ * for subtyped l temporaries passed as w
+ * arguments (see rv64/isel.c:fixarg)
+ *
+ * however, we cannot just fix it in isel
+ * since by that point we have forgotten
+ * the original argument type
+ */
+ curi->arg[0] = newtmp("abi", Kl, fn);
+ emit(Oextsw, Kl, curi->arg[0], i->arg[0], R);
+ }
+ emit(Oadd, Kl, r1, r, getcon(off, fn));
+ } else
+ blit(r, off, i->arg[1], c->t->size, fn);
+ off += c->size;
+ }
+ emit(Osalloc, Kl, r, getcon(stk, fn), R);
+}
+/*
+ * Lower the parameter instructions [i0, i1) of the function.
+ * Resets curi and emits the lowered code into the instruction
+ * buffer, sets fn->reg to the mask of argument registers used,
+ * and, when the function returns an aggregate through a
+ * pointer, creates fn->retr from A0.
+ *
+ * Returns the register counts taken from the argsclass()
+ * result together with the final value of the stack slot
+ * counter s.  Functions with an env parameter die.
+ */
+static Params
+selpar(Fn *fn, Ins *i0, Ins *i1)
+{
+ Class *ca, *c, cr;
+ Insl *il;
+ Ins *i;
+ int n, s, cty;
+ Ref r, env, tmp[16], *t;
+
+ env = R;
+ ca = alloc((i1-i0) * sizeof ca[0]); /* one Class per parameter instruction */
+ cr.class = 0;
+ curi = &insb[NIns];
+
+ if (fn->retty >= 0) {
+ typclass(&cr, &typ[fn->retty], gpreg, fpreg);
+ if (cr.class & Cptr) {
+ fn->retr = newtmp("abi", Kl, fn);
+ emit(Ocopy, Kl, fn->retr, TMP(A0), R);
+ }
+ }
+
+ cty = argsclass(i0, i1, ca, &env, cr.class & Cptr);
+ fn->reg = rv64_argregs(CALL(cty), 0);
+
+ il = 0;
+ t = tmp;
+ for (i=i0, c=ca; i<i1; i++, c++) { /* aggregates received in registers */
+ if (i->op != Oparc || (c->class & (Cptr|Cstk)))
+ continue;
+ sttmps(t, c->cls, c->nreg, i->to, fn);
+ stkblob(i->to, c, fn, &il);
+ t += c->nreg;
+ }
+ for (; il; il=il->link)
+ emiti(il->i);
+
+ t = tmp;
+ for (i=i0, c=ca, s=2 + 8 * fn->vararg; i<i1; i++, c++) { /* s advances by one per 8 bytes */
+ if (i->op == Oparc
+ && (c->class & Cptr) == 0) {
+ if (c->class & Cstk) {
+ fn->tmp[i->to.val].slot = -s;
+ s += c->size / 8;
+ } else {
+ for (n=0; n<c->nreg; n++) {
+ r = TMP(c->reg[n]);
+ emit(Ocopy, c->cls[n], *t++, r, R);
+ }
+ }
+ } else if (c->class & Cstk1) {
+ emit(Oload, c->cls[0], i->to, SLOT(-s), R);
+ s++;
+ } else {
+ emit(Ocopy, c->cls[0], i->to, TMP(c->reg[0]), R);
+ }
+ }
+
+ if (!req(R, env))
+ die("todo (rv64 abi): env calls");
+
+ return (Params){
+ .stk = s,
+ .ngp = (cty >> 4) & 15,
+ .nfp = (cty >> 8) & 15,
+ };
+}
+/*
+ * Lower a vaarg instruction.  The emitted code loads a pointer
+ * from the location i->arg[0], loads the result (class i->cls)
+ * through it, and stores the pointer plus 8 back to i->arg[0].
+ * The emit calls are written last step first.
+ */
+static void
+selvaarg(Fn *fn, Ins *i)
+{
+ Ref loc, newloc;
+
+ loc = newtmp("abi", Kl, fn); /* current argument pointer */
+ newloc = newtmp("abi", Kl, fn); /* pointer advanced past the argument */
+ emit(Ostorel, Kw, R, newloc, i->arg[0]);
+ emit(Oadd, Kl, newloc, loc, getcon(8, fn));
+ emit(Oload, i->cls, i->to, loc, R);
+ emit(Oload, Kl, loc, i->arg[0], R);
+}
+/*
+ * Lower a vastart instruction: store to the location ap the
+ * address of the stack slot -s.  s is p.stk when selpar()
+ * moved its slot counter past the initial 2 + 8 * fn->vararg,
+ * and 2 + p.ngp otherwise.
+ */
+static void
+selvastart(Fn *fn, Params p, Ref ap)
+{
+ Ref rsave;
+ int s;
+
+ rsave = newtmp("abi", Kl, fn);
+ emit(Ostorel, Kw, R, rsave, ap);
+ s = p.stk > 2 + 8 * fn->vararg ? p.stk : 2 + p.ngp;
+ emit(Oaddr, Kl, rsave, SLOT(-s), R);
+}
+/*
+ * ABI lowering pass for one function.  The leading parameter
+ * instructions of the start block are lowered by selpar(); then
+ * every block is rewritten by walking its instructions from
+ * last to first, re-emitting ordinary instructions unchanged
+ * and lowering returns, calls, vastart and vaarg.  The start
+ * block is processed last so that the stack allocations
+ * collected by selcall() can be emitted into it.
+ */
+void
+rv64_abi(Fn *fn)
+{
+ Blk *b;
+ Ins *i, *i0, *ip;
+ Insl *il;
+ int n;
+ Params p;
+
+ for (b=fn->start; b; b=b->link)
+ b->visit = 0;
+
+ /* lower parameters */
+ for (b=fn->start, i=b->ins; i<&b->ins[b->nins]; i++)
+ if (!ispar(i->op))
+ break;
+ p = selpar(fn, b->ins, i);
+ n = b->nins - (i - b->ins) + (&insb[NIns] - curi); /* remaining instructions plus the lowered ones */
+ i0 = alloc(n * sizeof(Ins));
+ ip = icpy(ip = i0, curi, &insb[NIns] - curi);
+ ip = icpy(ip, i, &b->ins[b->nins] - i);
+ b->nins = n;
+ b->ins = i0;
+
+ /* lower calls, returns, and vararg instructions */
+ il = 0;
+ b = fn->start;
+ do {
+ if (!(b = b->link))
+ b = fn->start; /* do it last */
+ if (b->visit)
+ continue;
+ curi = &insb[NIns];
+ selret(b, fn);
+ for (i=&b->ins[b->nins]; i!=b->ins;)
+ switch ((--i)->op) {
+ default:
+ emiti(*i);
+ break;
+ case Ocall:
+ for (i0=i; i0>b->ins; i0--) /* find the first argument of this call */
+ if (!isarg((i0-1)->op))
+ break;
+ selcall(fn, i0, i, &il);
+ i = i0;
+ break;
+ case Ovastart:
+ selvastart(fn, p, i->arg[0]);
+ break;
+ case Ovaarg:
+ selvaarg(fn, i);
+ break;
+ case Oarg:
+ case Oargc:
+ die("unreachable");
+ }
+ if (b == fn->start)
+ for (; il; il=il->link)
+ emiti(il->i);
+ b->nins = &insb[NIns] - curi;
+ idup(&b->ins, curi, b->nins);
+ } while (b != fn->start);
+
+ if (debug['A']) {
+ fprintf(stderr, "\n> After ABI lowering:\n");
+ printfn(fn, stderr);
+ }
+}
diff --git a/rv64/all.h b/rv64/all.h
@@ -0,0 +1,49 @@
+#include "../all.h"
+
+typedef struct Rv64Op Rv64Op;
+/*
+ * Register numbering of the rv64 target, grouped by role, and
+ * counts derived from that grouping.
+ */
+enum Rv64Reg {
+ /* caller-save */
+ T0 = RXX + 1, T1, T2, T3, T4, T5,
+ A0, A1, A2, A3, A4, A5, A6, A7,
+
+ /* callee-save */
+ S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11,
+
+ /* globally live */
+ FP, SP, GP, TP, RA, T6,
+
+ /* FP caller-save */
+ FT0, FT1, FT2, FT3, FT4, FT5, FT6, FT7, FT8, FT9, FT10, FT11,
+ FA0, FA1, FA2, FA3, FA4, FA5, FA6, FA7,
+
+ /* FP callee-save */
+ FS0, FS1, FS2, FS3, FS4, FS5, FS6, FS7, FS8, FS9, FS10, FS11,
+
+ NFPR = FS11 - FT0 + 1, /* all floating-point registers */
+ NGPR = T6 - T0 + 1, /* all integer registers, T0 through T6 */
+ NGPS = A7 - T0 + 1, /* caller-save integer registers */
+ NFPS = FA7 - FT0 + 1, /* caller-save floating-point registers */
+ NCLR = (S11 - S1 + 1) + (FS11 - FS0 + 1), /* callee-save registers of both kinds */
+};
+MAKESURE(reg_not_tmp, FS11 < (int)Tmp0); /* the last register must stay below Tmp0 */
+/*
+ * Per-opcode rv64 information.
+ * NOTE(review): imm presumably holds the V(Imm) value that
+ * ops.h attaches to each opcode - confirm in rv64/targ.c.
+ */
+struct Rv64Op {
+ char imm;
+};
+
+/* targ.c */
+extern int rv64_rsave[];
+extern int rv64_rclob[];
+extern Rv64Op rv64_op[];
+
+/* abi.c */
+bits rv64_retregs(Ref, int[2]);
+bits rv64_argregs(Ref, int[2]);
+void rv64_abi(Fn *);
+
+/* isel.c */
+void rv64_isel(Fn *);
+
+/* emit.c */
+void rv64_emitfn(Fn *, FILE *);
diff --git a/rv64/emit.c b/rv64/emit.c
@@ -0,0 +1,499 @@
+#include "all.h"
+
+/* Pseudo classes used only in the cls field of omap[] entries;
+ * they are interpreted by the table lookup in emitins().
+ */
+enum {
+ Ki = -1, /* matches Kw and Kl */
+ Ka = -2, /* matches all classes */
+};
+
+/* Opcode-to-assembly table. emitins() scans it from the top and
+ * uses the first entry whose op matches and whose cls is equal to
+ * the instruction class, is Ka, or is Ki with an integer class, so
+ * the order of entries matters. The table ends with an NOp entry.
+ *
+ * Template escapes (expanded by emitf()):
+ *   %k   class letter of the instruction, omitted for Kl
+ *   %=   destination register
+ *   %0   first argument (register)
+ *   %1   second argument (register or small immediate)
+ *   %M0  first argument as a memory operand
+ *   %M1  second argument as a memory operand
+ *   %?   scratch register (t6)
+ */
+static struct {
+ short op;
+ short cls;
+ char *asm;
+} omap[] = {
+ { Oadd, Ki, "add%k %=, %0, %1" },
+ { Oadd, Ka, "fadd.%k %=, %0, %1" },
+ { Osub, Ki, "sub%k %=, %0, %1" },
+ { Osub, Ka, "fsub.%k %=, %0, %1" },
+ { Oneg, Ki, "neg%k %=, %0" },
+ { Oneg, Ka, "fneg.%k %=, %0" },
+ { Odiv, Ki, "div%k %=, %0, %1" },
+ { Odiv, Ka, "fdiv.%k %=, %0, %1" },
+ { Orem, Ki, "rem%k %=, %0, %1" },
+ /* NOTE(review): never selected, the Ki entry above already
+  * matches Kl and expands to the same text
+  */
+ { Orem, Kl, "rem %=, %0, %1" },
+ { Oudiv, Ki, "divu%k %=, %0, %1" },
+ { Ourem, Ki, "remu%k %=, %0, %1" },
+ { Omul, Ki, "mul%k %=, %0, %1" },
+ { Omul, Ka, "fmul.%k %=, %0, %1" },
+ { Oand, Ki, "and %=, %0, %1" },
+ { Oor, Ki, "or %=, %0, %1" },
+ { Oxor, Ki, "xor %=, %0, %1" },
+ { Osar, Ki, "sra%k %=, %0, %1" },
+ { Oshr, Ki, "srl%k %=, %0, %1" },
+ { Oshl, Ki, "sll%k %=, %0, %1" },
+ { Ocsltl, Ki, "slt %=, %0, %1" },
+ { Ocultl, Ki, "sltu %=, %0, %1" },
+ { Oceqs, Ki, "feq.s %=, %0, %1" },
+ { Ocges, Ki, "fge.s %=, %0, %1" },
+ { Ocgts, Ki, "fgt.s %=, %0, %1" },
+ { Ocles, Ki, "fle.s %=, %0, %1" },
+ { Oclts, Ki, "flt.s %=, %0, %1" },
+ { Oceqd, Ki, "feq.d %=, %0, %1" },
+ { Ocged, Ki, "fge.d %=, %0, %1" },
+ { Ocgtd, Ki, "fgt.d %=, %0, %1" },
+ { Ocled, Ki, "fle.d %=, %0, %1" },
+ { Ocltd, Ki, "flt.d %=, %0, %1" },
+ { Ostoreb, Kw, "sb %0, %M1" },
+ { Ostoreh, Kw, "sh %0, %M1" },
+ { Ostorew, Kw, "sw %0, %M1" },
+ { Ostorel, Ki, "sd %0, %M1" },
+ { Ostores, Kw, "fsw %0, %M1" },
+ { Ostored, Kw, "fsd %0, %M1" },
+ { Oloadsb, Ki, "lb %=, %M0" },
+ { Oloadub, Ki, "lbu %=, %M0" },
+ { Oloadsh, Ki, "lh %=, %M0" },
+ { Oloaduh, Ki, "lhu %=, %M0" },
+ { Oloadsw, Ki, "lw %=, %M0" },
+ /* riscv64 always sign-extends 32-bit
+ * values stored in 64-bit registers
+ */
+ { Oloaduw, Kw, "lw %=, %M0" },
+ { Oloaduw, Kl, "lwu %=, %M0" },
+ { Oload, Kw, "lw %=, %M0" },
+ { Oload, Kl, "ld %=, %M0" },
+ { Oload, Ks, "flw %=, %M0" },
+ { Oload, Kd, "fld %=, %M0" },
+ { Oextsb, Ki, "sext.b %=, %0" },
+ { Oextub, Ki, "zext.b %=, %0" },
+ { Oextsh, Ki, "sext.h %=, %0" },
+ { Oextuh, Ki, "zext.h %=, %0" },
+ { Oextsw, Kl, "sext.w %=, %0" },
+ { Oextuw, Kl, "zext.w %=, %0" },
+ { Otruncd, Ks, "fcvt.s.d %=, %0" },
+ { Oexts, Kd, "fcvt.d.s %=, %0" },
+ { Ostosi, Kw, "fcvt.w.s %=, %0, rtz" },
+ { Ostosi, Kl, "fcvt.l.s %=, %0, rtz" },
+ { Ostoui, Kw, "fcvt.wu.s %=, %0, rtz" },
+ { Ostoui, Kl, "fcvt.lu.s %=, %0, rtz" },
+ { Odtosi, Kw, "fcvt.w.d %=, %0, rtz" },
+ { Odtosi, Kl, "fcvt.l.d %=, %0, rtz" },
+ { Odtoui, Kw, "fcvt.wu.d %=, %0, rtz" },
+ { Odtoui, Kl, "fcvt.lu.d %=, %0, rtz" },
+ { Oswtof, Ka, "fcvt.%k.w %=, %0" },
+ { Ouwtof, Ka, "fcvt.%k.wu %=, %0" },
+ { Osltof, Ka, "fcvt.%k.l %=, %0" },
+ { Oultof, Ka, "fcvt.%k.lu %=, %0" },
+ { Ocast, Kw, "fmv.x.w %=, %0" },
+ { Ocast, Kl, "fmv.x.d %=, %0" },
+ { Ocast, Ks, "fmv.w.x %=, %0" },
+ { Ocast, Kd, "fmv.d.x %=, %0" },
+ { Ocopy, Ki, "mv %=, %0" },
+ { Ocopy, Ka, "fmv.%k %=, %0" },
+ { Oswap, Ki, "mv %?, %0\n\tmv %0, %1\n\tmv %1, %?" },
+ { Oreqz, Ki, "seqz %=, %0" },
+ { Ornez, Ki, "snez %=, %0" },
+ /* NOTE(review): emitins() handles Ocall in its own case and
+  * does not reach the table lookup for it
+  */
+ { Ocall, Kw, "jalr %0" },
+ { NOp, 0, 0 }
+};
+
+/* Assembly name of each register, indexed by its enum Rv64Reg
+ * value. Designated initializers anchor the start of each group;
+ * the names that follow fill the consecutive indices.
+ */
+static char *rname[] = {
+ [FP] = "fp",
+ [SP] = "sp",
+ [GP] = "gp",
+ [TP] = "tp",
+ [RA] = "ra",
+ [T6] = "t6",
+ [T0] = "t0", "t1", "t2", "t3", "t4", "t5",
+ [A0] = "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7",
+ [S1] = "s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9", "s10", "s11",
+
+ [FT0] = "ft0", "ft1", "ft2", "ft3", "ft4", "ft5", "ft6", "ft7", "ft8", "ft9", "ft10", "ft11",
+ [FA0] = "fa0", "fa1", "fa2", "fa3", "fa4", "fa5", "fa6", "fa7",
+ [FS0] = "fs0", "fs1", "fs2", "fs3", "fs4", "fs5", "fs6", "fs7", "fs8", "fs9", "fs10", "fs11",
+};
+
+/* Translate the slot number s (the val field of an RSlot
+ * reference) into a byte offset relative to fp.
+ *
+ * A negative slot number yields the positive offset 8 * -s; a
+ * non-negative one yields the non-positive offset
+ * -4 * (fn->slot - s).
+ */
+static int64_t
+slot(int s, Fn *fn)
+{
+	/* sign-extend the low 29 bits of s; the left shift is done
+	 * on an unsigned value because shifting a signed integer
+	 * into its sign bit is undefined behavior
+	 */
+	s = (int32_t)((uint32_t)s << 3) >> 3;
+	assert(s <= fn->slot);
+	if (s < 0)
+		return 8 * -s;
+	return -4 * (fn->slot - s);
+}
+
+/* Write the symbol named by the address constant c to f: an
+ * optional ".L" prefix for local symbols, the label itself, and a
+ * "+offset" suffix when the constant carries a non-zero offset.
+ */
+static void
+emitaddr(Con *c, FILE *f)
+{
+	char suffix[32];
+
+	suffix[0] = 0;
+	if (c->bits.i != 0)
+		sprintf(suffix, "+%"PRIi64, c->bits.i);
+	fprintf(f, "%s%s%s", c->local ? ".L" : "", str(c->label), suffix);
+}
+
+/* Expand the assembly template s for instruction i and write the
+ * result to f as one tab-prefixed, newline-terminated line.
+ * Characters other than '%' are copied verbatim; the escapes are
+ * %k, %=, %0, %1, %M0, %M1 and %? (see the cases below). Any other
+ * escape is a fatal error.
+ */
+static void
+emitf(char *s, Ins *i, Fn *fn, FILE *f)
+{
+ /* class letters, indexed by class */
+ static char clschr[] = {'w', 'l', 's', 'd'};
+ Ref r;
+ int k, c;
+ Con *pc;
+ int64_t offset;
+
+ fputc('\t', f);
+ for (;;) {
+ k = i->cls;
+ /* copy literal text up to the next escape; the end of
+  * the template finishes the line
+  */
+ while ((c = *s++) != '%')
+ if (!c) {
+ fputc('\n', f);
+ return;
+ } else
+ fputc(c, f);
+ switch ((c = *s++)) {
+ default:
+ die("invalid escape");
+ case '?':
+ /* scratch register; only available for integer classes */
+ if (KBASE(k) == 0)
+ fputs("t6", f);
+ else
+ abort();
+ break;
+ case 'k':
+ /* class suffix; Kl instructions take none */
+ if (i->cls != Kl)
+ fputc(clschr[i->cls], f);
+ break;
+ case '=':
+ case '0':
+ /* destination or first argument, must be a register */
+ r = c == '=' ? i->to : i->arg[0];
+ assert(isreg(r));
+ fputs(rname[r.val], f);
+ break;
+ case '1':
+ /* second argument: a register, or a constant that
+  * fits in a signed 12-bit immediate
+  */
+ r = i->arg[1];
+ switch (rtype(r)) {
+ default:
+ die("invalid second argument");
+ case RTmp:
+ assert(isreg(r));
+ fputs(rname[r.val], f);
+ break;
+ case RCon:
+ pc = &fn->con[r.val];
+ assert(pc->type == CBits);
+ assert(pc->bits.i >= -2048 && pc->bits.i <= 2047);
+ fprintf(f, "%d", (int)pc->bits.i);
+ break;
+ }
+ break;
+ case 'M':
+ /* memory operand; the next template character
+  * selects argument 0 or 1
+  */
+ c = *s++;
+ assert(c == '0' || c == '1');
+ r = i->arg[c - '0'];
+ switch (rtype(r)) {
+ default:
+ die("invalid address argument");
+ case RTmp:
+ /* register base with zero displacement */
+ fprintf(f, "0(%s)", rname[r.val]);
+ break;
+ case RCon:
+ /* symbolic address */
+ pc = &fn->con[r.val];
+ assert(pc->type == CAddr);
+ emitaddr(pc, f);
+ if (isstore(i->op)
+ || (isload(i->op) && KBASE(i->cls) == 1)) {
+ /* store (and float load)
+ * pseudo-instructions need a
+ * temporary register in which to
+ * load the address
+ */
+ fprintf(f, ", t6");
+ }
+ break;
+ case RSlot:
+ /* stack slot addressed relative to fp; the
+  * offset must fit in a signed 12-bit immediate
+  */
+ offset = slot(r.val, fn);
+ assert(offset >= -2048 && offset <= 2047);
+ fprintf(f, "%d(fp)", (int)offset);
+ break;
+ }
+ break;
+ }
+ }
+}
+
+/* Emit the instruction that loads constant c into register r.
+ *
+ * An address constant is loaded with "la"; a bit-pattern constant
+ * is loaded with "li". When class k is not wide, the value is
+ * first truncated to 32 bits and sign-extended. Any other kind of
+ * constant is a fatal error.
+ */
+static void
+loadcon(Con *c, int r, int k, FILE *f)
+{
+	char *rn;
+	int64_t n;
+	int w;
+
+	w = KWIDE(k);
+	rn = rname[r];
+	switch (c->type) {
+	case CAddr:
+		fprintf(f, "\tla %s, ", rn);
+		emitaddr(c, f);
+		fputc('\n', f);
+		break;
+	case CBits:
+		n = c->bits.i;
+		if (!w)
+			n = (int32_t)n;
+		/* n is signed, so it must be printed with a signed
+		 * conversion; an unsigned one would turn negative
+		 * constants into huge positive literals
+		 */
+		fprintf(f, "\tli %s, %"PRIi64"\n", rn, n);
+		break;
+	default:
+		die("invalid constant");
+	}
+}
+
+/* If *pr is a stack slot whose fp-relative offset does not fit in
+ * a signed 12-bit immediate, emit code that computes the slot
+ * address into t6 and replace *pr by that register. Other
+ * references are left alone.
+ */
+static void
+fixslot(Ref *pr, Fn *fn, FILE *f)
+{
+	int64_t off;
+
+	if (rtype(*pr) != RSlot)
+		return;
+	off = slot(pr->val, fn);
+	if (off >= -2048 && off <= 2047)
+		return;
+	fprintf(f, "\tli t6, %"PRId64"\n", off);
+	fprintf(f, "\tadd t6, fp, t6\n");
+	*pr = TMP(T6);
+}
+
+/* Emit the assembly for instruction i of function fn to f.
+ *
+ * Most opcodes are looked up in omap[] and expanded by emitf().
+ * Copies, nops, slot addresses, calls and stack allocations are
+ * handled by the dedicated cases. The instruction may be rewritten
+ * in place (a copy becomes a load or a store, a far slot becomes
+ * t6) before it is emitted.
+ */
+static void
+emitins(Ins *i, Fn *fn, FILE *f)
+{
+	int o;
+	char *rn;
+	int64_t s;
+	Con *con;
+
+	switch (i->op) {
+	default:
+		/* make the address operand of loads and stores
+		 * reachable with a 12-bit displacement
+		 */
+		if (isload(i->op))
+			fixslot(&i->arg[0], fn, f);
+		else if (isstore(i->op))
+			fixslot(&i->arg[1], fn, f);
+	Table:
+		/* most instructions are just pulled out of
+		 * the table omap[], some special cases are
+		 * detailed below */
+		for (o=0;; o++) {
+			/* this linear search should really be a binary
+			 * search */
+			if (omap[o].op == NOp)
+				die("no match for %s(%c)",
+					optab[i->op].name, "wlsd"[i->cls]);
+			if (omap[o].op == i->op)
+			if (omap[o].cls == i->cls || omap[o].cls == Ka
+			|| (omap[o].cls == Ki && KBASE(i->cls) == 0))
+				break;
+		}
+		emitf(omap[o].asm, i, fn, f);
+		break;
+	case Ocopy:
+		/* a copy onto itself needs no code */
+		if (req(i->to, i->arg[0]))
+			break;
+		if (rtype(i->to) == RSlot) {
+			/* register to slot: turn the copy into the
+			 * store that matches its class
+			 */
+			switch (rtype(i->arg[0])) {
+			case RSlot:
+			case RCon:
+				die("unimplemented");
+				break;
+			default:
+				assert(isreg(i->arg[0]));
+				i->arg[1] = i->to;
+				i->to = R;
+				switch (i->cls) {
+				case Kw: i->op = Ostorew; break;
+				case Kl: i->op = Ostorel; break;
+				case Ks: i->op = Ostores; break;
+				case Kd: i->op = Ostored; break;
+				}
+				fixslot(&i->arg[1], fn, f);
+				goto Table;
+			}
+			break;
+		}
+		assert(isreg(i->to));
+		switch (rtype(i->arg[0])) {
+		case RCon:
+			loadcon(&fn->con[i->arg[0].val], i->to.val, i->cls, f);
+			break;
+		case RSlot:
+			/* slot to register: turn the copy into a load */
+			i->op = Oload;
+			fixslot(&i->arg[0], fn, f);
+			goto Table;
+		default:
+			assert(isreg(i->arg[0]));
+			goto Table;
+		}
+		break;
+	case Onop:
+		break;
+	case Oaddr:
+		assert(rtype(i->arg[0]) == RSlot);
+		rn = rname[i->to.val];
+		s = slot(i->arg[0].val, fn);
+		/* slot() can return offsets of either sign, so both
+		 * bounds of the signed 12-bit immediate are checked
+		 * (same range test as fixslot())
+		 */
+		if (s >= -2048 && s <= 2047) {
+			fprintf(f, "\tadd %s, fp, %"PRId64"\n", rn, s);
+		} else {
+			fprintf(f,
+				"\tli %s, %"PRId64"\n"
+				"\tadd %s, fp, %s\n",
+				rn, s, rn, rn
+			);
+		}
+		break;
+	case Ocall:
+		switch (rtype(i->arg[0])) {
+		case RCon:
+			/* direct call: only a plain symbol is accepted */
+			con = &fn->con[i->arg[0].val];
+			if (con->type != CAddr || con->bits.i)
+				goto invalid;
+			fprintf(f, "\tcall %s\n", str(con->label));
+			break;
+		case RTmp:
+			/* indirect call through a register */
+			emitf("jalr %0", i, fn, f);
+			break;
+		default:
+		invalid:
+			die("invalid call argument");
+		}
+		break;
+	case Osalloc:
+		/* grow the stack, then hand out the new sp if the
+		 * result is used
+		 */
+		emitf("sub sp, sp, %0", i, fn, f);
+		if (!req(i->to, R))
+			emitf("mv %=, sp", i, fn, f);
+		break;
+	}
+}
+
+/*
+
+ Stack-frame layout:
+
+ +=============+
+ | varargs |
+ | save area |
+ +-------------+
+ | saved ra |
+ | saved fp |
+ +-------------+ <- fp
+ | ... |
+ | spill slots |
+ | ... |
+ +-------------+
+ | ... |
+ | locals |
+ | ... |
+ +-------------+
+ | padding |
+ +-------------+
+ | callee-save |
+ | registers |
+ +=============+
+
+*/
+
+/* Emit the complete assembly for function fn to f: linkage
+ * directives, prologue, the instructions and jump of every block,
+ * and an epilogue at each return.
+ *
+ * Block labels are ".L<n>" where n is the block id plus a base
+ * that advances by fn->nblk after each function, so labels do not
+ * collide between functions emitted to the same file.
+ */
+void
+rv64_emitfn(Fn *fn, FILE *f)
+{
+	static int id0;
+	int lbl, neg, off, frame, *pr, r;
+	Blk *b, *s;
+	Ins *i;
+
+	gasemitlnk(fn->name, &fn->lnk, ".text", f);
+
+	if (fn->vararg) {
+		/* TODO: only need space for registers unused by named arguments */
+		fprintf(f, "\tadd sp, sp, -64\n");
+		for (r = A0; r <= A7; r++)
+			fprintf(f, "\tsd %s, %d(sp)\n", rname[r], 8 * (r - A0));
+	}
+	/* save fp and ra just below the incoming sp and point fp
+	 * at the saved pair
+	 */
+	fprintf(f, "\tsd fp, -16(sp)\n");
+	fprintf(f, "\tsd ra, -8(sp)\n");
+	fprintf(f, "\tadd fp, sp, -16\n");
+
+	/* frame covers the saved fp/ra pair, the slots (4 bytes
+	 * each) and 8 bytes per used callee-save register, rounded
+	 * up to a multiple of 16
+	 */
+	frame = (16 + 4 * fn->slot + 15) & ~15;
+	for (pr = rv64_rclob; *pr>=0; pr++) {
+		if (fn->reg & BIT(*pr))
+			frame += 8;
+	}
+	frame = (frame + 15) & ~15;
+
+	if (frame <= 2048)
+		fprintf(f, "\tadd sp, sp, -%d\n", frame);
+	else
+		fprintf(f,
+			"\tli t6, %d\n"
+			"\tsub sp, sp, t6\n",
+			frame);
+	/* store the used callee-save registers at the bottom of
+	 * the frame
+	 */
+	for (pr = rv64_rclob, off = 0; *pr >= 0; pr++) {
+		if (fn->reg & BIT(*pr)) {
+			fprintf(f, "\t%s %s, %d(sp)\n", *pr < FT0 ? "sd" : "fsd", rname[*pr], off);
+			off += 8;
+		}
+	}
+
+	for (lbl = 0, b = fn->start; b; b=b->link) {
+		if (lbl || b->npred > 1)
+			fprintf(f, ".L%d:\n", id0+b->id);
+		for (i=b->ins; i!=&b->ins[b->nins]; i++)
+			emitins(i, fn, f);
+		lbl = 1;
+		switch (b->jmp.type) {
+		case Jret0:
+			if (fn->dynalloc) {
+				/* sp was moved by dynamic allocations,
+				 * so the bottom of the fixed frame has
+				 * to be recomputed from fp in both
+				 * branches
+				 */
+				if (frame - 16 <= 2048)
+					fprintf(f, "\tadd sp, fp, -%d\n", frame - 16);
+				else
+					fprintf(f,
+						"\tli t6, %d\n"
+						"\tsub sp, fp, t6\n",
+						frame - 16);
+			}
+			for (pr = rv64_rclob, off = 0; *pr >= 0; pr++) {
+				if (fn->reg & BIT(*pr)) {
+					fprintf(f, "\t%s %s, %d(sp)\n", *pr < FT0 ? "ld" : "fld", rname[*pr], off);
+					off += 8;
+				}
+			}
+			/* pop the frame (and the vararg save area, if
+			 * any), restore ra and fp, and return
+			 */
+			fprintf(f,
+				"\tadd sp, fp, %d\n"
+				"\tld ra, 8(fp)\n"
+				"\tld fp, 0(fp)\n"
+				"\tret\n",
+				16 + fn->vararg * 64
+			);
+			break;
+		case Jjmp:
+		Jmp:
+			/* no jump is needed when the target is the
+			 * next block in layout order
+			 */
+			if (b->s1 != b->link)
+				fprintf(f, "\tj .L%d\n", id0+b->s1->id);
+			else
+				lbl = 0;
+			break;
+		case Jjnz:
+			neg = 0;
+			if (b->link == b->s2) {
+				/* swap the successors and invert the
+				 * test so that the block laid out
+				 * next becomes s1
+				 */
+				s = b->s1;
+				b->s1 = b->s2;
+				b->s2 = s;
+				neg = 1;
+			}
+			assert(isreg(b->jmp.arg));
+			fprintf(f, "\tb%sz %s, .L%d\n", neg ? "ne" : "eq", rname[b->jmp.arg.val], id0+b->s2->id);
+			goto Jmp;
+		}
+	}
+	id0 += fn->nblk;
+}
diff --git a/rv64/isel.c b/rv64/isel.c
@@ -0,0 +1,278 @@
+#include "all.h"
+
+/* Return 1 when r is the address operand of instruction i with
+ * opcode op: the first argument of a load or call, or the second
+ * argument of a store. Return 0 otherwise.
+ */
+static int
+memarg(Ref *r, int op, Ins *i)
+{
+	int addr0;
+
+	addr0 = isload(op) || op == Ocall;
+	if (addr0 && r == &i->arg[0])
+		return 1;
+	return isstore(op) && r == &i->arg[1];
+}
+
+/* Return non-zero when r is the second argument of instruction i
+ * and opcode op is flagged in rv64_op[] as accepting an immediate.
+ */
+static int
+immarg(Ref *r, int op, Ins *i)
+{
+	if (!rv64_op[op].imm)
+		return 0;
+	return r == &i->arg[1];
+}
+
+/* Rewrite the argument *r, used with class k by instruction i,
+ * into a form the emitter can handle, emitting helper
+ * instructions with emit() where needed. When i is null (phi
+ * arguments and jump arguments) the argument is treated as the
+ * operand of a copy.
+ */
+static void
+fixarg(Ref *r, int k, Ins *i, Fn *fn)
+{
+ char buf[32];
+ Ref r0, r1;
+ int s, n, op;
+ Con *c;
+
+ r0 = r1 = *r;
+ op = i ? i->op : Ocopy;
+ switch (rtype(r0)) {
+ case RCon:
+ c = &fn->con[r0.val];
+ /* symbolic addresses are fine as memory operands */
+ if (c->type == CAddr && memarg(r, op, i))
+ break;
+ /* small constants are fine where an immediate is allowed */
+ if (c->type == CBits && immarg(r, op, i)
+ && -2048 <= c->bits.i && c->bits.i < 2048)
+ break;
+ /* everything else goes through a fresh temporary */
+ r1 = newtmp("isel", k, fn);
+ if (KBASE(k) == 1) {
+ /* load floating points from memory
+ * slots, they can't be used as
+ * immediates
+ */
+ assert(c->type == CBits);
+ n = gasstash(&c->bits, KWIDE(k) ? 8 : 4);
+ /* vgrow() may move fn->con, so c is re-fetched below */
+ vgrow(&fn->con, ++fn->ncon);
+ c = &fn->con[fn->ncon-1];
+ sprintf(buf, "fp%d", n);
+ *c = (Con){.type = CAddr, .local = 1};
+ c->label = intern(buf);
+ emit(Oload, k, r1, CON(c-fn->con), R);
+ break;
+ }
+ emit(Ocopy, k, r1, r0, R);
+ break;
+ case RTmp:
+ /* machine registers need no fixing */
+ if (isreg(r0))
+ break;
+ s = fn->tmp[r0.val].slot;
+ if (s != -1) {
+ /* aggregate passed by value on
+ * stack, or fast local address,
+ * replace with slot if we can
+ */
+ if (memarg(r, op, i)) {
+ r1 = SLOT(s);
+ break;
+ }
+ r1 = newtmp("isel", k, fn);
+ emit(Oaddr, k, r1, SLOT(s), R);
+ break;
+ }
+ if (k == Kw && fn->tmp[r0.val].cls == Kl) {
+ /* TODO: this sign extension isn't needed
+ * for 32-bit arithmetic instructions
+ */
+ r1 = newtmp("isel", k, fn);
+ emit(Oextsw, Kl, r1, r0, R);
+ } else {
+ assert(k == fn->tmp[r0.val].cls);
+ }
+ break;
+ }
+ /* install the (possibly replaced) argument */
+ *r = r1;
+}
+
+/* Arrange for *pr to receive the inverse of a 0/1 value: emit
+ * "*pr = tmp xor 1" for a fresh temporary and make *pr refer to
+ * that temporary, so the caller's next instruction defines it.
+ */
+static void
+negate(Ref *pr, Fn *fn)
+{
+	Ref tmp, dst;
+
+	tmp = newtmp("isel", Kw, fn);
+	dst = *pr;
+	emit(Oxor, Kw, dst, tmp, getcon(1, fn));
+	*pr = tmp;
+}
+
+/* Lower the comparison instruction i, whose arguments have class
+ * k and whose comparison code is op, to the operations available
+ * in omap[]: slt/sltu, seqz/snez, feq/fge/fgt/fle/flt, combined
+ * with operand swaps and result inversion.
+ *
+ * NOTE(review): emit() appears to build the block back to front,
+ * so within each case the instruction emitted first executes
+ * last — confirm against emit() in util.c.
+ */
+static void
+selcmp(Ins i, int k, int op, Fn *fn)
+{
+ Ins *icmp;
+ Ref r, r0, r1;
+ int sign, swap, neg;
+
+ switch (op) {
+ case Cieq:
+ /* equal: xor the operands, then test the result for zero */
+ r = newtmp("isel", k, fn);
+ emit(Oreqz, i.cls, i.to, r, R);
+ emit(Oxor, k, r, i.arg[0], i.arg[1]);
+ icmp = curi;
+ fixarg(&icmp->arg[0], k, icmp, fn);
+ fixarg(&icmp->arg[1], k, icmp, fn);
+ return;
+ case Cine:
+ /* not equal: xor the operands, then test for non-zero */
+ r = newtmp("isel", k, fn);
+ emit(Ornez, i.cls, i.to, r, R);
+ emit(Oxor, k, r, i.arg[0], i.arg[1]);
+ icmp = curi;
+ fixarg(&icmp->arg[0], k, icmp, fn);
+ fixarg(&icmp->arg[1], k, icmp, fn);
+ return;
+ /* integer orderings are all built from "less than":
+  * sign selects the signed or unsigned form, swap exchanges
+  * the operands, neg inverts the result
+  */
+ case Cisge: sign = 1, swap = 0, neg = 1; break;
+ case Cisgt: sign = 1, swap = 1, neg = 0; break;
+ case Cisle: sign = 1, swap = 1, neg = 1; break;
+ case Cislt: sign = 1, swap = 0, neg = 0; break;
+ case Ciuge: sign = 0, swap = 0, neg = 1; break;
+ case Ciugt: sign = 0, swap = 1, neg = 0; break;
+ case Ciule: sign = 0, swap = 1, neg = 1; break;
+ case Ciult: sign = 0, swap = 0, neg = 0; break;
+ /* these float comparisons are emitted unchanged */
+ case NCmpI+Cfeq:
+ case NCmpI+Cfge:
+ case NCmpI+Cfgt:
+ case NCmpI+Cfle:
+ case NCmpI+Cflt:
+ swap = 0, neg = 0;
+ break;
+ case NCmpI+Cfuo:
+ /* unordered is the inverse of ordered */
+ negate(&i.to, fn);
+ /* fallthrough */
+ case NCmpI+Cfo:
+ /* ordered: each operand compares equal to itself */
+ r0 = newtmp("isel", i.cls, fn);
+ r1 = newtmp("isel", i.cls, fn);
+ emit(Oand, i.cls, i.to, r0, r1);
+ op = KWIDE(k) ? Oceqd : Oceqs;
+ emit(op, i.cls, r0, i.arg[0], i.arg[0]);
+ icmp = curi;
+ fixarg(&icmp->arg[0], k, icmp, fn);
+ fixarg(&icmp->arg[1], k, icmp, fn);
+ emit(op, i.cls, r1, i.arg[1], i.arg[1]);
+ icmp = curi;
+ fixarg(&icmp->arg[0], k, icmp, fn);
+ fixarg(&icmp->arg[1], k, icmp, fn);
+ return;
+ case NCmpI+Cfne:
+ /* float not-equal is an inverted float equal */
+ swap = 0, neg = 1;
+ i.op = KWIDE(k) ? Oceqd : Oceqs;
+ break;
+ default:
+ assert(0 && "unknown comparison");
+ }
+ /* sign is only set, and only read, for integer comparisons */
+ if (op < NCmpI)
+ i.op = sign ? Ocsltl : Ocultl;
+ if (swap) {
+ r = i.arg[0];
+ i.arg[0] = i.arg[1];
+ i.arg[1] = r;
+ }
+ if (neg)
+ negate(&i.to, fn);
+ emiti(i);
+ icmp = curi;
+ fixarg(&icmp->arg[0], k, icmp, fn);
+ fixarg(&icmp->arg[1], k, icmp, fn);
+}
+
+/* Select code for one instruction i of fn. Nops are dropped,
+ * dynamic allocations become Osalloc with a size rounded up to a
+ * multiple of 16, comparisons are handed to selcmp(), and every
+ * other instruction is emitted as is with its arguments passed
+ * through fixarg().
+ */
+static void
+sel(Ins i, Fn *fn)
+{
+ Ref r0, r1;
+ Ins *i0;
+ int ck, cc;
+ int64_t sz;
+
+ switch (i.op) {
+ case Onop:
+ break;
+ case Oalloc4:
+ case Oalloc8:
+ case Oalloc16:
+ /* we need to make sure
+ * the stack remains aligned
+ * (sp = 0) mod 16
+ */
+ fn->dynalloc = 1;
+ if (rtype(i.arg[0]) == RCon) {
+ /* constant size: round it up here */
+ sz = fn->con[i.arg[0].val].bits.i;
+ if (sz < 0)
+ err("invalid alloc size %"PRId64, sz);
+ sz = (sz + 15) & -16;
+ emit(Osalloc, Kl, i.to, getcon(sz, fn), R);
+ fixarg(&curi->arg[0], Kl, curi, fn);
+ } else {
+ /* r0 = (i.arg[0] + 15) & -16 */
+ r0 = newtmp("isel", Kl, fn);
+ r1 = newtmp("isel", Kl, fn);
+ emit(Osalloc, Kl, i.to, r0, R);
+ emit(Oand, Kl, r0, r1, getcon(-16, fn));
+ emit(Oadd, Kl, r1, i.arg[0], getcon(15, fn));
+ if (fn->tmp[i.arg[0].val].slot != -1)
+ err("unlikely argument %%%s in %s",
+ fn->tmp[i.arg[0].val].name, optab[i.op].name);
+ }
+ break;
+ default:
+ if (iscmp(i.op, &ck, &cc)) {
+ selcmp(i, ck, cc, fn);
+ break;
+ }
+ emiti(i);
+ i0 = curi; /* fixarg() can change curi */
+ fixarg(&i0->arg[0], argcls(&i, 0), i0, fn);
+ fixarg(&i0->arg[1], argcls(&i, 1), i0, fn);
+ }
+}
+
+/* Prepare the jump of block b: the argument of a conditional
+ * jump is passed through fixarg() as a Kw value. Other jump
+ * types are left untouched.
+ */
+static void
+seljmp(Blk *b, Fn *fn)
+{
+	/* TODO: replace cmp+jnz with beq/bne/blt[u]/bge[u] */
+	if (b->jmp.type != Jjnz)
+		return;
+	fixarg(&b->jmp.arg, Kw, 0, fn);
+}
+
+/* Instruction selection for rv64. First, constant-size
+ * allocations in the start block are turned into fixed stack
+ * slots. Then every block is rebuilt: phi arguments coming from
+ * the block, its jump argument and its instructions are run
+ * through fixarg()/sel(). With debug['I'] set, the resulting
+ * function is printed to stderr.
+ */
+void
+rv64_isel(Fn *fn)
+{
+ Blk *b, **sb;
+ Ins *i;
+ Phi *p;
+ uint n;
+ int al;
+ int64_t sz;
+
+ /* assign slots to fast allocs */
+ b = fn->start;
+ /* specific to NAlign == 3 */ /* or change n=4 and sz /= 4 below */
+ for (al=Oalloc, n=4; al<=Oalloc1; al++, n*=2)
+ for (i=b->ins; i<&b->ins[b->nins]; i++)
+ if (i->op == al) {
+ /* stop at the first allocation of this kind
+  * whose size is not a constant
+  */
+ if (rtype(i->arg[0]) != RCon)
+ break;
+ sz = fn->con[i->arg[0].val].bits.i;
+ if (sz < 0 || sz >= INT_MAX-15)
+ err("invalid alloc size %"PRId64, sz);
+ /* round up to the alignment n, then convert
+  * to 4-byte slot units
+  */
+ sz = (sz + n-1) & -n;
+ sz /= 4;
+ if (sz > INT_MAX - fn->slot)
+ die("alloc too large");
+ fn->tmp[i->to.val].slot = fn->slot;
+ fn->slot += sz;
+ *i = (Ins){.op = Onop};
+ }
+
+ for (b=fn->start; b; b=b->link) {
+ /* new instructions are collected from the end of insb */
+ curi = &insb[NIns];
+ /* fix the phi arguments that flow out of b into each
+  * of its successors
+  */
+ for (sb=(Blk*[3]){b->s1, b->s2, 0}; *sb; sb++)
+ for (p=(*sb)->phi; p; p=p->link) {
+ for (n=0; p->blk[n] != b; n++)
+ assert(n+1 < p->narg);
+ fixarg(&p->arg[n], p->cls, 0, fn);
+ }
+ seljmp(b, fn);
+ /* walk the instructions from last to first */
+ for (i=&b->ins[b->nins]; i!=b->ins;)
+ sel(*--i, fn);
+ /* replace the block body with the selected code */
+ b->nins = &insb[NIns] - curi;
+ idup(&b->ins, curi, b->nins);
+ }
+
+ if (debug['I']) {
+ fprintf(stderr, "\n> After instruction selection:\n");
+ printfn(fn, stderr);
+ }
+}
diff --git a/rv64/targ.c b/rv64/targ.c
@@ -0,0 +1,53 @@
+#include "all.h"
+
+/* Per-opcode information, indexed by opcode. The entries are
+ * generated by including ../ops.h: O() expands to the designated
+ * index for the op and V() to the Rv64Op initializer holding its
+ * imm flag.
+ */
+Rv64Op rv64_op[NOp] = {
+#define O(op, t, x) [O##op] =
+#define V(imm) { imm },
+#include "../ops.h"
+};
+
+/* Caller-save registers, integer ones first, terminated by -1.
+ * The length is checked against NGPS+NFPS+1 at the end of this
+ * file.
+ */
+int rv64_rsave[] = {
+ T0, T1, T2, T3, T4, T5,
+ A0, A1, A2, A3, A4, A5, A6, A7,
+ FA0, FA1, FA2, FA3, FA4, FA5, FA6, FA7,
+ FT0, FT1, FT2, FT3, FT4, FT5, FT6, FT7,
+ FT8, FT9, FT10, FT11,
+ -1
+};
+/* Callee-save registers, terminated by -1. rv64_emitfn() walks
+ * this list to save and restore the ones a function uses. The
+ * length is checked against NCLR+1 at the end of this file.
+ */
+int rv64_rclob[] = {
+ S1, S2, S3, S4, S5, S6, S7,
+ S8, S9, S10, S11,
+ FS0, FS1, FS2, FS3, FS4, FS5, FS6, FS7,
+ FS8, FS9, FS10, FS11,
+ -1
+};
+
+/* Set of globally live registers, as a bit mask. */
+/* T6 used as swap register (TODO: is there a better choice?) */
+#define RGLOB (BIT(FP) | BIT(SP) | BIT(GP) | BIT(TP) | BIT(RA) | BIT(T6))
+
+/* Target hook installed as T_rv64.memargs: always returns 0,
+ * whatever the opcode.
+ */
+static int
+rv64_memargs(int op)
+{
+	return (void)op, 0;
+}
+
+/* Target description for rv64: register file layout, register
+ * sets and the entry points of the backend passes.
+ */
+Target T_rv64 = {
+ .gpr0 = T0,
+ .ngpr = NGPR,
+ .fpr0 = FT0,
+ .nfpr = NFPR,
+ .rglob = RGLOB,
+ /* number of registers in RGLOB */
+ .nrglob = 6,
+ .rsave = rv64_rsave,
+ .nrsave = {NGPS, NFPS},
+ .retregs = rv64_retregs,
+ .argregs = rv64_argregs,
+ .memargs = rv64_memargs,
+ .abi = rv64_abi,
+ .isel = rv64_isel,
+ .emitfn = rv64_emitfn,
+};
+
+/* the register lists must hold exactly the counted registers
+ * plus the -1 terminator
+ */
+MAKESURE(rsave_size_ok, sizeof rv64_rsave == (NGPS+NFPS+1) * sizeof(int));
+MAKESURE(rclob_size_ok, sizeof rv64_rclob == (NCLR+1) * sizeof(int));
diff --git a/test/dark.ssa b/test/dark.ssa
@@ -1,4 +1,4 @@
-# skip arm64
+# skip arm64 rv64
# a hack example,
# we use a dark type to get
# a pointer to the stack.
diff --git a/tools/test.sh b/tools/test.sh
@@ -43,6 +43,30 @@ init() {
fi
bin="$bin -t arm64"
;;
+ rv64)
+ for p in riscv64-linux-musl riscv64-linux-gnu
+ do
+ cc="$p-gcc -no-pie"
+ qemu="qemu-riscv64"
+ if
+ $cc -v >/dev/null 2>&1 &&
+ $qemu -version >/dev/null 2>&1
+ then
+ if sysroot=$($cc -print-sysroot) && test -n "$sysroot"
+ then
+ qemu="$qemu -L $sysroot"
+ fi
+ break
+ fi
+ cc=
+ done
+ if test -z "$cc"
+ then
+ echo "Cannot find riscv64 compiler or qemu."
+ exit 1
+ fi
+ bin="$bin -t rv64"
+ ;;
"")
case `uname` in
*Darwin*)