implement unsigned -> float casts - qbe - Internal scc patchset buffer for QBE

commit 74d022f975f22fda20c0d1fe09a3f6fc7680f64f
parent b0d27d8a019811d6a4e0c0cb7ec804ab27fcec80
Author: Bor Grošelj Simić <bor.groseljsimic@telemach.net>
Date:   Fri, 28 Jan 2022 02:06:17 +0100

implement unsigned -> float casts

amd64 lacks an instruction for this so it has to be implemented with
signed -> float casts:
 - Word casting is done by zero-extending the word to a long and then doing
   a regular signed cast.
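 - Long casting is done by halving the value if the highest bit is set
   (keeping the shifted-out low bit as a sticky bit so rounding stays
   correct), casting that to float with a signed cast, then adding 1 to
   the exponent field with an integer addition, which doubles the
   result back to the original magnitude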

Diffstat:
M amd64/isel.c  | 45 +++++++++++++++++++++++++++++++++++++++++++--
M arm64/emit.c  | 2 ++
M doc/il.txt  | 10 +++++++---
M fold.c  | 4 ++++
M ops.h  | 2 ++
M test/fpcnv.ssa  | 49 +++++++++++++++++++++++++++++++++++++++++++++++++

6 files changed, 107 insertions(+), 5 deletions(-)
diff --git a/amd64/isel.c b/amd64/isel.c
@@ -201,8 +201,8 @@ selcmp(Ref arg[2], int k, int swap, Fn *fn)
 static void
 sel(Ins i, ANum *an, Fn *fn)
 {
-	Ref r0, r1;
-	int x, k, kc, swap;
+	Ref r0, r1, tmp[7];
+	int x, j, k, kc, swap;
 	int64_t sz;
 	Ins *i0, *i1;
 
@@ -266,6 +266,47 @@ sel(Ins i, ANum *an, Fn *fn)
 		emit(Ocopy, Kw, TMP(RCX), r0, R);
 		fixarg(&i1->arg[0], argcls(&i, 0), i1, fn);
 		break;
+	case Ouwtof:
+		r0 = newtmp("utof", Kl, fn);
+		emit(Osltof, k, i.to, r0, R);
+		emit(Oextuw, Kl, r0, i.arg[0], R);
+		fixarg(&curi->arg[0], k, curi, fn);
+		break;
+	case Oultof:
+		/*
+		%mask =l and %arg.0, 1
+		%isbig =l shr %arg.0, 63
+		%divided =l shr %arg.0, %isbig
+		%or =l or %mask, %divided
+		%float =d sltof %or
+		%cast =l cast %float
+		%addend =l shl %isbig, 52
+		%sum =l add %cast, %addend
+		%result =d cast %sum
+		*/
+		r0 = newtmp("utof", k, fn);
+		if (k == Ks)
+			kc = Kw;
+		else
+			kc = Kl;
+		for (j=0; j<4; j++)
+			tmp[j] = newtmp("utof", Kl, fn);
+		for (; j<7; j++)
+			tmp[j] = newtmp("utof", kc, fn);
+		emit(Ocast, k, i.to, tmp[6], R);
+		emit(Oadd, kc, tmp[6], tmp[4], tmp[5]);
+		emit(Oshl, kc, tmp[5], tmp[1], getcon(k == Ks ? 23 : 52, fn));
+		emit(Ocast, kc, tmp[4], r0, R);
+
+		emit(Osltof, k, r0, tmp[3], R);
+		emit(Oor, Kl, tmp[3], tmp[0], tmp[2]);
+		emit(Oshr, Kl, tmp[2], i.arg[0], tmp[1]);
+		sel(*curi++, an, fn);
+		emit(Oshr, Kl, tmp[1], i.arg[0], getcon(63, fn));
+		fixarg(&curi->arg[0], Kl, curi, fn);
+		emit(Oand, Kl, tmp[0], i.arg[0], getcon(1, fn));
+		fixarg(&curi->arg[0], Kl, curi, fn);
+		break;
 	case Onop:
 		break;
 	case Ostored:
diff --git a/arm64/emit.c b/arm64/emit.c
@@ -91,7 +91,9 @@ static struct {
 	{ Ostosi,  Ka, "fcvtzs %=, %S0" },
 	{ Odtosi,  Ka, "fcvtzs %=, %D0" },
 	{ Oswtof,  Ka, "scvtf %=, %W0" },
+	{ Ouwtof,  Ka, "ucvtf %=, %W0" },
 	{ Osltof,  Ka, "scvtf %=, %L0" },
+	{ Oultof,  Ka, "ucvtf %=, %L0" },
 	{ Ocall,   Kw, "blr %L0" },
 
 	{ Oacmp,   Ki, "cmp %0, %1" },
diff --git a/doc/il.txt b/doc/il.txt
@@ -698,7 +698,9 @@ or convert a floating point into an integer and vice versa.
   * `stosi` -- `I(ss)`
   * `dtosi` -- `I(dd)`
   * `swtof` -- `F(ww)`
+  * `uwtof` -- `F(ww)`
   * `sltof` -- `F(ll)`
+  * `ultof` -- `F(ll)`
 
 Extending the precision of a temporary is done using the
 `ext` family of instructions.  Because QBE types do not
@@ -717,9 +719,9 @@ zero.
 Converting between signed integers and floating points is
 done using `stosi` (single to signed integer), `dtosi`
 (double to signed integer), `swtof` (signed word to float),
-and `sltof` (signed long to float).  These instructions
-only handle signed integers, conversion to and from
-unsigned types are not yet supported.
+`uwtof` (unsigned word to float), `sltof` (signed long
+to float) and `ultof` (unsigned long to float).  Conversion
+from floats to unsigned integers is not yet supported.
 
 Because of <@ Subtyping >, there is no need to have an
 instruction to lower the precision of an integer temporary.
@@ -990,8 +992,10 @@ instructions unless you know exactly what you are doing.
       * `extuh`
       * `extuw`
       * `sltof`
+      * `ultof`
       * `stosi`
       * `swtof`
+      * `uwtof`
       * `truncd`
 
   * <@ Cast and Copy > :
diff --git a/fold.c b/fold.c
@@ -469,7 +469,9 @@ foldflt(Con *res, int op, int w, Con *cl, Con *cr)
 		case Odiv: xd = ld / rd; break;
 		case Omul: xd = ld * rd; break;
 		case Oswtof: xd = (int32_t)cl->bits.i; break;
+		case Ouwtof: xd = (uint32_t)cl->bits.i; break;
 		case Osltof: xd = (int64_t)cl->bits.i; break;
+		case Oultof: xd = (uint64_t)cl->bits.i; break;
 		case Oexts: xd = cl->bits.s; break;
 		case Ocast: xd = ld; break;
 		default: die("unreachable");
@@ -486,7 +488,9 @@ foldflt(Con *res, int op, int w, Con *cl, Con *cr)
 		case Odiv: xs = ls / rs; break;
 		case Omul: xs = ls * rs; break;
 		case Oswtof: xs = (int32_t)cl->bits.i; break;
+		case Ouwtof: xs = (uint32_t)cl->bits.i; break;
 		case Osltof: xs = (int64_t)cl->bits.i; break;
+		case Oultof: xs = (uint64_t)cl->bits.i; break;
 		case Otruncd: xs = cl->bits.d; break;
 		case Ocast: xs = ls; break;
 		default: die("unreachable");
diff --git a/ops.h b/ops.h
@@ -98,7 +98,9 @@ O(truncd,  T(e,e,d,e, e,e,x,e), 1) X(0, 0, 1)
 O(stosi,   T(s,s,e,e, x,x,e,e), 1) X(0, 0, 1)
 O(dtosi,   T(d,d,e,e, x,x,e,e), 1) X(0, 0, 1)
 O(swtof,   T(e,e,w,w, e,e,x,x), 1) X(0, 0, 1)
+O(uwtof,   T(e,e,w,w, e,e,x,x), 1) X(0, 0, 1)
 O(sltof,   T(e,e,l,l, e,e,x,x), 1) X(0, 0, 1)
+O(ultof,   T(e,e,l,l, e,e,x,x), 1) X(0, 0, 1)
 O(cast,    T(s,d,w,l, x,x,x,x), 1) X(0, 0, 1)
 
 /* Stack Allocation */
diff --git a/test/fpcnv.ssa b/test/fpcnv.ssa
@@ -17,13 +17,62 @@ function d $ftrunc(d %f) {
 	ret %rt
 }
 
+export
+function s $wtos(w %w) {
+@start
+	%rt =s uwtof %w
+	ret %rt
+}
+export
+function d $wtod(w %w) {
+@start
+	%rt =d uwtof %w
+	ret %rt
+}
+
+export
+function s $ltos(l %l) {
+@start
+	%rt =s ultof %l
+	ret %rt
+}
+export
+function d $ltod(l %l) {
+@start
+	%rt =d ultof %l
+	ret %rt
+}
+
 # >>> driver
 # extern float fneg(float);
 # extern double ftrunc(double);
+#
+# extern float wtos(unsigned int);
+# extern double wtod(unsigned int);
+# extern float ltos(long long unsigned int);
+# extern double ltod(long long unsigned int);
+#
+# unsigned long long iin[] = { 0, 1, 16, 234987, 427386245, 0x7fff0000,
+# 	0xffff0000, 23602938196141, 72259248152500195, 9589010795705032704ull,
+# 	0xdcf5fbe299d0148aull, 0xffffffff00000000ull, -1 };
+#
 # int main() {
+# 	int i;
+#
 # 	if (fneg(1.23f) != -1.23f)  return 1;
 # 	if (ftrunc(3.1415) != 3.0)  return 2;
 # 	if (ftrunc(-1.234) != -1.0) return 3;
+#
+# 	for (i=0; i<sizeof(iin)/sizeof(iin[0]); i++) {
+# 		if (wtos(iin[i]) != (float) (unsigned int)iin[i])
+# 			return 4;
+# 		if (wtod(iin[i]) != (double)(unsigned int)iin[i])
+# 			return 5;
+# 		if (ltos(iin[i]) != (float) iin[i])
+# 			return 6;
+# 		if (ltod(iin[i]) != (double)iin[i])
+# 			return 7;
+# 	}
 # 	return 0;
 # }
 # <<<

	qbe Internal scc patchset buffer for QBE
	Log \| Files \| Refs \| README \| LICENSE

M	amd64/isel.c	\|	45	+++++++++++++++++++++++++++++++++++++++++++--
M	arm64/emit.c	\|	2	++
M	doc/il.txt	\|	10	+++++++---
M	fold.c	\|	4	++++
M	ops.h	\|	2	++
M	test/fpcnv.ssa	\|	49	+++++++++++++++++++++++++++++++++++++++++++++++++