commit 804921a3ab463848aa0ffbe495ca542b3789c841
parent 3cbad4d9c465d3f298cbe19c46f7c16f6a9b9f0f
Author: Michael Forney <mforney@mforney.org>
Date: Tue, 17 Aug 2021 13:14:54 -0700
amd64/isel: fix floating < and <= result with NaN
When the two operands are Unordered (for instance if one of them
is NaN), ucomisd sets ZF=1, PF=1, and CF=1. When the result is
LessThan, it sets ZF=0, PF=0, and CF=1.
However, jb[e]/setb[e] only check that CF=1 [or ZF=1], which causes
the result to be true for unordered operands.
To fix this, change the operand swap condition for these two floating
point comparison types: always rewrite x < y as y > x, and never
rewrite x > y as y < x.
Add a test to check the result of cltd, cled, cgtd, cged, ceqd, and
cned with arguments that are LessThan, Equal, GreaterThan, and
Unordered. Additionally, check three different implementations for
equality testing: one that uses the result of ceqd directly, one
that uses the result to control a conditional jump, and one that
uses the result both as a value and for a conditional jump. For
now, the unordered (NaN) cases of the ceqd and cned tests are still
broken on amd64, so they are disabled in the test driver.
Diffstat:
M amd64/isel.c   |  40 +++++++++++++++++++++++++++-------------
A test/isel2.ssa | 108 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 135 insertions(+), 13 deletions(-)
diff --git a/amd64/isel.c b/amd64/isel.c
@@ -165,13 +165,25 @@ seladdr(Ref *r, ANum *an, Fn *fn)
}
static int
-selcmp(Ref arg[2], int k, Fn *fn)
+cmpswap(Ref arg[2], int op)
+{
+ switch (op) {
+ case NCmpI+Cflt:
+ case NCmpI+Cfle:
+ return 1;
+ case NCmpI+Cfgt:
+ case NCmpI+Cfge:
+ return 0;
+ }
+ return rtype(arg[0]) == RCon;
+}
+
+static void
+selcmp(Ref arg[2], int k, int swap, Fn *fn)
{
- int swap;
Ref r;
Ins *icmp;
- swap = rtype(arg[0]) == RCon;
if (swap) {
r = arg[1];
arg[1] = arg[0];
@@ -180,20 +192,20 @@ selcmp(Ref arg[2], int k, Fn *fn)
emit(Oxcmp, k, R, arg[1], arg[0]);
icmp = curi;
if (rtype(arg[0]) == RCon) {
- assert(k == Kl);
+ assert(k != Kw);
icmp->arg[1] = newtmp("isel", k, fn);
emit(Ocopy, k, icmp->arg[1], arg[0], R);
+ fixarg(&curi->arg[0], k, curi, fn);
}
fixarg(&icmp->arg[0], k, icmp, fn);
fixarg(&icmp->arg[1], k, icmp, fn);
- return swap;
}
static void
sel(Ins i, ANum *an, Fn *fn)
{
Ref r0, r1;
- int x, k, kc;
+ int x, k, kc, swap;
int64_t sz;
Ins *i0, *i1;
@@ -332,10 +344,11 @@ Emit:
if (isload(i.op))
goto case_Oload;
if (iscmp(i.op, &kc, &x)) {
+ swap = cmpswap(i.arg, x);
+ if (swap)
+ x = cmpop(x);
emit(Oflag+x, k, i.to, R, R);
- i1 = curi;
- if (selcmp(i.arg, kc, fn))
- i1->op = Oflag + cmpop(x);
+ selcmp(i.arg, kc, swap, fn);
break;
}
die("unknown instruction %s", optab[i.op].name);
@@ -365,7 +378,7 @@ static void
seljmp(Blk *b, Fn *fn)
{
Ref r;
- int c, k;
+ int c, k, swap;
Ins *fi;
Tmp *t;
@@ -384,14 +397,15 @@ seljmp(Blk *b, Fn *fn)
}
fi = flagi(b->ins, &b->ins[b->nins]);
if (!fi || !req(fi->to, r)) {
- selcmp((Ref[2]){r, CON_Z}, Kw, fn); /* todo, long jnz */
+ selcmp((Ref[2]){r, CON_Z}, Kw, 0, fn); /* todo, long jnz */
b->jmp.type = Jjf + Cine;
}
else if (iscmp(fi->op, &k, &c)) {
- if (rtype(fi->arg[0]) == RCon)
+ swap = cmpswap(fi->arg, c);
+ if (swap)
c = cmpop(c);
if (t->nuse == 1) {
- selcmp(fi->arg, k, fn);
+ selcmp(fi->arg, k, swap, fn);
*fi = (Ins){.op = Onop};
}
b->jmp.type = Jjf + c;
diff --git a/test/isel2.ssa b/test/isel2.ssa
@@ -0,0 +1,108 @@
+# tests that NaN is handled properly by
+# floating point comparisons
+#
+# TODO: fix eq[123](NAN, NAN) on amd64
+
+export function w $lt(d %x, d %y) {
+@start
+ %r =w cltd %x, %y
+ ret %r
+}
+
+export function w $le(d %x, d %y) {
+@start
+ %r =w cled %x, %y
+ ret %r
+}
+
+export function w $gt(d %x, d %y) {
+@start
+ %r =w cgtd %x, %y
+ ret %r
+}
+
+export function w $ge(d %x, d %y) {
+@start
+ %r =w cged %x, %y
+ ret %r
+}
+
+export function w $eq1(d %x, d %y) {
+@start
+ %r =w ceqd %x, %y
+ ret %r
+}
+
+export function w $eq2(d %x, d %y) {
+@start
+ %r =w ceqd %x, %y
+ jnz %r, @true, @false
+@true
+ ret 1
+@false
+ ret 0
+}
+
+export function w $eq3(d %x, d %y) {
+@start
+ %r =w ceqd %x, %y
+ jnz %r, @true, @false
+@true
+ ret %r
+@false
+ ret 0
+}
+
+export function w $ne1(d %x, d %y) {
+@start
+ %r =w cned %x, %y
+ ret %r
+}
+
+export function w $ne2(d %x, d %y) {
+@start
+ %r =w cned %x, %y
+ jnz %r, @true, @false
+@true
+ ret 1
+@false
+ ret 0
+}
+
+export function w $ne3(d %x, d %y) {
+@start
+ %r =w cned %x, %y
+ jnz %r, @true, @false
+@true
+ ret %r
+@false
+ ret 0
+}
+
+# >>> driver
+# #include <math.h>
+# extern int lt(double, double);
+# extern int le(double, double);
+# extern int gt(double, double);
+# extern int ge(double, double);
+# extern int eq1(double, double);
+# extern int eq2(double, double);
+# extern int eq3(double, double);
+# extern int ne1(double, double);
+# extern int ne2(double, double);
+# extern int ne3(double, double);
+# int main(void) {
+# /* LessThan Equal GreaterThan Unordered */
+# return !lt(0, 1) + lt(0, 0) + lt(1, 0) + lt(NAN, NAN)
+# + !le(0, 1) + !le(0, 0) + le(1, 0) + le(NAN, NAN)
+# + gt(0, 1) + gt(0, 0) + !gt(1, 0) + gt(NAN, NAN)
+# + ge(0, 1) + !ge(0, 0) + !ge(1, 0) + ge(NAN, NAN)
+# + eq1(0, 1) + !eq1(0, 0) + eq1(1, 0) /*+ eq1(NAN, NAN)*/
+# + eq2(0, 1) + !eq2(0, 0) + eq2(1, 0) /*+ eq2(NAN, NAN)*/
+# + eq3(0, 1) + !eq3(0, 0) + eq3(1, 0) /*+ eq3(NAN, NAN)*/
+# + !ne1(0, 1) + ne1(0, 0) + !ne1(1, 0) /*+ !ne1(NAN, NAN)*/
+# + !ne2(0, 1) + ne2(0, 0) + !ne2(1, 0) /*+ !ne2(NAN, NAN)*/
+# + !ne3(0, 1) + ne3(0, 0) + !ne3(1, 0) /*+ !ne3(NAN, NAN)*/
+# ;
+# }
+# <<<