[AArch64] Lower 2-CC FCCMPs (one/ueq) using AND'ed CCs.
The current behavior is incorrect, as the two CCs returned by
changeFPCCToAArch64CC, intended to be OR'ed, are instead used
in an AND ccmp chain.
Consider:
define i32 @t(float %a, float %b, float %c, float %d, i32 %e, i32 %f) {
%cc1 = fcmp one float %a, %b
%cc2 = fcmp olt float %c, %d
%and = and i1 %cc1, %cc2
%r = select i1 %and, i32 %e, i32 %f
ret i32 %r
}
Assuming (%a < %b) and (%c < %d), we used to generate:
fcmp s0, s1 # nzcv <- 1000
orr w8, wzr, #0x1 # w8 <- 1
csel w9, w8, wzr, mi # w9 <- 1
csel w8, w8, w9, gt # w8 <- 1
fcmp s2, s3 # nzcv <- 1000
cset w9, mi # w9 <- 1
tst w8, w9 # (w8 & w9) == 1, so: nzcv <- 0000
csel w0, w0, w1, ne # w0 <- w0
We now do:
fcmp s2, s3 # nzcv <- 1000
fccmp s0, s1, #0, mi # mi, so: nzcv <- 1000
fccmp s0, s1, #8, le # !le, so: nzcv <- 1000
csel w0, w0, w1, pl # !pl, so: w0 <- w1
In other words, we transformed:
(c < d) && ((a < b) || (a > b))
into:
(c < d) && (a u>= b) && (a u<= b)
whereas, per De Morgan's, we wanted:
(c < d) && !((a u>= b) && (a u<= b))
Note that this problem doesn't occur in the test-suite.
changeFPCCToAArch64CC produces disjunct CCs; here, one -> mi/gt.
We can't represent that in the fccmp chain, because the chain can't
express arbitrary OR sequences, as a comment in the code explains:
In general we can create code for arbitrary "... (and (and A B) C)"
sequences. We can also implement some "or" expressions, because
"(or A B)" is equivalent to "not (and (not A) (not B))" and we can
implement some negation operations. [...] However there is no way
to negate the result of a partial sequence.
Instead, introduce changeFPCCToANDAArch64CC, which produces the
conjunct cond codes:
- (a one b)
== ((a olt b) || (a ogt b))
== ((a ord b) && (a une b))
- (a ueq b)
== ((a uno b) || (a oeq b))
== ((a ule b) && (a uge b))
Note that, at first, one might think that, when PushNegate is true,
we should use the disjunct CCs, in effect doing:
(a || b)
= !(!a && !(b))
= !(!a && !(b1 || b2)) <- changeFPCCToAArch64CC(b, b1, b2)
= !(!a && !b1 && !b2)
However, we can take advantage of the fact that the CC is already
negated, which lets us avoid special-casing PushNegate and instead do
the simpler-to-reason-about:
(a || b)
= !(!a && (!b))
= !(!a && (b1 && b2)) <- changeFPCCToANDAArch64CC(!b, b1, b2)
= !(!a && b1 && b2)
This makes both emitConditionalCompare cases behave identically,
and produces correct ccmp sequences for the 2-CC fcmps.
llvm-svn: 258533
This commit is contained in:
parent
6345b9ecfa
commit
99209b90a4
|
|
@ -1135,6 +1135,35 @@ static void changeFPCCToAArch64CC(ISD::CondCode CC,
|
|||
}
|
||||
}
|
||||
|
||||
/// Convert a DAG fp condition code to an AArch64 CC.
|
||||
/// This differs from changeFPCCToAArch64CC in that it returns cond codes that
|
||||
/// should be AND'ed instead of OR'ed.
///
/// \param CC        The DAG floating-point condition code to translate.
/// \param CondCode  [out] The first (or only) AArch64 condition code.
/// \param CondCode2 [out] A second AArch64 condition code that must hold
///                  together with \p CondCode, or AArch64CC::AL when a single
///                  code is enough.
|
||||
static void changeFPCCToANDAArch64CC(ISD::CondCode CC,
|
||||
AArch64CC::CondCode &CondCode,
|
||||
AArch64CC::CondCode &CondCode2) {
|
||||
// AL in CondCode2 signals "no second condition"; only the two cases below
// override it.
CondCode2 = AArch64CC::AL;
|
||||
switch (CC) {
|
||||
default:
|
||||
// Every other predicate is expected to map to a single AArch64 CC, so reuse
// the OR-flavoured helper and check that it did not produce a second code.
changeFPCCToAArch64CC(CC, CondCode, CondCode2);
|
||||
assert(CondCode2 == AArch64CC::AL);
|
||||
break;
|
||||
case ISD::SETONE:
|
||||
// (a one b)
|
||||
// == ((a olt b) || (a ogt b))
|
||||
// == ((a ord b) && (a une b))
|
||||
// VC and NE are the codes used for the "ord" and "une" terms above.
CondCode = AArch64CC::VC;
|
||||
CondCode2 = AArch64CC::NE;
|
||||
break;
|
||||
case ISD::SETUEQ:
|
||||
// (a ueq b)
|
||||
// == ((a uno b) || (a oeq b))
|
||||
// == ((a ule b) && (a uge b))
|
||||
// PL is used for the "uge" term and LE for the "ule" term; the two are
// AND'ed, so their order relative to the formula above does not matter.
CondCode = AArch64CC::PL;
|
||||
CondCode2 = AArch64CC::LE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/// changeVectorFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64
|
||||
/// CC usable with the vector instructions. Fewer operations are available
|
||||
/// without a real NZCV register, so we have to use less efficient combinations
|
||||
|
|
@ -1344,24 +1373,23 @@ static SDValue emitConjunctionDisjunctionTree(SelectionDAG &DAG, SDValue Val,
|
|||
} else {
|
||||
assert(LHS.getValueType().isFloatingPoint());
|
||||
AArch64CC::CondCode ExtraCC;
|
||||
changeFPCCToAArch64CC(CC, OutCC, ExtraCC);
|
||||
// Surpisingly some floating point conditions can't be tested with a
|
||||
// single condition code. Construct an additional comparison in this case.
|
||||
// See comment below on how we deal with OR conditions.
|
||||
changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
|
||||
// Some floating point conditions can't be tested with a single condition
|
||||
// code. Construct an additional comparison in this case.
|
||||
if (ExtraCC != AArch64CC::AL) {
|
||||
SDValue ExtraCmp;
|
||||
if (!CCOp.getNode())
|
||||
ExtraCmp = emitComparison(LHS, RHS, CC, DL, DAG);
|
||||
else {
|
||||
SDValue ConditionOp = DAG.getConstant(Predicate, DL, MVT_CC);
|
||||
// Note that we want the inverse of ExtraCC, so NZCV is not inversed.
|
||||
unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(ExtraCC);
|
||||
AArch64CC::CondCode InvExtraCC =
|
||||
AArch64CC::getInvertedCondCode(ExtraCC);
|
||||
unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvExtraCC);
|
||||
ExtraCmp = emitConditionalComparison(LHS, RHS, CC, CCOp, ConditionOp,
|
||||
NZCV, DL, DAG);
|
||||
}
|
||||
CCOp = ExtraCmp;
|
||||
Predicate = AArch64CC::getInvertedCondCode(ExtraCC);
|
||||
OutCC = AArch64CC::getInvertedCondCode(OutCC);
|
||||
Predicate = ExtraCC;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -317,24 +317,6 @@ define i64 @select_or(i32 %w0, i32 %w1, i64 %x2, i64 %x3) {
|
|||
ret i64 %sel
|
||||
}
|
||||
|
||||
; CHECK-LABEL: select_complicated
|
||||
define i16 @select_complicated(double %v1, double %v2, i16 %a, i16 %b) {
|
||||
; CHECK: ldr [[REG:d[0-9]+]],
|
||||
; CHECK: fcmp d0, d2
|
||||
; CHECK-NEXT: fmov d2, #13.00000000
|
||||
; CHECK-NEXT: fccmp d1, d2, #4, ne
|
||||
; CHECK-NEXT: fccmp d0, d1, #1, ne
|
||||
; CHECK-NEXT: fccmp d0, d1, #4, vc
|
||||
; CEHCK-NEXT: csel w0, w0, w1, eq
|
||||
%1 = fcmp one double %v1, %v2
|
||||
%2 = fcmp oeq double %v2, 13.0
|
||||
%3 = fcmp oeq double %v1, 42.0
|
||||
%or0 = or i1 %2, %3
|
||||
%or1 = or i1 %1, %or0
|
||||
%sel = select i1 %or1, i16 %a, i16 %b
|
||||
ret i16 %sel
|
||||
}
|
||||
|
||||
; CHECK-LABEL: gccbug
|
||||
define i64 @gccbug(i64 %x0, i64 %x1) {
|
||||
; CHECK: cmp x0, #2
|
||||
|
|
@ -443,3 +425,163 @@ define i64 @select_noccmp2(i64 %v1, i64 %v2, i64 %v3, i64 %r) {
|
|||
store volatile i32 %ext, i32* @g
|
||||
ret i64 %sel
|
||||
}
|
||||
|
||||
; Test the IR CCs that expand to two cond codes.
|
||||
|
||||
; CHECK-LABEL: _select_and_olt_one:
|
||||
; CHECK-LABEL: ; BB#0:
|
||||
; CHECK-NEXT: fcmp d0, d1
|
||||
; CHECK-NEXT: fccmp d2, d3, #4, mi
|
||||
; CHECK-NEXT: fccmp d2, d3, #1, ne
|
||||
; CHECK-NEXT: csel w0, w0, w1, vc
|
||||
; CHECK-NEXT: ret
|
||||
define i32 @select_and_olt_one(double %v0, double %v1, double %v2, double %v3, i32 %a, i32 %b) #0 {
|
||||
%c0 = fcmp olt double %v0, %v1
|
||||
%c1 = fcmp one double %v2, %v3
|
||||
%cr = and i1 %c1, %c0
|
||||
%sel = select i1 %cr, i32 %a, i32 %b
|
||||
ret i32 %sel
|
||||
}
|
||||
|
||||
; CHECK-LABEL: _select_and_one_olt:
|
||||
; CHECK-LABEL: ; BB#0:
|
||||
; CHECK-NEXT: fcmp d0, d1
|
||||
; CHECK-NEXT: fccmp d0, d1, #1, ne
|
||||
; CHECK-NEXT: fccmp d2, d3, #0, vc
|
||||
; CHECK-NEXT: csel w0, w0, w1, mi
|
||||
; CHECK-NEXT: ret
|
||||
define i32 @select_and_one_olt(double %v0, double %v1, double %v2, double %v3, i32 %a, i32 %b) #0 {
|
||||
%c0 = fcmp one double %v0, %v1
|
||||
%c1 = fcmp olt double %v2, %v3
|
||||
%cr = and i1 %c1, %c0
|
||||
%sel = select i1 %cr, i32 %a, i32 %b
|
||||
ret i32 %sel
|
||||
}
|
||||
|
||||
; CHECK-LABEL: _select_and_olt_ueq:
|
||||
; CHECK-LABEL: ; BB#0:
|
||||
; CHECK-NEXT: fcmp d0, d1
|
||||
; CHECK-NEXT: fccmp d2, d3, #0, mi
|
||||
; CHECK-NEXT: fccmp d2, d3, #8, le
|
||||
; CHECK-NEXT: csel w0, w0, w1, pl
|
||||
; CHECK-NEXT: ret
|
||||
define i32 @select_and_olt_ueq(double %v0, double %v1, double %v2, double %v3, i32 %a, i32 %b) #0 {
|
||||
%c0 = fcmp olt double %v0, %v1
|
||||
%c1 = fcmp ueq double %v2, %v3
|
||||
%cr = and i1 %c1, %c0
|
||||
%sel = select i1 %cr, i32 %a, i32 %b
|
||||
ret i32 %sel
|
||||
}
|
||||
|
||||
; CHECK-LABEL: _select_and_ueq_olt:
|
||||
; CHECK-LABEL: ; BB#0:
|
||||
; CHECK-NEXT: fcmp d0, d1
|
||||
; CHECK-NEXT: fccmp d0, d1, #8, le
|
||||
; CHECK-NEXT: fccmp d2, d3, #0, pl
|
||||
; CHECK-NEXT: csel w0, w0, w1, mi
|
||||
; CHECK-NEXT: ret
|
||||
define i32 @select_and_ueq_olt(double %v0, double %v1, double %v2, double %v3, i32 %a, i32 %b) #0 {
|
||||
%c0 = fcmp ueq double %v0, %v1
|
||||
%c1 = fcmp olt double %v2, %v3
|
||||
%cr = and i1 %c1, %c0
|
||||
%sel = select i1 %cr, i32 %a, i32 %b
|
||||
ret i32 %sel
|
||||
}
|
||||
|
||||
; CHECK-LABEL: _select_or_olt_one:
|
||||
; CHECK-LABEL: ; BB#0:
|
||||
; CHECK-NEXT: fcmp d0, d1
|
||||
; CHECK-NEXT: fccmp d2, d3, #0, pl
|
||||
; CHECK-NEXT: fccmp d2, d3, #8, le
|
||||
; CHECK-NEXT: csel w0, w0, w1, mi
|
||||
; CHECK-NEXT: ret
|
||||
define i32 @select_or_olt_one(double %v0, double %v1, double %v2, double %v3, i32 %a, i32 %b) #0 {
|
||||
%c0 = fcmp olt double %v0, %v1
|
||||
%c1 = fcmp one double %v2, %v3
|
||||
%cr = or i1 %c1, %c0
|
||||
%sel = select i1 %cr, i32 %a, i32 %b
|
||||
ret i32 %sel
|
||||
}
|
||||
|
||||
; CHECK-LABEL: _select_or_one_olt:
|
||||
; CHECK-LABEL: ; BB#0:
|
||||
; CHECK-NEXT: fcmp d0, d1
|
||||
; CHECK-NEXT: fccmp d0, d1, #1, ne
|
||||
; CHECK-NEXT: fccmp d2, d3, #8, vs
|
||||
; CHECK-NEXT: csel w0, w0, w1, mi
|
||||
; CHECK-NEXT: ret
|
||||
define i32 @select_or_one_olt(double %v0, double %v1, double %v2, double %v3, i32 %a, i32 %b) #0 {
|
||||
%c0 = fcmp one double %v0, %v1
|
||||
%c1 = fcmp olt double %v2, %v3
|
||||
%cr = or i1 %c1, %c0
|
||||
%sel = select i1 %cr, i32 %a, i32 %b
|
||||
ret i32 %sel
|
||||
}
|
||||
|
||||
; CHECK-LABEL: _select_or_olt_ueq:
|
||||
; CHECK-LABEL: ; BB#0:
|
||||
; CHECK-NEXT: fcmp d0, d1
|
||||
; CHECK-NEXT: fccmp d2, d3, #4, pl
|
||||
; CHECK-NEXT: fccmp d2, d3, #1, ne
|
||||
; CHECK-NEXT: csel w0, w0, w1, vs
|
||||
; CHECK-NEXT: ret
|
||||
define i32 @select_or_olt_ueq(double %v0, double %v1, double %v2, double %v3, i32 %a, i32 %b) #0 {
|
||||
%c0 = fcmp olt double %v0, %v1
|
||||
%c1 = fcmp ueq double %v2, %v3
|
||||
%cr = or i1 %c1, %c0
|
||||
%sel = select i1 %cr, i32 %a, i32 %b
|
||||
ret i32 %sel
|
||||
}
|
||||
|
||||
; CHECK-LABEL: _select_or_ueq_olt:
|
||||
; CHECK-LABEL: ; BB#0:
|
||||
; CHECK-NEXT: fcmp d0, d1
|
||||
; CHECK-NEXT: fccmp d0, d1, #8, le
|
||||
; CHECK-NEXT: fccmp d2, d3, #8, mi
|
||||
; CHECK-NEXT: csel w0, w0, w1, mi
|
||||
; CHECK-NEXT: ret
|
||||
define i32 @select_or_ueq_olt(double %v0, double %v1, double %v2, double %v3, i32 %a, i32 %b) #0 {
|
||||
%c0 = fcmp ueq double %v0, %v1
|
||||
%c1 = fcmp olt double %v2, %v3
|
||||
%cr = or i1 %c1, %c0
|
||||
%sel = select i1 %cr, i32 %a, i32 %b
|
||||
ret i32 %sel
|
||||
}
|
||||
|
||||
; CHECK-LABEL: _select_or_olt_ogt_ueq:
|
||||
; CHECK-LABEL: ; BB#0:
|
||||
; CHECK-NEXT: fcmp d0, d1
|
||||
; CHECK-NEXT: fccmp d2, d3, #0, pl
|
||||
; CHECK-NEXT: fccmp d4, d5, #4, le
|
||||
; CHECK-NEXT: fccmp d4, d5, #1, ne
|
||||
; CHECK-NEXT: csel w0, w0, w1, vs
|
||||
; CHECK-NEXT: ret
|
||||
define i32 @select_or_olt_ogt_ueq(double %v0, double %v1, double %v2, double %v3, double %v4, double %v5, i32 %a, i32 %b) #0 {
|
||||
%c0 = fcmp olt double %v0, %v1
|
||||
%c1 = fcmp ogt double %v2, %v3
|
||||
%c2 = fcmp ueq double %v4, %v5
|
||||
%c3 = or i1 %c1, %c0
|
||||
%cr = or i1 %c2, %c3
|
||||
%sel = select i1 %cr, i32 %a, i32 %b
|
||||
ret i32 %sel
|
||||
}
|
||||
|
||||
; CHECK-LABEL: _select_or_olt_ueq_ogt:
|
||||
; CHECK-LABEL: ; BB#0:
|
||||
; CHECK-NEXT: fcmp d0, d1
|
||||
; CHECK-NEXT: fccmp d2, d3, #4, pl
|
||||
; CHECK-NEXT: fccmp d2, d3, #1, ne
|
||||
; CHECK-NEXT: fccmp d4, d5, #0, vc
|
||||
; CHECK-NEXT: csel w0, w0, w1, gt
|
||||
; CHECK-NEXT: ret
|
||||
define i32 @select_or_olt_ueq_ogt(double %v0, double %v1, double %v2, double %v3, double %v4, double %v5, i32 %a, i32 %b) #0 {
|
||||
%c0 = fcmp olt double %v0, %v1
|
||||
%c1 = fcmp ueq double %v2, %v3
|
||||
%c2 = fcmp ogt double %v4, %v5
|
||||
%c3 = or i1 %c1, %c0
|
||||
%cr = or i1 %c2, %c3
|
||||
%sel = select i1 %cr, i32 %a, i32 %b
|
||||
ret i32 %sel
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
|
|
|
|||
Loading…
Reference in New Issue