[ISel] Expand saddsat and ssubsat via asr and xor
This changes the lowering of saddsat and ssubsat so that, instead of using:

    r,o = saddo x, y
    c = setcc r < 0
    s = c ? INTMAX : INTMIN
    ret o ? s : r

it uses asr and xor to materialize the INTMAX/INTMIN constants:

    r,o = saddo x, y
    s = ashr r, BW-1
    x = xor s, INTMIN
    ret o ? x : r

https://alive2.llvm.org/ce/z/TYufgD

This seems to reduce the instruction count in most testcases across most architectures. X86 has some custom lowering added to compensate for cases where it can increase instruction count.

Differential Revision: https://reviews.llvm.org/D105853
This commit is contained in:
parent
a9cc662722
commit
d10f23a25d
|
|
@ -816,7 +816,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSHLSAT(SDNode *N) {
|
||||||
|
|
||||||
// Shift cannot use a min/max expansion, we can't detect overflow if all of
|
// Shift cannot use a min/max expansion, we can't detect overflow if all of
|
||||||
// the bits have been shifted out.
|
// the bits have been shifted out.
|
||||||
if (IsShift || TLI.isOperationLegalOrCustom(Opcode, PromotedType)) {
|
if (IsShift || TLI.isOperationLegal(Opcode, PromotedType)) {
|
||||||
unsigned ShiftOp;
|
unsigned ShiftOp;
|
||||||
switch (Opcode) {
|
switch (Opcode) {
|
||||||
case ISD::SADDSAT:
|
case ISD::SADDSAT:
|
||||||
|
|
|
||||||
|
|
@ -8103,14 +8103,12 @@ SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
|
||||||
return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
|
return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
|
||||||
}
|
}
|
||||||
|
|
||||||
// SatMax -> Overflow && SumDiff < 0
|
// Overflow ? (SumDiff >> (BW - 1)) ^ MinVal : SumDiff
|
||||||
// SatMin -> Overflow && SumDiff >= 0
|
|
||||||
APInt MinVal = APInt::getSignedMinValue(BitWidth);
|
APInt MinVal = APInt::getSignedMinValue(BitWidth);
|
||||||
APInt MaxVal = APInt::getSignedMaxValue(BitWidth);
|
|
||||||
SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
|
SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
|
||||||
SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
|
SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, SumDiff,
|
||||||
SDValue SumNeg = DAG.getSetCC(dl, BoolVT, SumDiff, Zero, ISD::SETLT);
|
DAG.getConstant(BitWidth - 1, dl, VT));
|
||||||
Result = DAG.getSelect(dl, VT, SumNeg, SatMax, SatMin);
|
Result = DAG.getNode(ISD::XOR, dl, VT, Shift, SatMin);
|
||||||
return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
|
return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -8421,7 +8419,7 @@ void TargetLowering::expandSADDSUBO(
|
||||||
|
|
||||||
// If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
|
// If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
|
||||||
unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
|
unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
|
||||||
if (isOperationLegalOrCustom(OpcSat, LHS.getValueType())) {
|
if (isOperationLegal(OpcSat, LHS.getValueType())) {
|
||||||
SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
|
SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
|
||||||
SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
|
SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
|
||||||
Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
|
Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
|
||||||
|
|
|
||||||
|
|
@ -207,6 +207,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
||||||
setOperationAction(ISD::ABS , MVT::i64 , Custom);
|
setOperationAction(ISD::ABS , MVT::i64 , Custom);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Signed saturation subtraction.
|
||||||
|
setOperationAction(ISD::SSUBSAT , MVT::i8 , Custom);
|
||||||
|
setOperationAction(ISD::SSUBSAT , MVT::i16 , Custom);
|
||||||
|
setOperationAction(ISD::SSUBSAT , MVT::i32 , Custom);
|
||||||
|
if (Subtarget.is64Bit())
|
||||||
|
setOperationAction(ISD::SSUBSAT , MVT::i64 , Custom);
|
||||||
|
|
||||||
// Funnel shifts.
|
// Funnel shifts.
|
||||||
for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) {
|
for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) {
|
||||||
// For slow shld targets we only lower for code size.
|
// For slow shld targets we only lower for code size.
|
||||||
|
|
@ -1142,6 +1149,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
||||||
setOperationAction(ISD::UMIN, MVT::v4i32, Legal);
|
setOperationAction(ISD::UMIN, MVT::v4i32, Legal);
|
||||||
|
|
||||||
setOperationAction(ISD::UADDSAT, MVT::v4i32, Custom);
|
setOperationAction(ISD::UADDSAT, MVT::v4i32, Custom);
|
||||||
|
setOperationAction(ISD::SADDSAT, MVT::v2i64, Custom);
|
||||||
|
setOperationAction(ISD::SSUBSAT, MVT::v2i64, Custom);
|
||||||
|
|
||||||
// FIXME: Do we need to handle scalar-to-vector here?
|
// FIXME: Do we need to handle scalar-to-vector here?
|
||||||
setOperationAction(ISD::MUL, MVT::v4i32, Legal);
|
setOperationAction(ISD::MUL, MVT::v4i32, Legal);
|
||||||
|
|
@ -27958,6 +27967,25 @@ static SDValue LowerADDSAT_SUBSAT(SDValue Op, SelectionDAG &DAG,
|
||||||
return DAG.getSelect(DL, VT, Cmp, Sub, DAG.getConstant(0, DL, VT));
|
return DAG.getSelect(DL, VT, Cmp, Sub, DAG.getConstant(0, DL, VT));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ((Opcode == ISD::SADDSAT || Opcode == ISD::SSUBSAT) &&
|
||||||
|
(!VT.isVector() || VT == MVT::v2i64)) {
|
||||||
|
unsigned BitWidth = VT.getScalarSizeInBits();
|
||||||
|
APInt MinVal = APInt::getSignedMinValue(BitWidth);
|
||||||
|
APInt MaxVal = APInt::getSignedMaxValue(BitWidth);
|
||||||
|
SDValue Zero = DAG.getConstant(0, DL, VT);
|
||||||
|
SDValue Result =
|
||||||
|
DAG.getNode(Opcode == ISD::SADDSAT ? ISD::SADDO : ISD::SSUBO, DL,
|
||||||
|
DAG.getVTList(VT, SetCCResultType), X, Y);
|
||||||
|
SDValue SumDiff = Result.getValue(0);
|
||||||
|
SDValue Overflow = Result.getValue(1);
|
||||||
|
SDValue SatMin = DAG.getConstant(MinVal, DL, VT);
|
||||||
|
SDValue SatMax = DAG.getConstant(MaxVal, DL, VT);
|
||||||
|
SDValue SumNeg =
|
||||||
|
DAG.getSetCC(DL, SetCCResultType, SumDiff, Zero, ISD::SETLT);
|
||||||
|
Result = DAG.getSelect(DL, VT, SumNeg, SatMax, SatMin);
|
||||||
|
return DAG.getSelect(DL, VT, Overflow, Result, SumDiff);
|
||||||
|
}
|
||||||
|
|
||||||
// Use default expansion.
|
// Use default expansion.
|
||||||
return SDValue();
|
return SDValue();
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -12,11 +12,9 @@ define i32 @func(i32 %x, i32 %y) nounwind {
|
||||||
; CHECK-LABEL: func:
|
; CHECK-LABEL: func:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: adds w8, w0, w1
|
; CHECK-NEXT: adds w8, w0, w1
|
||||||
; CHECK-NEXT: mov w9, #2147483647
|
; CHECK-NEXT: asr w9, w8, #31
|
||||||
; CHECK-NEXT: cmp w8, #0
|
; CHECK-NEXT: eor w9, w9, #0x80000000
|
||||||
; CHECK-NEXT: cinv w8, w9, ge
|
; CHECK-NEXT: csel w0, w9, w8, vs
|
||||||
; CHECK-NEXT: adds w9, w0, w1
|
|
||||||
; CHECK-NEXT: csel w0, w8, w9, vs
|
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
%tmp = call i32 @llvm.sadd.sat.i32(i32 %x, i32 %y);
|
%tmp = call i32 @llvm.sadd.sat.i32(i32 %x, i32 %y);
|
||||||
ret i32 %tmp;
|
ret i32 %tmp;
|
||||||
|
|
@ -26,11 +24,9 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
|
||||||
; CHECK-LABEL: func2:
|
; CHECK-LABEL: func2:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: adds x8, x0, x1
|
; CHECK-NEXT: adds x8, x0, x1
|
||||||
; CHECK-NEXT: mov x9, #9223372036854775807
|
; CHECK-NEXT: asr x9, x8, #63
|
||||||
; CHECK-NEXT: cmp x8, #0
|
; CHECK-NEXT: eor x9, x9, #0x8000000000000000
|
||||||
; CHECK-NEXT: cinv x8, x9, ge
|
; CHECK-NEXT: csel x0, x9, x8, vs
|
||||||
; CHECK-NEXT: adds x9, x0, x1
|
|
||||||
; CHECK-NEXT: csel x0, x8, x9, vs
|
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
%tmp = call i64 @llvm.sadd.sat.i64(i64 %x, i64 %y);
|
%tmp = call i64 @llvm.sadd.sat.i64(i64 %x, i64 %y);
|
||||||
ret i64 %tmp;
|
ret i64 %tmp;
|
||||||
|
|
|
||||||
|
|
@ -11,11 +11,9 @@ define i32 @func32(i32 %x, i32 %y, i32 %z) nounwind {
|
||||||
; CHECK-LABEL: func32:
|
; CHECK-LABEL: func32:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: mul w8, w1, w2
|
; CHECK-NEXT: mul w8, w1, w2
|
||||||
; CHECK-NEXT: adds w10, w0, w8
|
|
||||||
; CHECK-NEXT: mov w9, #2147483647
|
|
||||||
; CHECK-NEXT: cmp w10, #0
|
|
||||||
; CHECK-NEXT: cinv w9, w9, ge
|
|
||||||
; CHECK-NEXT: adds w8, w0, w8
|
; CHECK-NEXT: adds w8, w0, w8
|
||||||
|
; CHECK-NEXT: asr w9, w8, #31
|
||||||
|
; CHECK-NEXT: eor w9, w9, #0x80000000
|
||||||
; CHECK-NEXT: csel w0, w9, w8, vs
|
; CHECK-NEXT: csel w0, w9, w8, vs
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
%a = mul i32 %y, %z
|
%a = mul i32 %y, %z
|
||||||
|
|
@ -27,11 +25,9 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind {
|
||||||
; CHECK-LABEL: func64:
|
; CHECK-LABEL: func64:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: adds x8, x0, x2
|
; CHECK-NEXT: adds x8, x0, x2
|
||||||
; CHECK-NEXT: mov x9, #9223372036854775807
|
; CHECK-NEXT: asr x9, x8, #63
|
||||||
; CHECK-NEXT: cmp x8, #0
|
; CHECK-NEXT: eor x9, x9, #0x8000000000000000
|
||||||
; CHECK-NEXT: cinv x8, x9, ge
|
; CHECK-NEXT: csel x0, x9, x8, vs
|
||||||
; CHECK-NEXT: adds x9, x0, x2
|
|
||||||
; CHECK-NEXT: csel x0, x8, x9, vs
|
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
%a = mul i64 %y, %z
|
%a = mul i64 %y, %z
|
||||||
%tmp = call i64 @llvm.sadd.sat.i64(i64 %x, i64 %z)
|
%tmp = call i64 @llvm.sadd.sat.i64(i64 %x, i64 %z)
|
||||||
|
|
|
||||||
|
|
@ -351,26 +351,23 @@ define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
|
||||||
; CHECK-LABEL: v2i128:
|
; CHECK-LABEL: v2i128:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: adds x8, x2, x6
|
; CHECK-NEXT: adds x8, x2, x6
|
||||||
; CHECK-NEXT: adcs x12, x3, x7
|
; CHECK-NEXT: adcs x11, x3, x7
|
||||||
; CHECK-NEXT: mov x9, #9223372036854775807
|
; CHECK-NEXT: eor x9, x3, x7
|
||||||
; CHECK-NEXT: eor x10, x3, x7
|
; CHECK-NEXT: eor x12, x3, x11
|
||||||
; CHECK-NEXT: cmp x12, #0
|
; CHECK-NEXT: bics xzr, x12, x9
|
||||||
; CHECK-NEXT: eor x13, x3, x12
|
; CHECK-NEXT: asr x9, x11, #63
|
||||||
; CHECK-NEXT: cinv x14, x9, ge
|
; CHECK-NEXT: eor x12, x9, #0x8000000000000000
|
||||||
; CHECK-NEXT: bics xzr, x13, x10
|
; CHECK-NEXT: csel x2, x9, x8, lt
|
||||||
; CHECK-NEXT: asr x10, x12, #63
|
; CHECK-NEXT: csel x3, x12, x11, lt
|
||||||
; CHECK-NEXT: csel x2, x10, x8, lt
|
|
||||||
; CHECK-NEXT: csel x3, x14, x12, lt
|
|
||||||
; CHECK-NEXT: adds x8, x0, x4
|
; CHECK-NEXT: adds x8, x0, x4
|
||||||
; CHECK-NEXT: adcs x10, x1, x5
|
; CHECK-NEXT: adcs x9, x1, x5
|
||||||
; CHECK-NEXT: eor x11, x1, x5
|
; CHECK-NEXT: eor x10, x1, x5
|
||||||
; CHECK-NEXT: cmp x10, #0
|
; CHECK-NEXT: eor x12, x1, x9
|
||||||
; CHECK-NEXT: eor x12, x1, x10
|
; CHECK-NEXT: asr x11, x9, #63
|
||||||
; CHECK-NEXT: cinv x9, x9, ge
|
; CHECK-NEXT: bics xzr, x12, x10
|
||||||
; CHECK-NEXT: bics xzr, x12, x11
|
; CHECK-NEXT: eor x13, x11, #0x8000000000000000
|
||||||
; CHECK-NEXT: asr x11, x10, #63
|
|
||||||
; CHECK-NEXT: csel x8, x11, x8, lt
|
; CHECK-NEXT: csel x8, x11, x8, lt
|
||||||
; CHECK-NEXT: csel x1, x9, x10, lt
|
; CHECK-NEXT: csel x1, x13, x9, lt
|
||||||
; CHECK-NEXT: fmov d0, x8
|
; CHECK-NEXT: fmov d0, x8
|
||||||
; CHECK-NEXT: mov v0.d[1], x1
|
; CHECK-NEXT: mov v0.d[1], x1
|
||||||
; CHECK-NEXT: fmov x0, d0
|
; CHECK-NEXT: fmov x0, d0
|
||||||
|
|
|
||||||
|
|
@ -12,11 +12,9 @@ define i32 @func(i32 %x, i32 %y) nounwind {
|
||||||
; CHECK-LABEL: func:
|
; CHECK-LABEL: func:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: subs w8, w0, w1
|
; CHECK-NEXT: subs w8, w0, w1
|
||||||
; CHECK-NEXT: mov w9, #2147483647
|
; CHECK-NEXT: asr w9, w8, #31
|
||||||
; CHECK-NEXT: cmp w8, #0
|
; CHECK-NEXT: eor w9, w9, #0x80000000
|
||||||
; CHECK-NEXT: cinv w8, w9, ge
|
; CHECK-NEXT: csel w0, w9, w8, vs
|
||||||
; CHECK-NEXT: subs w9, w0, w1
|
|
||||||
; CHECK-NEXT: csel w0, w8, w9, vs
|
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
%tmp = call i32 @llvm.ssub.sat.i32(i32 %x, i32 %y);
|
%tmp = call i32 @llvm.ssub.sat.i32(i32 %x, i32 %y);
|
||||||
ret i32 %tmp;
|
ret i32 %tmp;
|
||||||
|
|
@ -26,11 +24,9 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
|
||||||
; CHECK-LABEL: func2:
|
; CHECK-LABEL: func2:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: subs x8, x0, x1
|
; CHECK-NEXT: subs x8, x0, x1
|
||||||
; CHECK-NEXT: mov x9, #9223372036854775807
|
; CHECK-NEXT: asr x9, x8, #63
|
||||||
; CHECK-NEXT: cmp x8, #0
|
; CHECK-NEXT: eor x9, x9, #0x8000000000000000
|
||||||
; CHECK-NEXT: cinv x8, x9, ge
|
; CHECK-NEXT: csel x0, x9, x8, vs
|
||||||
; CHECK-NEXT: subs x9, x0, x1
|
|
||||||
; CHECK-NEXT: csel x0, x8, x9, vs
|
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
%tmp = call i64 @llvm.ssub.sat.i64(i64 %x, i64 %y);
|
%tmp = call i64 @llvm.ssub.sat.i64(i64 %x, i64 %y);
|
||||||
ret i64 %tmp;
|
ret i64 %tmp;
|
||||||
|
|
|
||||||
|
|
@ -11,11 +11,9 @@ define i32 @func32(i32 %x, i32 %y, i32 %z) nounwind {
|
||||||
; CHECK-LABEL: func32:
|
; CHECK-LABEL: func32:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: mul w8, w1, w2
|
; CHECK-NEXT: mul w8, w1, w2
|
||||||
; CHECK-NEXT: subs w10, w0, w8
|
|
||||||
; CHECK-NEXT: mov w9, #2147483647
|
|
||||||
; CHECK-NEXT: cmp w10, #0
|
|
||||||
; CHECK-NEXT: cinv w9, w9, ge
|
|
||||||
; CHECK-NEXT: subs w8, w0, w8
|
; CHECK-NEXT: subs w8, w0, w8
|
||||||
|
; CHECK-NEXT: asr w9, w8, #31
|
||||||
|
; CHECK-NEXT: eor w9, w9, #0x80000000
|
||||||
; CHECK-NEXT: csel w0, w9, w8, vs
|
; CHECK-NEXT: csel w0, w9, w8, vs
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
%a = mul i32 %y, %z
|
%a = mul i32 %y, %z
|
||||||
|
|
@ -27,11 +25,9 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind {
|
||||||
; CHECK-LABEL: func64:
|
; CHECK-LABEL: func64:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: subs x8, x0, x2
|
; CHECK-NEXT: subs x8, x0, x2
|
||||||
; CHECK-NEXT: mov x9, #9223372036854775807
|
; CHECK-NEXT: asr x9, x8, #63
|
||||||
; CHECK-NEXT: cmp x8, #0
|
; CHECK-NEXT: eor x9, x9, #0x8000000000000000
|
||||||
; CHECK-NEXT: cinv x8, x9, ge
|
; CHECK-NEXT: csel x0, x9, x8, vs
|
||||||
; CHECK-NEXT: subs x9, x0, x2
|
|
||||||
; CHECK-NEXT: csel x0, x8, x9, vs
|
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
%a = mul i64 %y, %z
|
%a = mul i64 %y, %z
|
||||||
%tmp = call i64 @llvm.ssub.sat.i64(i64 %x, i64 %z)
|
%tmp = call i64 @llvm.ssub.sat.i64(i64 %x, i64 %z)
|
||||||
|
|
|
||||||
|
|
@ -354,26 +354,23 @@ define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
|
||||||
; CHECK-LABEL: v2i128:
|
; CHECK-LABEL: v2i128:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: subs x8, x2, x6
|
; CHECK-NEXT: subs x8, x2, x6
|
||||||
; CHECK-NEXT: sbcs x12, x3, x7
|
; CHECK-NEXT: sbcs x11, x3, x7
|
||||||
; CHECK-NEXT: mov x9, #9223372036854775807
|
; CHECK-NEXT: eor x9, x3, x7
|
||||||
; CHECK-NEXT: eor x10, x3, x7
|
; CHECK-NEXT: eor x12, x3, x11
|
||||||
; CHECK-NEXT: cmp x12, #0
|
; CHECK-NEXT: tst x9, x12
|
||||||
; CHECK-NEXT: eor x13, x3, x12
|
; CHECK-NEXT: asr x9, x11, #63
|
||||||
; CHECK-NEXT: cinv x14, x9, ge
|
; CHECK-NEXT: eor x12, x9, #0x8000000000000000
|
||||||
; CHECK-NEXT: tst x10, x13
|
; CHECK-NEXT: csel x2, x9, x8, lt
|
||||||
; CHECK-NEXT: asr x10, x12, #63
|
; CHECK-NEXT: csel x3, x12, x11, lt
|
||||||
; CHECK-NEXT: csel x2, x10, x8, lt
|
|
||||||
; CHECK-NEXT: csel x3, x14, x12, lt
|
|
||||||
; CHECK-NEXT: subs x8, x0, x4
|
; CHECK-NEXT: subs x8, x0, x4
|
||||||
; CHECK-NEXT: sbcs x10, x1, x5
|
; CHECK-NEXT: sbcs x9, x1, x5
|
||||||
; CHECK-NEXT: eor x11, x1, x5
|
; CHECK-NEXT: eor x10, x1, x5
|
||||||
; CHECK-NEXT: cmp x10, #0
|
; CHECK-NEXT: eor x12, x1, x9
|
||||||
; CHECK-NEXT: eor x12, x1, x10
|
; CHECK-NEXT: asr x11, x9, #63
|
||||||
; CHECK-NEXT: cinv x9, x9, ge
|
; CHECK-NEXT: tst x10, x12
|
||||||
; CHECK-NEXT: tst x11, x12
|
; CHECK-NEXT: eor x13, x11, #0x8000000000000000
|
||||||
; CHECK-NEXT: asr x11, x10, #63
|
|
||||||
; CHECK-NEXT: csel x8, x11, x8, lt
|
; CHECK-NEXT: csel x8, x11, x8, lt
|
||||||
; CHECK-NEXT: csel x1, x9, x10, lt
|
; CHECK-NEXT: csel x1, x13, x9, lt
|
||||||
; CHECK-NEXT: fmov d0, x8
|
; CHECK-NEXT: fmov d0, x8
|
||||||
; CHECK-NEXT: mov v0.d[1], x1
|
; CHECK-NEXT: mov v0.d[1], x1
|
||||||
; CHECK-NEXT: fmov x0, d0
|
; CHECK-NEXT: fmov x0, d0
|
||||||
|
|
|
||||||
|
|
@ -62,10 +62,8 @@ define i16 @v_saddsat_i16(i16 %lhs, i16 %rhs) {
|
||||||
; GFX8-NEXT: v_cmp_gt_i16_e32 vcc, 0, v1
|
; GFX8-NEXT: v_cmp_gt_i16_e32 vcc, 0, v1
|
||||||
; GFX8-NEXT: v_add_u16_e32 v1, v0, v1
|
; GFX8-NEXT: v_add_u16_e32 v1, v0, v1
|
||||||
; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], v1, v0
|
; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], v1, v0
|
||||||
; GFX8-NEXT: v_mov_b32_e32 v0, 0xffff8000
|
; GFX8-NEXT: v_ashrrev_i16_e32 v0, 15, v1
|
||||||
; GFX8-NEXT: v_mov_b32_e32 v2, 0x7fff
|
; GFX8-NEXT: v_xor_b32_e32 v0, 0xffff8000, v0
|
||||||
; GFX8-NEXT: v_cmp_gt_i16_e64 s[6:7], 0, v1
|
|
||||||
; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[6:7]
|
|
||||||
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
||||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||||
|
|
@ -93,10 +91,8 @@ define i32 @v_saddsat_i32(i32 %lhs, i32 %rhs) {
|
||||||
; GFX6-NEXT: v_cmp_gt_i32_e32 vcc, 0, v1
|
; GFX6-NEXT: v_cmp_gt_i32_e32 vcc, 0, v1
|
||||||
; GFX6-NEXT: v_add_i32_e64 v1, s[4:5], v0, v1
|
; GFX6-NEXT: v_add_i32_e64 v1, s[4:5], v0, v1
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v1, v0
|
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v1, v0
|
||||||
; GFX6-NEXT: v_bfrev_b32_e32 v0, 1
|
; GFX6-NEXT: v_ashrrev_i32_e32 v0, 31, v1
|
||||||
; GFX6-NEXT: v_bfrev_b32_e32 v2, -2
|
; GFX6-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
|
||||||
; GFX6-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v1
|
|
||||||
; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[6:7]
|
|
||||||
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX6-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
; GFX6-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
||||||
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
||||||
|
|
@ -107,10 +103,8 @@ define i32 @v_saddsat_i32(i32 %lhs, i32 %rhs) {
|
||||||
; GFX8-NEXT: v_cmp_gt_i32_e32 vcc, 0, v1
|
; GFX8-NEXT: v_cmp_gt_i32_e32 vcc, 0, v1
|
||||||
; GFX8-NEXT: v_add_u32_e64 v1, s[4:5], v0, v1
|
; GFX8-NEXT: v_add_u32_e64 v1, s[4:5], v0, v1
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v1, v0
|
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v1, v0
|
||||||
; GFX8-NEXT: v_bfrev_b32_e32 v0, 1
|
; GFX8-NEXT: v_ashrrev_i32_e32 v0, 31, v1
|
||||||
; GFX8-NEXT: v_bfrev_b32_e32 v2, -2
|
; GFX8-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
|
||||||
; GFX8-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v1
|
|
||||||
; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[6:7]
|
|
||||||
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
||||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||||
|
|
@ -159,19 +153,18 @@ define <2 x i16> @v_saddsat_v2i16(<2 x i16> %lhs, <2 x i16> %rhs) {
|
||||||
; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v1
|
; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v1
|
||||||
; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0
|
; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0
|
||||||
; GFX8-NEXT: v_add_u16_e32 v4, v3, v2
|
; GFX8-NEXT: v_add_u16_e32 v4, v3, v2
|
||||||
; GFX8-NEXT: v_mov_b32_e32 v5, 0xffff8000
|
|
||||||
; GFX8-NEXT: v_mov_b32_e32 v6, 0x7fff
|
|
||||||
; GFX8-NEXT: v_cmp_gt_i16_e32 vcc, 0, v4
|
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v7, v5, v6, vcc
|
|
||||||
; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, v4, v3
|
|
||||||
; GFX8-NEXT: v_cmp_gt_i16_e64 s[4:5], 0, v2
|
; GFX8-NEXT: v_cmp_gt_i16_e64 s[4:5], 0, v2
|
||||||
|
; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, v4, v3
|
||||||
|
; GFX8-NEXT: v_ashrrev_i16_e32 v2, 15, v4
|
||||||
|
; GFX8-NEXT: s_movk_i32 s6, 0x8000
|
||||||
|
; GFX8-NEXT: v_xor_b32_e32 v2, s6, v2
|
||||||
; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc
|
; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v7, vcc
|
; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
|
||||||
; GFX8-NEXT: v_cmp_gt_i16_e32 vcc, 0, v1
|
; GFX8-NEXT: v_cmp_gt_i16_e32 vcc, 0, v1
|
||||||
; GFX8-NEXT: v_add_u16_e32 v1, v0, v1
|
; GFX8-NEXT: v_add_u16_e32 v1, v0, v1
|
||||||
; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], v1, v0
|
; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], v1, v0
|
||||||
; GFX8-NEXT: v_cmp_gt_i16_e64 s[6:7], 0, v1
|
; GFX8-NEXT: v_ashrrev_i16_e32 v0, 15, v1
|
||||||
; GFX8-NEXT: v_cndmask_b32_e64 v0, v5, v6, s[6:7]
|
; GFX8-NEXT: v_xor_b32_e32 v0, s6, v0
|
||||||
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
|
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
||||||
|
|
@ -228,26 +221,25 @@ define <3 x i16> @v_saddsat_v3i16(<3 x i16> %lhs, <3 x i16> %rhs) {
|
||||||
; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v2
|
; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v2
|
||||||
; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v0
|
; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v0
|
||||||
; GFX8-NEXT: v_add_u16_e32 v6, v5, v4
|
; GFX8-NEXT: v_add_u16_e32 v6, v5, v4
|
||||||
; GFX8-NEXT: v_mov_b32_e32 v7, 0xffff8000
|
|
||||||
; GFX8-NEXT: v_mov_b32_e32 v8, 0x7fff
|
|
||||||
; GFX8-NEXT: v_cmp_gt_i16_e32 vcc, 0, v6
|
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v9, v7, v8, vcc
|
|
||||||
; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, v6, v5
|
|
||||||
; GFX8-NEXT: v_cmp_gt_i16_e64 s[4:5], 0, v4
|
; GFX8-NEXT: v_cmp_gt_i16_e64 s[4:5], 0, v4
|
||||||
|
; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, v6, v5
|
||||||
|
; GFX8-NEXT: v_ashrrev_i16_e32 v4, 15, v6
|
||||||
|
; GFX8-NEXT: s_movk_i32 s6, 0x8000
|
||||||
|
; GFX8-NEXT: v_xor_b32_e32 v4, s6, v4
|
||||||
; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc
|
; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v4, v6, v9, vcc
|
; GFX8-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc
|
||||||
; GFX8-NEXT: v_cmp_gt_i16_e32 vcc, 0, v3
|
; GFX8-NEXT: v_cmp_gt_i16_e32 vcc, 0, v3
|
||||||
; GFX8-NEXT: v_add_u16_e32 v3, v1, v3
|
; GFX8-NEXT: v_add_u16_e32 v3, v1, v3
|
||||||
; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], v3, v1
|
; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], v3, v1
|
||||||
; GFX8-NEXT: v_cmp_gt_i16_e64 s[6:7], 0, v3
|
; GFX8-NEXT: v_ashrrev_i16_e32 v1, 15, v3
|
||||||
; GFX8-NEXT: v_cndmask_b32_e64 v1, v7, v8, s[6:7]
|
; GFX8-NEXT: v_xor_b32_e32 v1, s6, v1
|
||||||
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
|
; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
|
||||||
; GFX8-NEXT: v_cmp_gt_i16_e32 vcc, 0, v2
|
; GFX8-NEXT: v_cmp_gt_i16_e32 vcc, 0, v2
|
||||||
; GFX8-NEXT: v_add_u16_e32 v2, v0, v2
|
; GFX8-NEXT: v_add_u16_e32 v2, v0, v2
|
||||||
; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], v2, v0
|
; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], v2, v0
|
||||||
; GFX8-NEXT: v_cmp_gt_i16_e64 s[6:7], 0, v2
|
; GFX8-NEXT: v_ashrrev_i16_e32 v0, 15, v2
|
||||||
; GFX8-NEXT: v_cndmask_b32_e64 v0, v7, v8, s[6:7]
|
; GFX8-NEXT: v_xor_b32_e32 v0, s6, v0
|
||||||
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
|
; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
|
||||||
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v4
|
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v4
|
||||||
|
|
@ -313,19 +305,18 @@ define <2 x float> @v_saddsat_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
|
||||||
; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v2
|
; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v2
|
||||||
; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v0
|
; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v0
|
||||||
; GFX8-NEXT: v_add_u16_e32 v6, v5, v4
|
; GFX8-NEXT: v_add_u16_e32 v6, v5, v4
|
||||||
; GFX8-NEXT: v_mov_b32_e32 v7, 0xffff8000
|
|
||||||
; GFX8-NEXT: v_mov_b32_e32 v8, 0x7fff
|
|
||||||
; GFX8-NEXT: v_cmp_gt_i16_e32 vcc, 0, v6
|
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v9, v7, v8, vcc
|
|
||||||
; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, v6, v5
|
|
||||||
; GFX8-NEXT: v_cmp_gt_i16_e64 s[4:5], 0, v4
|
; GFX8-NEXT: v_cmp_gt_i16_e64 s[4:5], 0, v4
|
||||||
|
; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, v6, v5
|
||||||
|
; GFX8-NEXT: v_ashrrev_i16_e32 v4, 15, v6
|
||||||
|
; GFX8-NEXT: s_movk_i32 s6, 0x8000
|
||||||
|
; GFX8-NEXT: v_xor_b32_e32 v4, s6, v4
|
||||||
; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc
|
; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v4, v6, v9, vcc
|
; GFX8-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc
|
||||||
; GFX8-NEXT: v_cmp_gt_i16_e32 vcc, 0, v2
|
; GFX8-NEXT: v_cmp_gt_i16_e32 vcc, 0, v2
|
||||||
; GFX8-NEXT: v_add_u16_e32 v2, v0, v2
|
; GFX8-NEXT: v_add_u16_e32 v2, v0, v2
|
||||||
; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], v2, v0
|
; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], v2, v0
|
||||||
; GFX8-NEXT: v_cmp_gt_i16_e64 s[6:7], 0, v2
|
; GFX8-NEXT: v_ashrrev_i16_e32 v0, 15, v2
|
||||||
; GFX8-NEXT: v_cndmask_b32_e64 v0, v7, v8, s[6:7]
|
; GFX8-NEXT: v_xor_b32_e32 v0, s6, v0
|
||||||
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
|
; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
|
||||||
; GFX8-NEXT: v_lshlrev_b32_e32 v4, 16, v4
|
; GFX8-NEXT: v_lshlrev_b32_e32 v4, 16, v4
|
||||||
|
|
@ -333,17 +324,17 @@ define <2 x float> @v_saddsat_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
|
||||||
; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v3
|
; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v3
|
||||||
; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v1
|
; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v1
|
||||||
; GFX8-NEXT: v_add_u16_e32 v5, v4, v2
|
; GFX8-NEXT: v_add_u16_e32 v5, v4, v2
|
||||||
; GFX8-NEXT: v_cmp_gt_i16_e32 vcc, 0, v5
|
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v6, v7, v8, vcc
|
|
||||||
; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, v5, v4
|
|
||||||
; GFX8-NEXT: v_cmp_gt_i16_e64 s[4:5], 0, v2
|
; GFX8-NEXT: v_cmp_gt_i16_e64 s[4:5], 0, v2
|
||||||
|
; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, v5, v4
|
||||||
|
; GFX8-NEXT: v_ashrrev_i16_e32 v2, 15, v5
|
||||||
|
; GFX8-NEXT: v_xor_b32_e32 v2, s6, v2
|
||||||
; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc
|
; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v2, v5, v6, vcc
|
; GFX8-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc
|
||||||
; GFX8-NEXT: v_cmp_gt_i16_e32 vcc, 0, v3
|
; GFX8-NEXT: v_cmp_gt_i16_e32 vcc, 0, v3
|
||||||
; GFX8-NEXT: v_add_u16_e32 v3, v1, v3
|
; GFX8-NEXT: v_add_u16_e32 v3, v1, v3
|
||||||
; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], v3, v1
|
; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], v3, v1
|
||||||
; GFX8-NEXT: v_cmp_gt_i16_e64 s[6:7], 0, v3
|
; GFX8-NEXT: v_ashrrev_i16_e32 v1, 15, v3
|
||||||
; GFX8-NEXT: v_cndmask_b32_e64 v1, v7, v8, s[6:7]
|
; GFX8-NEXT: v_xor_b32_e32 v1, s6, v1
|
||||||
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
|
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
|
; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
|
||||||
|
|
@ -376,17 +367,16 @@ define <2 x i32> @v_saddsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
|
||||||
; GFX6-NEXT: v_cmp_gt_i32_e32 vcc, 0, v2
|
; GFX6-NEXT: v_cmp_gt_i32_e32 vcc, 0, v2
|
||||||
; GFX6-NEXT: v_add_i32_e64 v2, s[4:5], v0, v2
|
; GFX6-NEXT: v_add_i32_e64 v2, s[4:5], v0, v2
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v2, v0
|
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v2, v0
|
||||||
; GFX6-NEXT: v_bfrev_b32_e32 v4, 1
|
; GFX6-NEXT: v_ashrrev_i32_e32 v0, 31, v2
|
||||||
; GFX6-NEXT: v_bfrev_b32_e32 v5, -2
|
; GFX6-NEXT: s_brev_b32 s6, 1
|
||||||
; GFX6-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v2
|
; GFX6-NEXT: v_xor_b32_e32 v0, s6, v0
|
||||||
; GFX6-NEXT: v_cndmask_b32_e64 v0, v4, v5, s[6:7]
|
|
||||||
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX6-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
|
; GFX6-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
|
||||||
; GFX6-NEXT: v_add_i32_e64 v2, s[4:5], v1, v3
|
; GFX6-NEXT: v_add_i32_e64 v2, s[4:5], v1, v3
|
||||||
; GFX6-NEXT: v_cmp_gt_i32_e32 vcc, 0, v3
|
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v2, v1
|
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v2, v1
|
||||||
; GFX6-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v2
|
; GFX6-NEXT: v_cmp_gt_i32_e32 vcc, 0, v3
|
||||||
; GFX6-NEXT: v_cndmask_b32_e64 v1, v4, v5, s[6:7]
|
; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v2
|
||||||
|
; GFX6-NEXT: v_xor_b32_e32 v1, s6, v1
|
||||||
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX6-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
|
; GFX6-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
|
||||||
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
||||||
|
|
@ -397,17 +387,16 @@ define <2 x i32> @v_saddsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
|
||||||
; GFX8-NEXT: v_cmp_gt_i32_e32 vcc, 0, v2
|
; GFX8-NEXT: v_cmp_gt_i32_e32 vcc, 0, v2
|
||||||
; GFX8-NEXT: v_add_u32_e64 v2, s[4:5], v0, v2
|
; GFX8-NEXT: v_add_u32_e64 v2, s[4:5], v0, v2
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v2, v0
|
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v2, v0
|
||||||
; GFX8-NEXT: v_bfrev_b32_e32 v4, 1
|
; GFX8-NEXT: v_ashrrev_i32_e32 v0, 31, v2
|
||||||
; GFX8-NEXT: v_bfrev_b32_e32 v5, -2
|
; GFX8-NEXT: s_brev_b32 s6, 1
|
||||||
; GFX8-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v2
|
; GFX8-NEXT: v_xor_b32_e32 v0, s6, v0
|
||||||
; GFX8-NEXT: v_cndmask_b32_e64 v0, v4, v5, s[6:7]
|
|
||||||
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
|
; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
|
||||||
; GFX8-NEXT: v_add_u32_e64 v2, s[4:5], v1, v3
|
; GFX8-NEXT: v_add_u32_e64 v2, s[4:5], v1, v3
|
||||||
; GFX8-NEXT: v_cmp_gt_i32_e32 vcc, 0, v3
|
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v2, v1
|
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v2, v1
|
||||||
; GFX8-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v2
|
; GFX8-NEXT: v_cmp_gt_i32_e32 vcc, 0, v3
|
||||||
; GFX8-NEXT: v_cndmask_b32_e64 v1, v4, v5, s[6:7]
|
; GFX8-NEXT: v_ashrrev_i32_e32 v1, 31, v2
|
||||||
|
; GFX8-NEXT: v_xor_b32_e32 v1, s6, v1
|
||||||
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
|
; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
|
||||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||||
|
|
@ -438,13 +427,10 @@ define i64 @v_saddsat_i64(i64 %lhs, i64 %rhs) {
|
||||||
; GFX6-NEXT: v_addc_u32_e32 v5, vcc, v1, v3, vcc
|
; GFX6-NEXT: v_addc_u32_e32 v5, vcc, v1, v3, vcc
|
||||||
; GFX6-NEXT: v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1]
|
; GFX6-NEXT: v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1]
|
||||||
; GFX6-NEXT: v_cmp_gt_i64_e64 s[4:5], 0, v[2:3]
|
; GFX6-NEXT: v_cmp_gt_i64_e64 s[4:5], 0, v[2:3]
|
||||||
; GFX6-NEXT: v_bfrev_b32_e32 v1, 1
|
; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v5
|
||||||
; GFX6-NEXT: s_xor_b64 vcc, s[4:5], vcc
|
; GFX6-NEXT: s_xor_b64 vcc, s[4:5], vcc
|
||||||
; GFX6-NEXT: v_cmp_gt_i64_e64 s[4:5], 0, v[4:5]
|
; GFX6-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
|
||||||
; GFX6-NEXT: v_bfrev_b32_e32 v2, -2
|
; GFX6-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
|
||||||
; GFX6-NEXT: v_ashrrev_i32_e32 v0, 31, v5
|
|
||||||
; GFX6-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[4:5]
|
|
||||||
; GFX6-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
|
|
||||||
; GFX6-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
|
; GFX6-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
|
||||||
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
|
|
@ -455,13 +441,10 @@ define i64 @v_saddsat_i64(i64 %lhs, i64 %rhs) {
|
||||||
; GFX8-NEXT: v_addc_u32_e32 v5, vcc, v1, v3, vcc
|
; GFX8-NEXT: v_addc_u32_e32 v5, vcc, v1, v3, vcc
|
||||||
; GFX8-NEXT: v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1]
|
; GFX8-NEXT: v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1]
|
||||||
; GFX8-NEXT: v_cmp_gt_i64_e64 s[4:5], 0, v[2:3]
|
; GFX8-NEXT: v_cmp_gt_i64_e64 s[4:5], 0, v[2:3]
|
||||||
; GFX8-NEXT: v_bfrev_b32_e32 v1, 1
|
; GFX8-NEXT: v_ashrrev_i32_e32 v1, 31, v5
|
||||||
; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc
|
; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc
|
||||||
; GFX8-NEXT: v_cmp_gt_i64_e64 s[4:5], 0, v[4:5]
|
; GFX8-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
|
||||||
; GFX8-NEXT: v_bfrev_b32_e32 v2, -2
|
; GFX8-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
|
||||||
; GFX8-NEXT: v_ashrrev_i32_e32 v0, 31, v5
|
|
||||||
; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[4:5]
|
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
|
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
|
; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
|
||||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
|
|
@ -472,13 +455,10 @@ define i64 @v_saddsat_i64(i64 %lhs, i64 %rhs) {
|
||||||
; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, v1, v3, vcc
|
; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, v1, v3, vcc
|
||||||
; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1]
|
; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1]
|
||||||
; GFX9-NEXT: v_cmp_gt_i64_e64 s[4:5], 0, v[2:3]
|
; GFX9-NEXT: v_cmp_gt_i64_e64 s[4:5], 0, v[2:3]
|
||||||
; GFX9-NEXT: v_bfrev_b32_e32 v1, 1
|
; GFX9-NEXT: v_ashrrev_i32_e32 v1, 31, v5
|
||||||
; GFX9-NEXT: s_xor_b64 vcc, s[4:5], vcc
|
; GFX9-NEXT: s_xor_b64 vcc, s[4:5], vcc
|
||||||
; GFX9-NEXT: v_cmp_gt_i64_e64 s[4:5], 0, v[4:5]
|
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
|
||||||
; GFX9-NEXT: v_bfrev_b32_e32 v2, -2
|
; GFX9-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
|
||||||
; GFX9-NEXT: v_ashrrev_i32_e32 v0, 31, v5
|
|
||||||
; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[4:5]
|
|
||||||
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
|
|
||||||
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
|
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
|
||||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
|
|
@ -487,15 +467,13 @@ define i64 @v_saddsat_i64(i64 %lhs, i64 %rhs) {
|
||||||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||||
; GFX10-NEXT: v_add_co_u32 v4, vcc_lo, v0, v2
|
; GFX10-NEXT: v_add_co_u32 v4, vcc_lo, v0, v2
|
||||||
; GFX10-NEXT: v_bfrev_b32_e32 v6, -2
|
|
||||||
; GFX10-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, v1, v3, vcc_lo
|
|
||||||
; GFX10-NEXT: v_cmp_gt_i64_e64 s4, 0, v[2:3]
|
; GFX10-NEXT: v_cmp_gt_i64_e64 s4, 0, v[2:3]
|
||||||
; GFX10-NEXT: v_cmp_gt_i64_e64 s5, 0, v[4:5]
|
; GFX10-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, v1, v3, vcc_lo
|
||||||
|
; GFX10-NEXT: v_ashrrev_i32_e32 v6, 31, v5
|
||||||
; GFX10-NEXT: v_cmp_lt_i64_e32 vcc_lo, v[4:5], v[0:1]
|
; GFX10-NEXT: v_cmp_lt_i64_e32 vcc_lo, v[4:5], v[0:1]
|
||||||
; GFX10-NEXT: v_ashrrev_i32_e32 v0, 31, v5
|
; GFX10-NEXT: v_xor_b32_e32 v1, 0x80000000, v6
|
||||||
; GFX10-NEXT: v_cndmask_b32_e64 v1, 0x80000000, v6, s5
|
|
||||||
; GFX10-NEXT: s_xor_b32 vcc_lo, s4, vcc_lo
|
; GFX10-NEXT: s_xor_b32 vcc_lo, s4, vcc_lo
|
||||||
; GFX10-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc_lo
|
; GFX10-NEXT: v_cndmask_b32_e32 v0, v4, v6, vcc_lo
|
||||||
; GFX10-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
|
; GFX10-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
|
||||||
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
||||||
%result = call i64 @llvm.sadd.sat.i64(i64 %lhs, i64 %rhs)
|
%result = call i64 @llvm.sadd.sat.i64(i64 %lhs, i64 %rhs)
|
||||||
|
|
|
||||||
|
|
@ -62,10 +62,8 @@ define i16 @v_ssubsat_i16(i16 %lhs, i16 %rhs) {
|
||||||
; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, 0, v1
|
; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, 0, v1
|
||||||
; GFX8-NEXT: v_sub_u16_e32 v1, v0, v1
|
; GFX8-NEXT: v_sub_u16_e32 v1, v0, v1
|
||||||
; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], v1, v0
|
; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], v1, v0
|
||||||
; GFX8-NEXT: v_mov_b32_e32 v0, 0xffff8000
|
; GFX8-NEXT: v_ashrrev_i16_e32 v0, 15, v1
|
||||||
; GFX8-NEXT: v_mov_b32_e32 v2, 0x7fff
|
; GFX8-NEXT: v_xor_b32_e32 v0, 0xffff8000, v0
|
||||||
; GFX8-NEXT: v_cmp_gt_i16_e64 s[6:7], 0, v1
|
|
||||||
; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[6:7]
|
|
||||||
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
||||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||||
|
|
@ -93,10 +91,8 @@ define i32 @v_ssubsat_i32(i32 %lhs, i32 %rhs) {
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v1
|
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v1
|
||||||
; GFX6-NEXT: v_sub_i32_e64 v1, s[4:5], v0, v1
|
; GFX6-NEXT: v_sub_i32_e64 v1, s[4:5], v0, v1
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v1, v0
|
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v1, v0
|
||||||
; GFX6-NEXT: v_bfrev_b32_e32 v0, 1
|
; GFX6-NEXT: v_ashrrev_i32_e32 v0, 31, v1
|
||||||
; GFX6-NEXT: v_bfrev_b32_e32 v2, -2
|
; GFX6-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
|
||||||
; GFX6-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v1
|
|
||||||
; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[6:7]
|
|
||||||
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX6-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
; GFX6-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
||||||
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
||||||
|
|
@ -107,10 +103,8 @@ define i32 @v_ssubsat_i32(i32 %lhs, i32 %rhs) {
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v1
|
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v1
|
||||||
; GFX8-NEXT: v_sub_u32_e64 v1, s[4:5], v0, v1
|
; GFX8-NEXT: v_sub_u32_e64 v1, s[4:5], v0, v1
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v1, v0
|
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v1, v0
|
||||||
; GFX8-NEXT: v_bfrev_b32_e32 v0, 1
|
; GFX8-NEXT: v_ashrrev_i32_e32 v0, 31, v1
|
||||||
; GFX8-NEXT: v_bfrev_b32_e32 v2, -2
|
; GFX8-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
|
||||||
; GFX8-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v1
|
|
||||||
; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[6:7]
|
|
||||||
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
||||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||||
|
|
@ -159,19 +153,18 @@ define <2 x i16> @v_ssubsat_v2i16(<2 x i16> %lhs, <2 x i16> %rhs) {
|
||||||
; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v1
|
; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v1
|
||||||
; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0
|
; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0
|
||||||
; GFX8-NEXT: v_sub_u16_e32 v4, v3, v2
|
; GFX8-NEXT: v_sub_u16_e32 v4, v3, v2
|
||||||
; GFX8-NEXT: v_mov_b32_e32 v5, 0xffff8000
|
|
||||||
; GFX8-NEXT: v_mov_b32_e32 v6, 0x7fff
|
|
||||||
; GFX8-NEXT: v_cmp_gt_i16_e32 vcc, 0, v4
|
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v7, v5, v6, vcc
|
|
||||||
; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, v4, v3
|
|
||||||
; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], 0, v2
|
; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], 0, v2
|
||||||
|
; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, v4, v3
|
||||||
|
; GFX8-NEXT: v_ashrrev_i16_e32 v2, 15, v4
|
||||||
|
; GFX8-NEXT: s_movk_i32 s6, 0x8000
|
||||||
|
; GFX8-NEXT: v_xor_b32_e32 v2, s6, v2
|
||||||
; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc
|
; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v7, vcc
|
; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
|
||||||
; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, 0, v1
|
; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, 0, v1
|
||||||
; GFX8-NEXT: v_sub_u16_e32 v1, v0, v1
|
; GFX8-NEXT: v_sub_u16_e32 v1, v0, v1
|
||||||
; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], v1, v0
|
; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], v1, v0
|
||||||
; GFX8-NEXT: v_cmp_gt_i16_e64 s[6:7], 0, v1
|
; GFX8-NEXT: v_ashrrev_i16_e32 v0, 15, v1
|
||||||
; GFX8-NEXT: v_cndmask_b32_e64 v0, v5, v6, s[6:7]
|
; GFX8-NEXT: v_xor_b32_e32 v0, s6, v0
|
||||||
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
|
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
|
||||||
|
|
@ -229,26 +222,25 @@ define <3 x i16> @v_ssubsat_v3i16(<3 x i16> %lhs, <3 x i16> %rhs) {
|
||||||
; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v2
|
; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v2
|
||||||
; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v0
|
; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v0
|
||||||
; GFX8-NEXT: v_sub_u16_e32 v6, v5, v4
|
; GFX8-NEXT: v_sub_u16_e32 v6, v5, v4
|
||||||
; GFX8-NEXT: v_mov_b32_e32 v7, 0xffff8000
|
|
||||||
; GFX8-NEXT: v_mov_b32_e32 v8, 0x7fff
|
|
||||||
; GFX8-NEXT: v_cmp_gt_i16_e32 vcc, 0, v6
|
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v9, v7, v8, vcc
|
|
||||||
; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, v6, v5
|
|
||||||
; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], 0, v4
|
; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], 0, v4
|
||||||
|
; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, v6, v5
|
||||||
|
; GFX8-NEXT: v_ashrrev_i16_e32 v4, 15, v6
|
||||||
|
; GFX8-NEXT: s_movk_i32 s6, 0x8000
|
||||||
|
; GFX8-NEXT: v_xor_b32_e32 v4, s6, v4
|
||||||
; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc
|
; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v4, v6, v9, vcc
|
; GFX8-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc
|
||||||
; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, 0, v3
|
; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, 0, v3
|
||||||
; GFX8-NEXT: v_sub_u16_e32 v3, v1, v3
|
; GFX8-NEXT: v_sub_u16_e32 v3, v1, v3
|
||||||
; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], v3, v1
|
; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], v3, v1
|
||||||
; GFX8-NEXT: v_cmp_gt_i16_e64 s[6:7], 0, v3
|
; GFX8-NEXT: v_ashrrev_i16_e32 v1, 15, v3
|
||||||
; GFX8-NEXT: v_cndmask_b32_e64 v1, v7, v8, s[6:7]
|
; GFX8-NEXT: v_xor_b32_e32 v1, s6, v1
|
||||||
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
|
; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
|
||||||
; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, 0, v2
|
; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, 0, v2
|
||||||
; GFX8-NEXT: v_sub_u16_e32 v2, v0, v2
|
; GFX8-NEXT: v_sub_u16_e32 v2, v0, v2
|
||||||
; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], v2, v0
|
; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], v2, v0
|
||||||
; GFX8-NEXT: v_cmp_gt_i16_e64 s[6:7], 0, v2
|
; GFX8-NEXT: v_ashrrev_i16_e32 v0, 15, v2
|
||||||
; GFX8-NEXT: v_cndmask_b32_e64 v0, v7, v8, s[6:7]
|
; GFX8-NEXT: v_xor_b32_e32 v0, s6, v0
|
||||||
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
|
; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
|
||||||
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v4
|
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v4
|
||||||
|
|
@ -314,19 +306,18 @@ define <2 x float> @v_ssubsat_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
|
||||||
; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v2
|
; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v2
|
||||||
; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v0
|
; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v0
|
||||||
; GFX8-NEXT: v_sub_u16_e32 v6, v5, v4
|
; GFX8-NEXT: v_sub_u16_e32 v6, v5, v4
|
||||||
; GFX8-NEXT: v_mov_b32_e32 v7, 0xffff8000
|
|
||||||
; GFX8-NEXT: v_mov_b32_e32 v8, 0x7fff
|
|
||||||
; GFX8-NEXT: v_cmp_gt_i16_e32 vcc, 0, v6
|
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v9, v7, v8, vcc
|
|
||||||
; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, v6, v5
|
|
||||||
; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], 0, v4
|
; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], 0, v4
|
||||||
|
; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, v6, v5
|
||||||
|
; GFX8-NEXT: v_ashrrev_i16_e32 v4, 15, v6
|
||||||
|
; GFX8-NEXT: s_movk_i32 s6, 0x8000
|
||||||
|
; GFX8-NEXT: v_xor_b32_e32 v4, s6, v4
|
||||||
; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc
|
; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v4, v6, v9, vcc
|
; GFX8-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc
|
||||||
; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, 0, v2
|
; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, 0, v2
|
||||||
; GFX8-NEXT: v_sub_u16_e32 v2, v0, v2
|
; GFX8-NEXT: v_sub_u16_e32 v2, v0, v2
|
||||||
; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], v2, v0
|
; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], v2, v0
|
||||||
; GFX8-NEXT: v_cmp_gt_i16_e64 s[6:7], 0, v2
|
; GFX8-NEXT: v_ashrrev_i16_e32 v0, 15, v2
|
||||||
; GFX8-NEXT: v_cndmask_b32_e64 v0, v7, v8, s[6:7]
|
; GFX8-NEXT: v_xor_b32_e32 v0, s6, v0
|
||||||
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
|
; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
|
||||||
; GFX8-NEXT: v_lshlrev_b32_e32 v4, 16, v4
|
; GFX8-NEXT: v_lshlrev_b32_e32 v4, 16, v4
|
||||||
|
|
@ -334,17 +325,17 @@ define <2 x float> @v_ssubsat_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
|
||||||
; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v3
|
; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v3
|
||||||
; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v1
|
; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v1
|
||||||
; GFX8-NEXT: v_sub_u16_e32 v5, v4, v2
|
; GFX8-NEXT: v_sub_u16_e32 v5, v4, v2
|
||||||
; GFX8-NEXT: v_cmp_gt_i16_e32 vcc, 0, v5
|
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v6, v7, v8, vcc
|
|
||||||
; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, v5, v4
|
|
||||||
; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], 0, v2
|
; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], 0, v2
|
||||||
|
; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, v5, v4
|
||||||
|
; GFX8-NEXT: v_ashrrev_i16_e32 v2, 15, v5
|
||||||
|
; GFX8-NEXT: v_xor_b32_e32 v2, s6, v2
|
||||||
; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc
|
; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v2, v5, v6, vcc
|
; GFX8-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc
|
||||||
; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, 0, v3
|
; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, 0, v3
|
||||||
; GFX8-NEXT: v_sub_u16_e32 v3, v1, v3
|
; GFX8-NEXT: v_sub_u16_e32 v3, v1, v3
|
||||||
; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], v3, v1
|
; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], v3, v1
|
||||||
; GFX8-NEXT: v_cmp_gt_i16_e64 s[6:7], 0, v3
|
; GFX8-NEXT: v_ashrrev_i16_e32 v1, 15, v3
|
||||||
; GFX8-NEXT: v_cndmask_b32_e64 v1, v7, v8, s[6:7]
|
; GFX8-NEXT: v_xor_b32_e32 v1, s6, v1
|
||||||
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
|
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
|
; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
|
||||||
|
|
@ -377,17 +368,16 @@ define <2 x i32> @v_ssubsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v2
|
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v2
|
||||||
; GFX6-NEXT: v_sub_i32_e64 v2, s[4:5], v0, v2
|
; GFX6-NEXT: v_sub_i32_e64 v2, s[4:5], v0, v2
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v2, v0
|
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v2, v0
|
||||||
; GFX6-NEXT: v_bfrev_b32_e32 v4, 1
|
; GFX6-NEXT: v_ashrrev_i32_e32 v0, 31, v2
|
||||||
; GFX6-NEXT: v_bfrev_b32_e32 v5, -2
|
; GFX6-NEXT: s_brev_b32 s6, 1
|
||||||
; GFX6-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v2
|
; GFX6-NEXT: v_xor_b32_e32 v0, s6, v0
|
||||||
; GFX6-NEXT: v_cndmask_b32_e64 v0, v4, v5, s[6:7]
|
|
||||||
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX6-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
|
; GFX6-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
|
||||||
; GFX6-NEXT: v_sub_i32_e64 v2, s[4:5], v1, v3
|
; GFX6-NEXT: v_sub_i32_e64 v2, s[4:5], v1, v3
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v3
|
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v2, v1
|
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v2, v1
|
||||||
; GFX6-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v2
|
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v3
|
||||||
; GFX6-NEXT: v_cndmask_b32_e64 v1, v4, v5, s[6:7]
|
; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v2
|
||||||
|
; GFX6-NEXT: v_xor_b32_e32 v1, s6, v1
|
||||||
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX6-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
|
; GFX6-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
|
||||||
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
||||||
|
|
@ -398,17 +388,16 @@ define <2 x i32> @v_ssubsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v2
|
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v2
|
||||||
; GFX8-NEXT: v_sub_u32_e64 v2, s[4:5], v0, v2
|
; GFX8-NEXT: v_sub_u32_e64 v2, s[4:5], v0, v2
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v2, v0
|
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v2, v0
|
||||||
; GFX8-NEXT: v_bfrev_b32_e32 v4, 1
|
; GFX8-NEXT: v_ashrrev_i32_e32 v0, 31, v2
|
||||||
; GFX8-NEXT: v_bfrev_b32_e32 v5, -2
|
; GFX8-NEXT: s_brev_b32 s6, 1
|
||||||
; GFX8-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v2
|
; GFX8-NEXT: v_xor_b32_e32 v0, s6, v0
|
||||||
; GFX8-NEXT: v_cndmask_b32_e64 v0, v4, v5, s[6:7]
|
|
||||||
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
|
; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
|
||||||
; GFX8-NEXT: v_sub_u32_e64 v2, s[4:5], v1, v3
|
; GFX8-NEXT: v_sub_u32_e64 v2, s[4:5], v1, v3
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v3
|
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v2, v1
|
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v2, v1
|
||||||
; GFX8-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v2
|
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v3
|
||||||
; GFX8-NEXT: v_cndmask_b32_e64 v1, v4, v5, s[6:7]
|
; GFX8-NEXT: v_ashrrev_i32_e32 v1, 31, v2
|
||||||
|
; GFX8-NEXT: v_xor_b32_e32 v1, s6, v1
|
||||||
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
|
; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
|
||||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||||
|
|
@ -438,24 +427,23 @@ define <3 x i32> @v_ssubsat_v3i32(<3 x i32> %lhs, <3 x i32> %rhs) {
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v3
|
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v3
|
||||||
; GFX6-NEXT: v_sub_i32_e64 v3, s[4:5], v0, v3
|
; GFX6-NEXT: v_sub_i32_e64 v3, s[4:5], v0, v3
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v3, v0
|
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v3, v0
|
||||||
; GFX6-NEXT: v_bfrev_b32_e32 v6, 1
|
; GFX6-NEXT: v_ashrrev_i32_e32 v0, 31, v3
|
||||||
; GFX6-NEXT: v_bfrev_b32_e32 v7, -2
|
; GFX6-NEXT: s_brev_b32 s6, 1
|
||||||
; GFX6-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v3
|
; GFX6-NEXT: v_xor_b32_e32 v0, s6, v0
|
||||||
; GFX6-NEXT: v_cndmask_b32_e64 v0, v6, v7, s[6:7]
|
|
||||||
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX6-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
|
; GFX6-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
|
||||||
; GFX6-NEXT: v_sub_i32_e64 v3, s[4:5], v1, v4
|
; GFX6-NEXT: v_sub_i32_e64 v3, s[4:5], v1, v4
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v4
|
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v3, v1
|
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v3, v1
|
||||||
; GFX6-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v3
|
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v4
|
||||||
; GFX6-NEXT: v_cndmask_b32_e64 v1, v6, v7, s[6:7]
|
; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v3
|
||||||
|
; GFX6-NEXT: v_xor_b32_e32 v1, s6, v1
|
||||||
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX6-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
|
; GFX6-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
|
||||||
; GFX6-NEXT: v_sub_i32_e64 v3, s[4:5], v2, v5
|
; GFX6-NEXT: v_sub_i32_e64 v3, s[4:5], v2, v5
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v5
|
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v3, v2
|
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v3, v2
|
||||||
; GFX6-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v3
|
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v5
|
||||||
; GFX6-NEXT: v_cndmask_b32_e64 v2, v6, v7, s[6:7]
|
; GFX6-NEXT: v_ashrrev_i32_e32 v2, 31, v3
|
||||||
|
; GFX6-NEXT: v_xor_b32_e32 v2, s6, v2
|
||||||
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX6-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
|
; GFX6-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
|
||||||
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
||||||
|
|
@ -466,24 +454,23 @@ define <3 x i32> @v_ssubsat_v3i32(<3 x i32> %lhs, <3 x i32> %rhs) {
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v3
|
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v3
|
||||||
; GFX8-NEXT: v_sub_u32_e64 v3, s[4:5], v0, v3
|
; GFX8-NEXT: v_sub_u32_e64 v3, s[4:5], v0, v3
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v3, v0
|
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v3, v0
|
||||||
; GFX8-NEXT: v_bfrev_b32_e32 v6, 1
|
; GFX8-NEXT: v_ashrrev_i32_e32 v0, 31, v3
|
||||||
; GFX8-NEXT: v_bfrev_b32_e32 v7, -2
|
; GFX8-NEXT: s_brev_b32 s6, 1
|
||||||
; GFX8-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v3
|
; GFX8-NEXT: v_xor_b32_e32 v0, s6, v0
|
||||||
; GFX8-NEXT: v_cndmask_b32_e64 v0, v6, v7, s[6:7]
|
|
||||||
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
|
; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
|
||||||
; GFX8-NEXT: v_sub_u32_e64 v3, s[4:5], v1, v4
|
; GFX8-NEXT: v_sub_u32_e64 v3, s[4:5], v1, v4
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v4
|
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v3, v1
|
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v3, v1
|
||||||
; GFX8-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v3
|
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v4
|
||||||
; GFX8-NEXT: v_cndmask_b32_e64 v1, v6, v7, s[6:7]
|
; GFX8-NEXT: v_ashrrev_i32_e32 v1, 31, v3
|
||||||
|
; GFX8-NEXT: v_xor_b32_e32 v1, s6, v1
|
||||||
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
|
; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
|
||||||
; GFX8-NEXT: v_sub_u32_e64 v3, s[4:5], v2, v5
|
; GFX8-NEXT: v_sub_u32_e64 v3, s[4:5], v2, v5
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v5
|
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v3, v2
|
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v3, v2
|
||||||
; GFX8-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v3
|
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v5
|
||||||
; GFX8-NEXT: v_cndmask_b32_e64 v2, v6, v7, s[6:7]
|
; GFX8-NEXT: v_ashrrev_i32_e32 v2, 31, v3
|
||||||
|
; GFX8-NEXT: v_xor_b32_e32 v2, s6, v2
|
||||||
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
|
; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
|
||||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||||
|
|
@ -515,31 +502,30 @@ define <4 x i32> @v_ssubsat_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v4
|
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v4
|
||||||
; GFX6-NEXT: v_sub_i32_e64 v4, s[4:5], v0, v4
|
; GFX6-NEXT: v_sub_i32_e64 v4, s[4:5], v0, v4
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v4, v0
|
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v4, v0
|
||||||
; GFX6-NEXT: v_bfrev_b32_e32 v8, 1
|
; GFX6-NEXT: v_ashrrev_i32_e32 v0, 31, v4
|
||||||
; GFX6-NEXT: v_bfrev_b32_e32 v9, -2
|
; GFX6-NEXT: s_brev_b32 s6, 1
|
||||||
; GFX6-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v4
|
; GFX6-NEXT: v_xor_b32_e32 v0, s6, v0
|
||||||
; GFX6-NEXT: v_cndmask_b32_e64 v0, v8, v9, s[6:7]
|
|
||||||
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX6-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
|
; GFX6-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
|
||||||
; GFX6-NEXT: v_sub_i32_e64 v4, s[4:5], v1, v5
|
; GFX6-NEXT: v_sub_i32_e64 v4, s[4:5], v1, v5
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v5
|
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v4, v1
|
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v4, v1
|
||||||
; GFX6-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v4
|
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v5
|
||||||
; GFX6-NEXT: v_cndmask_b32_e64 v1, v8, v9, s[6:7]
|
; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v4
|
||||||
|
; GFX6-NEXT: v_xor_b32_e32 v1, s6, v1
|
||||||
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX6-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
|
; GFX6-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
|
||||||
; GFX6-NEXT: v_sub_i32_e64 v4, s[4:5], v2, v6
|
; GFX6-NEXT: v_sub_i32_e64 v4, s[4:5], v2, v6
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v6
|
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v4, v2
|
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v4, v2
|
||||||
; GFX6-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v4
|
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v6
|
||||||
; GFX6-NEXT: v_cndmask_b32_e64 v2, v8, v9, s[6:7]
|
; GFX6-NEXT: v_ashrrev_i32_e32 v2, 31, v4
|
||||||
|
; GFX6-NEXT: v_xor_b32_e32 v2, s6, v2
|
||||||
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX6-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
|
; GFX6-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
|
||||||
; GFX6-NEXT: v_sub_i32_e64 v4, s[4:5], v3, v7
|
; GFX6-NEXT: v_sub_i32_e64 v4, s[4:5], v3, v7
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v7
|
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v4, v3
|
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v4, v3
|
||||||
; GFX6-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v4
|
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v7
|
||||||
; GFX6-NEXT: v_cndmask_b32_e64 v3, v8, v9, s[6:7]
|
; GFX6-NEXT: v_ashrrev_i32_e32 v3, 31, v4
|
||||||
|
; GFX6-NEXT: v_xor_b32_e32 v3, 0x80000000, v3
|
||||||
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX6-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
|
; GFX6-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
|
||||||
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
||||||
|
|
@ -550,31 +536,30 @@ define <4 x i32> @v_ssubsat_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v4
|
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v4
|
||||||
; GFX8-NEXT: v_sub_u32_e64 v4, s[4:5], v0, v4
|
; GFX8-NEXT: v_sub_u32_e64 v4, s[4:5], v0, v4
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v4, v0
|
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v4, v0
|
||||||
; GFX8-NEXT: v_bfrev_b32_e32 v8, 1
|
; GFX8-NEXT: v_ashrrev_i32_e32 v0, 31, v4
|
||||||
; GFX8-NEXT: v_bfrev_b32_e32 v9, -2
|
; GFX8-NEXT: s_brev_b32 s6, 1
|
||||||
; GFX8-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v4
|
; GFX8-NEXT: v_xor_b32_e32 v0, s6, v0
|
||||||
; GFX8-NEXT: v_cndmask_b32_e64 v0, v8, v9, s[6:7]
|
|
||||||
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
|
; GFX8-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
|
||||||
; GFX8-NEXT: v_sub_u32_e64 v4, s[4:5], v1, v5
|
; GFX8-NEXT: v_sub_u32_e64 v4, s[4:5], v1, v5
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v5
|
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v4, v1
|
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v4, v1
|
||||||
; GFX8-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v4
|
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v5
|
||||||
; GFX8-NEXT: v_cndmask_b32_e64 v1, v8, v9, s[6:7]
|
; GFX8-NEXT: v_ashrrev_i32_e32 v1, 31, v4
|
||||||
|
; GFX8-NEXT: v_xor_b32_e32 v1, s6, v1
|
||||||
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
|
; GFX8-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
|
||||||
; GFX8-NEXT: v_sub_u32_e64 v4, s[4:5], v2, v6
|
; GFX8-NEXT: v_sub_u32_e64 v4, s[4:5], v2, v6
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v6
|
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v4, v2
|
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v4, v2
|
||||||
; GFX8-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v4
|
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v6
|
||||||
; GFX8-NEXT: v_cndmask_b32_e64 v2, v8, v9, s[6:7]
|
; GFX8-NEXT: v_ashrrev_i32_e32 v2, 31, v4
|
||||||
|
; GFX8-NEXT: v_xor_b32_e32 v2, s6, v2
|
||||||
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
|
; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
|
||||||
; GFX8-NEXT: v_sub_u32_e64 v4, s[4:5], v3, v7
|
; GFX8-NEXT: v_sub_u32_e64 v4, s[4:5], v3, v7
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v7
|
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v4, v3
|
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v4, v3
|
||||||
; GFX8-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v4
|
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v7
|
||||||
; GFX8-NEXT: v_cndmask_b32_e64 v3, v8, v9, s[6:7]
|
; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v4
|
||||||
|
; GFX8-NEXT: v_xor_b32_e32 v3, 0x80000000, v3
|
||||||
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
|
; GFX8-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
|
||||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||||
|
|
@ -608,59 +593,59 @@ define <8 x i32> @v_ssubsat_v8i32(<8 x i32> %lhs, <8 x i32> %rhs) {
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v8
|
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v8
|
||||||
; GFX6-NEXT: v_sub_i32_e64 v8, s[4:5], v0, v8
|
; GFX6-NEXT: v_sub_i32_e64 v8, s[4:5], v0, v8
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v0
|
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v0
|
||||||
; GFX6-NEXT: v_bfrev_b32_e32 v16, 1
|
; GFX6-NEXT: v_ashrrev_i32_e32 v0, 31, v8
|
||||||
; GFX6-NEXT: v_bfrev_b32_e32 v17, -2
|
; GFX6-NEXT: s_brev_b32 s6, 1
|
||||||
; GFX6-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v8
|
; GFX6-NEXT: v_xor_b32_e32 v0, s6, v0
|
||||||
; GFX6-NEXT: v_cndmask_b32_e64 v0, v16, v17, s[6:7]
|
|
||||||
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX6-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
|
; GFX6-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
|
||||||
; GFX6-NEXT: v_sub_i32_e64 v8, s[4:5], v1, v9
|
; GFX6-NEXT: v_sub_i32_e64 v8, s[4:5], v1, v9
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v9
|
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v1
|
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v1
|
||||||
; GFX6-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v8
|
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v9
|
||||||
; GFX6-NEXT: v_cndmask_b32_e64 v1, v16, v17, s[6:7]
|
; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v8
|
||||||
|
; GFX6-NEXT: v_xor_b32_e32 v1, s6, v1
|
||||||
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX6-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
|
; GFX6-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
|
||||||
; GFX6-NEXT: v_sub_i32_e64 v8, s[4:5], v2, v10
|
; GFX6-NEXT: v_sub_i32_e64 v8, s[4:5], v2, v10
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v10
|
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v2
|
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v2
|
||||||
; GFX6-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v8
|
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v10
|
||||||
; GFX6-NEXT: v_cndmask_b32_e64 v2, v16, v17, s[6:7]
|
; GFX6-NEXT: v_ashrrev_i32_e32 v2, 31, v8
|
||||||
|
; GFX6-NEXT: v_xor_b32_e32 v2, s6, v2
|
||||||
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX6-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
|
; GFX6-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
|
||||||
; GFX6-NEXT: v_sub_i32_e64 v8, s[4:5], v3, v11
|
; GFX6-NEXT: v_sub_i32_e64 v8, s[4:5], v3, v11
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v11
|
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v3
|
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v3
|
||||||
; GFX6-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v8
|
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v11
|
||||||
; GFX6-NEXT: v_cndmask_b32_e64 v3, v16, v17, s[6:7]
|
; GFX6-NEXT: v_bfrev_b32_e32 v16, 1
|
||||||
|
; GFX6-NEXT: v_ashrrev_i32_e32 v3, 31, v8
|
||||||
|
; GFX6-NEXT: v_xor_b32_e32 v3, v16, v3
|
||||||
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX6-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc
|
; GFX6-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc
|
||||||
; GFX6-NEXT: v_sub_i32_e64 v8, s[4:5], v4, v12
|
; GFX6-NEXT: v_sub_i32_e64 v8, s[4:5], v4, v12
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v12
|
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v4
|
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v4
|
||||||
; GFX6-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v8
|
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v12
|
||||||
; GFX6-NEXT: v_cndmask_b32_e64 v4, v16, v17, s[6:7]
|
; GFX6-NEXT: v_ashrrev_i32_e32 v4, 31, v8
|
||||||
|
; GFX6-NEXT: v_xor_b32_e32 v4, v16, v4
|
||||||
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX6-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
|
; GFX6-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
|
||||||
; GFX6-NEXT: v_sub_i32_e64 v8, s[4:5], v5, v13
|
; GFX6-NEXT: v_sub_i32_e64 v8, s[4:5], v5, v13
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v13
|
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v5
|
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v5
|
||||||
; GFX6-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v8
|
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v13
|
||||||
; GFX6-NEXT: v_cndmask_b32_e64 v5, v16, v17, s[6:7]
|
; GFX6-NEXT: v_ashrrev_i32_e32 v5, 31, v8
|
||||||
|
; GFX6-NEXT: v_xor_b32_e32 v5, v16, v5
|
||||||
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX6-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc
|
; GFX6-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc
|
||||||
; GFX6-NEXT: v_sub_i32_e64 v8, s[4:5], v6, v14
|
; GFX6-NEXT: v_sub_i32_e64 v8, s[4:5], v6, v14
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v14
|
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v6
|
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v6
|
||||||
; GFX6-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v8
|
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v14
|
||||||
; GFX6-NEXT: v_cndmask_b32_e64 v6, v16, v17, s[6:7]
|
; GFX6-NEXT: v_ashrrev_i32_e32 v6, 31, v8
|
||||||
|
; GFX6-NEXT: v_xor_b32_e32 v6, v16, v6
|
||||||
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX6-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc
|
; GFX6-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc
|
||||||
; GFX6-NEXT: v_sub_i32_e64 v8, s[4:5], v7, v15
|
; GFX6-NEXT: v_sub_i32_e64 v8, s[4:5], v7, v15
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v15
|
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v7
|
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v7
|
||||||
; GFX6-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v8
|
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v15
|
||||||
; GFX6-NEXT: v_cndmask_b32_e64 v7, v16, v17, s[6:7]
|
; GFX6-NEXT: v_ashrrev_i32_e32 v7, 31, v8
|
||||||
|
; GFX6-NEXT: v_xor_b32_e32 v7, v16, v7
|
||||||
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX6-NEXT: v_cndmask_b32_e32 v7, v8, v7, vcc
|
; GFX6-NEXT: v_cndmask_b32_e32 v7, v8, v7, vcc
|
||||||
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
||||||
|
|
@ -671,59 +656,59 @@ define <8 x i32> @v_ssubsat_v8i32(<8 x i32> %lhs, <8 x i32> %rhs) {
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v8
|
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v8
|
||||||
; GFX8-NEXT: v_sub_u32_e64 v8, s[4:5], v0, v8
|
; GFX8-NEXT: v_sub_u32_e64 v8, s[4:5], v0, v8
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v0
|
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v0
|
||||||
; GFX8-NEXT: v_bfrev_b32_e32 v16, 1
|
; GFX8-NEXT: v_ashrrev_i32_e32 v0, 31, v8
|
||||||
; GFX8-NEXT: v_bfrev_b32_e32 v17, -2
|
; GFX8-NEXT: s_brev_b32 s6, 1
|
||||||
; GFX8-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v8
|
; GFX8-NEXT: v_xor_b32_e32 v0, s6, v0
|
||||||
; GFX8-NEXT: v_cndmask_b32_e64 v0, v16, v17, s[6:7]
|
|
||||||
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
|
; GFX8-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
|
||||||
; GFX8-NEXT: v_sub_u32_e64 v8, s[4:5], v1, v9
|
; GFX8-NEXT: v_sub_u32_e64 v8, s[4:5], v1, v9
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v9
|
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v1
|
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v1
|
||||||
; GFX8-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v8
|
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v9
|
||||||
; GFX8-NEXT: v_cndmask_b32_e64 v1, v16, v17, s[6:7]
|
; GFX8-NEXT: v_ashrrev_i32_e32 v1, 31, v8
|
||||||
|
; GFX8-NEXT: v_xor_b32_e32 v1, s6, v1
|
||||||
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
|
; GFX8-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc
|
||||||
; GFX8-NEXT: v_sub_u32_e64 v8, s[4:5], v2, v10
|
; GFX8-NEXT: v_sub_u32_e64 v8, s[4:5], v2, v10
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v10
|
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v2
|
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v2
|
||||||
; GFX8-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v8
|
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v10
|
||||||
; GFX8-NEXT: v_cndmask_b32_e64 v2, v16, v17, s[6:7]
|
; GFX8-NEXT: v_ashrrev_i32_e32 v2, 31, v8
|
||||||
|
; GFX8-NEXT: v_xor_b32_e32 v2, s6, v2
|
||||||
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
|
; GFX8-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
|
||||||
; GFX8-NEXT: v_sub_u32_e64 v8, s[4:5], v3, v11
|
; GFX8-NEXT: v_sub_u32_e64 v8, s[4:5], v3, v11
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v11
|
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v3
|
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v3
|
||||||
; GFX8-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v8
|
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v11
|
||||||
; GFX8-NEXT: v_cndmask_b32_e64 v3, v16, v17, s[6:7]
|
; GFX8-NEXT: v_bfrev_b32_e32 v16, 1
|
||||||
|
; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v8
|
||||||
|
; GFX8-NEXT: v_xor_b32_e32 v3, v16, v3
|
||||||
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc
|
; GFX8-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc
|
||||||
; GFX8-NEXT: v_sub_u32_e64 v8, s[4:5], v4, v12
|
; GFX8-NEXT: v_sub_u32_e64 v8, s[4:5], v4, v12
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v12
|
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v4
|
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v4
|
||||||
; GFX8-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v8
|
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v12
|
||||||
; GFX8-NEXT: v_cndmask_b32_e64 v4, v16, v17, s[6:7]
|
; GFX8-NEXT: v_ashrrev_i32_e32 v4, 31, v8
|
||||||
|
; GFX8-NEXT: v_xor_b32_e32 v4, v16, v4
|
||||||
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
|
; GFX8-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
|
||||||
; GFX8-NEXT: v_sub_u32_e64 v8, s[4:5], v5, v13
|
; GFX8-NEXT: v_sub_u32_e64 v8, s[4:5], v5, v13
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v13
|
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v5
|
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v5
|
||||||
; GFX8-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v8
|
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v13
|
||||||
; GFX8-NEXT: v_cndmask_b32_e64 v5, v16, v17, s[6:7]
|
; GFX8-NEXT: v_ashrrev_i32_e32 v5, 31, v8
|
||||||
|
; GFX8-NEXT: v_xor_b32_e32 v5, v16, v5
|
||||||
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc
|
; GFX8-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc
|
||||||
; GFX8-NEXT: v_sub_u32_e64 v8, s[4:5], v6, v14
|
; GFX8-NEXT: v_sub_u32_e64 v8, s[4:5], v6, v14
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v14
|
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v6
|
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v6
|
||||||
; GFX8-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v8
|
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v14
|
||||||
; GFX8-NEXT: v_cndmask_b32_e64 v6, v16, v17, s[6:7]
|
; GFX8-NEXT: v_ashrrev_i32_e32 v6, 31, v8
|
||||||
|
; GFX8-NEXT: v_xor_b32_e32 v6, v16, v6
|
||||||
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc
|
; GFX8-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc
|
||||||
; GFX8-NEXT: v_sub_u32_e64 v8, s[4:5], v7, v15
|
; GFX8-NEXT: v_sub_u32_e64 v8, s[4:5], v7, v15
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v15
|
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v7
|
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v7
|
||||||
; GFX8-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v8
|
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v15
|
||||||
; GFX8-NEXT: v_cndmask_b32_e64 v7, v16, v17, s[6:7]
|
; GFX8-NEXT: v_ashrrev_i32_e32 v7, 31, v8
|
||||||
|
; GFX8-NEXT: v_xor_b32_e32 v7, v16, v7
|
||||||
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v7, v8, v7, vcc
|
; GFX8-NEXT: v_cndmask_b32_e32 v7, v8, v7, vcc
|
||||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||||
|
|
@ -765,115 +750,115 @@ define <16 x i32> @v_ssubsat_v16i32(<16 x i32> %lhs, <16 x i32> %rhs) {
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v16
|
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v16
|
||||||
; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v0, v16
|
; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v0, v16
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v0
|
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v0
|
||||||
; GFX6-NEXT: v_bfrev_b32_e32 v32, 1
|
; GFX6-NEXT: s_brev_b32 s6, 1
|
||||||
; GFX6-NEXT: v_bfrev_b32_e32 v33, -2
|
; GFX6-NEXT: v_ashrrev_i32_e32 v0, 31, v16
|
||||||
; GFX6-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v16
|
; GFX6-NEXT: v_xor_b32_e32 v0, s6, v0
|
||||||
; GFX6-NEXT: v_cndmask_b32_e64 v0, v32, v33, s[6:7]
|
|
||||||
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX6-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
|
; GFX6-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
|
||||||
; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v1, v17
|
; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v1, v17
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v17
|
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v17
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v1
|
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v1
|
||||||
; GFX6-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v16
|
; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v16
|
||||||
; GFX6-NEXT: v_cndmask_b32_e64 v1, v32, v33, s[6:7]
|
; GFX6-NEXT: v_xor_b32_e32 v1, s6, v1
|
||||||
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX6-NEXT: v_cndmask_b32_e32 v1, v16, v1, vcc
|
; GFX6-NEXT: v_cndmask_b32_e32 v1, v16, v1, vcc
|
||||||
; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v2, v18
|
; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v2, v18
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v18
|
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v2
|
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v2
|
||||||
; GFX6-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v16
|
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v18
|
||||||
; GFX6-NEXT: v_cndmask_b32_e64 v2, v32, v33, s[6:7]
|
; GFX6-NEXT: v_ashrrev_i32_e32 v2, 31, v16
|
||||||
|
; GFX6-NEXT: v_xor_b32_e32 v2, s6, v2
|
||||||
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX6-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
|
; GFX6-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
|
||||||
; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v3, v19
|
; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v3, v19
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v19
|
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v3
|
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v3
|
||||||
; GFX6-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v16
|
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v19
|
||||||
; GFX6-NEXT: v_cndmask_b32_e64 v3, v32, v33, s[6:7]
|
; GFX6-NEXT: v_bfrev_b32_e32 v17, 1
|
||||||
|
; GFX6-NEXT: v_ashrrev_i32_e32 v3, 31, v16
|
||||||
|
; GFX6-NEXT: v_xor_b32_e32 v3, v17, v3
|
||||||
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX6-NEXT: v_cndmask_b32_e32 v3, v16, v3, vcc
|
; GFX6-NEXT: v_cndmask_b32_e32 v3, v16, v3, vcc
|
||||||
; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v4, v20
|
; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v4, v20
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v20
|
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v4
|
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v4
|
||||||
; GFX6-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v16
|
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v20
|
||||||
; GFX6-NEXT: v_cndmask_b32_e64 v4, v32, v33, s[6:7]
|
; GFX6-NEXT: v_ashrrev_i32_e32 v4, 31, v16
|
||||||
|
; GFX6-NEXT: v_xor_b32_e32 v4, v17, v4
|
||||||
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX6-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc
|
; GFX6-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc
|
||||||
; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v5, v21
|
; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v5, v21
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v21
|
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v5
|
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v5
|
||||||
; GFX6-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v16
|
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v21
|
||||||
; GFX6-NEXT: v_cndmask_b32_e64 v5, v32, v33, s[6:7]
|
; GFX6-NEXT: v_ashrrev_i32_e32 v5, 31, v16
|
||||||
|
; GFX6-NEXT: v_xor_b32_e32 v5, v17, v5
|
||||||
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX6-NEXT: v_cndmask_b32_e32 v5, v16, v5, vcc
|
; GFX6-NEXT: v_cndmask_b32_e32 v5, v16, v5, vcc
|
||||||
; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v6, v22
|
; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v6, v22
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v22
|
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v6
|
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v6
|
||||||
; GFX6-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v16
|
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v22
|
||||||
; GFX6-NEXT: v_cndmask_b32_e64 v6, v32, v33, s[6:7]
|
; GFX6-NEXT: v_ashrrev_i32_e32 v6, 31, v16
|
||||||
|
; GFX6-NEXT: v_xor_b32_e32 v6, v17, v6
|
||||||
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX6-NEXT: v_cndmask_b32_e32 v6, v16, v6, vcc
|
; GFX6-NEXT: v_cndmask_b32_e32 v6, v16, v6, vcc
|
||||||
; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v7, v23
|
; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v7, v23
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v23
|
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v7
|
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v7
|
||||||
; GFX6-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v16
|
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v23
|
||||||
; GFX6-NEXT: v_cndmask_b32_e64 v7, v32, v33, s[6:7]
|
; GFX6-NEXT: v_ashrrev_i32_e32 v7, 31, v16
|
||||||
|
; GFX6-NEXT: v_xor_b32_e32 v7, v17, v7
|
||||||
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX6-NEXT: v_cndmask_b32_e32 v7, v16, v7, vcc
|
; GFX6-NEXT: v_cndmask_b32_e32 v7, v16, v7, vcc
|
||||||
; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v8, v24
|
; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v8, v24
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v24
|
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v8
|
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v8
|
||||||
; GFX6-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v16
|
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v24
|
||||||
; GFX6-NEXT: v_cndmask_b32_e64 v8, v32, v33, s[6:7]
|
; GFX6-NEXT: v_ashrrev_i32_e32 v8, 31, v16
|
||||||
|
; GFX6-NEXT: v_xor_b32_e32 v8, v17, v8
|
||||||
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX6-NEXT: v_cndmask_b32_e32 v8, v16, v8, vcc
|
; GFX6-NEXT: v_cndmask_b32_e32 v8, v16, v8, vcc
|
||||||
; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v9, v25
|
; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v9, v25
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v25
|
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v9
|
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v9
|
||||||
; GFX6-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v16
|
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v25
|
||||||
; GFX6-NEXT: v_cndmask_b32_e64 v9, v32, v33, s[6:7]
|
; GFX6-NEXT: v_ashrrev_i32_e32 v9, 31, v16
|
||||||
|
; GFX6-NEXT: v_xor_b32_e32 v9, v17, v9
|
||||||
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX6-NEXT: v_cndmask_b32_e32 v9, v16, v9, vcc
|
; GFX6-NEXT: v_cndmask_b32_e32 v9, v16, v9, vcc
|
||||||
; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v10, v26
|
; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v10, v26
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v26
|
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v10
|
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v10
|
||||||
; GFX6-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v16
|
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v26
|
||||||
; GFX6-NEXT: v_cndmask_b32_e64 v10, v32, v33, s[6:7]
|
; GFX6-NEXT: v_ashrrev_i32_e32 v10, 31, v16
|
||||||
|
; GFX6-NEXT: v_xor_b32_e32 v10, v17, v10
|
||||||
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX6-NEXT: v_cndmask_b32_e32 v10, v16, v10, vcc
|
; GFX6-NEXT: v_cndmask_b32_e32 v10, v16, v10, vcc
|
||||||
; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v11, v27
|
; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v11, v27
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v27
|
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v11
|
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v11
|
||||||
; GFX6-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v16
|
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v27
|
||||||
; GFX6-NEXT: v_cndmask_b32_e64 v11, v32, v33, s[6:7]
|
; GFX6-NEXT: v_ashrrev_i32_e32 v11, 31, v16
|
||||||
|
; GFX6-NEXT: v_xor_b32_e32 v11, v17, v11
|
||||||
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX6-NEXT: v_cndmask_b32_e32 v11, v16, v11, vcc
|
; GFX6-NEXT: v_cndmask_b32_e32 v11, v16, v11, vcc
|
||||||
; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v12, v28
|
; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v12, v28
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v28
|
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v12
|
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v12
|
||||||
; GFX6-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v16
|
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v28
|
||||||
; GFX6-NEXT: v_cndmask_b32_e64 v12, v32, v33, s[6:7]
|
; GFX6-NEXT: v_ashrrev_i32_e32 v12, 31, v16
|
||||||
|
; GFX6-NEXT: v_xor_b32_e32 v12, v17, v12
|
||||||
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX6-NEXT: v_cndmask_b32_e32 v12, v16, v12, vcc
|
; GFX6-NEXT: v_cndmask_b32_e32 v12, v16, v12, vcc
|
||||||
; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v13, v29
|
; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v13, v29
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v29
|
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v13
|
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v13
|
||||||
; GFX6-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v16
|
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v29
|
||||||
; GFX6-NEXT: v_cndmask_b32_e64 v13, v32, v33, s[6:7]
|
; GFX6-NEXT: v_ashrrev_i32_e32 v13, 31, v16
|
||||||
|
; GFX6-NEXT: v_xor_b32_e32 v13, v17, v13
|
||||||
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX6-NEXT: v_cndmask_b32_e32 v13, v16, v13, vcc
|
; GFX6-NEXT: v_cndmask_b32_e32 v13, v16, v13, vcc
|
||||||
; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v14, v30
|
; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v14, v30
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v30
|
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v14
|
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v14
|
||||||
; GFX6-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v16
|
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v30
|
||||||
; GFX6-NEXT: v_cndmask_b32_e64 v14, v32, v33, s[6:7]
|
; GFX6-NEXT: v_ashrrev_i32_e32 v14, 31, v16
|
||||||
|
; GFX6-NEXT: v_xor_b32_e32 v14, v17, v14
|
||||||
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX6-NEXT: v_cndmask_b32_e32 v14, v16, v14, vcc
|
; GFX6-NEXT: v_cndmask_b32_e32 v14, v16, v14, vcc
|
||||||
; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v15, v31
|
; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v15, v31
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v31
|
|
||||||
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v15
|
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v15
|
||||||
; GFX6-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v16
|
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v31
|
||||||
; GFX6-NEXT: v_cndmask_b32_e64 v15, v32, v33, s[6:7]
|
; GFX6-NEXT: v_ashrrev_i32_e32 v15, 31, v16
|
||||||
|
; GFX6-NEXT: v_xor_b32_e32 v15, v17, v15
|
||||||
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX6-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc
|
; GFX6-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc
|
||||||
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
||||||
|
|
@ -884,115 +869,115 @@ define <16 x i32> @v_ssubsat_v16i32(<16 x i32> %lhs, <16 x i32> %rhs) {
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v16
|
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v16
|
||||||
; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v0, v16
|
; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v0, v16
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v0
|
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v0
|
||||||
; GFX8-NEXT: v_bfrev_b32_e32 v32, 1
|
; GFX8-NEXT: s_brev_b32 s6, 1
|
||||||
; GFX8-NEXT: v_bfrev_b32_e32 v33, -2
|
; GFX8-NEXT: v_ashrrev_i32_e32 v0, 31, v16
|
||||||
; GFX8-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v16
|
; GFX8-NEXT: v_xor_b32_e32 v0, s6, v0
|
||||||
; GFX8-NEXT: v_cndmask_b32_e64 v0, v32, v33, s[6:7]
|
|
||||||
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
|
; GFX8-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
|
||||||
; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v1, v17
|
; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v1, v17
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v17
|
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v17
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v1
|
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v1
|
||||||
; GFX8-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v16
|
; GFX8-NEXT: v_ashrrev_i32_e32 v1, 31, v16
|
||||||
; GFX8-NEXT: v_cndmask_b32_e64 v1, v32, v33, s[6:7]
|
; GFX8-NEXT: v_xor_b32_e32 v1, s6, v1
|
||||||
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v1, v16, v1, vcc
|
; GFX8-NEXT: v_cndmask_b32_e32 v1, v16, v1, vcc
|
||||||
; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v2, v18
|
; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v2, v18
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v18
|
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v2
|
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v2
|
||||||
; GFX8-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v16
|
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v18
|
||||||
; GFX8-NEXT: v_cndmask_b32_e64 v2, v32, v33, s[6:7]
|
; GFX8-NEXT: v_ashrrev_i32_e32 v2, 31, v16
|
||||||
|
; GFX8-NEXT: v_xor_b32_e32 v2, s6, v2
|
||||||
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
|
; GFX8-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc
|
||||||
; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v3, v19
|
; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v3, v19
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v19
|
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v3
|
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v3
|
||||||
; GFX8-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v16
|
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v19
|
||||||
; GFX8-NEXT: v_cndmask_b32_e64 v3, v32, v33, s[6:7]
|
; GFX8-NEXT: v_bfrev_b32_e32 v17, 1
|
||||||
|
; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v16
|
||||||
|
; GFX8-NEXT: v_xor_b32_e32 v3, v17, v3
|
||||||
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v3, v16, v3, vcc
|
; GFX8-NEXT: v_cndmask_b32_e32 v3, v16, v3, vcc
|
||||||
; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v4, v20
|
; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v4, v20
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v20
|
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v4
|
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v4
|
||||||
; GFX8-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v16
|
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v20
|
||||||
; GFX8-NEXT: v_cndmask_b32_e64 v4, v32, v33, s[6:7]
|
; GFX8-NEXT: v_ashrrev_i32_e32 v4, 31, v16
|
||||||
|
; GFX8-NEXT: v_xor_b32_e32 v4, v17, v4
|
||||||
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc
|
; GFX8-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc
|
||||||
; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v5, v21
|
; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v5, v21
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v21
|
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v5
|
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v5
|
||||||
; GFX8-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v16
|
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v21
|
||||||
; GFX8-NEXT: v_cndmask_b32_e64 v5, v32, v33, s[6:7]
|
; GFX8-NEXT: v_ashrrev_i32_e32 v5, 31, v16
|
||||||
|
; GFX8-NEXT: v_xor_b32_e32 v5, v17, v5
|
||||||
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v5, v16, v5, vcc
|
; GFX8-NEXT: v_cndmask_b32_e32 v5, v16, v5, vcc
|
||||||
; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v6, v22
|
; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v6, v22
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v22
|
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v6
|
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v6
|
||||||
; GFX8-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v16
|
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v22
|
||||||
; GFX8-NEXT: v_cndmask_b32_e64 v6, v32, v33, s[6:7]
|
; GFX8-NEXT: v_ashrrev_i32_e32 v6, 31, v16
|
||||||
|
; GFX8-NEXT: v_xor_b32_e32 v6, v17, v6
|
||||||
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v6, v16, v6, vcc
|
; GFX8-NEXT: v_cndmask_b32_e32 v6, v16, v6, vcc
|
||||||
; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v7, v23
|
; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v7, v23
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v23
|
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v7
|
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v7
|
||||||
; GFX8-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v16
|
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v23
|
||||||
; GFX8-NEXT: v_cndmask_b32_e64 v7, v32, v33, s[6:7]
|
; GFX8-NEXT: v_ashrrev_i32_e32 v7, 31, v16
|
||||||
|
; GFX8-NEXT: v_xor_b32_e32 v7, v17, v7
|
||||||
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v7, v16, v7, vcc
|
; GFX8-NEXT: v_cndmask_b32_e32 v7, v16, v7, vcc
|
||||||
; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v8, v24
|
; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v8, v24
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v24
|
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v8
|
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v8
|
||||||
; GFX8-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v16
|
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v24
|
||||||
; GFX8-NEXT: v_cndmask_b32_e64 v8, v32, v33, s[6:7]
|
; GFX8-NEXT: v_ashrrev_i32_e32 v8, 31, v16
|
||||||
|
; GFX8-NEXT: v_xor_b32_e32 v8, v17, v8
|
||||||
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v8, v16, v8, vcc
|
; GFX8-NEXT: v_cndmask_b32_e32 v8, v16, v8, vcc
|
||||||
; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v9, v25
|
; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v9, v25
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v25
|
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v9
|
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v9
|
||||||
; GFX8-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v16
|
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v25
|
||||||
; GFX8-NEXT: v_cndmask_b32_e64 v9, v32, v33, s[6:7]
|
; GFX8-NEXT: v_ashrrev_i32_e32 v9, 31, v16
|
||||||
|
; GFX8-NEXT: v_xor_b32_e32 v9, v17, v9
|
||||||
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v9, v16, v9, vcc
|
; GFX8-NEXT: v_cndmask_b32_e32 v9, v16, v9, vcc
|
||||||
; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v10, v26
|
; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v10, v26
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v26
|
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v10
|
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v10
|
||||||
; GFX8-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v16
|
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v26
|
||||||
; GFX8-NEXT: v_cndmask_b32_e64 v10, v32, v33, s[6:7]
|
; GFX8-NEXT: v_ashrrev_i32_e32 v10, 31, v16
|
||||||
|
; GFX8-NEXT: v_xor_b32_e32 v10, v17, v10
|
||||||
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v10, v16, v10, vcc
|
; GFX8-NEXT: v_cndmask_b32_e32 v10, v16, v10, vcc
|
||||||
; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v11, v27
|
; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v11, v27
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v27
|
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v11
|
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v11
|
||||||
; GFX8-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v16
|
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v27
|
||||||
; GFX8-NEXT: v_cndmask_b32_e64 v11, v32, v33, s[6:7]
|
; GFX8-NEXT: v_ashrrev_i32_e32 v11, 31, v16
|
||||||
|
; GFX8-NEXT: v_xor_b32_e32 v11, v17, v11
|
||||||
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v11, v16, v11, vcc
|
; GFX8-NEXT: v_cndmask_b32_e32 v11, v16, v11, vcc
|
||||||
; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v12, v28
|
; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v12, v28
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v28
|
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v12
|
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v12
|
||||||
; GFX8-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v16
|
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v28
|
||||||
; GFX8-NEXT: v_cndmask_b32_e64 v12, v32, v33, s[6:7]
|
; GFX8-NEXT: v_ashrrev_i32_e32 v12, 31, v16
|
||||||
|
; GFX8-NEXT: v_xor_b32_e32 v12, v17, v12
|
||||||
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v12, v16, v12, vcc
|
; GFX8-NEXT: v_cndmask_b32_e32 v12, v16, v12, vcc
|
||||||
; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v13, v29
|
; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v13, v29
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v29
|
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v13
|
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v13
|
||||||
; GFX8-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v16
|
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v29
|
||||||
; GFX8-NEXT: v_cndmask_b32_e64 v13, v32, v33, s[6:7]
|
; GFX8-NEXT: v_ashrrev_i32_e32 v13, 31, v16
|
||||||
|
; GFX8-NEXT: v_xor_b32_e32 v13, v17, v13
|
||||||
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v13, v16, v13, vcc
|
; GFX8-NEXT: v_cndmask_b32_e32 v13, v16, v13, vcc
|
||||||
; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v14, v30
|
; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v14, v30
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v30
|
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v14
|
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v14
|
||||||
; GFX8-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v16
|
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v30
|
||||||
; GFX8-NEXT: v_cndmask_b32_e64 v14, v32, v33, s[6:7]
|
; GFX8-NEXT: v_ashrrev_i32_e32 v14, 31, v16
|
||||||
|
; GFX8-NEXT: v_xor_b32_e32 v14, v17, v14
|
||||||
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v14, v16, v14, vcc
|
; GFX8-NEXT: v_cndmask_b32_e32 v14, v16, v14, vcc
|
||||||
; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v15, v31
|
; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v15, v31
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v31
|
|
||||||
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v15
|
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v15
|
||||||
; GFX8-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v16
|
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v31
|
||||||
; GFX8-NEXT: v_cndmask_b32_e64 v15, v32, v33, s[6:7]
|
; GFX8-NEXT: v_ashrrev_i32_e32 v15, 31, v16
|
||||||
|
; GFX8-NEXT: v_xor_b32_e32 v15, v17, v15
|
||||||
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc
|
; GFX8-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc
|
||||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||||
|
|
@ -1052,13 +1037,10 @@ define i64 @v_ssubsat_i64(i64 %lhs, i64 %rhs) {
|
||||||
; GFX6-NEXT: v_subb_u32_e32 v5, vcc, v1, v3, vcc
|
; GFX6-NEXT: v_subb_u32_e32 v5, vcc, v1, v3, vcc
|
||||||
; GFX6-NEXT: v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1]
|
; GFX6-NEXT: v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1]
|
||||||
; GFX6-NEXT: v_cmp_lt_i64_e64 s[4:5], 0, v[2:3]
|
; GFX6-NEXT: v_cmp_lt_i64_e64 s[4:5], 0, v[2:3]
|
||||||
; GFX6-NEXT: v_bfrev_b32_e32 v1, 1
|
; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v5
|
||||||
; GFX6-NEXT: s_xor_b64 vcc, s[4:5], vcc
|
; GFX6-NEXT: s_xor_b64 vcc, s[4:5], vcc
|
||||||
; GFX6-NEXT: v_cmp_gt_i64_e64 s[4:5], 0, v[4:5]
|
; GFX6-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
|
||||||
; GFX6-NEXT: v_bfrev_b32_e32 v2, -2
|
; GFX6-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
|
||||||
; GFX6-NEXT: v_ashrrev_i32_e32 v0, 31, v5
|
|
||||||
; GFX6-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[4:5]
|
|
||||||
; GFX6-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
|
|
||||||
; GFX6-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
|
; GFX6-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
|
||||||
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
; GFX6-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
|
|
@ -1069,13 +1051,10 @@ define i64 @v_ssubsat_i64(i64 %lhs, i64 %rhs) {
|
||||||
; GFX8-NEXT: v_subb_u32_e32 v5, vcc, v1, v3, vcc
|
; GFX8-NEXT: v_subb_u32_e32 v5, vcc, v1, v3, vcc
|
||||||
; GFX8-NEXT: v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1]
|
; GFX8-NEXT: v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1]
|
||||||
; GFX8-NEXT: v_cmp_lt_i64_e64 s[4:5], 0, v[2:3]
|
; GFX8-NEXT: v_cmp_lt_i64_e64 s[4:5], 0, v[2:3]
|
||||||
; GFX8-NEXT: v_bfrev_b32_e32 v1, 1
|
; GFX8-NEXT: v_ashrrev_i32_e32 v1, 31, v5
|
||||||
; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc
|
; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc
|
||||||
; GFX8-NEXT: v_cmp_gt_i64_e64 s[4:5], 0, v[4:5]
|
; GFX8-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
|
||||||
; GFX8-NEXT: v_bfrev_b32_e32 v2, -2
|
; GFX8-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
|
||||||
; GFX8-NEXT: v_ashrrev_i32_e32 v0, 31, v5
|
|
||||||
; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[4:5]
|
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
|
|
||||||
; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
|
; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
|
||||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
|
|
@ -1086,13 +1065,10 @@ define i64 @v_ssubsat_i64(i64 %lhs, i64 %rhs) {
|
||||||
; GFX9-NEXT: v_subb_co_u32_e32 v5, vcc, v1, v3, vcc
|
; GFX9-NEXT: v_subb_co_u32_e32 v5, vcc, v1, v3, vcc
|
||||||
; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1]
|
; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1]
|
||||||
; GFX9-NEXT: v_cmp_lt_i64_e64 s[4:5], 0, v[2:3]
|
; GFX9-NEXT: v_cmp_lt_i64_e64 s[4:5], 0, v[2:3]
|
||||||
; GFX9-NEXT: v_bfrev_b32_e32 v1, 1
|
; GFX9-NEXT: v_ashrrev_i32_e32 v1, 31, v5
|
||||||
; GFX9-NEXT: s_xor_b64 vcc, s[4:5], vcc
|
; GFX9-NEXT: s_xor_b64 vcc, s[4:5], vcc
|
||||||
; GFX9-NEXT: v_cmp_gt_i64_e64 s[4:5], 0, v[4:5]
|
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
|
||||||
; GFX9-NEXT: v_bfrev_b32_e32 v2, -2
|
; GFX9-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
|
||||||
; GFX9-NEXT: v_ashrrev_i32_e32 v0, 31, v5
|
|
||||||
; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[4:5]
|
|
||||||
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
|
|
||||||
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
|
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
|
||||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
|
|
@ -1101,15 +1077,13 @@ define i64 @v_ssubsat_i64(i64 %lhs, i64 %rhs) {
|
||||||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||||
; GFX10-NEXT: v_sub_co_u32 v4, vcc_lo, v0, v2
|
; GFX10-NEXT: v_sub_co_u32 v4, vcc_lo, v0, v2
|
||||||
; GFX10-NEXT: v_bfrev_b32_e32 v6, -2
|
|
||||||
; GFX10-NEXT: v_sub_co_ci_u32_e32 v5, vcc_lo, v1, v3, vcc_lo
|
|
||||||
; GFX10-NEXT: v_cmp_lt_i64_e64 s4, 0, v[2:3]
|
; GFX10-NEXT: v_cmp_lt_i64_e64 s4, 0, v[2:3]
|
||||||
; GFX10-NEXT: v_cmp_gt_i64_e64 s5, 0, v[4:5]
|
; GFX10-NEXT: v_sub_co_ci_u32_e32 v5, vcc_lo, v1, v3, vcc_lo
|
||||||
|
; GFX10-NEXT: v_ashrrev_i32_e32 v6, 31, v5
|
||||||
; GFX10-NEXT: v_cmp_lt_i64_e32 vcc_lo, v[4:5], v[0:1]
|
; GFX10-NEXT: v_cmp_lt_i64_e32 vcc_lo, v[4:5], v[0:1]
|
||||||
; GFX10-NEXT: v_ashrrev_i32_e32 v0, 31, v5
|
; GFX10-NEXT: v_xor_b32_e32 v1, 0x80000000, v6
|
||||||
; GFX10-NEXT: v_cndmask_b32_e64 v1, 0x80000000, v6, s5
|
|
||||||
; GFX10-NEXT: s_xor_b32 vcc_lo, s4, vcc_lo
|
; GFX10-NEXT: s_xor_b32 vcc_lo, s4, vcc_lo
|
||||||
; GFX10-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc_lo
|
; GFX10-NEXT: v_cndmask_b32_e32 v0, v4, v6, vcc_lo
|
||||||
; GFX10-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
|
; GFX10-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
|
||||||
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
||||||
%result = call i64 @llvm.ssub.sat.i64(i64 %lhs, i64 %rhs)
|
%result = call i64 @llvm.ssub.sat.i64(i64 %lhs, i64 %rhs)
|
||||||
|
|
|
||||||
|
|
@ -7,32 +7,14 @@
|
||||||
define i32 @qdadd(i32 %x, i32 %y) nounwind {
|
define i32 @qdadd(i32 %x, i32 %y) nounwind {
|
||||||
; CHECK-T2NODSP-LABEL: qdadd:
|
; CHECK-T2NODSP-LABEL: qdadd:
|
||||||
; CHECK-T2NODSP: @ %bb.0:
|
; CHECK-T2NODSP: @ %bb.0:
|
||||||
; CHECK-T2NODSP-NEXT: .save {r7, lr}
|
; CHECK-T2NODSP-NEXT: adds r0, r0, r0
|
||||||
; CHECK-T2NODSP-NEXT: push {r7, lr}
|
|
||||||
; CHECK-T2NODSP-NEXT: movs r3, #0
|
|
||||||
; CHECK-T2NODSP-NEXT: adds.w r12, r0, r0
|
|
||||||
; CHECK-T2NODSP-NEXT: it mi
|
|
||||||
; CHECK-T2NODSP-NEXT: movmi r3, #1
|
|
||||||
; CHECK-T2NODSP-NEXT: cmp r3, #0
|
|
||||||
; CHECK-T2NODSP-NEXT: mov.w r3, #-2147483648
|
|
||||||
; CHECK-T2NODSP-NEXT: mov.w lr, #0
|
|
||||||
; CHECK-T2NODSP-NEXT: it ne
|
|
||||||
; CHECK-T2NODSP-NEXT: mvnne r3, #-2147483648
|
|
||||||
; CHECK-T2NODSP-NEXT: cmp r12, r0
|
|
||||||
; CHECK-T2NODSP-NEXT: it vc
|
|
||||||
; CHECK-T2NODSP-NEXT: movvc r3, r12
|
|
||||||
; CHECK-T2NODSP-NEXT: adds r0, r3, r1
|
|
||||||
; CHECK-T2NODSP-NEXT: mov.w r2, #-2147483648
|
; CHECK-T2NODSP-NEXT: mov.w r2, #-2147483648
|
||||||
; CHECK-T2NODSP-NEXT: it mi
|
; CHECK-T2NODSP-NEXT: it vs
|
||||||
; CHECK-T2NODSP-NEXT: movmi.w lr, #1
|
; CHECK-T2NODSP-NEXT: eorvs.w r0, r2, r0, asr #31
|
||||||
; CHECK-T2NODSP-NEXT: cmp.w lr, #0
|
; CHECK-T2NODSP-NEXT: adds r0, r0, r1
|
||||||
; CHECK-T2NODSP-NEXT: it ne
|
; CHECK-T2NODSP-NEXT: it vs
|
||||||
; CHECK-T2NODSP-NEXT: mvnne r2, #-2147483648
|
; CHECK-T2NODSP-NEXT: eorvs.w r0, r2, r0, asr #31
|
||||||
; CHECK-T2NODSP-NEXT: cmp r0, r3
|
; CHECK-T2NODSP-NEXT: bx lr
|
||||||
; CHECK-T2NODSP-NEXT: it vc
|
|
||||||
; CHECK-T2NODSP-NEXT: movvc r2, r0
|
|
||||||
; CHECK-T2NODSP-NEXT: mov r0, r2
|
|
||||||
; CHECK-T2NODSP-NEXT: pop {r7, pc}
|
|
||||||
;
|
;
|
||||||
; CHECK-T2DSP-LABEL: qdadd:
|
; CHECK-T2DSP-LABEL: qdadd:
|
||||||
; CHECK-T2DSP: @ %bb.0:
|
; CHECK-T2DSP: @ %bb.0:
|
||||||
|
|
@ -51,32 +33,14 @@ define i32 @qdadd(i32 %x, i32 %y) nounwind {
|
||||||
define i32 @qdadd_c(i32 %x, i32 %y) nounwind {
|
define i32 @qdadd_c(i32 %x, i32 %y) nounwind {
|
||||||
; CHECK-T2NODSP-LABEL: qdadd_c:
|
; CHECK-T2NODSP-LABEL: qdadd_c:
|
||||||
; CHECK-T2NODSP: @ %bb.0:
|
; CHECK-T2NODSP: @ %bb.0:
|
||||||
; CHECK-T2NODSP-NEXT: .save {r7, lr}
|
; CHECK-T2NODSP-NEXT: adds r0, r0, r0
|
||||||
; CHECK-T2NODSP-NEXT: push {r7, lr}
|
|
||||||
; CHECK-T2NODSP-NEXT: movs r3, #0
|
|
||||||
; CHECK-T2NODSP-NEXT: adds.w r12, r0, r0
|
|
||||||
; CHECK-T2NODSP-NEXT: it mi
|
|
||||||
; CHECK-T2NODSP-NEXT: movmi r3, #1
|
|
||||||
; CHECK-T2NODSP-NEXT: cmp r3, #0
|
|
||||||
; CHECK-T2NODSP-NEXT: mov.w r3, #-2147483648
|
|
||||||
; CHECK-T2NODSP-NEXT: mov.w lr, #0
|
|
||||||
; CHECK-T2NODSP-NEXT: it ne
|
|
||||||
; CHECK-T2NODSP-NEXT: mvnne r3, #-2147483648
|
|
||||||
; CHECK-T2NODSP-NEXT: cmp r12, r0
|
|
||||||
; CHECK-T2NODSP-NEXT: it vc
|
|
||||||
; CHECK-T2NODSP-NEXT: movvc r3, r12
|
|
||||||
; CHECK-T2NODSP-NEXT: adds r0, r1, r3
|
|
||||||
; CHECK-T2NODSP-NEXT: mov.w r2, #-2147483648
|
; CHECK-T2NODSP-NEXT: mov.w r2, #-2147483648
|
||||||
; CHECK-T2NODSP-NEXT: it mi
|
; CHECK-T2NODSP-NEXT: it vs
|
||||||
; CHECK-T2NODSP-NEXT: movmi.w lr, #1
|
; CHECK-T2NODSP-NEXT: eorvs.w r0, r2, r0, asr #31
|
||||||
; CHECK-T2NODSP-NEXT: cmp.w lr, #0
|
; CHECK-T2NODSP-NEXT: adds r0, r0, r1
|
||||||
; CHECK-T2NODSP-NEXT: it ne
|
; CHECK-T2NODSP-NEXT: it vs
|
||||||
; CHECK-T2NODSP-NEXT: mvnne r2, #-2147483648
|
; CHECK-T2NODSP-NEXT: eorvs.w r0, r2, r0, asr #31
|
||||||
; CHECK-T2NODSP-NEXT: cmp r0, r1
|
; CHECK-T2NODSP-NEXT: bx lr
|
||||||
; CHECK-T2NODSP-NEXT: it vc
|
|
||||||
; CHECK-T2NODSP-NEXT: movvc r2, r0
|
|
||||||
; CHECK-T2NODSP-NEXT: mov r0, r2
|
|
||||||
; CHECK-T2NODSP-NEXT: pop {r7, pc}
|
|
||||||
;
|
;
|
||||||
; CHECK-T2DSP-LABEL: qdadd_c:
|
; CHECK-T2DSP-LABEL: qdadd_c:
|
||||||
; CHECK-T2DSP: @ %bb.0:
|
; CHECK-T2DSP: @ %bb.0:
|
||||||
|
|
@ -95,32 +59,14 @@ define i32 @qdadd_c(i32 %x, i32 %y) nounwind {
|
||||||
define i32 @qdsub(i32 %x, i32 %y) nounwind {
|
define i32 @qdsub(i32 %x, i32 %y) nounwind {
|
||||||
; CHECK-T2NODSP-LABEL: qdsub:
|
; CHECK-T2NODSP-LABEL: qdsub:
|
||||||
; CHECK-T2NODSP: @ %bb.0:
|
; CHECK-T2NODSP: @ %bb.0:
|
||||||
; CHECK-T2NODSP-NEXT: .save {r7, lr}
|
; CHECK-T2NODSP-NEXT: adds r0, r0, r0
|
||||||
; CHECK-T2NODSP-NEXT: push {r7, lr}
|
|
||||||
; CHECK-T2NODSP-NEXT: movs r3, #0
|
|
||||||
; CHECK-T2NODSP-NEXT: adds.w r12, r0, r0
|
|
||||||
; CHECK-T2NODSP-NEXT: it mi
|
|
||||||
; CHECK-T2NODSP-NEXT: movmi r3, #1
|
|
||||||
; CHECK-T2NODSP-NEXT: cmp r3, #0
|
|
||||||
; CHECK-T2NODSP-NEXT: mov.w r3, #-2147483648
|
|
||||||
; CHECK-T2NODSP-NEXT: mov.w lr, #0
|
|
||||||
; CHECK-T2NODSP-NEXT: it ne
|
|
||||||
; CHECK-T2NODSP-NEXT: mvnne r3, #-2147483648
|
|
||||||
; CHECK-T2NODSP-NEXT: cmp r12, r0
|
|
||||||
; CHECK-T2NODSP-NEXT: it vc
|
|
||||||
; CHECK-T2NODSP-NEXT: movvc r3, r12
|
|
||||||
; CHECK-T2NODSP-NEXT: subs r0, r1, r3
|
|
||||||
; CHECK-T2NODSP-NEXT: mov.w r2, #-2147483648
|
; CHECK-T2NODSP-NEXT: mov.w r2, #-2147483648
|
||||||
; CHECK-T2NODSP-NEXT: it mi
|
; CHECK-T2NODSP-NEXT: it vs
|
||||||
; CHECK-T2NODSP-NEXT: movmi.w lr, #1
|
; CHECK-T2NODSP-NEXT: eorvs.w r0, r2, r0, asr #31
|
||||||
; CHECK-T2NODSP-NEXT: cmp.w lr, #0
|
; CHECK-T2NODSP-NEXT: subs r0, r1, r0
|
||||||
; CHECK-T2NODSP-NEXT: it ne
|
; CHECK-T2NODSP-NEXT: it vs
|
||||||
; CHECK-T2NODSP-NEXT: mvnne r2, #-2147483648
|
; CHECK-T2NODSP-NEXT: eorvs.w r0, r2, r0, asr #31
|
||||||
; CHECK-T2NODSP-NEXT: cmp r1, r3
|
; CHECK-T2NODSP-NEXT: bx lr
|
||||||
; CHECK-T2NODSP-NEXT: it vc
|
|
||||||
; CHECK-T2NODSP-NEXT: movvc r2, r0
|
|
||||||
; CHECK-T2NODSP-NEXT: mov r0, r2
|
|
||||||
; CHECK-T2NODSP-NEXT: pop {r7, pc}
|
|
||||||
;
|
;
|
||||||
; CHECK-T2DSP-LABEL: qdsub:
|
; CHECK-T2DSP-LABEL: qdsub:
|
||||||
; CHECK-T2DSP: @ %bb.0:
|
; CHECK-T2DSP: @ %bb.0:
|
||||||
|
|
@ -139,32 +85,14 @@ define i32 @qdsub(i32 %x, i32 %y) nounwind {
|
||||||
define i32 @qdsub_c(i32 %x, i32 %y) nounwind {
|
define i32 @qdsub_c(i32 %x, i32 %y) nounwind {
|
||||||
; CHECK-T2NODSP-LABEL: qdsub_c:
|
; CHECK-T2NODSP-LABEL: qdsub_c:
|
||||||
; CHECK-T2NODSP: @ %bb.0:
|
; CHECK-T2NODSP: @ %bb.0:
|
||||||
; CHECK-T2NODSP-NEXT: .save {r7, lr}
|
; CHECK-T2NODSP-NEXT: adds r0, r0, r0
|
||||||
; CHECK-T2NODSP-NEXT: push {r7, lr}
|
|
||||||
; CHECK-T2NODSP-NEXT: movs r3, #0
|
|
||||||
; CHECK-T2NODSP-NEXT: adds.w r12, r0, r0
|
|
||||||
; CHECK-T2NODSP-NEXT: it mi
|
|
||||||
; CHECK-T2NODSP-NEXT: movmi r3, #1
|
|
||||||
; CHECK-T2NODSP-NEXT: cmp r3, #0
|
|
||||||
; CHECK-T2NODSP-NEXT: mov.w r3, #-2147483648
|
|
||||||
; CHECK-T2NODSP-NEXT: mov.w lr, #0
|
|
||||||
; CHECK-T2NODSP-NEXT: it ne
|
|
||||||
; CHECK-T2NODSP-NEXT: mvnne r3, #-2147483648
|
|
||||||
; CHECK-T2NODSP-NEXT: cmp r12, r0
|
|
||||||
; CHECK-T2NODSP-NEXT: it vc
|
|
||||||
; CHECK-T2NODSP-NEXT: movvc r3, r12
|
|
||||||
; CHECK-T2NODSP-NEXT: subs r0, r3, r1
|
|
||||||
; CHECK-T2NODSP-NEXT: mov.w r2, #-2147483648
|
; CHECK-T2NODSP-NEXT: mov.w r2, #-2147483648
|
||||||
; CHECK-T2NODSP-NEXT: it mi
|
; CHECK-T2NODSP-NEXT: it vs
|
||||||
; CHECK-T2NODSP-NEXT: movmi.w lr, #1
|
; CHECK-T2NODSP-NEXT: eorvs.w r0, r2, r0, asr #31
|
||||||
; CHECK-T2NODSP-NEXT: cmp.w lr, #0
|
; CHECK-T2NODSP-NEXT: subs r0, r0, r1
|
||||||
; CHECK-T2NODSP-NEXT: it ne
|
; CHECK-T2NODSP-NEXT: it vs
|
||||||
; CHECK-T2NODSP-NEXT: mvnne r2, #-2147483648
|
; CHECK-T2NODSP-NEXT: eorvs.w r0, r2, r0, asr #31
|
||||||
; CHECK-T2NODSP-NEXT: cmp r3, r1
|
; CHECK-T2NODSP-NEXT: bx lr
|
||||||
; CHECK-T2NODSP-NEXT: it vc
|
|
||||||
; CHECK-T2NODSP-NEXT: movvc r2, r0
|
|
||||||
; CHECK-T2NODSP-NEXT: mov r0, r2
|
|
||||||
; CHECK-T2NODSP-NEXT: pop {r7, pc}
|
|
||||||
;
|
;
|
||||||
; CHECK-T2DSP-LABEL: qdsub_c:
|
; CHECK-T2DSP-LABEL: qdsub_c:
|
||||||
; CHECK-T2DSP: @ %bb.0:
|
; CHECK-T2DSP: @ %bb.0:
|
||||||
|
|
|
||||||
|
|
@ -16,48 +16,22 @@ declare i64 @llvm.sadd.sat.i64(i64, i64)
|
||||||
define i32 @func(i32 %x, i32 %y) nounwind {
|
define i32 @func(i32 %x, i32 %y) nounwind {
|
||||||
; CHECK-T1-LABEL: func:
|
; CHECK-T1-LABEL: func:
|
||||||
; CHECK-T1: @ %bb.0:
|
; CHECK-T1: @ %bb.0:
|
||||||
; CHECK-T1-NEXT: mov r2, r0
|
|
||||||
; CHECK-T1-NEXT: movs r3, #1
|
|
||||||
; CHECK-T1-NEXT: adds r0, r0, r1
|
; CHECK-T1-NEXT: adds r0, r0, r1
|
||||||
; CHECK-T1-NEXT: mov r1, r3
|
; CHECK-T1-NEXT: bvc .LBB0_2
|
||||||
; CHECK-T1-NEXT: bmi .LBB0_2
|
|
||||||
; CHECK-T1-NEXT: @ %bb.1:
|
; CHECK-T1-NEXT: @ %bb.1:
|
||||||
; CHECK-T1-NEXT: movs r1, #0
|
; CHECK-T1-NEXT: asrs r1, r0, #31
|
||||||
|
; CHECK-T1-NEXT: movs r0, #1
|
||||||
|
; CHECK-T1-NEXT: lsls r0, r0, #31
|
||||||
|
; CHECK-T1-NEXT: eors r0, r1
|
||||||
; CHECK-T1-NEXT: .LBB0_2:
|
; CHECK-T1-NEXT: .LBB0_2:
|
||||||
; CHECK-T1-NEXT: cmp r1, #0
|
|
||||||
; CHECK-T1-NEXT: bne .LBB0_4
|
|
||||||
; CHECK-T1-NEXT: @ %bb.3:
|
|
||||||
; CHECK-T1-NEXT: lsls r1, r3, #31
|
|
||||||
; CHECK-T1-NEXT: cmp r0, r2
|
|
||||||
; CHECK-T1-NEXT: bvs .LBB0_5
|
|
||||||
; CHECK-T1-NEXT: b .LBB0_6
|
|
||||||
; CHECK-T1-NEXT: .LBB0_4:
|
|
||||||
; CHECK-T1-NEXT: ldr r1, .LCPI0_0
|
|
||||||
; CHECK-T1-NEXT: cmp r0, r2
|
|
||||||
; CHECK-T1-NEXT: bvc .LBB0_6
|
|
||||||
; CHECK-T1-NEXT: .LBB0_5:
|
|
||||||
; CHECK-T1-NEXT: mov r0, r1
|
|
||||||
; CHECK-T1-NEXT: .LBB0_6:
|
|
||||||
; CHECK-T1-NEXT: bx lr
|
; CHECK-T1-NEXT: bx lr
|
||||||
; CHECK-T1-NEXT: .p2align 2
|
|
||||||
; CHECK-T1-NEXT: @ %bb.7:
|
|
||||||
; CHECK-T1-NEXT: .LCPI0_0:
|
|
||||||
; CHECK-T1-NEXT: .long 2147483647 @ 0x7fffffff
|
|
||||||
;
|
;
|
||||||
; CHECK-T2NODSP-LABEL: func:
|
; CHECK-T2NODSP-LABEL: func:
|
||||||
; CHECK-T2NODSP: @ %bb.0:
|
; CHECK-T2NODSP: @ %bb.0:
|
||||||
; CHECK-T2NODSP-NEXT: adds r2, r0, r1
|
; CHECK-T2NODSP-NEXT: adds r0, r0, r1
|
||||||
; CHECK-T2NODSP-NEXT: mov.w r3, #0
|
|
||||||
; CHECK-T2NODSP-NEXT: mov.w r1, #-2147483648
|
; CHECK-T2NODSP-NEXT: mov.w r1, #-2147483648
|
||||||
; CHECK-T2NODSP-NEXT: it mi
|
; CHECK-T2NODSP-NEXT: it vs
|
||||||
; CHECK-T2NODSP-NEXT: movmi r3, #1
|
; CHECK-T2NODSP-NEXT: eorvs.w r0, r1, r0, asr #31
|
||||||
; CHECK-T2NODSP-NEXT: cmp r3, #0
|
|
||||||
; CHECK-T2NODSP-NEXT: it ne
|
|
||||||
; CHECK-T2NODSP-NEXT: mvnne r1, #-2147483648
|
|
||||||
; CHECK-T2NODSP-NEXT: cmp r2, r0
|
|
||||||
; CHECK-T2NODSP-NEXT: it vc
|
|
||||||
; CHECK-T2NODSP-NEXT: movvc r1, r2
|
|
||||||
; CHECK-T2NODSP-NEXT: mov r0, r1
|
|
||||||
; CHECK-T2NODSP-NEXT: bx lr
|
; CHECK-T2NODSP-NEXT: bx lr
|
||||||
;
|
;
|
||||||
; CHECK-T2DSP-LABEL: func:
|
; CHECK-T2DSP-LABEL: func:
|
||||||
|
|
@ -67,15 +41,9 @@ define i32 @func(i32 %x, i32 %y) nounwind {
|
||||||
;
|
;
|
||||||
; CHECK-ARMNODPS-LABEL: func:
|
; CHECK-ARMNODPS-LABEL: func:
|
||||||
; CHECK-ARMNODPS: @ %bb.0:
|
; CHECK-ARMNODPS: @ %bb.0:
|
||||||
; CHECK-ARMNODPS-NEXT: adds r2, r0, r1
|
; CHECK-ARMNODPS-NEXT: adds r0, r0, r1
|
||||||
; CHECK-ARMNODPS-NEXT: mov r3, #0
|
|
||||||
; CHECK-ARMNODPS-NEXT: movmi r3, #1
|
|
||||||
; CHECK-ARMNODPS-NEXT: mov r1, #-2147483648
|
; CHECK-ARMNODPS-NEXT: mov r1, #-2147483648
|
||||||
; CHECK-ARMNODPS-NEXT: cmp r3, #0
|
; CHECK-ARMNODPS-NEXT: eorvs r0, r1, r0, asr #31
|
||||||
; CHECK-ARMNODPS-NEXT: mvnne r1, #-2147483648
|
|
||||||
; CHECK-ARMNODPS-NEXT: cmp r2, r0
|
|
||||||
; CHECK-ARMNODPS-NEXT: movvc r1, r2
|
|
||||||
; CHECK-ARMNODPS-NEXT: mov r0, r1
|
|
||||||
; CHECK-ARMNODPS-NEXT: bx lr
|
; CHECK-ARMNODPS-NEXT: bx lr
|
||||||
;
|
;
|
||||||
; CHECK-ARMBASEDSP-LABEL: func:
|
; CHECK-ARMBASEDSP-LABEL: func:
|
||||||
|
|
@ -97,36 +65,28 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
|
||||||
; CHECK-T1-NEXT: .save {r4, lr}
|
; CHECK-T1-NEXT: .save {r4, lr}
|
||||||
; CHECK-T1-NEXT: push {r4, lr}
|
; CHECK-T1-NEXT: push {r4, lr}
|
||||||
; CHECK-T1-NEXT: mov r4, r1
|
; CHECK-T1-NEXT: mov r4, r1
|
||||||
; CHECK-T1-NEXT: eors r4, r3
|
|
||||||
; CHECK-T1-NEXT: adds r0, r0, r2
|
|
||||||
; CHECK-T1-NEXT: adcs r3, r1
|
|
||||||
; CHECK-T1-NEXT: eors r1, r3
|
; CHECK-T1-NEXT: eors r1, r3
|
||||||
; CHECK-T1-NEXT: bics r1, r4
|
; CHECK-T1-NEXT: adds r2, r0, r2
|
||||||
; CHECK-T1-NEXT: bpl .LBB1_2
|
; CHECK-T1-NEXT: adcs r3, r4
|
||||||
|
; CHECK-T1-NEXT: eors r4, r3
|
||||||
|
; CHECK-T1-NEXT: bics r4, r1
|
||||||
|
; CHECK-T1-NEXT: asrs r1, r3, #31
|
||||||
|
; CHECK-T1-NEXT: cmp r4, #0
|
||||||
|
; CHECK-T1-NEXT: mov r0, r1
|
||||||
|
; CHECK-T1-NEXT: bmi .LBB1_2
|
||||||
; CHECK-T1-NEXT: @ %bb.1:
|
; CHECK-T1-NEXT: @ %bb.1:
|
||||||
; CHECK-T1-NEXT: asrs r0, r3, #31
|
; CHECK-T1-NEXT: mov r0, r2
|
||||||
; CHECK-T1-NEXT: .LBB1_2:
|
; CHECK-T1-NEXT: .LBB1_2:
|
||||||
; CHECK-T1-NEXT: cmp r3, #0
|
; CHECK-T1-NEXT: cmp r4, #0
|
||||||
; CHECK-T1-NEXT: bmi .LBB1_4
|
; CHECK-T1-NEXT: bmi .LBB1_4
|
||||||
; CHECK-T1-NEXT: @ %bb.3:
|
; CHECK-T1-NEXT: @ %bb.3:
|
||||||
|
; CHECK-T1-NEXT: mov r1, r3
|
||||||
|
; CHECK-T1-NEXT: pop {r4, pc}
|
||||||
|
; CHECK-T1-NEXT: .LBB1_4:
|
||||||
; CHECK-T1-NEXT: movs r2, #1
|
; CHECK-T1-NEXT: movs r2, #1
|
||||||
; CHECK-T1-NEXT: lsls r2, r2, #31
|
; CHECK-T1-NEXT: lsls r2, r2, #31
|
||||||
; CHECK-T1-NEXT: cmp r1, #0
|
; CHECK-T1-NEXT: eors r1, r2
|
||||||
; CHECK-T1-NEXT: bpl .LBB1_5
|
|
||||||
; CHECK-T1-NEXT: b .LBB1_6
|
|
||||||
; CHECK-T1-NEXT: .LBB1_4:
|
|
||||||
; CHECK-T1-NEXT: ldr r2, .LCPI1_0
|
|
||||||
; CHECK-T1-NEXT: cmp r1, #0
|
|
||||||
; CHECK-T1-NEXT: bmi .LBB1_6
|
|
||||||
; CHECK-T1-NEXT: .LBB1_5:
|
|
||||||
; CHECK-T1-NEXT: mov r2, r3
|
|
||||||
; CHECK-T1-NEXT: .LBB1_6:
|
|
||||||
; CHECK-T1-NEXT: mov r1, r2
|
|
||||||
; CHECK-T1-NEXT: pop {r4, pc}
|
; CHECK-T1-NEXT: pop {r4, pc}
|
||||||
; CHECK-T1-NEXT: .p2align 2
|
|
||||||
; CHECK-T1-NEXT: @ %bb.7:
|
|
||||||
; CHECK-T1-NEXT: .LCPI1_0:
|
|
||||||
; CHECK-T1-NEXT: .long 2147483647 @ 0x7fffffff
|
|
||||||
;
|
;
|
||||||
; CHECK-T2-LABEL: func2:
|
; CHECK-T2-LABEL: func2:
|
||||||
; CHECK-T2: @ %bb.0:
|
; CHECK-T2: @ %bb.0:
|
||||||
|
|
@ -134,17 +94,14 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
|
||||||
; CHECK-T2-NEXT: eor.w r12, r1, r3
|
; CHECK-T2-NEXT: eor.w r12, r1, r3
|
||||||
; CHECK-T2-NEXT: adc.w r2, r1, r3
|
; CHECK-T2-NEXT: adc.w r2, r1, r3
|
||||||
; CHECK-T2-NEXT: eors r1, r2
|
; CHECK-T2-NEXT: eors r1, r2
|
||||||
; CHECK-T2-NEXT: bic.w r3, r1, r12
|
; CHECK-T2-NEXT: bic.w r1, r1, r12
|
||||||
|
; CHECK-T2-NEXT: cmp r1, #0
|
||||||
; CHECK-T2-NEXT: mov.w r1, #-2147483648
|
; CHECK-T2-NEXT: mov.w r1, #-2147483648
|
||||||
; CHECK-T2-NEXT: cmp r3, #0
|
|
||||||
; CHECK-T2-NEXT: it mi
|
; CHECK-T2-NEXT: it mi
|
||||||
; CHECK-T2-NEXT: asrmi r0, r2, #31
|
; CHECK-T2-NEXT: asrmi r0, r2, #31
|
||||||
; CHECK-T2-NEXT: cmp r2, #0
|
|
||||||
; CHECK-T2-NEXT: it mi
|
; CHECK-T2-NEXT: it mi
|
||||||
; CHECK-T2-NEXT: mvnmi r1, #-2147483648
|
; CHECK-T2-NEXT: eormi.w r2, r1, r2, asr #31
|
||||||
; CHECK-T2-NEXT: cmp r3, #0
|
; CHECK-T2-NEXT: mov r1, r2
|
||||||
; CHECK-T2-NEXT: it pl
|
|
||||||
; CHECK-T2-NEXT: movpl r1, r2
|
|
||||||
; CHECK-T2-NEXT: bx lr
|
; CHECK-T2-NEXT: bx lr
|
||||||
;
|
;
|
||||||
; CHECK-ARM-LABEL: func2:
|
; CHECK-ARM-LABEL: func2:
|
||||||
|
|
@ -153,14 +110,12 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
|
||||||
; CHECK-ARM-NEXT: eor r12, r1, r3
|
; CHECK-ARM-NEXT: eor r12, r1, r3
|
||||||
; CHECK-ARM-NEXT: adc r2, r1, r3
|
; CHECK-ARM-NEXT: adc r2, r1, r3
|
||||||
; CHECK-ARM-NEXT: eor r1, r1, r2
|
; CHECK-ARM-NEXT: eor r1, r1, r2
|
||||||
; CHECK-ARM-NEXT: bic r3, r1, r12
|
; CHECK-ARM-NEXT: bic r1, r1, r12
|
||||||
|
; CHECK-ARM-NEXT: cmp r1, #0
|
||||||
; CHECK-ARM-NEXT: mov r1, #-2147483648
|
; CHECK-ARM-NEXT: mov r1, #-2147483648
|
||||||
; CHECK-ARM-NEXT: cmp r3, #0
|
|
||||||
; CHECK-ARM-NEXT: asrmi r0, r2, #31
|
; CHECK-ARM-NEXT: asrmi r0, r2, #31
|
||||||
; CHECK-ARM-NEXT: cmp r2, #0
|
; CHECK-ARM-NEXT: eormi r2, r1, r2, asr #31
|
||||||
; CHECK-ARM-NEXT: mvnmi r1, #-2147483648
|
; CHECK-ARM-NEXT: mov r1, r2
|
||||||
; CHECK-ARM-NEXT: cmp r3, #0
|
|
||||||
; CHECK-ARM-NEXT: movpl r1, r2
|
|
||||||
; CHECK-ARM-NEXT: bx lr
|
; CHECK-ARM-NEXT: bx lr
|
||||||
%tmp = call i64 @llvm.sadd.sat.i64(i64 %x, i64 %y)
|
%tmp = call i64 @llvm.sadd.sat.i64(i64 %x, i64 %y)
|
||||||
ret i64 %tmp
|
ret i64 %tmp
|
||||||
|
|
|
||||||
|
|
@ -13,49 +13,24 @@ declare i64 @llvm.sadd.sat.i64(i64, i64)
|
||||||
define i32 @func32(i32 %x, i32 %y, i32 %z) nounwind {
|
define i32 @func32(i32 %x, i32 %y, i32 %z) nounwind {
|
||||||
; CHECK-T1-LABEL: func32:
|
; CHECK-T1-LABEL: func32:
|
||||||
; CHECK-T1: @ %bb.0:
|
; CHECK-T1: @ %bb.0:
|
||||||
; CHECK-T1-NEXT: mov r3, r0
|
|
||||||
; CHECK-T1-NEXT: muls r1, r2, r1
|
; CHECK-T1-NEXT: muls r1, r2, r1
|
||||||
; CHECK-T1-NEXT: movs r2, #1
|
|
||||||
; CHECK-T1-NEXT: adds r0, r0, r1
|
; CHECK-T1-NEXT: adds r0, r0, r1
|
||||||
; CHECK-T1-NEXT: mov r1, r2
|
; CHECK-T1-NEXT: bvc .LBB0_2
|
||||||
; CHECK-T1-NEXT: bmi .LBB0_2
|
|
||||||
; CHECK-T1-NEXT: @ %bb.1:
|
; CHECK-T1-NEXT: @ %bb.1:
|
||||||
; CHECK-T1-NEXT: movs r1, #0
|
; CHECK-T1-NEXT: asrs r1, r0, #31
|
||||||
|
; CHECK-T1-NEXT: movs r0, #1
|
||||||
|
; CHECK-T1-NEXT: lsls r0, r0, #31
|
||||||
|
; CHECK-T1-NEXT: eors r0, r1
|
||||||
; CHECK-T1-NEXT: .LBB0_2:
|
; CHECK-T1-NEXT: .LBB0_2:
|
||||||
; CHECK-T1-NEXT: cmp r1, #0
|
|
||||||
; CHECK-T1-NEXT: bne .LBB0_4
|
|
||||||
; CHECK-T1-NEXT: @ %bb.3:
|
|
||||||
; CHECK-T1-NEXT: lsls r1, r2, #31
|
|
||||||
; CHECK-T1-NEXT: cmp r0, r3
|
|
||||||
; CHECK-T1-NEXT: bvs .LBB0_5
|
|
||||||
; CHECK-T1-NEXT: b .LBB0_6
|
|
||||||
; CHECK-T1-NEXT: .LBB0_4:
|
|
||||||
; CHECK-T1-NEXT: ldr r1, .LCPI0_0
|
|
||||||
; CHECK-T1-NEXT: cmp r0, r3
|
|
||||||
; CHECK-T1-NEXT: bvc .LBB0_6
|
|
||||||
; CHECK-T1-NEXT: .LBB0_5:
|
|
||||||
; CHECK-T1-NEXT: mov r0, r1
|
|
||||||
; CHECK-T1-NEXT: .LBB0_6:
|
|
||||||
; CHECK-T1-NEXT: bx lr
|
; CHECK-T1-NEXT: bx lr
|
||||||
; CHECK-T1-NEXT: .p2align 2
|
|
||||||
; CHECK-T1-NEXT: @ %bb.7:
|
|
||||||
; CHECK-T1-NEXT: .LCPI0_0:
|
|
||||||
; CHECK-T1-NEXT: .long 2147483647 @ 0x7fffffff
|
|
||||||
;
|
;
|
||||||
; CHECK-T2NODSP-LABEL: func32:
|
; CHECK-T2NODSP-LABEL: func32:
|
||||||
; CHECK-T2NODSP: @ %bb.0:
|
; CHECK-T2NODSP: @ %bb.0:
|
||||||
; CHECK-T2NODSP-NEXT: mla r2, r1, r2, r0
|
; CHECK-T2NODSP-NEXT: mla r1, r1, r2, r0
|
||||||
; CHECK-T2NODSP-NEXT: movs r3, #0
|
; CHECK-T2NODSP-NEXT: mov.w r2, #-2147483648
|
||||||
; CHECK-T2NODSP-NEXT: mov.w r1, #-2147483648
|
; CHECK-T2NODSP-NEXT: cmp r1, r0
|
||||||
; CHECK-T2NODSP-NEXT: cmp r2, #0
|
; CHECK-T2NODSP-NEXT: it vs
|
||||||
; CHECK-T2NODSP-NEXT: it mi
|
; CHECK-T2NODSP-NEXT: eorvs.w r1, r2, r1, asr #31
|
||||||
; CHECK-T2NODSP-NEXT: movmi r3, #1
|
|
||||||
; CHECK-T2NODSP-NEXT: cmp r3, #0
|
|
||||||
; CHECK-T2NODSP-NEXT: it ne
|
|
||||||
; CHECK-T2NODSP-NEXT: mvnne r1, #-2147483648
|
|
||||||
; CHECK-T2NODSP-NEXT: cmp r2, r0
|
|
||||||
; CHECK-T2NODSP-NEXT: it vc
|
|
||||||
; CHECK-T2NODSP-NEXT: movvc r1, r2
|
|
||||||
; CHECK-T2NODSP-NEXT: mov r0, r1
|
; CHECK-T2NODSP-NEXT: mov r0, r1
|
||||||
; CHECK-T2NODSP-NEXT: bx lr
|
; CHECK-T2NODSP-NEXT: bx lr
|
||||||
;
|
;
|
||||||
|
|
@ -84,35 +59,28 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind {
|
||||||
; CHECK-T1-NEXT: mov r2, r1
|
; CHECK-T1-NEXT: mov r2, r1
|
||||||
; CHECK-T1-NEXT: eors r2, r3
|
; CHECK-T1-NEXT: eors r2, r3
|
||||||
; CHECK-T1-NEXT: ldr r4, [sp, #8]
|
; CHECK-T1-NEXT: ldr r4, [sp, #8]
|
||||||
; CHECK-T1-NEXT: adds r0, r0, r4
|
; CHECK-T1-NEXT: adds r4, r0, r4
|
||||||
; CHECK-T1-NEXT: adcs r3, r1
|
; CHECK-T1-NEXT: adcs r3, r1
|
||||||
; CHECK-T1-NEXT: eors r1, r3
|
; CHECK-T1-NEXT: eors r1, r3
|
||||||
; CHECK-T1-NEXT: bics r1, r2
|
; CHECK-T1-NEXT: bics r1, r2
|
||||||
; CHECK-T1-NEXT: bpl .LBB1_2
|
; CHECK-T1-NEXT: asrs r2, r3, #31
|
||||||
|
; CHECK-T1-NEXT: cmp r1, #0
|
||||||
|
; CHECK-T1-NEXT: mov r0, r2
|
||||||
|
; CHECK-T1-NEXT: bmi .LBB1_2
|
||||||
; CHECK-T1-NEXT: @ %bb.1:
|
; CHECK-T1-NEXT: @ %bb.1:
|
||||||
; CHECK-T1-NEXT: asrs r0, r3, #31
|
; CHECK-T1-NEXT: mov r0, r4
|
||||||
; CHECK-T1-NEXT: .LBB1_2:
|
; CHECK-T1-NEXT: .LBB1_2:
|
||||||
; CHECK-T1-NEXT: cmp r3, #0
|
; CHECK-T1-NEXT: cmp r1, #0
|
||||||
; CHECK-T1-NEXT: bmi .LBB1_4
|
; CHECK-T1-NEXT: bmi .LBB1_4
|
||||||
; CHECK-T1-NEXT: @ %bb.3:
|
; CHECK-T1-NEXT: @ %bb.3:
|
||||||
; CHECK-T1-NEXT: movs r2, #1
|
; CHECK-T1-NEXT: mov r1, r3
|
||||||
; CHECK-T1-NEXT: lsls r2, r2, #31
|
; CHECK-T1-NEXT: pop {r4, pc}
|
||||||
; CHECK-T1-NEXT: cmp r1, #0
|
|
||||||
; CHECK-T1-NEXT: bpl .LBB1_5
|
|
||||||
; CHECK-T1-NEXT: b .LBB1_6
|
|
||||||
; CHECK-T1-NEXT: .LBB1_4:
|
; CHECK-T1-NEXT: .LBB1_4:
|
||||||
; CHECK-T1-NEXT: ldr r2, .LCPI1_0
|
; CHECK-T1-NEXT: movs r1, #1
|
||||||
; CHECK-T1-NEXT: cmp r1, #0
|
; CHECK-T1-NEXT: lsls r1, r1, #31
|
||||||
; CHECK-T1-NEXT: bmi .LBB1_6
|
; CHECK-T1-NEXT: eors r2, r1
|
||||||
; CHECK-T1-NEXT: .LBB1_5:
|
|
||||||
; CHECK-T1-NEXT: mov r2, r3
|
|
||||||
; CHECK-T1-NEXT: .LBB1_6:
|
|
||||||
; CHECK-T1-NEXT: mov r1, r2
|
; CHECK-T1-NEXT: mov r1, r2
|
||||||
; CHECK-T1-NEXT: pop {r4, pc}
|
; CHECK-T1-NEXT: pop {r4, pc}
|
||||||
; CHECK-T1-NEXT: .p2align 2
|
|
||||||
; CHECK-T1-NEXT: @ %bb.7:
|
|
||||||
; CHECK-T1-NEXT: .LCPI1_0:
|
|
||||||
; CHECK-T1-NEXT: .long 2147483647 @ 0x7fffffff
|
|
||||||
;
|
;
|
||||||
; CHECK-T2-LABEL: func64:
|
; CHECK-T2-LABEL: func64:
|
||||||
; CHECK-T2: @ %bb.0:
|
; CHECK-T2: @ %bb.0:
|
||||||
|
|
@ -122,17 +90,14 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind {
|
||||||
; CHECK-T2-NEXT: adc.w r2, r1, r12
|
; CHECK-T2-NEXT: adc.w r2, r1, r12
|
||||||
; CHECK-T2-NEXT: eor.w r3, r1, r12
|
; CHECK-T2-NEXT: eor.w r3, r1, r12
|
||||||
; CHECK-T2-NEXT: eors r1, r2
|
; CHECK-T2-NEXT: eors r1, r2
|
||||||
; CHECK-T2-NEXT: bic.w r3, r1, r3
|
; CHECK-T2-NEXT: bics r1, r3
|
||||||
|
; CHECK-T2-NEXT: cmp r1, #0
|
||||||
; CHECK-T2-NEXT: mov.w r1, #-2147483648
|
; CHECK-T2-NEXT: mov.w r1, #-2147483648
|
||||||
; CHECK-T2-NEXT: cmp r3, #0
|
|
||||||
; CHECK-T2-NEXT: it mi
|
; CHECK-T2-NEXT: it mi
|
||||||
; CHECK-T2-NEXT: asrmi r0, r2, #31
|
; CHECK-T2-NEXT: asrmi r0, r2, #31
|
||||||
; CHECK-T2-NEXT: cmp r2, #0
|
|
||||||
; CHECK-T2-NEXT: it mi
|
; CHECK-T2-NEXT: it mi
|
||||||
; CHECK-T2-NEXT: mvnmi r1, #-2147483648
|
; CHECK-T2-NEXT: eormi.w r2, r1, r2, asr #31
|
||||||
; CHECK-T2-NEXT: cmp r3, #0
|
; CHECK-T2-NEXT: mov r1, r2
|
||||||
; CHECK-T2-NEXT: it pl
|
|
||||||
; CHECK-T2-NEXT: movpl r1, r2
|
|
||||||
; CHECK-T2-NEXT: bx lr
|
; CHECK-T2-NEXT: bx lr
|
||||||
;
|
;
|
||||||
; CHECK-ARM-LABEL: func64:
|
; CHECK-ARM-LABEL: func64:
|
||||||
|
|
@ -143,14 +108,12 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind {
|
||||||
; CHECK-ARM-NEXT: eor r3, r1, r2
|
; CHECK-ARM-NEXT: eor r3, r1, r2
|
||||||
; CHECK-ARM-NEXT: adc r2, r1, r2
|
; CHECK-ARM-NEXT: adc r2, r1, r2
|
||||||
; CHECK-ARM-NEXT: eor r1, r1, r2
|
; CHECK-ARM-NEXT: eor r1, r1, r2
|
||||||
; CHECK-ARM-NEXT: bic r3, r1, r3
|
; CHECK-ARM-NEXT: bic r1, r1, r3
|
||||||
|
; CHECK-ARM-NEXT: cmp r1, #0
|
||||||
; CHECK-ARM-NEXT: mov r1, #-2147483648
|
; CHECK-ARM-NEXT: mov r1, #-2147483648
|
||||||
; CHECK-ARM-NEXT: cmp r3, #0
|
|
||||||
; CHECK-ARM-NEXT: asrmi r0, r2, #31
|
; CHECK-ARM-NEXT: asrmi r0, r2, #31
|
||||||
; CHECK-ARM-NEXT: cmp r2, #0
|
; CHECK-ARM-NEXT: eormi r2, r1, r2, asr #31
|
||||||
; CHECK-ARM-NEXT: mvnmi r1, #-2147483648
|
; CHECK-ARM-NEXT: mov r1, r2
|
||||||
; CHECK-ARM-NEXT: cmp r3, #0
|
|
||||||
; CHECK-ARM-NEXT: movpl r1, r2
|
|
||||||
; CHECK-ARM-NEXT: bx lr
|
; CHECK-ARM-NEXT: bx lr
|
||||||
%a = mul i64 %y, %z
|
%a = mul i64 %y, %z
|
||||||
%tmp = call i64 @llvm.sadd.sat.i64(i64 %x, i64 %z)
|
%tmp = call i64 @llvm.sadd.sat.i64(i64 %x, i64 %z)
|
||||||
|
|
|
||||||
|
|
@ -16,50 +16,22 @@ declare <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32>, <4 x i32>)
|
||||||
define i32 @func(i32 %x, i32 %y) nounwind {
|
define i32 @func(i32 %x, i32 %y) nounwind {
|
||||||
; CHECK-T1-LABEL: func:
|
; CHECK-T1-LABEL: func:
|
||||||
; CHECK-T1: @ %bb.0:
|
; CHECK-T1: @ %bb.0:
|
||||||
; CHECK-T1-NEXT: .save {r4, lr}
|
|
||||||
; CHECK-T1-NEXT: push {r4, lr}
|
|
||||||
; CHECK-T1-NEXT: mov r2, r0
|
|
||||||
; CHECK-T1-NEXT: movs r3, #1
|
|
||||||
; CHECK-T1-NEXT: subs r0, r0, r1
|
; CHECK-T1-NEXT: subs r0, r0, r1
|
||||||
; CHECK-T1-NEXT: mov r4, r3
|
; CHECK-T1-NEXT: bvc .LBB0_2
|
||||||
; CHECK-T1-NEXT: bmi .LBB0_2
|
|
||||||
; CHECK-T1-NEXT: @ %bb.1:
|
; CHECK-T1-NEXT: @ %bb.1:
|
||||||
; CHECK-T1-NEXT: movs r4, #0
|
; CHECK-T1-NEXT: asrs r1, r0, #31
|
||||||
|
; CHECK-T1-NEXT: movs r0, #1
|
||||||
|
; CHECK-T1-NEXT: lsls r0, r0, #31
|
||||||
|
; CHECK-T1-NEXT: eors r0, r1
|
||||||
; CHECK-T1-NEXT: .LBB0_2:
|
; CHECK-T1-NEXT: .LBB0_2:
|
||||||
; CHECK-T1-NEXT: cmp r4, #0
|
; CHECK-T1-NEXT: bx lr
|
||||||
; CHECK-T1-NEXT: bne .LBB0_4
|
|
||||||
; CHECK-T1-NEXT: @ %bb.3:
|
|
||||||
; CHECK-T1-NEXT: lsls r3, r3, #31
|
|
||||||
; CHECK-T1-NEXT: cmp r2, r1
|
|
||||||
; CHECK-T1-NEXT: bvs .LBB0_5
|
|
||||||
; CHECK-T1-NEXT: b .LBB0_6
|
|
||||||
; CHECK-T1-NEXT: .LBB0_4:
|
|
||||||
; CHECK-T1-NEXT: ldr r3, .LCPI0_0
|
|
||||||
; CHECK-T1-NEXT: cmp r2, r1
|
|
||||||
; CHECK-T1-NEXT: bvc .LBB0_6
|
|
||||||
; CHECK-T1-NEXT: .LBB0_5:
|
|
||||||
; CHECK-T1-NEXT: mov r0, r3
|
|
||||||
; CHECK-T1-NEXT: .LBB0_6:
|
|
||||||
; CHECK-T1-NEXT: pop {r4, pc}
|
|
||||||
; CHECK-T1-NEXT: .p2align 2
|
|
||||||
; CHECK-T1-NEXT: @ %bb.7:
|
|
||||||
; CHECK-T1-NEXT: .LCPI0_0:
|
|
||||||
; CHECK-T1-NEXT: .long 2147483647 @ 0x7fffffff
|
|
||||||
;
|
;
|
||||||
; CHECK-T2NODSP-LABEL: func:
|
; CHECK-T2NODSP-LABEL: func:
|
||||||
; CHECK-T2NODSP: @ %bb.0:
|
; CHECK-T2NODSP: @ %bb.0:
|
||||||
; CHECK-T2NODSP-NEXT: subs.w r12, r0, r1
|
; CHECK-T2NODSP-NEXT: subs r0, r0, r1
|
||||||
; CHECK-T2NODSP-NEXT: mov.w r3, #0
|
; CHECK-T2NODSP-NEXT: mov.w r1, #-2147483648
|
||||||
; CHECK-T2NODSP-NEXT: mov.w r2, #-2147483648
|
; CHECK-T2NODSP-NEXT: it vs
|
||||||
; CHECK-T2NODSP-NEXT: it mi
|
; CHECK-T2NODSP-NEXT: eorvs.w r0, r1, r0, asr #31
|
||||||
; CHECK-T2NODSP-NEXT: movmi r3, #1
|
|
||||||
; CHECK-T2NODSP-NEXT: cmp r3, #0
|
|
||||||
; CHECK-T2NODSP-NEXT: it ne
|
|
||||||
; CHECK-T2NODSP-NEXT: mvnne r2, #-2147483648
|
|
||||||
; CHECK-T2NODSP-NEXT: cmp r0, r1
|
|
||||||
; CHECK-T2NODSP-NEXT: it vc
|
|
||||||
; CHECK-T2NODSP-NEXT: movvc r2, r12
|
|
||||||
; CHECK-T2NODSP-NEXT: mov r0, r2
|
|
||||||
; CHECK-T2NODSP-NEXT: bx lr
|
; CHECK-T2NODSP-NEXT: bx lr
|
||||||
;
|
;
|
||||||
; CHECK-T2DSP-LABEL: func:
|
; CHECK-T2DSP-LABEL: func:
|
||||||
|
|
@ -69,15 +41,9 @@ define i32 @func(i32 %x, i32 %y) nounwind {
|
||||||
;
|
;
|
||||||
; CHECK-ARMNODPS-LABEL: func:
|
; CHECK-ARMNODPS-LABEL: func:
|
||||||
; CHECK-ARMNODPS: @ %bb.0:
|
; CHECK-ARMNODPS: @ %bb.0:
|
||||||
; CHECK-ARMNODPS-NEXT: subs r12, r0, r1
|
; CHECK-ARMNODPS-NEXT: subs r0, r0, r1
|
||||||
; CHECK-ARMNODPS-NEXT: mov r3, #0
|
; CHECK-ARMNODPS-NEXT: mov r1, #-2147483648
|
||||||
; CHECK-ARMNODPS-NEXT: movmi r3, #1
|
; CHECK-ARMNODPS-NEXT: eorvs r0, r1, r0, asr #31
|
||||||
; CHECK-ARMNODPS-NEXT: mov r2, #-2147483648
|
|
||||||
; CHECK-ARMNODPS-NEXT: cmp r3, #0
|
|
||||||
; CHECK-ARMNODPS-NEXT: mvnne r2, #-2147483648
|
|
||||||
; CHECK-ARMNODPS-NEXT: cmp r0, r1
|
|
||||||
; CHECK-ARMNODPS-NEXT: movvc r2, r12
|
|
||||||
; CHECK-ARMNODPS-NEXT: mov r0, r2
|
|
||||||
; CHECK-ARMNODPS-NEXT: bx lr
|
; CHECK-ARMNODPS-NEXT: bx lr
|
||||||
;
|
;
|
||||||
; CHECK-ARMBASEDSP-LABEL: func:
|
; CHECK-ARMBASEDSP-LABEL: func:
|
||||||
|
|
@ -98,38 +64,30 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
|
||||||
; CHECK-T1: @ %bb.0:
|
; CHECK-T1: @ %bb.0:
|
||||||
; CHECK-T1-NEXT: .save {r4, r5, r7, lr}
|
; CHECK-T1-NEXT: .save {r4, r5, r7, lr}
|
||||||
; CHECK-T1-NEXT: push {r4, r5, r7, lr}
|
; CHECK-T1-NEXT: push {r4, r5, r7, lr}
|
||||||
; CHECK-T1-NEXT: mov r5, r1
|
|
||||||
; CHECK-T1-NEXT: eors r5, r3
|
|
||||||
; CHECK-T1-NEXT: subs r0, r0, r2
|
|
||||||
; CHECK-T1-NEXT: mov r4, r1
|
; CHECK-T1-NEXT: mov r4, r1
|
||||||
; CHECK-T1-NEXT: sbcs r4, r3
|
; CHECK-T1-NEXT: eors r1, r3
|
||||||
; CHECK-T1-NEXT: eors r1, r4
|
; CHECK-T1-NEXT: subs r5, r0, r2
|
||||||
; CHECK-T1-NEXT: ands r1, r5
|
; CHECK-T1-NEXT: mov r2, r4
|
||||||
; CHECK-T1-NEXT: bpl .LBB1_2
|
; CHECK-T1-NEXT: sbcs r2, r3
|
||||||
|
; CHECK-T1-NEXT: eors r4, r2
|
||||||
|
; CHECK-T1-NEXT: ands r4, r1
|
||||||
|
; CHECK-T1-NEXT: asrs r1, r2, #31
|
||||||
|
; CHECK-T1-NEXT: cmp r4, #0
|
||||||
|
; CHECK-T1-NEXT: mov r0, r1
|
||||||
|
; CHECK-T1-NEXT: bmi .LBB1_2
|
||||||
; CHECK-T1-NEXT: @ %bb.1:
|
; CHECK-T1-NEXT: @ %bb.1:
|
||||||
; CHECK-T1-NEXT: asrs r0, r4, #31
|
; CHECK-T1-NEXT: mov r0, r5
|
||||||
; CHECK-T1-NEXT: .LBB1_2:
|
; CHECK-T1-NEXT: .LBB1_2:
|
||||||
; CHECK-T1-NEXT: cmp r4, #0
|
; CHECK-T1-NEXT: cmp r4, #0
|
||||||
; CHECK-T1-NEXT: bmi .LBB1_4
|
; CHECK-T1-NEXT: bmi .LBB1_4
|
||||||
; CHECK-T1-NEXT: @ %bb.3:
|
; CHECK-T1-NEXT: @ %bb.3:
|
||||||
; CHECK-T1-NEXT: movs r2, #1
|
|
||||||
; CHECK-T1-NEXT: lsls r2, r2, #31
|
|
||||||
; CHECK-T1-NEXT: cmp r1, #0
|
|
||||||
; CHECK-T1-NEXT: bpl .LBB1_5
|
|
||||||
; CHECK-T1-NEXT: b .LBB1_6
|
|
||||||
; CHECK-T1-NEXT: .LBB1_4:
|
|
||||||
; CHECK-T1-NEXT: ldr r2, .LCPI1_0
|
|
||||||
; CHECK-T1-NEXT: cmp r1, #0
|
|
||||||
; CHECK-T1-NEXT: bmi .LBB1_6
|
|
||||||
; CHECK-T1-NEXT: .LBB1_5:
|
|
||||||
; CHECK-T1-NEXT: mov r2, r4
|
|
||||||
; CHECK-T1-NEXT: .LBB1_6:
|
|
||||||
; CHECK-T1-NEXT: mov r1, r2
|
; CHECK-T1-NEXT: mov r1, r2
|
||||||
; CHECK-T1-NEXT: pop {r4, r5, r7, pc}
|
; CHECK-T1-NEXT: pop {r4, r5, r7, pc}
|
||||||
; CHECK-T1-NEXT: .p2align 2
|
; CHECK-T1-NEXT: .LBB1_4:
|
||||||
; CHECK-T1-NEXT: @ %bb.7:
|
; CHECK-T1-NEXT: movs r2, #1
|
||||||
; CHECK-T1-NEXT: .LCPI1_0:
|
; CHECK-T1-NEXT: lsls r2, r2, #31
|
||||||
; CHECK-T1-NEXT: .long 2147483647 @ 0x7fffffff
|
; CHECK-T1-NEXT: eors r1, r2
|
||||||
|
; CHECK-T1-NEXT: pop {r4, r5, r7, pc}
|
||||||
;
|
;
|
||||||
; CHECK-T2-LABEL: func2:
|
; CHECK-T2-LABEL: func2:
|
||||||
; CHECK-T2: @ %bb.0:
|
; CHECK-T2: @ %bb.0:
|
||||||
|
|
@ -137,16 +95,13 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
|
||||||
; CHECK-T2-NEXT: eor.w r12, r1, r3
|
; CHECK-T2-NEXT: eor.w r12, r1, r3
|
||||||
; CHECK-T2-NEXT: sbc.w r2, r1, r3
|
; CHECK-T2-NEXT: sbc.w r2, r1, r3
|
||||||
; CHECK-T2-NEXT: eors r1, r2
|
; CHECK-T2-NEXT: eors r1, r2
|
||||||
; CHECK-T2-NEXT: ands.w r3, r12, r1
|
; CHECK-T2-NEXT: ands.w r1, r1, r12
|
||||||
; CHECK-T2-NEXT: mov.w r1, #-2147483648
|
|
||||||
; CHECK-T2-NEXT: it mi
|
; CHECK-T2-NEXT: it mi
|
||||||
; CHECK-T2-NEXT: asrmi r0, r2, #31
|
; CHECK-T2-NEXT: asrmi r0, r2, #31
|
||||||
; CHECK-T2-NEXT: cmp r2, #0
|
; CHECK-T2-NEXT: mov.w r1, #-2147483648
|
||||||
; CHECK-T2-NEXT: it mi
|
; CHECK-T2-NEXT: it mi
|
||||||
; CHECK-T2-NEXT: mvnmi r1, #-2147483648
|
; CHECK-T2-NEXT: eormi.w r2, r1, r2, asr #31
|
||||||
; CHECK-T2-NEXT: cmp r3, #0
|
; CHECK-T2-NEXT: mov r1, r2
|
||||||
; CHECK-T2-NEXT: it pl
|
|
||||||
; CHECK-T2-NEXT: movpl r1, r2
|
|
||||||
; CHECK-T2-NEXT: bx lr
|
; CHECK-T2-NEXT: bx lr
|
||||||
;
|
;
|
||||||
; CHECK-ARM-LABEL: func2:
|
; CHECK-ARM-LABEL: func2:
|
||||||
|
|
@ -155,13 +110,11 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
|
||||||
; CHECK-ARM-NEXT: eor r12, r1, r3
|
; CHECK-ARM-NEXT: eor r12, r1, r3
|
||||||
; CHECK-ARM-NEXT: sbc r2, r1, r3
|
; CHECK-ARM-NEXT: sbc r2, r1, r3
|
||||||
; CHECK-ARM-NEXT: eor r1, r1, r2
|
; CHECK-ARM-NEXT: eor r1, r1, r2
|
||||||
; CHECK-ARM-NEXT: ands r3, r12, r1
|
; CHECK-ARM-NEXT: ands r1, r12, r1
|
||||||
; CHECK-ARM-NEXT: mov r1, #-2147483648
|
|
||||||
; CHECK-ARM-NEXT: asrmi r0, r2, #31
|
; CHECK-ARM-NEXT: asrmi r0, r2, #31
|
||||||
; CHECK-ARM-NEXT: cmp r2, #0
|
; CHECK-ARM-NEXT: mov r1, #-2147483648
|
||||||
; CHECK-ARM-NEXT: mvnmi r1, #-2147483648
|
; CHECK-ARM-NEXT: eormi r2, r1, r2, asr #31
|
||||||
; CHECK-ARM-NEXT: cmp r3, #0
|
; CHECK-ARM-NEXT: mov r1, r2
|
||||||
; CHECK-ARM-NEXT: movpl r1, r2
|
|
||||||
; CHECK-ARM-NEXT: bx lr
|
; CHECK-ARM-NEXT: bx lr
|
||||||
%tmp = call i64 @llvm.ssub.sat.i64(i64 %x, i64 %y)
|
%tmp = call i64 @llvm.ssub.sat.i64(i64 %x, i64 %y)
|
||||||
ret i64 %tmp
|
ret i64 %tmp
|
||||||
|
|
@ -373,165 +326,64 @@ define signext i4 @func3(i4 signext %x, i4 signext %y) nounwind {
|
||||||
define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
|
define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
|
||||||
; CHECK-T1-LABEL: vec:
|
; CHECK-T1-LABEL: vec:
|
||||||
; CHECK-T1: @ %bb.0:
|
; CHECK-T1: @ %bb.0:
|
||||||
; CHECK-T1-NEXT: .save {r4, r5, r6, r7, lr}
|
; CHECK-T1-NEXT: .save {r4, r5, r6, lr}
|
||||||
; CHECK-T1-NEXT: push {r4, r5, r6, r7, lr}
|
; CHECK-T1-NEXT: push {r4, r5, r6, lr}
|
||||||
; CHECK-T1-NEXT: .pad #12
|
; CHECK-T1-NEXT: mov r4, r0
|
||||||
; CHECK-T1-NEXT: sub sp, #12
|
; CHECK-T1-NEXT: ldr r6, [sp, #16]
|
||||||
; CHECK-T1-NEXT: str r3, [sp] @ 4-byte Spill
|
; CHECK-T1-NEXT: subs r0, r0, r6
|
||||||
; CHECK-T1-NEXT: mov r4, r1
|
; CHECK-T1-NEXT: movs r5, #1
|
||||||
; CHECK-T1-NEXT: mov r1, r0
|
; CHECK-T1-NEXT: lsls r5, r5, #31
|
||||||
; CHECK-T1-NEXT: ldr r5, [sp, #32]
|
; CHECK-T1-NEXT: cmp r4, r6
|
||||||
; CHECK-T1-NEXT: movs r7, #1
|
; CHECK-T1-NEXT: bvc .LBB5_2
|
||||||
; CHECK-T1-NEXT: movs r0, #0
|
|
||||||
; CHECK-T1-NEXT: str r0, [sp, #8] @ 4-byte Spill
|
|
||||||
; CHECK-T1-NEXT: subs r0, r1, r5
|
|
||||||
; CHECK-T1-NEXT: str r0, [sp, #4] @ 4-byte Spill
|
|
||||||
; CHECK-T1-NEXT: mov r6, r7
|
|
||||||
; CHECK-T1-NEXT: bmi .LBB5_2
|
|
||||||
; CHECK-T1-NEXT: @ %bb.1:
|
; CHECK-T1-NEXT: @ %bb.1:
|
||||||
; CHECK-T1-NEXT: ldr r6, [sp, #8] @ 4-byte Reload
|
; CHECK-T1-NEXT: asrs r0, r0, #31
|
||||||
|
; CHECK-T1-NEXT: eors r0, r5
|
||||||
; CHECK-T1-NEXT: .LBB5_2:
|
; CHECK-T1-NEXT: .LBB5_2:
|
||||||
; CHECK-T1-NEXT: lsls r3, r7, #31
|
; CHECK-T1-NEXT: ldr r4, [sp, #20]
|
||||||
; CHECK-T1-NEXT: ldr r0, .LCPI5_0
|
; CHECK-T1-NEXT: subs r1, r1, r4
|
||||||
; CHECK-T1-NEXT: cmp r6, #0
|
; CHECK-T1-NEXT: bvc .LBB5_4
|
||||||
; CHECK-T1-NEXT: mov r6, r0
|
|
||||||
; CHECK-T1-NEXT: bne .LBB5_4
|
|
||||||
; CHECK-T1-NEXT: @ %bb.3:
|
; CHECK-T1-NEXT: @ %bb.3:
|
||||||
; CHECK-T1-NEXT: mov r6, r3
|
; CHECK-T1-NEXT: asrs r1, r1, #31
|
||||||
|
; CHECK-T1-NEXT: eors r1, r5
|
||||||
; CHECK-T1-NEXT: .LBB5_4:
|
; CHECK-T1-NEXT: .LBB5_4:
|
||||||
; CHECK-T1-NEXT: cmp r1, r5
|
; CHECK-T1-NEXT: ldr r4, [sp, #24]
|
||||||
|
; CHECK-T1-NEXT: subs r2, r2, r4
|
||||||
; CHECK-T1-NEXT: bvc .LBB5_6
|
; CHECK-T1-NEXT: bvc .LBB5_6
|
||||||
; CHECK-T1-NEXT: @ %bb.5:
|
; CHECK-T1-NEXT: @ %bb.5:
|
||||||
; CHECK-T1-NEXT: str r6, [sp, #4] @ 4-byte Spill
|
; CHECK-T1-NEXT: asrs r2, r2, #31
|
||||||
|
; CHECK-T1-NEXT: eors r2, r5
|
||||||
; CHECK-T1-NEXT: .LBB5_6:
|
; CHECK-T1-NEXT: .LBB5_6:
|
||||||
; CHECK-T1-NEXT: ldr r5, [sp, #36]
|
; CHECK-T1-NEXT: ldr r4, [sp, #28]
|
||||||
; CHECK-T1-NEXT: subs r1, r4, r5
|
; CHECK-T1-NEXT: subs r3, r3, r4
|
||||||
; CHECK-T1-NEXT: mov r6, r7
|
; CHECK-T1-NEXT: bvc .LBB5_8
|
||||||
; CHECK-T1-NEXT: bmi .LBB5_8
|
|
||||||
; CHECK-T1-NEXT: @ %bb.7:
|
; CHECK-T1-NEXT: @ %bb.7:
|
||||||
; CHECK-T1-NEXT: ldr r6, [sp, #8] @ 4-byte Reload
|
; CHECK-T1-NEXT: asrs r3, r3, #31
|
||||||
|
; CHECK-T1-NEXT: eors r3, r5
|
||||||
; CHECK-T1-NEXT: .LBB5_8:
|
; CHECK-T1-NEXT: .LBB5_8:
|
||||||
; CHECK-T1-NEXT: cmp r6, #0
|
; CHECK-T1-NEXT: pop {r4, r5, r6, pc}
|
||||||
; CHECK-T1-NEXT: mov r6, r0
|
|
||||||
; CHECK-T1-NEXT: bne .LBB5_10
|
|
||||||
; CHECK-T1-NEXT: @ %bb.9:
|
|
||||||
; CHECK-T1-NEXT: mov r6, r3
|
|
||||||
; CHECK-T1-NEXT: .LBB5_10:
|
|
||||||
; CHECK-T1-NEXT: cmp r4, r5
|
|
||||||
; CHECK-T1-NEXT: bvc .LBB5_12
|
|
||||||
; CHECK-T1-NEXT: @ %bb.11:
|
|
||||||
; CHECK-T1-NEXT: mov r1, r6
|
|
||||||
; CHECK-T1-NEXT: .LBB5_12:
|
|
||||||
; CHECK-T1-NEXT: ldr r5, [sp, #40]
|
|
||||||
; CHECK-T1-NEXT: subs r4, r2, r5
|
|
||||||
; CHECK-T1-NEXT: mov r6, r7
|
|
||||||
; CHECK-T1-NEXT: bmi .LBB5_14
|
|
||||||
; CHECK-T1-NEXT: @ %bb.13:
|
|
||||||
; CHECK-T1-NEXT: ldr r6, [sp, #8] @ 4-byte Reload
|
|
||||||
; CHECK-T1-NEXT: .LBB5_14:
|
|
||||||
; CHECK-T1-NEXT: cmp r6, #0
|
|
||||||
; CHECK-T1-NEXT: mov r6, r0
|
|
||||||
; CHECK-T1-NEXT: bne .LBB5_16
|
|
||||||
; CHECK-T1-NEXT: @ %bb.15:
|
|
||||||
; CHECK-T1-NEXT: mov r6, r3
|
|
||||||
; CHECK-T1-NEXT: .LBB5_16:
|
|
||||||
; CHECK-T1-NEXT: cmp r2, r5
|
|
||||||
; CHECK-T1-NEXT: bvc .LBB5_18
|
|
||||||
; CHECK-T1-NEXT: @ %bb.17:
|
|
||||||
; CHECK-T1-NEXT: mov r4, r6
|
|
||||||
; CHECK-T1-NEXT: .LBB5_18:
|
|
||||||
; CHECK-T1-NEXT: ldr r2, [sp, #44]
|
|
||||||
; CHECK-T1-NEXT: ldr r6, [sp] @ 4-byte Reload
|
|
||||||
; CHECK-T1-NEXT: subs r5, r6, r2
|
|
||||||
; CHECK-T1-NEXT: bpl .LBB5_23
|
|
||||||
; CHECK-T1-NEXT: @ %bb.19:
|
|
||||||
; CHECK-T1-NEXT: cmp r7, #0
|
|
||||||
; CHECK-T1-NEXT: beq .LBB5_24
|
|
||||||
; CHECK-T1-NEXT: .LBB5_20:
|
|
||||||
; CHECK-T1-NEXT: cmp r6, r2
|
|
||||||
; CHECK-T1-NEXT: bvc .LBB5_22
|
|
||||||
; CHECK-T1-NEXT: .LBB5_21:
|
|
||||||
; CHECK-T1-NEXT: mov r5, r0
|
|
||||||
; CHECK-T1-NEXT: .LBB5_22:
|
|
||||||
; CHECK-T1-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
|
|
||||||
; CHECK-T1-NEXT: mov r2, r4
|
|
||||||
; CHECK-T1-NEXT: mov r3, r5
|
|
||||||
; CHECK-T1-NEXT: add sp, #12
|
|
||||||
; CHECK-T1-NEXT: pop {r4, r5, r6, r7, pc}
|
|
||||||
; CHECK-T1-NEXT: .LBB5_23:
|
|
||||||
; CHECK-T1-NEXT: ldr r7, [sp, #8] @ 4-byte Reload
|
|
||||||
; CHECK-T1-NEXT: cmp r7, #0
|
|
||||||
; CHECK-T1-NEXT: bne .LBB5_20
|
|
||||||
; CHECK-T1-NEXT: .LBB5_24:
|
|
||||||
; CHECK-T1-NEXT: mov r0, r3
|
|
||||||
; CHECK-T1-NEXT: cmp r6, r2
|
|
||||||
; CHECK-T1-NEXT: bvs .LBB5_21
|
|
||||||
; CHECK-T1-NEXT: b .LBB5_22
|
|
||||||
; CHECK-T1-NEXT: .p2align 2
|
|
||||||
; CHECK-T1-NEXT: @ %bb.25:
|
|
||||||
; CHECK-T1-NEXT: .LCPI5_0:
|
|
||||||
; CHECK-T1-NEXT: .long 2147483647 @ 0x7fffffff
|
|
||||||
;
|
;
|
||||||
; CHECK-T2NODSP-LABEL: vec:
|
; CHECK-T2NODSP-LABEL: vec:
|
||||||
; CHECK-T2NODSP: @ %bb.0:
|
; CHECK-T2NODSP: @ %bb.0:
|
||||||
; CHECK-T2NODSP-NEXT: .save {r4, r5, r6, r7, lr}
|
; CHECK-T2NODSP-NEXT: .save {r7, lr}
|
||||||
; CHECK-T2NODSP-NEXT: push {r4, r5, r6, r7, lr}
|
; CHECK-T2NODSP-NEXT: push {r7, lr}
|
||||||
; CHECK-T2NODSP-NEXT: .pad #4
|
; CHECK-T2NODSP-NEXT: ldr.w r12, [sp, #8]
|
||||||
; CHECK-T2NODSP-NEXT: sub sp, #4
|
; CHECK-T2NODSP-NEXT: ldr.w lr, [sp, #12]
|
||||||
; CHECK-T2NODSP-NEXT: ldr r4, [sp, #24]
|
; CHECK-T2NODSP-NEXT: subs.w r0, r0, r12
|
||||||
; CHECK-T2NODSP-NEXT: mov lr, r0
|
|
||||||
; CHECK-T2NODSP-NEXT: ldr r7, [sp, #28]
|
|
||||||
; CHECK-T2NODSP-NEXT: movs r5, #0
|
|
||||||
; CHECK-T2NODSP-NEXT: subs r6, r0, r4
|
|
||||||
; CHECK-T2NODSP-NEXT: mov.w r0, #0
|
|
||||||
; CHECK-T2NODSP-NEXT: it mi
|
|
||||||
; CHECK-T2NODSP-NEXT: movmi r0, #1
|
|
||||||
; CHECK-T2NODSP-NEXT: cmp r0, #0
|
|
||||||
; CHECK-T2NODSP-NEXT: mov.w r0, #-2147483648
|
|
||||||
; CHECK-T2NODSP-NEXT: mov.w r12, #-2147483648
|
; CHECK-T2NODSP-NEXT: mov.w r12, #-2147483648
|
||||||
; CHECK-T2NODSP-NEXT: it ne
|
; CHECK-T2NODSP-NEXT: it vs
|
||||||
; CHECK-T2NODSP-NEXT: mvnne r0, #-2147483648
|
; CHECK-T2NODSP-NEXT: eorvs.w r0, r12, r0, asr #31
|
||||||
; CHECK-T2NODSP-NEXT: cmp lr, r4
|
; CHECK-T2NODSP-NEXT: subs.w r1, r1, lr
|
||||||
; CHECK-T2NODSP-NEXT: it vc
|
; CHECK-T2NODSP-NEXT: ldr.w lr, [sp, #16]
|
||||||
; CHECK-T2NODSP-NEXT: movvc r0, r6
|
; CHECK-T2NODSP-NEXT: it vs
|
||||||
; CHECK-T2NODSP-NEXT: subs r6, r1, r7
|
; CHECK-T2NODSP-NEXT: eorvs.w r1, r12, r1, asr #31
|
||||||
; CHECK-T2NODSP-NEXT: mov.w r4, #0
|
; CHECK-T2NODSP-NEXT: subs.w r2, r2, lr
|
||||||
; CHECK-T2NODSP-NEXT: mov.w lr, #-2147483648
|
; CHECK-T2NODSP-NEXT: ldr.w lr, [sp, #20]
|
||||||
; CHECK-T2NODSP-NEXT: it mi
|
; CHECK-T2NODSP-NEXT: it vs
|
||||||
; CHECK-T2NODSP-NEXT: movmi r4, #1
|
; CHECK-T2NODSP-NEXT: eorvs.w r2, r12, r2, asr #31
|
||||||
; CHECK-T2NODSP-NEXT: cmp r4, #0
|
; CHECK-T2NODSP-NEXT: subs.w r3, r3, lr
|
||||||
; CHECK-T2NODSP-NEXT: it ne
|
; CHECK-T2NODSP-NEXT: it vs
|
||||||
; CHECK-T2NODSP-NEXT: mvnne lr, #-2147483648
|
; CHECK-T2NODSP-NEXT: eorvs.w r3, r12, r3, asr #31
|
||||||
; CHECK-T2NODSP-NEXT: cmp r1, r7
|
; CHECK-T2NODSP-NEXT: pop {r7, pc}
|
||||||
; CHECK-T2NODSP-NEXT: ldr r1, [sp, #32]
|
|
||||||
; CHECK-T2NODSP-NEXT: mov.w r4, #0
|
|
||||||
; CHECK-T2NODSP-NEXT: it vc
|
|
||||||
; CHECK-T2NODSP-NEXT: movvc lr, r6
|
|
||||||
; CHECK-T2NODSP-NEXT: subs r6, r2, r1
|
|
||||||
; CHECK-T2NODSP-NEXT: it mi
|
|
||||||
; CHECK-T2NODSP-NEXT: movmi r4, #1
|
|
||||||
; CHECK-T2NODSP-NEXT: cmp r4, #0
|
|
||||||
; CHECK-T2NODSP-NEXT: mov.w r4, #-2147483648
|
|
||||||
; CHECK-T2NODSP-NEXT: it ne
|
|
||||||
; CHECK-T2NODSP-NEXT: mvnne r4, #-2147483648
|
|
||||||
; CHECK-T2NODSP-NEXT: cmp r2, r1
|
|
||||||
; CHECK-T2NODSP-NEXT: ldr r1, [sp, #36]
|
|
||||||
; CHECK-T2NODSP-NEXT: it vc
|
|
||||||
; CHECK-T2NODSP-NEXT: movvc r4, r6
|
|
||||||
; CHECK-T2NODSP-NEXT: subs r2, r3, r1
|
|
||||||
; CHECK-T2NODSP-NEXT: it mi
|
|
||||||
; CHECK-T2NODSP-NEXT: movmi r5, #1
|
|
||||||
; CHECK-T2NODSP-NEXT: cmp r5, #0
|
|
||||||
; CHECK-T2NODSP-NEXT: it ne
|
|
||||||
; CHECK-T2NODSP-NEXT: mvnne r12, #-2147483648
|
|
||||||
; CHECK-T2NODSP-NEXT: cmp r3, r1
|
|
||||||
; CHECK-T2NODSP-NEXT: it vc
|
|
||||||
; CHECK-T2NODSP-NEXT: movvc r12, r2
|
|
||||||
; CHECK-T2NODSP-NEXT: mov r1, lr
|
|
||||||
; CHECK-T2NODSP-NEXT: mov r2, r4
|
|
||||||
; CHECK-T2NODSP-NEXT: mov r3, r12
|
|
||||||
; CHECK-T2NODSP-NEXT: add sp, #4
|
|
||||||
; CHECK-T2NODSP-NEXT: pop {r4, r5, r6, r7, pc}
|
|
||||||
;
|
;
|
||||||
; CHECK-T2DSP-LABEL: vec:
|
; CHECK-T2DSP-LABEL: vec:
|
||||||
; CHECK-T2DSP: @ %bb.0:
|
; CHECK-T2DSP: @ %bb.0:
|
||||||
|
|
@ -547,49 +399,22 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
|
||||||
;
|
;
|
||||||
; CHECK-ARMNODPS-LABEL: vec:
|
; CHECK-ARMNODPS-LABEL: vec:
|
||||||
; CHECK-ARMNODPS: @ %bb.0:
|
; CHECK-ARMNODPS: @ %bb.0:
|
||||||
; CHECK-ARMNODPS-NEXT: .save {r4, r5, r6, r7, r11, lr}
|
; CHECK-ARMNODPS-NEXT: .save {r11, lr}
|
||||||
; CHECK-ARMNODPS-NEXT: push {r4, r5, r6, r7, r11, lr}
|
; CHECK-ARMNODPS-NEXT: push {r11, lr}
|
||||||
; CHECK-ARMNODPS-NEXT: ldr r4, [sp, #24]
|
; CHECK-ARMNODPS-NEXT: ldr r12, [sp, #8]
|
||||||
; CHECK-ARMNODPS-NEXT: mov lr, r0
|
; CHECK-ARMNODPS-NEXT: ldr lr, [sp, #12]
|
||||||
; CHECK-ARMNODPS-NEXT: ldr r7, [sp, #28]
|
; CHECK-ARMNODPS-NEXT: subs r0, r0, r12
|
||||||
; CHECK-ARMNODPS-NEXT: mov r5, #0
|
|
||||||
; CHECK-ARMNODPS-NEXT: subs r6, r0, r4
|
|
||||||
; CHECK-ARMNODPS-NEXT: mov r0, #0
|
|
||||||
; CHECK-ARMNODPS-NEXT: movmi r0, #1
|
|
||||||
; CHECK-ARMNODPS-NEXT: cmp r0, #0
|
|
||||||
; CHECK-ARMNODPS-NEXT: mov r0, #-2147483648
|
|
||||||
; CHECK-ARMNODPS-NEXT: mov r12, #-2147483648
|
; CHECK-ARMNODPS-NEXT: mov r12, #-2147483648
|
||||||
; CHECK-ARMNODPS-NEXT: mvnne r0, #-2147483648
|
; CHECK-ARMNODPS-NEXT: eorvs r0, r12, r0, asr #31
|
||||||
; CHECK-ARMNODPS-NEXT: cmp lr, r4
|
; CHECK-ARMNODPS-NEXT: subs r1, r1, lr
|
||||||
; CHECK-ARMNODPS-NEXT: movvc r0, r6
|
; CHECK-ARMNODPS-NEXT: ldr lr, [sp, #16]
|
||||||
; CHECK-ARMNODPS-NEXT: subs r6, r1, r7
|
; CHECK-ARMNODPS-NEXT: eorvs r1, r12, r1, asr #31
|
||||||
; CHECK-ARMNODPS-NEXT: mov r4, #0
|
; CHECK-ARMNODPS-NEXT: subs r2, r2, lr
|
||||||
; CHECK-ARMNODPS-NEXT: mov lr, #-2147483648
|
; CHECK-ARMNODPS-NEXT: ldr lr, [sp, #20]
|
||||||
; CHECK-ARMNODPS-NEXT: movmi r4, #1
|
; CHECK-ARMNODPS-NEXT: eorvs r2, r12, r2, asr #31
|
||||||
; CHECK-ARMNODPS-NEXT: cmp r4, #0
|
; CHECK-ARMNODPS-NEXT: subs r3, r3, lr
|
||||||
; CHECK-ARMNODPS-NEXT: mvnne lr, #-2147483648
|
; CHECK-ARMNODPS-NEXT: eorvs r3, r12, r3, asr #31
|
||||||
; CHECK-ARMNODPS-NEXT: cmp r1, r7
|
; CHECK-ARMNODPS-NEXT: pop {r11, pc}
|
||||||
; CHECK-ARMNODPS-NEXT: ldr r1, [sp, #32]
|
|
||||||
; CHECK-ARMNODPS-NEXT: movvc lr, r6
|
|
||||||
; CHECK-ARMNODPS-NEXT: mov r4, #0
|
|
||||||
; CHECK-ARMNODPS-NEXT: subs r6, r2, r1
|
|
||||||
; CHECK-ARMNODPS-NEXT: movmi r4, #1
|
|
||||||
; CHECK-ARMNODPS-NEXT: cmp r4, #0
|
|
||||||
; CHECK-ARMNODPS-NEXT: mov r4, #-2147483648
|
|
||||||
; CHECK-ARMNODPS-NEXT: mvnne r4, #-2147483648
|
|
||||||
; CHECK-ARMNODPS-NEXT: cmp r2, r1
|
|
||||||
; CHECK-ARMNODPS-NEXT: ldr r1, [sp, #36]
|
|
||||||
; CHECK-ARMNODPS-NEXT: movvc r4, r6
|
|
||||||
; CHECK-ARMNODPS-NEXT: subs r2, r3, r1
|
|
||||||
; CHECK-ARMNODPS-NEXT: movmi r5, #1
|
|
||||||
; CHECK-ARMNODPS-NEXT: cmp r5, #0
|
|
||||||
; CHECK-ARMNODPS-NEXT: mvnne r12, #-2147483648
|
|
||||||
; CHECK-ARMNODPS-NEXT: cmp r3, r1
|
|
||||||
; CHECK-ARMNODPS-NEXT: movvc r12, r2
|
|
||||||
; CHECK-ARMNODPS-NEXT: mov r1, lr
|
|
||||||
; CHECK-ARMNODPS-NEXT: mov r2, r4
|
|
||||||
; CHECK-ARMNODPS-NEXT: mov r3, r12
|
|
||||||
; CHECK-ARMNODPS-NEXT: pop {r4, r5, r6, r7, r11, pc}
|
|
||||||
;
|
;
|
||||||
; CHECK-ARMBASEDSP-LABEL: vec:
|
; CHECK-ARMBASEDSP-LABEL: vec:
|
||||||
; CHECK-ARMBASEDSP: @ %bb.0:
|
; CHECK-ARMBASEDSP: @ %bb.0:
|
||||||
|
|
|
||||||
|
|
@ -13,56 +13,27 @@ declare i64 @llvm.ssub.sat.i64(i64, i64)
|
||||||
define i32 @func32(i32 %x, i32 %y, i32 %z) nounwind {
|
define i32 @func32(i32 %x, i32 %y, i32 %z) nounwind {
|
||||||
; CHECK-T1-LABEL: func32:
|
; CHECK-T1-LABEL: func32:
|
||||||
; CHECK-T1: @ %bb.0:
|
; CHECK-T1: @ %bb.0:
|
||||||
; CHECK-T1-NEXT: .save {r4, lr}
|
|
||||||
; CHECK-T1-NEXT: push {r4, lr}
|
|
||||||
; CHECK-T1-NEXT: mov r3, r0
|
|
||||||
; CHECK-T1-NEXT: muls r1, r2, r1
|
; CHECK-T1-NEXT: muls r1, r2, r1
|
||||||
; CHECK-T1-NEXT: movs r2, #1
|
|
||||||
; CHECK-T1-NEXT: subs r0, r0, r1
|
; CHECK-T1-NEXT: subs r0, r0, r1
|
||||||
; CHECK-T1-NEXT: mov r4, r2
|
; CHECK-T1-NEXT: bvc .LBB0_2
|
||||||
; CHECK-T1-NEXT: bmi .LBB0_2
|
|
||||||
; CHECK-T1-NEXT: @ %bb.1:
|
; CHECK-T1-NEXT: @ %bb.1:
|
||||||
; CHECK-T1-NEXT: movs r4, #0
|
; CHECK-T1-NEXT: asrs r1, r0, #31
|
||||||
|
; CHECK-T1-NEXT: movs r0, #1
|
||||||
|
; CHECK-T1-NEXT: lsls r0, r0, #31
|
||||||
|
; CHECK-T1-NEXT: eors r0, r1
|
||||||
; CHECK-T1-NEXT: .LBB0_2:
|
; CHECK-T1-NEXT: .LBB0_2:
|
||||||
; CHECK-T1-NEXT: cmp r4, #0
|
; CHECK-T1-NEXT: bx lr
|
||||||
; CHECK-T1-NEXT: bne .LBB0_4
|
|
||||||
; CHECK-T1-NEXT: @ %bb.3:
|
|
||||||
; CHECK-T1-NEXT: lsls r2, r2, #31
|
|
||||||
; CHECK-T1-NEXT: cmp r3, r1
|
|
||||||
; CHECK-T1-NEXT: bvs .LBB0_5
|
|
||||||
; CHECK-T1-NEXT: b .LBB0_6
|
|
||||||
; CHECK-T1-NEXT: .LBB0_4:
|
|
||||||
; CHECK-T1-NEXT: ldr r2, .LCPI0_0
|
|
||||||
; CHECK-T1-NEXT: cmp r3, r1
|
|
||||||
; CHECK-T1-NEXT: bvc .LBB0_6
|
|
||||||
; CHECK-T1-NEXT: .LBB0_5:
|
|
||||||
; CHECK-T1-NEXT: mov r0, r2
|
|
||||||
; CHECK-T1-NEXT: .LBB0_6:
|
|
||||||
; CHECK-T1-NEXT: pop {r4, pc}
|
|
||||||
; CHECK-T1-NEXT: .p2align 2
|
|
||||||
; CHECK-T1-NEXT: @ %bb.7:
|
|
||||||
; CHECK-T1-NEXT: .LCPI0_0:
|
|
||||||
; CHECK-T1-NEXT: .long 2147483647 @ 0x7fffffff
|
|
||||||
;
|
;
|
||||||
; CHECK-T2NODSP-LABEL: func32:
|
; CHECK-T2NODSP-LABEL: func32:
|
||||||
; CHECK-T2NODSP: @ %bb.0:
|
; CHECK-T2NODSP: @ %bb.0:
|
||||||
; CHECK-T2NODSP-NEXT: .save {r7, lr}
|
; CHECK-T2NODSP-NEXT: mls r3, r1, r2, r0
|
||||||
; CHECK-T2NODSP-NEXT: push {r7, lr}
|
; CHECK-T2NODSP-NEXT: mov.w r12, #-2147483648
|
||||||
; CHECK-T2NODSP-NEXT: mls r12, r1, r2, r0
|
|
||||||
; CHECK-T2NODSP-NEXT: mov.w lr, #0
|
|
||||||
; CHECK-T2NODSP-NEXT: mov.w r3, #-2147483648
|
|
||||||
; CHECK-T2NODSP-NEXT: muls r1, r2, r1
|
; CHECK-T2NODSP-NEXT: muls r1, r2, r1
|
||||||
; CHECK-T2NODSP-NEXT: cmp.w r12, #0
|
|
||||||
; CHECK-T2NODSP-NEXT: it mi
|
|
||||||
; CHECK-T2NODSP-NEXT: movmi.w lr, #1
|
|
||||||
; CHECK-T2NODSP-NEXT: cmp.w lr, #0
|
|
||||||
; CHECK-T2NODSP-NEXT: it ne
|
|
||||||
; CHECK-T2NODSP-NEXT: mvnne r3, #-2147483648
|
|
||||||
; CHECK-T2NODSP-NEXT: cmp r0, r1
|
; CHECK-T2NODSP-NEXT: cmp r0, r1
|
||||||
; CHECK-T2NODSP-NEXT: it vc
|
; CHECK-T2NODSP-NEXT: it vs
|
||||||
; CHECK-T2NODSP-NEXT: movvc r3, r12
|
; CHECK-T2NODSP-NEXT: eorvs.w r3, r12, r3, asr #31
|
||||||
; CHECK-T2NODSP-NEXT: mov r0, r3
|
; CHECK-T2NODSP-NEXT: mov r0, r3
|
||||||
; CHECK-T2NODSP-NEXT: pop {r7, pc}
|
; CHECK-T2NODSP-NEXT: bx lr
|
||||||
;
|
;
|
||||||
; CHECK-T2DSP-LABEL: func32:
|
; CHECK-T2DSP-LABEL: func32:
|
||||||
; CHECK-T2DSP: @ %bb.0:
|
; CHECK-T2DSP: @ %bb.0:
|
||||||
|
|
@ -83,42 +54,35 @@ define i32 @func32(i32 %x, i32 %y, i32 %z) nounwind {
|
||||||
define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind {
|
define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind {
|
||||||
; CHECK-T1-LABEL: func64:
|
; CHECK-T1-LABEL: func64:
|
||||||
; CHECK-T1: @ %bb.0:
|
; CHECK-T1: @ %bb.0:
|
||||||
; CHECK-T1-NEXT: .save {r4, lr}
|
; CHECK-T1-NEXT: .save {r4, r5, r7, lr}
|
||||||
; CHECK-T1-NEXT: push {r4, lr}
|
; CHECK-T1-NEXT: push {r4, r5, r7, lr}
|
||||||
; CHECK-T1-NEXT: ldr r2, [sp, #12]
|
; CHECK-T1-NEXT: ldr r2, [sp, #20]
|
||||||
; CHECK-T1-NEXT: mov r4, r1
|
; CHECK-T1-NEXT: mov r5, r1
|
||||||
; CHECK-T1-NEXT: eors r4, r2
|
; CHECK-T1-NEXT: eors r5, r2
|
||||||
; CHECK-T1-NEXT: ldr r3, [sp, #8]
|
; CHECK-T1-NEXT: ldr r3, [sp, #16]
|
||||||
; CHECK-T1-NEXT: subs r0, r0, r3
|
; CHECK-T1-NEXT: subs r4, r0, r3
|
||||||
; CHECK-T1-NEXT: mov r3, r1
|
; CHECK-T1-NEXT: mov r3, r1
|
||||||
; CHECK-T1-NEXT: sbcs r3, r2
|
; CHECK-T1-NEXT: sbcs r3, r2
|
||||||
; CHECK-T1-NEXT: eors r1, r3
|
; CHECK-T1-NEXT: eors r1, r3
|
||||||
; CHECK-T1-NEXT: ands r1, r4
|
; CHECK-T1-NEXT: ands r1, r5
|
||||||
; CHECK-T1-NEXT: bpl .LBB1_2
|
; CHECK-T1-NEXT: asrs r2, r3, #31
|
||||||
|
; CHECK-T1-NEXT: cmp r1, #0
|
||||||
|
; CHECK-T1-NEXT: mov r0, r2
|
||||||
|
; CHECK-T1-NEXT: bmi .LBB1_2
|
||||||
; CHECK-T1-NEXT: @ %bb.1:
|
; CHECK-T1-NEXT: @ %bb.1:
|
||||||
; CHECK-T1-NEXT: asrs r0, r3, #31
|
; CHECK-T1-NEXT: mov r0, r4
|
||||||
; CHECK-T1-NEXT: .LBB1_2:
|
; CHECK-T1-NEXT: .LBB1_2:
|
||||||
; CHECK-T1-NEXT: cmp r3, #0
|
; CHECK-T1-NEXT: cmp r1, #0
|
||||||
; CHECK-T1-NEXT: bmi .LBB1_4
|
; CHECK-T1-NEXT: bmi .LBB1_4
|
||||||
; CHECK-T1-NEXT: @ %bb.3:
|
; CHECK-T1-NEXT: @ %bb.3:
|
||||||
; CHECK-T1-NEXT: movs r2, #1
|
; CHECK-T1-NEXT: mov r1, r3
|
||||||
; CHECK-T1-NEXT: lsls r2, r2, #31
|
; CHECK-T1-NEXT: pop {r4, r5, r7, pc}
|
||||||
; CHECK-T1-NEXT: cmp r1, #0
|
|
||||||
; CHECK-T1-NEXT: bpl .LBB1_5
|
|
||||||
; CHECK-T1-NEXT: b .LBB1_6
|
|
||||||
; CHECK-T1-NEXT: .LBB1_4:
|
; CHECK-T1-NEXT: .LBB1_4:
|
||||||
; CHECK-T1-NEXT: ldr r2, .LCPI1_0
|
; CHECK-T1-NEXT: movs r1, #1
|
||||||
; CHECK-T1-NEXT: cmp r1, #0
|
; CHECK-T1-NEXT: lsls r1, r1, #31
|
||||||
; CHECK-T1-NEXT: bmi .LBB1_6
|
; CHECK-T1-NEXT: eors r2, r1
|
||||||
; CHECK-T1-NEXT: .LBB1_5:
|
|
||||||
; CHECK-T1-NEXT: mov r2, r3
|
|
||||||
; CHECK-T1-NEXT: .LBB1_6:
|
|
||||||
; CHECK-T1-NEXT: mov r1, r2
|
; CHECK-T1-NEXT: mov r1, r2
|
||||||
; CHECK-T1-NEXT: pop {r4, pc}
|
; CHECK-T1-NEXT: pop {r4, r5, r7, pc}
|
||||||
; CHECK-T1-NEXT: .p2align 2
|
|
||||||
; CHECK-T1-NEXT: @ %bb.7:
|
|
||||||
; CHECK-T1-NEXT: .LCPI1_0:
|
|
||||||
; CHECK-T1-NEXT: .long 2147483647 @ 0x7fffffff
|
|
||||||
;
|
;
|
||||||
; CHECK-T2-LABEL: func64:
|
; CHECK-T2-LABEL: func64:
|
||||||
; CHECK-T2: @ %bb.0:
|
; CHECK-T2: @ %bb.0:
|
||||||
|
|
@ -128,16 +92,13 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind {
|
||||||
; CHECK-T2-NEXT: sbc.w r2, r1, r12
|
; CHECK-T2-NEXT: sbc.w r2, r1, r12
|
||||||
; CHECK-T2-NEXT: eor.w r3, r1, r12
|
; CHECK-T2-NEXT: eor.w r3, r1, r12
|
||||||
; CHECK-T2-NEXT: eors r1, r2
|
; CHECK-T2-NEXT: eors r1, r2
|
||||||
; CHECK-T2-NEXT: ands r3, r1
|
; CHECK-T2-NEXT: ands r1, r3
|
||||||
; CHECK-T2-NEXT: mov.w r1, #-2147483648
|
|
||||||
; CHECK-T2-NEXT: it mi
|
; CHECK-T2-NEXT: it mi
|
||||||
; CHECK-T2-NEXT: asrmi r0, r2, #31
|
; CHECK-T2-NEXT: asrmi r0, r2, #31
|
||||||
; CHECK-T2-NEXT: cmp r2, #0
|
; CHECK-T2-NEXT: mov.w r1, #-2147483648
|
||||||
; CHECK-T2-NEXT: it mi
|
; CHECK-T2-NEXT: it mi
|
||||||
; CHECK-T2-NEXT: mvnmi r1, #-2147483648
|
; CHECK-T2-NEXT: eormi.w r2, r1, r2, asr #31
|
||||||
; CHECK-T2-NEXT: cmp r3, #0
|
; CHECK-T2-NEXT: mov r1, r2
|
||||||
; CHECK-T2-NEXT: it pl
|
|
||||||
; CHECK-T2-NEXT: movpl r1, r2
|
|
||||||
; CHECK-T2-NEXT: bx lr
|
; CHECK-T2-NEXT: bx lr
|
||||||
;
|
;
|
||||||
; CHECK-ARM-LABEL: func64:
|
; CHECK-ARM-LABEL: func64:
|
||||||
|
|
@ -148,13 +109,11 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind {
|
||||||
; CHECK-ARM-NEXT: eor r3, r1, r2
|
; CHECK-ARM-NEXT: eor r3, r1, r2
|
||||||
; CHECK-ARM-NEXT: sbc r2, r1, r2
|
; CHECK-ARM-NEXT: sbc r2, r1, r2
|
||||||
; CHECK-ARM-NEXT: eor r1, r1, r2
|
; CHECK-ARM-NEXT: eor r1, r1, r2
|
||||||
; CHECK-ARM-NEXT: ands r3, r3, r1
|
; CHECK-ARM-NEXT: ands r1, r3, r1
|
||||||
; CHECK-ARM-NEXT: mov r1, #-2147483648
|
|
||||||
; CHECK-ARM-NEXT: asrmi r0, r2, #31
|
; CHECK-ARM-NEXT: asrmi r0, r2, #31
|
||||||
; CHECK-ARM-NEXT: cmp r2, #0
|
; CHECK-ARM-NEXT: mov r1, #-2147483648
|
||||||
; CHECK-ARM-NEXT: mvnmi r1, #-2147483648
|
; CHECK-ARM-NEXT: eormi r2, r1, r2, asr #31
|
||||||
; CHECK-ARM-NEXT: cmp r3, #0
|
; CHECK-ARM-NEXT: mov r1, r2
|
||||||
; CHECK-ARM-NEXT: movpl r1, r2
|
|
||||||
; CHECK-ARM-NEXT: bx lr
|
; CHECK-ARM-NEXT: bx lr
|
||||||
%a = mul i64 %y, %z
|
%a = mul i64 %y, %z
|
||||||
%tmp = call i64 @llvm.ssub.sat.i64(i64 %x, i64 %z)
|
%tmp = call i64 @llvm.ssub.sat.i64(i64 %x, i64 %z)
|
||||||
|
|
|
||||||
|
|
@ -747,118 +747,114 @@ define <4 x i128> @sadd(<4 x i128> %a, <4 x i128> %b) local_unnamed_addr {
|
||||||
; CHECK-NEXT: vadduqm 0, 2, 6
|
; CHECK-NEXT: vadduqm 0, 2, 6
|
||||||
; CHECK-NEXT: xxswapd 0, 34
|
; CHECK-NEXT: xxswapd 0, 34
|
||||||
; CHECK-NEXT: std 30, -16(1) # 8-byte Folded Spill
|
; CHECK-NEXT: std 30, -16(1) # 8-byte Folded Spill
|
||||||
; CHECK-NEXT: li 3, -1
|
; CHECK-NEXT: addis 3, 2, .LCPI48_0@toc@ha
|
||||||
; CHECK-NEXT: vadduqm 1, 3, 7
|
; CHECK-NEXT: vadduqm 1, 3, 7
|
||||||
; CHECK-NEXT: xxswapd 1, 35
|
; CHECK-NEXT: xxswapd 1, 35
|
||||||
; CHECK-NEXT: xxswapd 2, 32
|
; CHECK-NEXT: addi 3, 3, .LCPI48_0@toc@l
|
||||||
|
; CHECK-NEXT: xxswapd 3, 32
|
||||||
; CHECK-NEXT: mfvsrd 4, 34
|
; CHECK-NEXT: mfvsrd 4, 34
|
||||||
; CHECK-NEXT: mfvsrd 9, 32
|
; CHECK-NEXT: mfvsrd 8, 32
|
||||||
; CHECK-NEXT: mffprd 0, 0
|
; CHECK-NEXT: xxswapd 2, 36
|
||||||
|
; CHECK-NEXT: mffprd 12, 0
|
||||||
; CHECK-NEXT: xxswapd 0, 33
|
; CHECK-NEXT: xxswapd 0, 33
|
||||||
; CHECK-NEXT: mfvsrd 5, 38
|
; CHECK-NEXT: vadduqm 10, 4, 8
|
||||||
; CHECK-NEXT: cmpld 9, 4
|
; CHECK-NEXT: cmpld 8, 4
|
||||||
; CHECK-NEXT: cmpd 1, 9, 4
|
; CHECK-NEXT: cmpd 1, 8, 4
|
||||||
; CHECK-NEXT: vadduqm 6, 4, 8
|
; CHECK-NEXT: mffprd 4, 3
|
||||||
; CHECK-NEXT: mffprd 4, 2
|
; CHECK-NEXT: lxvd2x 3, 0, 3
|
||||||
; CHECK-NEXT: sradi 5, 5, 63
|
; CHECK-NEXT: sradi 3, 8, 63
|
||||||
; CHECK-NEXT: mffprd 30, 1
|
; CHECK-NEXT: mffprd 0, 1
|
||||||
; CHECK-NEXT: xxswapd 1, 36
|
|
||||||
; CHECK-NEXT: crandc 20, 4, 2
|
|
||||||
; CHECK-NEXT: cmpld 1, 4, 0
|
|
||||||
; CHECK-NEXT: mffprd 4, 0
|
|
||||||
; CHECK-NEXT: xxswapd 0, 38
|
|
||||||
; CHECK-NEXT: mfvsrd 6, 35
|
|
||||||
; CHECK-NEXT: vadduqm 10, 5, 9
|
|
||||||
; CHECK-NEXT: cmpld 6, 4, 30
|
|
||||||
; CHECK-NEXT: ld 30, -16(1) # 8-byte Folded Reload
|
|
||||||
; CHECK-NEXT: mfvsrd 10, 33
|
|
||||||
; CHECK-NEXT: mfvsrd 7, 36
|
|
||||||
; CHECK-NEXT: mfvsrd 11, 38
|
|
||||||
; CHECK-NEXT: crand 21, 2, 4
|
|
||||||
; CHECK-NEXT: cmpld 10, 6
|
|
||||||
; CHECK-NEXT: cmpd 1, 10, 6
|
|
||||||
; CHECK-NEXT: mffprd 6, 1
|
|
||||||
; CHECK-NEXT: xxswapd 1, 37
|
; CHECK-NEXT: xxswapd 1, 37
|
||||||
|
; CHECK-NEXT: mfvsrd 5, 35
|
||||||
|
; CHECK-NEXT: vadduqm 11, 5, 9
|
||||||
|
; CHECK-NEXT: xxswapd 34, 3
|
||||||
|
; CHECK-NEXT: mfvsrd 9, 33
|
||||||
|
; CHECK-NEXT: crandc 20, 4, 2
|
||||||
|
; CHECK-NEXT: cmpld 1, 4, 12
|
||||||
; CHECK-NEXT: mffprd 4, 0
|
; CHECK-NEXT: mffprd 4, 0
|
||||||
; CHECK-NEXT: xxswapd 0, 42
|
; CHECK-NEXT: xxswapd 0, 42
|
||||||
; CHECK-NEXT: mfvsrd 8, 37
|
; CHECK-NEXT: mfvsrd 6, 36
|
||||||
; CHECK-NEXT: mfvsrd 12, 42
|
; CHECK-NEXT: mfvsrd 10, 42
|
||||||
|
; CHECK-NEXT: cmpld 6, 4, 0
|
||||||
|
; CHECK-NEXT: crand 21, 2, 4
|
||||||
|
; CHECK-NEXT: cmpld 9, 5
|
||||||
|
; CHECK-NEXT: cmpd 1, 9, 5
|
||||||
|
; CHECK-NEXT: mffprd 5, 1
|
||||||
|
; CHECK-NEXT: xxswapd 1, 43
|
||||||
|
; CHECK-NEXT: mffprd 30, 2
|
||||||
|
; CHECK-NEXT: mffprd 4, 0
|
||||||
|
; CHECK-NEXT: mfvsrd 7, 37
|
||||||
|
; CHECK-NEXT: mfvsrd 11, 43
|
||||||
; CHECK-NEXT: crandc 22, 4, 2
|
; CHECK-NEXT: crandc 22, 4, 2
|
||||||
; CHECK-NEXT: cmpd 1, 11, 7
|
; CHECK-NEXT: cmpd 1, 10, 6
|
||||||
; CHECK-NEXT: crand 23, 2, 24
|
; CHECK-NEXT: crand 23, 2, 24
|
||||||
; CHECK-NEXT: cmpld 11, 7
|
; CHECK-NEXT: cmpld 10, 6
|
||||||
; CHECK-NEXT: crandc 24, 4, 2
|
; CHECK-NEXT: crandc 24, 4, 2
|
||||||
; CHECK-NEXT: cmpld 1, 4, 6
|
; CHECK-NEXT: cmpld 1, 4, 30
|
||||||
|
; CHECK-NEXT: ld 30, -16(1) # 8-byte Folded Reload
|
||||||
; CHECK-NEXT: mffprd 4, 1
|
; CHECK-NEXT: mffprd 4, 1
|
||||||
; CHECK-NEXT: mffprd 6, 0
|
; CHECK-NEXT: mfvsrd 6, 38
|
||||||
; CHECK-NEXT: crand 25, 2, 4
|
; CHECK-NEXT: crand 25, 2, 4
|
||||||
; CHECK-NEXT: cmpld 12, 8
|
; CHECK-NEXT: cmpld 11, 7
|
||||||
; CHECK-NEXT: cmpd 1, 12, 8
|
; CHECK-NEXT: cmpd 1, 11, 7
|
||||||
; CHECK-NEXT: crandc 26, 4, 2
|
; CHECK-NEXT: crandc 26, 4, 2
|
||||||
; CHECK-NEXT: cmpld 1, 6, 4
|
; CHECK-NEXT: cmpld 1, 4, 5
|
||||||
|
; CHECK-NEXT: sradi 4, 6, 63
|
||||||
|
; CHECK-NEXT: mtfprd 0, 4
|
||||||
; CHECK-NEXT: mfvsrd 4, 39
|
; CHECK-NEXT: mfvsrd 4, 39
|
||||||
; CHECK-NEXT: mtfprd 0, 5
|
; CHECK-NEXT: mfvsrd 5, 40
|
||||||
|
; CHECK-NEXT: mfvsrd 6, 41
|
||||||
; CHECK-NEXT: sradi 4, 4, 63
|
; CHECK-NEXT: sradi 4, 4, 63
|
||||||
; CHECK-NEXT: mfvsrd 5, 41
|
|
||||||
; CHECK-NEXT: mtfprd 1, 4
|
; CHECK-NEXT: mtfprd 1, 4
|
||||||
; CHECK-NEXT: xxspltd 34, 0, 0
|
|
||||||
; CHECK-NEXT: mfvsrd 4, 40
|
|
||||||
; CHECK-NEXT: crnor 20, 21, 20
|
|
||||||
; CHECK-NEXT: sradi 4, 4, 63
|
|
||||||
; CHECK-NEXT: crand 27, 2, 4
|
|
||||||
; CHECK-NEXT: mtfprd 2, 4
|
|
||||||
; CHECK-NEXT: sradi 4, 5, 63
|
; CHECK-NEXT: sradi 4, 5, 63
|
||||||
; CHECK-NEXT: sradi 5, 10, 63
|
; CHECK-NEXT: mtfprd 2, 4
|
||||||
; CHECK-NEXT: mtfprd 3, 4
|
; CHECK-NEXT: sradi 4, 6, 63
|
||||||
; CHECK-NEXT: isel 4, 0, 3, 20
|
; CHECK-NEXT: mtfprd 5, 3
|
||||||
; CHECK-NEXT: xxspltd 36, 2, 0
|
; CHECK-NEXT: sradi 3, 10, 63
|
||||||
; CHECK-NEXT: crnor 20, 23, 22
|
|
||||||
; CHECK-NEXT: mtfprd 4, 4
|
; CHECK-NEXT: mtfprd 4, 4
|
||||||
; CHECK-NEXT: sradi 4, 9, 63
|
; CHECK-NEXT: sradi 4, 9, 63
|
||||||
; CHECK-NEXT: mtfprd 0, 4
|
; CHECK-NEXT: mtfprd 6, 4
|
||||||
; CHECK-NEXT: addis 4, 2, .LCPI48_0@toc@ha
|
; CHECK-NEXT: xxspltd 35, 5, 0
|
||||||
; CHECK-NEXT: mtfprd 5, 5
|
; CHECK-NEXT: sradi 4, 11, 63
|
||||||
; CHECK-NEXT: xxspltd 35, 4, 0
|
; CHECK-NEXT: crnor 20, 21, 20
|
||||||
; CHECK-NEXT: addi 4, 4, .LCPI48_0@toc@l
|
; CHECK-NEXT: xxspltd 38, 4, 0
|
||||||
; CHECK-NEXT: isel 5, 0, 3, 20
|
; CHECK-NEXT: mtfprd 3, 3
|
||||||
; CHECK-NEXT: lxvd2x 6, 0, 4
|
; CHECK-NEXT: li 3, -1
|
||||||
; CHECK-NEXT: mtfprd 4, 5
|
; CHECK-NEXT: xxspltd 36, 6, 0
|
||||||
; CHECK-NEXT: addis 5, 2, .LCPI48_1@toc@ha
|
; CHECK-NEXT: mtfprd 5, 4
|
||||||
; CHECK-NEXT: xxspltd 37, 5, 0
|
; CHECK-NEXT: crand 27, 2, 4
|
||||||
; CHECK-NEXT: addi 4, 5, .LCPI48_1@toc@l
|
; CHECK-NEXT: xxspltd 37, 3, 0
|
||||||
; CHECK-NEXT: xxlxor 7, 34, 35
|
; CHECK-NEXT: xxlxor 3, 35, 34
|
||||||
; CHECK-NEXT: xxspltd 34, 1, 0
|
; CHECK-NEXT: xxspltd 35, 5, 0
|
||||||
; CHECK-NEXT: sradi 5, 11, 63
|
|
||||||
; CHECK-NEXT: lxvd2x 8, 0, 4
|
|
||||||
; CHECK-NEXT: xxspltd 35, 4, 0
|
|
||||||
; CHECK-NEXT: crnor 20, 25, 24
|
|
||||||
; CHECK-NEXT: sradi 4, 12, 63
|
|
||||||
; CHECK-NEXT: crnor 21, 27, 26
|
|
||||||
; CHECK-NEXT: xxswapd 4, 6
|
|
||||||
; CHECK-NEXT: mtfprd 1, 5
|
|
||||||
; CHECK-NEXT: mtfprd 9, 4
|
|
||||||
; CHECK-NEXT: xxswapd 6, 8
|
|
||||||
; CHECK-NEXT: xxlxor 2, 34, 35
|
|
||||||
; CHECK-NEXT: xxspltd 35, 0, 0
|
|
||||||
; CHECK-NEXT: isel 4, 0, 3, 20
|
; CHECK-NEXT: isel 4, 0, 3, 20
|
||||||
; CHECK-NEXT: xxspltd 39, 1, 0
|
; CHECK-NEXT: mtfprd 8, 4
|
||||||
; CHECK-NEXT: isel 3, 0, 3, 21
|
; CHECK-NEXT: crnor 20, 23, 22
|
||||||
; CHECK-NEXT: xxspltd 40, 9, 0
|
; CHECK-NEXT: crnor 21, 25, 24
|
||||||
; CHECK-NEXT: mtfprd 0, 4
|
; CHECK-NEXT: crnor 22, 27, 26
|
||||||
; CHECK-NEXT: xxspltd 34, 3, 0
|
; CHECK-NEXT: xxlxor 5, 36, 34
|
||||||
; CHECK-NEXT: mtfprd 1, 3
|
; CHECK-NEXT: xxspltd 36, 2, 0
|
||||||
; CHECK-NEXT: xxsel 3, 6, 4, 39
|
; CHECK-NEXT: xxlxor 6, 37, 34
|
||||||
; CHECK-NEXT: xxspltd 41, 0, 0
|
; CHECK-NEXT: xxlxor 7, 35, 34
|
||||||
; CHECK-NEXT: xxsel 0, 6, 4, 35
|
; CHECK-NEXT: xxspltd 34, 0, 0
|
||||||
; CHECK-NEXT: xxspltd 35, 1, 0
|
; CHECK-NEXT: xxspltd 35, 8, 0
|
||||||
; CHECK-NEXT: xxsel 1, 6, 4, 37
|
; CHECK-NEXT: isel 4, 0, 3, 20
|
||||||
; CHECK-NEXT: xxsel 4, 6, 4, 40
|
; CHECK-NEXT: isel 5, 0, 3, 21
|
||||||
; CHECK-NEXT: xxlxor 5, 36, 41
|
; CHECK-NEXT: isel 3, 0, 3, 22
|
||||||
; CHECK-NEXT: xxlxor 6, 34, 35
|
; CHECK-NEXT: xxlxor 0, 34, 35
|
||||||
; CHECK-NEXT: xxsel 34, 32, 0, 7
|
; CHECK-NEXT: xxspltd 34, 1, 0
|
||||||
; CHECK-NEXT: xxsel 35, 33, 1, 2
|
; CHECK-NEXT: mtfprd 8, 4
|
||||||
; CHECK-NEXT: xxsel 36, 38, 3, 5
|
; CHECK-NEXT: mtfprd 1, 5
|
||||||
; CHECK-NEXT: xxsel 37, 42, 4, 6
|
; CHECK-NEXT: mtfprd 9, 3
|
||||||
|
; CHECK-NEXT: xxspltd 35, 8, 0
|
||||||
|
; CHECK-NEXT: xxspltd 37, 1, 0
|
||||||
|
; CHECK-NEXT: xxspltd 39, 9, 0
|
||||||
|
; CHECK-NEXT: xxlxor 1, 34, 35
|
||||||
|
; CHECK-NEXT: xxsel 34, 32, 3, 0
|
||||||
|
; CHECK-NEXT: xxlxor 2, 36, 37
|
||||||
|
; CHECK-NEXT: xxlxor 4, 38, 39
|
||||||
|
; CHECK-NEXT: xxsel 35, 33, 5, 1
|
||||||
|
; CHECK-NEXT: xxsel 36, 42, 6, 2
|
||||||
|
; CHECK-NEXT: xxsel 37, 43, 7, 4
|
||||||
; CHECK-NEXT: blr
|
; CHECK-NEXT: blr
|
||||||
%c = call <4 x i128> @llvm.sadd.sat.v4i128(<4 x i128> %a, <4 x i128> %b)
|
%c = call <4 x i128> @llvm.sadd.sat.v4i128(<4 x i128> %a, <4 x i128> %b)
|
||||||
ret <4 x i128> %c
|
ret <4 x i128> %c
|
||||||
|
|
|
||||||
|
|
@ -21,9 +21,9 @@ define signext i32 @func(i32 signext %x, i32 signext %y) nounwind {
|
||||||
; RV32I-NEXT: slti a1, a1, 0
|
; RV32I-NEXT: slti a1, a1, 0
|
||||||
; RV32I-NEXT: beq a1, a2, .LBB0_2
|
; RV32I-NEXT: beq a1, a2, .LBB0_2
|
||||||
; RV32I-NEXT: # %bb.1:
|
; RV32I-NEXT: # %bb.1:
|
||||||
; RV32I-NEXT: slti a0, a0, 0
|
; RV32I-NEXT: srai a0, a0, 31
|
||||||
; RV32I-NEXT: lui a1, 524288
|
; RV32I-NEXT: lui a1, 524288
|
||||||
; RV32I-NEXT: sub a0, a1, a0
|
; RV32I-NEXT: xor a0, a0, a1
|
||||||
; RV32I-NEXT: .LBB0_2:
|
; RV32I-NEXT: .LBB0_2:
|
||||||
; RV32I-NEXT: ret
|
; RV32I-NEXT: ret
|
||||||
;
|
;
|
||||||
|
|
@ -52,9 +52,9 @@ define signext i32 @func(i32 signext %x, i32 signext %y) nounwind {
|
||||||
; RV32IZbbNOZbt-NEXT: slti a1, a1, 0
|
; RV32IZbbNOZbt-NEXT: slti a1, a1, 0
|
||||||
; RV32IZbbNOZbt-NEXT: beq a1, a2, .LBB0_2
|
; RV32IZbbNOZbt-NEXT: beq a1, a2, .LBB0_2
|
||||||
; RV32IZbbNOZbt-NEXT: # %bb.1:
|
; RV32IZbbNOZbt-NEXT: # %bb.1:
|
||||||
; RV32IZbbNOZbt-NEXT: slti a0, a0, 0
|
; RV32IZbbNOZbt-NEXT: srai a0, a0, 31
|
||||||
; RV32IZbbNOZbt-NEXT: lui a1, 524288
|
; RV32IZbbNOZbt-NEXT: lui a1, 524288
|
||||||
; RV32IZbbNOZbt-NEXT: sub a0, a1, a0
|
; RV32IZbbNOZbt-NEXT: xor a0, a0, a1
|
||||||
; RV32IZbbNOZbt-NEXT: .LBB0_2:
|
; RV32IZbbNOZbt-NEXT: .LBB0_2:
|
||||||
; RV32IZbbNOZbt-NEXT: ret
|
; RV32IZbbNOZbt-NEXT: ret
|
||||||
;
|
;
|
||||||
|
|
@ -70,14 +70,13 @@ define signext i32 @func(i32 signext %x, i32 signext %y) nounwind {
|
||||||
; RV32IZbbZbt-LABEL: func:
|
; RV32IZbbZbt-LABEL: func:
|
||||||
; RV32IZbbZbt: # %bb.0:
|
; RV32IZbbZbt: # %bb.0:
|
||||||
; RV32IZbbZbt-NEXT: add a2, a0, a1
|
; RV32IZbbZbt-NEXT: add a2, a0, a1
|
||||||
; RV32IZbbZbt-NEXT: slti a3, a2, 0
|
|
||||||
; RV32IZbbZbt-NEXT: lui a4, 524288
|
|
||||||
; RV32IZbbZbt-NEXT: addi a5, a4, -1
|
|
||||||
; RV32IZbbZbt-NEXT: cmov a3, a3, a5, a4
|
|
||||||
; RV32IZbbZbt-NEXT: slt a0, a2, a0
|
; RV32IZbbZbt-NEXT: slt a0, a2, a0
|
||||||
; RV32IZbbZbt-NEXT: slti a1, a1, 0
|
; RV32IZbbZbt-NEXT: slti a1, a1, 0
|
||||||
; RV32IZbbZbt-NEXT: xor a0, a1, a0
|
; RV32IZbbZbt-NEXT: xor a0, a1, a0
|
||||||
; RV32IZbbZbt-NEXT: cmov a0, a0, a3, a2
|
; RV32IZbbZbt-NEXT: srai a1, a2, 31
|
||||||
|
; RV32IZbbZbt-NEXT: lui a3, 524288
|
||||||
|
; RV32IZbbZbt-NEXT: xor a1, a1, a3
|
||||||
|
; RV32IZbbZbt-NEXT: cmov a0, a0, a1, a2
|
||||||
; RV32IZbbZbt-NEXT: ret
|
; RV32IZbbZbt-NEXT: ret
|
||||||
%tmp = call i32 @llvm.sadd.sat.i32(i32 %x, i32 %y);
|
%tmp = call i32 @llvm.sadd.sat.i32(i32 %x, i32 %y);
|
||||||
ret i32 %tmp;
|
ret i32 %tmp;
|
||||||
|
|
@ -98,11 +97,9 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
|
||||||
; RV32I-NEXT: and a2, a3, a2
|
; RV32I-NEXT: and a2, a3, a2
|
||||||
; RV32I-NEXT: bgez a2, .LBB1_2
|
; RV32I-NEXT: bgez a2, .LBB1_2
|
||||||
; RV32I-NEXT: # %bb.1:
|
; RV32I-NEXT: # %bb.1:
|
||||||
; RV32I-NEXT: slti a0, a1, 0
|
|
||||||
; RV32I-NEXT: lui a2, 524288
|
|
||||||
; RV32I-NEXT: sub a2, a2, a0
|
|
||||||
; RV32I-NEXT: srai a0, a1, 31
|
; RV32I-NEXT: srai a0, a1, 31
|
||||||
; RV32I-NEXT: mv a1, a2
|
; RV32I-NEXT: lui a1, 524288
|
||||||
|
; RV32I-NEXT: xor a1, a0, a1
|
||||||
; RV32I-NEXT: .LBB1_2:
|
; RV32I-NEXT: .LBB1_2:
|
||||||
; RV32I-NEXT: ret
|
; RV32I-NEXT: ret
|
||||||
;
|
;
|
||||||
|
|
@ -114,10 +111,10 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
|
||||||
; RV64I-NEXT: slti a1, a1, 0
|
; RV64I-NEXT: slti a1, a1, 0
|
||||||
; RV64I-NEXT: beq a1, a2, .LBB1_2
|
; RV64I-NEXT: beq a1, a2, .LBB1_2
|
||||||
; RV64I-NEXT: # %bb.1:
|
; RV64I-NEXT: # %bb.1:
|
||||||
; RV64I-NEXT: slti a0, a0, 0
|
; RV64I-NEXT: srai a0, a0, 63
|
||||||
; RV64I-NEXT: addi a1, zero, -1
|
; RV64I-NEXT: addi a1, zero, -1
|
||||||
; RV64I-NEXT: slli a1, a1, 63
|
; RV64I-NEXT: slli a1, a1, 63
|
||||||
; RV64I-NEXT: sub a0, a1, a0
|
; RV64I-NEXT: xor a0, a0, a1
|
||||||
; RV64I-NEXT: .LBB1_2:
|
; RV64I-NEXT: .LBB1_2:
|
||||||
; RV64I-NEXT: ret
|
; RV64I-NEXT: ret
|
||||||
;
|
;
|
||||||
|
|
@ -134,11 +131,9 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
|
||||||
; RV32IZbbNOZbt-NEXT: andn a2, a2, a3
|
; RV32IZbbNOZbt-NEXT: andn a2, a2, a3
|
||||||
; RV32IZbbNOZbt-NEXT: bgez a2, .LBB1_2
|
; RV32IZbbNOZbt-NEXT: bgez a2, .LBB1_2
|
||||||
; RV32IZbbNOZbt-NEXT: # %bb.1:
|
; RV32IZbbNOZbt-NEXT: # %bb.1:
|
||||||
; RV32IZbbNOZbt-NEXT: slti a0, a1, 0
|
|
||||||
; RV32IZbbNOZbt-NEXT: lui a2, 524288
|
|
||||||
; RV32IZbbNOZbt-NEXT: sub a2, a2, a0
|
|
||||||
; RV32IZbbNOZbt-NEXT: srai a0, a1, 31
|
; RV32IZbbNOZbt-NEXT: srai a0, a1, 31
|
||||||
; RV32IZbbNOZbt-NEXT: mv a1, a2
|
; RV32IZbbNOZbt-NEXT: lui a1, 524288
|
||||||
|
; RV32IZbbNOZbt-NEXT: xor a1, a0, a1
|
||||||
; RV32IZbbNOZbt-NEXT: .LBB1_2:
|
; RV32IZbbNOZbt-NEXT: .LBB1_2:
|
||||||
; RV32IZbbNOZbt-NEXT: ret
|
; RV32IZbbNOZbt-NEXT: ret
|
||||||
;
|
;
|
||||||
|
|
@ -150,10 +145,10 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
|
||||||
; RV64IZbbNOZbt-NEXT: slti a1, a1, 0
|
; RV64IZbbNOZbt-NEXT: slti a1, a1, 0
|
||||||
; RV64IZbbNOZbt-NEXT: beq a1, a2, .LBB1_2
|
; RV64IZbbNOZbt-NEXT: beq a1, a2, .LBB1_2
|
||||||
; RV64IZbbNOZbt-NEXT: # %bb.1:
|
; RV64IZbbNOZbt-NEXT: # %bb.1:
|
||||||
; RV64IZbbNOZbt-NEXT: slti a0, a0, 0
|
; RV64IZbbNOZbt-NEXT: srai a0, a0, 63
|
||||||
; RV64IZbbNOZbt-NEXT: addi a1, zero, -1
|
; RV64IZbbNOZbt-NEXT: addi a1, zero, -1
|
||||||
; RV64IZbbNOZbt-NEXT: slli a1, a1, 63
|
; RV64IZbbNOZbt-NEXT: slli a1, a1, 63
|
||||||
; RV64IZbbNOZbt-NEXT: sub a0, a1, a0
|
; RV64IZbbNOZbt-NEXT: xor a0, a0, a1
|
||||||
; RV64IZbbNOZbt-NEXT: .LBB1_2:
|
; RV64IZbbNOZbt-NEXT: .LBB1_2:
|
||||||
; RV64IZbbNOZbt-NEXT: ret
|
; RV64IZbbNOZbt-NEXT: ret
|
||||||
;
|
;
|
||||||
|
|
@ -163,31 +158,28 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
|
||||||
; RV32IZbbZbt-NEXT: add a2, a0, a2
|
; RV32IZbbZbt-NEXT: add a2, a0, a2
|
||||||
; RV32IZbbZbt-NEXT: sltu a0, a2, a0
|
; RV32IZbbZbt-NEXT: sltu a0, a2, a0
|
||||||
; RV32IZbbZbt-NEXT: add a0, a4, a0
|
; RV32IZbbZbt-NEXT: add a0, a4, a0
|
||||||
; RV32IZbbZbt-NEXT: slti a4, a0, 0
|
; RV32IZbbZbt-NEXT: srai a4, a0, 31
|
||||||
; RV32IZbbZbt-NEXT: lui a6, 524288
|
; RV32IZbbZbt-NEXT: lui a5, 524288
|
||||||
; RV32IZbbZbt-NEXT: addi a5, a6, -1
|
; RV32IZbbZbt-NEXT: xor a6, a4, a5
|
||||||
; RV32IZbbZbt-NEXT: cmov a4, a4, a5, a6
|
|
||||||
; RV32IZbbZbt-NEXT: xor a5, a1, a0
|
; RV32IZbbZbt-NEXT: xor a5, a1, a0
|
||||||
; RV32IZbbZbt-NEXT: xor a1, a1, a3
|
; RV32IZbbZbt-NEXT: xor a1, a1, a3
|
||||||
; RV32IZbbZbt-NEXT: andn a1, a5, a1
|
; RV32IZbbZbt-NEXT: andn a1, a5, a1
|
||||||
; RV32IZbbZbt-NEXT: slti a3, a1, 0
|
; RV32IZbbZbt-NEXT: slti a3, a1, 0
|
||||||
; RV32IZbbZbt-NEXT: cmov a1, a3, a4, a0
|
; RV32IZbbZbt-NEXT: cmov a1, a3, a6, a0
|
||||||
; RV32IZbbZbt-NEXT: srai a0, a0, 31
|
; RV32IZbbZbt-NEXT: cmov a0, a3, a4, a2
|
||||||
; RV32IZbbZbt-NEXT: cmov a0, a3, a0, a2
|
|
||||||
; RV32IZbbZbt-NEXT: ret
|
; RV32IZbbZbt-NEXT: ret
|
||||||
;
|
;
|
||||||
; RV64IZbbZbt-LABEL: func2:
|
; RV64IZbbZbt-LABEL: func2:
|
||||||
; RV64IZbbZbt: # %bb.0:
|
; RV64IZbbZbt: # %bb.0:
|
||||||
; RV64IZbbZbt-NEXT: add a2, a0, a1
|
; RV64IZbbZbt-NEXT: add a2, a0, a1
|
||||||
; RV64IZbbZbt-NEXT: slti a3, a2, 0
|
|
||||||
; RV64IZbbZbt-NEXT: addi a4, zero, -1
|
|
||||||
; RV64IZbbZbt-NEXT: slli a5, a4, 63
|
|
||||||
; RV64IZbbZbt-NEXT: srli a4, a4, 1
|
|
||||||
; RV64IZbbZbt-NEXT: cmov a3, a3, a4, a5
|
|
||||||
; RV64IZbbZbt-NEXT: slt a0, a2, a0
|
; RV64IZbbZbt-NEXT: slt a0, a2, a0
|
||||||
; RV64IZbbZbt-NEXT: slti a1, a1, 0
|
; RV64IZbbZbt-NEXT: slti a1, a1, 0
|
||||||
; RV64IZbbZbt-NEXT: xor a0, a1, a0
|
; RV64IZbbZbt-NEXT: xor a0, a1, a0
|
||||||
; RV64IZbbZbt-NEXT: cmov a0, a0, a3, a2
|
; RV64IZbbZbt-NEXT: srai a1, a2, 63
|
||||||
|
; RV64IZbbZbt-NEXT: addi a3, zero, -1
|
||||||
|
; RV64IZbbZbt-NEXT: slli a3, a3, 63
|
||||||
|
; RV64IZbbZbt-NEXT: xor a1, a1, a3
|
||||||
|
; RV64IZbbZbt-NEXT: cmov a0, a0, a1, a2
|
||||||
; RV64IZbbZbt-NEXT: ret
|
; RV64IZbbZbt-NEXT: ret
|
||||||
%tmp = call i64 @llvm.sadd.sat.i64(i64 %x, i64 %y);
|
%tmp = call i64 @llvm.sadd.sat.i64(i64 %x, i64 %y);
|
||||||
ret i64 %tmp;
|
ret i64 %tmp;
|
||||||
|
|
|
||||||
|
|
@ -22,9 +22,9 @@ define i32 @func32(i32 %x, i32 %y, i32 %z) nounwind {
|
||||||
; RV32I-NEXT: slti a1, a1, 0
|
; RV32I-NEXT: slti a1, a1, 0
|
||||||
; RV32I-NEXT: beq a1, a2, .LBB0_2
|
; RV32I-NEXT: beq a1, a2, .LBB0_2
|
||||||
; RV32I-NEXT: # %bb.1:
|
; RV32I-NEXT: # %bb.1:
|
||||||
; RV32I-NEXT: slti a0, a0, 0
|
; RV32I-NEXT: srai a0, a0, 31
|
||||||
; RV32I-NEXT: lui a1, 524288
|
; RV32I-NEXT: lui a1, 524288
|
||||||
; RV32I-NEXT: sub a0, a1, a0
|
; RV32I-NEXT: xor a0, a0, a1
|
||||||
; RV32I-NEXT: .LBB0_2:
|
; RV32I-NEXT: .LBB0_2:
|
||||||
; RV32I-NEXT: ret
|
; RV32I-NEXT: ret
|
||||||
;
|
;
|
||||||
|
|
@ -56,9 +56,9 @@ define i32 @func32(i32 %x, i32 %y, i32 %z) nounwind {
|
||||||
; RV32IZbbNOZbt-NEXT: slti a1, a1, 0
|
; RV32IZbbNOZbt-NEXT: slti a1, a1, 0
|
||||||
; RV32IZbbNOZbt-NEXT: beq a1, a2, .LBB0_2
|
; RV32IZbbNOZbt-NEXT: beq a1, a2, .LBB0_2
|
||||||
; RV32IZbbNOZbt-NEXT: # %bb.1:
|
; RV32IZbbNOZbt-NEXT: # %bb.1:
|
||||||
; RV32IZbbNOZbt-NEXT: slti a0, a0, 0
|
; RV32IZbbNOZbt-NEXT: srai a0, a0, 31
|
||||||
; RV32IZbbNOZbt-NEXT: lui a1, 524288
|
; RV32IZbbNOZbt-NEXT: lui a1, 524288
|
||||||
; RV32IZbbNOZbt-NEXT: sub a0, a1, a0
|
; RV32IZbbNOZbt-NEXT: xor a0, a0, a1
|
||||||
; RV32IZbbNOZbt-NEXT: .LBB0_2:
|
; RV32IZbbNOZbt-NEXT: .LBB0_2:
|
||||||
; RV32IZbbNOZbt-NEXT: ret
|
; RV32IZbbNOZbt-NEXT: ret
|
||||||
;
|
;
|
||||||
|
|
@ -80,10 +80,9 @@ define i32 @func32(i32 %x, i32 %y, i32 %z) nounwind {
|
||||||
; RV32IZbbZbt-NEXT: slt a0, a2, a0
|
; RV32IZbbZbt-NEXT: slt a0, a2, a0
|
||||||
; RV32IZbbZbt-NEXT: slti a1, a1, 0
|
; RV32IZbbZbt-NEXT: slti a1, a1, 0
|
||||||
; RV32IZbbZbt-NEXT: xor a0, a1, a0
|
; RV32IZbbZbt-NEXT: xor a0, a1, a0
|
||||||
; RV32IZbbZbt-NEXT: slti a1, a2, 0
|
; RV32IZbbZbt-NEXT: srai a1, a2, 31
|
||||||
; RV32IZbbZbt-NEXT: lui a3, 524288
|
; RV32IZbbZbt-NEXT: lui a3, 524288
|
||||||
; RV32IZbbZbt-NEXT: addi a4, a3, -1
|
; RV32IZbbZbt-NEXT: xor a1, a1, a3
|
||||||
; RV32IZbbZbt-NEXT: cmov a1, a1, a4, a3
|
|
||||||
; RV32IZbbZbt-NEXT: cmov a0, a0, a1, a2
|
; RV32IZbbZbt-NEXT: cmov a0, a0, a1, a2
|
||||||
; RV32IZbbZbt-NEXT: ret
|
; RV32IZbbZbt-NEXT: ret
|
||||||
%a = mul i32 %y, %z
|
%a = mul i32 %y, %z
|
||||||
|
|
@ -106,11 +105,9 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind {
|
||||||
; RV32I-NEXT: and a2, a2, a3
|
; RV32I-NEXT: and a2, a2, a3
|
||||||
; RV32I-NEXT: bgez a2, .LBB1_2
|
; RV32I-NEXT: bgez a2, .LBB1_2
|
||||||
; RV32I-NEXT: # %bb.1:
|
; RV32I-NEXT: # %bb.1:
|
||||||
; RV32I-NEXT: slti a0, a1, 0
|
|
||||||
; RV32I-NEXT: lui a2, 524288
|
|
||||||
; RV32I-NEXT: sub a2, a2, a0
|
|
||||||
; RV32I-NEXT: srai a0, a1, 31
|
; RV32I-NEXT: srai a0, a1, 31
|
||||||
; RV32I-NEXT: mv a1, a2
|
; RV32I-NEXT: lui a1, 524288
|
||||||
|
; RV32I-NEXT: xor a1, a0, a1
|
||||||
; RV32I-NEXT: .LBB1_2:
|
; RV32I-NEXT: .LBB1_2:
|
||||||
; RV32I-NEXT: ret
|
; RV32I-NEXT: ret
|
||||||
;
|
;
|
||||||
|
|
@ -122,10 +119,10 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind {
|
||||||
; RV64I-NEXT: slti a2, a2, 0
|
; RV64I-NEXT: slti a2, a2, 0
|
||||||
; RV64I-NEXT: beq a2, a1, .LBB1_2
|
; RV64I-NEXT: beq a2, a1, .LBB1_2
|
||||||
; RV64I-NEXT: # %bb.1:
|
; RV64I-NEXT: # %bb.1:
|
||||||
; RV64I-NEXT: slti a0, a0, 0
|
; RV64I-NEXT: srai a0, a0, 63
|
||||||
; RV64I-NEXT: addi a1, zero, -1
|
; RV64I-NEXT: addi a1, zero, -1
|
||||||
; RV64I-NEXT: slli a1, a1, 63
|
; RV64I-NEXT: slli a1, a1, 63
|
||||||
; RV64I-NEXT: sub a0, a1, a0
|
; RV64I-NEXT: xor a0, a0, a1
|
||||||
; RV64I-NEXT: .LBB1_2:
|
; RV64I-NEXT: .LBB1_2:
|
||||||
; RV64I-NEXT: ret
|
; RV64I-NEXT: ret
|
||||||
;
|
;
|
||||||
|
|
@ -142,11 +139,9 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind {
|
||||||
; RV32IZbbNOZbt-NEXT: andn a2, a3, a2
|
; RV32IZbbNOZbt-NEXT: andn a2, a3, a2
|
||||||
; RV32IZbbNOZbt-NEXT: bgez a2, .LBB1_2
|
; RV32IZbbNOZbt-NEXT: bgez a2, .LBB1_2
|
||||||
; RV32IZbbNOZbt-NEXT: # %bb.1:
|
; RV32IZbbNOZbt-NEXT: # %bb.1:
|
||||||
; RV32IZbbNOZbt-NEXT: slti a0, a1, 0
|
|
||||||
; RV32IZbbNOZbt-NEXT: lui a2, 524288
|
|
||||||
; RV32IZbbNOZbt-NEXT: sub a2, a2, a0
|
|
||||||
; RV32IZbbNOZbt-NEXT: srai a0, a1, 31
|
; RV32IZbbNOZbt-NEXT: srai a0, a1, 31
|
||||||
; RV32IZbbNOZbt-NEXT: mv a1, a2
|
; RV32IZbbNOZbt-NEXT: lui a1, 524288
|
||||||
|
; RV32IZbbNOZbt-NEXT: xor a1, a0, a1
|
||||||
; RV32IZbbNOZbt-NEXT: .LBB1_2:
|
; RV32IZbbNOZbt-NEXT: .LBB1_2:
|
||||||
; RV32IZbbNOZbt-NEXT: ret
|
; RV32IZbbNOZbt-NEXT: ret
|
||||||
;
|
;
|
||||||
|
|
@ -158,10 +153,10 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind {
|
||||||
; RV64IZbbNOZbt-NEXT: slti a2, a2, 0
|
; RV64IZbbNOZbt-NEXT: slti a2, a2, 0
|
||||||
; RV64IZbbNOZbt-NEXT: beq a2, a1, .LBB1_2
|
; RV64IZbbNOZbt-NEXT: beq a2, a1, .LBB1_2
|
||||||
; RV64IZbbNOZbt-NEXT: # %bb.1:
|
; RV64IZbbNOZbt-NEXT: # %bb.1:
|
||||||
; RV64IZbbNOZbt-NEXT: slti a0, a0, 0
|
; RV64IZbbNOZbt-NEXT: srai a0, a0, 63
|
||||||
; RV64IZbbNOZbt-NEXT: addi a1, zero, -1
|
; RV64IZbbNOZbt-NEXT: addi a1, zero, -1
|
||||||
; RV64IZbbNOZbt-NEXT: slli a1, a1, 63
|
; RV64IZbbNOZbt-NEXT: slli a1, a1, 63
|
||||||
; RV64IZbbNOZbt-NEXT: sub a0, a1, a0
|
; RV64IZbbNOZbt-NEXT: xor a0, a0, a1
|
||||||
; RV64IZbbNOZbt-NEXT: .LBB1_2:
|
; RV64IZbbNOZbt-NEXT: .LBB1_2:
|
||||||
; RV64IZbbNOZbt-NEXT: ret
|
; RV64IZbbNOZbt-NEXT: ret
|
||||||
;
|
;
|
||||||
|
|
@ -171,31 +166,28 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind {
|
||||||
; RV32IZbbZbt-NEXT: add a3, a0, a4
|
; RV32IZbbZbt-NEXT: add a3, a0, a4
|
||||||
; RV32IZbbZbt-NEXT: sltu a0, a3, a0
|
; RV32IZbbZbt-NEXT: sltu a0, a3, a0
|
||||||
; RV32IZbbZbt-NEXT: add a0, a2, a0
|
; RV32IZbbZbt-NEXT: add a0, a2, a0
|
||||||
; RV32IZbbZbt-NEXT: slti a2, a0, 0
|
; RV32IZbbZbt-NEXT: srai a2, a0, 31
|
||||||
; RV32IZbbZbt-NEXT: lui a6, 524288
|
; RV32IZbbZbt-NEXT: lui a4, 524288
|
||||||
; RV32IZbbZbt-NEXT: addi a4, a6, -1
|
; RV32IZbbZbt-NEXT: xor a6, a2, a4
|
||||||
; RV32IZbbZbt-NEXT: cmov a2, a2, a4, a6
|
|
||||||
; RV32IZbbZbt-NEXT: xor a4, a1, a0
|
; RV32IZbbZbt-NEXT: xor a4, a1, a0
|
||||||
; RV32IZbbZbt-NEXT: xor a1, a1, a5
|
; RV32IZbbZbt-NEXT: xor a1, a1, a5
|
||||||
; RV32IZbbZbt-NEXT: andn a1, a4, a1
|
; RV32IZbbZbt-NEXT: andn a1, a4, a1
|
||||||
; RV32IZbbZbt-NEXT: slti a4, a1, 0
|
; RV32IZbbZbt-NEXT: slti a4, a1, 0
|
||||||
; RV32IZbbZbt-NEXT: cmov a1, a4, a2, a0
|
; RV32IZbbZbt-NEXT: cmov a1, a4, a6, a0
|
||||||
; RV32IZbbZbt-NEXT: srai a0, a0, 31
|
; RV32IZbbZbt-NEXT: cmov a0, a4, a2, a3
|
||||||
; RV32IZbbZbt-NEXT: cmov a0, a4, a0, a3
|
|
||||||
; RV32IZbbZbt-NEXT: ret
|
; RV32IZbbZbt-NEXT: ret
|
||||||
;
|
;
|
||||||
; RV64IZbbZbt-LABEL: func64:
|
; RV64IZbbZbt-LABEL: func64:
|
||||||
; RV64IZbbZbt: # %bb.0:
|
; RV64IZbbZbt: # %bb.0:
|
||||||
; RV64IZbbZbt-NEXT: add a1, a0, a2
|
; RV64IZbbZbt-NEXT: add a1, a0, a2
|
||||||
; RV64IZbbZbt-NEXT: slti a3, a1, 0
|
|
||||||
; RV64IZbbZbt-NEXT: addi a4, zero, -1
|
|
||||||
; RV64IZbbZbt-NEXT: slli a5, a4, 63
|
|
||||||
; RV64IZbbZbt-NEXT: srli a4, a4, 1
|
|
||||||
; RV64IZbbZbt-NEXT: cmov a3, a3, a4, a5
|
|
||||||
; RV64IZbbZbt-NEXT: slt a0, a1, a0
|
; RV64IZbbZbt-NEXT: slt a0, a1, a0
|
||||||
; RV64IZbbZbt-NEXT: slti a2, a2, 0
|
; RV64IZbbZbt-NEXT: slti a2, a2, 0
|
||||||
; RV64IZbbZbt-NEXT: xor a0, a2, a0
|
; RV64IZbbZbt-NEXT: xor a0, a2, a0
|
||||||
; RV64IZbbZbt-NEXT: cmov a0, a0, a3, a1
|
; RV64IZbbZbt-NEXT: srai a2, a1, 63
|
||||||
|
; RV64IZbbZbt-NEXT: addi a3, zero, -1
|
||||||
|
; RV64IZbbZbt-NEXT: slli a3, a3, 63
|
||||||
|
; RV64IZbbZbt-NEXT: xor a2, a2, a3
|
||||||
|
; RV64IZbbZbt-NEXT: cmov a0, a0, a2, a1
|
||||||
; RV64IZbbZbt-NEXT: ret
|
; RV64IZbbZbt-NEXT: ret
|
||||||
%a = mul i64 %y, %z
|
%a = mul i64 %y, %z
|
||||||
%tmp = call i64 @llvm.sadd.sat.i64(i64 %x, i64 %z)
|
%tmp = call i64 @llvm.sadd.sat.i64(i64 %x, i64 %z)
|
||||||
|
|
|
||||||
|
|
@ -21,9 +21,9 @@ define signext i32 @func(i32 signext %x, i32 signext %y) nounwind {
|
||||||
; RV32I-NEXT: slt a1, a0, a2
|
; RV32I-NEXT: slt a1, a0, a2
|
||||||
; RV32I-NEXT: beq a3, a1, .LBB0_2
|
; RV32I-NEXT: beq a3, a1, .LBB0_2
|
||||||
; RV32I-NEXT: # %bb.1:
|
; RV32I-NEXT: # %bb.1:
|
||||||
; RV32I-NEXT: slti a0, a0, 0
|
; RV32I-NEXT: srai a0, a0, 31
|
||||||
; RV32I-NEXT: lui a1, 524288
|
; RV32I-NEXT: lui a1, 524288
|
||||||
; RV32I-NEXT: sub a0, a1, a0
|
; RV32I-NEXT: xor a0, a0, a1
|
||||||
; RV32I-NEXT: .LBB0_2:
|
; RV32I-NEXT: .LBB0_2:
|
||||||
; RV32I-NEXT: ret
|
; RV32I-NEXT: ret
|
||||||
;
|
;
|
||||||
|
|
@ -52,9 +52,9 @@ define signext i32 @func(i32 signext %x, i32 signext %y) nounwind {
|
||||||
; RV32IZbbNOZbt-NEXT: slt a1, a0, a2
|
; RV32IZbbNOZbt-NEXT: slt a1, a0, a2
|
||||||
; RV32IZbbNOZbt-NEXT: beq a3, a1, .LBB0_2
|
; RV32IZbbNOZbt-NEXT: beq a3, a1, .LBB0_2
|
||||||
; RV32IZbbNOZbt-NEXT: # %bb.1:
|
; RV32IZbbNOZbt-NEXT: # %bb.1:
|
||||||
; RV32IZbbNOZbt-NEXT: slti a0, a0, 0
|
; RV32IZbbNOZbt-NEXT: srai a0, a0, 31
|
||||||
; RV32IZbbNOZbt-NEXT: lui a1, 524288
|
; RV32IZbbNOZbt-NEXT: lui a1, 524288
|
||||||
; RV32IZbbNOZbt-NEXT: sub a0, a1, a0
|
; RV32IZbbNOZbt-NEXT: xor a0, a0, a1
|
||||||
; RV32IZbbNOZbt-NEXT: .LBB0_2:
|
; RV32IZbbNOZbt-NEXT: .LBB0_2:
|
||||||
; RV32IZbbNOZbt-NEXT: ret
|
; RV32IZbbNOZbt-NEXT: ret
|
||||||
;
|
;
|
||||||
|
|
@ -73,10 +73,9 @@ define signext i32 @func(i32 signext %x, i32 signext %y) nounwind {
|
||||||
; RV32IZbbZbt-NEXT: sub a1, a0, a1
|
; RV32IZbbZbt-NEXT: sub a1, a0, a1
|
||||||
; RV32IZbbZbt-NEXT: slt a0, a1, a0
|
; RV32IZbbZbt-NEXT: slt a0, a1, a0
|
||||||
; RV32IZbbZbt-NEXT: xor a0, a2, a0
|
; RV32IZbbZbt-NEXT: xor a0, a2, a0
|
||||||
; RV32IZbbZbt-NEXT: slti a2, a1, 0
|
; RV32IZbbZbt-NEXT: srai a2, a1, 31
|
||||||
; RV32IZbbZbt-NEXT: lui a3, 524288
|
; RV32IZbbZbt-NEXT: lui a3, 524288
|
||||||
; RV32IZbbZbt-NEXT: addi a4, a3, -1
|
; RV32IZbbZbt-NEXT: xor a2, a2, a3
|
||||||
; RV32IZbbZbt-NEXT: cmov a2, a2, a4, a3
|
|
||||||
; RV32IZbbZbt-NEXT: cmov a0, a0, a2, a1
|
; RV32IZbbZbt-NEXT: cmov a0, a0, a2, a1
|
||||||
; RV32IZbbZbt-NEXT: ret
|
; RV32IZbbZbt-NEXT: ret
|
||||||
%tmp = call i32 @llvm.ssub.sat.i32(i32 %x, i32 %y);
|
%tmp = call i32 @llvm.ssub.sat.i32(i32 %x, i32 %y);
|
||||||
|
|
@ -98,11 +97,9 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
|
||||||
; RV32I-NEXT: sub a0, a0, a2
|
; RV32I-NEXT: sub a0, a0, a2
|
||||||
; RV32I-NEXT: ret
|
; RV32I-NEXT: ret
|
||||||
; RV32I-NEXT: .LBB1_2:
|
; RV32I-NEXT: .LBB1_2:
|
||||||
; RV32I-NEXT: slti a0, a1, 0
|
|
||||||
; RV32I-NEXT: lui a2, 524288
|
|
||||||
; RV32I-NEXT: sub a2, a2, a0
|
|
||||||
; RV32I-NEXT: srai a0, a1, 31
|
; RV32I-NEXT: srai a0, a1, 31
|
||||||
; RV32I-NEXT: mv a1, a2
|
; RV32I-NEXT: lui a1, 524288
|
||||||
|
; RV32I-NEXT: xor a1, a0, a1
|
||||||
; RV32I-NEXT: ret
|
; RV32I-NEXT: ret
|
||||||
;
|
;
|
||||||
; RV64I-LABEL: func2:
|
; RV64I-LABEL: func2:
|
||||||
|
|
@ -113,10 +110,10 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
|
||||||
; RV64I-NEXT: slt a1, a0, a2
|
; RV64I-NEXT: slt a1, a0, a2
|
||||||
; RV64I-NEXT: beq a3, a1, .LBB1_2
|
; RV64I-NEXT: beq a3, a1, .LBB1_2
|
||||||
; RV64I-NEXT: # %bb.1:
|
; RV64I-NEXT: # %bb.1:
|
||||||
; RV64I-NEXT: slti a0, a0, 0
|
; RV64I-NEXT: srai a0, a0, 63
|
||||||
; RV64I-NEXT: addi a1, zero, -1
|
; RV64I-NEXT: addi a1, zero, -1
|
||||||
; RV64I-NEXT: slli a1, a1, 63
|
; RV64I-NEXT: slli a1, a1, 63
|
||||||
; RV64I-NEXT: sub a0, a1, a0
|
; RV64I-NEXT: xor a0, a0, a1
|
||||||
; RV64I-NEXT: .LBB1_2:
|
; RV64I-NEXT: .LBB1_2:
|
||||||
; RV64I-NEXT: ret
|
; RV64I-NEXT: ret
|
||||||
;
|
;
|
||||||
|
|
@ -134,11 +131,9 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
|
||||||
; RV32IZbbNOZbt-NEXT: sub a0, a0, a2
|
; RV32IZbbNOZbt-NEXT: sub a0, a0, a2
|
||||||
; RV32IZbbNOZbt-NEXT: ret
|
; RV32IZbbNOZbt-NEXT: ret
|
||||||
; RV32IZbbNOZbt-NEXT: .LBB1_2:
|
; RV32IZbbNOZbt-NEXT: .LBB1_2:
|
||||||
; RV32IZbbNOZbt-NEXT: slti a0, a1, 0
|
|
||||||
; RV32IZbbNOZbt-NEXT: lui a2, 524288
|
|
||||||
; RV32IZbbNOZbt-NEXT: sub a2, a2, a0
|
|
||||||
; RV32IZbbNOZbt-NEXT: srai a0, a1, 31
|
; RV32IZbbNOZbt-NEXT: srai a0, a1, 31
|
||||||
; RV32IZbbNOZbt-NEXT: mv a1, a2
|
; RV32IZbbNOZbt-NEXT: lui a1, 524288
|
||||||
|
; RV32IZbbNOZbt-NEXT: xor a1, a0, a1
|
||||||
; RV32IZbbNOZbt-NEXT: ret
|
; RV32IZbbNOZbt-NEXT: ret
|
||||||
;
|
;
|
||||||
; RV64IZbbNOZbt-LABEL: func2:
|
; RV64IZbbNOZbt-LABEL: func2:
|
||||||
|
|
@ -149,10 +144,10 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
|
||||||
; RV64IZbbNOZbt-NEXT: slt a1, a0, a2
|
; RV64IZbbNOZbt-NEXT: slt a1, a0, a2
|
||||||
; RV64IZbbNOZbt-NEXT: beq a3, a1, .LBB1_2
|
; RV64IZbbNOZbt-NEXT: beq a3, a1, .LBB1_2
|
||||||
; RV64IZbbNOZbt-NEXT: # %bb.1:
|
; RV64IZbbNOZbt-NEXT: # %bb.1:
|
||||||
; RV64IZbbNOZbt-NEXT: slti a0, a0, 0
|
; RV64IZbbNOZbt-NEXT: srai a0, a0, 63
|
||||||
; RV64IZbbNOZbt-NEXT: addi a1, zero, -1
|
; RV64IZbbNOZbt-NEXT: addi a1, zero, -1
|
||||||
; RV64IZbbNOZbt-NEXT: slli a1, a1, 63
|
; RV64IZbbNOZbt-NEXT: slli a1, a1, 63
|
||||||
; RV64IZbbNOZbt-NEXT: sub a0, a1, a0
|
; RV64IZbbNOZbt-NEXT: xor a0, a0, a1
|
||||||
; RV64IZbbNOZbt-NEXT: .LBB1_2:
|
; RV64IZbbNOZbt-NEXT: .LBB1_2:
|
||||||
; RV64IZbbNOZbt-NEXT: ret
|
; RV64IZbbNOZbt-NEXT: ret
|
||||||
;
|
;
|
||||||
|
|
@ -161,18 +156,16 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
|
||||||
; RV32IZbbZbt-NEXT: sltu a4, a0, a2
|
; RV32IZbbZbt-NEXT: sltu a4, a0, a2
|
||||||
; RV32IZbbZbt-NEXT: sub a5, a1, a3
|
; RV32IZbbZbt-NEXT: sub a5, a1, a3
|
||||||
; RV32IZbbZbt-NEXT: sub a4, a5, a4
|
; RV32IZbbZbt-NEXT: sub a4, a5, a4
|
||||||
; RV32IZbbZbt-NEXT: slti a7, a4, 0
|
; RV32IZbbZbt-NEXT: srai a6, a4, 31
|
||||||
; RV32IZbbZbt-NEXT: lui a6, 524288
|
; RV32IZbbZbt-NEXT: lui a5, 524288
|
||||||
; RV32IZbbZbt-NEXT: addi a5, a6, -1
|
; RV32IZbbZbt-NEXT: xor a7, a6, a5
|
||||||
; RV32IZbbZbt-NEXT: cmov a6, a7, a5, a6
|
|
||||||
; RV32IZbbZbt-NEXT: xor a5, a1, a4
|
; RV32IZbbZbt-NEXT: xor a5, a1, a4
|
||||||
; RV32IZbbZbt-NEXT: xor a1, a1, a3
|
; RV32IZbbZbt-NEXT: xor a1, a1, a3
|
||||||
; RV32IZbbZbt-NEXT: and a1, a1, a5
|
; RV32IZbbZbt-NEXT: and a1, a1, a5
|
||||||
; RV32IZbbZbt-NEXT: slti a3, a1, 0
|
; RV32IZbbZbt-NEXT: slti a3, a1, 0
|
||||||
; RV32IZbbZbt-NEXT: cmov a1, a3, a6, a4
|
; RV32IZbbZbt-NEXT: cmov a1, a3, a7, a4
|
||||||
; RV32IZbbZbt-NEXT: srai a4, a4, 31
|
|
||||||
; RV32IZbbZbt-NEXT: sub a0, a0, a2
|
; RV32IZbbZbt-NEXT: sub a0, a0, a2
|
||||||
; RV32IZbbZbt-NEXT: cmov a0, a3, a4, a0
|
; RV32IZbbZbt-NEXT: cmov a0, a3, a6, a0
|
||||||
; RV32IZbbZbt-NEXT: ret
|
; RV32IZbbZbt-NEXT: ret
|
||||||
;
|
;
|
||||||
; RV64IZbbZbt-LABEL: func2:
|
; RV64IZbbZbt-LABEL: func2:
|
||||||
|
|
@ -181,11 +174,10 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
|
||||||
; RV64IZbbZbt-NEXT: sub a1, a0, a1
|
; RV64IZbbZbt-NEXT: sub a1, a0, a1
|
||||||
; RV64IZbbZbt-NEXT: slt a0, a1, a0
|
; RV64IZbbZbt-NEXT: slt a0, a1, a0
|
||||||
; RV64IZbbZbt-NEXT: xor a0, a2, a0
|
; RV64IZbbZbt-NEXT: xor a0, a2, a0
|
||||||
; RV64IZbbZbt-NEXT: slti a2, a1, 0
|
; RV64IZbbZbt-NEXT: srai a2, a1, 63
|
||||||
; RV64IZbbZbt-NEXT: addi a3, zero, -1
|
; RV64IZbbZbt-NEXT: addi a3, zero, -1
|
||||||
; RV64IZbbZbt-NEXT: slli a4, a3, 63
|
; RV64IZbbZbt-NEXT: slli a3, a3, 63
|
||||||
; RV64IZbbZbt-NEXT: srli a3, a3, 1
|
; RV64IZbbZbt-NEXT: xor a2, a2, a3
|
||||||
; RV64IZbbZbt-NEXT: cmov a2, a2, a3, a4
|
|
||||||
; RV64IZbbZbt-NEXT: cmov a0, a0, a2, a1
|
; RV64IZbbZbt-NEXT: cmov a0, a0, a2, a1
|
||||||
; RV64IZbbZbt-NEXT: ret
|
; RV64IZbbZbt-NEXT: ret
|
||||||
%tmp = call i64 @llvm.ssub.sat.i64(i64 %x, i64 %y);
|
%tmp = call i64 @llvm.ssub.sat.i64(i64 %x, i64 %y);
|
||||||
|
|
|
||||||
|
|
@ -22,9 +22,9 @@ define i32 @func32(i32 %x, i32 %y, i32 %z) nounwind {
|
||||||
; RV32I-NEXT: slt a2, a0, a3
|
; RV32I-NEXT: slt a2, a0, a3
|
||||||
; RV32I-NEXT: beq a1, a2, .LBB0_2
|
; RV32I-NEXT: beq a1, a2, .LBB0_2
|
||||||
; RV32I-NEXT: # %bb.1:
|
; RV32I-NEXT: # %bb.1:
|
||||||
; RV32I-NEXT: slti a0, a0, 0
|
; RV32I-NEXT: srai a0, a0, 31
|
||||||
; RV32I-NEXT: lui a1, 524288
|
; RV32I-NEXT: lui a1, 524288
|
||||||
; RV32I-NEXT: sub a0, a1, a0
|
; RV32I-NEXT: xor a0, a0, a1
|
||||||
; RV32I-NEXT: .LBB0_2:
|
; RV32I-NEXT: .LBB0_2:
|
||||||
; RV32I-NEXT: ret
|
; RV32I-NEXT: ret
|
||||||
;
|
;
|
||||||
|
|
@ -56,9 +56,9 @@ define i32 @func32(i32 %x, i32 %y, i32 %z) nounwind {
|
||||||
; RV32IZbbNOZbt-NEXT: slt a2, a0, a3
|
; RV32IZbbNOZbt-NEXT: slt a2, a0, a3
|
||||||
; RV32IZbbNOZbt-NEXT: beq a1, a2, .LBB0_2
|
; RV32IZbbNOZbt-NEXT: beq a1, a2, .LBB0_2
|
||||||
; RV32IZbbNOZbt-NEXT: # %bb.1:
|
; RV32IZbbNOZbt-NEXT: # %bb.1:
|
||||||
; RV32IZbbNOZbt-NEXT: slti a0, a0, 0
|
; RV32IZbbNOZbt-NEXT: srai a0, a0, 31
|
||||||
; RV32IZbbNOZbt-NEXT: lui a1, 524288
|
; RV32IZbbNOZbt-NEXT: lui a1, 524288
|
||||||
; RV32IZbbNOZbt-NEXT: sub a0, a1, a0
|
; RV32IZbbNOZbt-NEXT: xor a0, a0, a1
|
||||||
; RV32IZbbNOZbt-NEXT: .LBB0_2:
|
; RV32IZbbNOZbt-NEXT: .LBB0_2:
|
||||||
; RV32IZbbNOZbt-NEXT: ret
|
; RV32IZbbNOZbt-NEXT: ret
|
||||||
;
|
;
|
||||||
|
|
@ -80,10 +80,9 @@ define i32 @func32(i32 %x, i32 %y, i32 %z) nounwind {
|
||||||
; RV32IZbbZbt-NEXT: sub a1, a0, a1
|
; RV32IZbbZbt-NEXT: sub a1, a0, a1
|
||||||
; RV32IZbbZbt-NEXT: slt a0, a1, a0
|
; RV32IZbbZbt-NEXT: slt a0, a1, a0
|
||||||
; RV32IZbbZbt-NEXT: xor a0, a2, a0
|
; RV32IZbbZbt-NEXT: xor a0, a2, a0
|
||||||
; RV32IZbbZbt-NEXT: slti a2, a1, 0
|
; RV32IZbbZbt-NEXT: srai a2, a1, 31
|
||||||
; RV32IZbbZbt-NEXT: lui a3, 524288
|
; RV32IZbbZbt-NEXT: lui a3, 524288
|
||||||
; RV32IZbbZbt-NEXT: addi a4, a3, -1
|
; RV32IZbbZbt-NEXT: xor a2, a2, a3
|
||||||
; RV32IZbbZbt-NEXT: cmov a2, a2, a4, a3
|
|
||||||
; RV32IZbbZbt-NEXT: cmov a0, a0, a2, a1
|
; RV32IZbbZbt-NEXT: cmov a0, a0, a2, a1
|
||||||
; RV32IZbbZbt-NEXT: ret
|
; RV32IZbbZbt-NEXT: ret
|
||||||
%a = mul i32 %y, %z
|
%a = mul i32 %y, %z
|
||||||
|
|
@ -106,11 +105,9 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind {
|
||||||
; RV32I-NEXT: sub a0, a0, a4
|
; RV32I-NEXT: sub a0, a0, a4
|
||||||
; RV32I-NEXT: ret
|
; RV32I-NEXT: ret
|
||||||
; RV32I-NEXT: .LBB1_2:
|
; RV32I-NEXT: .LBB1_2:
|
||||||
; RV32I-NEXT: slti a0, a1, 0
|
|
||||||
; RV32I-NEXT: lui a2, 524288
|
|
||||||
; RV32I-NEXT: sub a2, a2, a0
|
|
||||||
; RV32I-NEXT: srai a0, a1, 31
|
; RV32I-NEXT: srai a0, a1, 31
|
||||||
; RV32I-NEXT: mv a1, a2
|
; RV32I-NEXT: lui a1, 524288
|
||||||
|
; RV32I-NEXT: xor a1, a0, a1
|
||||||
; RV32I-NEXT: ret
|
; RV32I-NEXT: ret
|
||||||
;
|
;
|
||||||
; RV64I-LABEL: func64:
|
; RV64I-LABEL: func64:
|
||||||
|
|
@ -121,10 +118,10 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind {
|
||||||
; RV64I-NEXT: slt a1, a0, a1
|
; RV64I-NEXT: slt a1, a0, a1
|
||||||
; RV64I-NEXT: beq a3, a1, .LBB1_2
|
; RV64I-NEXT: beq a3, a1, .LBB1_2
|
||||||
; RV64I-NEXT: # %bb.1:
|
; RV64I-NEXT: # %bb.1:
|
||||||
; RV64I-NEXT: slti a0, a0, 0
|
; RV64I-NEXT: srai a0, a0, 63
|
||||||
; RV64I-NEXT: addi a1, zero, -1
|
; RV64I-NEXT: addi a1, zero, -1
|
||||||
; RV64I-NEXT: slli a1, a1, 63
|
; RV64I-NEXT: slli a1, a1, 63
|
||||||
; RV64I-NEXT: sub a0, a1, a0
|
; RV64I-NEXT: xor a0, a0, a1
|
||||||
; RV64I-NEXT: .LBB1_2:
|
; RV64I-NEXT: .LBB1_2:
|
||||||
; RV64I-NEXT: ret
|
; RV64I-NEXT: ret
|
||||||
;
|
;
|
||||||
|
|
@ -142,11 +139,9 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind {
|
||||||
; RV32IZbbNOZbt-NEXT: sub a0, a0, a4
|
; RV32IZbbNOZbt-NEXT: sub a0, a0, a4
|
||||||
; RV32IZbbNOZbt-NEXT: ret
|
; RV32IZbbNOZbt-NEXT: ret
|
||||||
; RV32IZbbNOZbt-NEXT: .LBB1_2:
|
; RV32IZbbNOZbt-NEXT: .LBB1_2:
|
||||||
; RV32IZbbNOZbt-NEXT: slti a0, a1, 0
|
|
||||||
; RV32IZbbNOZbt-NEXT: lui a2, 524288
|
|
||||||
; RV32IZbbNOZbt-NEXT: sub a2, a2, a0
|
|
||||||
; RV32IZbbNOZbt-NEXT: srai a0, a1, 31
|
; RV32IZbbNOZbt-NEXT: srai a0, a1, 31
|
||||||
; RV32IZbbNOZbt-NEXT: mv a1, a2
|
; RV32IZbbNOZbt-NEXT: lui a1, 524288
|
||||||
|
; RV32IZbbNOZbt-NEXT: xor a1, a0, a1
|
||||||
; RV32IZbbNOZbt-NEXT: ret
|
; RV32IZbbNOZbt-NEXT: ret
|
||||||
;
|
;
|
||||||
; RV64IZbbNOZbt-LABEL: func64:
|
; RV64IZbbNOZbt-LABEL: func64:
|
||||||
|
|
@ -157,10 +152,10 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind {
|
||||||
; RV64IZbbNOZbt-NEXT: slt a1, a0, a1
|
; RV64IZbbNOZbt-NEXT: slt a1, a0, a1
|
||||||
; RV64IZbbNOZbt-NEXT: beq a3, a1, .LBB1_2
|
; RV64IZbbNOZbt-NEXT: beq a3, a1, .LBB1_2
|
||||||
; RV64IZbbNOZbt-NEXT: # %bb.1:
|
; RV64IZbbNOZbt-NEXT: # %bb.1:
|
||||||
; RV64IZbbNOZbt-NEXT: slti a0, a0, 0
|
; RV64IZbbNOZbt-NEXT: srai a0, a0, 63
|
||||||
; RV64IZbbNOZbt-NEXT: addi a1, zero, -1
|
; RV64IZbbNOZbt-NEXT: addi a1, zero, -1
|
||||||
; RV64IZbbNOZbt-NEXT: slli a1, a1, 63
|
; RV64IZbbNOZbt-NEXT: slli a1, a1, 63
|
||||||
; RV64IZbbNOZbt-NEXT: sub a0, a1, a0
|
; RV64IZbbNOZbt-NEXT: xor a0, a0, a1
|
||||||
; RV64IZbbNOZbt-NEXT: .LBB1_2:
|
; RV64IZbbNOZbt-NEXT: .LBB1_2:
|
||||||
; RV64IZbbNOZbt-NEXT: ret
|
; RV64IZbbNOZbt-NEXT: ret
|
||||||
;
|
;
|
||||||
|
|
@ -169,18 +164,16 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind {
|
||||||
; RV32IZbbZbt-NEXT: sltu a2, a0, a4
|
; RV32IZbbZbt-NEXT: sltu a2, a0, a4
|
||||||
; RV32IZbbZbt-NEXT: sub a3, a1, a5
|
; RV32IZbbZbt-NEXT: sub a3, a1, a5
|
||||||
; RV32IZbbZbt-NEXT: sub a2, a3, a2
|
; RV32IZbbZbt-NEXT: sub a2, a3, a2
|
||||||
; RV32IZbbZbt-NEXT: slti a7, a2, 0
|
; RV32IZbbZbt-NEXT: srai a6, a2, 31
|
||||||
; RV32IZbbZbt-NEXT: lui a6, 524288
|
; RV32IZbbZbt-NEXT: lui a3, 524288
|
||||||
; RV32IZbbZbt-NEXT: addi a3, a6, -1
|
; RV32IZbbZbt-NEXT: xor a7, a6, a3
|
||||||
; RV32IZbbZbt-NEXT: cmov a6, a7, a3, a6
|
|
||||||
; RV32IZbbZbt-NEXT: xor a3, a1, a2
|
; RV32IZbbZbt-NEXT: xor a3, a1, a2
|
||||||
; RV32IZbbZbt-NEXT: xor a1, a1, a5
|
; RV32IZbbZbt-NEXT: xor a1, a1, a5
|
||||||
; RV32IZbbZbt-NEXT: and a1, a1, a3
|
; RV32IZbbZbt-NEXT: and a1, a1, a3
|
||||||
; RV32IZbbZbt-NEXT: slti a3, a1, 0
|
; RV32IZbbZbt-NEXT: slti a3, a1, 0
|
||||||
; RV32IZbbZbt-NEXT: cmov a1, a3, a6, a2
|
; RV32IZbbZbt-NEXT: cmov a1, a3, a7, a2
|
||||||
; RV32IZbbZbt-NEXT: srai a2, a2, 31
|
|
||||||
; RV32IZbbZbt-NEXT: sub a0, a0, a4
|
; RV32IZbbZbt-NEXT: sub a0, a0, a4
|
||||||
; RV32IZbbZbt-NEXT: cmov a0, a3, a2, a0
|
; RV32IZbbZbt-NEXT: cmov a0, a3, a6, a0
|
||||||
; RV32IZbbZbt-NEXT: ret
|
; RV32IZbbZbt-NEXT: ret
|
||||||
;
|
;
|
||||||
; RV64IZbbZbt-LABEL: func64:
|
; RV64IZbbZbt-LABEL: func64:
|
||||||
|
|
@ -189,11 +182,10 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind {
|
||||||
; RV64IZbbZbt-NEXT: sub a2, a0, a2
|
; RV64IZbbZbt-NEXT: sub a2, a0, a2
|
||||||
; RV64IZbbZbt-NEXT: slt a0, a2, a0
|
; RV64IZbbZbt-NEXT: slt a0, a2, a0
|
||||||
; RV64IZbbZbt-NEXT: xor a0, a1, a0
|
; RV64IZbbZbt-NEXT: xor a0, a1, a0
|
||||||
; RV64IZbbZbt-NEXT: slti a1, a2, 0
|
; RV64IZbbZbt-NEXT: srai a1, a2, 63
|
||||||
; RV64IZbbZbt-NEXT: addi a3, zero, -1
|
; RV64IZbbZbt-NEXT: addi a3, zero, -1
|
||||||
; RV64IZbbZbt-NEXT: slli a4, a3, 63
|
; RV64IZbbZbt-NEXT: slli a3, a3, 63
|
||||||
; RV64IZbbZbt-NEXT: srli a3, a3, 1
|
; RV64IZbbZbt-NEXT: xor a1, a1, a3
|
||||||
; RV64IZbbZbt-NEXT: cmov a1, a1, a3, a4
|
|
||||||
; RV64IZbbZbt-NEXT: cmov a0, a0, a1, a2
|
; RV64IZbbZbt-NEXT: cmov a0, a0, a1, a2
|
||||||
; RV64IZbbZbt-NEXT: ret
|
; RV64IZbbZbt-NEXT: ret
|
||||||
%a = mul i64 %y, %z
|
%a = mul i64 %y, %z
|
||||||
|
|
|
||||||
|
|
@ -60,20 +60,14 @@ define arm_aapcs_vfpcc <2 x i64> @sadd_int64_t(<2 x i64> %src1, <2 x i64> %src2)
|
||||||
; CHECK-NEXT: cmp r5, #0
|
; CHECK-NEXT: cmp r5, #0
|
||||||
; CHECK-NEXT: it ne
|
; CHECK-NEXT: it ne
|
||||||
; CHECK-NEXT: asrne r3, r1, #31
|
; CHECK-NEXT: asrne r3, r1, #31
|
||||||
; CHECK-NEXT: cmp r0, #0
|
|
||||||
; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
|
; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
|
||||||
; CHECK-NEXT: cset r2, mi
|
|
||||||
; CHECK-NEXT: mvn r3, #-2147483648
|
|
||||||
; CHECK-NEXT: cmp r2, #0
|
|
||||||
; CHECK-NEXT: cinv r2, r3, eq
|
|
||||||
; CHECK-NEXT: cmp.w r12, #0
|
; CHECK-NEXT: cmp.w r12, #0
|
||||||
; CHECK-NEXT: csel r0, r2, r0, ne
|
; CHECK-NEXT: mov.w r2, #-2147483648
|
||||||
; CHECK-NEXT: cmp r1, #0
|
; CHECK-NEXT: it ne
|
||||||
; CHECK-NEXT: cset r2, mi
|
; CHECK-NEXT: eorne.w r0, r2, r0, asr #31
|
||||||
; CHECK-NEXT: cmp r2, #0
|
|
||||||
; CHECK-NEXT: cinv r2, r3, eq
|
|
||||||
; CHECK-NEXT: cmp r5, #0
|
; CHECK-NEXT: cmp r5, #0
|
||||||
; CHECK-NEXT: csel r1, r2, r1, ne
|
; CHECK-NEXT: it ne
|
||||||
|
; CHECK-NEXT: eorne.w r1, r2, r1, asr #31
|
||||||
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
|
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
|
||||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||||
entry:
|
entry:
|
||||||
|
|
@ -183,40 +177,34 @@ define arm_aapcs_vfpcc <2 x i64> @ssub_int64_t(<2 x i64> %src1, <2 x i64> %src2)
|
||||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||||
; CHECK-NEXT: vmov r0, r1, d3
|
; CHECK-NEXT: vmov r0, r1, d3
|
||||||
; CHECK-NEXT: vmov r2, r3, d1
|
; CHECK-NEXT: vmov r2, r3, d1
|
||||||
; CHECK-NEXT: subs r0, r2, r0
|
; CHECK-NEXT: subs r2, r2, r0
|
||||||
; CHECK-NEXT: eor.w r12, r3, r1
|
; CHECK-NEXT: eor.w r12, r3, r1
|
||||||
; CHECK-NEXT: sbc.w r1, r3, r1
|
; CHECK-NEXT: sbc.w r0, r3, r1
|
||||||
; CHECK-NEXT: eor.w r2, r3, r1
|
; CHECK-NEXT: eor.w r1, r3, r0
|
||||||
; CHECK-NEXT: vmov r3, r4, d0
|
; CHECK-NEXT: vmov r3, r4, d0
|
||||||
; CHECK-NEXT: ands.w r2, r2, r12
|
; CHECK-NEXT: ands.w r1, r1, r12
|
||||||
; CHECK-NEXT: vmov lr, r2, d2
|
; CHECK-NEXT: vmov lr, r1, d2
|
||||||
; CHECK-NEXT: cset r12, mi
|
; CHECK-NEXT: cset r12, mi
|
||||||
; CHECK-NEXT: cmp.w r12, #0
|
; CHECK-NEXT: cmp.w r12, #0
|
||||||
; CHECK-NEXT: it ne
|
; CHECK-NEXT: it ne
|
||||||
; CHECK-NEXT: asrne r0, r1, #31
|
; CHECK-NEXT: asrne r2, r0, #31
|
||||||
; CHECK-NEXT: subs.w r3, r3, lr
|
; CHECK-NEXT: subs.w r3, r3, lr
|
||||||
; CHECK-NEXT: eor.w r5, r4, r2
|
; CHECK-NEXT: eor.w r5, r4, r1
|
||||||
; CHECK-NEXT: sbc.w r2, r4, r2
|
; CHECK-NEXT: sbc.w r1, r4, r1
|
||||||
; CHECK-NEXT: eors r4, r2
|
; CHECK-NEXT: eors r4, r1
|
||||||
; CHECK-NEXT: ands r5, r4
|
; CHECK-NEXT: ands r5, r4
|
||||||
; CHECK-NEXT: cset r5, mi
|
; CHECK-NEXT: cset r5, mi
|
||||||
; CHECK-NEXT: cmp r5, #0
|
; CHECK-NEXT: cmp r5, #0
|
||||||
; CHECK-NEXT: it ne
|
; CHECK-NEXT: it ne
|
||||||
; CHECK-NEXT: asrne r3, r2, #31
|
; CHECK-NEXT: asrne r3, r1, #31
|
||||||
; CHECK-NEXT: cmp r1, #0
|
; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
|
||||||
; CHECK-NEXT: vmov q0[2], q0[0], r3, r0
|
|
||||||
; CHECK-NEXT: cset r0, mi
|
|
||||||
; CHECK-NEXT: mvn r3, #-2147483648
|
|
||||||
; CHECK-NEXT: cmp r0, #0
|
|
||||||
; CHECK-NEXT: cinv r0, r3, eq
|
|
||||||
; CHECK-NEXT: cmp.w r12, #0
|
; CHECK-NEXT: cmp.w r12, #0
|
||||||
; CHECK-NEXT: csel r0, r0, r1, ne
|
; CHECK-NEXT: mov.w r2, #-2147483648
|
||||||
; CHECK-NEXT: cmp r2, #0
|
; CHECK-NEXT: it ne
|
||||||
; CHECK-NEXT: cset r1, mi
|
; CHECK-NEXT: eorne.w r0, r2, r0, asr #31
|
||||||
; CHECK-NEXT: cmp r1, #0
|
|
||||||
; CHECK-NEXT: cinv r1, r3, eq
|
|
||||||
; CHECK-NEXT: cmp r5, #0
|
; CHECK-NEXT: cmp r5, #0
|
||||||
; CHECK-NEXT: csel r1, r1, r2, ne
|
; CHECK-NEXT: it ne
|
||||||
|
; CHECK-NEXT: eorne.w r1, r2, r1, asr #31
|
||||||
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
|
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
|
||||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||||
entry:
|
entry:
|
||||||
|
|
|
||||||
|
|
@ -77,11 +77,10 @@ define <8 x i16> @combine_constfold_undef_v8i16() {
|
||||||
define i32 @combine_constant_i32(i32 %a0) {
|
define i32 @combine_constant_i32(i32 %a0) {
|
||||||
; CHECK-LABEL: combine_constant_i32:
|
; CHECK-LABEL: combine_constant_i32:
|
||||||
; CHECK: # %bb.0:
|
; CHECK: # %bb.0:
|
||||||
; CHECK-NEXT: xorl %eax, %eax
|
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
|
||||||
; CHECK-NEXT: movl %edi, %ecx
|
; CHECK-NEXT: leal 1(%rdi), %eax
|
||||||
; CHECK-NEXT: incl %ecx
|
; CHECK-NEXT: sarl $31, %eax
|
||||||
; CHECK-NEXT: setns %al
|
; CHECK-NEXT: xorl $-2147483648, %eax # imm = 0x80000000
|
||||||
; CHECK-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF
|
|
||||||
; CHECK-NEXT: incl %edi
|
; CHECK-NEXT: incl %edi
|
||||||
; CHECK-NEXT: cmovnol %edi, %eax
|
; CHECK-NEXT: cmovnol %edi, %eax
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
|
|
@ -125,13 +124,13 @@ define <8 x i16> @combine_zero_v8i16(<8 x i16> %a0) {
|
||||||
define i32 @combine_no_overflow_i32(i32 %a0, i32 %a1) {
|
define i32 @combine_no_overflow_i32(i32 %a0, i32 %a1) {
|
||||||
; CHECK-LABEL: combine_no_overflow_i32:
|
; CHECK-LABEL: combine_no_overflow_i32:
|
||||||
; CHECK: # %bb.0:
|
; CHECK: # %bb.0:
|
||||||
|
; CHECK-NEXT: # kill: def $esi killed $esi def $rsi
|
||||||
|
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
|
||||||
; CHECK-NEXT: sarl $16, %edi
|
; CHECK-NEXT: sarl $16, %edi
|
||||||
; CHECK-NEXT: shrl $16, %esi
|
; CHECK-NEXT: shrl $16, %esi
|
||||||
; CHECK-NEXT: xorl %eax, %eax
|
; CHECK-NEXT: leal (%rdi,%rsi), %eax
|
||||||
; CHECK-NEXT: movl %edi, %ecx
|
; CHECK-NEXT: sarl $31, %eax
|
||||||
; CHECK-NEXT: addl %esi, %ecx
|
; CHECK-NEXT: xorl $-2147483648, %eax # imm = 0x80000000
|
||||||
; CHECK-NEXT: setns %al
|
|
||||||
; CHECK-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF
|
|
||||||
; CHECK-NEXT: addl %edi, %esi
|
; CHECK-NEXT: addl %edi, %esi
|
||||||
; CHECK-NEXT: cmovnol %esi, %eax
|
; CHECK-NEXT: cmovnol %esi, %eax
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
|
|
|
||||||
|
|
@ -12,26 +12,22 @@ declare <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32>, <4 x i32>)
|
||||||
define i32 @func(i32 %x, i32 %y) nounwind {
|
define i32 @func(i32 %x, i32 %y) nounwind {
|
||||||
; X86-LABEL: func:
|
; X86-LABEL: func:
|
||||||
; X86: # %bb.0:
|
; X86: # %bb.0:
|
||||||
; X86-NEXT: pushl %esi
|
|
||||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||||
; X86-NEXT: xorl %ecx, %ecx
|
; X86-NEXT: leal (%eax,%ecx), %edx
|
||||||
; X86-NEXT: movl %eax, %esi
|
; X86-NEXT: sarl $31, %edx
|
||||||
; X86-NEXT: addl %edx, %esi
|
; X86-NEXT: xorl $-2147483648, %edx # imm = 0x80000000
|
||||||
; X86-NEXT: setns %cl
|
; X86-NEXT: addl %ecx, %eax
|
||||||
; X86-NEXT: addl $2147483647, %ecx # imm = 0x7FFFFFFF
|
; X86-NEXT: cmovol %edx, %eax
|
||||||
; X86-NEXT: addl %edx, %eax
|
|
||||||
; X86-NEXT: cmovol %ecx, %eax
|
|
||||||
; X86-NEXT: popl %esi
|
|
||||||
; X86-NEXT: retl
|
; X86-NEXT: retl
|
||||||
;
|
;
|
||||||
; X64-LABEL: func:
|
; X64-LABEL: func:
|
||||||
; X64: # %bb.0:
|
; X64: # %bb.0:
|
||||||
; X64-NEXT: xorl %eax, %eax
|
; X64-NEXT: # kill: def $esi killed $esi def $rsi
|
||||||
; X64-NEXT: movl %edi, %ecx
|
; X64-NEXT: # kill: def $edi killed $edi def $rdi
|
||||||
; X64-NEXT: addl %esi, %ecx
|
; X64-NEXT: leal (%rdi,%rsi), %eax
|
||||||
; X64-NEXT: setns %al
|
; X64-NEXT: sarl $31, %eax
|
||||||
; X64-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF
|
; X64-NEXT: xorl $-2147483648, %eax # imm = 0x80000000
|
||||||
; X64-NEXT: addl %esi, %edi
|
; X64-NEXT: addl %esi, %edi
|
||||||
; X64-NEXT: cmovnol %edi, %eax
|
; X64-NEXT: cmovnol %edi, %eax
|
||||||
; X64-NEXT: retq
|
; X64-NEXT: retq
|
||||||
|
|
@ -43,34 +39,27 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
|
||||||
; X86-LABEL: func2:
|
; X86-LABEL: func2:
|
||||||
; X86: # %bb.0:
|
; X86: # %bb.0:
|
||||||
; X86-NEXT: pushl %ebx
|
; X86-NEXT: pushl %ebx
|
||||||
; X86-NEXT: pushl %esi
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
|
; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||||
; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx
|
; X86-NEXT: adcl {{[0-9]+}}(%esp), %ecx
|
||||||
; X86-NEXT: adcl {{[0-9]+}}(%esp), %esi
|
|
||||||
; X86-NEXT: seto %bl
|
; X86-NEXT: seto %bl
|
||||||
; X86-NEXT: movl %esi, %eax
|
; X86-NEXT: movl %ecx, %edx
|
||||||
; X86-NEXT: sarl $31, %eax
|
; X86-NEXT: sarl $31, %edx
|
||||||
; X86-NEXT: testb %bl, %bl
|
; X86-NEXT: testb %bl, %bl
|
||||||
; X86-NEXT: cmovel %ecx, %eax
|
; X86-NEXT: cmovnel %edx, %eax
|
||||||
; X86-NEXT: xorl %edx, %edx
|
; X86-NEXT: xorl $-2147483648, %edx # imm = 0x80000000
|
||||||
; X86-NEXT: testl %esi, %esi
|
|
||||||
; X86-NEXT: setns %dl
|
|
||||||
; X86-NEXT: addl $2147483647, %edx # imm = 0x7FFFFFFF
|
|
||||||
; X86-NEXT: testb %bl, %bl
|
; X86-NEXT: testb %bl, %bl
|
||||||
; X86-NEXT: cmovel %esi, %edx
|
; X86-NEXT: cmovel %ecx, %edx
|
||||||
; X86-NEXT: popl %esi
|
|
||||||
; X86-NEXT: popl %ebx
|
; X86-NEXT: popl %ebx
|
||||||
; X86-NEXT: retl
|
; X86-NEXT: retl
|
||||||
;
|
;
|
||||||
; X64-LABEL: func2:
|
; X64-LABEL: func2:
|
||||||
; X64: # %bb.0:
|
; X64: # %bb.0:
|
||||||
; X64-NEXT: xorl %ecx, %ecx
|
; X64-NEXT: leaq (%rdi,%rsi), %rcx
|
||||||
; X64-NEXT: movq %rdi, %rax
|
; X64-NEXT: sarq $63, %rcx
|
||||||
; X64-NEXT: addq %rsi, %rax
|
; X64-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
|
||||||
; X64-NEXT: setns %cl
|
; X64-NEXT: xorq %rcx, %rax
|
||||||
; X64-NEXT: movabsq $9223372036854775807, %rax # imm = 0x7FFFFFFFFFFFFFFF
|
|
||||||
; X64-NEXT: addq %rcx, %rax
|
|
||||||
; X64-NEXT: addq %rsi, %rdi
|
; X64-NEXT: addq %rsi, %rdi
|
||||||
; X64-NEXT: cmovnoq %rdi, %rax
|
; X64-NEXT: cmovnoq %rdi, %rax
|
||||||
; X64-NEXT: retq
|
; X64-NEXT: retq
|
||||||
|
|
@ -81,27 +70,26 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
|
||||||
define signext i16 @func16(i16 signext %x, i16 signext %y) nounwind {
|
define signext i16 @func16(i16 signext %x, i16 signext %y) nounwind {
|
||||||
; X86-LABEL: func16:
|
; X86-LABEL: func16:
|
||||||
; X86: # %bb.0:
|
; X86: # %bb.0:
|
||||||
; X86-NEXT: pushl %esi
|
|
||||||
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
|
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
|
||||||
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %edx
|
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
|
||||||
; X86-NEXT: xorl %ecx, %ecx
|
; X86-NEXT: movl %eax, %edx
|
||||||
; X86-NEXT: movl %eax, %esi
|
; X86-NEXT: addw %cx, %dx
|
||||||
; X86-NEXT: addw %dx, %si
|
; X86-NEXT: movswl %dx, %edx
|
||||||
; X86-NEXT: setns %cl
|
; X86-NEXT: sarl $15, %edx
|
||||||
; X86-NEXT: addl $32767, %ecx # imm = 0x7FFF
|
; X86-NEXT: xorl $-32768, %edx # imm = 0x8000
|
||||||
; X86-NEXT: addw %dx, %ax
|
; X86-NEXT: addw %cx, %ax
|
||||||
; X86-NEXT: cmovol %ecx, %eax
|
; X86-NEXT: cmovol %edx, %eax
|
||||||
; X86-NEXT: # kill: def $ax killed $ax killed $eax
|
; X86-NEXT: # kill: def $ax killed $ax killed $eax
|
||||||
; X86-NEXT: popl %esi
|
|
||||||
; X86-NEXT: retl
|
; X86-NEXT: retl
|
||||||
;
|
;
|
||||||
; X64-LABEL: func16:
|
; X64-LABEL: func16:
|
||||||
; X64: # %bb.0:
|
; X64: # %bb.0:
|
||||||
; X64-NEXT: xorl %eax, %eax
|
; X64-NEXT: # kill: def $esi killed $esi def $rsi
|
||||||
; X64-NEXT: movl %edi, %ecx
|
; X64-NEXT: # kill: def $edi killed $edi def $rdi
|
||||||
; X64-NEXT: addw %si, %cx
|
; X64-NEXT: leal (%rdi,%rsi), %eax
|
||||||
; X64-NEXT: setns %al
|
; X64-NEXT: cwtl
|
||||||
; X64-NEXT: addl $32767, %eax # imm = 0x7FFF
|
; X64-NEXT: sarl $15, %eax
|
||||||
|
; X64-NEXT: xorl $-32768, %eax # imm = 0x8000
|
||||||
; X64-NEXT: addw %si, %di
|
; X64-NEXT: addw %si, %di
|
||||||
; X64-NEXT: cmovnol %edi, %eax
|
; X64-NEXT: cmovnol %edi, %eax
|
||||||
; X64-NEXT: # kill: def $ax killed $ax killed $eax
|
; X64-NEXT: # kill: def $ax killed $ax killed $eax
|
||||||
|
|
@ -114,28 +102,29 @@ define signext i8 @func8(i8 signext %x, i8 signext %y) nounwind {
|
||||||
; X86-LABEL: func8:
|
; X86-LABEL: func8:
|
||||||
; X86: # %bb.0:
|
; X86: # %bb.0:
|
||||||
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
|
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
|
||||||
; X86-NEXT: movb {{[0-9]+}}(%esp), %dl
|
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
|
||||||
; X86-NEXT: xorl %ecx, %ecx
|
; X86-NEXT: movl %eax, %edx
|
||||||
; X86-NEXT: movb %al, %ah
|
; X86-NEXT: addb %cl, %dl
|
||||||
; X86-NEXT: addb %dl, %ah
|
; X86-NEXT: sarb $7, %dl
|
||||||
; X86-NEXT: setns %cl
|
; X86-NEXT: xorb $-128, %dl
|
||||||
; X86-NEXT: addl $127, %ecx
|
; X86-NEXT: addb %cl, %al
|
||||||
; X86-NEXT: addb %dl, %al
|
; X86-NEXT: movzbl %al, %ecx
|
||||||
; X86-NEXT: movzbl %al, %eax
|
; X86-NEXT: movzbl %dl, %eax
|
||||||
; X86-NEXT: cmovol %ecx, %eax
|
; X86-NEXT: cmovnol %ecx, %eax
|
||||||
; X86-NEXT: # kill: def $al killed $al killed $eax
|
; X86-NEXT: # kill: def $al killed $al killed $eax
|
||||||
; X86-NEXT: retl
|
; X86-NEXT: retl
|
||||||
;
|
;
|
||||||
; X64-LABEL: func8:
|
; X64-LABEL: func8:
|
||||||
; X64: # %bb.0:
|
; X64: # %bb.0:
|
||||||
; X64-NEXT: xorl %ecx, %ecx
|
; X64-NEXT: # kill: def $esi killed $esi def $rsi
|
||||||
; X64-NEXT: movl %edi, %eax
|
; X64-NEXT: # kill: def $edi killed $edi def $rdi
|
||||||
; X64-NEXT: addb %sil, %al
|
; X64-NEXT: leal (%rdi,%rsi), %eax
|
||||||
; X64-NEXT: setns %cl
|
; X64-NEXT: sarb $7, %al
|
||||||
; X64-NEXT: addl $127, %ecx
|
; X64-NEXT: xorb $-128, %al
|
||||||
; X64-NEXT: addb %sil, %dil
|
; X64-NEXT: addb %sil, %dil
|
||||||
; X64-NEXT: movzbl %dil, %eax
|
; X64-NEXT: movzbl %dil, %ecx
|
||||||
; X64-NEXT: cmovol %ecx, %eax
|
; X64-NEXT: movzbl %al, %eax
|
||||||
|
; X64-NEXT: cmovnol %ecx, %eax
|
||||||
; X64-NEXT: # kill: def $al killed $al killed $eax
|
; X64-NEXT: # kill: def $al killed $al killed $eax
|
||||||
; X64-NEXT: retq
|
; X64-NEXT: retq
|
||||||
%tmp = call i8 @llvm.sadd.sat.i8(i8 %x, i8 %y)
|
%tmp = call i8 @llvm.sadd.sat.i8(i8 %x, i8 %y)
|
||||||
|
|
@ -176,72 +165,59 @@ define signext i4 @func3(i4 signext %x, i4 signext %y) nounwind {
|
||||||
define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
|
define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
|
||||||
; X86-LABEL: vec:
|
; X86-LABEL: vec:
|
||||||
; X86: # %bb.0:
|
; X86: # %bb.0:
|
||||||
; X86-NEXT: pushl %ebp
|
|
||||||
; X86-NEXT: pushl %ebx
|
; X86-NEXT: pushl %ebx
|
||||||
; X86-NEXT: pushl %edi
|
; X86-NEXT: pushl %edi
|
||||||
; X86-NEXT: pushl %esi
|
; X86-NEXT: pushl %esi
|
||||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
|
|
||||||
; X86-NEXT: xorl %eax, %eax
|
|
||||||
; X86-NEXT: movl %ecx, %esi
|
|
||||||
; X86-NEXT: addl %edx, %esi
|
|
||||||
; X86-NEXT: setns %al
|
|
||||||
; X86-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF
|
|
||||||
; X86-NEXT: addl %edx, %ecx
|
|
||||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
|
|
||||||
; X86-NEXT: cmovol %eax, %ecx
|
|
||||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
|
|
||||||
; X86-NEXT: xorl %eax, %eax
|
|
||||||
; X86-NEXT: movl %edx, %edi
|
|
||||||
; X86-NEXT: addl %esi, %edi
|
|
||||||
; X86-NEXT: setns %al
|
|
||||||
; X86-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF
|
|
||||||
; X86-NEXT: addl %esi, %edx
|
|
||||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
|
|
||||||
; X86-NEXT: cmovol %eax, %edx
|
|
||||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
|
|
||||||
; X86-NEXT: xorl %eax, %eax
|
|
||||||
; X86-NEXT: movl %esi, %ebx
|
|
||||||
; X86-NEXT: addl %edi, %ebx
|
|
||||||
; X86-NEXT: setns %al
|
|
||||||
; X86-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF
|
|
||||||
; X86-NEXT: addl %edi, %esi
|
|
||||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
|
|
||||||
; X86-NEXT: cmovol %eax, %esi
|
|
||||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
; X86-NEXT: xorl %ebx, %ebx
|
; X86-NEXT: leal (%ecx,%eax), %esi
|
||||||
; X86-NEXT: movl %edi, %ebp
|
; X86-NEXT: sarl $31, %esi
|
||||||
; X86-NEXT: addl %eax, %ebp
|
; X86-NEXT: xorl $-2147483648, %esi # imm = 0x80000000
|
||||||
; X86-NEXT: setns %bl
|
; X86-NEXT: addl %eax, %ecx
|
||||||
; X86-NEXT: addl $2147483647, %ebx # imm = 0x7FFFFFFF
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||||
|
; X86-NEXT: cmovol %esi, %ecx
|
||||||
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
|
; X86-NEXT: leal (%edx,%eax), %esi
|
||||||
|
; X86-NEXT: sarl $31, %esi
|
||||||
|
; X86-NEXT: xorl $-2147483648, %esi # imm = 0x80000000
|
||||||
|
; X86-NEXT: addl %eax, %edx
|
||||||
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
|
||||||
|
; X86-NEXT: cmovol %esi, %edx
|
||||||
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
|
; X86-NEXT: leal (%edi,%eax), %esi
|
||||||
|
; X86-NEXT: sarl $31, %esi
|
||||||
|
; X86-NEXT: xorl $-2147483648, %esi # imm = 0x80000000
|
||||||
; X86-NEXT: addl %eax, %edi
|
; X86-NEXT: addl %eax, %edi
|
||||||
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
|
||||||
|
; X86-NEXT: cmovol %esi, %edi
|
||||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
; X86-NEXT: cmovol %ebx, %edi
|
; X86-NEXT: leal (%ebx,%eax), %esi
|
||||||
|
; X86-NEXT: sarl $31, %esi
|
||||||
|
; X86-NEXT: xorl $-2147483648, %esi # imm = 0x80000000
|
||||||
|
; X86-NEXT: addl %eax, %ebx
|
||||||
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
|
; X86-NEXT: cmovol %esi, %ebx
|
||||||
; X86-NEXT: movl %ecx, 12(%eax)
|
; X86-NEXT: movl %ecx, 12(%eax)
|
||||||
; X86-NEXT: movl %edx, 8(%eax)
|
; X86-NEXT: movl %edx, 8(%eax)
|
||||||
; X86-NEXT: movl %esi, 4(%eax)
|
; X86-NEXT: movl %edi, 4(%eax)
|
||||||
; X86-NEXT: movl %edi, (%eax)
|
; X86-NEXT: movl %ebx, (%eax)
|
||||||
; X86-NEXT: popl %esi
|
; X86-NEXT: popl %esi
|
||||||
; X86-NEXT: popl %edi
|
; X86-NEXT: popl %edi
|
||||||
; X86-NEXT: popl %ebx
|
; X86-NEXT: popl %ebx
|
||||||
; X86-NEXT: popl %ebp
|
|
||||||
; X86-NEXT: retl $4
|
; X86-NEXT: retl $4
|
||||||
;
|
;
|
||||||
; X64-LABEL: vec:
|
; X64-LABEL: vec:
|
||||||
; X64: # %bb.0:
|
; X64: # %bb.0:
|
||||||
; X64-NEXT: pxor %xmm2, %xmm2
|
; X64-NEXT: pxor %xmm2, %xmm2
|
||||||
; X64-NEXT: pxor %xmm3, %xmm3
|
; X64-NEXT: pcmpgtd %xmm1, %xmm2
|
||||||
; X64-NEXT: pcmpgtd %xmm1, %xmm3
|
|
||||||
; X64-NEXT: paddd %xmm0, %xmm1
|
; X64-NEXT: paddd %xmm0, %xmm1
|
||||||
; X64-NEXT: pcmpgtd %xmm1, %xmm0
|
; X64-NEXT: pcmpgtd %xmm1, %xmm0
|
||||||
; X64-NEXT: pxor %xmm3, %xmm0
|
; X64-NEXT: pxor %xmm2, %xmm0
|
||||||
; X64-NEXT: movdqa %xmm1, %xmm3
|
; X64-NEXT: movdqa %xmm0, %xmm2
|
||||||
; X64-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
|
; X64-NEXT: pandn %xmm1, %xmm2
|
||||||
; X64-NEXT: pcmpgtd %xmm1, %xmm2
|
; X64-NEXT: psrad $31, %xmm1
|
||||||
; X64-NEXT: psrld $1, %xmm2
|
; X64-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
|
||||||
; X64-NEXT: por %xmm3, %xmm2
|
; X64-NEXT: pand %xmm1, %xmm0
|
||||||
; X64-NEXT: pand %xmm0, %xmm2
|
|
||||||
; X64-NEXT: pandn %xmm1, %xmm0
|
|
||||||
; X64-NEXT: por %xmm2, %xmm0
|
; X64-NEXT: por %xmm2, %xmm0
|
||||||
; X64-NEXT: retq
|
; X64-NEXT: retq
|
||||||
%tmp = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %x, <4 x i32> %y);
|
%tmp = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %x, <4 x i32> %y);
|
||||||
|
|
|
||||||
|
|
@ -11,28 +11,24 @@ declare i64 @llvm.sadd.sat.i64(i64, i64)
|
||||||
define i32 @func32(i32 %x, i32 %y, i32 %z) nounwind {
|
define i32 @func32(i32 %x, i32 %y, i32 %z) nounwind {
|
||||||
; X86-LABEL: func32:
|
; X86-LABEL: func32:
|
||||||
; X86: # %bb.0:
|
; X86: # %bb.0:
|
||||||
; X86-NEXT: pushl %esi
|
|
||||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
; X86-NEXT: imull {{[0-9]+}}(%esp), %eax
|
; X86-NEXT: imull {{[0-9]+}}(%esp), %eax
|
||||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||||
; X86-NEXT: xorl %ecx, %ecx
|
; X86-NEXT: leal (%eax,%ecx), %edx
|
||||||
; X86-NEXT: movl %eax, %esi
|
; X86-NEXT: sarl $31, %edx
|
||||||
; X86-NEXT: addl %edx, %esi
|
; X86-NEXT: xorl $-2147483648, %edx # imm = 0x80000000
|
||||||
; X86-NEXT: setns %cl
|
; X86-NEXT: addl %ecx, %eax
|
||||||
; X86-NEXT: addl $2147483647, %ecx # imm = 0x7FFFFFFF
|
; X86-NEXT: cmovol %edx, %eax
|
||||||
; X86-NEXT: addl %edx, %eax
|
|
||||||
; X86-NEXT: cmovol %ecx, %eax
|
|
||||||
; X86-NEXT: popl %esi
|
|
||||||
; X86-NEXT: retl
|
; X86-NEXT: retl
|
||||||
;
|
;
|
||||||
; X64-LABEL: func32:
|
; X64-LABEL: func32:
|
||||||
; X64: # %bb.0:
|
; X64: # %bb.0:
|
||||||
|
; X64-NEXT: # kill: def $esi killed $esi def $rsi
|
||||||
|
; X64-NEXT: # kill: def $edi killed $edi def $rdi
|
||||||
; X64-NEXT: imull %edx, %esi
|
; X64-NEXT: imull %edx, %esi
|
||||||
; X64-NEXT: xorl %eax, %eax
|
; X64-NEXT: leal (%rdi,%rsi), %eax
|
||||||
; X64-NEXT: movl %edi, %ecx
|
; X64-NEXT: sarl $31, %eax
|
||||||
; X64-NEXT: addl %esi, %ecx
|
; X64-NEXT: xorl $-2147483648, %eax # imm = 0x80000000
|
||||||
; X64-NEXT: setns %al
|
|
||||||
; X64-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF
|
|
||||||
; X64-NEXT: addl %edi, %esi
|
; X64-NEXT: addl %edi, %esi
|
||||||
; X64-NEXT: cmovnol %esi, %eax
|
; X64-NEXT: cmovnol %esi, %eax
|
||||||
; X64-NEXT: retq
|
; X64-NEXT: retq
|
||||||
|
|
@ -45,34 +41,27 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind {
|
||||||
; X86-LABEL: func64:
|
; X86-LABEL: func64:
|
||||||
; X86: # %bb.0:
|
; X86: # %bb.0:
|
||||||
; X86-NEXT: pushl %ebx
|
; X86-NEXT: pushl %ebx
|
||||||
; X86-NEXT: pushl %esi
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
|
; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||||
; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx
|
; X86-NEXT: adcl {{[0-9]+}}(%esp), %ecx
|
||||||
; X86-NEXT: adcl {{[0-9]+}}(%esp), %esi
|
|
||||||
; X86-NEXT: seto %bl
|
; X86-NEXT: seto %bl
|
||||||
; X86-NEXT: movl %esi, %eax
|
; X86-NEXT: movl %ecx, %edx
|
||||||
; X86-NEXT: sarl $31, %eax
|
; X86-NEXT: sarl $31, %edx
|
||||||
; X86-NEXT: testb %bl, %bl
|
; X86-NEXT: testb %bl, %bl
|
||||||
; X86-NEXT: cmovel %ecx, %eax
|
; X86-NEXT: cmovnel %edx, %eax
|
||||||
; X86-NEXT: xorl %edx, %edx
|
; X86-NEXT: xorl $-2147483648, %edx # imm = 0x80000000
|
||||||
; X86-NEXT: testl %esi, %esi
|
|
||||||
; X86-NEXT: setns %dl
|
|
||||||
; X86-NEXT: addl $2147483647, %edx # imm = 0x7FFFFFFF
|
|
||||||
; X86-NEXT: testb %bl, %bl
|
; X86-NEXT: testb %bl, %bl
|
||||||
; X86-NEXT: cmovel %esi, %edx
|
; X86-NEXT: cmovel %ecx, %edx
|
||||||
; X86-NEXT: popl %esi
|
|
||||||
; X86-NEXT: popl %ebx
|
; X86-NEXT: popl %ebx
|
||||||
; X86-NEXT: retl
|
; X86-NEXT: retl
|
||||||
;
|
;
|
||||||
; X64-LABEL: func64:
|
; X64-LABEL: func64:
|
||||||
; X64: # %bb.0:
|
; X64: # %bb.0:
|
||||||
; X64-NEXT: xorl %ecx, %ecx
|
; X64-NEXT: leaq (%rdi,%rdx), %rcx
|
||||||
; X64-NEXT: movq %rdi, %rax
|
; X64-NEXT: sarq $63, %rcx
|
||||||
; X64-NEXT: addq %rdx, %rax
|
; X64-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
|
||||||
; X64-NEXT: setns %cl
|
; X64-NEXT: xorq %rcx, %rax
|
||||||
; X64-NEXT: movabsq $9223372036854775807, %rax # imm = 0x7FFFFFFFFFFFFFFF
|
|
||||||
; X64-NEXT: addq %rcx, %rax
|
|
||||||
; X64-NEXT: addq %rdx, %rdi
|
; X64-NEXT: addq %rdx, %rdi
|
||||||
; X64-NEXT: cmovnoq %rdi, %rax
|
; X64-NEXT: cmovnoq %rdi, %rax
|
||||||
; X64-NEXT: retq
|
; X64-NEXT: retq
|
||||||
|
|
@ -84,31 +73,30 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind {
|
||||||
define signext i16 @func16(i16 signext %x, i16 signext %y, i16 signext %z) nounwind {
|
define signext i16 @func16(i16 signext %x, i16 signext %y, i16 signext %z) nounwind {
|
||||||
; X86-LABEL: func16:
|
; X86-LABEL: func16:
|
||||||
; X86: # %bb.0:
|
; X86: # %bb.0:
|
||||||
; X86-NEXT: pushl %esi
|
|
||||||
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
|
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
|
||||||
; X86-NEXT: imulw {{[0-9]+}}(%esp), %ax
|
; X86-NEXT: imulw {{[0-9]+}}(%esp), %ax
|
||||||
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %edx
|
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
|
||||||
; X86-NEXT: xorl %ecx, %ecx
|
; X86-NEXT: movl %eax, %edx
|
||||||
; X86-NEXT: movl %eax, %esi
|
; X86-NEXT: addw %cx, %dx
|
||||||
; X86-NEXT: addw %dx, %si
|
; X86-NEXT: movswl %dx, %edx
|
||||||
; X86-NEXT: setns %cl
|
; X86-NEXT: sarl $15, %edx
|
||||||
; X86-NEXT: addl $32767, %ecx # imm = 0x7FFF
|
; X86-NEXT: xorl $-32768, %edx # imm = 0x8000
|
||||||
; X86-NEXT: addw %dx, %ax
|
; X86-NEXT: addw %cx, %ax
|
||||||
; X86-NEXT: cmovol %ecx, %eax
|
; X86-NEXT: cmovol %edx, %eax
|
||||||
; X86-NEXT: # kill: def $ax killed $ax killed $eax
|
; X86-NEXT: # kill: def $ax killed $ax killed $eax
|
||||||
; X86-NEXT: popl %esi
|
|
||||||
; X86-NEXT: retl
|
; X86-NEXT: retl
|
||||||
;
|
;
|
||||||
; X64-LABEL: func16:
|
; X64-LABEL: func16:
|
||||||
; X64: # %bb.0:
|
; X64: # %bb.0:
|
||||||
|
; X64-NEXT: # kill: def $esi killed $esi def $rsi
|
||||||
|
; X64-NEXT: # kill: def $edi killed $edi def $rdi
|
||||||
; X64-NEXT: imull %edx, %esi
|
; X64-NEXT: imull %edx, %esi
|
||||||
; X64-NEXT: xorl %eax, %eax
|
; X64-NEXT: leal (%rdi,%rsi), %eax
|
||||||
; X64-NEXT: movl %edi, %ecx
|
; X64-NEXT: cwtl
|
||||||
; X64-NEXT: addw %si, %cx
|
; X64-NEXT: sarl $15, %eax
|
||||||
; X64-NEXT: setns %al
|
; X64-NEXT: xorl $-32768, %eax # imm = 0x8000
|
||||||
; X64-NEXT: addl $32767, %eax # imm = 0x7FFF
|
; X64-NEXT: addw %di, %si
|
||||||
; X64-NEXT: addw %si, %di
|
; X64-NEXT: cmovnol %esi, %eax
|
||||||
; X64-NEXT: cmovnol %edi, %eax
|
|
||||||
; X64-NEXT: # kill: def $ax killed $ax killed $eax
|
; X64-NEXT: # kill: def $ax killed $ax killed $eax
|
||||||
; X64-NEXT: retq
|
; X64-NEXT: retq
|
||||||
%a = mul i16 %y, %z
|
%a = mul i16 %y, %z
|
||||||
|
|
@ -121,31 +109,32 @@ define signext i8 @func8(i8 signext %x, i8 signext %y, i8 signext %z) nounwind {
|
||||||
; X86: # %bb.0:
|
; X86: # %bb.0:
|
||||||
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
|
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
|
||||||
; X86-NEXT: mulb {{[0-9]+}}(%esp)
|
; X86-NEXT: mulb {{[0-9]+}}(%esp)
|
||||||
; X86-NEXT: movb {{[0-9]+}}(%esp), %dl
|
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
|
||||||
; X86-NEXT: xorl %ecx, %ecx
|
; X86-NEXT: movl %eax, %edx
|
||||||
; X86-NEXT: movb %al, %ah
|
; X86-NEXT: addb %cl, %dl
|
||||||
; X86-NEXT: addb %dl, %ah
|
; X86-NEXT: sarb $7, %dl
|
||||||
; X86-NEXT: setns %cl
|
; X86-NEXT: xorb $-128, %dl
|
||||||
; X86-NEXT: addl $127, %ecx
|
; X86-NEXT: addb %cl, %al
|
||||||
; X86-NEXT: addb %dl, %al
|
; X86-NEXT: movzbl %al, %ecx
|
||||||
; X86-NEXT: movzbl %al, %eax
|
; X86-NEXT: movzbl %dl, %eax
|
||||||
; X86-NEXT: cmovol %ecx, %eax
|
; X86-NEXT: cmovnol %ecx, %eax
|
||||||
; X86-NEXT: # kill: def $al killed $al killed $eax
|
; X86-NEXT: # kill: def $al killed $al killed $eax
|
||||||
; X86-NEXT: retl
|
; X86-NEXT: retl
|
||||||
;
|
;
|
||||||
; X64-LABEL: func8:
|
; X64-LABEL: func8:
|
||||||
; X64: # %bb.0:
|
; X64: # %bb.0:
|
||||||
; X64-NEXT: movl %esi, %eax
|
; X64-NEXT: movl %esi, %eax
|
||||||
|
; X64-NEXT: # kill: def $edi killed $edi def $rdi
|
||||||
; X64-NEXT: # kill: def $al killed $al killed $eax
|
; X64-NEXT: # kill: def $al killed $al killed $eax
|
||||||
; X64-NEXT: mulb %dl
|
; X64-NEXT: mulb %dl
|
||||||
; X64-NEXT: xorl %ecx, %ecx
|
; X64-NEXT: # kill: def $al killed $al def $rax
|
||||||
; X64-NEXT: movl %edi, %edx
|
; X64-NEXT: leal (%rdi,%rax), %ecx
|
||||||
; X64-NEXT: addb %al, %dl
|
; X64-NEXT: sarb $7, %cl
|
||||||
; X64-NEXT: setns %cl
|
; X64-NEXT: xorb $-128, %cl
|
||||||
; X64-NEXT: addl $127, %ecx
|
; X64-NEXT: addb %dil, %al
|
||||||
; X64-NEXT: addb %al, %dil
|
; X64-NEXT: movzbl %al, %edx
|
||||||
; X64-NEXT: movzbl %dil, %eax
|
; X64-NEXT: movzbl %cl, %eax
|
||||||
; X64-NEXT: cmovol %ecx, %eax
|
; X64-NEXT: cmovnol %edx, %eax
|
||||||
; X64-NEXT: # kill: def $al killed $al killed $eax
|
; X64-NEXT: # kill: def $al killed $al killed $eax
|
||||||
; X64-NEXT: retq
|
; X64-NEXT: retq
|
||||||
%a = mul i8 %y, %z
|
%a = mul i8 %y, %z
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load Diff
|
|
@ -39,23 +39,18 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
|
||||||
; X86-LABEL: func2:
|
; X86-LABEL: func2:
|
||||||
; X86: # %bb.0:
|
; X86: # %bb.0:
|
||||||
; X86-NEXT: pushl %ebx
|
; X86-NEXT: pushl %ebx
|
||||||
; X86-NEXT: pushl %esi
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
|
; X86-NEXT: subl {{[0-9]+}}(%esp), %eax
|
||||||
; X86-NEXT: subl {{[0-9]+}}(%esp), %ecx
|
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
|
||||||
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
|
|
||||||
; X86-NEXT: seto %bl
|
; X86-NEXT: seto %bl
|
||||||
; X86-NEXT: movl %esi, %eax
|
; X86-NEXT: movl %ecx, %edx
|
||||||
; X86-NEXT: sarl $31, %eax
|
; X86-NEXT: sarl $31, %edx
|
||||||
; X86-NEXT: testb %bl, %bl
|
; X86-NEXT: testb %bl, %bl
|
||||||
; X86-NEXT: cmovel %ecx, %eax
|
; X86-NEXT: cmovnel %edx, %eax
|
||||||
; X86-NEXT: xorl %edx, %edx
|
; X86-NEXT: xorl $-2147483648, %edx # imm = 0x80000000
|
||||||
; X86-NEXT: testl %esi, %esi
|
|
||||||
; X86-NEXT: setns %dl
|
|
||||||
; X86-NEXT: addl $2147483647, %edx # imm = 0x7FFFFFFF
|
|
||||||
; X86-NEXT: testb %bl, %bl
|
; X86-NEXT: testb %bl, %bl
|
||||||
; X86-NEXT: cmovel %esi, %edx
|
; X86-NEXT: cmovel %ecx, %edx
|
||||||
; X86-NEXT: popl %esi
|
|
||||||
; X86-NEXT: popl %ebx
|
; X86-NEXT: popl %ebx
|
||||||
; X86-NEXT: retl
|
; X86-NEXT: retl
|
||||||
;
|
;
|
||||||
|
|
@ -212,20 +207,19 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
|
||||||
;
|
;
|
||||||
; X64-LABEL: vec:
|
; X64-LABEL: vec:
|
||||||
; X64: # %bb.0:
|
; X64: # %bb.0:
|
||||||
; X64-NEXT: pxor %xmm2, %xmm2
|
; X64-NEXT: pxor %xmm3, %xmm3
|
||||||
; X64-NEXT: movdqa %xmm0, %xmm3
|
; X64-NEXT: movdqa %xmm0, %xmm2
|
||||||
; X64-NEXT: psubd %xmm1, %xmm3
|
; X64-NEXT: psubd %xmm1, %xmm2
|
||||||
; X64-NEXT: pcmpgtd %xmm2, %xmm1
|
; X64-NEXT: pcmpgtd %xmm3, %xmm1
|
||||||
; X64-NEXT: pcmpgtd %xmm3, %xmm0
|
; X64-NEXT: pcmpgtd %xmm2, %xmm0
|
||||||
; X64-NEXT: pxor %xmm1, %xmm0
|
; X64-NEXT: pxor %xmm1, %xmm0
|
||||||
; X64-NEXT: movdqa %xmm3, %xmm1
|
; X64-NEXT: movdqa %xmm0, %xmm1
|
||||||
; X64-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
|
; X64-NEXT: pandn %xmm2, %xmm1
|
||||||
; X64-NEXT: pcmpgtd %xmm3, %xmm2
|
; X64-NEXT: psrad $31, %xmm2
|
||||||
; X64-NEXT: psrld $1, %xmm2
|
; X64-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
|
||||||
; X64-NEXT: por %xmm2, %xmm1
|
; X64-NEXT: pand %xmm0, %xmm2
|
||||||
; X64-NEXT: pand %xmm0, %xmm1
|
; X64-NEXT: por %xmm1, %xmm2
|
||||||
; X64-NEXT: pandn %xmm3, %xmm0
|
; X64-NEXT: movdqa %xmm2, %xmm0
|
||||||
; X64-NEXT: por %xmm1, %xmm0
|
|
||||||
; X64-NEXT: retq
|
; X64-NEXT: retq
|
||||||
%tmp = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
|
%tmp = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
|
||||||
ret <4 x i32> %tmp
|
ret <4 x i32> %tmp
|
||||||
|
|
|
||||||
|
|
@ -41,23 +41,18 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind {
|
||||||
; X86-LABEL: func64:
|
; X86-LABEL: func64:
|
||||||
; X86: # %bb.0:
|
; X86: # %bb.0:
|
||||||
; X86-NEXT: pushl %ebx
|
; X86-NEXT: pushl %ebx
|
||||||
; X86-NEXT: pushl %esi
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
|
; X86-NEXT: subl {{[0-9]+}}(%esp), %eax
|
||||||
; X86-NEXT: subl {{[0-9]+}}(%esp), %ecx
|
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
|
||||||
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
|
|
||||||
; X86-NEXT: seto %bl
|
; X86-NEXT: seto %bl
|
||||||
; X86-NEXT: movl %esi, %eax
|
; X86-NEXT: movl %ecx, %edx
|
||||||
; X86-NEXT: sarl $31, %eax
|
; X86-NEXT: sarl $31, %edx
|
||||||
; X86-NEXT: testb %bl, %bl
|
; X86-NEXT: testb %bl, %bl
|
||||||
; X86-NEXT: cmovel %ecx, %eax
|
; X86-NEXT: cmovnel %edx, %eax
|
||||||
; X86-NEXT: xorl %edx, %edx
|
; X86-NEXT: xorl $-2147483648, %edx # imm = 0x80000000
|
||||||
; X86-NEXT: testl %esi, %esi
|
|
||||||
; X86-NEXT: setns %dl
|
|
||||||
; X86-NEXT: addl $2147483647, %edx # imm = 0x7FFFFFFF
|
|
||||||
; X86-NEXT: testb %bl, %bl
|
; X86-NEXT: testb %bl, %bl
|
||||||
; X86-NEXT: cmovel %esi, %edx
|
; X86-NEXT: cmovel %ecx, %edx
|
||||||
; X86-NEXT: popl %esi
|
|
||||||
; X86-NEXT: popl %ebx
|
; X86-NEXT: popl %ebx
|
||||||
; X86-NEXT: retl
|
; X86-NEXT: retl
|
||||||
;
|
;
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue