[X86] combineFneg - generalize FMA negations with isNegatibleForFree/getNegatedExpression
This has a really interesting side effect: it improves some UMAX/UMIN reduction code that had redundant XOR(SHUFFLE(XOR(X,SIGNMASK)),SIGNMASK) patterns, because getNegatibleCost recognises them as FNEG(SHUFFLE(FNEG(X))). We have a lot of FNEG patterns bitcasted to the integer domain for XOR signbit twiddling, which is similar to what we do to allow UMAX/UMIN to be lowered using SMAX/SMIN. Differential Revision: https://reviews.llvm.org/D74231
This commit is contained in:
parent
665dcdacc0
commit
ff307c8120
|
|
@ -43168,18 +43168,20 @@ static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc,
|
||||||
|
|
||||||
/// Do target-specific dag combines on floating point negations.
|
/// Do target-specific dag combines on floating point negations.
|
||||||
static SDValue combineFneg(SDNode *N, SelectionDAG &DAG,
|
static SDValue combineFneg(SDNode *N, SelectionDAG &DAG,
|
||||||
|
TargetLowering::DAGCombinerInfo &DCI,
|
||||||
const X86Subtarget &Subtarget) {
|
const X86Subtarget &Subtarget) {
|
||||||
EVT OrigVT = N->getValueType(0);
|
EVT OrigVT = N->getValueType(0);
|
||||||
SDValue Arg = isFNEG(DAG, N);
|
SDValue Arg = isFNEG(DAG, N);
|
||||||
if (!Arg)
|
if (!Arg)
|
||||||
return SDValue();
|
return SDValue();
|
||||||
|
|
||||||
|
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
|
||||||
EVT VT = Arg.getValueType();
|
EVT VT = Arg.getValueType();
|
||||||
EVT SVT = VT.getScalarType();
|
EVT SVT = VT.getScalarType();
|
||||||
SDLoc DL(N);
|
SDLoc DL(N);
|
||||||
|
|
||||||
// Let legalize expand this if it isn't a legal type yet.
|
// Let legalize expand this if it isn't a legal type yet.
|
||||||
if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
|
if (!TLI.isTypeLegal(VT))
|
||||||
return SDValue();
|
return SDValue();
|
||||||
|
|
||||||
// If we're negating a FMUL node on a target with FMA, then we can avoid the
|
// If we're negating a FMUL node on a target with FMA, then we can avoid the
|
||||||
|
|
@ -43193,26 +43195,12 @@ static SDValue combineFneg(SDNode *N, SelectionDAG &DAG,
|
||||||
return DAG.getBitcast(OrigVT, NewNode);
|
return DAG.getBitcast(OrigVT, NewNode);
|
||||||
}
|
}
|
||||||
|
|
||||||
// If we're negating an FMA node, then we can adjust the
|
bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
|
||||||
// instruction to include the extra negation.
|
bool LegalOperations = !DCI.isBeforeLegalizeOps();
|
||||||
if (Arg.hasOneUse() && Subtarget.hasAnyFMA()) {
|
if (TLI.getNegatibleCost(Arg, DAG, LegalOperations, CodeSize) !=
|
||||||
switch (Arg.getOpcode()) {
|
TargetLowering::NegatibleCost::Expensive)
|
||||||
case ISD::FMA:
|
return DAG.getBitcast(
|
||||||
case X86ISD::FMSUB:
|
OrigVT, TLI.getNegatedExpression(Arg, DAG, LegalOperations, CodeSize));
|
||||||
case X86ISD::FNMADD:
|
|
||||||
case X86ISD::FNMSUB:
|
|
||||||
case X86ISD::FMADD_RND:
|
|
||||||
case X86ISD::FMSUB_RND:
|
|
||||||
case X86ISD::FNMADD_RND:
|
|
||||||
case X86ISD::FNMSUB_RND: {
|
|
||||||
// We can't handle scalar intrinsic node here because it would only
|
|
||||||
// invert one element and not the whole vector. But we could try to handle
|
|
||||||
// a negation of the lower element only.
|
|
||||||
unsigned NewOpcode = negateFMAOpcode(Arg.getOpcode(), false, false, true);
|
|
||||||
return DAG.getBitcast(OrigVT, DAG.getNode(NewOpcode, DL, VT, Arg->ops()));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return SDValue();
|
return SDValue();
|
||||||
}
|
}
|
||||||
|
|
@ -43392,7 +43380,7 @@ static SDValue combineXor(SDNode *N, SelectionDAG &DAG,
|
||||||
if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget))
|
if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget))
|
||||||
return FPLogic;
|
return FPLogic;
|
||||||
|
|
||||||
return combineFneg(N, DAG, Subtarget);
|
return combineFneg(N, DAG, DCI, Subtarget);
|
||||||
}
|
}
|
||||||
|
|
||||||
static SDValue combineBEXTR(SDNode *N, SelectionDAG &DAG,
|
static SDValue combineBEXTR(SDNode *N, SelectionDAG &DAG,
|
||||||
|
|
@ -43497,6 +43485,7 @@ static SDValue combineFAndn(SDNode *N, SelectionDAG &DAG,
|
||||||
|
|
||||||
/// Do target-specific dag combines on X86ISD::FOR and X86ISD::FXOR nodes.
|
/// Do target-specific dag combines on X86ISD::FOR and X86ISD::FXOR nodes.
|
||||||
static SDValue combineFOr(SDNode *N, SelectionDAG &DAG,
|
static SDValue combineFOr(SDNode *N, SelectionDAG &DAG,
|
||||||
|
TargetLowering::DAGCombinerInfo &DCI,
|
||||||
const X86Subtarget &Subtarget) {
|
const X86Subtarget &Subtarget) {
|
||||||
assert(N->getOpcode() == X86ISD::FOR || N->getOpcode() == X86ISD::FXOR);
|
assert(N->getOpcode() == X86ISD::FOR || N->getOpcode() == X86ISD::FXOR);
|
||||||
|
|
||||||
|
|
@ -43508,7 +43497,7 @@ static SDValue combineFOr(SDNode *N, SelectionDAG &DAG,
|
||||||
if (isNullFPScalarOrVectorConst(N->getOperand(1)))
|
if (isNullFPScalarOrVectorConst(N->getOperand(1)))
|
||||||
return N->getOperand(0);
|
return N->getOperand(0);
|
||||||
|
|
||||||
if (SDValue NewVal = combineFneg(N, DAG, Subtarget))
|
if (SDValue NewVal = combineFneg(N, DAG, DCI, Subtarget))
|
||||||
return NewVal;
|
return NewVal;
|
||||||
|
|
||||||
return lowerX86FPLogicOp(N, DAG, Subtarget);
|
return lowerX86FPLogicOp(N, DAG, Subtarget);
|
||||||
|
|
@ -46672,14 +46661,14 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
|
||||||
return combineUIntToFP(N, DAG, Subtarget);
|
return combineUIntToFP(N, DAG, Subtarget);
|
||||||
case ISD::FADD:
|
case ISD::FADD:
|
||||||
case ISD::FSUB: return combineFaddFsub(N, DAG, Subtarget);
|
case ISD::FSUB: return combineFaddFsub(N, DAG, Subtarget);
|
||||||
case ISD::FNEG: return combineFneg(N, DAG, Subtarget);
|
case ISD::FNEG: return combineFneg(N, DAG, DCI, Subtarget);
|
||||||
case ISD::TRUNCATE: return combineTruncate(N, DAG, Subtarget);
|
case ISD::TRUNCATE: return combineTruncate(N, DAG, Subtarget);
|
||||||
case X86ISD::VTRUNC: return combineVTRUNC(N, DAG);
|
case X86ISD::VTRUNC: return combineVTRUNC(N, DAG);
|
||||||
case X86ISD::ANDNP: return combineAndnp(N, DAG, DCI, Subtarget);
|
case X86ISD::ANDNP: return combineAndnp(N, DAG, DCI, Subtarget);
|
||||||
case X86ISD::FAND: return combineFAnd(N, DAG, Subtarget);
|
case X86ISD::FAND: return combineFAnd(N, DAG, Subtarget);
|
||||||
case X86ISD::FANDN: return combineFAndn(N, DAG, Subtarget);
|
case X86ISD::FANDN: return combineFAndn(N, DAG, Subtarget);
|
||||||
case X86ISD::FXOR:
|
case X86ISD::FXOR:
|
||||||
case X86ISD::FOR: return combineFOr(N, DAG, Subtarget);
|
case X86ISD::FOR: return combineFOr(N, DAG, DCI, Subtarget);
|
||||||
case X86ISD::FMIN:
|
case X86ISD::FMIN:
|
||||||
case X86ISD::FMAX: return combineFMinFMax(N, DAG);
|
case X86ISD::FMAX: return combineFMinFMax(N, DAG);
|
||||||
case ISD::FMINNUM:
|
case ISD::FMINNUM:
|
||||||
|
|
|
||||||
|
|
@ -243,15 +243,10 @@ define i16 @test_reduce_v8i16(<8 x i16> %a0) {
|
||||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm0
|
; X86-SSE2-NEXT: pxor %xmm2, %xmm0
|
||||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
|
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
|
||||||
; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
||||||
; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
|
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm0
|
|
||||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
|
||||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm0
|
|
||||||
; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm0
|
; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm0
|
||||||
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
|
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
|
||||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
|
|
||||||
; X86-SSE2-NEXT: psrld $16, %xmm1
|
; X86-SSE2-NEXT: psrld $16, %xmm1
|
||||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
|
|
||||||
; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
||||||
; X86-SSE2-NEXT: movd %xmm1, %eax
|
; X86-SSE2-NEXT: movd %xmm1, %eax
|
||||||
; X86-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
|
; X86-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
|
||||||
|
|
@ -285,15 +280,10 @@ define i16 @test_reduce_v8i16(<8 x i16> %a0) {
|
||||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm0
|
; X64-SSE2-NEXT: pxor %xmm2, %xmm0
|
||||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
|
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
|
||||||
; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
||||||
; X64-SSE2-NEXT: movdqa %xmm1, %xmm0
|
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm0
|
|
||||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
|
||||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm0
|
|
||||||
; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm0
|
; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm0
|
||||||
; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
|
; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
|
||||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
|
|
||||||
; X64-SSE2-NEXT: psrld $16, %xmm1
|
; X64-SSE2-NEXT: psrld $16, %xmm1
|
||||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
|
|
||||||
; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
||||||
; X64-SSE2-NEXT: movd %xmm1, %eax
|
; X64-SSE2-NEXT: movd %xmm1, %eax
|
||||||
; X64-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
|
; X64-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
|
||||||
|
|
@ -840,20 +830,12 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
|
||||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
|
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
|
||||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm0
|
; X86-SSE2-NEXT: pxor %xmm2, %xmm0
|
||||||
; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm0
|
; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm0
|
||||||
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
|
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
|
|
||||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
|
||||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
|
|
||||||
; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
||||||
; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
|
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm0
|
|
||||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
|
||||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm0
|
|
||||||
; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm0
|
; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm0
|
||||||
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
|
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
|
||||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
|
|
||||||
; X86-SSE2-NEXT: psrld $16, %xmm1
|
; X86-SSE2-NEXT: psrld $16, %xmm1
|
||||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
|
|
||||||
; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
||||||
; X86-SSE2-NEXT: movd %xmm1, %eax
|
; X86-SSE2-NEXT: movd %xmm1, %eax
|
||||||
; X86-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
|
; X86-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
|
||||||
|
|
@ -903,20 +885,12 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
|
||||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
|
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
|
||||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm0
|
; X64-SSE2-NEXT: pxor %xmm2, %xmm0
|
||||||
; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm0
|
; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm0
|
||||||
; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
|
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
|
|
||||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
|
||||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
|
|
||||||
; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
||||||
; X64-SSE2-NEXT: movdqa %xmm1, %xmm0
|
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm0
|
|
||||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
|
||||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm0
|
|
||||||
; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm0
|
; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm0
|
||||||
; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
|
; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
|
||||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
|
|
||||||
; X64-SSE2-NEXT: psrld $16, %xmm1
|
; X64-SSE2-NEXT: psrld $16, %xmm1
|
||||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
|
|
||||||
; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
||||||
; X64-SSE2-NEXT: movd %xmm1, %eax
|
; X64-SSE2-NEXT: movd %xmm1, %eax
|
||||||
; X64-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
|
; X64-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
|
||||||
|
|
@ -1678,20 +1652,12 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
|
||||||
; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm2
|
; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm2
|
||||||
; X86-SSE2-NEXT: pxor %xmm4, %xmm0
|
; X86-SSE2-NEXT: pxor %xmm4, %xmm0
|
||||||
; X86-SSE2-NEXT: pmaxsw %xmm2, %xmm0
|
; X86-SSE2-NEXT: pmaxsw %xmm2, %xmm0
|
||||||
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
|
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||||
; X86-SSE2-NEXT: pxor %xmm4, %xmm1
|
|
||||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
|
||||||
; X86-SSE2-NEXT: pxor %xmm4, %xmm1
|
|
||||||
; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
||||||
; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
|
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||||
; X86-SSE2-NEXT: pxor %xmm4, %xmm0
|
|
||||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
|
||||||
; X86-SSE2-NEXT: pxor %xmm4, %xmm0
|
|
||||||
; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm0
|
; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm0
|
||||||
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
|
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
|
||||||
; X86-SSE2-NEXT: pxor %xmm4, %xmm1
|
|
||||||
; X86-SSE2-NEXT: psrld $16, %xmm1
|
; X86-SSE2-NEXT: psrld $16, %xmm1
|
||||||
; X86-SSE2-NEXT: pxor %xmm4, %xmm1
|
|
||||||
; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
||||||
; X86-SSE2-NEXT: movd %xmm1, %eax
|
; X86-SSE2-NEXT: movd %xmm1, %eax
|
||||||
; X86-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
|
; X86-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
|
||||||
|
|
@ -1751,20 +1717,12 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
|
||||||
; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm2
|
; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm2
|
||||||
; X64-SSE2-NEXT: pxor %xmm4, %xmm0
|
; X64-SSE2-NEXT: pxor %xmm4, %xmm0
|
||||||
; X64-SSE2-NEXT: pmaxsw %xmm2, %xmm0
|
; X64-SSE2-NEXT: pmaxsw %xmm2, %xmm0
|
||||||
; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
|
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||||
; X64-SSE2-NEXT: pxor %xmm4, %xmm1
|
|
||||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
|
||||||
; X64-SSE2-NEXT: pxor %xmm4, %xmm1
|
|
||||||
; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
||||||
; X64-SSE2-NEXT: movdqa %xmm1, %xmm0
|
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||||
; X64-SSE2-NEXT: pxor %xmm4, %xmm0
|
|
||||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
|
||||||
; X64-SSE2-NEXT: pxor %xmm4, %xmm0
|
|
||||||
; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm0
|
; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm0
|
||||||
; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
|
; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
|
||||||
; X64-SSE2-NEXT: pxor %xmm4, %xmm1
|
|
||||||
; X64-SSE2-NEXT: psrld $16, %xmm1
|
; X64-SSE2-NEXT: psrld $16, %xmm1
|
||||||
; X64-SSE2-NEXT: pxor %xmm4, %xmm1
|
|
||||||
; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
||||||
; X64-SSE2-NEXT: movd %xmm1, %eax
|
; X64-SSE2-NEXT: movd %xmm1, %eax
|
||||||
; X64-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
|
; X64-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
|
||||||
|
|
@ -2034,15 +1992,10 @@ define i16 @test_reduce_v16i16_v8i16(<16 x i16> %a0) {
|
||||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm0
|
; X86-SSE2-NEXT: pxor %xmm2, %xmm0
|
||||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
|
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
|
||||||
; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
||||||
; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
|
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm0
|
|
||||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
|
||||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm0
|
|
||||||
; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm0
|
; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm0
|
||||||
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
|
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
|
||||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
|
|
||||||
; X86-SSE2-NEXT: psrld $16, %xmm1
|
; X86-SSE2-NEXT: psrld $16, %xmm1
|
||||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
|
|
||||||
; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
||||||
; X86-SSE2-NEXT: movd %xmm1, %eax
|
; X86-SSE2-NEXT: movd %xmm1, %eax
|
||||||
; X86-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
|
; X86-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
|
||||||
|
|
@ -2077,15 +2030,10 @@ define i16 @test_reduce_v16i16_v8i16(<16 x i16> %a0) {
|
||||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm0
|
; X64-SSE2-NEXT: pxor %xmm2, %xmm0
|
||||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
|
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
|
||||||
; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
||||||
; X64-SSE2-NEXT: movdqa %xmm1, %xmm0
|
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm0
|
|
||||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
|
||||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm0
|
|
||||||
; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm0
|
; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm0
|
||||||
; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
|
; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
|
||||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
|
|
||||||
; X64-SSE2-NEXT: psrld $16, %xmm1
|
; X64-SSE2-NEXT: psrld $16, %xmm1
|
||||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
|
|
||||||
; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
||||||
; X64-SSE2-NEXT: movd %xmm1, %eax
|
; X64-SSE2-NEXT: movd %xmm1, %eax
|
||||||
; X64-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
|
; X64-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
|
||||||
|
|
@ -2154,15 +2102,10 @@ define i16 @test_reduce_v32i16_v8i16(<32 x i16> %a0) {
|
||||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm0
|
; X86-SSE2-NEXT: pxor %xmm2, %xmm0
|
||||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
|
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
|
||||||
; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
||||||
; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
|
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm0
|
|
||||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
|
||||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm0
|
|
||||||
; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm0
|
; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm0
|
||||||
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
|
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
|
||||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
|
|
||||||
; X86-SSE2-NEXT: psrld $16, %xmm1
|
; X86-SSE2-NEXT: psrld $16, %xmm1
|
||||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
|
|
||||||
; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
||||||
; X86-SSE2-NEXT: movd %xmm1, %eax
|
; X86-SSE2-NEXT: movd %xmm1, %eax
|
||||||
; X86-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
|
; X86-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
|
||||||
|
|
@ -2197,15 +2140,10 @@ define i16 @test_reduce_v32i16_v8i16(<32 x i16> %a0) {
|
||||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm0
|
; X64-SSE2-NEXT: pxor %xmm2, %xmm0
|
||||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
|
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
|
||||||
; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
||||||
; X64-SSE2-NEXT: movdqa %xmm1, %xmm0
|
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm0
|
|
||||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
|
||||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm0
|
|
||||||
; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm0
|
; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm0
|
||||||
; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
|
; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
|
||||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
|
|
||||||
; X64-SSE2-NEXT: psrld $16, %xmm1
|
; X64-SSE2-NEXT: psrld $16, %xmm1
|
||||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
|
|
||||||
; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
||||||
; X64-SSE2-NEXT: movd %xmm1, %eax
|
; X64-SSE2-NEXT: movd %xmm1, %eax
|
||||||
; X64-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
|
; X64-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
|
||||||
|
|
|
||||||
|
|
@ -245,15 +245,10 @@ define i16 @test_reduce_v8i16(<8 x i16> %a0) {
|
||||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm0
|
; X86-SSE2-NEXT: pxor %xmm2, %xmm0
|
||||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
|
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
|
||||||
; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
|
; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
|
||||||
; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
|
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm0
|
|
||||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
|
||||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm0
|
|
||||||
; X86-SSE2-NEXT: pminsw %xmm1, %xmm0
|
; X86-SSE2-NEXT: pminsw %xmm1, %xmm0
|
||||||
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
|
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
|
||||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
|
|
||||||
; X86-SSE2-NEXT: psrld $16, %xmm1
|
; X86-SSE2-NEXT: psrld $16, %xmm1
|
||||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
|
|
||||||
; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
|
; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
|
||||||
; X86-SSE2-NEXT: movd %xmm1, %eax
|
; X86-SSE2-NEXT: movd %xmm1, %eax
|
||||||
; X86-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
|
; X86-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
|
||||||
|
|
@ -281,15 +276,10 @@ define i16 @test_reduce_v8i16(<8 x i16> %a0) {
|
||||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm0
|
; X64-SSE2-NEXT: pxor %xmm2, %xmm0
|
||||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
|
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
|
||||||
; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
|
; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
|
||||||
; X64-SSE2-NEXT: movdqa %xmm1, %xmm0
|
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm0
|
|
||||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
|
||||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm0
|
|
||||||
; X64-SSE2-NEXT: pminsw %xmm1, %xmm0
|
; X64-SSE2-NEXT: pminsw %xmm1, %xmm0
|
||||||
; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
|
; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
|
||||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
|
|
||||||
; X64-SSE2-NEXT: psrld $16, %xmm1
|
; X64-SSE2-NEXT: psrld $16, %xmm1
|
||||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
|
|
||||||
; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
|
; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
|
||||||
; X64-SSE2-NEXT: movd %xmm1, %eax
|
; X64-SSE2-NEXT: movd %xmm1, %eax
|
||||||
; X64-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
|
; X64-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
|
||||||
|
|
@ -780,20 +770,12 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
|
||||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
|
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
|
||||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm0
|
; X86-SSE2-NEXT: pxor %xmm2, %xmm0
|
||||||
; X86-SSE2-NEXT: pminsw %xmm1, %xmm0
|
; X86-SSE2-NEXT: pminsw %xmm1, %xmm0
|
||||||
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
|
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
|
|
||||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
|
||||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
|
|
||||||
; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
|
; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
|
||||||
; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
|
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm0
|
|
||||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
|
||||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm0
|
|
||||||
; X86-SSE2-NEXT: pminsw %xmm1, %xmm0
|
; X86-SSE2-NEXT: pminsw %xmm1, %xmm0
|
||||||
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
|
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
|
||||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
|
|
||||||
; X86-SSE2-NEXT: psrld $16, %xmm1
|
; X86-SSE2-NEXT: psrld $16, %xmm1
|
||||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
|
|
||||||
; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
|
; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
|
||||||
; X86-SSE2-NEXT: movd %xmm1, %eax
|
; X86-SSE2-NEXT: movd %xmm1, %eax
|
||||||
; X86-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
|
; X86-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
|
||||||
|
|
@ -834,20 +816,12 @@ define i16 @test_reduce_v16i16(<16 x i16> %a0) {
|
||||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
|
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
|
||||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm0
|
; X64-SSE2-NEXT: pxor %xmm2, %xmm0
|
||||||
; X64-SSE2-NEXT: pminsw %xmm1, %xmm0
|
; X64-SSE2-NEXT: pminsw %xmm1, %xmm0
|
||||||
; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
|
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
|
|
||||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
|
||||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
|
|
||||||
; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
|
; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
|
||||||
; X64-SSE2-NEXT: movdqa %xmm1, %xmm0
|
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm0
|
|
||||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
|
||||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm0
|
|
||||||
; X64-SSE2-NEXT: pminsw %xmm1, %xmm0
|
; X64-SSE2-NEXT: pminsw %xmm1, %xmm0
|
||||||
; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
|
; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
|
||||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
|
|
||||||
; X64-SSE2-NEXT: psrld $16, %xmm1
|
; X64-SSE2-NEXT: psrld $16, %xmm1
|
||||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
|
|
||||||
; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
|
; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
|
||||||
; X64-SSE2-NEXT: movd %xmm1, %eax
|
; X64-SSE2-NEXT: movd %xmm1, %eax
|
||||||
; X64-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
|
; X64-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
|
||||||
|
|
@ -1582,20 +1556,12 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
|
||||||
; X86-SSE2-NEXT: pminsw %xmm1, %xmm2
|
; X86-SSE2-NEXT: pminsw %xmm1, %xmm2
|
||||||
; X86-SSE2-NEXT: pxor %xmm4, %xmm0
|
; X86-SSE2-NEXT: pxor %xmm4, %xmm0
|
||||||
; X86-SSE2-NEXT: pminsw %xmm2, %xmm0
|
; X86-SSE2-NEXT: pminsw %xmm2, %xmm0
|
||||||
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
|
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||||
; X86-SSE2-NEXT: pxor %xmm4, %xmm1
|
|
||||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
|
||||||
; X86-SSE2-NEXT: pxor %xmm4, %xmm1
|
|
||||||
; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
|
; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
|
||||||
; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
|
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||||
; X86-SSE2-NEXT: pxor %xmm4, %xmm0
|
|
||||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
|
||||||
; X86-SSE2-NEXT: pxor %xmm4, %xmm0
|
|
||||||
; X86-SSE2-NEXT: pminsw %xmm1, %xmm0
|
; X86-SSE2-NEXT: pminsw %xmm1, %xmm0
|
||||||
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
|
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
|
||||||
; X86-SSE2-NEXT: pxor %xmm4, %xmm1
|
|
||||||
; X86-SSE2-NEXT: psrld $16, %xmm1
|
; X86-SSE2-NEXT: psrld $16, %xmm1
|
||||||
; X86-SSE2-NEXT: pxor %xmm4, %xmm1
|
|
||||||
; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
|
; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
|
||||||
; X86-SSE2-NEXT: movd %xmm1, %eax
|
; X86-SSE2-NEXT: movd %xmm1, %eax
|
||||||
; X86-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
|
; X86-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
|
||||||
|
|
@ -1646,20 +1612,12 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
|
||||||
; X64-SSE2-NEXT: pminsw %xmm1, %xmm2
|
; X64-SSE2-NEXT: pminsw %xmm1, %xmm2
|
||||||
; X64-SSE2-NEXT: pxor %xmm4, %xmm0
|
; X64-SSE2-NEXT: pxor %xmm4, %xmm0
|
||||||
; X64-SSE2-NEXT: pminsw %xmm2, %xmm0
|
; X64-SSE2-NEXT: pminsw %xmm2, %xmm0
|
||||||
; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
|
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||||
; X64-SSE2-NEXT: pxor %xmm4, %xmm1
|
|
||||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
|
||||||
; X64-SSE2-NEXT: pxor %xmm4, %xmm1
|
|
||||||
; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
|
; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
|
||||||
; X64-SSE2-NEXT: movdqa %xmm1, %xmm0
|
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||||
; X64-SSE2-NEXT: pxor %xmm4, %xmm0
|
|
||||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
|
||||||
; X64-SSE2-NEXT: pxor %xmm4, %xmm0
|
|
||||||
; X64-SSE2-NEXT: pminsw %xmm1, %xmm0
|
; X64-SSE2-NEXT: pminsw %xmm1, %xmm0
|
||||||
; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
|
; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
|
||||||
; X64-SSE2-NEXT: pxor %xmm4, %xmm1
|
|
||||||
; X64-SSE2-NEXT: psrld $16, %xmm1
|
; X64-SSE2-NEXT: psrld $16, %xmm1
|
||||||
; X64-SSE2-NEXT: pxor %xmm4, %xmm1
|
|
||||||
; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
|
; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
|
||||||
; X64-SSE2-NEXT: movd %xmm1, %eax
|
; X64-SSE2-NEXT: movd %xmm1, %eax
|
||||||
; X64-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
|
; X64-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
|
||||||
|
|
@ -1898,15 +1856,10 @@ define i16 @test_reduce_v16i16_v8i16(<16 x i16> %a0) {
|
||||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm0
|
; X86-SSE2-NEXT: pxor %xmm2, %xmm0
|
||||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
|
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
|
||||||
; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
|
; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
|
||||||
; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
|
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm0
|
|
||||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
|
||||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm0
|
|
||||||
; X86-SSE2-NEXT: pminsw %xmm1, %xmm0
|
; X86-SSE2-NEXT: pminsw %xmm1, %xmm0
|
||||||
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
|
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
|
||||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
|
|
||||||
; X86-SSE2-NEXT: psrld $16, %xmm1
|
; X86-SSE2-NEXT: psrld $16, %xmm1
|
||||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
|
|
||||||
; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
|
; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
|
||||||
; X86-SSE2-NEXT: movd %xmm1, %eax
|
; X86-SSE2-NEXT: movd %xmm1, %eax
|
||||||
; X86-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
|
; X86-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
|
||||||
|
|
@ -1935,15 +1888,10 @@ define i16 @test_reduce_v16i16_v8i16(<16 x i16> %a0) {
|
||||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm0
|
; X64-SSE2-NEXT: pxor %xmm2, %xmm0
|
||||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
|
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
|
||||||
; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
|
; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
|
||||||
; X64-SSE2-NEXT: movdqa %xmm1, %xmm0
|
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm0
|
|
||||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
|
||||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm0
|
|
||||||
; X64-SSE2-NEXT: pminsw %xmm1, %xmm0
|
; X64-SSE2-NEXT: pminsw %xmm1, %xmm0
|
||||||
; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
|
; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
|
||||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
|
|
||||||
; X64-SSE2-NEXT: psrld $16, %xmm1
|
; X64-SSE2-NEXT: psrld $16, %xmm1
|
||||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
|
|
||||||
; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
|
; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
|
||||||
; X64-SSE2-NEXT: movd %xmm1, %eax
|
; X64-SSE2-NEXT: movd %xmm1, %eax
|
||||||
; X64-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
|
; X64-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
|
||||||
|
|
@ -1985,15 +1933,10 @@ define i16 @test_reduce_v32i16_v8i16(<32 x i16> %a0) {
|
||||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm0
|
; X86-SSE2-NEXT: pxor %xmm2, %xmm0
|
||||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
|
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
|
||||||
; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
|
; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
|
||||||
; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
|
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm0
|
|
||||||
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
|
||||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm0
|
|
||||||
; X86-SSE2-NEXT: pminsw %xmm1, %xmm0
|
; X86-SSE2-NEXT: pminsw %xmm1, %xmm0
|
||||||
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
|
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
|
||||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
|
|
||||||
; X86-SSE2-NEXT: psrld $16, %xmm1
|
; X86-SSE2-NEXT: psrld $16, %xmm1
|
||||||
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
|
|
||||||
; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
|
; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
|
||||||
; X86-SSE2-NEXT: movd %xmm1, %eax
|
; X86-SSE2-NEXT: movd %xmm1, %eax
|
||||||
; X86-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
|
; X86-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
|
||||||
|
|
@ -2022,15 +1965,10 @@ define i16 @test_reduce_v32i16_v8i16(<32 x i16> %a0) {
|
||||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm0
|
; X64-SSE2-NEXT: pxor %xmm2, %xmm0
|
||||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
|
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
|
||||||
; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
|
; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
|
||||||
; X64-SSE2-NEXT: movdqa %xmm1, %xmm0
|
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm0
|
|
||||||
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
|
||||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm0
|
|
||||||
; X64-SSE2-NEXT: pminsw %xmm1, %xmm0
|
; X64-SSE2-NEXT: pminsw %xmm1, %xmm0
|
||||||
; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
|
; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
|
||||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
|
|
||||||
; X64-SSE2-NEXT: psrld $16, %xmm1
|
; X64-SSE2-NEXT: psrld $16, %xmm1
|
||||||
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
|
|
||||||
; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
|
; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
|
||||||
; X64-SSE2-NEXT: movd %xmm1, %eax
|
; X64-SSE2-NEXT: movd %xmm1, %eax
|
||||||
; X64-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
|
; X64-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
|
||||||
|
|
|
||||||
|
|
@ -1208,9 +1208,7 @@ define i16 @test_v4i16(<4 x i16> %a0) {
|
||||||
; SSE2-NEXT: pxor %xmm2, %xmm1
|
; SSE2-NEXT: pxor %xmm2, %xmm1
|
||||||
; SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
; SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
||||||
; SSE2-NEXT: movdqa %xmm1, %xmm0
|
; SSE2-NEXT: movdqa %xmm1, %xmm0
|
||||||
; SSE2-NEXT: pxor %xmm2, %xmm0
|
|
||||||
; SSE2-NEXT: psrld $16, %xmm0
|
; SSE2-NEXT: psrld $16, %xmm0
|
||||||
; SSE2-NEXT: pxor %xmm2, %xmm0
|
|
||||||
; SSE2-NEXT: pmaxsw %xmm1, %xmm0
|
; SSE2-NEXT: pmaxsw %xmm1, %xmm0
|
||||||
; SSE2-NEXT: movd %xmm0, %eax
|
; SSE2-NEXT: movd %xmm0, %eax
|
||||||
; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
|
; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
|
||||||
|
|
@ -1259,15 +1257,10 @@ define i16 @test_v8i16(<8 x i16> %a0) {
|
||||||
; SSE2-NEXT: pxor %xmm2, %xmm0
|
; SSE2-NEXT: pxor %xmm2, %xmm0
|
||||||
; SSE2-NEXT: pxor %xmm2, %xmm1
|
; SSE2-NEXT: pxor %xmm2, %xmm1
|
||||||
; SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
; SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
||||||
; SSE2-NEXT: movdqa %xmm1, %xmm0
|
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||||
; SSE2-NEXT: pxor %xmm2, %xmm0
|
|
||||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
|
||||||
; SSE2-NEXT: pxor %xmm2, %xmm0
|
|
||||||
; SSE2-NEXT: pmaxsw %xmm1, %xmm0
|
; SSE2-NEXT: pmaxsw %xmm1, %xmm0
|
||||||
; SSE2-NEXT: movdqa %xmm0, %xmm1
|
; SSE2-NEXT: movdqa %xmm0, %xmm1
|
||||||
; SSE2-NEXT: pxor %xmm2, %xmm1
|
|
||||||
; SSE2-NEXT: psrld $16, %xmm1
|
; SSE2-NEXT: psrld $16, %xmm1
|
||||||
; SSE2-NEXT: pxor %xmm2, %xmm1
|
|
||||||
; SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
; SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
||||||
; SSE2-NEXT: movd %xmm1, %eax
|
; SSE2-NEXT: movd %xmm1, %eax
|
||||||
; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
|
; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
|
||||||
|
|
@ -1324,20 +1317,12 @@ define i16 @test_v16i16(<16 x i16> %a0) {
|
||||||
; SSE2-NEXT: pxor %xmm2, %xmm1
|
; SSE2-NEXT: pxor %xmm2, %xmm1
|
||||||
; SSE2-NEXT: pxor %xmm2, %xmm0
|
; SSE2-NEXT: pxor %xmm2, %xmm0
|
||||||
; SSE2-NEXT: pmaxsw %xmm1, %xmm0
|
; SSE2-NEXT: pmaxsw %xmm1, %xmm0
|
||||||
; SSE2-NEXT: movdqa %xmm0, %xmm1
|
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||||
; SSE2-NEXT: pxor %xmm2, %xmm1
|
|
||||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
|
||||||
; SSE2-NEXT: pxor %xmm2, %xmm1
|
|
||||||
; SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
; SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
||||||
; SSE2-NEXT: movdqa %xmm1, %xmm0
|
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||||
; SSE2-NEXT: pxor %xmm2, %xmm0
|
|
||||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
|
||||||
; SSE2-NEXT: pxor %xmm2, %xmm0
|
|
||||||
; SSE2-NEXT: pmaxsw %xmm1, %xmm0
|
; SSE2-NEXT: pmaxsw %xmm1, %xmm0
|
||||||
; SSE2-NEXT: movdqa %xmm0, %xmm1
|
; SSE2-NEXT: movdqa %xmm0, %xmm1
|
||||||
; SSE2-NEXT: pxor %xmm2, %xmm1
|
|
||||||
; SSE2-NEXT: psrld $16, %xmm1
|
; SSE2-NEXT: psrld $16, %xmm1
|
||||||
; SSE2-NEXT: pxor %xmm2, %xmm1
|
|
||||||
; SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
; SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
||||||
; SSE2-NEXT: movd %xmm1, %eax
|
; SSE2-NEXT: movd %xmm1, %eax
|
||||||
; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
|
; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
|
||||||
|
|
@ -1419,20 +1404,12 @@ define i16 @test_v32i16(<32 x i16> %a0) {
|
||||||
; SSE2-NEXT: pmaxsw %xmm1, %xmm2
|
; SSE2-NEXT: pmaxsw %xmm1, %xmm2
|
||||||
; SSE2-NEXT: pxor %xmm4, %xmm0
|
; SSE2-NEXT: pxor %xmm4, %xmm0
|
||||||
; SSE2-NEXT: pmaxsw %xmm2, %xmm0
|
; SSE2-NEXT: pmaxsw %xmm2, %xmm0
|
||||||
; SSE2-NEXT: movdqa %xmm0, %xmm1
|
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||||
; SSE2-NEXT: pxor %xmm4, %xmm1
|
|
||||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
|
||||||
; SSE2-NEXT: pxor %xmm4, %xmm1
|
|
||||||
; SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
; SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
||||||
; SSE2-NEXT: movdqa %xmm1, %xmm0
|
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||||
; SSE2-NEXT: pxor %xmm4, %xmm0
|
|
||||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
|
||||||
; SSE2-NEXT: pxor %xmm4, %xmm0
|
|
||||||
; SSE2-NEXT: pmaxsw %xmm1, %xmm0
|
; SSE2-NEXT: pmaxsw %xmm1, %xmm0
|
||||||
; SSE2-NEXT: movdqa %xmm0, %xmm1
|
; SSE2-NEXT: movdqa %xmm0, %xmm1
|
||||||
; SSE2-NEXT: pxor %xmm4, %xmm1
|
|
||||||
; SSE2-NEXT: psrld $16, %xmm1
|
; SSE2-NEXT: psrld $16, %xmm1
|
||||||
; SSE2-NEXT: pxor %xmm4, %xmm1
|
|
||||||
; SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
; SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
||||||
; SSE2-NEXT: movd %xmm1, %eax
|
; SSE2-NEXT: movd %xmm1, %eax
|
||||||
; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
|
; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
|
||||||
|
|
@ -1532,20 +1509,12 @@ define i16 @test_v64i16(<64 x i16> %a0) {
|
||||||
; SSE2-NEXT: pmaxsw %xmm5, %xmm1
|
; SSE2-NEXT: pmaxsw %xmm5, %xmm1
|
||||||
; SSE2-NEXT: pmaxsw %xmm4, %xmm1
|
; SSE2-NEXT: pmaxsw %xmm4, %xmm1
|
||||||
; SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
; SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
||||||
; SSE2-NEXT: movdqa %xmm1, %xmm0
|
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||||
; SSE2-NEXT: pxor %xmm8, %xmm0
|
|
||||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
|
||||||
; SSE2-NEXT: pxor %xmm8, %xmm0
|
|
||||||
; SSE2-NEXT: pmaxsw %xmm1, %xmm0
|
; SSE2-NEXT: pmaxsw %xmm1, %xmm0
|
||||||
; SSE2-NEXT: movdqa %xmm0, %xmm1
|
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||||
; SSE2-NEXT: pxor %xmm8, %xmm1
|
|
||||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,2,3]
|
|
||||||
; SSE2-NEXT: pxor %xmm8, %xmm1
|
|
||||||
; SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
; SSE2-NEXT: pmaxsw %xmm0, %xmm1
|
||||||
; SSE2-NEXT: movdqa %xmm1, %xmm0
|
; SSE2-NEXT: movdqa %xmm1, %xmm0
|
||||||
; SSE2-NEXT: pxor %xmm8, %xmm0
|
|
||||||
; SSE2-NEXT: psrld $16, %xmm0
|
; SSE2-NEXT: psrld $16, %xmm0
|
||||||
; SSE2-NEXT: pxor %xmm8, %xmm0
|
|
||||||
; SSE2-NEXT: pmaxsw %xmm1, %xmm0
|
; SSE2-NEXT: pmaxsw %xmm1, %xmm0
|
||||||
; SSE2-NEXT: movd %xmm0, %eax
|
; SSE2-NEXT: movd %xmm0, %eax
|
||||||
; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
|
; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
|
||||||
|
|
|
||||||
|
|
@ -1206,9 +1206,7 @@ define i16 @test_v4i16(<4 x i16> %a0) {
|
||||||
; SSE2-NEXT: pxor %xmm2, %xmm1
|
; SSE2-NEXT: pxor %xmm2, %xmm1
|
||||||
; SSE2-NEXT: pminsw %xmm0, %xmm1
|
; SSE2-NEXT: pminsw %xmm0, %xmm1
|
||||||
; SSE2-NEXT: movdqa %xmm1, %xmm0
|
; SSE2-NEXT: movdqa %xmm1, %xmm0
|
||||||
; SSE2-NEXT: pxor %xmm2, %xmm0
|
|
||||||
; SSE2-NEXT: psrld $16, %xmm0
|
; SSE2-NEXT: psrld $16, %xmm0
|
||||||
; SSE2-NEXT: pxor %xmm2, %xmm0
|
|
||||||
; SSE2-NEXT: pminsw %xmm1, %xmm0
|
; SSE2-NEXT: pminsw %xmm1, %xmm0
|
||||||
; SSE2-NEXT: movd %xmm0, %eax
|
; SSE2-NEXT: movd %xmm0, %eax
|
||||||
; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
|
; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
|
||||||
|
|
@ -1257,15 +1255,10 @@ define i16 @test_v8i16(<8 x i16> %a0) {
|
||||||
; SSE2-NEXT: pxor %xmm2, %xmm0
|
; SSE2-NEXT: pxor %xmm2, %xmm0
|
||||||
; SSE2-NEXT: pxor %xmm2, %xmm1
|
; SSE2-NEXT: pxor %xmm2, %xmm1
|
||||||
; SSE2-NEXT: pminsw %xmm0, %xmm1
|
; SSE2-NEXT: pminsw %xmm0, %xmm1
|
||||||
; SSE2-NEXT: movdqa %xmm1, %xmm0
|
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||||
; SSE2-NEXT: pxor %xmm2, %xmm0
|
|
||||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
|
||||||
; SSE2-NEXT: pxor %xmm2, %xmm0
|
|
||||||
; SSE2-NEXT: pminsw %xmm1, %xmm0
|
; SSE2-NEXT: pminsw %xmm1, %xmm0
|
||||||
; SSE2-NEXT: movdqa %xmm0, %xmm1
|
; SSE2-NEXT: movdqa %xmm0, %xmm1
|
||||||
; SSE2-NEXT: pxor %xmm2, %xmm1
|
|
||||||
; SSE2-NEXT: psrld $16, %xmm1
|
; SSE2-NEXT: psrld $16, %xmm1
|
||||||
; SSE2-NEXT: pxor %xmm2, %xmm1
|
|
||||||
; SSE2-NEXT: pminsw %xmm0, %xmm1
|
; SSE2-NEXT: pminsw %xmm0, %xmm1
|
||||||
; SSE2-NEXT: movd %xmm1, %eax
|
; SSE2-NEXT: movd %xmm1, %eax
|
||||||
; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
|
; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
|
||||||
|
|
@ -1303,20 +1296,12 @@ define i16 @test_v16i16(<16 x i16> %a0) {
|
||||||
; SSE2-NEXT: pxor %xmm2, %xmm1
|
; SSE2-NEXT: pxor %xmm2, %xmm1
|
||||||
; SSE2-NEXT: pxor %xmm2, %xmm0
|
; SSE2-NEXT: pxor %xmm2, %xmm0
|
||||||
; SSE2-NEXT: pminsw %xmm1, %xmm0
|
; SSE2-NEXT: pminsw %xmm1, %xmm0
|
||||||
; SSE2-NEXT: movdqa %xmm0, %xmm1
|
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||||
; SSE2-NEXT: pxor %xmm2, %xmm1
|
|
||||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
|
||||||
; SSE2-NEXT: pxor %xmm2, %xmm1
|
|
||||||
; SSE2-NEXT: pminsw %xmm0, %xmm1
|
; SSE2-NEXT: pminsw %xmm0, %xmm1
|
||||||
; SSE2-NEXT: movdqa %xmm1, %xmm0
|
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||||
; SSE2-NEXT: pxor %xmm2, %xmm0
|
|
||||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
|
||||||
; SSE2-NEXT: pxor %xmm2, %xmm0
|
|
||||||
; SSE2-NEXT: pminsw %xmm1, %xmm0
|
; SSE2-NEXT: pminsw %xmm1, %xmm0
|
||||||
; SSE2-NEXT: movdqa %xmm0, %xmm1
|
; SSE2-NEXT: movdqa %xmm0, %xmm1
|
||||||
; SSE2-NEXT: pxor %xmm2, %xmm1
|
|
||||||
; SSE2-NEXT: psrld $16, %xmm1
|
; SSE2-NEXT: psrld $16, %xmm1
|
||||||
; SSE2-NEXT: pxor %xmm2, %xmm1
|
|
||||||
; SSE2-NEXT: pminsw %xmm0, %xmm1
|
; SSE2-NEXT: pminsw %xmm0, %xmm1
|
||||||
; SSE2-NEXT: movd %xmm1, %eax
|
; SSE2-NEXT: movd %xmm1, %eax
|
||||||
; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
|
; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
|
||||||
|
|
@ -1375,20 +1360,12 @@ define i16 @test_v32i16(<32 x i16> %a0) {
|
||||||
; SSE2-NEXT: pminsw %xmm1, %xmm2
|
; SSE2-NEXT: pminsw %xmm1, %xmm2
|
||||||
; SSE2-NEXT: pxor %xmm4, %xmm0
|
; SSE2-NEXT: pxor %xmm4, %xmm0
|
||||||
; SSE2-NEXT: pminsw %xmm2, %xmm0
|
; SSE2-NEXT: pminsw %xmm2, %xmm0
|
||||||
; SSE2-NEXT: movdqa %xmm0, %xmm1
|
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||||
; SSE2-NEXT: pxor %xmm4, %xmm1
|
|
||||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
|
||||||
; SSE2-NEXT: pxor %xmm4, %xmm1
|
|
||||||
; SSE2-NEXT: pminsw %xmm0, %xmm1
|
; SSE2-NEXT: pminsw %xmm0, %xmm1
|
||||||
; SSE2-NEXT: movdqa %xmm1, %xmm0
|
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
|
||||||
; SSE2-NEXT: pxor %xmm4, %xmm0
|
|
||||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
|
||||||
; SSE2-NEXT: pxor %xmm4, %xmm0
|
|
||||||
; SSE2-NEXT: pminsw %xmm1, %xmm0
|
; SSE2-NEXT: pminsw %xmm1, %xmm0
|
||||||
; SSE2-NEXT: movdqa %xmm0, %xmm1
|
; SSE2-NEXT: movdqa %xmm0, %xmm1
|
||||||
; SSE2-NEXT: pxor %xmm4, %xmm1
|
|
||||||
; SSE2-NEXT: psrld $16, %xmm1
|
; SSE2-NEXT: psrld $16, %xmm1
|
||||||
; SSE2-NEXT: pxor %xmm4, %xmm1
|
|
||||||
; SSE2-NEXT: pminsw %xmm0, %xmm1
|
; SSE2-NEXT: pminsw %xmm0, %xmm1
|
||||||
; SSE2-NEXT: movd %xmm1, %eax
|
; SSE2-NEXT: movd %xmm1, %eax
|
||||||
; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
|
; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
|
||||||
|
|
@ -1463,20 +1440,12 @@ define i16 @test_v64i16(<64 x i16> %a0) {
|
||||||
; SSE2-NEXT: pminsw %xmm5, %xmm1
|
; SSE2-NEXT: pminsw %xmm5, %xmm1
|
||||||
; SSE2-NEXT: pminsw %xmm4, %xmm1
|
; SSE2-NEXT: pminsw %xmm4, %xmm1
|
||||||
; SSE2-NEXT: pminsw %xmm0, %xmm1
|
; SSE2-NEXT: pminsw %xmm0, %xmm1
|
||||||
; SSE2-NEXT: movdqa %xmm1, %xmm0
|
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
|
||||||
; SSE2-NEXT: pxor %xmm8, %xmm0
|
|
||||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
|
||||||
; SSE2-NEXT: pxor %xmm8, %xmm0
|
|
||||||
; SSE2-NEXT: pminsw %xmm1, %xmm0
|
; SSE2-NEXT: pminsw %xmm1, %xmm0
|
||||||
; SSE2-NEXT: movdqa %xmm0, %xmm1
|
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||||
; SSE2-NEXT: pxor %xmm8, %xmm1
|
|
||||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,2,3]
|
|
||||||
; SSE2-NEXT: pxor %xmm8, %xmm1
|
|
||||||
; SSE2-NEXT: pminsw %xmm0, %xmm1
|
; SSE2-NEXT: pminsw %xmm0, %xmm1
|
||||||
; SSE2-NEXT: movdqa %xmm1, %xmm0
|
; SSE2-NEXT: movdqa %xmm1, %xmm0
|
||||||
; SSE2-NEXT: pxor %xmm8, %xmm0
|
|
||||||
; SSE2-NEXT: psrld $16, %xmm0
|
; SSE2-NEXT: psrld $16, %xmm0
|
||||||
; SSE2-NEXT: pxor %xmm8, %xmm0
|
|
||||||
; SSE2-NEXT: pminsw %xmm1, %xmm0
|
; SSE2-NEXT: pminsw %xmm1, %xmm0
|
||||||
; SSE2-NEXT: movd %xmm0, %eax
|
; SSE2-NEXT: movd %xmm0, %eax
|
||||||
; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
|
; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue