[X86][SSE] Add support for combining ANDNP byte masks with target shuffles
llvm-svn: 293178
This commit is contained in:
parent 0b034d6f25
commit 027bb453d9
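The idea behind the combine: an AND/ANDNP whose constant operand is all-0x00 or
all-0xFF in every byte acts like a byte shuffle that either keeps a lane in
place or zeroes it, so it can be handed to the generic shuffle combiner as a
'faux' shuffle. A minimal standalone sketch of that decode (illustrative names
only -- decodeByteMaskAsShuffle and kSentinelZero are not the LLVM helpers used
in the patch):

#include <cstdint>
#include <optional>
#include <vector>

constexpr int kSentinelZero = -2; // stands in for SM_SentinelZero

// For AND, a constant byte of 0x00 zeroes the lane; ANDNP inverts the
// constant first, so there 0xFF is the byte value that zeroes the lane.
std::optional<std::vector<int>>
decodeByteMaskAsShuffle(const std::vector<uint8_t> &MaskBytes, bool IsAndN) {
  const uint8_t ZeroByte = IsAndN ? 0xFF : 0x00;
  std::vector<int> ShuffleMask;
  for (int i = 0, e = (int)MaskBytes.size(); i != e; ++i) {
    uint8_t B = MaskBytes[i];
    if (B != 0x00 && B != 0xFF)
      return std::nullopt; // not an all-or-nothing byte mask: give up
    ShuffleMask.push_back(B == ZeroByte ? kSentinelZero : i);
  }
  return ShuffleMask;
}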
@@ -4132,6 +4132,7 @@ static bool isTargetShuffleVariableMask(unsigned Opcode) {
     return true;
   // 'Faux' Target Shuffles.
   case ISD::AND:
+  case X86ISD::ANDNP:
     return true;
   }
 }
@@ -5742,11 +5743,16 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
 
   unsigned Opcode = N.getOpcode();
   switch (Opcode) {
-  case ISD::AND: {
+  case ISD::AND:
+  case X86ISD::ANDNP: {
     // Attempt to decode as a per-byte mask.
     SmallBitVector UndefElts;
     SmallVector<APInt, 32> EltBits;
-    if (!getTargetConstantBitsFromNode(N.getOperand(1), 8, UndefElts, EltBits))
+    SDValue N0 = N.getOperand(0);
+    SDValue N1 = N.getOperand(1);
+    bool IsAndN = (X86ISD::ANDNP == Opcode);
+    uint64_t ZeroMask = IsAndN ? 255 : 0;
+    if (!getTargetConstantBitsFromNode(IsAndN ? N0 : N1, 8, UndefElts, EltBits))
       return false;
     for (int i = 0, e = (int)EltBits.size(); i != e; ++i) {
       if (UndefElts[i]) {
@@ -5756,9 +5762,9 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
       uint64_t ByteBits = EltBits[i].getZExtValue();
       if (ByteBits != 0 && ByteBits != 255)
         return false;
-      Mask.push_back(ByteBits == 0 ? SM_SentinelZero : i);
+      Mask.push_back(ByteBits == ZeroMask ? SM_SentinelZero : i);
     }
-    Ops.push_back(N.getOperand(0));
+    Ops.push_back(IsAndN ? N1 : N0);
     return true;
   }
   case X86ISD::VSHLI:
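Note the operand polarity handled above: for ISD::AND the constant mask is
operand 1 and the pass-through value is operand 0, while ANDNP(C, V) computes
~C & V, so the constant is operand 0 and a 0xFF byte is what zeroes a lane.
That is why ZeroMask is 255 and Ops receives N1 in the ANDNP case. Continuing
the sketch from the top (same illustrative names, not LLVM code):

#include <cassert>
#include <cstdint>
#include <vector>

int main() {
  // Same byte mask as the first stage of the test below: byte 1 is 0x00.
  std::vector<uint8_t> C = {255, 0, 255, 255};

  // AND: the 0x00 byte zeroes lane 1; all other lanes pass through.
  auto AndMask = decodeByteMaskAsShuffle(C, /*IsAndN=*/false);
  assert(AndMask && (*AndMask)[0] == 0 && (*AndMask)[1] == kSentinelZero);

  // ANDNP: inverted polarity -- the 0xFF bytes are the ones zeroed.
  auto AndnMask = decodeByteMaskAsShuffle(C, /*IsAndN=*/true);
  assert(AndnMask && (*AndnMask)[0] == kSentinelZero && (*AndnMask)[1] == 1);
  return 0;
}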
@@ -33010,6 +33016,7 @@ static SDValue combineFMinNumFMaxNum(SDNode *N, SelectionDAG &DAG,
 
 /// Do target-specific dag combines on X86ISD::ANDNP nodes.
 static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG,
+                            TargetLowering::DAGCombinerInfo &DCI,
                             const X86Subtarget &Subtarget) {
   // ANDNP(0, x) -> x
   if (ISD::isBuildVectorAllZeros(N->getOperand(0).getNode()))
@@ -33019,6 +33026,19 @@ static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG,
   if (ISD::isBuildVectorAllZeros(N->getOperand(1).getNode()))
     return getZeroVector(N->getSimpleValueType(0), Subtarget, DAG, SDLoc(N));
 
+  EVT VT = N->getValueType(0);
+
+  // Attempt to recursively combine a bitmask ANDNP with shuffles.
+  if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) {
+    SDValue Op(N, 0);
+    SmallVector<int, 1> NonceMask; // Just a placeholder.
+    NonceMask.push_back(0);
+    if (combineX86ShufflesRecursively({Op}, 0, Op, NonceMask,
+                                      /*Depth*/ 1, /*HasVarMask*/ false, DAG,
+                                      DCI, Subtarget))
+      return SDValue(); // This routine will use CombineTo to replace N.
+  }
+
   return SDValue();
 }
 
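The new block seeds the existing recursive shuffle combiner with the ANDNP node
itself as the root; the one-element NonceMask never describes a real shuffle,
it only fills in the combiner's current-mask parameter at the top of the
recursion. A compilable schematic of that driver pattern (stub types and names,
not the real combineX86ShufflesRecursively signature):

#include <vector>

struct Node {}; // stand-in for an SDNode

// Stub for the recursive combiner: the real routine walks the target-shuffle
// tree under Root (including 'faux' shuffles such as the ANDNP byte mask) and
// rewrites the DAG via CombineTo when it finds a profitable combine.
static bool combineShufflesRecursively(Node *Root, std::vector<int> &Mask,
                                       int Depth) {
  (void)Root; (void)Mask; (void)Depth;
  return false;
}

static bool tryCombineAndnpAsShuffle(Node *N, unsigned ScalarSizeInBits) {
  if (ScalarSizeInBits % 8 != 0) // per-byte decode needs byte-sized lanes
    return false;
  std::vector<int> NonceMask = {0}; // just a placeholder, as in the patch
  return combineShufflesRecursively(N, NonceMask, /*Depth=*/1);
}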
@@ -34088,7 +34108,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::FSUB: return combineFaddFsub(N, DAG, Subtarget);
   case ISD::FNEG: return combineFneg(N, DAG, Subtarget);
   case ISD::TRUNCATE: return combineTruncate(N, DAG, Subtarget);
-  case X86ISD::ANDNP: return combineAndnp(N, DAG, Subtarget);
+  case X86ISD::ANDNP: return combineAndnp(N, DAG, DCI, Subtarget);
   case X86ISD::FAND: return combineFAnd(N, DAG, Subtarget);
   case X86ISD::FANDN: return combineFAndn(N, DAG, Subtarget);
   case X86ISD::FXOR:
@@ -356,43 +356,31 @@ define <4 x i32> @_clearupper4xi32b(<4 x i32>) nounwind {
 define <8 x i16> @_clearupper8xi16b(<8 x i16>) nounwind {
 ; SSE-LABEL: _clearupper8xi16b:
 ; SSE:       # BB#0:
-; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; SSE-NEXT:    pand %xmm2, %xmm0
+; SSE-NEXT:    movdqa {{.*#+}} xmm1 = [255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; SSE-NEXT:    pand %xmm1, %xmm0
 ; SSE-NEXT:    xorl %eax, %eax
-; SSE-NEXT:    movd %eax, %xmm1
-; SSE-NEXT:    movdqa %xmm1, %xmm3
+; SSE-NEXT:    movd %eax, %xmm2
+; SSE-NEXT:    movdqa %xmm2, %xmm3
 ; SSE-NEXT:    psllw $8, %xmm3
-; SSE-NEXT:    pandn %xmm3, %xmm2
-; SSE-NEXT:    por %xmm2, %xmm0
-; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255]
-; SSE-NEXT:    pand %xmm2, %xmm0
-; SSE-NEXT:    movdqa %xmm1, %xmm3
+; SSE-NEXT:    pandn %xmm3, %xmm1
+; SSE-NEXT:    por %xmm1, %xmm0
+; SSE-NEXT:    movdqa {{.*#+}} xmm1 = [255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255]
+; SSE-NEXT:    pand %xmm1, %xmm0
+; SSE-NEXT:    movdqa %xmm2, %xmm3
 ; SSE-NEXT:    pslld $24, %xmm3
-; SSE-NEXT:    pandn %xmm3, %xmm2
-; SSE-NEXT:    por %xmm2, %xmm0
-; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255]
-; SSE-NEXT:    pand %xmm2, %xmm0
-; SSE-NEXT:    movdqa %xmm1, %xmm3
+; SSE-NEXT:    pandn %xmm3, %xmm1
+; SSE-NEXT:    por %xmm1, %xmm0
+; SSE-NEXT:    movdqa {{.*#+}} xmm1 = [255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255]
+; SSE-NEXT:    pand %xmm1, %xmm0
+; SSE-NEXT:    movdqa %xmm2, %xmm3
 ; SSE-NEXT:    psllq $40, %xmm3
-; SSE-NEXT:    pandn %xmm3, %xmm2
-; SSE-NEXT:    por %xmm2, %xmm0
-; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255]
-; SSE-NEXT:    pand %xmm2, %xmm0
-; SSE-NEXT:    movdqa %xmm1, %xmm3
-; SSE-NEXT:    psllq $56, %xmm3
-; SSE-NEXT:    pandn %xmm3, %xmm2
-; SSE-NEXT:    por %xmm2, %xmm0
-; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255]
-; SSE-NEXT:    pand %xmm2, %xmm0
-; SSE-NEXT:    movdqa %xmm1, %xmm3
-; SSE-NEXT:    pslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1,2,3,4,5,6]
-; SSE-NEXT:    pandn %xmm3, %xmm2
-; SSE-NEXT:    por %xmm2, %xmm0
-; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255]
-; SSE-NEXT:    pand %xmm2, %xmm0
-; SSE-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4]
-; SSE-NEXT:    pandn %xmm1, %xmm2
-; SSE-NEXT:    por %xmm2, %xmm0
+; SSE-NEXT:    pandn %xmm3, %xmm1
+; SSE-NEXT:    por %xmm1, %xmm0
+; SSE-NEXT:    movdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255]
+; SSE-NEXT:    pand %xmm1, %xmm0
+; SSE-NEXT:    psllq $56, %xmm2
+; SSE-NEXT:    pandn %xmm2, %xmm1
+; SSE-NEXT:    por %xmm1, %xmm0
 ; SSE-NEXT:    pand {{.*}}(%rip), %xmm0
 ; SSE-NEXT:    retq
 ;
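For reference when reading the checks above: pandn computes ~mask & value, so
with an all-0x00/0xFF constant it simply zeroes selected bytes, which is what
lets the shuffle combiner fold the last two mask stages away. A runnable
intrinsics demo of that behavior (standalone example, not part of the patch):

#include <cstdint>
#include <cstdio>
#include <emmintrin.h>

int main() {
  alignas(16) const uint8_t MaskBytes[16] = {0xFF, 0, 0, 0, 0, 0, 0, 0,
                                             0,    0, 0, 0, 0, 0, 0, 0};
  alignas(16) uint8_t In[16], Out[16];
  for (int i = 0; i != 16; ++i)
    In[i] = (uint8_t)(i + 1);

  __m128i M = _mm_load_si128((const __m128i *)MaskBytes);
  __m128i V = _mm_load_si128((const __m128i *)In);
  // andnot(M, V) == ~M & V: byte 0 (mask 0xFF) is cleared, the rest pass.
  _mm_store_si128((__m128i *)Out, _mm_andnot_si128(M, V));

  for (int i = 0; i != 16; ++i)
    printf("%u ", Out[i]); // prints: 0 2 3 4 ... 16
  printf("\n");
  return 0;
}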