[X86][SSE] Fixed issue with commutation of 'faux unary' target shuffles (PR26667)
Fixed a bug introduced by D16683 that occurs when a binary shuffle is simplified to a unary shuffle (with undef/zero sentinel mask indices). If the simplification resulted in only the second input being used, combineX86ShuffleChain failed to take this into account and still referenced the first input. llvm-svn: 261434
This commit is contained in:
parent
ccf2cce67c
commit
ecb0433599
|
@ -23506,15 +23506,15 @@ static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
|
||||||
/// into either a single instruction if there is a special purpose instruction
|
/// into either a single instruction if there is a special purpose instruction
|
||||||
/// for this operation, or into a PSHUFB instruction which is a fully general
|
/// for this operation, or into a PSHUFB instruction which is a fully general
|
||||||
/// instruction but should only be used to replace chains over a certain depth.
|
/// instruction but should only be used to replace chains over a certain depth.
|
||||||
static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask,
|
static bool combineX86ShuffleChain(SDValue Input, SDValue Root,
|
||||||
int Depth, bool HasPSHUFB, SelectionDAG &DAG,
|
ArrayRef<int> Mask, int Depth,
|
||||||
|
bool HasPSHUFB, SelectionDAG &DAG,
|
||||||
TargetLowering::DAGCombinerInfo &DCI,
|
TargetLowering::DAGCombinerInfo &DCI,
|
||||||
const X86Subtarget &Subtarget) {
|
const X86Subtarget &Subtarget) {
|
||||||
assert(!Mask.empty() && "Cannot combine an empty shuffle mask!");
|
assert(!Mask.empty() && "Cannot combine an empty shuffle mask!");
|
||||||
|
|
||||||
// Find the operand that enters the chain. Note that multiple uses are OK
|
// Find the operand that enters the chain. Note that multiple uses are OK
|
||||||
// here, we're not going to remove the operand we find.
|
// here, we're not going to remove the operand we find.
|
||||||
SDValue Input = Op.getOperand(0);
|
|
||||||
while (Input.getOpcode() == ISD::BITCAST)
|
while (Input.getOpcode() == ISD::BITCAST)
|
||||||
Input = Input.getOperand(0);
|
Input = Input.getOperand(0);
|
||||||
|
|
||||||
|
@ -23814,7 +23814,6 @@ static bool combineX86ShufflesRecursively(SDValue Op, SDValue Root,
|
||||||
DAG, DCI, Subtarget))
|
DAG, DCI, Subtarget))
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
|
|
||||||
// Minor canonicalization of the accumulated shuffle mask to make it easier
|
// Minor canonicalization of the accumulated shuffle mask to make it easier
|
||||||
// to match below. All this does is detect masks with sequential pairs of
|
// to match below. All this does is detect masks with sequential pairs of
|
||||||
// elements, and shrink them to the half-width mask. It does this in a loop
|
// elements, and shrink them to the half-width mask. It does this in a loop
|
||||||
|
@ -23826,7 +23825,7 @@ static bool combineX86ShufflesRecursively(SDValue Op, SDValue Root,
|
||||||
WidenedMask.clear();
|
WidenedMask.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
return combineX86ShuffleChain(Op, Root, Mask, Depth, HasPSHUFB, DAG, DCI,
|
return combineX86ShuffleChain(Input0, Root, Mask, Depth, HasPSHUFB, DAG, DCI,
|
||||||
Subtarget);
|
Subtarget);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -391,10 +391,12 @@ declare <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double>, <4 x double>,
|
||||||
define <8 x float> @test_mm256_permute2f128_ps(<8 x float> %a0, <8 x float> %a1) {
|
define <8 x float> @test_mm256_permute2f128_ps(<8 x float> %a0, <8 x float> %a1) {
|
||||||
; X32-LABEL: test_mm256_permute2f128_ps:
|
; X32-LABEL: test_mm256_permute2f128_ps:
|
||||||
; X32: # BB#0:
|
; X32: # BB#0:
|
||||||
|
; X32-NEXT: vmovaps %ymm1, %ymm0
|
||||||
; X32-NEXT: retl
|
; X32-NEXT: retl
|
||||||
;
|
;
|
||||||
; X64-LABEL: test_mm256_permute2f128_ps:
|
; X64-LABEL: test_mm256_permute2f128_ps:
|
||||||
; X64: # BB#0:
|
; X64: # BB#0:
|
||||||
|
; X64-NEXT: vmovaps %ymm1, %ymm0
|
||||||
; X64-NEXT: retq
|
; X64-NEXT: retq
|
||||||
%res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 50)
|
%res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 50)
|
||||||
ret <8 x float> %res
|
ret <8 x float> %res
|
||||||
|
|
Loading…
Reference in New Issue