X86: Prefer using VPSHUFD over VPERMILPS for v4f32 shuffles because it has better throughput.
llvm-svn: 169624
This commit is contained in:
		
							parent
							
								
									889037d754
								
							
						
					
					
						commit
						481e50efe0
					
				| 
						 | 
				
			
			@ -6781,12 +6781,13 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
 | 
			
		|||
 | 
			
		||||
    unsigned TargetMask = getShuffleSHUFImmediate(SVOp);
 | 
			
		||||
 | 
			
		||||
    if (HasFp256 && (VT == MVT::v4f32 || VT == MVT::v2f64))
 | 
			
		||||
      return getTargetShuffleNode(X86ISD::VPERMILP, dl, VT, V1, TargetMask, DAG);
 | 
			
		||||
 | 
			
		||||
    if (HasSSE2 && (VT == MVT::v4f32 || VT == MVT::v4i32))
 | 
			
		||||
      return getTargetShuffleNode(X86ISD::PSHUFD, dl, VT, V1, TargetMask, DAG);
 | 
			
		||||
 | 
			
		||||
    if (HasFp256 && (VT == MVT::v4f32 || VT == MVT::v2f64))
 | 
			
		||||
      return getTargetShuffleNode(X86ISD::VPERMILP, dl, VT, V1, TargetMask,
 | 
			
		||||
                                  DAG);
 | 
			
		||||
 | 
			
		||||
    return getTargetShuffleNode(X86ISD::SHUFP, dl, VT, V1, V1,
 | 
			
		||||
                                TargetMask, DAG);
 | 
			
		||||
  }
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -2348,7 +2348,7 @@ declare <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double>, i8) nounwind rea
 | 
			
		|||
 | 
			
		||||
 | 
			
		||||
define <4 x float> @test_x86_avx_vpermil_ps(<4 x float> %a0) {
 | 
			
		||||
  ; CHECK: vpermilps
 | 
			
		||||
  ; CHECK: vpshufd
 | 
			
		||||
  %res = call <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float> %a0, i8 7) ; <<4 x float>> [#uses=1]
 | 
			
		||||
  ret <4 x float> %res
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -6,7 +6,7 @@ define <4 x float> @test1(<4 x float> %a) nounwind {
 | 
			
		|||
  ret <4 x float> %b
 | 
			
		||||
; CHECK: test1:
 | 
			
		||||
; CHECK: vshufps
 | 
			
		||||
; CHECK: vpermilps
 | 
			
		||||
; CHECK: vpshufd
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; rdar://10538417
 | 
			
		||||
| 
						 | 
				
			
			@ -106,7 +106,7 @@ define <4 x float> @test11(<4 x float> %a) nounwind  {
 | 
			
		|||
 | 
			
		||||
define <4 x float> @test12(<4 x float>* %a) nounwind  {
 | 
			
		||||
; CHECK: test12
 | 
			
		||||
; CHECK: vpermilps $27, (
 | 
			
		||||
; CHECK: vpshufd
 | 
			
		||||
  %tmp0 = load <4 x float>* %a
 | 
			
		||||
  %tmp1 = shufflevector <4 x float> %tmp0, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 | 
			
		||||
  ret <4 x float> %tmp1
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -84,7 +84,7 @@ define <8 x float> @funcF(i32 %val) nounwind {
 | 
			
		|||
  ret <8 x float> %tmp
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; CHECK: vpermilps  $0
 | 
			
		||||
; CHECK: vpshufd  $0
 | 
			
		||||
; CHECK-NEXT: vinsertf128  $1
 | 
			
		||||
define <8 x float> @funcG(<8 x float> %a) nounwind uwtable readnone ssp {
 | 
			
		||||
entry:
 | 
			
		||||
| 
						 | 
				
			
			@ -93,7 +93,7 @@ entry:
 | 
			
		|||
}
 | 
			
		||||
 | 
			
		||||
; CHECK: vextractf128  $1
 | 
			
		||||
; CHECK-NEXT: vpermilps  $85
 | 
			
		||||
; CHECK-NEXT: vpshufd
 | 
			
		||||
; CHECK-NEXT: vinsertf128  $1
 | 
			
		||||
define <8 x float> @funcH(<8 x float> %a) nounwind uwtable readnone ssp {
 | 
			
		||||
entry:
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue