[X86] Add support for folding (insert_subvector vec1, (extract_subvector vec2, idx1), idx1) -> (blendi vec2, vec1).
llvm-svn: 294112
parent 3d95228dbe
commit 978fdb75a4
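
The transform: when the subvector being inserted at index idx1 was itself extracted from another vector at that same idx1, the insert_subvector is equivalent to a blend of the two full-width vectors; the combine below handles the upper-half case (IdxVal == OpVT.getVectorNumElements()/2). On AVX1 this replaces a vextractf128 + vinsertf128 pair with a single vblendps, as the updated tests show. A hedged illustration with AVX intrinsics of the form the combine now produces for the 256-bit integer case (mergeUpperHalf is my name, not part of the patch):

    #include <immintrin.h>

    // Take the lower 128-bit half from `a` and the upper half from `b` with
    // one blend. Immediate 0xf0 selects 32-bit elements 4..7 (the upper
    // half) from the second operand. AVX1 has no 256-bit integer blend, so
    // the data is bitcast through the float domain -- per the patch comment,
    // still cheaper than a wrong-domain vinsertf128.
    static inline __m256i mergeUpperHalf(__m256i a, __m256i b) {
      return _mm256_castps_si256(_mm256_blend_ps(
          _mm256_castsi256_ps(a), _mm256_castsi256_ps(b), 0xf0));
    }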

@@ -34121,8 +34121,7 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG,
   // We are still creating an INSERT_SUBVECTOR below with an undef node to
   // extend the subvector to the size of the result vector. Make sure that
   // we are not recursing on that node by checking for undef here.
-  if (IdxVal == 0 && OpVT.is256BitVector() && SubVecVT.is128BitVector() &&
-      !Vec.isUndef()) {
+  if (IdxVal == 0 && OpVT.is256BitVector() && !Vec.isUndef()) {
     SDValue Vec256 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT,
                                  DAG.getUNDEF(OpVT), SubVec, N->getOperand(2));
 
@@ -34144,6 +34143,30 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG,
     return DAG.getBitcast(OpVT, Vec256);
   }
 
+  // If we're inserting into the upper half of a 256-bit vector with a vector
+  // that was extracted from the upper half of a 256-bit vector, we should
+  // use a blend instead.
+  if (SubVec.getOpcode() == ISD::EXTRACT_SUBVECTOR && OpVT.is256BitVector() &&
+      SubVec.getOperand(0).getSimpleValueType() == OpVT &&
+      Idx == SubVec.getOperand(1) && IdxVal == OpVT.getVectorNumElements()/2) {
+
+    // Integers must be cast to 32-bit because there is only vpblendd;
+    // vpblendw can't be used for this because it has a handicapped mask.
+    // If we don't have AVX2, then cast to float. Using a wrong domain blend
+    // is still more efficient than using the wrong domain vinsertf128 that
+    // will be created by InsertSubVector().
+    MVT CastVT = OpVT;
+    if (OpVT.isInteger())
+      CastVT = Subtarget.hasAVX2() ? MVT::v8i32 : MVT::v8f32;
+
+    // The blend instruction, and therefore its mask, depend on the data type.
+    unsigned MaskVal = CastVT.getScalarSizeInBits() == 64 ? 0x0c : 0xf0;
+    SDValue Mask = DAG.getConstant(MaskVal, dl, MVT::i8);
+    Vec = DAG.getNode(X86ISD::BLENDI, dl, CastVT, DAG.getBitcast(CastVT, Vec),
+                      DAG.getBitcast(CastVT, SubVec.getOperand(0)), Mask);
+    return DAG.getBitcast(OpVT, Vec);
+  }
+
   // Fold two 16-byte or 32-byte subvector loads into one 32-byte or 64-byte
   // load:
   // (insert_subvector (insert_subvector undef, (load16 addr), 0),
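
The two mask immediates above have one bit per element of CastVT, and "upper half" means elements N/2..N-1, so N=4 (64-bit elements) gives 0b1100 = 0x0c and N=8 (32-bit elements) gives 0b11110000 = 0xf0. (vpblendw is unusable here because its 8-bit immediate is replicated per 128-bit lane, so it cannot select a whole upper half.) A minimal sketch of that arithmetic, using a hypothetical helper upperHalfBlendMask that is not in the patch:

    #include <cassert>

    // One immediate bit per element; selecting the upper half of an
    // N-element vector sets bits N/2..N-1 and clears the rest.
    static unsigned upperHalfBlendMask(unsigned NumElts) {
      unsigned Half = NumElts / 2;
      return ((1u << Half) - 1u) << Half;
    }

    int main() {
      assert(upperHalfBlendMask(4) == 0x0c); // v4f64: 0b1100
      assert(upperHalfBlendMask(8) == 0xf0); // v8i32 / v8f32: 0b11110000
    }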

@@ -466,8 +466,7 @@ define <4 x i64> @shuffle_v4i64_67zz(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1:       ## BB#0:
 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
 ; AVX1-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: shuffle_v4i64_67zz:

@@ -2653,8 +2653,7 @@ define <16 x i16> @shuffle_v16i16_01_zz_02_zz_04_uu_06_07_08_09_10_11_12_13_14_1
 ; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[1,1,2,3,4,5,6,7]
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4,5,6,7]
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
 ; AVX1-NEXT:    retq
 ;
 ; AVX2OR512VL-LABEL: shuffle_v16i16_01_zz_02_zz_04_uu_06_07_08_09_10_11_12_13_14_15:

@@ -1089,8 +1089,7 @@ define <32 x i8> @shuffle_v32i8_01_zz_02_zz_04_uu_06_07_08_09_10_11_12_13_14_15_
 ; AVX1-LABEL: shuffle_v32i8_01_zz_02_zz_04_uu_06_07_08_09_10_11_12_13_14_15_u6_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31:
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[1],zero,xmm0[2],zero,xmm0[4,u,6,7,8,9,10,11,12,13,14,15]
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
 ; AVX1-NEXT:    retq
 ;
 ; AVX2OR512VL-LABEL: shuffle_v32i8_01_zz_02_zz_04_uu_06_07_08_09_10_11_12_13_14_15_u6_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31: