forked from OSchip/llvm-project
				
			[X86][AVX] Provide SubVectorBroadcast fallback if load fold fails (PR29133)
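Fix for PR29133: when the subvector load cannot be folded into the broadcast (for example, because the load node has additional users), fall back to register-based subvector insert patterns, matching the approach that was taken for AVX1 scalar broadcasts. llvm-svn: 279735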
This commit is contained in:
		
							parent
							
								
									05cf9c22f1
								
							
						
					
					
						commit
						0ad9f3e93b
					
				| 
						 | 
					@ -12987,8 +12987,7 @@ static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget,
 | 
				
			||||||
      // lower to a VBROADCASTF128/VBROADCASTI128/etc.
 | 
					      // lower to a VBROADCASTF128/VBROADCASTI128/etc.
 | 
				
			||||||
      if (auto *Ld = dyn_cast<LoadSDNode>(peekThroughOneUseBitcasts(SubVec2))) {
 | 
					      if (auto *Ld = dyn_cast<LoadSDNode>(peekThroughOneUseBitcasts(SubVec2))) {
 | 
				
			||||||
        if (SubVec2 == SubVec && ISD::isNormalLoad(Ld) &&
 | 
					        if (SubVec2 == SubVec && ISD::isNormalLoad(Ld) &&
 | 
				
			||||||
            areOnlyUsersOf(SubVec2.getNode(), {Op, Vec}) &&
 | 
					            areOnlyUsersOf(SubVec2.getNode(), {Op, Vec})) {
 | 
				
			||||||
            !Ld->hasAnyUseOfValue(1)) {
 | 
					 | 
				
			||||||
          return DAG.getNode(X86ISD::SUBV_BROADCAST, dl, OpVT, SubVec);
 | 
					          return DAG.getNode(X86ISD::SUBV_BROADCAST, dl, OpVT, SubVec);
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1026,6 +1026,21 @@ def : Pat<(v16i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
 | 
				
			||||||
          (VBROADCASTI32X4Z256rm addr:$src)>;
 | 
					          (VBROADCASTI32X4Z256rm addr:$src)>;
 | 
				
			||||||
def : Pat<(v32i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
 | 
					def : Pat<(v32i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
 | 
				
			||||||
          (VBROADCASTI32X4Z256rm addr:$src)>;
 | 
					          (VBROADCASTI32X4Z256rm addr:$src)>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Provide fallback in case the load node that is used in the patterns above
 | 
				
			||||||
 | 
					// is used by additional users, which prevents the pattern selection.
 | 
				
			||||||
 | 
					def : Pat<(v8f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
 | 
				
			||||||
 | 
					          (VINSERTF32x4Z256rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
 | 
				
			||||||
 | 
					                              (v4f32 VR128X:$src), 1)>;
 | 
				
			||||||
 | 
					def : Pat<(v8i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
 | 
				
			||||||
 | 
					          (VINSERTI32x4Z256rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
 | 
				
			||||||
 | 
					                              (v4i32 VR128X:$src), 1)>;
 | 
				
			||||||
 | 
					def : Pat<(v16i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
 | 
				
			||||||
 | 
					          (VINSERTI32x4Z256rr (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
 | 
				
			||||||
 | 
					                              (v8i16 VR128X:$src), 1)>;
 | 
				
			||||||
 | 
					def : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
 | 
				
			||||||
 | 
					          (VINSERTI32x4Z256rr (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
 | 
				
			||||||
 | 
					                              (v16i8 VR128X:$src), 1)>;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
let Predicates = [HasVLX, HasDQI] in {
 | 
					let Predicates = [HasVLX, HasDQI] in {
 | 
				
			||||||
| 
						 | 
					@ -1042,6 +1057,15 @@ def : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
 | 
				
			||||||
          (VBROADCASTF32X4Z256rm addr:$src)>;
 | 
					          (VBROADCASTF32X4Z256rm addr:$src)>;
 | 
				
			||||||
def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
 | 
					def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
 | 
				
			||||||
          (VBROADCASTI32X4Z256rm addr:$src)>;
 | 
					          (VBROADCASTI32X4Z256rm addr:$src)>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Provide fallback in case the load node that is used in the patterns above
 | 
				
			||||||
 | 
					// is used by additional users, which prevents the pattern selection.
 | 
				
			||||||
 | 
					def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
 | 
				
			||||||
 | 
					          (VINSERTF32x4Z256rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
 | 
				
			||||||
 | 
					                              (v2f64 VR128X:$src), 1)>;
 | 
				
			||||||
 | 
					def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
 | 
				
			||||||
 | 
					          (VINSERTI32x4Z256rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
 | 
				
			||||||
 | 
					                              (v2i64 VR128X:$src), 1)>;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
let Predicates = [HasDQI] in {
 | 
					let Predicates = [HasDQI] in {
 | 
				
			||||||
| 
						 | 
					@ -1057,6 +1081,15 @@ defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf64x2",
 | 
				
			||||||
defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf32x8",
 | 
					defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf32x8",
 | 
				
			||||||
                       v16f32_info, v8f32x_info>,
 | 
					                       v16f32_info, v8f32x_info>,
 | 
				
			||||||
                       EVEX_V512, EVEX_CD8<32, CD8VT8>;
 | 
					                       EVEX_V512, EVEX_CD8<32, CD8VT8>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Provide fallback in case the load node that is used in the patterns above
 | 
				
			||||||
 | 
					// is used by additional users, which prevents the pattern selection.
 | 
				
			||||||
 | 
					// v4f64 is a floating-point type: use the FP-domain VINSERTF64x2 (the v4i64 pattern below uses VINSERTI64x2).
					def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
 | 
				
			||||||
 | 
					          (VINSERTF64x2Z256rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
 | 
				
			||||||
 | 
					                              (v2f64 VR128X:$src), 1)>;
 | 
				
			||||||
 | 
					def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
 | 
				
			||||||
 | 
					          (VINSERTI64x2Z256rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
 | 
				
			||||||
 | 
					                              (v2i64 VR128X:$src), 1)>;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
 | 
					multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -8662,6 +8662,51 @@ let Predicates = [HasAVX2] in {
 | 
				
			||||||
  defm : maskmov_lowering<"VPMASKMOVD", VR128, v4i32, v4i32, "VBLENDVPS", v4i32>;
 | 
					  defm : maskmov_lowering<"VPMASKMOVD", VR128, v4i32, v4i32, "VBLENDVPS", v4i32>;
 | 
				
			||||||
  defm : maskmov_lowering<"VPMASKMOVQ", VR128, v2i64, v2i64, "VBLENDVPD", v4i32>;
 | 
					  defm : maskmov_lowering<"VPMASKMOVQ", VR128, v2i64, v2i64, "VBLENDVPD", v4i32>;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					//===----------------------------------------------------------------------===//
 | 
				
			||||||
 | 
					// SubVector Broadcasts
 | 
				
			||||||
 | 
					// Provide fallback in case the load node that is used in the patterns above
 | 
				
			||||||
 | 
					// is used by additional users, which prevents the pattern selection.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					let Predicates = [HasAVX2, NoVLX] in {
 | 
				
			||||||
 | 
					def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128:$src))),
 | 
				
			||||||
 | 
					          (VINSERTI128rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
 | 
				
			||||||
 | 
					                         (v2i64 VR128:$src), 1)>;
 | 
				
			||||||
 | 
					def : Pat<(v8i32 (X86SubVBroadcast (v4i32 VR128:$src))),
 | 
				
			||||||
 | 
					          (VINSERTI128rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
 | 
				
			||||||
 | 
					                         (v4i32 VR128:$src), 1)>;
 | 
				
			||||||
 | 
					def : Pat<(v16i16 (X86SubVBroadcast (v8i16 VR128:$src))),
 | 
				
			||||||
 | 
					          (VINSERTI128rr (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
 | 
				
			||||||
 | 
					                         (v8i16 VR128:$src), 1)>;
 | 
				
			||||||
 | 
					def : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128:$src))),
 | 
				
			||||||
 | 
					          (VINSERTI128rr (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
 | 
				
			||||||
 | 
					                         (v16i8 VR128:$src), 1)>;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					let Predicates = [HasAVX, NoVLX] in {
 | 
				
			||||||
 | 
					def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128:$src))),
 | 
				
			||||||
 | 
					          (VINSERTF128rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
 | 
				
			||||||
 | 
					                         (v2f64 VR128:$src), 1)>;
 | 
				
			||||||
 | 
					def : Pat<(v8f32 (X86SubVBroadcast (v4f32 VR128:$src))),
 | 
				
			||||||
 | 
					          (VINSERTF128rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
 | 
				
			||||||
 | 
					                         (v4f32 VR128:$src), 1)>;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					let Predicates = [HasAVX1Only] in {
 | 
				
			||||||
 | 
					def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128:$src))),
 | 
				
			||||||
 | 
					          (VINSERTF128rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
 | 
				
			||||||
 | 
					                         (v2i64 VR128:$src), 1)>;
 | 
				
			||||||
 | 
					def : Pat<(v8i32 (X86SubVBroadcast (v4i32 VR128:$src))),
 | 
				
			||||||
 | 
					          (VINSERTF128rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
 | 
				
			||||||
 | 
					                         (v4i32 VR128:$src), 1)>;
 | 
				
			||||||
 | 
					def : Pat<(v16i16 (X86SubVBroadcast (v8i16 VR128:$src))),
 | 
				
			||||||
 | 
					          (VINSERTF128rr (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
 | 
				
			||||||
 | 
					                         (v8i16 VR128:$src), 1)>;
 | 
				
			||||||
 | 
					def : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128:$src))),
 | 
				
			||||||
 | 
					          (VINSERTF128rr (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
 | 
				
			||||||
 | 
					                         (v16i8 VR128:$src), 1)>;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
//===----------------------------------------------------------------------===//
 | 
					//===----------------------------------------------------------------------===//
 | 
				
			||||||
// Variable Bit Shifts
 | 
					// Variable Bit Shifts
 | 
				
			||||||
//
 | 
					//
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
		Loading…
	
		Reference in New Issue