[X86] Fix some isel patterns that used aligned vector load instructions with unaligned predicates.
These patterns weren't checking the alignment of the load, but were using the aligned instructions. This will cause a GP fault if the data isn't aligned. I believe these were introduced in r312450. llvm-svn: 326967
This commit is contained in:
		
							parent
							
								
									06c064824e
								
							
						
					
					
						commit
						7ff9779768
					
				| 
						 | 
				
			
			@ -261,10 +261,10 @@ let Predicates = [HasVLX] in {
 | 
			
		|||
// will zero the upper bits.
 | 
			
		||||
// TODO: Is there a safe way to detect whether the producing instruction
 | 
			
		||||
// already zeroed the upper bits?
 | 
			
		||||
multiclass subvector_zero_lowering<string MoveStr, RegisterClass RC,
 | 
			
		||||
                                   ValueType DstTy, ValueType SrcTy,
 | 
			
		||||
                                   ValueType ZeroTy, PatFrag memop,
 | 
			
		||||
                                   SubRegIndex SubIdx> {
 | 
			
		||||
multiclass subvector_zero_lowering<string MoveStr, string LoadStr,
 | 
			
		||||
                                   RegisterClass RC, ValueType DstTy,
 | 
			
		||||
                                   ValueType SrcTy, ValueType ZeroTy,
 | 
			
		||||
                                   PatFrag memop, SubRegIndex SubIdx> {
 | 
			
		||||
  def : Pat<(DstTy (insert_subvector (bitconvert (ZeroTy immAllZerosV)),
 | 
			
		||||
                                     (SrcTy RC:$src), (iPTR 0))),
 | 
			
		||||
            (SUBREG_TO_REG (i64 0),
 | 
			
		||||
| 
						 | 
				
			
			@ -274,91 +274,91 @@ multiclass subvector_zero_lowering<string MoveStr, RegisterClass RC,
 | 
			
		|||
                                     (SrcTy (bitconvert (memop addr:$src))),
 | 
			
		||||
                                     (iPTR 0))),
 | 
			
		||||
            (SUBREG_TO_REG (i64 0),
 | 
			
		||||
             (!cast<Instruction>("VMOV"#MoveStr#"rm") addr:$src), SubIdx)>;
 | 
			
		||||
             (!cast<Instruction>("VMOV"#LoadStr#"rm") addr:$src), SubIdx)>;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
let Predicates = [HasAVX, NoVLX] in {
 | 
			
		||||
  defm : subvector_zero_lowering<"APD", VR128, v4f64, v2f64, v8i32, loadv2f64,
 | 
			
		||||
                                 sub_xmm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"APS", VR128, v8f32, v4f32, v8i32, loadv4f32,
 | 
			
		||||
                                 sub_xmm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"DQA", VR128, v4i64, v2i64, v8i32, loadv2i64,
 | 
			
		||||
                                 sub_xmm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"DQA", VR128, v8i32, v4i32, v8i32, loadv2i64,
 | 
			
		||||
                                 sub_xmm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"DQA", VR128, v16i16, v8i16, v8i32, loadv2i64,
 | 
			
		||||
                                 sub_xmm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"DQA", VR128, v32i8, v16i8, v8i32, loadv2i64,
 | 
			
		||||
                                 sub_xmm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"APD", "UPD", VR128, v4f64, v2f64, v8i32,
 | 
			
		||||
                                 loadv2f64, sub_xmm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"APS", "UPS", VR128, v8f32, v4f32, v8i32,
 | 
			
		||||
                                 loadv4f32, sub_xmm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"DQA", "DQU", VR128, v4i64, v2i64, v8i32,
 | 
			
		||||
                                 loadv2i64, sub_xmm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"DQA", "DQU", VR128, v8i32, v4i32, v8i32,
 | 
			
		||||
                                 loadv2i64, sub_xmm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"DQA", "DQU", VR128, v16i16, v8i16, v8i32,
 | 
			
		||||
                                 loadv2i64, sub_xmm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"DQA", "DQU", VR128, v32i8, v16i8, v8i32,
 | 
			
		||||
                                 loadv2i64, sub_xmm>;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
let Predicates = [HasVLX] in {
 | 
			
		||||
  defm : subvector_zero_lowering<"APDZ128", VR128X, v4f64, v2f64, v8i32,
 | 
			
		||||
                                 loadv2f64, sub_xmm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"APSZ128", VR128X, v8f32, v4f32, v8i32,
 | 
			
		||||
                                 loadv4f32, sub_xmm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"DQA64Z128", VR128X, v4i64, v2i64, v8i32,
 | 
			
		||||
                                 loadv2i64, sub_xmm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"DQA64Z128", VR128X, v8i32, v4i32, v8i32,
 | 
			
		||||
                                 loadv2i64, sub_xmm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"DQA64Z128", VR128X, v16i16, v8i16, v8i32,
 | 
			
		||||
                                 loadv2i64, sub_xmm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"DQA64Z128", VR128X, v32i8, v16i8, v8i32,
 | 
			
		||||
                                 loadv2i64, sub_xmm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"APDZ128", "UPDZ128", VR128X, v4f64,
 | 
			
		||||
                                 v2f64, v8i32, loadv2f64, sub_xmm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"APSZ128", "UPSZ128", VR128X, v8f32,
 | 
			
		||||
                                 v4f32, v8i32, loadv4f32, sub_xmm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v4i64,
 | 
			
		||||
                                 v2i64, v8i32, loadv2i64, sub_xmm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v8i32,
 | 
			
		||||
                                 v4i32, v8i32, loadv2i64, sub_xmm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v16i16,
 | 
			
		||||
                                 v8i16, v8i32, loadv2i64, sub_xmm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v32i8,
 | 
			
		||||
                                 v16i8, v8i32, loadv2i64, sub_xmm>;
 | 
			
		||||
 | 
			
		||||
  defm : subvector_zero_lowering<"APDZ128", VR128X, v8f64, v2f64, v16i32,
 | 
			
		||||
                                 loadv2f64, sub_xmm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"APSZ128", VR128X, v16f32, v4f32, v16i32,
 | 
			
		||||
                                 loadv4f32, sub_xmm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"DQA64Z128", VR128X, v8i64, v2i64, v16i32,
 | 
			
		||||
                                 loadv2i64, sub_xmm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"DQA64Z128", VR128X, v16i32, v4i32, v16i32,
 | 
			
		||||
                                 loadv2i64, sub_xmm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"DQA64Z128", VR128X, v32i16, v8i16, v16i32,
 | 
			
		||||
                                 loadv2i64, sub_xmm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"DQA64Z128", VR128X, v64i8, v16i8, v16i32,
 | 
			
		||||
                                 loadv2i64, sub_xmm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"APDZ128", "UPDZ128", VR128X, v8f64,
 | 
			
		||||
                                 v2f64, v16i32, loadv2f64, sub_xmm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"APSZ128", "UPSZ128", VR128X, v16f32,
 | 
			
		||||
                                 v4f32, v16i32, loadv4f32, sub_xmm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v8i64,
 | 
			
		||||
                                 v2i64, v16i32, loadv2i64, sub_xmm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v16i32,
 | 
			
		||||
                                 v4i32, v16i32, loadv2i64, sub_xmm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v32i16,
 | 
			
		||||
                                 v8i16, v16i32, loadv2i64, sub_xmm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v64i8,
 | 
			
		||||
                                 v16i8, v16i32, loadv2i64, sub_xmm>;
 | 
			
		||||
 | 
			
		||||
  defm : subvector_zero_lowering<"APDZ256", VR256X, v8f64, v4f64, v16i32,
 | 
			
		||||
                                 loadv4f64, sub_ymm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"APSZ256", VR256X, v16f32, v8f32, v16i32,
 | 
			
		||||
                                 loadv8f32, sub_ymm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"DQA64Z256", VR256X, v8i64, v4i64, v16i32,
 | 
			
		||||
                                 loadv4i64, sub_ymm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"DQA64Z256", VR256X, v16i32, v8i32, v16i32,
 | 
			
		||||
                                 loadv4i64, sub_ymm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"DQA64Z256", VR256X, v32i16, v16i16, v16i32,
 | 
			
		||||
                                 loadv4i64, sub_ymm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"DQA64Z256", VR256X, v64i8, v32i8, v16i32,
 | 
			
		||||
                                 loadv4i64, sub_ymm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"APDZ256", "UPDZ256", VR256X, v8f64,
 | 
			
		||||
                                 v4f64, v16i32, loadv4f64, sub_ymm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"APSZ256", "UPDZ256", VR256X, v16f32,
 | 
			
		||||
                                 v8f32, v16i32, loadv8f32, sub_ymm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"DQA64Z256", "DQU64Z256", VR256X, v8i64,
 | 
			
		||||
                                 v4i64, v16i32, loadv4i64, sub_ymm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"DQA64Z256", "DQU64Z256", VR256X, v16i32,
 | 
			
		||||
                                 v8i32, v16i32, loadv4i64, sub_ymm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"DQA64Z256", "DQU64Z256", VR256X, v32i16,
 | 
			
		||||
                                 v16i16, v16i32, loadv4i64, sub_ymm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"DQA64Z256", "DQU64Z256", VR256X, v64i8,
 | 
			
		||||
                                 v32i8, v16i32, loadv4i64, sub_ymm>;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
let Predicates = [HasAVX512, NoVLX] in {
 | 
			
		||||
  defm : subvector_zero_lowering<"APD", VR128, v8f64, v2f64, v16i32, loadv2f64,
 | 
			
		||||
                                 sub_xmm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"APS", VR128, v16f32, v4f32, v16i32, loadv4f32,
 | 
			
		||||
                                 sub_xmm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"DQA", VR128, v8i64, v2i64, v16i32, loadv2i64,
 | 
			
		||||
                                 sub_xmm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"DQA", VR128, v16i32, v4i32, v16i32, loadv2i64,
 | 
			
		||||
                                 sub_xmm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"DQA", VR128, v32i16, v8i16, v16i32, loadv2i64,
 | 
			
		||||
                                 sub_xmm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"DQA", VR128, v64i8, v16i8, v16i32, loadv2i64,
 | 
			
		||||
                                 sub_xmm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"APD", "UPD", VR128, v8f64, v2f64,
 | 
			
		||||
                                 v16i32,loadv2f64, sub_xmm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"APS", "UPS", VR128, v16f32, v4f32,
 | 
			
		||||
                                 v16i32, loadv4f32, sub_xmm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"DQA", "DQU", VR128, v8i64, v2i64,
 | 
			
		||||
                                 v16i32, loadv2i64, sub_xmm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"DQA", "DQU", VR128, v16i32, v4i32,
 | 
			
		||||
                                 v16i32, loadv2i64, sub_xmm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"DQA", "DQU", VR128, v32i16, v8i16,
 | 
			
		||||
                                 v16i32, loadv2i64, sub_xmm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"DQA", "DQU", VR128, v64i8, v16i8,
 | 
			
		||||
                                 v16i32, loadv2i64, sub_xmm>;
 | 
			
		||||
 | 
			
		||||
  defm : subvector_zero_lowering<"APDY", VR256, v8f64, v4f64, v16i32,
 | 
			
		||||
                                 loadv4f64, sub_ymm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"APSY", VR256, v16f32, v8f32, v16i32,
 | 
			
		||||
                                 loadv8f32, sub_ymm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"DQAY", VR256, v8i64, v4i64, v16i32,
 | 
			
		||||
                                 loadv4i64, sub_ymm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"DQAY", VR256, v16i32, v8i32, v16i32,
 | 
			
		||||
                                 loadv4i64, sub_ymm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"DQAY", VR256, v32i16, v16i16, v16i32,
 | 
			
		||||
                                 loadv4i64, sub_ymm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"DQAY", VR256, v64i8, v32i8, v16i32,
 | 
			
		||||
                                 loadv4i64, sub_ymm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"APDY", "UPDY", VR256, v8f64, v4f64,
 | 
			
		||||
                                 v16i32, loadv4f64, sub_ymm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"APSY", "UPSY", VR256, v16f32, v8f32,
 | 
			
		||||
                                 v16i32, loadv8f32, sub_ymm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"DQAY", "DQUY", VR256, v8i64, v4i64,
 | 
			
		||||
                                 v16i32, loadv4i64, sub_ymm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"DQAY", "DQUY", VR256, v16i32, v8i32,
 | 
			
		||||
                                 v16i32, loadv4i64, sub_ymm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"DQAY", "DQUY", VR256, v32i16, v16i16,
 | 
			
		||||
                                 v16i32, loadv4i64, sub_ymm>;
 | 
			
		||||
  defm : subvector_zero_lowering<"DQAY", "DQUY", VR256, v64i8, v32i8,
 | 
			
		||||
                                 v16i32, loadv4i64, sub_ymm>;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// List of opcodes that guaranteed to zero the upper elements of vector regs.
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -28,13 +28,13 @@ define <4 x double> @merge_4f64_2f64_23(<2 x double>* %ptr) nounwind uwtable noi
 | 
			
		|||
define <4 x double> @merge_4f64_2f64_2z(<2 x double>* %ptr) nounwind uwtable noinline ssp {
 | 
			
		||||
; AVX-LABEL: merge_4f64_2f64_2z:
 | 
			
		||||
; AVX:       # %bb.0:
 | 
			
		||||
; AVX-NEXT:    vmovaps 32(%rdi), %xmm0
 | 
			
		||||
; AVX-NEXT:    vmovups 32(%rdi), %xmm0
 | 
			
		||||
; AVX-NEXT:    retq
 | 
			
		||||
;
 | 
			
		||||
; X32-AVX-LABEL: merge_4f64_2f64_2z:
 | 
			
		||||
; X32-AVX:       # %bb.0:
 | 
			
		||||
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
 | 
			
		||||
; X32-AVX-NEXT:    vmovaps 32(%eax), %xmm0
 | 
			
		||||
; X32-AVX-NEXT:    vmovups 32(%eax), %xmm0
 | 
			
		||||
; X32-AVX-NEXT:    retl
 | 
			
		||||
  %ptr0 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 2
 | 
			
		||||
  %val0 = load <2 x double>, <2 x double>* %ptr0
 | 
			
		||||
| 
						 | 
				
			
			@ -109,13 +109,13 @@ define <4 x double> @merge_4f64_f64_34uu(double* %ptr) nounwind uwtable noinline
 | 
			
		|||
define <4 x double> @merge_4f64_f64_45zz(double* %ptr) nounwind uwtable noinline ssp {
 | 
			
		||||
; AVX-LABEL: merge_4f64_f64_45zz:
 | 
			
		||||
; AVX:       # %bb.0:
 | 
			
		||||
; AVX-NEXT:    vmovaps 32(%rdi), %xmm0
 | 
			
		||||
; AVX-NEXT:    vmovups 32(%rdi), %xmm0
 | 
			
		||||
; AVX-NEXT:    retq
 | 
			
		||||
;
 | 
			
		||||
; X32-AVX-LABEL: merge_4f64_f64_45zz:
 | 
			
		||||
; X32-AVX:       # %bb.0:
 | 
			
		||||
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
 | 
			
		||||
; X32-AVX-NEXT:    vmovaps 32(%eax), %xmm0
 | 
			
		||||
; X32-AVX-NEXT:    vmovups 32(%eax), %xmm0
 | 
			
		||||
; X32-AVX-NEXT:    retl
 | 
			
		||||
  %ptr0 = getelementptr inbounds double, double* %ptr, i64 4
 | 
			
		||||
  %ptr1 = getelementptr inbounds double, double* %ptr, i64 5
 | 
			
		||||
| 
						 | 
				
			
			@ -155,13 +155,13 @@ define <4 x double> @merge_4f64_f64_34z6(double* %ptr) nounwind uwtable noinline
 | 
			
		|||
define <4 x i64> @merge_4i64_2i64_3z(<2 x i64>* %ptr) nounwind uwtable noinline ssp {
 | 
			
		||||
; AVX-LABEL: merge_4i64_2i64_3z:
 | 
			
		||||
; AVX:       # %bb.0:
 | 
			
		||||
; AVX-NEXT:    vmovaps 48(%rdi), %xmm0
 | 
			
		||||
; AVX-NEXT:    vmovups 48(%rdi), %xmm0
 | 
			
		||||
; AVX-NEXT:    retq
 | 
			
		||||
;
 | 
			
		||||
; X32-AVX-LABEL: merge_4i64_2i64_3z:
 | 
			
		||||
; X32-AVX:       # %bb.0:
 | 
			
		||||
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
 | 
			
		||||
; X32-AVX-NEXT:    vmovaps 48(%eax), %xmm0
 | 
			
		||||
; X32-AVX-NEXT:    vmovups 48(%eax), %xmm0
 | 
			
		||||
; X32-AVX-NEXT:    retl
 | 
			
		||||
  %ptr0 = getelementptr inbounds <2 x i64>, <2 x i64>* %ptr, i64 3
 | 
			
		||||
  %val0 = load <2 x i64>, <2 x i64>* %ptr0
 | 
			
		||||
| 
						 | 
				
			
			@ -217,13 +217,13 @@ define <4 x i64> @merge_4i64_i64_1zzu(i64* %ptr) nounwind uwtable noinline ssp {
 | 
			
		|||
define <4 x i64> @merge_4i64_i64_23zz(i64* %ptr) nounwind uwtable noinline ssp {
 | 
			
		||||
; AVX-LABEL: merge_4i64_i64_23zz:
 | 
			
		||||
; AVX:       # %bb.0:
 | 
			
		||||
; AVX-NEXT:    vmovaps 16(%rdi), %xmm0
 | 
			
		||||
; AVX-NEXT:    vmovups 16(%rdi), %xmm0
 | 
			
		||||
; AVX-NEXT:    retq
 | 
			
		||||
;
 | 
			
		||||
; X32-AVX-LABEL: merge_4i64_i64_23zz:
 | 
			
		||||
; X32-AVX:       # %bb.0:
 | 
			
		||||
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
 | 
			
		||||
; X32-AVX-NEXT:    vmovaps 16(%eax), %xmm0
 | 
			
		||||
; X32-AVX-NEXT:    vmovups 16(%eax), %xmm0
 | 
			
		||||
; X32-AVX-NEXT:    retl
 | 
			
		||||
  %ptr0 = getelementptr inbounds i64, i64* %ptr, i64 2
 | 
			
		||||
  %ptr1 = getelementptr inbounds i64, i64* %ptr, i64 3
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -106,13 +106,13 @@ define <8 x double> @merge_8f64_f64_23uuuuu9(double* %ptr) nounwind uwtable noin
 | 
			
		|||
define <8 x double> @merge_8f64_f64_12zzuuzz(double* %ptr) nounwind uwtable noinline ssp {
 | 
			
		||||
; ALL-LABEL: merge_8f64_f64_12zzuuzz:
 | 
			
		||||
; ALL:       # %bb.0:
 | 
			
		||||
; ALL-NEXT:    vmovaps 8(%rdi), %xmm0
 | 
			
		||||
; ALL-NEXT:    vmovups 8(%rdi), %xmm0
 | 
			
		||||
; ALL-NEXT:    retq
 | 
			
		||||
;
 | 
			
		||||
; X32-AVX512F-LABEL: merge_8f64_f64_12zzuuzz:
 | 
			
		||||
; X32-AVX512F:       # %bb.0:
 | 
			
		||||
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
 | 
			
		||||
; X32-AVX512F-NEXT:    vmovaps 8(%eax), %xmm0
 | 
			
		||||
; X32-AVX512F-NEXT:    vmovups 8(%eax), %xmm0
 | 
			
		||||
; X32-AVX512F-NEXT:    retl
 | 
			
		||||
  %ptr0 = getelementptr inbounds double, double* %ptr, i64 1
 | 
			
		||||
  %ptr1 = getelementptr inbounds double, double* %ptr, i64 2
 | 
			
		||||
| 
						 | 
				
			
			@ -190,7 +190,7 @@ define <8 x i64> @merge_8i64_4i64_z3(<4 x i64>* %ptr) nounwind uwtable noinline
 | 
			
		|||
define <8 x i64> @merge_8i64_i64_56zz9uzz(i64* %ptr) nounwind uwtable noinline ssp {
 | 
			
		||||
; ALL-LABEL: merge_8i64_i64_56zz9uzz:
 | 
			
		||||
; ALL:       # %bb.0:
 | 
			
		||||
; ALL-NEXT:    vmovaps 40(%rdi), %xmm0
 | 
			
		||||
; ALL-NEXT:    vmovups 40(%rdi), %xmm0
 | 
			
		||||
; ALL-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
 | 
			
		||||
; ALL-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
 | 
			
		||||
; ALL-NEXT:    retq
 | 
			
		||||
| 
						 | 
				
			
			@ -198,7 +198,7 @@ define <8 x i64> @merge_8i64_i64_56zz9uzz(i64* %ptr) nounwind uwtable noinline s
 | 
			
		|||
; X32-AVX512F-LABEL: merge_8i64_i64_56zz9uzz:
 | 
			
		||||
; X32-AVX512F:       # %bb.0:
 | 
			
		||||
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
 | 
			
		||||
; X32-AVX512F-NEXT:    vmovaps 40(%eax), %xmm0
 | 
			
		||||
; X32-AVX512F-NEXT:    vmovups 40(%eax), %xmm0
 | 
			
		||||
; X32-AVX512F-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
 | 
			
		||||
; X32-AVX512F-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
 | 
			
		||||
; X32-AVX512F-NEXT:    retl
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue