[X86] Make a bunch of merge masked binops commutable for load folding.
This primarily affects add/fadd/mul/fmul/and/or/xor/pmuludq/pmuldq/max/min/fmaxc/fminc/pmaddwd/pavg. We already commuted the unmasked and zero masked versions. I've added 512-bit stack folding tests for most of the instructions affected. I've tested cases that need commuting and cases that don't, across the unmasked, merge masked, and zero masked forms. The 128/256-bit instructions should behave similarly. llvm-svn: 362746
This commit is contained in:
		
							parent
							
								
									980d3645df
								
							
						
					
					
						commit
						f320f26716
					
				| 
						 | 
					@ -277,10 +277,9 @@ multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
 | 
				
			||||||
multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
 | 
					multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
 | 
				
			||||||
                           dag Outs, dag Ins, string OpcodeStr,
 | 
					                           dag Outs, dag Ins, string OpcodeStr,
 | 
				
			||||||
                           string AttSrcAsm, string IntelSrcAsm,
 | 
					                           string AttSrcAsm, string IntelSrcAsm,
 | 
				
			||||||
                           dag RHS,
 | 
					                           dag RHS> :
 | 
				
			||||||
                           bit IsCommutable = 0> :
 | 
					 | 
				
			||||||
   AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
 | 
					   AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
 | 
				
			||||||
                   RHS, IsCommutable, 0, IsCommutable, X86selects>;
 | 
					                   RHS, 0, 0, 0, X86selects>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// Similar to AVX512_maskable but in this case one of the source operands
 | 
					// Similar to AVX512_maskable but in this case one of the source operands
 | 
				
			||||||
// ($src1) is already tied to $dst so we just use that for the preserved
 | 
					// ($src1) is already tied to $dst so we just use that for the preserved
 | 
				
			||||||
| 
						 | 
					@ -4602,7 +4601,7 @@ multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
 | 
				
			||||||
                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
 | 
					                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
 | 
				
			||||||
                    "$src2, $src1", "$src1, $src2",
 | 
					                    "$src2, $src1", "$src1, $src2",
 | 
				
			||||||
                    (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
 | 
					                    (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
 | 
				
			||||||
                    IsCommutable>, AVX512BIBase, EVEX_4V,
 | 
					                    IsCommutable, IsCommutable>, AVX512BIBase, EVEX_4V,
 | 
				
			||||||
                    Sched<[sched]>;
 | 
					                    Sched<[sched]>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
 | 
					  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
 | 
				
			||||||
| 
						 | 
					@ -4838,7 +4837,7 @@ multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
 | 
				
			||||||
                            (_Dst.VT (OpNode
 | 
					                            (_Dst.VT (OpNode
 | 
				
			||||||
                                         (_Src.VT _Src.RC:$src1),
 | 
					                                         (_Src.VT _Src.RC:$src1),
 | 
				
			||||||
                                         (_Src.VT _Src.RC:$src2))),
 | 
					                                         (_Src.VT _Src.RC:$src2))),
 | 
				
			||||||
                            IsCommutable>,
 | 
					                            IsCommutable, IsCommutable>,
 | 
				
			||||||
                            EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[sched]>;
 | 
					                            EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[sched]>;
 | 
				
			||||||
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
 | 
					  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
 | 
				
			||||||
                        (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
 | 
					                        (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
 | 
				
			||||||
| 
						 | 
					@ -5531,13 +5530,13 @@ defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
 | 
				
			||||||
multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
 | 
					multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
 | 
				
			||||||
                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
 | 
					                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
 | 
				
			||||||
                            bit IsCommutable,
 | 
					                            bit IsCommutable,
 | 
				
			||||||
                            bit IsKZCommutable = IsCommutable> {
 | 
					                            bit IsKCommutable = IsCommutable> {
 | 
				
			||||||
  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
 | 
					  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
 | 
				
			||||||
  defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
 | 
					  defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
 | 
				
			||||||
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
 | 
					                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
 | 
				
			||||||
                  "$src2, $src1", "$src1, $src2",
 | 
					                  "$src2, $src1", "$src1, $src2",
 | 
				
			||||||
                  (_.VT (OpNode _.RC:$src1, _.RC:$src2)), IsCommutable, 0,
 | 
					                  (_.VT (OpNode _.RC:$src1, _.RC:$src2)), IsCommutable,
 | 
				
			||||||
                  IsKZCommutable>,
 | 
					                  IsKCommutable, IsKCommutable>,
 | 
				
			||||||
                  EVEX_4V, Sched<[sched]>;
 | 
					                  EVEX_4V, Sched<[sched]>;
 | 
				
			||||||
  let mayLoad = 1 in {
 | 
					  let mayLoad = 1 in {
 | 
				
			||||||
    defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
 | 
					    defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -16,6 +16,28 @@ define <8 x double> @stack_fold_addpd_zmm(<8 x double> %a0, <8 x double> %a1) {
 | 
				
			||||||
  ret <8 x double> %2
 | 
					  ret <8 x double> %2
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					define <8 x double> @stack_fold_addpd_zmm_k(<8 x double> %a0, <8 x double> %a1, i8 %mask, <8 x double>* %passthru) {
 | 
				
			||||||
 | 
					  ;CHECK-LABEL: stack_fold_addpd_zmm_k:
 | 
				
			||||||
 | 
					  ;CHECK:       vaddpd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[1-7]}}} {{.*#+}} 64-byte Folded Reload
 | 
				
			||||||
 | 
					  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
 | 
				
			||||||
 | 
					  %2 = fadd <8 x double> %a0, %a1
 | 
				
			||||||
 | 
					  %3 = bitcast i8 %mask to <8 x i1>
 | 
				
			||||||
 | 
					  %4 = load <8 x double>, <8 x double>* %passthru
 | 
				
			||||||
 | 
					  %5 = select <8 x i1> %3, <8 x double> %2, <8 x double> %4
 | 
				
			||||||
 | 
					  ret <8 x double> %5
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					define <8 x double> @stack_fold_addpd_zmm_k_commuted(<8 x double> %a0, <8 x double> %a1, i8 %mask, <8 x double>* %passthru) {
 | 
				
			||||||
 | 
					  ;CHECK-LABEL: stack_fold_addpd_zmm_k_commuted:
 | 
				
			||||||
 | 
					  ;CHECK:       vaddpd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[1-7]}}} {{.*#+}} 64-byte Folded Reload
 | 
				
			||||||
 | 
					  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
 | 
				
			||||||
 | 
					  %2 = fadd <8 x double> %a1, %a0
 | 
				
			||||||
 | 
					  %3 = bitcast i8 %mask to <8 x i1>
 | 
				
			||||||
 | 
					  %4 = load <8 x double>, <8 x double>* %passthru
 | 
				
			||||||
 | 
					  %5 = select <8 x i1> %3, <8 x double> %2, <8 x double> %4
 | 
				
			||||||
 | 
					  ret <8 x double> %5
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
define <8 x double> @stack_fold_addpd_zmm_kz(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
 | 
					define <8 x double> @stack_fold_addpd_zmm_kz(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
 | 
				
			||||||
  ;CHECK-LABEL: stack_fold_addpd_zmm_kz
 | 
					  ;CHECK-LABEL: stack_fold_addpd_zmm_kz
 | 
				
			||||||
  ;CHECK:       vaddpd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[1-7]}}} {z} {{.*#+}} 64-byte Folded Reload
 | 
					  ;CHECK:       vaddpd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[1-7]}}} {z} {{.*#+}} 64-byte Folded Reload
 | 
				
			||||||
| 
						 | 
					@ -34,6 +56,28 @@ define <16 x float> @stack_fold_addps_zmm(<16 x float> %a0, <16 x float> %a1) {
 | 
				
			||||||
  ret <16 x float> %2
 | 
					  ret <16 x float> %2
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					define <16 x float> @stack_fold_addps_zmm_k(<16 x float> %a0, <16 x float> %a1, i16 %mask, <16 x float>* %passthru) {
 | 
				
			||||||
 | 
					  ;CHECK-LABEL: stack_fold_addps_zmm_k:
 | 
				
			||||||
 | 
					  ;CHECK:       vaddps {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[1-7]}}} {{.*#+}} 64-byte Folded Reload
 | 
				
			||||||
 | 
					  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
 | 
				
			||||||
 | 
					  %2 = fadd <16 x float> %a0, %a1
 | 
				
			||||||
 | 
					  %3 = bitcast i16 %mask to <16 x i1>
 | 
				
			||||||
 | 
					  %4 = load <16 x float>, <16 x float>* %passthru
 | 
				
			||||||
 | 
					  %5 = select <16 x i1> %3, <16 x float> %2, <16 x float> %4
 | 
				
			||||||
 | 
					  ret <16 x float> %5
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					define <16 x float> @stack_fold_addps_zmm_k_commuted(<16 x float> %a0, <16 x float> %a1, i16 %mask, <16 x float>* %passthru) {
 | 
				
			||||||
 | 
					  ;CHECK-LABEL: stack_fold_addps_zmm_k_commuted:
 | 
				
			||||||
 | 
					  ;CHECK:       vaddps {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[1-7]}}} {{.*#+}} 64-byte Folded Reload
 | 
				
			||||||
 | 
					  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
 | 
				
			||||||
 | 
					  %2 = fadd <16 x float> %a1, %a0
 | 
				
			||||||
 | 
					  %3 = bitcast i16 %mask to <16 x i1>
 | 
				
			||||||
 | 
					  %4 = load <16 x float>, <16 x float>* %passthru
 | 
				
			||||||
 | 
					  %5 = select <16 x i1> %3, <16 x float> %2, <16 x float> %4
 | 
				
			||||||
 | 
					  ret <16 x float> %5
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
define <16 x float> @stack_fold_addps_zmm_kz(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
 | 
					define <16 x float> @stack_fold_addps_zmm_kz(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
 | 
				
			||||||
  ;CHECK-LABEL: stack_fold_addps_zmm_kz
 | 
					  ;CHECK-LABEL: stack_fold_addps_zmm_kz
 | 
				
			||||||
  ;CHECK:       vaddps {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[1-7]}}} {z} {{.*#+}} 64-byte Folded Reload
 | 
					  ;CHECK:       vaddps {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[1-7]}}} {z} {{.*#+}} 64-byte Folded Reload
 | 
				
			||||||
| 
						 | 
					@ -311,6 +355,28 @@ define <8 x double> @stack_fold_maxpd_zmm_commutable(<8 x double> %a0, <8 x doub
 | 
				
			||||||
  ret <8 x double> %2
 | 
					  ret <8 x double> %2
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					define <8 x double> @stack_fold_maxpd_zmm_commutable_k(<8 x double> %a0, <8 x double> %a1, i8 %mask, <8 x double>* %passthru) #1 {
 | 
				
			||||||
 | 
					  ;CHECK-LABEL: stack_fold_maxpd_zmm_commutable_k:
 | 
				
			||||||
 | 
					  ;CHECK:       vmaxpd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[1-7]}}} {{.*#+}} 64-byte Folded Reload
 | 
				
			||||||
 | 
					  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
 | 
				
			||||||
 | 
					  %2 = call <8 x double> @llvm.x86.avx512.max.pd.512(<8 x double> %a0, <8 x double> %a1, i32 4)
 | 
				
			||||||
 | 
					  %3 = bitcast i8 %mask to <8 x i1>
 | 
				
			||||||
 | 
					  %4 = load <8 x double>, <8 x double>* %passthru
 | 
				
			||||||
 | 
					  %5 = select <8 x i1> %3, <8 x double> %2, <8 x double> %4
 | 
				
			||||||
 | 
					  ret <8 x double> %5
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					define <8 x double> @stack_fold_maxpd_zmm_commutable_k_commuted(<8 x double> %a0, <8 x double> %a1, i8 %mask, <8 x double>* %passthru) #1 {
 | 
				
			||||||
 | 
					  ;CHECK-LABEL: stack_fold_maxpd_zmm_commutable_k_commuted:
 | 
				
			||||||
 | 
					  ;CHECK:       vmaxpd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[1-7]}}} {{.*#+}} 64-byte Folded Reload
 | 
				
			||||||
 | 
					  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
 | 
				
			||||||
 | 
					  %2 = call <8 x double> @llvm.x86.avx512.max.pd.512(<8 x double> %a1, <8 x double> %a0, i32 4)
 | 
				
			||||||
 | 
					  %3 = bitcast i8 %mask to <8 x i1>
 | 
				
			||||||
 | 
					  %4 = load <8 x double>, <8 x double>* %passthru
 | 
				
			||||||
 | 
					  %5 = select <8 x i1> %3, <8 x double> %2, <8 x double> %4
 | 
				
			||||||
 | 
					  ret <8 x double> %5
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
define <8 x double> @stack_fold_maxpd_zmm_commutable_kz(<8 x double> %a0, <8 x double> %a1, i8 %mask) #1 {
 | 
					define <8 x double> @stack_fold_maxpd_zmm_commutable_kz(<8 x double> %a0, <8 x double> %a1, i8 %mask) #1 {
 | 
				
			||||||
  ;CHECK-LABEL: stack_fold_maxpd_zmm_commutable_kz
 | 
					  ;CHECK-LABEL: stack_fold_maxpd_zmm_commutable_kz
 | 
				
			||||||
  ;CHECK:       vmaxpd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[1-7]}}} {z} {{.*#+}} 64-byte Folded Reload
 | 
					  ;CHECK:       vmaxpd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[1-7]}}} {z} {{.*#+}} 64-byte Folded Reload
 | 
				
			||||||
| 
						 | 
					@ -338,6 +404,28 @@ define <16 x float> @stack_fold_maxps_zmm_commutable(<16 x float> %a0, <16 x flo
 | 
				
			||||||
  ret <16 x float> %2
 | 
					  ret <16 x float> %2
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					define <16 x float> @stack_fold_maxps_zmm_commutable_k(<16 x float> %a0, <16 x float> %a1, i16 %mask, <16 x float>* %passthru) #1 {
 | 
				
			||||||
 | 
					  ;CHECK-LABEL: stack_fold_maxps_zmm_commutable_k:
 | 
				
			||||||
 | 
					  ;CHECK:       vmaxps {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[1-7]}}} {{.*#+}} 64-byte Folded Reload
 | 
				
			||||||
 | 
					  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
 | 
				
			||||||
 | 
					  %2 = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
 | 
				
			||||||
 | 
					  %3 = bitcast i16 %mask to <16 x i1>
 | 
				
			||||||
 | 
					  %4 = load <16 x float>, <16 x float>* %passthru
 | 
				
			||||||
 | 
					  %5 = select <16 x i1> %3, <16 x float> %2, <16 x float> %4
 | 
				
			||||||
 | 
					  ret <16 x float> %5
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					define <16 x float> @stack_fold_maxps_zmm_commutable_k_commuted(<16 x float> %a0, <16 x float> %a1, i16 %mask, <16 x float>* %passthru) #1 {
 | 
				
			||||||
 | 
					  ;CHECK-LABEL: stack_fold_maxps_zmm_commutable_k_commuted:
 | 
				
			||||||
 | 
					  ;CHECK:       vmaxps {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[1-7]}}} {{.*#+}} 64-byte Folded Reload
 | 
				
			||||||
 | 
					  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
 | 
				
			||||||
 | 
					  %2 = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %a1, <16 x float> %a0, i32 4)
 | 
				
			||||||
 | 
					  %3 = bitcast i16 %mask to <16 x i1>
 | 
				
			||||||
 | 
					  %4 = load <16 x float>, <16 x float>* %passthru
 | 
				
			||||||
 | 
					  %5 = select <16 x i1> %3, <16 x float> %2, <16 x float> %4
 | 
				
			||||||
 | 
					  ret <16 x float> %5
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
define <16 x float> @stack_fold_maxps_zmm_commutable_kz(<16 x float> %a0, <16 x float> %a1, i16 %mask) #1 {
 | 
					define <16 x float> @stack_fold_maxps_zmm_commutable_kz(<16 x float> %a0, <16 x float> %a1, i16 %mask) #1 {
 | 
				
			||||||
  ;CHECK-LABEL: stack_fold_maxps_zmm_commutable_kz
 | 
					  ;CHECK-LABEL: stack_fold_maxps_zmm_commutable_kz
 | 
				
			||||||
  ;CHECK:       vmaxps {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[1-7]}}} {z} {{.*#+}} 64-byte Folded Reload
 | 
					  ;CHECK:       vmaxps {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[1-7]}}} {z} {{.*#+}} 64-byte Folded Reload
 | 
				
			||||||
| 
						 | 
					@ -365,6 +453,28 @@ define <8 x double> @stack_fold_minpd_zmm_commutable(<8 x double> %a0, <8 x doub
 | 
				
			||||||
  ret <8 x double> %2
 | 
					  ret <8 x double> %2
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					define <8 x double> @stack_fold_minpd_zmm_commutable_k(<8 x double> %a0, <8 x double> %a1, i8 %mask, <8 x double>* %passthru) #1 {
 | 
				
			||||||
 | 
					  ;CHECK-LABEL: stack_fold_minpd_zmm_commutable_k:
 | 
				
			||||||
 | 
					  ;CHECK:       vminpd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[1-7]}}} {{.*#+}} 64-byte Folded Reload
 | 
				
			||||||
 | 
					  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
 | 
				
			||||||
 | 
					  %2 = call <8 x double> @llvm.x86.avx512.min.pd.512(<8 x double> %a0, <8 x double> %a1, i32 4)
 | 
				
			||||||
 | 
					  %3 = bitcast i8 %mask to <8 x i1>
 | 
				
			||||||
 | 
					  %4 = load <8 x double>, <8 x double>* %passthru
 | 
				
			||||||
 | 
					  %5 = select <8 x i1> %3, <8 x double> %2, <8 x double> %4
 | 
				
			||||||
 | 
					  ret <8 x double> %5
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					define <8 x double> @stack_fold_minpd_zmm_commutable_k_commuted(<8 x double> %a0, <8 x double> %a1, i8 %mask, <8 x double>* %passthru) #1 {
 | 
				
			||||||
 | 
					  ;CHECK-LABEL: stack_fold_minpd_zmm_commutable_k_commuted:
 | 
				
			||||||
 | 
					  ;CHECK:       vminpd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[1-7]}}} {{.*#+}} 64-byte Folded Reload
 | 
				
			||||||
 | 
					  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
 | 
				
			||||||
 | 
					  %2 = call <8 x double> @llvm.x86.avx512.min.pd.512(<8 x double> %a1, <8 x double> %a0, i32 4)
 | 
				
			||||||
 | 
					  %3 = bitcast i8 %mask to <8 x i1>
 | 
				
			||||||
 | 
					  %4 = load <8 x double>, <8 x double>* %passthru
 | 
				
			||||||
 | 
					  %5 = select <8 x i1> %3, <8 x double> %2, <8 x double> %4
 | 
				
			||||||
 | 
					  ret <8 x double> %5
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
define <8 x double> @stack_fold_minpd_zmm_commutable_kz(<8 x double> %a0, <8 x double> %a1, i8 %mask) #1 {
 | 
					define <8 x double> @stack_fold_minpd_zmm_commutable_kz(<8 x double> %a0, <8 x double> %a1, i8 %mask) #1 {
 | 
				
			||||||
  ;CHECK-LABEL: stack_fold_minpd_zmm_commutable_kz
 | 
					  ;CHECK-LABEL: stack_fold_minpd_zmm_commutable_kz
 | 
				
			||||||
  ;CHECK:       vminpd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[1-7]}}} {z} {{.*#+}} 64-byte Folded Reload
 | 
					  ;CHECK:       vminpd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[1-7]}}} {z} {{.*#+}} 64-byte Folded Reload
 | 
				
			||||||
| 
						 | 
					@ -392,6 +502,28 @@ define <16 x float> @stack_fold_minps_zmm_commutable(<16 x float> %a0, <16 x flo
 | 
				
			||||||
  ret <16 x float> %2
 | 
					  ret <16 x float> %2
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					define <16 x float> @stack_fold_minps_zmm_commutable_k(<16 x float> %a0, <16 x float> %a1, i16 %mask, <16 x float>* %passthru) #1 {
 | 
				
			||||||
 | 
					  ;CHECK-LABEL: stack_fold_minps_zmm_commutable_k:
 | 
				
			||||||
 | 
					  ;CHECK:       vminps {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[1-7]}}} {{.*#+}} 64-byte Folded Reload
 | 
				
			||||||
 | 
					  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
 | 
				
			||||||
 | 
					  %2 = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
 | 
				
			||||||
 | 
					  %3 = bitcast i16 %mask to <16 x i1>
 | 
				
			||||||
 | 
					  %4 = load <16 x float>, <16 x float>* %passthru
 | 
				
			||||||
 | 
					  %5 = select <16 x i1> %3, <16 x float> %2, <16 x float> %4
 | 
				
			||||||
 | 
					  ret <16 x float> %5
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					define <16 x float> @stack_fold_minps_zmm_commutable_k_commuted(<16 x float> %a0, <16 x float> %a1, i16 %mask, <16 x float>* %passthru) #1 {
 | 
				
			||||||
 | 
					  ;CHECK-LABEL: stack_fold_minps_zmm_commutable_k_commuted:
 | 
				
			||||||
 | 
					  ;CHECK:       vminps {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[1-7]}}} {{.*#+}} 64-byte Folded Reload
 | 
				
			||||||
 | 
					  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
 | 
				
			||||||
 | 
					  %2 = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %a1, <16 x float> %a0, i32 4)
 | 
				
			||||||
 | 
					  %3 = bitcast i16 %mask to <16 x i1>
 | 
				
			||||||
 | 
					  %4 = load <16 x float>, <16 x float>* %passthru
 | 
				
			||||||
 | 
					  %5 = select <16 x i1> %3, <16 x float> %2, <16 x float> %4
 | 
				
			||||||
 | 
					  ret <16 x float> %5
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
define <16 x float> @stack_fold_minps_zmm_commutable_kz(<16 x float> %a0, <16 x float> %a1, i16 %mask) #1 {
 | 
					define <16 x float> @stack_fold_minps_zmm_commutable_kz(<16 x float> %a0, <16 x float> %a1, i16 %mask) #1 {
 | 
				
			||||||
  ;CHECK-LABEL: stack_fold_minps_zmm_commutable_kz
 | 
					  ;CHECK-LABEL: stack_fold_minps_zmm_commutable_kz
 | 
				
			||||||
  ;CHECK:       vminps {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[1-7]}}} {z} {{.*#+}} 64-byte Folded Reload
 | 
					  ;CHECK:       vminps {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[1-7]}}} {z} {{.*#+}} 64-byte Folded Reload
 | 
				
			||||||
| 
						 | 
					@ -402,6 +534,86 @@ define <16 x float> @stack_fold_minps_zmm_commutable_kz(<16 x float> %a0, <16 x
 | 
				
			||||||
  ret <16 x float> %4
 | 
					  ret <16 x float> %4
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					define <8 x double> @stack_fold_mulpd_zmm(<8 x double> %a0, <8 x double> %a1) {
 | 
				
			||||||
 | 
					  ;CHECK-LABEL: stack_fold_mulpd_zmm
 | 
				
			||||||
 | 
					  ;CHECK:       vmulpd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
 | 
				
			||||||
 | 
					  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
 | 
				
			||||||
 | 
					  %2 = fmul <8 x double> %a0, %a1
 | 
				
			||||||
 | 
					  ret <8 x double> %2
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					define <8 x double> @stack_fold_mulpd_zmm_k(<8 x double> %a0, <8 x double> %a1, i8 %mask, <8 x double>* %passthru) {
 | 
				
			||||||
 | 
					  ;CHECK-LABEL: stack_fold_mulpd_zmm_k:
 | 
				
			||||||
 | 
					  ;CHECK:       vmulpd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[1-7]}}} {{.*#+}} 64-byte Folded Reload
 | 
				
			||||||
 | 
					  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
 | 
				
			||||||
 | 
					  %2 = fmul <8 x double> %a0, %a1
 | 
				
			||||||
 | 
					  %3 = bitcast i8 %mask to <8 x i1>
 | 
				
			||||||
 | 
					  %4 = load <8 x double>, <8 x double>* %passthru
 | 
				
			||||||
 | 
					  %5 = select <8 x i1> %3, <8 x double> %2, <8 x double> %4
 | 
				
			||||||
 | 
					  ret <8 x double> %5
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					define <8 x double> @stack_fold_mulpd_zmm_k_commuted(<8 x double> %a0, <8 x double> %a1, i8 %mask, <8 x double>* %passthru) {
 | 
				
			||||||
 | 
					  ;CHECK-LABEL: stack_fold_mulpd_zmm_k_commuted:
 | 
				
			||||||
 | 
					  ;CHECK:       vmulpd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[1-7]}}} {{.*#+}} 64-byte Folded Reload
 | 
				
			||||||
 | 
					  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
 | 
				
			||||||
 | 
					  %2 = fmul <8 x double> %a1, %a0
 | 
				
			||||||
 | 
					  %3 = bitcast i8 %mask to <8 x i1>
 | 
				
			||||||
 | 
					  %4 = load <8 x double>, <8 x double>* %passthru
 | 
				
			||||||
 | 
					  %5 = select <8 x i1> %3, <8 x double> %2, <8 x double> %4
 | 
				
			||||||
 | 
					  ret <8 x double> %5
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					define <8 x double> @stack_fold_mulpd_zmm_kz(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
 | 
				
			||||||
 | 
					  ;CHECK-LABEL: stack_fold_mulpd_zmm_kz
 | 
				
			||||||
 | 
					  ;CHECK:       vmulpd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[1-7]}}} {z} {{.*#+}} 64-byte Folded Reload
 | 
				
			||||||
 | 
					  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
 | 
				
			||||||
 | 
					  %2 = fmul <8 x double> %a1, %a0
 | 
				
			||||||
 | 
					  %3 = bitcast i8 %mask to <8 x i1>
 | 
				
			||||||
 | 
					  %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> zeroinitializer
 | 
				
			||||||
 | 
					  ret <8 x double> %4
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					define <16 x float> @stack_fold_mulps_zmm(<16 x float> %a0, <16 x float> %a1) {
 | 
				
			||||||
 | 
					  ;CHECK-LABEL: stack_fold_mulps_zmm
 | 
				
			||||||
 | 
					  ;CHECK:       vmulps {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
 | 
				
			||||||
 | 
					  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
 | 
				
			||||||
 | 
					  %2 = fmul <16 x float> %a0, %a1
 | 
				
			||||||
 | 
					  ret <16 x float> %2
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					define <16 x float> @stack_fold_mulps_zmm_k(<16 x float> %a0, <16 x float> %a1, i16 %mask, <16 x float>* %passthru) {
 | 
				
			||||||
 | 
					  ;CHECK-LABEL: stack_fold_mulps_zmm_k:
 | 
				
			||||||
 | 
					  ;CHECK:       vmulps {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[1-7]}}} {{.*#+}} 64-byte Folded Reload
 | 
				
			||||||
 | 
					  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
 | 
				
			||||||
 | 
					  %2 = fmul <16 x float> %a0, %a1
 | 
				
			||||||
 | 
					  %3 = bitcast i16 %mask to <16 x i1>
 | 
				
			||||||
 | 
					  %4 = load <16 x float>, <16 x float>* %passthru
 | 
				
			||||||
 | 
					  %5 = select <16 x i1> %3, <16 x float> %2, <16 x float> %4
 | 
				
			||||||
 | 
					  ret <16 x float> %5
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					define <16 x float> @stack_fold_mulps_zmm_k_commuted(<16 x float> %a0, <16 x float> %a1, i16 %mask, <16 x float>* %passthru) {
 | 
				
			||||||
 | 
					  ;CHECK-LABEL: stack_fold_mulps_zmm_k_commuted:
 | 
				
			||||||
 | 
					  ;CHECK:       vmulps {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[1-7]}}} {{.*#+}} 64-byte Folded Reload
 | 
				
			||||||
 | 
					  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
 | 
				
			||||||
 | 
					  %2 = fmul <16 x float> %a1, %a0
 | 
				
			||||||
 | 
					  %3 = bitcast i16 %mask to <16 x i1>
 | 
				
			||||||
 | 
					  %4 = load <16 x float>, <16 x float>* %passthru
 | 
				
			||||||
 | 
					  %5 = select <16 x i1> %3, <16 x float> %2, <16 x float> %4
 | 
				
			||||||
 | 
					  ret <16 x float> %5
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					define <16 x float> @stack_fold_mulps_zmm_kz(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
 | 
				
			||||||
 | 
					  ;CHECK-LABEL: stack_fold_mulps_zmm_kz
 | 
				
			||||||
 | 
					  ;CHECK:       vmulps {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[1-7]}}} {z} {{.*#+}} 64-byte Folded Reload
 | 
				
			||||||
 | 
					  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
 | 
				
			||||||
 | 
					  %2 = fmul <16 x float> %a1, %a0
 | 
				
			||||||
 | 
					  %3 = bitcast i16 %mask to <16 x i1>
 | 
				
			||||||
 | 
					  %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> zeroinitializer
 | 
				
			||||||
 | 
					  ret <16 x float> %4
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
define double @stack_fold_mulsd(double %a0, double %a1) {
 | 
					define double @stack_fold_mulsd(double %a0, double %a1) {
 | 
				
			||||||
  ;CHECK-LABEL: stack_fold_mulsd
 | 
					  ;CHECK-LABEL: stack_fold_mulsd
 | 
				
			||||||
  ;CHECK:       vmulsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
 | 
					  ;CHECK:       vmulsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
		Loading…
	
		Reference in New Issue