forked from OSchip/llvm-project
[X86] Standardize floating point assembly comments
Consistently use APFloat::toString when printing floating-point constants in assembly comments, so that Constant and ConstantDataSequential values are formatted identically. This should also help stop some of the Linux/Windows buildbot failures caused by differences in how NaN/INF etc. are matched. Differential Revision: https://reviews.llvm.org/D52702 llvm-svn: 343562
This commit is contained in:
parent
c066a92657
commit
ad23f270db
|
|
@ -1482,6 +1482,12 @@ static std::string getShuffleComment(const MachineInstr *MI, unsigned SrcOp1Idx,
|
||||||
return Comment;
|
return Comment;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void printConstant(const APFloat& Flt, raw_ostream &CS) {
|
||||||
|
SmallString<32> Str;
|
||||||
|
Flt.toString(Str);
|
||||||
|
CS << Str;
|
||||||
|
}
|
||||||
|
|
||||||
static void printConstant(const Constant *COp, raw_ostream &CS) {
|
static void printConstant(const Constant *COp, raw_ostream &CS) {
|
||||||
if (isa<UndefValue>(COp)) {
|
if (isa<UndefValue>(COp)) {
|
||||||
CS << "u";
|
CS << "u";
|
||||||
|
|
@ -1500,9 +1506,7 @@ static void printConstant(const Constant *COp, raw_ostream &CS) {
|
||||||
CS << ")";
|
CS << ")";
|
||||||
}
|
}
|
||||||
} else if (auto *CF = dyn_cast<ConstantFP>(COp)) {
|
} else if (auto *CF = dyn_cast<ConstantFP>(COp)) {
|
||||||
SmallString<32> Str;
|
printConstant(CF->getValueAPF(), CS);
|
||||||
CF->getValueAPF().toString(Str);
|
|
||||||
CS << Str;
|
|
||||||
} else {
|
} else {
|
||||||
CS << "?";
|
CS << "?";
|
||||||
}
|
}
|
||||||
|
|
@ -2097,10 +2101,10 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
|
||||||
CS << ",";
|
CS << ",";
|
||||||
if (CDS->getElementType()->isIntegerTy())
|
if (CDS->getElementType()->isIntegerTy())
|
||||||
CS << CDS->getElementAsInteger(i);
|
CS << CDS->getElementAsInteger(i);
|
||||||
else if (CDS->getElementType()->isFloatTy())
|
else if (CDS->getElementType()->isHalfTy() ||
|
||||||
CS << CDS->getElementAsFloat(i);
|
CDS->getElementType()->isFloatTy() ||
|
||||||
else if (CDS->getElementType()->isDoubleTy())
|
CDS->getElementType()->isDoubleTy())
|
||||||
CS << CDS->getElementAsDouble(i);
|
printConstant(CDS->getElementAsAPFloat(i), CS);
|
||||||
else
|
else
|
||||||
CS << "?";
|
CS << "?";
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,7 @@
|
||||||
define void @ui_to_fp_conv(<8 x float> * nocapture %aFOO, <8 x float>* nocapture %RET) nounwind {
|
define void @ui_to_fp_conv(<8 x float> * nocapture %aFOO, <8 x float>* nocapture %RET) nounwind {
|
||||||
; CHECK-LABEL: ui_to_fp_conv:
|
; CHECK-LABEL: ui_to_fp_conv:
|
||||||
; CHECK: # %bb.0: # %allocas
|
; CHECK: # %bb.0: # %allocas
|
||||||
; CHECK-NEXT: movaps {{.*#+}} xmm0 = [1.000000e+00,1.000000e+00,0.000000e+00,0.000000e+00]
|
; CHECK-NEXT: movaps {{.*#+}} xmm0 = [1,1,0,0]
|
||||||
; CHECK-NEXT: xorps %xmm1, %xmm1
|
; CHECK-NEXT: xorps %xmm1, %xmm1
|
||||||
; CHECK-NEXT: movups %xmm1, 16(%rsi)
|
; CHECK-NEXT: movups %xmm1, 16(%rsi)
|
||||||
; CHECK-NEXT: movups %xmm0, (%rsi)
|
; CHECK-NEXT: movups %xmm0, (%rsi)
|
||||||
|
|
|
||||||
|
|
@ -92,7 +92,7 @@ define <8 x i32> @VMOVZQI2PQI([0 x float]* nocapture %aFOO) nounwind {
|
||||||
define <16 x float> @fneg(<16 x float> %a) nounwind {
|
define <16 x float> @fneg(<16 x float> %a) nounwind {
|
||||||
; CHECK-LABEL: fneg:
|
; CHECK-LABEL: fneg:
|
||||||
; CHECK: ## %bb.0:
|
; CHECK: ## %bb.0:
|
||||||
; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00]
|
; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [-0,-0,-0,-0,-0,-0,-0,-0]
|
||||||
; CHECK-NEXT: vxorps %ymm2, %ymm0, %ymm0
|
; CHECK-NEXT: vxorps %ymm2, %ymm0, %ymm0
|
||||||
; CHECK-NEXT: vxorps %ymm2, %ymm1, %ymm1
|
; CHECK-NEXT: vxorps %ymm2, %ymm1, %ymm1
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
|
|
|
||||||
|
|
@ -316,12 +316,12 @@ entry:
|
||||||
define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp {
|
define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp {
|
||||||
; X32-LABEL: _e2:
|
; X32-LABEL: _e2:
|
||||||
; X32: ## %bb.0: ## %entry
|
; X32: ## %bb.0: ## %entry
|
||||||
; X32-NEXT: vmovaps {{.*#+}} xmm0 = [-7.812500e-03,-7.812500e-03,-7.812500e-03,-7.812500e-03]
|
; X32-NEXT: vmovaps {{.*#+}} xmm0 = [-0.0078125,-0.0078125,-0.0078125,-0.0078125]
|
||||||
; X32-NEXT: retl
|
; X32-NEXT: retl
|
||||||
;
|
;
|
||||||
; X64-LABEL: _e2:
|
; X64-LABEL: _e2:
|
||||||
; X64: ## %bb.0: ## %entry
|
; X64: ## %bb.0: ## %entry
|
||||||
; X64-NEXT: vmovaps {{.*#+}} xmm0 = [-7.812500e-03,-7.812500e-03,-7.812500e-03,-7.812500e-03]
|
; X64-NEXT: vmovaps {{.*#+}} xmm0 = [-0.0078125,-0.0078125,-0.0078125,-0.0078125]
|
||||||
; X64-NEXT: retq
|
; X64-NEXT: retq
|
||||||
entry:
|
entry:
|
||||||
%vecinit.i = insertelement <4 x float> undef, float 0xbf80000000000000, i32 0
|
%vecinit.i = insertelement <4 x float> undef, float 0xbf80000000000000, i32 0
|
||||||
|
|
|
||||||
|
|
@ -1641,7 +1641,7 @@ define <8 x float> @f8xf32_f64(<8 x float> %a) {
|
||||||
define <8 x float> @f8xf32_f128(<8 x float> %a) {
|
define <8 x float> @f8xf32_f128(<8 x float> %a) {
|
||||||
; AVX-LABEL: f8xf32_f128:
|
; AVX-LABEL: f8xf32_f128:
|
||||||
; AVX: # %bb.0:
|
; AVX: # %bb.0:
|
||||||
; AVX-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00]
|
; AVX-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [4,1,2,3,4,1,2,3]
|
||||||
; AVX-NEXT: # ymm1 = mem[0,1,0,1]
|
; AVX-NEXT: # ymm1 = mem[0,1,0,1]
|
||||||
; AVX-NEXT: vaddps %ymm1, %ymm0, %ymm0
|
; AVX-NEXT: vaddps %ymm1, %ymm0, %ymm0
|
||||||
; AVX-NEXT: vdivps %ymm0, %ymm1, %ymm0
|
; AVX-NEXT: vdivps %ymm0, %ymm1, %ymm0
|
||||||
|
|
@ -1649,7 +1649,7 @@ define <8 x float> @f8xf32_f128(<8 x float> %a) {
|
||||||
;
|
;
|
||||||
; ALL32-LABEL: f8xf32_f128:
|
; ALL32-LABEL: f8xf32_f128:
|
||||||
; ALL32: # %bb.0:
|
; ALL32: # %bb.0:
|
||||||
; ALL32-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00]
|
; ALL32-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [4,1,2,3,4,1,2,3]
|
||||||
; ALL32-NEXT: # ymm1 = mem[0,1,0,1]
|
; ALL32-NEXT: # ymm1 = mem[0,1,0,1]
|
||||||
; ALL32-NEXT: vaddps %ymm1, %ymm0, %ymm0
|
; ALL32-NEXT: vaddps %ymm1, %ymm0, %ymm0
|
||||||
; ALL32-NEXT: vdivps %ymm0, %ymm1, %ymm0
|
; ALL32-NEXT: vdivps %ymm0, %ymm1, %ymm0
|
||||||
|
|
@ -1657,7 +1657,7 @@ define <8 x float> @f8xf32_f128(<8 x float> %a) {
|
||||||
;
|
;
|
||||||
; AVX-64-LABEL: f8xf32_f128:
|
; AVX-64-LABEL: f8xf32_f128:
|
||||||
; AVX-64: # %bb.0:
|
; AVX-64: # %bb.0:
|
||||||
; AVX-64-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00]
|
; AVX-64-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [4,1,2,3,4,1,2,3]
|
||||||
; AVX-64-NEXT: # ymm1 = mem[0,1,0,1]
|
; AVX-64-NEXT: # ymm1 = mem[0,1,0,1]
|
||||||
; AVX-64-NEXT: vaddps %ymm1, %ymm0, %ymm0
|
; AVX-64-NEXT: vaddps %ymm1, %ymm0, %ymm0
|
||||||
; AVX-64-NEXT: vdivps %ymm0, %ymm1, %ymm0
|
; AVX-64-NEXT: vdivps %ymm0, %ymm1, %ymm0
|
||||||
|
|
@ -1665,7 +1665,7 @@ define <8 x float> @f8xf32_f128(<8 x float> %a) {
|
||||||
;
|
;
|
||||||
; ALL64-LABEL: f8xf32_f128:
|
; ALL64-LABEL: f8xf32_f128:
|
||||||
; ALL64: # %bb.0:
|
; ALL64: # %bb.0:
|
||||||
; ALL64-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00]
|
; ALL64-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [4,1,2,3,4,1,2,3]
|
||||||
; ALL64-NEXT: # ymm1 = mem[0,1,0,1]
|
; ALL64-NEXT: # ymm1 = mem[0,1,0,1]
|
||||||
; ALL64-NEXT: vaddps %ymm1, %ymm0, %ymm0
|
; ALL64-NEXT: vaddps %ymm1, %ymm0, %ymm0
|
||||||
; ALL64-NEXT: vdivps %ymm0, %ymm1, %ymm0
|
; ALL64-NEXT: vdivps %ymm0, %ymm1, %ymm0
|
||||||
|
|
@ -1735,7 +1735,7 @@ define <16 x float> @f16xf32_f64(<16 x float> %a) {
|
||||||
define <16 x float> @f16xf32_f128(<16 x float> %a) {
|
define <16 x float> @f16xf32_f128(<16 x float> %a) {
|
||||||
; AVX-LABEL: f16xf32_f128:
|
; AVX-LABEL: f16xf32_f128:
|
||||||
; AVX: # %bb.0:
|
; AVX: # %bb.0:
|
||||||
; AVX-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00]
|
; AVX-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [4,1,2,3,4,1,2,3]
|
||||||
; AVX-NEXT: # ymm2 = mem[0,1,0,1]
|
; AVX-NEXT: # ymm2 = mem[0,1,0,1]
|
||||||
; AVX-NEXT: vaddps %ymm2, %ymm1, %ymm1
|
; AVX-NEXT: vaddps %ymm2, %ymm1, %ymm1
|
||||||
; AVX-NEXT: vaddps %ymm2, %ymm0, %ymm0
|
; AVX-NEXT: vaddps %ymm2, %ymm0, %ymm0
|
||||||
|
|
@ -1745,7 +1745,7 @@ define <16 x float> @f16xf32_f128(<16 x float> %a) {
|
||||||
;
|
;
|
||||||
; AVX2-LABEL: f16xf32_f128:
|
; AVX2-LABEL: f16xf32_f128:
|
||||||
; AVX2: # %bb.0:
|
; AVX2: # %bb.0:
|
||||||
; AVX2-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00]
|
; AVX2-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [4,1,2,3,4,1,2,3]
|
||||||
; AVX2-NEXT: # ymm2 = mem[0,1,0,1]
|
; AVX2-NEXT: # ymm2 = mem[0,1,0,1]
|
||||||
; AVX2-NEXT: vaddps %ymm2, %ymm1, %ymm1
|
; AVX2-NEXT: vaddps %ymm2, %ymm1, %ymm1
|
||||||
; AVX2-NEXT: vaddps %ymm2, %ymm0, %ymm0
|
; AVX2-NEXT: vaddps %ymm2, %ymm0, %ymm0
|
||||||
|
|
@ -1755,7 +1755,7 @@ define <16 x float> @f16xf32_f128(<16 x float> %a) {
|
||||||
;
|
;
|
||||||
; AVX512-LABEL: f16xf32_f128:
|
; AVX512-LABEL: f16xf32_f128:
|
||||||
; AVX512: # %bb.0:
|
; AVX512: # %bb.0:
|
||||||
; AVX512-NEXT: vbroadcastf32x4 {{.*#+}} zmm1 = [4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00]
|
; AVX512-NEXT: vbroadcastf32x4 {{.*#+}} zmm1 = [4,1,2,3,4,1,2,3,4,1,2,3,4,1,2,3]
|
||||||
; AVX512-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
|
; AVX512-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
|
||||||
; AVX512-NEXT: vaddps %zmm1, %zmm0, %zmm0
|
; AVX512-NEXT: vaddps %zmm1, %zmm0, %zmm0
|
||||||
; AVX512-NEXT: vdivps %zmm0, %zmm1, %zmm0
|
; AVX512-NEXT: vdivps %zmm0, %zmm1, %zmm0
|
||||||
|
|
@ -1763,7 +1763,7 @@ define <16 x float> @f16xf32_f128(<16 x float> %a) {
|
||||||
;
|
;
|
||||||
; AVX-64-LABEL: f16xf32_f128:
|
; AVX-64-LABEL: f16xf32_f128:
|
||||||
; AVX-64: # %bb.0:
|
; AVX-64: # %bb.0:
|
||||||
; AVX-64-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00]
|
; AVX-64-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [4,1,2,3,4,1,2,3]
|
||||||
; AVX-64-NEXT: # ymm2 = mem[0,1,0,1]
|
; AVX-64-NEXT: # ymm2 = mem[0,1,0,1]
|
||||||
; AVX-64-NEXT: vaddps %ymm2, %ymm1, %ymm1
|
; AVX-64-NEXT: vaddps %ymm2, %ymm1, %ymm1
|
||||||
; AVX-64-NEXT: vaddps %ymm2, %ymm0, %ymm0
|
; AVX-64-NEXT: vaddps %ymm2, %ymm0, %ymm0
|
||||||
|
|
@ -1773,7 +1773,7 @@ define <16 x float> @f16xf32_f128(<16 x float> %a) {
|
||||||
;
|
;
|
||||||
; AVX2-64-LABEL: f16xf32_f128:
|
; AVX2-64-LABEL: f16xf32_f128:
|
||||||
; AVX2-64: # %bb.0:
|
; AVX2-64: # %bb.0:
|
||||||
; AVX2-64-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00]
|
; AVX2-64-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [4,1,2,3,4,1,2,3]
|
||||||
; AVX2-64-NEXT: # ymm2 = mem[0,1,0,1]
|
; AVX2-64-NEXT: # ymm2 = mem[0,1,0,1]
|
||||||
; AVX2-64-NEXT: vaddps %ymm2, %ymm1, %ymm1
|
; AVX2-64-NEXT: vaddps %ymm2, %ymm1, %ymm1
|
||||||
; AVX2-64-NEXT: vaddps %ymm2, %ymm0, %ymm0
|
; AVX2-64-NEXT: vaddps %ymm2, %ymm0, %ymm0
|
||||||
|
|
@ -1783,7 +1783,7 @@ define <16 x float> @f16xf32_f128(<16 x float> %a) {
|
||||||
;
|
;
|
||||||
; AVX512F-64-LABEL: f16xf32_f128:
|
; AVX512F-64-LABEL: f16xf32_f128:
|
||||||
; AVX512F-64: # %bb.0:
|
; AVX512F-64: # %bb.0:
|
||||||
; AVX512F-64-NEXT: vbroadcastf32x4 {{.*#+}} zmm1 = [4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00]
|
; AVX512F-64-NEXT: vbroadcastf32x4 {{.*#+}} zmm1 = [4,1,2,3,4,1,2,3,4,1,2,3,4,1,2,3]
|
||||||
; AVX512F-64-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
|
; AVX512F-64-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
|
||||||
; AVX512F-64-NEXT: vaddps %zmm1, %zmm0, %zmm0
|
; AVX512F-64-NEXT: vaddps %zmm1, %zmm0, %zmm0
|
||||||
; AVX512F-64-NEXT: vdivps %zmm0, %zmm1, %zmm0
|
; AVX512F-64-NEXT: vdivps %zmm0, %zmm1, %zmm0
|
||||||
|
|
@ -1797,7 +1797,7 @@ define <16 x float> @f16xf32_f128(<16 x float> %a) {
|
||||||
define <16 x float> @f16xf32_f256(<16 x float> %a) {
|
define <16 x float> @f16xf32_f256(<16 x float> %a) {
|
||||||
; AVX-LABEL: f16xf32_f256:
|
; AVX-LABEL: f16xf32_f256:
|
||||||
; AVX: # %bb.0:
|
; AVX: # %bb.0:
|
||||||
; AVX-NEXT: vmovaps {{.*#+}} ymm2 = [8.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,5.000000e+00,6.000000e+00,7.000000e+00]
|
; AVX-NEXT: vmovaps {{.*#+}} ymm2 = [8,1,2,3,4,5,6,7]
|
||||||
; AVX-NEXT: vaddps %ymm2, %ymm1, %ymm1
|
; AVX-NEXT: vaddps %ymm2, %ymm1, %ymm1
|
||||||
; AVX-NEXT: vaddps %ymm2, %ymm0, %ymm0
|
; AVX-NEXT: vaddps %ymm2, %ymm0, %ymm0
|
||||||
; AVX-NEXT: vdivps %ymm0, %ymm2, %ymm0
|
; AVX-NEXT: vdivps %ymm0, %ymm2, %ymm0
|
||||||
|
|
@ -1806,7 +1806,7 @@ define <16 x float> @f16xf32_f256(<16 x float> %a) {
|
||||||
;
|
;
|
||||||
; AVX2-LABEL: f16xf32_f256:
|
; AVX2-LABEL: f16xf32_f256:
|
||||||
; AVX2: # %bb.0:
|
; AVX2: # %bb.0:
|
||||||
; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = [8.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,5.000000e+00,6.000000e+00,7.000000e+00]
|
; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = [8,1,2,3,4,5,6,7]
|
||||||
; AVX2-NEXT: vaddps %ymm2, %ymm1, %ymm1
|
; AVX2-NEXT: vaddps %ymm2, %ymm1, %ymm1
|
||||||
; AVX2-NEXT: vaddps %ymm2, %ymm0, %ymm0
|
; AVX2-NEXT: vaddps %ymm2, %ymm0, %ymm0
|
||||||
; AVX2-NEXT: vdivps %ymm0, %ymm2, %ymm0
|
; AVX2-NEXT: vdivps %ymm0, %ymm2, %ymm0
|
||||||
|
|
@ -1815,7 +1815,7 @@ define <16 x float> @f16xf32_f256(<16 x float> %a) {
|
||||||
;
|
;
|
||||||
; AVX512-LABEL: f16xf32_f256:
|
; AVX512-LABEL: f16xf32_f256:
|
||||||
; AVX512: # %bb.0:
|
; AVX512: # %bb.0:
|
||||||
; AVX512-NEXT: vbroadcastf64x4 {{.*#+}} zmm1 = [8.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,5.000000e+00,6.000000e+00,7.000000e+00,8.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,5.000000e+00,6.000000e+00,7.000000e+00]
|
; AVX512-NEXT: vbroadcastf64x4 {{.*#+}} zmm1 = [8,1,2,3,4,5,6,7,8,1,2,3,4,5,6,7]
|
||||||
; AVX512-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3]
|
; AVX512-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3]
|
||||||
; AVX512-NEXT: vaddps %zmm1, %zmm0, %zmm0
|
; AVX512-NEXT: vaddps %zmm1, %zmm0, %zmm0
|
||||||
; AVX512-NEXT: vdivps %zmm0, %zmm1, %zmm0
|
; AVX512-NEXT: vdivps %zmm0, %zmm1, %zmm0
|
||||||
|
|
@ -1823,7 +1823,7 @@ define <16 x float> @f16xf32_f256(<16 x float> %a) {
|
||||||
;
|
;
|
||||||
; AVX-64-LABEL: f16xf32_f256:
|
; AVX-64-LABEL: f16xf32_f256:
|
||||||
; AVX-64: # %bb.0:
|
; AVX-64: # %bb.0:
|
||||||
; AVX-64-NEXT: vmovaps {{.*#+}} ymm2 = [8.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,5.000000e+00,6.000000e+00,7.000000e+00]
|
; AVX-64-NEXT: vmovaps {{.*#+}} ymm2 = [8,1,2,3,4,5,6,7]
|
||||||
; AVX-64-NEXT: vaddps %ymm2, %ymm1, %ymm1
|
; AVX-64-NEXT: vaddps %ymm2, %ymm1, %ymm1
|
||||||
; AVX-64-NEXT: vaddps %ymm2, %ymm0, %ymm0
|
; AVX-64-NEXT: vaddps %ymm2, %ymm0, %ymm0
|
||||||
; AVX-64-NEXT: vdivps %ymm0, %ymm2, %ymm0
|
; AVX-64-NEXT: vdivps %ymm0, %ymm2, %ymm0
|
||||||
|
|
@ -1832,7 +1832,7 @@ define <16 x float> @f16xf32_f256(<16 x float> %a) {
|
||||||
;
|
;
|
||||||
; AVX2-64-LABEL: f16xf32_f256:
|
; AVX2-64-LABEL: f16xf32_f256:
|
||||||
; AVX2-64: # %bb.0:
|
; AVX2-64: # %bb.0:
|
||||||
; AVX2-64-NEXT: vmovaps {{.*#+}} ymm2 = [8.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,5.000000e+00,6.000000e+00,7.000000e+00]
|
; AVX2-64-NEXT: vmovaps {{.*#+}} ymm2 = [8,1,2,3,4,5,6,7]
|
||||||
; AVX2-64-NEXT: vaddps %ymm2, %ymm1, %ymm1
|
; AVX2-64-NEXT: vaddps %ymm2, %ymm1, %ymm1
|
||||||
; AVX2-64-NEXT: vaddps %ymm2, %ymm0, %ymm0
|
; AVX2-64-NEXT: vaddps %ymm2, %ymm0, %ymm0
|
||||||
; AVX2-64-NEXT: vdivps %ymm0, %ymm2, %ymm0
|
; AVX2-64-NEXT: vdivps %ymm0, %ymm2, %ymm0
|
||||||
|
|
@ -1841,7 +1841,7 @@ define <16 x float> @f16xf32_f256(<16 x float> %a) {
|
||||||
;
|
;
|
||||||
; AVX512F-64-LABEL: f16xf32_f256:
|
; AVX512F-64-LABEL: f16xf32_f256:
|
||||||
; AVX512F-64: # %bb.0:
|
; AVX512F-64: # %bb.0:
|
||||||
; AVX512F-64-NEXT: vbroadcastf64x4 {{.*#+}} zmm1 = [8.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,5.000000e+00,6.000000e+00,7.000000e+00,8.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,5.000000e+00,6.000000e+00,7.000000e+00]
|
; AVX512F-64-NEXT: vbroadcastf64x4 {{.*#+}} zmm1 = [8,1,2,3,4,5,6,7,8,1,2,3,4,5,6,7]
|
||||||
; AVX512F-64-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3]
|
; AVX512F-64-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3]
|
||||||
; AVX512F-64-NEXT: vaddps %zmm1, %zmm0, %zmm0
|
; AVX512F-64-NEXT: vaddps %zmm1, %zmm0, %zmm0
|
||||||
; AVX512F-64-NEXT: vdivps %zmm0, %zmm1, %zmm0
|
; AVX512F-64-NEXT: vdivps %zmm0, %zmm1, %zmm0
|
||||||
|
|
@ -1855,7 +1855,7 @@ define <16 x float> @f16xf32_f256(<16 x float> %a) {
|
||||||
define <4 x double> @f4xf64_f128(<4 x double> %a) {
|
define <4 x double> @f4xf64_f128(<4 x double> %a) {
|
||||||
; AVX-LABEL: f4xf64_f128:
|
; AVX-LABEL: f4xf64_f128:
|
||||||
; AVX: # %bb.0:
|
; AVX: # %bb.0:
|
||||||
; AVX-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [2.000000e+00,1.000000e+00,2.000000e+00,1.000000e+00]
|
; AVX-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [2,1,2,1]
|
||||||
; AVX-NEXT: # ymm1 = mem[0,1,0,1]
|
; AVX-NEXT: # ymm1 = mem[0,1,0,1]
|
||||||
; AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0
|
; AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0
|
||||||
; AVX-NEXT: vdivpd %ymm0, %ymm1, %ymm0
|
; AVX-NEXT: vdivpd %ymm0, %ymm1, %ymm0
|
||||||
|
|
@ -1863,7 +1863,7 @@ define <4 x double> @f4xf64_f128(<4 x double> %a) {
|
||||||
;
|
;
|
||||||
; ALL32-LABEL: f4xf64_f128:
|
; ALL32-LABEL: f4xf64_f128:
|
||||||
; ALL32: # %bb.0:
|
; ALL32: # %bb.0:
|
||||||
; ALL32-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [2.000000e+00,1.000000e+00,2.000000e+00,1.000000e+00]
|
; ALL32-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [2,1,2,1]
|
||||||
; ALL32-NEXT: # ymm1 = mem[0,1,0,1]
|
; ALL32-NEXT: # ymm1 = mem[0,1,0,1]
|
||||||
; ALL32-NEXT: vaddpd %ymm1, %ymm0, %ymm0
|
; ALL32-NEXT: vaddpd %ymm1, %ymm0, %ymm0
|
||||||
; ALL32-NEXT: vdivpd %ymm0, %ymm1, %ymm0
|
; ALL32-NEXT: vdivpd %ymm0, %ymm1, %ymm0
|
||||||
|
|
@ -1871,7 +1871,7 @@ define <4 x double> @f4xf64_f128(<4 x double> %a) {
|
||||||
;
|
;
|
||||||
; AVX-64-LABEL: f4xf64_f128:
|
; AVX-64-LABEL: f4xf64_f128:
|
||||||
; AVX-64: # %bb.0:
|
; AVX-64: # %bb.0:
|
||||||
; AVX-64-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [2.000000e+00,1.000000e+00,2.000000e+00,1.000000e+00]
|
; AVX-64-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [2,1,2,1]
|
||||||
; AVX-64-NEXT: # ymm1 = mem[0,1,0,1]
|
; AVX-64-NEXT: # ymm1 = mem[0,1,0,1]
|
||||||
; AVX-64-NEXT: vaddpd %ymm1, %ymm0, %ymm0
|
; AVX-64-NEXT: vaddpd %ymm1, %ymm0, %ymm0
|
||||||
; AVX-64-NEXT: vdivpd %ymm0, %ymm1, %ymm0
|
; AVX-64-NEXT: vdivpd %ymm0, %ymm1, %ymm0
|
||||||
|
|
@ -1879,7 +1879,7 @@ define <4 x double> @f4xf64_f128(<4 x double> %a) {
|
||||||
;
|
;
|
||||||
; ALL64-LABEL: f4xf64_f128:
|
; ALL64-LABEL: f4xf64_f128:
|
||||||
; ALL64: # %bb.0:
|
; ALL64: # %bb.0:
|
||||||
; ALL64-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [2.000000e+00,1.000000e+00,2.000000e+00,1.000000e+00]
|
; ALL64-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [2,1,2,1]
|
||||||
; ALL64-NEXT: # ymm1 = mem[0,1,0,1]
|
; ALL64-NEXT: # ymm1 = mem[0,1,0,1]
|
||||||
; ALL64-NEXT: vaddpd %ymm1, %ymm0, %ymm0
|
; ALL64-NEXT: vaddpd %ymm1, %ymm0, %ymm0
|
||||||
; ALL64-NEXT: vdivpd %ymm0, %ymm1, %ymm0
|
; ALL64-NEXT: vdivpd %ymm0, %ymm1, %ymm0
|
||||||
|
|
@ -1893,7 +1893,7 @@ define <4 x double> @f4xf64_f128(<4 x double> %a) {
|
||||||
define <8 x double> @f8xf64_f128(<8 x double> %a) {
|
define <8 x double> @f8xf64_f128(<8 x double> %a) {
|
||||||
; AVX-LABEL: f8xf64_f128:
|
; AVX-LABEL: f8xf64_f128:
|
||||||
; AVX: # %bb.0:
|
; AVX: # %bb.0:
|
||||||
; AVX-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [2.000000e+00,1.000000e+00,2.000000e+00,1.000000e+00]
|
; AVX-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [2,1,2,1]
|
||||||
; AVX-NEXT: # ymm2 = mem[0,1,0,1]
|
; AVX-NEXT: # ymm2 = mem[0,1,0,1]
|
||||||
; AVX-NEXT: vaddpd %ymm2, %ymm1, %ymm1
|
; AVX-NEXT: vaddpd %ymm2, %ymm1, %ymm1
|
||||||
; AVX-NEXT: vaddpd %ymm2, %ymm0, %ymm0
|
; AVX-NEXT: vaddpd %ymm2, %ymm0, %ymm0
|
||||||
|
|
@ -1903,7 +1903,7 @@ define <8 x double> @f8xf64_f128(<8 x double> %a) {
|
||||||
;
|
;
|
||||||
; AVX2-LABEL: f8xf64_f128:
|
; AVX2-LABEL: f8xf64_f128:
|
||||||
; AVX2: # %bb.0:
|
; AVX2: # %bb.0:
|
||||||
; AVX2-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [2.000000e+00,1.000000e+00,2.000000e+00,1.000000e+00]
|
; AVX2-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [2,1,2,1]
|
||||||
; AVX2-NEXT: # ymm2 = mem[0,1,0,1]
|
; AVX2-NEXT: # ymm2 = mem[0,1,0,1]
|
||||||
; AVX2-NEXT: vaddpd %ymm2, %ymm1, %ymm1
|
; AVX2-NEXT: vaddpd %ymm2, %ymm1, %ymm1
|
||||||
; AVX2-NEXT: vaddpd %ymm2, %ymm0, %ymm0
|
; AVX2-NEXT: vaddpd %ymm2, %ymm0, %ymm0
|
||||||
|
|
@ -1913,7 +1913,7 @@ define <8 x double> @f8xf64_f128(<8 x double> %a) {
|
||||||
;
|
;
|
||||||
; AVX512-LABEL: f8xf64_f128:
|
; AVX512-LABEL: f8xf64_f128:
|
||||||
; AVX512: # %bb.0:
|
; AVX512: # %bb.0:
|
||||||
; AVX512-NEXT: vbroadcastf32x4 {{.*#+}} zmm1 = [2.000000e+00,1.000000e+00,2.000000e+00,1.000000e+00,2.000000e+00,1.000000e+00,2.000000e+00,1.000000e+00]
|
; AVX512-NEXT: vbroadcastf32x4 {{.*#+}} zmm1 = [2,1,2,1,2,1,2,1]
|
||||||
; AVX512-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
|
; AVX512-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
|
||||||
; AVX512-NEXT: vaddpd %zmm1, %zmm0, %zmm0
|
; AVX512-NEXT: vaddpd %zmm1, %zmm0, %zmm0
|
||||||
; AVX512-NEXT: vdivpd %zmm0, %zmm1, %zmm0
|
; AVX512-NEXT: vdivpd %zmm0, %zmm1, %zmm0
|
||||||
|
|
@ -1921,7 +1921,7 @@ define <8 x double> @f8xf64_f128(<8 x double> %a) {
|
||||||
;
|
;
|
||||||
; AVX-64-LABEL: f8xf64_f128:
|
; AVX-64-LABEL: f8xf64_f128:
|
||||||
; AVX-64: # %bb.0:
|
; AVX-64: # %bb.0:
|
||||||
; AVX-64-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [2.000000e+00,1.000000e+00,2.000000e+00,1.000000e+00]
|
; AVX-64-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [2,1,2,1]
|
||||||
; AVX-64-NEXT: # ymm2 = mem[0,1,0,1]
|
; AVX-64-NEXT: # ymm2 = mem[0,1,0,1]
|
||||||
; AVX-64-NEXT: vaddpd %ymm2, %ymm1, %ymm1
|
; AVX-64-NEXT: vaddpd %ymm2, %ymm1, %ymm1
|
||||||
; AVX-64-NEXT: vaddpd %ymm2, %ymm0, %ymm0
|
; AVX-64-NEXT: vaddpd %ymm2, %ymm0, %ymm0
|
||||||
|
|
@ -1931,7 +1931,7 @@ define <8 x double> @f8xf64_f128(<8 x double> %a) {
|
||||||
;
|
;
|
||||||
; AVX2-64-LABEL: f8xf64_f128:
|
; AVX2-64-LABEL: f8xf64_f128:
|
||||||
; AVX2-64: # %bb.0:
|
; AVX2-64: # %bb.0:
|
||||||
; AVX2-64-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [2.000000e+00,1.000000e+00,2.000000e+00,1.000000e+00]
|
; AVX2-64-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [2,1,2,1]
|
||||||
; AVX2-64-NEXT: # ymm2 = mem[0,1,0,1]
|
; AVX2-64-NEXT: # ymm2 = mem[0,1,0,1]
|
||||||
; AVX2-64-NEXT: vaddpd %ymm2, %ymm1, %ymm1
|
; AVX2-64-NEXT: vaddpd %ymm2, %ymm1, %ymm1
|
||||||
; AVX2-64-NEXT: vaddpd %ymm2, %ymm0, %ymm0
|
; AVX2-64-NEXT: vaddpd %ymm2, %ymm0, %ymm0
|
||||||
|
|
@ -1941,7 +1941,7 @@ define <8 x double> @f8xf64_f128(<8 x double> %a) {
|
||||||
;
|
;
|
||||||
; AVX512F-64-LABEL: f8xf64_f128:
|
; AVX512F-64-LABEL: f8xf64_f128:
|
||||||
; AVX512F-64: # %bb.0:
|
; AVX512F-64: # %bb.0:
|
||||||
; AVX512F-64-NEXT: vbroadcastf32x4 {{.*#+}} zmm1 = [2.000000e+00,1.000000e+00,2.000000e+00,1.000000e+00,2.000000e+00,1.000000e+00,2.000000e+00,1.000000e+00]
|
; AVX512F-64-NEXT: vbroadcastf32x4 {{.*#+}} zmm1 = [2,1,2,1,2,1,2,1]
|
||||||
; AVX512F-64-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
|
; AVX512F-64-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
|
||||||
; AVX512F-64-NEXT: vaddpd %zmm1, %zmm0, %zmm0
|
; AVX512F-64-NEXT: vaddpd %zmm1, %zmm0, %zmm0
|
||||||
; AVX512F-64-NEXT: vdivpd %zmm0, %zmm1, %zmm0
|
; AVX512F-64-NEXT: vdivpd %zmm0, %zmm1, %zmm0
|
||||||
|
|
@ -1962,7 +1962,7 @@ define <8 x double> @f8xf64_f128(<8 x double> %a) {
|
||||||
define <8 x double> @f8xf64_f256(<8 x double> %a) {
|
define <8 x double> @f8xf64_f256(<8 x double> %a) {
|
||||||
; AVX-LABEL: f8xf64_f256:
|
; AVX-LABEL: f8xf64_f256:
|
||||||
; AVX: # %bb.0:
|
; AVX: # %bb.0:
|
||||||
; AVX-NEXT: vmovapd {{.*#+}} ymm2 = [4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00]
|
; AVX-NEXT: vmovapd {{.*#+}} ymm2 = [4,1,2,3]
|
||||||
; AVX-NEXT: vaddpd %ymm2, %ymm1, %ymm1
|
; AVX-NEXT: vaddpd %ymm2, %ymm1, %ymm1
|
||||||
; AVX-NEXT: vaddpd %ymm2, %ymm0, %ymm0
|
; AVX-NEXT: vaddpd %ymm2, %ymm0, %ymm0
|
||||||
; AVX-NEXT: vdivpd %ymm0, %ymm2, %ymm0
|
; AVX-NEXT: vdivpd %ymm0, %ymm2, %ymm0
|
||||||
|
|
@ -1971,7 +1971,7 @@ define <8 x double> @f8xf64_f256(<8 x double> %a) {
|
||||||
;
|
;
|
||||||
; AVX2-LABEL: f8xf64_f256:
|
; AVX2-LABEL: f8xf64_f256:
|
||||||
; AVX2: # %bb.0:
|
; AVX2: # %bb.0:
|
||||||
; AVX2-NEXT: vmovapd {{.*#+}} ymm2 = [4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00]
|
; AVX2-NEXT: vmovapd {{.*#+}} ymm2 = [4,1,2,3]
|
||||||
; AVX2-NEXT: vaddpd %ymm2, %ymm1, %ymm1
|
; AVX2-NEXT: vaddpd %ymm2, %ymm1, %ymm1
|
||||||
; AVX2-NEXT: vaddpd %ymm2, %ymm0, %ymm0
|
; AVX2-NEXT: vaddpd %ymm2, %ymm0, %ymm0
|
||||||
; AVX2-NEXT: vdivpd %ymm0, %ymm2, %ymm0
|
; AVX2-NEXT: vdivpd %ymm0, %ymm2, %ymm0
|
||||||
|
|
@ -1980,7 +1980,7 @@ define <8 x double> @f8xf64_f256(<8 x double> %a) {
|
||||||
;
|
;
|
||||||
; AVX512-LABEL: f8xf64_f256:
|
; AVX512-LABEL: f8xf64_f256:
|
||||||
; AVX512: # %bb.0:
|
; AVX512: # %bb.0:
|
||||||
; AVX512-NEXT: vbroadcastf64x4 {{.*#+}} zmm1 = [4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00]
|
; AVX512-NEXT: vbroadcastf64x4 {{.*#+}} zmm1 = [4,1,2,3,4,1,2,3]
|
||||||
; AVX512-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3]
|
; AVX512-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3]
|
||||||
; AVX512-NEXT: vaddpd %zmm1, %zmm0, %zmm0
|
; AVX512-NEXT: vaddpd %zmm1, %zmm0, %zmm0
|
||||||
; AVX512-NEXT: vdivpd %zmm0, %zmm1, %zmm0
|
; AVX512-NEXT: vdivpd %zmm0, %zmm1, %zmm0
|
||||||
|
|
@ -1988,7 +1988,7 @@ define <8 x double> @f8xf64_f256(<8 x double> %a) {
|
||||||
;
|
;
|
||||||
; AVX-64-LABEL: f8xf64_f256:
|
; AVX-64-LABEL: f8xf64_f256:
|
||||||
; AVX-64: # %bb.0:
|
; AVX-64: # %bb.0:
|
||||||
; AVX-64-NEXT: vmovapd {{.*#+}} ymm2 = [4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00]
|
; AVX-64-NEXT: vmovapd {{.*#+}} ymm2 = [4,1,2,3]
|
||||||
; AVX-64-NEXT: vaddpd %ymm2, %ymm1, %ymm1
|
; AVX-64-NEXT: vaddpd %ymm2, %ymm1, %ymm1
|
||||||
; AVX-64-NEXT: vaddpd %ymm2, %ymm0, %ymm0
|
; AVX-64-NEXT: vaddpd %ymm2, %ymm0, %ymm0
|
||||||
; AVX-64-NEXT: vdivpd %ymm0, %ymm2, %ymm0
|
; AVX-64-NEXT: vdivpd %ymm0, %ymm2, %ymm0
|
||||||
|
|
@ -1997,7 +1997,7 @@ define <8 x double> @f8xf64_f256(<8 x double> %a) {
|
||||||
;
|
;
|
||||||
; AVX2-64-LABEL: f8xf64_f256:
|
; AVX2-64-LABEL: f8xf64_f256:
|
||||||
; AVX2-64: # %bb.0:
|
; AVX2-64: # %bb.0:
|
||||||
; AVX2-64-NEXT: vmovapd {{.*#+}} ymm2 = [4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00]
|
; AVX2-64-NEXT: vmovapd {{.*#+}} ymm2 = [4,1,2,3]
|
||||||
; AVX2-64-NEXT: vaddpd %ymm2, %ymm1, %ymm1
|
; AVX2-64-NEXT: vaddpd %ymm2, %ymm1, %ymm1
|
||||||
; AVX2-64-NEXT: vaddpd %ymm2, %ymm0, %ymm0
|
; AVX2-64-NEXT: vaddpd %ymm2, %ymm0, %ymm0
|
||||||
; AVX2-64-NEXT: vdivpd %ymm0, %ymm2, %ymm0
|
; AVX2-64-NEXT: vdivpd %ymm0, %ymm2, %ymm0
|
||||||
|
|
@ -2006,7 +2006,7 @@ define <8 x double> @f8xf64_f256(<8 x double> %a) {
|
||||||
;
|
;
|
||||||
; AVX512F-64-LABEL: f8xf64_f256:
|
; AVX512F-64-LABEL: f8xf64_f256:
|
||||||
; AVX512F-64: # %bb.0:
|
; AVX512F-64: # %bb.0:
|
||||||
; AVX512F-64-NEXT: vbroadcastf64x4 {{.*#+}} zmm1 = [4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00]
|
; AVX512F-64-NEXT: vbroadcastf64x4 {{.*#+}} zmm1 = [4,1,2,3,4,1,2,3]
|
||||||
; AVX512F-64-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3]
|
; AVX512F-64-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3]
|
||||||
; AVX512F-64-NEXT: vaddpd %zmm1, %zmm0, %zmm0
|
; AVX512F-64-NEXT: vaddpd %zmm1, %zmm0, %zmm0
|
||||||
; AVX512F-64-NEXT: vdivpd %zmm0, %zmm1, %zmm0
|
; AVX512F-64-NEXT: vdivpd %zmm0, %zmm1, %zmm0
|
||||||
|
|
|
||||||
|
|
@ -24,12 +24,12 @@ define float @combine_fabs_constant() {
|
||||||
define <4 x float> @combine_vec_fabs_constant() {
|
define <4 x float> @combine_vec_fabs_constant() {
|
||||||
; SSE-LABEL: combine_vec_fabs_constant:
|
; SSE-LABEL: combine_vec_fabs_constant:
|
||||||
; SSE: # %bb.0:
|
; SSE: # %bb.0:
|
||||||
; SSE-NEXT: movaps {{.*#+}} xmm0 = [0.000000e+00,0.000000e+00,2.000000e+00,2.000000e+00]
|
; SSE-NEXT: movaps {{.*#+}} xmm0 = [0,0,2,2]
|
||||||
; SSE-NEXT: retq
|
; SSE-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX-LABEL: combine_vec_fabs_constant:
|
; AVX-LABEL: combine_vec_fabs_constant:
|
||||||
; AVX: # %bb.0:
|
; AVX: # %bb.0:
|
||||||
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [0.000000e+00,0.000000e+00,2.000000e+00,2.000000e+00]
|
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [0,0,2,2]
|
||||||
; AVX-NEXT: retq
|
; AVX-NEXT: retq
|
||||||
%1 = call <4 x float> @llvm.fabs.v4f32(<4 x float> <float 0.0, float -0.0, float 2.0, float -2.0>)
|
%1 = call <4 x float> @llvm.fabs.v4f32(<4 x float> <float 0.0, float -0.0, float 2.0, float -2.0>)
|
||||||
ret <4 x float> %1
|
ret <4 x float> %1
|
||||||
|
|
|
||||||
|
|
@ -62,7 +62,7 @@ define <4 x float> @combine_vec_fcopysign_neg_constant0(<4 x float> %x) {
|
||||||
;
|
;
|
||||||
; AVX-LABEL: combine_vec_fcopysign_neg_constant0:
|
; AVX-LABEL: combine_vec_fcopysign_neg_constant0:
|
||||||
; AVX: # %bb.0:
|
; AVX: # %bb.0:
|
||||||
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm1
|
; AVX-NEXT: vbroadcastss {{.*#+}} xmm1 = [-0,-0,-0,-0]
|
||||||
; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
|
; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
|
||||||
; AVX-NEXT: retq
|
; AVX-NEXT: retq
|
||||||
%1 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> <float -2.0, float -2.0, float -2.0, float -2.0>)
|
%1 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> <float -2.0, float -2.0, float -2.0, float -2.0>)
|
||||||
|
|
@ -77,7 +77,7 @@ define <4 x float> @combine_vec_fcopysign_neg_constant1(<4 x float> %x) {
|
||||||
;
|
;
|
||||||
; AVX-LABEL: combine_vec_fcopysign_neg_constant1:
|
; AVX-LABEL: combine_vec_fcopysign_neg_constant1:
|
||||||
; AVX: # %bb.0:
|
; AVX: # %bb.0:
|
||||||
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm1
|
; AVX-NEXT: vbroadcastss {{.*#+}} xmm1 = [-0,-0,-0,-0]
|
||||||
; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
|
; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
|
||||||
; AVX-NEXT: retq
|
; AVX-NEXT: retq
|
||||||
%1 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> <float -0.0, float -2.0, float -4.0, float -8.0>)
|
%1 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> <float -0.0, float -2.0, float -4.0, float -8.0>)
|
||||||
|
|
@ -92,7 +92,7 @@ define <4 x float> @combine_vec_fcopysign_fneg_fabs_sgn(<4 x float> %x, <4 x flo
|
||||||
;
|
;
|
||||||
; AVX-LABEL: combine_vec_fcopysign_fneg_fabs_sgn:
|
; AVX-LABEL: combine_vec_fcopysign_fneg_fabs_sgn:
|
||||||
; AVX: # %bb.0:
|
; AVX: # %bb.0:
|
||||||
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm1
|
; AVX-NEXT: vbroadcastss {{.*#+}} xmm1 = [-0,-0,-0,-0]
|
||||||
; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
|
; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
|
||||||
; AVX-NEXT: retq
|
; AVX-NEXT: retq
|
||||||
%1 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %y)
|
%1 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %y)
|
||||||
|
|
@ -112,7 +112,7 @@ define <4 x float> @combine_vec_fcopysign_fabs_mag(<4 x float> %x, <4 x float> %
|
||||||
;
|
;
|
||||||
; AVX-LABEL: combine_vec_fcopysign_fabs_mag:
|
; AVX-LABEL: combine_vec_fcopysign_fabs_mag:
|
||||||
; AVX: # %bb.0:
|
; AVX: # %bb.0:
|
||||||
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
|
; AVX-NEXT: vbroadcastss {{.*#+}} xmm2 = [-0,-0,-0,-0]
|
||||||
; AVX-NEXT: vandps %xmm2, %xmm1, %xmm1
|
; AVX-NEXT: vandps %xmm2, %xmm1, %xmm1
|
||||||
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
|
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
|
||||||
; AVX-NEXT: vandps %xmm2, %xmm0, %xmm0
|
; AVX-NEXT: vandps %xmm2, %xmm0, %xmm0
|
||||||
|
|
@ -134,7 +134,7 @@ define <4 x float> @combine_vec_fcopysign_fneg_mag(<4 x float> %x, <4 x float> %
|
||||||
;
|
;
|
||||||
; AVX-LABEL: combine_vec_fcopysign_fneg_mag:
|
; AVX-LABEL: combine_vec_fcopysign_fneg_mag:
|
||||||
; AVX: # %bb.0:
|
; AVX: # %bb.0:
|
||||||
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
|
; AVX-NEXT: vbroadcastss {{.*#+}} xmm2 = [-0,-0,-0,-0]
|
||||||
; AVX-NEXT: vandps %xmm2, %xmm1, %xmm1
|
; AVX-NEXT: vandps %xmm2, %xmm1, %xmm1
|
||||||
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
|
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
|
||||||
; AVX-NEXT: vandps %xmm2, %xmm0, %xmm0
|
; AVX-NEXT: vandps %xmm2, %xmm0, %xmm0
|
||||||
|
|
@ -156,7 +156,7 @@ define <4 x float> @combine_vec_fcopysign_fcopysign_mag(<4 x float> %x, <4 x flo
|
||||||
;
|
;
|
||||||
; AVX-LABEL: combine_vec_fcopysign_fcopysign_mag:
|
; AVX-LABEL: combine_vec_fcopysign_fcopysign_mag:
|
||||||
; AVX: # %bb.0:
|
; AVX: # %bb.0:
|
||||||
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
|
; AVX-NEXT: vbroadcastss {{.*#+}} xmm2 = [-0,-0,-0,-0]
|
||||||
; AVX-NEXT: vandps %xmm2, %xmm1, %xmm1
|
; AVX-NEXT: vandps %xmm2, %xmm1, %xmm1
|
||||||
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
|
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
|
||||||
; AVX-NEXT: vandps %xmm2, %xmm0, %xmm0
|
; AVX-NEXT: vandps %xmm2, %xmm0, %xmm0
|
||||||
|
|
@ -178,7 +178,7 @@ define <4 x float> @combine_vec_fcopysign_fcopysign_sgn(<4 x float> %x, <4 x flo
|
||||||
;
|
;
|
||||||
; AVX-LABEL: combine_vec_fcopysign_fcopysign_sgn:
|
; AVX-LABEL: combine_vec_fcopysign_fcopysign_sgn:
|
||||||
; AVX: # %bb.0:
|
; AVX: # %bb.0:
|
||||||
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm1
|
; AVX-NEXT: vbroadcastss {{.*#+}} xmm1 = [-0,-0,-0,-0]
|
||||||
; AVX-NEXT: vandps %xmm1, %xmm2, %xmm1
|
; AVX-NEXT: vandps %xmm1, %xmm2, %xmm1
|
||||||
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
|
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
|
||||||
; AVX-NEXT: vandps %xmm2, %xmm0, %xmm0
|
; AVX-NEXT: vandps %xmm2, %xmm0, %xmm0
|
||||||
|
|
@ -202,7 +202,7 @@ define <4 x double> @combine_vec_fcopysign_fpext_sgn(<4 x double> %x, <4 x float
|
||||||
; SSE-NEXT: movaps {{.*#+}} xmm7
|
; SSE-NEXT: movaps {{.*#+}} xmm7
|
||||||
; SSE-NEXT: movaps %xmm0, %xmm2
|
; SSE-NEXT: movaps %xmm0, %xmm2
|
||||||
; SSE-NEXT: andps %xmm7, %xmm2
|
; SSE-NEXT: andps %xmm7, %xmm2
|
||||||
; SSE-NEXT: movaps {{.*#+}} xmm8 = [-0.000000e+00,-0.000000e+00]
|
; SSE-NEXT: movaps {{.*#+}} xmm8 = [-0,-0]
|
||||||
; SSE-NEXT: andps %xmm8, %xmm4
|
; SSE-NEXT: andps %xmm8, %xmm4
|
||||||
; SSE-NEXT: orps %xmm4, %xmm2
|
; SSE-NEXT: orps %xmm4, %xmm2
|
||||||
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
|
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
|
||||||
|
|
@ -232,7 +232,7 @@ define <4 x double> @combine_vec_fcopysign_fpext_sgn(<4 x double> %x, <4 x float
|
||||||
; AVX-NEXT: vbroadcastsd {{.*}}(%rip), %ymm2
|
; AVX-NEXT: vbroadcastsd {{.*}}(%rip), %ymm2
|
||||||
; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
|
; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
|
||||||
; AVX-NEXT: vcvtps2pd %xmm1, %ymm1
|
; AVX-NEXT: vcvtps2pd %xmm1, %ymm1
|
||||||
; AVX-NEXT: vbroadcastsd {{.*}}(%rip), %ymm2
|
; AVX-NEXT: vbroadcastsd {{.*#+}} ymm2 = [-0,-0,-0,-0]
|
||||||
; AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
|
; AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
|
||||||
; AVX-NEXT: vorps %ymm1, %ymm0, %ymm0
|
; AVX-NEXT: vorps %ymm1, %ymm0, %ymm0
|
||||||
; AVX-NEXT: retq
|
; AVX-NEXT: retq
|
||||||
|
|
@ -249,7 +249,7 @@ define <4 x float> @combine_vec_fcopysign_fptrunc_sgn(<4 x float> %x, <4 x doubl
|
||||||
; SSE-NEXT: movaps {{.*#+}} xmm5
|
; SSE-NEXT: movaps {{.*#+}} xmm5
|
||||||
; SSE-NEXT: andps %xmm5, %xmm0
|
; SSE-NEXT: andps %xmm5, %xmm0
|
||||||
; SSE-NEXT: cvtsd2ss %xmm1, %xmm6
|
; SSE-NEXT: cvtsd2ss %xmm1, %xmm6
|
||||||
; SSE-NEXT: movaps {{.*#+}} xmm4 = [-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00]
|
; SSE-NEXT: movaps {{.*#+}} xmm4 = [-0,-0,-0,-0]
|
||||||
; SSE-NEXT: andps %xmm4, %xmm6
|
; SSE-NEXT: andps %xmm4, %xmm6
|
||||||
; SSE-NEXT: orps %xmm6, %xmm0
|
; SSE-NEXT: orps %xmm6, %xmm0
|
||||||
; SSE-NEXT: movshdup {{.*#+}} xmm6 = xmm3[1,1,3,3]
|
; SSE-NEXT: movshdup {{.*#+}} xmm6 = xmm3[1,1,3,3]
|
||||||
|
|
@ -282,7 +282,7 @@ define <4 x float> @combine_vec_fcopysign_fptrunc_sgn(<4 x float> %x, <4 x doubl
|
||||||
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
|
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
|
||||||
; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm0
|
; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm0
|
||||||
; AVX-NEXT: vcvtpd2ps %ymm1, %xmm1
|
; AVX-NEXT: vcvtpd2ps %ymm1, %xmm1
|
||||||
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
|
; AVX-NEXT: vbroadcastss {{.*#+}} xmm2 = [-0,-0,-0,-0]
|
||||||
; AVX-NEXT: vandpd %xmm2, %xmm1, %xmm1
|
; AVX-NEXT: vandpd %xmm2, %xmm1, %xmm1
|
||||||
; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0
|
; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0
|
||||||
; AVX-NEXT: vzeroupper
|
; AVX-NEXT: vzeroupper
|
||||||
|
|
|
||||||
|
|
@ -44,7 +44,7 @@ define <2 x float> @uitofp_2i32_buildvector_cvt(i32 %x, i32 %y, <2 x float> %v)
|
||||||
; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||||
; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
||||||
; X32-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0]
|
; X32-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0]
|
||||||
; X32-NEXT: movapd {{.*#+}} xmm1 = [4.503600e+15,4.503600e+15]
|
; X32-NEXT: movapd {{.*#+}} xmm1 = [4503599627370496,4503599627370496]
|
||||||
; X32-NEXT: orpd %xmm1, %xmm2
|
; X32-NEXT: orpd %xmm1, %xmm2
|
||||||
; X32-NEXT: subpd %xmm1, %xmm2
|
; X32-NEXT: subpd %xmm1, %xmm2
|
||||||
; X32-NEXT: cvtpd2ps %xmm2, %xmm1
|
; X32-NEXT: cvtpd2ps %xmm2, %xmm1
|
||||||
|
|
@ -56,7 +56,7 @@ define <2 x float> @uitofp_2i32_buildvector_cvt(i32 %x, i32 %y, <2 x float> %v)
|
||||||
; X64-NEXT: movd %esi, %xmm1
|
; X64-NEXT: movd %esi, %xmm1
|
||||||
; X64-NEXT: movd %edi, %xmm2
|
; X64-NEXT: movd %edi, %xmm2
|
||||||
; X64-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
|
; X64-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
|
||||||
; X64-NEXT: movdqa {{.*#+}} xmm1 = [4.503600e+15,4.503600e+15]
|
; X64-NEXT: movdqa {{.*#+}} xmm1 = [4503599627370496,4503599627370496]
|
||||||
; X64-NEXT: por %xmm1, %xmm2
|
; X64-NEXT: por %xmm1, %xmm2
|
||||||
; X64-NEXT: subpd %xmm1, %xmm2
|
; X64-NEXT: subpd %xmm1, %xmm2
|
||||||
; X64-NEXT: cvtpd2ps %xmm2, %xmm1
|
; X64-NEXT: cvtpd2ps %xmm2, %xmm1
|
||||||
|
|
@ -74,7 +74,7 @@ define <2 x float> @uitofp_2i32_legalized(<2 x i32> %in, <2 x float> %v) {
|
||||||
; X32: # %bb.0:
|
; X32: # %bb.0:
|
||||||
; X32-NEXT: xorps %xmm2, %xmm2
|
; X32-NEXT: xorps %xmm2, %xmm2
|
||||||
; X32-NEXT: blendps {{.*#+}} xmm2 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
|
; X32-NEXT: blendps {{.*#+}} xmm2 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
|
||||||
; X32-NEXT: movaps {{.*#+}} xmm0 = [4.503600e+15,4.503600e+15]
|
; X32-NEXT: movaps {{.*#+}} xmm0 = [4503599627370496,4503599627370496]
|
||||||
; X32-NEXT: orps %xmm0, %xmm2
|
; X32-NEXT: orps %xmm0, %xmm2
|
||||||
; X32-NEXT: subpd %xmm0, %xmm2
|
; X32-NEXT: subpd %xmm0, %xmm2
|
||||||
; X32-NEXT: cvtpd2ps %xmm2, %xmm0
|
; X32-NEXT: cvtpd2ps %xmm2, %xmm0
|
||||||
|
|
@ -85,7 +85,7 @@ define <2 x float> @uitofp_2i32_legalized(<2 x i32> %in, <2 x float> %v) {
|
||||||
; X64: # %bb.0:
|
; X64: # %bb.0:
|
||||||
; X64-NEXT: xorps %xmm2, %xmm2
|
; X64-NEXT: xorps %xmm2, %xmm2
|
||||||
; X64-NEXT: blendps {{.*#+}} xmm2 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
|
; X64-NEXT: blendps {{.*#+}} xmm2 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
|
||||||
; X64-NEXT: movaps {{.*#+}} xmm0 = [4.503600e+15,4.503600e+15]
|
; X64-NEXT: movaps {{.*#+}} xmm0 = [4503599627370496,4503599627370496]
|
||||||
; X64-NEXT: orps %xmm0, %xmm2
|
; X64-NEXT: orps %xmm0, %xmm2
|
||||||
; X64-NEXT: subpd %xmm0, %xmm2
|
; X64-NEXT: subpd %xmm0, %xmm2
|
||||||
; X64-NEXT: cvtpd2ps %xmm2, %xmm0
|
; X64-NEXT: cvtpd2ps %xmm2, %xmm0
|
||||||
|
|
|
||||||
|
|
@ -160,7 +160,7 @@ entry:
|
||||||
define <4 x float> @test_mm_fnmsub_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
|
define <4 x float> @test_mm_fnmsub_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
|
||||||
; CHECK-LABEL: test_mm_fnmsub_ps:
|
; CHECK-LABEL: test_mm_fnmsub_ps:
|
||||||
; CHECK: # %bb.0: # %entry
|
; CHECK: # %bb.0: # %entry
|
||||||
; CHECK-NEXT: vmovaps {{.*#+}} xmm3 = [-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00]
|
; CHECK-NEXT: vmovaps {{.*#+}} xmm3 = [-0,-0,-0,-0]
|
||||||
; CHECK-NEXT: vxorps %xmm3, %xmm0, %xmm4
|
; CHECK-NEXT: vxorps %xmm3, %xmm0, %xmm4
|
||||||
; CHECK-NEXT: vxorps %xmm3, %xmm2, %xmm0
|
; CHECK-NEXT: vxorps %xmm3, %xmm2, %xmm0
|
||||||
; CHECK-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm4) + xmm0
|
; CHECK-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm4) + xmm0
|
||||||
|
|
@ -175,7 +175,7 @@ entry:
|
||||||
define <2 x double> @test_mm_fnmsub_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
|
define <2 x double> @test_mm_fnmsub_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
|
||||||
; CHECK-LABEL: test_mm_fnmsub_pd:
|
; CHECK-LABEL: test_mm_fnmsub_pd:
|
||||||
; CHECK: # %bb.0: # %entry
|
; CHECK: # %bb.0: # %entry
|
||||||
; CHECK-NEXT: vmovapd {{.*#+}} xmm3 = [-0.000000e+00,-0.000000e+00]
|
; CHECK-NEXT: vmovapd {{.*#+}} xmm3 = [-0,-0]
|
||||||
; CHECK-NEXT: vxorpd %xmm3, %xmm0, %xmm4
|
; CHECK-NEXT: vxorpd %xmm3, %xmm0, %xmm4
|
||||||
; CHECK-NEXT: vxorpd %xmm3, %xmm2, %xmm0
|
; CHECK-NEXT: vxorpd %xmm3, %xmm2, %xmm0
|
||||||
; CHECK-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm4) + xmm0
|
; CHECK-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm4) + xmm0
|
||||||
|
|
@ -342,7 +342,7 @@ entry:
|
||||||
define <8 x float> @test_mm256_fnmsub_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
|
define <8 x float> @test_mm256_fnmsub_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
|
||||||
; CHECK-LABEL: test_mm256_fnmsub_ps:
|
; CHECK-LABEL: test_mm256_fnmsub_ps:
|
||||||
; CHECK: # %bb.0: # %entry
|
; CHECK: # %bb.0: # %entry
|
||||||
; CHECK-NEXT: vmovaps {{.*#+}} ymm3 = [-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00]
|
; CHECK-NEXT: vmovaps {{.*#+}} ymm3 = [-0,-0,-0,-0,-0,-0,-0,-0]
|
||||||
; CHECK-NEXT: vxorps %ymm3, %ymm0, %ymm4
|
; CHECK-NEXT: vxorps %ymm3, %ymm0, %ymm4
|
||||||
; CHECK-NEXT: vxorps %ymm3, %ymm2, %ymm0
|
; CHECK-NEXT: vxorps %ymm3, %ymm2, %ymm0
|
||||||
; CHECK-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm4) + ymm0
|
; CHECK-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm4) + ymm0
|
||||||
|
|
@ -357,7 +357,7 @@ entry:
|
||||||
define <4 x double> @test_mm256_fnmsub_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c) {
|
define <4 x double> @test_mm256_fnmsub_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c) {
|
||||||
; CHECK-LABEL: test_mm256_fnmsub_pd:
|
; CHECK-LABEL: test_mm256_fnmsub_pd:
|
||||||
; CHECK: # %bb.0: # %entry
|
; CHECK: # %bb.0: # %entry
|
||||||
; CHECK-NEXT: vmovapd {{.*#+}} ymm3 = [-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00]
|
; CHECK-NEXT: vmovapd {{.*#+}} ymm3 = [-0,-0,-0,-0]
|
||||||
; CHECK-NEXT: vxorpd %ymm3, %ymm0, %ymm4
|
; CHECK-NEXT: vxorpd %ymm3, %ymm0, %ymm4
|
||||||
; CHECK-NEXT: vxorpd %ymm3, %ymm2, %ymm0
|
; CHECK-NEXT: vxorpd %ymm3, %ymm2, %ymm0
|
||||||
; CHECK-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm4) + ymm0
|
; CHECK-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm4) + ymm0
|
||||||
|
|
|
||||||
|
|
@ -715,14 +715,14 @@ define <4 x float> @test_v4f32_mul_y_add_x_negone(<4 x float> %x, <4 x float> %y
|
||||||
define <4 x float> @test_v4f32_mul_sub_one_x_y(<4 x float> %x, <4 x float> %y) {
|
define <4 x float> @test_v4f32_mul_sub_one_x_y(<4 x float> %x, <4 x float> %y) {
|
||||||
; FMA-INFS-LABEL: test_v4f32_mul_sub_one_x_y:
|
; FMA-INFS-LABEL: test_v4f32_mul_sub_one_x_y:
|
||||||
; FMA-INFS: # %bb.0:
|
; FMA-INFS: # %bb.0:
|
||||||
; FMA-INFS-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; FMA-INFS-NEXT: vmovaps {{.*#+}} xmm2 = [1,1,1,1]
|
||||||
; FMA-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
|
; FMA-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
|
||||||
; FMA-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
|
; FMA-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
|
||||||
; FMA-INFS-NEXT: retq
|
; FMA-INFS-NEXT: retq
|
||||||
;
|
;
|
||||||
; FMA4-INFS-LABEL: test_v4f32_mul_sub_one_x_y:
|
; FMA4-INFS-LABEL: test_v4f32_mul_sub_one_x_y:
|
||||||
; FMA4-INFS: # %bb.0:
|
; FMA4-INFS: # %bb.0:
|
||||||
; FMA4-INFS-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; FMA4-INFS-NEXT: vmovaps {{.*#+}} xmm2 = [1,1,1,1]
|
||||||
; FMA4-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
|
; FMA4-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
|
||||||
; FMA4-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
|
; FMA4-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
|
||||||
; FMA4-INFS-NEXT: retq
|
; FMA4-INFS-NEXT: retq
|
||||||
|
|
@ -756,14 +756,14 @@ define <4 x float> @test_v4f32_mul_sub_one_x_y(<4 x float> %x, <4 x float> %y) {
|
||||||
define <4 x float> @test_v4f32_mul_y_sub_one_x(<4 x float> %x, <4 x float> %y) {
|
define <4 x float> @test_v4f32_mul_y_sub_one_x(<4 x float> %x, <4 x float> %y) {
|
||||||
; FMA-INFS-LABEL: test_v4f32_mul_y_sub_one_x:
|
; FMA-INFS-LABEL: test_v4f32_mul_y_sub_one_x:
|
||||||
; FMA-INFS: # %bb.0:
|
; FMA-INFS: # %bb.0:
|
||||||
; FMA-INFS-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; FMA-INFS-NEXT: vmovaps {{.*#+}} xmm2 = [1,1,1,1]
|
||||||
; FMA-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
|
; FMA-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
|
||||||
; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
|
; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
|
||||||
; FMA-INFS-NEXT: retq
|
; FMA-INFS-NEXT: retq
|
||||||
;
|
;
|
||||||
; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_one_x:
|
; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_one_x:
|
||||||
; FMA4-INFS: # %bb.0:
|
; FMA4-INFS: # %bb.0:
|
||||||
; FMA4-INFS-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; FMA4-INFS-NEXT: vmovaps {{.*#+}} xmm2 = [1,1,1,1]
|
||||||
; FMA4-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
|
; FMA4-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
|
||||||
; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
|
; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
|
||||||
; FMA4-INFS-NEXT: retq
|
; FMA4-INFS-NEXT: retq
|
||||||
|
|
@ -797,14 +797,14 @@ define <4 x float> @test_v4f32_mul_y_sub_one_x(<4 x float> %x, <4 x float> %y) {
|
||||||
define <4 x float> @test_v4f32_mul_sub_negone_x_y(<4 x float> %x, <4 x float> %y) {
|
define <4 x float> @test_v4f32_mul_sub_negone_x_y(<4 x float> %x, <4 x float> %y) {
|
||||||
; FMA-INFS-LABEL: test_v4f32_mul_sub_negone_x_y:
|
; FMA-INFS-LABEL: test_v4f32_mul_sub_negone_x_y:
|
||||||
; FMA-INFS: # %bb.0:
|
; FMA-INFS: # %bb.0:
|
||||||
; FMA-INFS-NEXT: vmovaps {{.*#+}} xmm2 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00]
|
; FMA-INFS-NEXT: vmovaps {{.*#+}} xmm2 = [-1,-1,-1,-1]
|
||||||
; FMA-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
|
; FMA-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
|
||||||
; FMA-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
|
; FMA-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
|
||||||
; FMA-INFS-NEXT: retq
|
; FMA-INFS-NEXT: retq
|
||||||
;
|
;
|
||||||
; FMA4-INFS-LABEL: test_v4f32_mul_sub_negone_x_y:
|
; FMA4-INFS-LABEL: test_v4f32_mul_sub_negone_x_y:
|
||||||
; FMA4-INFS: # %bb.0:
|
; FMA4-INFS: # %bb.0:
|
||||||
; FMA4-INFS-NEXT: vmovaps {{.*#+}} xmm2 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00]
|
; FMA4-INFS-NEXT: vmovaps {{.*#+}} xmm2 = [-1,-1,-1,-1]
|
||||||
; FMA4-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
|
; FMA4-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
|
||||||
; FMA4-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
|
; FMA4-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
|
||||||
; FMA4-INFS-NEXT: retq
|
; FMA4-INFS-NEXT: retq
|
||||||
|
|
@ -838,14 +838,14 @@ define <4 x float> @test_v4f32_mul_sub_negone_x_y(<4 x float> %x, <4 x float> %y
|
||||||
define <4 x float> @test_v4f32_mul_y_sub_negone_x(<4 x float> %x, <4 x float> %y) {
|
define <4 x float> @test_v4f32_mul_y_sub_negone_x(<4 x float> %x, <4 x float> %y) {
|
||||||
; FMA-INFS-LABEL: test_v4f32_mul_y_sub_negone_x:
|
; FMA-INFS-LABEL: test_v4f32_mul_y_sub_negone_x:
|
||||||
; FMA-INFS: # %bb.0:
|
; FMA-INFS: # %bb.0:
|
||||||
; FMA-INFS-NEXT: vmovaps {{.*#+}} xmm2 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00]
|
; FMA-INFS-NEXT: vmovaps {{.*#+}} xmm2 = [-1,-1,-1,-1]
|
||||||
; FMA-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
|
; FMA-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
|
||||||
; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
|
; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
|
||||||
; FMA-INFS-NEXT: retq
|
; FMA-INFS-NEXT: retq
|
||||||
;
|
;
|
||||||
; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_negone_x:
|
; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_negone_x:
|
||||||
; FMA4-INFS: # %bb.0:
|
; FMA4-INFS: # %bb.0:
|
||||||
; FMA4-INFS-NEXT: vmovaps {{.*#+}} xmm2 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00]
|
; FMA4-INFS-NEXT: vmovaps {{.*#+}} xmm2 = [-1,-1,-1,-1]
|
||||||
; FMA4-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
|
; FMA4-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
|
||||||
; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
|
; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
|
||||||
; FMA4-INFS-NEXT: retq
|
; FMA4-INFS-NEXT: retq
|
||||||
|
|
@ -1084,7 +1084,7 @@ define float @test_f32_interp(float %x, float %y, float %t) {
|
||||||
define <4 x float> @test_v4f32_interp(<4 x float> %x, <4 x float> %y, <4 x float> %t) {
|
define <4 x float> @test_v4f32_interp(<4 x float> %x, <4 x float> %y, <4 x float> %t) {
|
||||||
; FMA-INFS-LABEL: test_v4f32_interp:
|
; FMA-INFS-LABEL: test_v4f32_interp:
|
||||||
; FMA-INFS: # %bb.0:
|
; FMA-INFS: # %bb.0:
|
||||||
; FMA-INFS-NEXT: vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; FMA-INFS-NEXT: vmovaps {{.*#+}} xmm3 = [1,1,1,1]
|
||||||
; FMA-INFS-NEXT: vsubps %xmm2, %xmm3, %xmm3
|
; FMA-INFS-NEXT: vsubps %xmm2, %xmm3, %xmm3
|
||||||
; FMA-INFS-NEXT: vmulps %xmm3, %xmm1, %xmm1
|
; FMA-INFS-NEXT: vmulps %xmm3, %xmm1, %xmm1
|
||||||
; FMA-INFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1
|
; FMA-INFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1
|
||||||
|
|
@ -1092,7 +1092,7 @@ define <4 x float> @test_v4f32_interp(<4 x float> %x, <4 x float> %y, <4 x float
|
||||||
;
|
;
|
||||||
; FMA4-INFS-LABEL: test_v4f32_interp:
|
; FMA4-INFS-LABEL: test_v4f32_interp:
|
||||||
; FMA4-INFS: # %bb.0:
|
; FMA4-INFS: # %bb.0:
|
||||||
; FMA4-INFS-NEXT: vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; FMA4-INFS-NEXT: vmovaps {{.*#+}} xmm3 = [1,1,1,1]
|
||||||
; FMA4-INFS-NEXT: vsubps %xmm2, %xmm3, %xmm3
|
; FMA4-INFS-NEXT: vsubps %xmm2, %xmm3, %xmm3
|
||||||
; FMA4-INFS-NEXT: vmulps %xmm3, %xmm1, %xmm1
|
; FMA4-INFS-NEXT: vmulps %xmm3, %xmm1, %xmm1
|
||||||
; FMA4-INFS-NEXT: vfmaddps %xmm1, %xmm2, %xmm0, %xmm0
|
; FMA4-INFS-NEXT: vfmaddps %xmm1, %xmm2, %xmm0, %xmm0
|
||||||
|
|
@ -1133,7 +1133,7 @@ define <4 x float> @test_v4f32_interp(<4 x float> %x, <4 x float> %y, <4 x float
|
||||||
define <8 x float> @test_v8f32_interp(<8 x float> %x, <8 x float> %y, <8 x float> %t) {
|
define <8 x float> @test_v8f32_interp(<8 x float> %x, <8 x float> %y, <8 x float> %t) {
|
||||||
; FMA-INFS-LABEL: test_v8f32_interp:
|
; FMA-INFS-LABEL: test_v8f32_interp:
|
||||||
; FMA-INFS: # %bb.0:
|
; FMA-INFS: # %bb.0:
|
||||||
; FMA-INFS-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; FMA-INFS-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1]
|
||||||
; FMA-INFS-NEXT: vsubps %ymm2, %ymm3, %ymm3
|
; FMA-INFS-NEXT: vsubps %ymm2, %ymm3, %ymm3
|
||||||
; FMA-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1
|
; FMA-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1
|
||||||
; FMA-INFS-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm1
|
; FMA-INFS-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm1
|
||||||
|
|
@ -1141,7 +1141,7 @@ define <8 x float> @test_v8f32_interp(<8 x float> %x, <8 x float> %y, <8 x float
|
||||||
;
|
;
|
||||||
; FMA4-INFS-LABEL: test_v8f32_interp:
|
; FMA4-INFS-LABEL: test_v8f32_interp:
|
||||||
; FMA4-INFS: # %bb.0:
|
; FMA4-INFS: # %bb.0:
|
||||||
; FMA4-INFS-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; FMA4-INFS-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1]
|
||||||
; FMA4-INFS-NEXT: vsubps %ymm2, %ymm3, %ymm3
|
; FMA4-INFS-NEXT: vsubps %ymm2, %ymm3, %ymm3
|
||||||
; FMA4-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1
|
; FMA4-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1
|
||||||
; FMA4-INFS-NEXT: vfmaddps %ymm1, %ymm2, %ymm0, %ymm0
|
; FMA4-INFS-NEXT: vfmaddps %ymm1, %ymm2, %ymm0, %ymm0
|
||||||
|
|
@ -1231,7 +1231,7 @@ define double @test_f64_interp(double %x, double %y, double %t) {
|
||||||
define <2 x double> @test_v2f64_interp(<2 x double> %x, <2 x double> %y, <2 x double> %t) {
|
define <2 x double> @test_v2f64_interp(<2 x double> %x, <2 x double> %y, <2 x double> %t) {
|
||||||
; FMA-INFS-LABEL: test_v2f64_interp:
|
; FMA-INFS-LABEL: test_v2f64_interp:
|
||||||
; FMA-INFS: # %bb.0:
|
; FMA-INFS: # %bb.0:
|
||||||
; FMA-INFS-NEXT: vmovapd {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00]
|
; FMA-INFS-NEXT: vmovapd {{.*#+}} xmm3 = [1,1]
|
||||||
; FMA-INFS-NEXT: vsubpd %xmm2, %xmm3, %xmm3
|
; FMA-INFS-NEXT: vsubpd %xmm2, %xmm3, %xmm3
|
||||||
; FMA-INFS-NEXT: vmulpd %xmm3, %xmm1, %xmm1
|
; FMA-INFS-NEXT: vmulpd %xmm3, %xmm1, %xmm1
|
||||||
; FMA-INFS-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1
|
; FMA-INFS-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1
|
||||||
|
|
@ -1239,7 +1239,7 @@ define <2 x double> @test_v2f64_interp(<2 x double> %x, <2 x double> %y, <2 x do
|
||||||
;
|
;
|
||||||
; FMA4-INFS-LABEL: test_v2f64_interp:
|
; FMA4-INFS-LABEL: test_v2f64_interp:
|
||||||
; FMA4-INFS: # %bb.0:
|
; FMA4-INFS: # %bb.0:
|
||||||
; FMA4-INFS-NEXT: vmovapd {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00]
|
; FMA4-INFS-NEXT: vmovapd {{.*#+}} xmm3 = [1,1]
|
||||||
; FMA4-INFS-NEXT: vsubpd %xmm2, %xmm3, %xmm3
|
; FMA4-INFS-NEXT: vsubpd %xmm2, %xmm3, %xmm3
|
||||||
; FMA4-INFS-NEXT: vmulpd %xmm3, %xmm1, %xmm1
|
; FMA4-INFS-NEXT: vmulpd %xmm3, %xmm1, %xmm1
|
||||||
; FMA4-INFS-NEXT: vfmaddpd %xmm1, %xmm2, %xmm0, %xmm0
|
; FMA4-INFS-NEXT: vfmaddpd %xmm1, %xmm2, %xmm0, %xmm0
|
||||||
|
|
@ -1247,7 +1247,7 @@ define <2 x double> @test_v2f64_interp(<2 x double> %x, <2 x double> %y, <2 x do
|
||||||
;
|
;
|
||||||
; AVX512-INFS-LABEL: test_v2f64_interp:
|
; AVX512-INFS-LABEL: test_v2f64_interp:
|
||||||
; AVX512-INFS: # %bb.0:
|
; AVX512-INFS: # %bb.0:
|
||||||
; AVX512-INFS-NEXT: vmovapd {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00]
|
; AVX512-INFS-NEXT: vmovapd {{.*#+}} xmm3 = [1,1]
|
||||||
; AVX512-INFS-NEXT: vsubpd %xmm2, %xmm3, %xmm3
|
; AVX512-INFS-NEXT: vsubpd %xmm2, %xmm3, %xmm3
|
||||||
; AVX512-INFS-NEXT: vmulpd %xmm3, %xmm1, %xmm1
|
; AVX512-INFS-NEXT: vmulpd %xmm3, %xmm1, %xmm1
|
||||||
; AVX512-INFS-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1
|
; AVX512-INFS-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1
|
||||||
|
|
@ -1280,7 +1280,7 @@ define <2 x double> @test_v2f64_interp(<2 x double> %x, <2 x double> %y, <2 x do
|
||||||
define <4 x double> @test_v4f64_interp(<4 x double> %x, <4 x double> %y, <4 x double> %t) {
|
define <4 x double> @test_v4f64_interp(<4 x double> %x, <4 x double> %y, <4 x double> %t) {
|
||||||
; FMA-INFS-LABEL: test_v4f64_interp:
|
; FMA-INFS-LABEL: test_v4f64_interp:
|
||||||
; FMA-INFS: # %bb.0:
|
; FMA-INFS: # %bb.0:
|
||||||
; FMA-INFS-NEXT: vmovapd {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; FMA-INFS-NEXT: vmovapd {{.*#+}} ymm3 = [1,1,1,1]
|
||||||
; FMA-INFS-NEXT: vsubpd %ymm2, %ymm3, %ymm3
|
; FMA-INFS-NEXT: vsubpd %ymm2, %ymm3, %ymm3
|
||||||
; FMA-INFS-NEXT: vmulpd %ymm3, %ymm1, %ymm1
|
; FMA-INFS-NEXT: vmulpd %ymm3, %ymm1, %ymm1
|
||||||
; FMA-INFS-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm1
|
; FMA-INFS-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm1
|
||||||
|
|
@ -1288,7 +1288,7 @@ define <4 x double> @test_v4f64_interp(<4 x double> %x, <4 x double> %y, <4 x do
|
||||||
;
|
;
|
||||||
; FMA4-INFS-LABEL: test_v4f64_interp:
|
; FMA4-INFS-LABEL: test_v4f64_interp:
|
||||||
; FMA4-INFS: # %bb.0:
|
; FMA4-INFS: # %bb.0:
|
||||||
; FMA4-INFS-NEXT: vmovapd {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; FMA4-INFS-NEXT: vmovapd {{.*#+}} ymm3 = [1,1,1,1]
|
||||||
; FMA4-INFS-NEXT: vsubpd %ymm2, %ymm3, %ymm3
|
; FMA4-INFS-NEXT: vsubpd %ymm2, %ymm3, %ymm3
|
||||||
; FMA4-INFS-NEXT: vmulpd %ymm3, %ymm1, %ymm1
|
; FMA4-INFS-NEXT: vmulpd %ymm3, %ymm1, %ymm1
|
||||||
; FMA4-INFS-NEXT: vfmaddpd %ymm1, %ymm2, %ymm0, %ymm0
|
; FMA4-INFS-NEXT: vfmaddpd %ymm1, %ymm2, %ymm0, %ymm0
|
||||||
|
|
|
||||||
|
|
@ -259,7 +259,7 @@ define <8 x double> @test_8f64_fmsub_load(<8 x double>* %a0, <8 x double> %a1, <
|
||||||
define <16 x float> @test_v16f32_mul_add_x_one_y(<16 x float> %x, <16 x float> %y) {
|
define <16 x float> @test_v16f32_mul_add_x_one_y(<16 x float> %x, <16 x float> %y) {
|
||||||
; FMA-INFS-LABEL: test_v16f32_mul_add_x_one_y:
|
; FMA-INFS-LABEL: test_v16f32_mul_add_x_one_y:
|
||||||
; FMA-INFS: # %bb.0:
|
; FMA-INFS: # %bb.0:
|
||||||
; FMA-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; FMA-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [1,1,1,1,1,1,1,1]
|
||||||
; FMA-INFS-NEXT: vaddps %ymm4, %ymm1, %ymm1
|
; FMA-INFS-NEXT: vaddps %ymm4, %ymm1, %ymm1
|
||||||
; FMA-INFS-NEXT: vaddps %ymm4, %ymm0, %ymm0
|
; FMA-INFS-NEXT: vaddps %ymm4, %ymm0, %ymm0
|
||||||
; FMA-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
|
; FMA-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
|
||||||
|
|
@ -268,7 +268,7 @@ define <16 x float> @test_v16f32_mul_add_x_one_y(<16 x float> %x, <16 x float> %
|
||||||
;
|
;
|
||||||
; FMA4-INFS-LABEL: test_v16f32_mul_add_x_one_y:
|
; FMA4-INFS-LABEL: test_v16f32_mul_add_x_one_y:
|
||||||
; FMA4-INFS: # %bb.0:
|
; FMA4-INFS: # %bb.0:
|
||||||
; FMA4-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; FMA4-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [1,1,1,1,1,1,1,1]
|
||||||
; FMA4-INFS-NEXT: vaddps %ymm4, %ymm1, %ymm1
|
; FMA4-INFS-NEXT: vaddps %ymm4, %ymm1, %ymm1
|
||||||
; FMA4-INFS-NEXT: vaddps %ymm4, %ymm0, %ymm0
|
; FMA4-INFS-NEXT: vaddps %ymm4, %ymm0, %ymm0
|
||||||
; FMA4-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
|
; FMA4-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
|
||||||
|
|
@ -305,7 +305,7 @@ define <16 x float> @test_v16f32_mul_add_x_one_y(<16 x float> %x, <16 x float> %
|
||||||
define <8 x double> @test_v8f64_mul_y_add_x_one(<8 x double> %x, <8 x double> %y) {
|
define <8 x double> @test_v8f64_mul_y_add_x_one(<8 x double> %x, <8 x double> %y) {
|
||||||
; FMA-INFS-LABEL: test_v8f64_mul_y_add_x_one:
|
; FMA-INFS-LABEL: test_v8f64_mul_y_add_x_one:
|
||||||
; FMA-INFS: # %bb.0:
|
; FMA-INFS: # %bb.0:
|
||||||
; FMA-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; FMA-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [1,1,1,1]
|
||||||
; FMA-INFS-NEXT: vaddpd %ymm4, %ymm1, %ymm1
|
; FMA-INFS-NEXT: vaddpd %ymm4, %ymm1, %ymm1
|
||||||
; FMA-INFS-NEXT: vaddpd %ymm4, %ymm0, %ymm0
|
; FMA-INFS-NEXT: vaddpd %ymm4, %ymm0, %ymm0
|
||||||
; FMA-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
|
; FMA-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
|
||||||
|
|
@ -314,7 +314,7 @@ define <8 x double> @test_v8f64_mul_y_add_x_one(<8 x double> %x, <8 x double> %y
|
||||||
;
|
;
|
||||||
; FMA4-INFS-LABEL: test_v8f64_mul_y_add_x_one:
|
; FMA4-INFS-LABEL: test_v8f64_mul_y_add_x_one:
|
||||||
; FMA4-INFS: # %bb.0:
|
; FMA4-INFS: # %bb.0:
|
||||||
; FMA4-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; FMA4-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [1,1,1,1]
|
||||||
; FMA4-INFS-NEXT: vaddpd %ymm4, %ymm1, %ymm1
|
; FMA4-INFS-NEXT: vaddpd %ymm4, %ymm1, %ymm1
|
||||||
; FMA4-INFS-NEXT: vaddpd %ymm4, %ymm0, %ymm0
|
; FMA4-INFS-NEXT: vaddpd %ymm4, %ymm0, %ymm0
|
||||||
; FMA4-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
|
; FMA4-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
|
||||||
|
|
@ -351,7 +351,7 @@ define <8 x double> @test_v8f64_mul_y_add_x_one(<8 x double> %x, <8 x double> %y
|
||||||
define <16 x float> @test_v16f32_mul_add_x_negone_y(<16 x float> %x, <16 x float> %y) {
|
define <16 x float> @test_v16f32_mul_add_x_negone_y(<16 x float> %x, <16 x float> %y) {
|
||||||
; FMA-INFS-LABEL: test_v16f32_mul_add_x_negone_y:
|
; FMA-INFS-LABEL: test_v16f32_mul_add_x_negone_y:
|
||||||
; FMA-INFS: # %bb.0:
|
; FMA-INFS: # %bb.0:
|
||||||
; FMA-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00]
|
; FMA-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [-1,-1,-1,-1,-1,-1,-1,-1]
|
||||||
; FMA-INFS-NEXT: vaddps %ymm4, %ymm1, %ymm1
|
; FMA-INFS-NEXT: vaddps %ymm4, %ymm1, %ymm1
|
||||||
; FMA-INFS-NEXT: vaddps %ymm4, %ymm0, %ymm0
|
; FMA-INFS-NEXT: vaddps %ymm4, %ymm0, %ymm0
|
||||||
; FMA-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
|
; FMA-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
|
||||||
|
|
@ -360,7 +360,7 @@ define <16 x float> @test_v16f32_mul_add_x_negone_y(<16 x float> %x, <16 x float
|
||||||
;
|
;
|
||||||
; FMA4-INFS-LABEL: test_v16f32_mul_add_x_negone_y:
|
; FMA4-INFS-LABEL: test_v16f32_mul_add_x_negone_y:
|
||||||
; FMA4-INFS: # %bb.0:
|
; FMA4-INFS: # %bb.0:
|
||||||
; FMA4-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00]
|
; FMA4-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [-1,-1,-1,-1,-1,-1,-1,-1]
|
||||||
; FMA4-INFS-NEXT: vaddps %ymm4, %ymm1, %ymm1
|
; FMA4-INFS-NEXT: vaddps %ymm4, %ymm1, %ymm1
|
||||||
; FMA4-INFS-NEXT: vaddps %ymm4, %ymm0, %ymm0
|
; FMA4-INFS-NEXT: vaddps %ymm4, %ymm0, %ymm0
|
||||||
; FMA4-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
|
; FMA4-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
|
||||||
|
|
@ -397,7 +397,7 @@ define <16 x float> @test_v16f32_mul_add_x_negone_y(<16 x float> %x, <16 x float
|
||||||
define <8 x double> @test_v8f64_mul_y_add_x_negone(<8 x double> %x, <8 x double> %y) {
|
define <8 x double> @test_v8f64_mul_y_add_x_negone(<8 x double> %x, <8 x double> %y) {
|
||||||
; FMA-INFS-LABEL: test_v8f64_mul_y_add_x_negone:
|
; FMA-INFS-LABEL: test_v8f64_mul_y_add_x_negone:
|
||||||
; FMA-INFS: # %bb.0:
|
; FMA-INFS: # %bb.0:
|
||||||
; FMA-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00]
|
; FMA-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [-1,-1,-1,-1]
|
||||||
; FMA-INFS-NEXT: vaddpd %ymm4, %ymm1, %ymm1
|
; FMA-INFS-NEXT: vaddpd %ymm4, %ymm1, %ymm1
|
||||||
; FMA-INFS-NEXT: vaddpd %ymm4, %ymm0, %ymm0
|
; FMA-INFS-NEXT: vaddpd %ymm4, %ymm0, %ymm0
|
||||||
; FMA-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
|
; FMA-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
|
||||||
|
|
@ -406,7 +406,7 @@ define <8 x double> @test_v8f64_mul_y_add_x_negone(<8 x double> %x, <8 x double>
|
||||||
;
|
;
|
||||||
; FMA4-INFS-LABEL: test_v8f64_mul_y_add_x_negone:
|
; FMA4-INFS-LABEL: test_v8f64_mul_y_add_x_negone:
|
||||||
; FMA4-INFS: # %bb.0:
|
; FMA4-INFS: # %bb.0:
|
||||||
; FMA4-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00]
|
; FMA4-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [-1,-1,-1,-1]
|
||||||
; FMA4-INFS-NEXT: vaddpd %ymm4, %ymm1, %ymm1
|
; FMA4-INFS-NEXT: vaddpd %ymm4, %ymm1, %ymm1
|
||||||
; FMA4-INFS-NEXT: vaddpd %ymm4, %ymm0, %ymm0
|
; FMA4-INFS-NEXT: vaddpd %ymm4, %ymm0, %ymm0
|
||||||
; FMA4-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
|
; FMA4-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
|
||||||
|
|
@ -443,7 +443,7 @@ define <8 x double> @test_v8f64_mul_y_add_x_negone(<8 x double> %x, <8 x double>
|
||||||
define <16 x float> @test_v16f32_mul_sub_one_x_y(<16 x float> %x, <16 x float> %y) {
|
define <16 x float> @test_v16f32_mul_sub_one_x_y(<16 x float> %x, <16 x float> %y) {
|
||||||
; FMA-INFS-LABEL: test_v16f32_mul_sub_one_x_y:
|
; FMA-INFS-LABEL: test_v16f32_mul_sub_one_x_y:
|
||||||
; FMA-INFS: # %bb.0:
|
; FMA-INFS: # %bb.0:
|
||||||
; FMA-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; FMA-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [1,1,1,1,1,1,1,1]
|
||||||
; FMA-INFS-NEXT: vsubps %ymm1, %ymm4, %ymm1
|
; FMA-INFS-NEXT: vsubps %ymm1, %ymm4, %ymm1
|
||||||
; FMA-INFS-NEXT: vsubps %ymm0, %ymm4, %ymm0
|
; FMA-INFS-NEXT: vsubps %ymm0, %ymm4, %ymm0
|
||||||
; FMA-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
|
; FMA-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
|
||||||
|
|
@ -452,7 +452,7 @@ define <16 x float> @test_v16f32_mul_sub_one_x_y(<16 x float> %x, <16 x float> %
|
||||||
;
|
;
|
||||||
; FMA4-INFS-LABEL: test_v16f32_mul_sub_one_x_y:
|
; FMA4-INFS-LABEL: test_v16f32_mul_sub_one_x_y:
|
||||||
; FMA4-INFS: # %bb.0:
|
; FMA4-INFS: # %bb.0:
|
||||||
; FMA4-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; FMA4-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [1,1,1,1,1,1,1,1]
|
||||||
; FMA4-INFS-NEXT: vsubps %ymm1, %ymm4, %ymm1
|
; FMA4-INFS-NEXT: vsubps %ymm1, %ymm4, %ymm1
|
||||||
; FMA4-INFS-NEXT: vsubps %ymm0, %ymm4, %ymm0
|
; FMA4-INFS-NEXT: vsubps %ymm0, %ymm4, %ymm0
|
||||||
; FMA4-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
|
; FMA4-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
|
||||||
|
|
@ -490,7 +490,7 @@ define <16 x float> @test_v16f32_mul_sub_one_x_y(<16 x float> %x, <16 x float> %
|
||||||
define <8 x double> @test_v8f64_mul_y_sub_one_x(<8 x double> %x, <8 x double> %y) {
|
define <8 x double> @test_v8f64_mul_y_sub_one_x(<8 x double> %x, <8 x double> %y) {
|
||||||
; FMA-INFS-LABEL: test_v8f64_mul_y_sub_one_x:
|
; FMA-INFS-LABEL: test_v8f64_mul_y_sub_one_x:
|
||||||
; FMA-INFS: # %bb.0:
|
; FMA-INFS: # %bb.0:
|
||||||
; FMA-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; FMA-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [1,1,1,1]
|
||||||
; FMA-INFS-NEXT: vsubpd %ymm1, %ymm4, %ymm1
|
; FMA-INFS-NEXT: vsubpd %ymm1, %ymm4, %ymm1
|
||||||
; FMA-INFS-NEXT: vsubpd %ymm0, %ymm4, %ymm0
|
; FMA-INFS-NEXT: vsubpd %ymm0, %ymm4, %ymm0
|
||||||
; FMA-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
|
; FMA-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
|
||||||
|
|
@ -499,7 +499,7 @@ define <8 x double> @test_v8f64_mul_y_sub_one_x(<8 x double> %x, <8 x double> %y
|
||||||
;
|
;
|
||||||
; FMA4-INFS-LABEL: test_v8f64_mul_y_sub_one_x:
|
; FMA4-INFS-LABEL: test_v8f64_mul_y_sub_one_x:
|
||||||
; FMA4-INFS: # %bb.0:
|
; FMA4-INFS: # %bb.0:
|
||||||
; FMA4-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; FMA4-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [1,1,1,1]
|
||||||
; FMA4-INFS-NEXT: vsubpd %ymm1, %ymm4, %ymm1
|
; FMA4-INFS-NEXT: vsubpd %ymm1, %ymm4, %ymm1
|
||||||
; FMA4-INFS-NEXT: vsubpd %ymm0, %ymm4, %ymm0
|
; FMA4-INFS-NEXT: vsubpd %ymm0, %ymm4, %ymm0
|
||||||
; FMA4-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
|
; FMA4-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
|
||||||
|
|
@ -537,7 +537,7 @@ define <8 x double> @test_v8f64_mul_y_sub_one_x(<8 x double> %x, <8 x double> %y
|
||||||
define <16 x float> @test_v16f32_mul_sub_negone_x_y(<16 x float> %x, <16 x float> %y) {
|
define <16 x float> @test_v16f32_mul_sub_negone_x_y(<16 x float> %x, <16 x float> %y) {
|
||||||
; FMA-INFS-LABEL: test_v16f32_mul_sub_negone_x_y:
|
; FMA-INFS-LABEL: test_v16f32_mul_sub_negone_x_y:
|
||||||
; FMA-INFS: # %bb.0:
|
; FMA-INFS: # %bb.0:
|
||||||
; FMA-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00]
|
; FMA-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [-1,-1,-1,-1,-1,-1,-1,-1]
|
||||||
; FMA-INFS-NEXT: vsubps %ymm1, %ymm4, %ymm1
|
; FMA-INFS-NEXT: vsubps %ymm1, %ymm4, %ymm1
|
||||||
; FMA-INFS-NEXT: vsubps %ymm0, %ymm4, %ymm0
|
; FMA-INFS-NEXT: vsubps %ymm0, %ymm4, %ymm0
|
||||||
; FMA-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
|
; FMA-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
|
||||||
|
|
@ -546,7 +546,7 @@ define <16 x float> @test_v16f32_mul_sub_negone_x_y(<16 x float> %x, <16 x float
|
||||||
;
|
;
|
||||||
; FMA4-INFS-LABEL: test_v16f32_mul_sub_negone_x_y:
|
; FMA4-INFS-LABEL: test_v16f32_mul_sub_negone_x_y:
|
||||||
; FMA4-INFS: # %bb.0:
|
; FMA4-INFS: # %bb.0:
|
||||||
; FMA4-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00]
|
; FMA4-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [-1,-1,-1,-1,-1,-1,-1,-1]
|
||||||
; FMA4-INFS-NEXT: vsubps %ymm1, %ymm4, %ymm1
|
; FMA4-INFS-NEXT: vsubps %ymm1, %ymm4, %ymm1
|
||||||
; FMA4-INFS-NEXT: vsubps %ymm0, %ymm4, %ymm0
|
; FMA4-INFS-NEXT: vsubps %ymm0, %ymm4, %ymm0
|
||||||
; FMA4-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
|
; FMA4-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
|
||||||
|
|
@ -584,7 +584,7 @@ define <16 x float> @test_v16f32_mul_sub_negone_x_y(<16 x float> %x, <16 x float
|
||||||
define <8 x double> @test_v8f64_mul_y_sub_negone_x(<8 x double> %x, <8 x double> %y) {
|
define <8 x double> @test_v8f64_mul_y_sub_negone_x(<8 x double> %x, <8 x double> %y) {
|
||||||
; FMA-INFS-LABEL: test_v8f64_mul_y_sub_negone_x:
|
; FMA-INFS-LABEL: test_v8f64_mul_y_sub_negone_x:
|
||||||
; FMA-INFS: # %bb.0:
|
; FMA-INFS: # %bb.0:
|
||||||
; FMA-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00]
|
; FMA-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [-1,-1,-1,-1]
|
||||||
; FMA-INFS-NEXT: vsubpd %ymm1, %ymm4, %ymm1
|
; FMA-INFS-NEXT: vsubpd %ymm1, %ymm4, %ymm1
|
||||||
; FMA-INFS-NEXT: vsubpd %ymm0, %ymm4, %ymm0
|
; FMA-INFS-NEXT: vsubpd %ymm0, %ymm4, %ymm0
|
||||||
; FMA-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
|
; FMA-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
|
||||||
|
|
@ -593,7 +593,7 @@ define <8 x double> @test_v8f64_mul_y_sub_negone_x(<8 x double> %x, <8 x double>
|
||||||
;
|
;
|
||||||
; FMA4-INFS-LABEL: test_v8f64_mul_y_sub_negone_x:
|
; FMA4-INFS-LABEL: test_v8f64_mul_y_sub_negone_x:
|
||||||
; FMA4-INFS: # %bb.0:
|
; FMA4-INFS: # %bb.0:
|
||||||
; FMA4-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00]
|
; FMA4-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [-1,-1,-1,-1]
|
||||||
; FMA4-INFS-NEXT: vsubpd %ymm1, %ymm4, %ymm1
|
; FMA4-INFS-NEXT: vsubpd %ymm1, %ymm4, %ymm1
|
||||||
; FMA4-INFS-NEXT: vsubpd %ymm0, %ymm4, %ymm0
|
; FMA4-INFS-NEXT: vsubpd %ymm0, %ymm4, %ymm0
|
||||||
; FMA4-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
|
; FMA4-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
|
||||||
|
|
@ -631,7 +631,7 @@ define <8 x double> @test_v8f64_mul_y_sub_negone_x(<8 x double> %x, <8 x double>
|
||||||
define <16 x float> @test_v16f32_mul_sub_x_one_y(<16 x float> %x, <16 x float> %y) {
|
define <16 x float> @test_v16f32_mul_sub_x_one_y(<16 x float> %x, <16 x float> %y) {
|
||||||
; FMA-INFS-LABEL: test_v16f32_mul_sub_x_one_y:
|
; FMA-INFS-LABEL: test_v16f32_mul_sub_x_one_y:
|
||||||
; FMA-INFS: # %bb.0:
|
; FMA-INFS: # %bb.0:
|
||||||
; FMA-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; FMA-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [1,1,1,1,1,1,1,1]
|
||||||
; FMA-INFS-NEXT: vsubps %ymm4, %ymm1, %ymm1
|
; FMA-INFS-NEXT: vsubps %ymm4, %ymm1, %ymm1
|
||||||
; FMA-INFS-NEXT: vsubps %ymm4, %ymm0, %ymm0
|
; FMA-INFS-NEXT: vsubps %ymm4, %ymm0, %ymm0
|
||||||
; FMA-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
|
; FMA-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
|
||||||
|
|
@ -640,7 +640,7 @@ define <16 x float> @test_v16f32_mul_sub_x_one_y(<16 x float> %x, <16 x float> %
|
||||||
;
|
;
|
||||||
; FMA4-INFS-LABEL: test_v16f32_mul_sub_x_one_y:
|
; FMA4-INFS-LABEL: test_v16f32_mul_sub_x_one_y:
|
||||||
; FMA4-INFS: # %bb.0:
|
; FMA4-INFS: # %bb.0:
|
||||||
; FMA4-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; FMA4-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [1,1,1,1,1,1,1,1]
|
||||||
; FMA4-INFS-NEXT: vsubps %ymm4, %ymm1, %ymm1
|
; FMA4-INFS-NEXT: vsubps %ymm4, %ymm1, %ymm1
|
||||||
; FMA4-INFS-NEXT: vsubps %ymm4, %ymm0, %ymm0
|
; FMA4-INFS-NEXT: vsubps %ymm4, %ymm0, %ymm0
|
||||||
; FMA4-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
|
; FMA4-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
|
||||||
|
|
@ -677,7 +677,7 @@ define <16 x float> @test_v16f32_mul_sub_x_one_y(<16 x float> %x, <16 x float> %
|
||||||
define <8 x double> @test_v8f64_mul_y_sub_x_one(<8 x double> %x, <8 x double> %y) {
|
define <8 x double> @test_v8f64_mul_y_sub_x_one(<8 x double> %x, <8 x double> %y) {
|
||||||
; FMA-INFS-LABEL: test_v8f64_mul_y_sub_x_one:
|
; FMA-INFS-LABEL: test_v8f64_mul_y_sub_x_one:
|
||||||
; FMA-INFS: # %bb.0:
|
; FMA-INFS: # %bb.0:
|
||||||
; FMA-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; FMA-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [1,1,1,1]
|
||||||
; FMA-INFS-NEXT: vsubpd %ymm4, %ymm1, %ymm1
|
; FMA-INFS-NEXT: vsubpd %ymm4, %ymm1, %ymm1
|
||||||
; FMA-INFS-NEXT: vsubpd %ymm4, %ymm0, %ymm0
|
; FMA-INFS-NEXT: vsubpd %ymm4, %ymm0, %ymm0
|
||||||
; FMA-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
|
; FMA-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
|
||||||
|
|
@ -686,7 +686,7 @@ define <8 x double> @test_v8f64_mul_y_sub_x_one(<8 x double> %x, <8 x double> %y
|
||||||
;
|
;
|
||||||
; FMA4-INFS-LABEL: test_v8f64_mul_y_sub_x_one:
|
; FMA4-INFS-LABEL: test_v8f64_mul_y_sub_x_one:
|
||||||
; FMA4-INFS: # %bb.0:
|
; FMA4-INFS: # %bb.0:
|
||||||
; FMA4-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; FMA4-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [1,1,1,1]
|
||||||
; FMA4-INFS-NEXT: vsubpd %ymm4, %ymm1, %ymm1
|
; FMA4-INFS-NEXT: vsubpd %ymm4, %ymm1, %ymm1
|
||||||
; FMA4-INFS-NEXT: vsubpd %ymm4, %ymm0, %ymm0
|
; FMA4-INFS-NEXT: vsubpd %ymm4, %ymm0, %ymm0
|
||||||
; FMA4-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
|
; FMA4-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
|
||||||
|
|
@ -723,7 +723,7 @@ define <8 x double> @test_v8f64_mul_y_sub_x_one(<8 x double> %x, <8 x double> %y
|
||||||
define <16 x float> @test_v16f32_mul_sub_x_negone_y(<16 x float> %x, <16 x float> %y) {
|
define <16 x float> @test_v16f32_mul_sub_x_negone_y(<16 x float> %x, <16 x float> %y) {
|
||||||
; FMA-INFS-LABEL: test_v16f32_mul_sub_x_negone_y:
|
; FMA-INFS-LABEL: test_v16f32_mul_sub_x_negone_y:
|
||||||
; FMA-INFS: # %bb.0:
|
; FMA-INFS: # %bb.0:
|
||||||
; FMA-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00]
|
; FMA-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [-1,-1,-1,-1,-1,-1,-1,-1]
|
||||||
; FMA-INFS-NEXT: vsubps %ymm4, %ymm1, %ymm1
|
; FMA-INFS-NEXT: vsubps %ymm4, %ymm1, %ymm1
|
||||||
; FMA-INFS-NEXT: vsubps %ymm4, %ymm0, %ymm0
|
; FMA-INFS-NEXT: vsubps %ymm4, %ymm0, %ymm0
|
||||||
; FMA-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
|
; FMA-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
|
||||||
|
|
@ -732,7 +732,7 @@ define <16 x float> @test_v16f32_mul_sub_x_negone_y(<16 x float> %x, <16 x float
|
||||||
;
|
;
|
||||||
; FMA4-INFS-LABEL: test_v16f32_mul_sub_x_negone_y:
|
; FMA4-INFS-LABEL: test_v16f32_mul_sub_x_negone_y:
|
||||||
; FMA4-INFS: # %bb.0:
|
; FMA4-INFS: # %bb.0:
|
||||||
; FMA4-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00]
|
; FMA4-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [-1,-1,-1,-1,-1,-1,-1,-1]
|
||||||
; FMA4-INFS-NEXT: vsubps %ymm4, %ymm1, %ymm1
|
; FMA4-INFS-NEXT: vsubps %ymm4, %ymm1, %ymm1
|
||||||
; FMA4-INFS-NEXT: vsubps %ymm4, %ymm0, %ymm0
|
; FMA4-INFS-NEXT: vsubps %ymm4, %ymm0, %ymm0
|
||||||
; FMA4-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
|
; FMA4-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
|
||||||
|
|
@ -769,7 +769,7 @@ define <16 x float> @test_v16f32_mul_sub_x_negone_y(<16 x float> %x, <16 x float
|
||||||
define <8 x double> @test_v8f64_mul_y_sub_x_negone(<8 x double> %x, <8 x double> %y) {
|
define <8 x double> @test_v8f64_mul_y_sub_x_negone(<8 x double> %x, <8 x double> %y) {
|
||||||
; FMA-INFS-LABEL: test_v8f64_mul_y_sub_x_negone:
|
; FMA-INFS-LABEL: test_v8f64_mul_y_sub_x_negone:
|
||||||
; FMA-INFS: # %bb.0:
|
; FMA-INFS: # %bb.0:
|
||||||
; FMA-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00]
|
; FMA-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [-1,-1,-1,-1]
|
||||||
; FMA-INFS-NEXT: vsubpd %ymm4, %ymm1, %ymm1
|
; FMA-INFS-NEXT: vsubpd %ymm4, %ymm1, %ymm1
|
||||||
; FMA-INFS-NEXT: vsubpd %ymm4, %ymm0, %ymm0
|
; FMA-INFS-NEXT: vsubpd %ymm4, %ymm0, %ymm0
|
||||||
; FMA-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
|
; FMA-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
|
||||||
|
|
@ -778,7 +778,7 @@ define <8 x double> @test_v8f64_mul_y_sub_x_negone(<8 x double> %x, <8 x double>
|
||||||
;
|
;
|
||||||
; FMA4-INFS-LABEL: test_v8f64_mul_y_sub_x_negone:
|
; FMA4-INFS-LABEL: test_v8f64_mul_y_sub_x_negone:
|
||||||
; FMA4-INFS: # %bb.0:
|
; FMA4-INFS: # %bb.0:
|
||||||
; FMA4-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00]
|
; FMA4-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [-1,-1,-1,-1]
|
||||||
; FMA4-INFS-NEXT: vsubpd %ymm4, %ymm1, %ymm1
|
; FMA4-INFS-NEXT: vsubpd %ymm4, %ymm1, %ymm1
|
||||||
; FMA4-INFS-NEXT: vsubpd %ymm4, %ymm0, %ymm0
|
; FMA4-INFS-NEXT: vsubpd %ymm4, %ymm0, %ymm0
|
||||||
; FMA4-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
|
; FMA4-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
|
||||||
|
|
@ -819,7 +819,7 @@ define <8 x double> @test_v8f64_mul_y_sub_x_negone(<8 x double> %x, <8 x double>
|
||||||
define <16 x float> @test_v16f32_interp(<16 x float> %x, <16 x float> %y, <16 x float> %t) {
|
define <16 x float> @test_v16f32_interp(<16 x float> %x, <16 x float> %y, <16 x float> %t) {
|
||||||
; FMA-INFS-LABEL: test_v16f32_interp:
|
; FMA-INFS-LABEL: test_v16f32_interp:
|
||||||
; FMA-INFS: # %bb.0:
|
; FMA-INFS: # %bb.0:
|
||||||
; FMA-INFS-NEXT: vmovaps {{.*#+}} ymm6 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; FMA-INFS-NEXT: vmovaps {{.*#+}} ymm6 = [1,1,1,1,1,1,1,1]
|
||||||
; FMA-INFS-NEXT: vsubps %ymm4, %ymm6, %ymm7
|
; FMA-INFS-NEXT: vsubps %ymm4, %ymm6, %ymm7
|
||||||
; FMA-INFS-NEXT: vsubps %ymm5, %ymm6, %ymm6
|
; FMA-INFS-NEXT: vsubps %ymm5, %ymm6, %ymm6
|
||||||
; FMA-INFS-NEXT: vmulps %ymm6, %ymm3, %ymm3
|
; FMA-INFS-NEXT: vmulps %ymm6, %ymm3, %ymm3
|
||||||
|
|
@ -830,7 +830,7 @@ define <16 x float> @test_v16f32_interp(<16 x float> %x, <16 x float> %y, <16 x
|
||||||
;
|
;
|
||||||
; FMA4-INFS-LABEL: test_v16f32_interp:
|
; FMA4-INFS-LABEL: test_v16f32_interp:
|
||||||
; FMA4-INFS: # %bb.0:
|
; FMA4-INFS: # %bb.0:
|
||||||
; FMA4-INFS-NEXT: vmovaps {{.*#+}} ymm6 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; FMA4-INFS-NEXT: vmovaps {{.*#+}} ymm6 = [1,1,1,1,1,1,1,1]
|
||||||
; FMA4-INFS-NEXT: vsubps %ymm4, %ymm6, %ymm7
|
; FMA4-INFS-NEXT: vsubps %ymm4, %ymm6, %ymm7
|
||||||
; FMA4-INFS-NEXT: vsubps %ymm5, %ymm6, %ymm6
|
; FMA4-INFS-NEXT: vsubps %ymm5, %ymm6, %ymm6
|
||||||
; FMA4-INFS-NEXT: vmulps %ymm6, %ymm3, %ymm3
|
; FMA4-INFS-NEXT: vmulps %ymm6, %ymm3, %ymm3
|
||||||
|
|
@ -878,7 +878,7 @@ define <16 x float> @test_v16f32_interp(<16 x float> %x, <16 x float> %y, <16 x
|
||||||
define <8 x double> @test_v8f64_interp(<8 x double> %x, <8 x double> %y, <8 x double> %t) {
|
define <8 x double> @test_v8f64_interp(<8 x double> %x, <8 x double> %y, <8 x double> %t) {
|
||||||
; FMA-INFS-LABEL: test_v8f64_interp:
|
; FMA-INFS-LABEL: test_v8f64_interp:
|
||||||
; FMA-INFS: # %bb.0:
|
; FMA-INFS: # %bb.0:
|
||||||
; FMA-INFS-NEXT: vmovapd {{.*#+}} ymm6 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; FMA-INFS-NEXT: vmovapd {{.*#+}} ymm6 = [1,1,1,1]
|
||||||
; FMA-INFS-NEXT: vsubpd %ymm4, %ymm6, %ymm7
|
; FMA-INFS-NEXT: vsubpd %ymm4, %ymm6, %ymm7
|
||||||
; FMA-INFS-NEXT: vsubpd %ymm5, %ymm6, %ymm6
|
; FMA-INFS-NEXT: vsubpd %ymm5, %ymm6, %ymm6
|
||||||
; FMA-INFS-NEXT: vmulpd %ymm6, %ymm3, %ymm3
|
; FMA-INFS-NEXT: vmulpd %ymm6, %ymm3, %ymm3
|
||||||
|
|
@ -889,7 +889,7 @@ define <8 x double> @test_v8f64_interp(<8 x double> %x, <8 x double> %y, <8 x do
|
||||||
;
|
;
|
||||||
; FMA4-INFS-LABEL: test_v8f64_interp:
|
; FMA4-INFS-LABEL: test_v8f64_interp:
|
||||||
; FMA4-INFS: # %bb.0:
|
; FMA4-INFS: # %bb.0:
|
||||||
; FMA4-INFS-NEXT: vmovapd {{.*#+}} ymm6 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; FMA4-INFS-NEXT: vmovapd {{.*#+}} ymm6 = [1,1,1,1]
|
||||||
; FMA4-INFS-NEXT: vsubpd %ymm4, %ymm6, %ymm7
|
; FMA4-INFS-NEXT: vsubpd %ymm4, %ymm6, %ymm7
|
||||||
; FMA4-INFS-NEXT: vsubpd %ymm5, %ymm6, %ymm6
|
; FMA4-INFS-NEXT: vsubpd %ymm5, %ymm6, %ymm6
|
||||||
; FMA4-INFS-NEXT: vmulpd %ymm6, %ymm3, %ymm3
|
; FMA4-INFS-NEXT: vmulpd %ymm6, %ymm3, %ymm3
|
||||||
|
|
@ -1143,7 +1143,7 @@ define <8 x double> @test_v8f64_fneg_fmul_no_nsz(<8 x double> %x, <8 x double> %
|
||||||
; FMA: # %bb.0:
|
; FMA: # %bb.0:
|
||||||
; FMA-NEXT: vmulpd %ymm3, %ymm1, %ymm1
|
; FMA-NEXT: vmulpd %ymm3, %ymm1, %ymm1
|
||||||
; FMA-NEXT: vmulpd %ymm2, %ymm0, %ymm0
|
; FMA-NEXT: vmulpd %ymm2, %ymm0, %ymm0
|
||||||
; FMA-NEXT: vmovapd {{.*#+}} ymm2 = [-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00]
|
; FMA-NEXT: vmovapd {{.*#+}} ymm2 = [-0,-0,-0,-0]
|
||||||
; FMA-NEXT: vxorpd %ymm2, %ymm0, %ymm0
|
; FMA-NEXT: vxorpd %ymm2, %ymm0, %ymm0
|
||||||
; FMA-NEXT: vxorpd %ymm2, %ymm1, %ymm1
|
; FMA-NEXT: vxorpd %ymm2, %ymm1, %ymm1
|
||||||
; FMA-NEXT: retq
|
; FMA-NEXT: retq
|
||||||
|
|
@ -1152,7 +1152,7 @@ define <8 x double> @test_v8f64_fneg_fmul_no_nsz(<8 x double> %x, <8 x double> %
|
||||||
; FMA4: # %bb.0:
|
; FMA4: # %bb.0:
|
||||||
; FMA4-NEXT: vmulpd %ymm3, %ymm1, %ymm1
|
; FMA4-NEXT: vmulpd %ymm3, %ymm1, %ymm1
|
||||||
; FMA4-NEXT: vmulpd %ymm2, %ymm0, %ymm0
|
; FMA4-NEXT: vmulpd %ymm2, %ymm0, %ymm0
|
||||||
; FMA4-NEXT: vmovapd {{.*#+}} ymm2 = [-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00]
|
; FMA4-NEXT: vmovapd {{.*#+}} ymm2 = [-0,-0,-0,-0]
|
||||||
; FMA4-NEXT: vxorpd %ymm2, %ymm0, %ymm0
|
; FMA4-NEXT: vxorpd %ymm2, %ymm0, %ymm0
|
||||||
; FMA4-NEXT: vxorpd %ymm2, %ymm1, %ymm1
|
; FMA4-NEXT: vxorpd %ymm2, %ymm1, %ymm1
|
||||||
; FMA4-NEXT: retq
|
; FMA4-NEXT: retq
|
||||||
|
|
|
||||||
|
|
@ -24,7 +24,7 @@ define <4 x float> @fmul2_v4f32(<4 x float> %x) {
|
||||||
define <4 x float> @constant_fold_fmul_v4f32(<4 x float> %x) {
|
define <4 x float> @constant_fold_fmul_v4f32(<4 x float> %x) {
|
||||||
; CHECK-LABEL: constant_fold_fmul_v4f32:
|
; CHECK-LABEL: constant_fold_fmul_v4f32:
|
||||||
; CHECK: # %bb.0:
|
; CHECK: # %bb.0:
|
||||||
; CHECK-NEXT: movaps {{.*#+}} xmm0 = [8.000000e+00,8.000000e+00,8.000000e+00,8.000000e+00]
|
; CHECK-NEXT: movaps {{.*#+}} xmm0 = [8,8,8,8]
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
%y = fmul <4 x float> <float 4.0, float 4.0, float 4.0, float 4.0>, <float 2.0, float 2.0, float 2.0, float 2.0>
|
%y = fmul <4 x float> <float 4.0, float 4.0, float 4.0, float 4.0>, <float 2.0, float 2.0, float 2.0, float 2.0>
|
||||||
ret <4 x float> %y
|
ret <4 x float> %y
|
||||||
|
|
|
||||||
|
|
@ -1,12 +1,14 @@
|
||||||
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||||
; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx | FileCheck %s
|
; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx | FileCheck %s
|
||||||
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s
|
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s
|
||||||
|
|
||||||
; Check that constant integers are correctly being truncated before float conversion
|
; Check that constant integers are correctly being truncated before float conversion
|
||||||
|
|
||||||
define <4 x float> @test1() {
|
define <4 x float> @test1() {
|
||||||
; CHECK-LABEL: test1
|
; CHECK-LABEL: test1:
|
||||||
; CHECK: movaps {{.*#+}} xmm0 = [-1.000000e+00,0.000000e+00,-1.000000e+00,0.000000e+00]
|
; CHECK: # %bb.0:
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: vmovaps {{.*#+}} xmm0 = [-1,0,-1,0]
|
||||||
|
; CHECK-NEXT: ret{{[l|q]}}
|
||||||
%1 = trunc <4 x i3> <i3 -1, i3 -22, i3 7, i3 8> to <4 x i1>
|
%1 = trunc <4 x i3> <i3 -1, i3 -22, i3 7, i3 8> to <4 x i1>
|
||||||
%2 = sitofp <4 x i1> %1 to <4 x float>
|
%2 = sitofp <4 x i1> %1 to <4 x float>
|
||||||
ret <4 x float> %2
|
ret <4 x float> %2
|
||||||
|
|
|
||||||
|
|
@ -127,7 +127,7 @@ define <2 x double> @trunc_unsigned_v2f64(<2 x double> %x) #0 {
|
||||||
; SSE2-NEXT: movq %rcx, %xmm1
|
; SSE2-NEXT: movq %rcx, %xmm1
|
||||||
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1127219200,1160773632,0,0]
|
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1127219200,1160773632,0,0]
|
||||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
|
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
|
||||||
; SSE2-NEXT: movapd {{.*#+}} xmm3 = [4.503600e+15,1.934281e+25]
|
; SSE2-NEXT: movapd {{.*#+}} xmm3 = [4503599627370496,1.9342813113834067E+25]
|
||||||
; SSE2-NEXT: subpd %xmm3, %xmm1
|
; SSE2-NEXT: subpd %xmm3, %xmm1
|
||||||
; SSE2-NEXT: movapd %xmm1, %xmm0
|
; SSE2-NEXT: movapd %xmm1, %xmm0
|
||||||
; SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
|
; SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
|
||||||
|
|
@ -195,7 +195,7 @@ define <4 x double> @trunc_unsigned_v4f64(<4 x double> %x) #0 {
|
||||||
; SSE2-NEXT: movq %rdx, %xmm1
|
; SSE2-NEXT: movq %rdx, %xmm1
|
||||||
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1127219200,1160773632,0,0]
|
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1127219200,1160773632,0,0]
|
||||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
|
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
|
||||||
; SSE2-NEXT: movapd {{.*#+}} xmm3 = [4.503600e+15,1.934281e+25]
|
; SSE2-NEXT: movapd {{.*#+}} xmm3 = [4503599627370496,1.9342813113834067E+25]
|
||||||
; SSE2-NEXT: subpd %xmm3, %xmm1
|
; SSE2-NEXT: subpd %xmm3, %xmm1
|
||||||
; SSE2-NEXT: movapd %xmm1, %xmm0
|
; SSE2-NEXT: movapd %xmm1, %xmm0
|
||||||
; SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
|
; SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
|
||||||
|
|
|
||||||
|
|
@ -295,7 +295,7 @@ define <8 x float> @elt6_v8f32(float %x) {
|
||||||
; X32SSE2-NEXT: movaps {{.*#+}} xmm1 = <4,5,u,7>
|
; X32SSE2-NEXT: movaps {{.*#+}} xmm1 = <4,5,u,7>
|
||||||
; X32SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
|
; X32SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
|
||||||
; X32SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2]
|
; X32SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2]
|
||||||
; X32SSE2-NEXT: movaps {{.*#+}} xmm0 = [4.200000e+01,1.000000e+00,2.000000e+00,3.000000e+00]
|
; X32SSE2-NEXT: movaps {{.*#+}} xmm0 = [42,1,2,3]
|
||||||
; X32SSE2-NEXT: retl
|
; X32SSE2-NEXT: retl
|
||||||
;
|
;
|
||||||
; X64SSE2-LABEL: elt6_v8f32:
|
; X64SSE2-LABEL: elt6_v8f32:
|
||||||
|
|
@ -303,21 +303,21 @@ define <8 x float> @elt6_v8f32(float %x) {
|
||||||
; X64SSE2-NEXT: movaps {{.*#+}} xmm1 = <4,5,u,7>
|
; X64SSE2-NEXT: movaps {{.*#+}} xmm1 = <4,5,u,7>
|
||||||
; X64SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
|
; X64SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
|
||||||
; X64SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2]
|
; X64SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2]
|
||||||
; X64SSE2-NEXT: movaps {{.*#+}} xmm0 = [4.200000e+01,1.000000e+00,2.000000e+00,3.000000e+00]
|
; X64SSE2-NEXT: movaps {{.*#+}} xmm0 = [42,1,2,3]
|
||||||
; X64SSE2-NEXT: retq
|
; X64SSE2-NEXT: retq
|
||||||
;
|
;
|
||||||
; X32SSE4-LABEL: elt6_v8f32:
|
; X32SSE4-LABEL: elt6_v8f32:
|
||||||
; X32SSE4: # %bb.0:
|
; X32SSE4: # %bb.0:
|
||||||
; X32SSE4-NEXT: movaps {{.*#+}} xmm1 = <4,5,u,7>
|
; X32SSE4-NEXT: movaps {{.*#+}} xmm1 = <4,5,u,7>
|
||||||
; X32SSE4-NEXT: insertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
|
; X32SSE4-NEXT: insertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
|
||||||
; X32SSE4-NEXT: movaps {{.*#+}} xmm0 = [4.200000e+01,1.000000e+00,2.000000e+00,3.000000e+00]
|
; X32SSE4-NEXT: movaps {{.*#+}} xmm0 = [42,1,2,3]
|
||||||
; X32SSE4-NEXT: retl
|
; X32SSE4-NEXT: retl
|
||||||
;
|
;
|
||||||
; X64SSE4-LABEL: elt6_v8f32:
|
; X64SSE4-LABEL: elt6_v8f32:
|
||||||
; X64SSE4: # %bb.0:
|
; X64SSE4: # %bb.0:
|
||||||
; X64SSE4-NEXT: movaps {{.*#+}} xmm1 = <4,5,u,7>
|
; X64SSE4-NEXT: movaps {{.*#+}} xmm1 = <4,5,u,7>
|
||||||
; X64SSE4-NEXT: insertps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0],xmm1[3]
|
; X64SSE4-NEXT: insertps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0],xmm1[3]
|
||||||
; X64SSE4-NEXT: movaps {{.*#+}} xmm0 = [4.200000e+01,1.000000e+00,2.000000e+00,3.000000e+00]
|
; X64SSE4-NEXT: movaps {{.*#+}} xmm0 = [42,1,2,3]
|
||||||
; X64SSE4-NEXT: retq
|
; X64SSE4-NEXT: retq
|
||||||
;
|
;
|
||||||
; X32AVX-LABEL: elt6_v8f32:
|
; X32AVX-LABEL: elt6_v8f32:
|
||||||
|
|
@ -415,18 +415,18 @@ define <8 x double> @elt1_v8f64(double %x) {
|
||||||
; X32SSE: # %bb.0:
|
; X32SSE: # %bb.0:
|
||||||
; X32SSE-NEXT: movapd {{.*#+}} xmm0 = <42,u>
|
; X32SSE-NEXT: movapd {{.*#+}} xmm0 = <42,u>
|
||||||
; X32SSE-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
|
; X32SSE-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
|
||||||
; X32SSE-NEXT: movaps {{.*#+}} xmm1 = [2.000000e+00,3.000000e+00]
|
; X32SSE-NEXT: movaps {{.*#+}} xmm1 = [2,3]
|
||||||
; X32SSE-NEXT: movaps {{.*#+}} xmm2 = [4.000000e+00,5.000000e+00]
|
; X32SSE-NEXT: movaps {{.*#+}} xmm2 = [4,5]
|
||||||
; X32SSE-NEXT: movaps {{.*#+}} xmm3 = [6.000000e+00,7.000000e+00]
|
; X32SSE-NEXT: movaps {{.*#+}} xmm3 = [6,7]
|
||||||
; X32SSE-NEXT: retl
|
; X32SSE-NEXT: retl
|
||||||
;
|
;
|
||||||
; X64SSE-LABEL: elt1_v8f64:
|
; X64SSE-LABEL: elt1_v8f64:
|
||||||
; X64SSE: # %bb.0:
|
; X64SSE: # %bb.0:
|
||||||
; X64SSE-NEXT: movaps {{.*#+}} xmm4 = <42,u>
|
; X64SSE-NEXT: movaps {{.*#+}} xmm4 = <42,u>
|
||||||
; X64SSE-NEXT: movlhps {{.*#+}} xmm4 = xmm4[0],xmm0[0]
|
; X64SSE-NEXT: movlhps {{.*#+}} xmm4 = xmm4[0],xmm0[0]
|
||||||
; X64SSE-NEXT: movaps {{.*#+}} xmm1 = [2.000000e+00,3.000000e+00]
|
; X64SSE-NEXT: movaps {{.*#+}} xmm1 = [2,3]
|
||||||
; X64SSE-NEXT: movaps {{.*#+}} xmm2 = [4.000000e+00,5.000000e+00]
|
; X64SSE-NEXT: movaps {{.*#+}} xmm2 = [4,5]
|
||||||
; X64SSE-NEXT: movaps {{.*#+}} xmm3 = [6.000000e+00,7.000000e+00]
|
; X64SSE-NEXT: movaps {{.*#+}} xmm3 = [6,7]
|
||||||
; X64SSE-NEXT: movaps %xmm4, %xmm0
|
; X64SSE-NEXT: movaps %xmm4, %xmm0
|
||||||
; X64SSE-NEXT: retq
|
; X64SSE-NEXT: retq
|
||||||
;
|
;
|
||||||
|
|
@ -435,7 +435,7 @@ define <8 x double> @elt1_v8f64(double %x) {
|
||||||
; X32AVX2-NEXT: vmovapd {{.*#+}} ymm0 = <42,u,2,3>
|
; X32AVX2-NEXT: vmovapd {{.*#+}} ymm0 = <42,u,2,3>
|
||||||
; X32AVX2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm0[0],mem[0]
|
; X32AVX2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm0[0],mem[0]
|
||||||
; X32AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
|
; X32AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
|
||||||
; X32AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [4.000000e+00,5.000000e+00,6.000000e+00,7.000000e+00]
|
; X32AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [4,5,6,7]
|
||||||
; X32AVX2-NEXT: retl
|
; X32AVX2-NEXT: retl
|
||||||
;
|
;
|
||||||
; X64AVX2-LABEL: elt1_v8f64:
|
; X64AVX2-LABEL: elt1_v8f64:
|
||||||
|
|
@ -443,7 +443,7 @@ define <8 x double> @elt1_v8f64(double %x) {
|
||||||
; X64AVX2-NEXT: vmovaps {{.*#+}} ymm1 = <42,u,2,3>
|
; X64AVX2-NEXT: vmovaps {{.*#+}} ymm1 = <42,u,2,3>
|
||||||
; X64AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
; X64AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
||||||
; X64AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
|
; X64AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
|
||||||
; X64AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [4.000000e+00,5.000000e+00,6.000000e+00,7.000000e+00]
|
; X64AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [4,5,6,7]
|
||||||
; X64AVX2-NEXT: retq
|
; X64AVX2-NEXT: retq
|
||||||
;
|
;
|
||||||
; X32AVX512F-LABEL: elt1_v8f64:
|
; X32AVX512F-LABEL: elt1_v8f64:
|
||||||
|
|
|
||||||
|
|
@ -166,7 +166,7 @@ define <8 x i16> @trunc_ashr_v4i64_demandedelts(<4 x i64> %a0) {
|
||||||
; X86-SSE-NEXT: movsd {{.*#+}} xmm4 = xmm2[0],xmm4[1]
|
; X86-SSE-NEXT: movsd {{.*#+}} xmm4 = xmm2[0],xmm4[1]
|
||||||
; X86-SSE-NEXT: psrlq $63, %xmm4
|
; X86-SSE-NEXT: psrlq $63, %xmm4
|
||||||
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm4[0],xmm0[1]
|
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm4[0],xmm0[1]
|
||||||
; X86-SSE-NEXT: movapd {{.*#+}} xmm2 = [4.940656e-324,-0.000000e+00]
|
; X86-SSE-NEXT: movapd {{.*#+}} xmm2 = [4.9406564584124654E-324,-0]
|
||||||
; X86-SSE-NEXT: xorpd %xmm2, %xmm0
|
; X86-SSE-NEXT: xorpd %xmm2, %xmm0
|
||||||
; X86-SSE-NEXT: psubq %xmm2, %xmm0
|
; X86-SSE-NEXT: psubq %xmm2, %xmm0
|
||||||
; X86-SSE-NEXT: psrlq $63, %xmm3
|
; X86-SSE-NEXT: psrlq $63, %xmm3
|
||||||
|
|
|
||||||
|
|
@ -56,11 +56,11 @@ define <4 x float> @pow_v4f32_one_fourth_fmf(<4 x float> %x) nounwind {
|
||||||
; CHECK-NEXT: rsqrtps %xmm0, %xmm1
|
; CHECK-NEXT: rsqrtps %xmm0, %xmm1
|
||||||
; CHECK-NEXT: movaps %xmm0, %xmm2
|
; CHECK-NEXT: movaps %xmm0, %xmm2
|
||||||
; CHECK-NEXT: mulps %xmm1, %xmm2
|
; CHECK-NEXT: mulps %xmm1, %xmm2
|
||||||
; CHECK-NEXT: movaps {{.*#+}} xmm3 = [-5.000000e-01,-5.000000e-01,-5.000000e-01,-5.000000e-01]
|
; CHECK-NEXT: movaps {{.*#+}} xmm3 = [-0.5,-0.5,-0.5,-0.5]
|
||||||
; CHECK-NEXT: movaps %xmm2, %xmm4
|
; CHECK-NEXT: movaps %xmm2, %xmm4
|
||||||
; CHECK-NEXT: mulps %xmm3, %xmm4
|
; CHECK-NEXT: mulps %xmm3, %xmm4
|
||||||
; CHECK-NEXT: mulps %xmm1, %xmm2
|
; CHECK-NEXT: mulps %xmm1, %xmm2
|
||||||
; CHECK-NEXT: movaps {{.*#+}} xmm1 = [-3.000000e+00,-3.000000e+00,-3.000000e+00,-3.000000e+00]
|
; CHECK-NEXT: movaps {{.*#+}} xmm1 = [-3,-3,-3,-3]
|
||||||
; CHECK-NEXT: addps %xmm1, %xmm2
|
; CHECK-NEXT: addps %xmm1, %xmm2
|
||||||
; CHECK-NEXT: mulps %xmm4, %xmm2
|
; CHECK-NEXT: mulps %xmm4, %xmm2
|
||||||
; CHECK-NEXT: xorps %xmm4, %xmm4
|
; CHECK-NEXT: xorps %xmm4, %xmm4
|
||||||
|
|
|
||||||
|
|
@ -19,7 +19,7 @@ define void @foo(%struct.anon* byval %p) nounwind {
|
||||||
; CHECK-NEXT: subl $28, %esp
|
; CHECK-NEXT: subl $28, %esp
|
||||||
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||||
; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||||
; CHECK-NEXT: movaps {{.*#+}} xmm2 = [-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00]
|
; CHECK-NEXT: movaps {{.*#+}} xmm2 = [-0,-0,-0,-0]
|
||||||
; CHECK-NEXT: xorps %xmm2, %xmm0
|
; CHECK-NEXT: xorps %xmm2, %xmm0
|
||||||
; CHECK-NEXT: cvtss2sd %xmm0, %xmm0
|
; CHECK-NEXT: cvtss2sd %xmm0, %xmm0
|
||||||
; CHECK-NEXT: xorps %xmm2, %xmm1
|
; CHECK-NEXT: xorps %xmm2, %xmm1
|
||||||
|
|
|
||||||
|
|
@ -8,7 +8,7 @@ define <8 x double> @test(<4 x double> %a, <4 x double> %b) {
|
||||||
; CHECK-NEXT: vblendps {{.*#+}} ymm2 = ymm0[0,1,2,3],ymm1[4,5,6,7]
|
; CHECK-NEXT: vblendps {{.*#+}} ymm2 = ymm0[0,1,2,3],ymm1[4,5,6,7]
|
||||||
; CHECK-NEXT: vblendps {{.*#+}} ymm1 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
|
; CHECK-NEXT: vblendps {{.*#+}} ymm1 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
|
||||||
; CHECK-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],ymm2[1],ymm1[3],ymm2[3]
|
; CHECK-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],ymm2[1],ymm1[3],ymm2[3]
|
||||||
; CHECK-NEXT: vmovaps {{.*#+}} xmm2 = [8.207174e-01,8.207174e-01]
|
; CHECK-NEXT: vmovaps {{.*#+}} xmm2 = [0.82071743224100002,0.82071743224100002]
|
||||||
; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
|
; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
%1 = shufflevector <4 x double> %a, <4 x double> <double undef, double 0x3FEA435134576E1C, double 0x3FEA435134576E1C, double 0x3FEA435134576E1C>, <8 x i32> <i32 6, i32 5, i32 2, i32 3, i32 5, i32 1, i32 3, i32 7>
|
%1 = shufflevector <4 x double> %a, <4 x double> <double undef, double 0x3FEA435134576E1C, double 0x3FEA435134576E1C, double 0x3FEA435134576E1C>, <8 x i32> <i32 6, i32 5, i32 2, i32 3, i32 5, i32 1, i32 3, i32 7>
|
||||||
|
|
|
||||||
|
|
@ -283,32 +283,32 @@ define float @f32_two_step(float %x) #2 {
|
||||||
define <4 x float> @v4f32_no_estimate(<4 x float> %x) #0 {
|
define <4 x float> @v4f32_no_estimate(<4 x float> %x) #0 {
|
||||||
; SSE-LABEL: v4f32_no_estimate:
|
; SSE-LABEL: v4f32_no_estimate:
|
||||||
; SSE: # %bb.0:
|
; SSE: # %bb.0:
|
||||||
; SSE-NEXT: movaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; SSE-NEXT: movaps {{.*#+}} xmm1 = [1,1,1,1]
|
||||||
; SSE-NEXT: divps %xmm0, %xmm1
|
; SSE-NEXT: divps %xmm0, %xmm1
|
||||||
; SSE-NEXT: movaps %xmm1, %xmm0
|
; SSE-NEXT: movaps %xmm1, %xmm0
|
||||||
; SSE-NEXT: retq
|
; SSE-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX-RECIP-LABEL: v4f32_no_estimate:
|
; AVX-RECIP-LABEL: v4f32_no_estimate:
|
||||||
; AVX-RECIP: # %bb.0:
|
; AVX-RECIP: # %bb.0:
|
||||||
; AVX-RECIP-NEXT: vmovaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; AVX-RECIP-NEXT: vmovaps {{.*#+}} xmm1 = [1,1,1,1]
|
||||||
; AVX-RECIP-NEXT: vdivps %xmm0, %xmm1, %xmm0
|
; AVX-RECIP-NEXT: vdivps %xmm0, %xmm1, %xmm0
|
||||||
; AVX-RECIP-NEXT: retq
|
; AVX-RECIP-NEXT: retq
|
||||||
;
|
;
|
||||||
; FMA-RECIP-LABEL: v4f32_no_estimate:
|
; FMA-RECIP-LABEL: v4f32_no_estimate:
|
||||||
; FMA-RECIP: # %bb.0:
|
; FMA-RECIP: # %bb.0:
|
||||||
; FMA-RECIP-NEXT: vmovaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; FMA-RECIP-NEXT: vmovaps {{.*#+}} xmm1 = [1,1,1,1]
|
||||||
; FMA-RECIP-NEXT: vdivps %xmm0, %xmm1, %xmm0
|
; FMA-RECIP-NEXT: vdivps %xmm0, %xmm1, %xmm0
|
||||||
; FMA-RECIP-NEXT: retq
|
; FMA-RECIP-NEXT: retq
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: v4f32_no_estimate:
|
; BTVER2-LABEL: v4f32_no_estimate:
|
||||||
; BTVER2: # %bb.0:
|
; BTVER2: # %bb.0:
|
||||||
; BTVER2-NEXT: vmovaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [5:1.00]
|
; BTVER2-NEXT: vmovaps {{.*#+}} xmm1 = [1,1,1,1] sched: [5:1.00]
|
||||||
; BTVER2-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [19:19.00]
|
; BTVER2-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [19:19.00]
|
||||||
; BTVER2-NEXT: retq # sched: [4:1.00]
|
; BTVER2-NEXT: retq # sched: [4:1.00]
|
||||||
;
|
;
|
||||||
; SANDY-LABEL: v4f32_no_estimate:
|
; SANDY-LABEL: v4f32_no_estimate:
|
||||||
; SANDY: # %bb.0:
|
; SANDY: # %bb.0:
|
||||||
; SANDY-NEXT: vmovaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50]
|
; SANDY-NEXT: vmovaps {{.*#+}} xmm1 = [1,1,1,1] sched: [6:0.50]
|
||||||
; SANDY-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [14:14.00]
|
; SANDY-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [14:14.00]
|
||||||
; SANDY-NEXT: retq # sched: [1:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
|
|
@ -344,7 +344,7 @@ define <4 x float> @v4f32_one_step(<4 x float> %x) #1 {
|
||||||
; SSE: # %bb.0:
|
; SSE: # %bb.0:
|
||||||
; SSE-NEXT: rcpps %xmm0, %xmm2
|
; SSE-NEXT: rcpps %xmm0, %xmm2
|
||||||
; SSE-NEXT: mulps %xmm2, %xmm0
|
; SSE-NEXT: mulps %xmm2, %xmm0
|
||||||
; SSE-NEXT: movaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; SSE-NEXT: movaps {{.*#+}} xmm1 = [1,1,1,1]
|
||||||
; SSE-NEXT: subps %xmm0, %xmm1
|
; SSE-NEXT: subps %xmm0, %xmm1
|
||||||
; SSE-NEXT: mulps %xmm2, %xmm1
|
; SSE-NEXT: mulps %xmm2, %xmm1
|
||||||
; SSE-NEXT: addps %xmm2, %xmm1
|
; SSE-NEXT: addps %xmm2, %xmm1
|
||||||
|
|
@ -355,7 +355,7 @@ define <4 x float> @v4f32_one_step(<4 x float> %x) #1 {
|
||||||
; AVX-RECIP: # %bb.0:
|
; AVX-RECIP: # %bb.0:
|
||||||
; AVX-RECIP-NEXT: vrcpps %xmm0, %xmm1
|
; AVX-RECIP-NEXT: vrcpps %xmm0, %xmm1
|
||||||
; AVX-RECIP-NEXT: vmulps %xmm1, %xmm0, %xmm0
|
; AVX-RECIP-NEXT: vmulps %xmm1, %xmm0, %xmm0
|
||||||
; AVX-RECIP-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; AVX-RECIP-NEXT: vmovaps {{.*#+}} xmm2 = [1,1,1,1]
|
||||||
; AVX-RECIP-NEXT: vsubps %xmm0, %xmm2, %xmm0
|
; AVX-RECIP-NEXT: vsubps %xmm0, %xmm2, %xmm0
|
||||||
; AVX-RECIP-NEXT: vmulps %xmm0, %xmm1, %xmm0
|
; AVX-RECIP-NEXT: vmulps %xmm0, %xmm1, %xmm0
|
||||||
; AVX-RECIP-NEXT: vaddps %xmm0, %xmm1, %xmm0
|
; AVX-RECIP-NEXT: vaddps %xmm0, %xmm1, %xmm0
|
||||||
|
|
@ -370,7 +370,7 @@ define <4 x float> @v4f32_one_step(<4 x float> %x) #1 {
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: v4f32_one_step:
|
; BTVER2-LABEL: v4f32_one_step:
|
||||||
; BTVER2: # %bb.0:
|
; BTVER2: # %bb.0:
|
||||||
; BTVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [5:1.00]
|
; BTVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1,1,1,1] sched: [5:1.00]
|
||||||
; BTVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [2:1.00]
|
; BTVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [2:1.00]
|
||||||
; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
|
; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
|
||||||
; BTVER2-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
|
; BTVER2-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
|
||||||
|
|
@ -382,7 +382,7 @@ define <4 x float> @v4f32_one_step(<4 x float> %x) #1 {
|
||||||
; SANDY: # %bb.0:
|
; SANDY: # %bb.0:
|
||||||
; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
|
; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
|
; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50]
|
; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1,1,1,1] sched: [6:0.50]
|
||||||
; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
|
; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
|
; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
|
; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
|
||||||
|
|
@ -430,7 +430,7 @@ define <4 x float> @v4f32_two_step(<4 x float> %x) #2 {
|
||||||
; SSE-NEXT: rcpps %xmm0, %xmm2
|
; SSE-NEXT: rcpps %xmm0, %xmm2
|
||||||
; SSE-NEXT: movaps %xmm0, %xmm3
|
; SSE-NEXT: movaps %xmm0, %xmm3
|
||||||
; SSE-NEXT: mulps %xmm2, %xmm3
|
; SSE-NEXT: mulps %xmm2, %xmm3
|
||||||
; SSE-NEXT: movaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; SSE-NEXT: movaps {{.*#+}} xmm1 = [1,1,1,1]
|
||||||
; SSE-NEXT: movaps %xmm1, %xmm4
|
; SSE-NEXT: movaps %xmm1, %xmm4
|
||||||
; SSE-NEXT: subps %xmm3, %xmm4
|
; SSE-NEXT: subps %xmm3, %xmm4
|
||||||
; SSE-NEXT: mulps %xmm2, %xmm4
|
; SSE-NEXT: mulps %xmm2, %xmm4
|
||||||
|
|
@ -446,7 +446,7 @@ define <4 x float> @v4f32_two_step(<4 x float> %x) #2 {
|
||||||
; AVX-RECIP: # %bb.0:
|
; AVX-RECIP: # %bb.0:
|
||||||
; AVX-RECIP-NEXT: vrcpps %xmm0, %xmm1
|
; AVX-RECIP-NEXT: vrcpps %xmm0, %xmm1
|
||||||
; AVX-RECIP-NEXT: vmulps %xmm1, %xmm0, %xmm2
|
; AVX-RECIP-NEXT: vmulps %xmm1, %xmm0, %xmm2
|
||||||
; AVX-RECIP-NEXT: vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; AVX-RECIP-NEXT: vmovaps {{.*#+}} xmm3 = [1,1,1,1]
|
||||||
; AVX-RECIP-NEXT: vsubps %xmm2, %xmm3, %xmm2
|
; AVX-RECIP-NEXT: vsubps %xmm2, %xmm3, %xmm2
|
||||||
; AVX-RECIP-NEXT: vmulps %xmm2, %xmm1, %xmm2
|
; AVX-RECIP-NEXT: vmulps %xmm2, %xmm1, %xmm2
|
||||||
; AVX-RECIP-NEXT: vaddps %xmm2, %xmm1, %xmm1
|
; AVX-RECIP-NEXT: vaddps %xmm2, %xmm1, %xmm1
|
||||||
|
|
@ -459,7 +459,7 @@ define <4 x float> @v4f32_two_step(<4 x float> %x) #2 {
|
||||||
; FMA-RECIP-LABEL: v4f32_two_step:
|
; FMA-RECIP-LABEL: v4f32_two_step:
|
||||||
; FMA-RECIP: # %bb.0:
|
; FMA-RECIP: # %bb.0:
|
||||||
; FMA-RECIP-NEXT: vrcpps %xmm0, %xmm1
|
; FMA-RECIP-NEXT: vrcpps %xmm0, %xmm1
|
||||||
; FMA-RECIP-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; FMA-RECIP-NEXT: vmovaps {{.*#+}} xmm2 = [1,1,1,1]
|
||||||
; FMA-RECIP-NEXT: vmovaps %xmm1, %xmm3
|
; FMA-RECIP-NEXT: vmovaps %xmm1, %xmm3
|
||||||
; FMA-RECIP-NEXT: vfnmadd213ps {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2
|
; FMA-RECIP-NEXT: vfnmadd213ps {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2
|
||||||
; FMA-RECIP-NEXT: vfmadd132ps {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1
|
; FMA-RECIP-NEXT: vfmadd132ps {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1
|
||||||
|
|
@ -469,7 +469,7 @@ define <4 x float> @v4f32_two_step(<4 x float> %x) #2 {
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: v4f32_two_step:
|
; BTVER2-LABEL: v4f32_two_step:
|
||||||
; BTVER2: # %bb.0:
|
; BTVER2: # %bb.0:
|
||||||
; BTVER2-NEXT: vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [5:1.00]
|
; BTVER2-NEXT: vmovaps {{.*#+}} xmm3 = [1,1,1,1] sched: [5:1.00]
|
||||||
; BTVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [2:1.00]
|
; BTVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [2:1.00]
|
||||||
; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [2:1.00]
|
; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [2:1.00]
|
||||||
; BTVER2-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
|
; BTVER2-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
|
||||||
|
|
@ -485,7 +485,7 @@ define <4 x float> @v4f32_two_step(<4 x float> %x) #2 {
|
||||||
; SANDY: # %bb.0:
|
; SANDY: # %bb.0:
|
||||||
; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
|
; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [5:1.00]
|
; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50]
|
; SANDY-NEXT: vmovaps {{.*#+}} xmm3 = [1,1,1,1] sched: [6:0.50]
|
||||||
; SANDY-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
|
; SANDY-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: vmulps %xmm2, %xmm1, %xmm2 # sched: [5:1.00]
|
; SANDY-NEXT: vmulps %xmm2, %xmm1, %xmm2 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vaddps %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
|
; SANDY-NEXT: vaddps %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
|
||||||
|
|
@ -548,7 +548,7 @@ define <4 x float> @v4f32_two_step(<4 x float> %x) #2 {
|
||||||
define <8 x float> @v8f32_no_estimate(<8 x float> %x) #0 {
|
define <8 x float> @v8f32_no_estimate(<8 x float> %x) #0 {
|
||||||
; SSE-LABEL: v8f32_no_estimate:
|
; SSE-LABEL: v8f32_no_estimate:
|
||||||
; SSE: # %bb.0:
|
; SSE: # %bb.0:
|
||||||
; SSE-NEXT: movaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; SSE-NEXT: movaps {{.*#+}} xmm2 = [1,1,1,1]
|
||||||
; SSE-NEXT: movaps %xmm2, %xmm3
|
; SSE-NEXT: movaps %xmm2, %xmm3
|
||||||
; SSE-NEXT: divps %xmm0, %xmm3
|
; SSE-NEXT: divps %xmm0, %xmm3
|
||||||
; SSE-NEXT: divps %xmm1, %xmm2
|
; SSE-NEXT: divps %xmm1, %xmm2
|
||||||
|
|
@ -558,25 +558,25 @@ define <8 x float> @v8f32_no_estimate(<8 x float> %x) #0 {
|
||||||
;
|
;
|
||||||
; AVX-RECIP-LABEL: v8f32_no_estimate:
|
; AVX-RECIP-LABEL: v8f32_no_estimate:
|
||||||
; AVX-RECIP: # %bb.0:
|
; AVX-RECIP: # %bb.0:
|
||||||
; AVX-RECIP-NEXT: vmovaps {{.*#+}} ymm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; AVX-RECIP-NEXT: vmovaps {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1]
|
||||||
; AVX-RECIP-NEXT: vdivps %ymm0, %ymm1, %ymm0
|
; AVX-RECIP-NEXT: vdivps %ymm0, %ymm1, %ymm0
|
||||||
; AVX-RECIP-NEXT: retq
|
; AVX-RECIP-NEXT: retq
|
||||||
;
|
;
|
||||||
; FMA-RECIP-LABEL: v8f32_no_estimate:
|
; FMA-RECIP-LABEL: v8f32_no_estimate:
|
||||||
; FMA-RECIP: # %bb.0:
|
; FMA-RECIP: # %bb.0:
|
||||||
; FMA-RECIP-NEXT: vmovaps {{.*#+}} ymm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; FMA-RECIP-NEXT: vmovaps {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1]
|
||||||
; FMA-RECIP-NEXT: vdivps %ymm0, %ymm1, %ymm0
|
; FMA-RECIP-NEXT: vdivps %ymm0, %ymm1, %ymm0
|
||||||
; FMA-RECIP-NEXT: retq
|
; FMA-RECIP-NEXT: retq
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: v8f32_no_estimate:
|
; BTVER2-LABEL: v8f32_no_estimate:
|
||||||
; BTVER2: # %bb.0:
|
; BTVER2: # %bb.0:
|
||||||
; BTVER2-NEXT: vmovaps {{.*#+}} ymm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [5:1.00]
|
; BTVER2-NEXT: vmovaps {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
|
||||||
; BTVER2-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [38:38.00]
|
; BTVER2-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [38:38.00]
|
||||||
; BTVER2-NEXT: retq # sched: [4:1.00]
|
; BTVER2-NEXT: retq # sched: [4:1.00]
|
||||||
;
|
;
|
||||||
; SANDY-LABEL: v8f32_no_estimate:
|
; SANDY-LABEL: v8f32_no_estimate:
|
||||||
; SANDY: # %bb.0:
|
; SANDY: # %bb.0:
|
||||||
; SANDY-NEXT: vmovaps {{.*#+}} ymm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50]
|
; SANDY-NEXT: vmovaps {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
|
||||||
; SANDY-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [29:28.00]
|
; SANDY-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [29:28.00]
|
||||||
; SANDY-NEXT: retq # sched: [1:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
|
|
@ -612,7 +612,7 @@ define <8 x float> @v8f32_one_step(<8 x float> %x) #1 {
|
||||||
; SSE: # %bb.0:
|
; SSE: # %bb.0:
|
||||||
; SSE-NEXT: rcpps %xmm0, %xmm4
|
; SSE-NEXT: rcpps %xmm0, %xmm4
|
||||||
; SSE-NEXT: mulps %xmm4, %xmm0
|
; SSE-NEXT: mulps %xmm4, %xmm0
|
||||||
; SSE-NEXT: movaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; SSE-NEXT: movaps {{.*#+}} xmm2 = [1,1,1,1]
|
||||||
; SSE-NEXT: movaps %xmm2, %xmm3
|
; SSE-NEXT: movaps %xmm2, %xmm3
|
||||||
; SSE-NEXT: subps %xmm0, %xmm3
|
; SSE-NEXT: subps %xmm0, %xmm3
|
||||||
; SSE-NEXT: mulps %xmm4, %xmm3
|
; SSE-NEXT: mulps %xmm4, %xmm3
|
||||||
|
|
@ -630,7 +630,7 @@ define <8 x float> @v8f32_one_step(<8 x float> %x) #1 {
|
||||||
; AVX-RECIP: # %bb.0:
|
; AVX-RECIP: # %bb.0:
|
||||||
; AVX-RECIP-NEXT: vrcpps %ymm0, %ymm1
|
; AVX-RECIP-NEXT: vrcpps %ymm0, %ymm1
|
||||||
; AVX-RECIP-NEXT: vmulps %ymm1, %ymm0, %ymm0
|
; AVX-RECIP-NEXT: vmulps %ymm1, %ymm0, %ymm0
|
||||||
; AVX-RECIP-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; AVX-RECIP-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1]
|
||||||
; AVX-RECIP-NEXT: vsubps %ymm0, %ymm2, %ymm0
|
; AVX-RECIP-NEXT: vsubps %ymm0, %ymm2, %ymm0
|
||||||
; AVX-RECIP-NEXT: vmulps %ymm0, %ymm1, %ymm0
|
; AVX-RECIP-NEXT: vmulps %ymm0, %ymm1, %ymm0
|
||||||
; AVX-RECIP-NEXT: vaddps %ymm0, %ymm1, %ymm0
|
; AVX-RECIP-NEXT: vaddps %ymm0, %ymm1, %ymm0
|
||||||
|
|
@ -645,7 +645,7 @@ define <8 x float> @v8f32_one_step(<8 x float> %x) #1 {
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: v8f32_one_step:
|
; BTVER2-LABEL: v8f32_one_step:
|
||||||
; BTVER2: # %bb.0:
|
; BTVER2: # %bb.0:
|
||||||
; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [5:1.00]
|
; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
|
||||||
; BTVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [2:2.00]
|
; BTVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [2:2.00]
|
||||||
; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
|
; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
|
||||||
; BTVER2-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:2.00]
|
; BTVER2-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:2.00]
|
||||||
|
|
@ -657,7 +657,7 @@ define <8 x float> @v8f32_one_step(<8 x float> %x) #1 {
|
||||||
; SANDY: # %bb.0:
|
; SANDY: # %bb.0:
|
||||||
; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
|
; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
|
||||||
; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
|
; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50]
|
; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
|
||||||
; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
|
; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
|
; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
|
; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
|
||||||
|
|
@ -706,7 +706,7 @@ define <8 x float> @v8f32_two_step(<8 x float> %x) #2 {
|
||||||
; SSE-NEXT: rcpps %xmm0, %xmm3
|
; SSE-NEXT: rcpps %xmm0, %xmm3
|
||||||
; SSE-NEXT: movaps %xmm0, %xmm4
|
; SSE-NEXT: movaps %xmm0, %xmm4
|
||||||
; SSE-NEXT: mulps %xmm3, %xmm4
|
; SSE-NEXT: mulps %xmm3, %xmm4
|
||||||
; SSE-NEXT: movaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; SSE-NEXT: movaps {{.*#+}} xmm1 = [1,1,1,1]
|
||||||
; SSE-NEXT: movaps %xmm1, %xmm5
|
; SSE-NEXT: movaps %xmm1, %xmm5
|
||||||
; SSE-NEXT: subps %xmm4, %xmm5
|
; SSE-NEXT: subps %xmm4, %xmm5
|
||||||
; SSE-NEXT: mulps %xmm3, %xmm5
|
; SSE-NEXT: mulps %xmm3, %xmm5
|
||||||
|
|
@ -734,7 +734,7 @@ define <8 x float> @v8f32_two_step(<8 x float> %x) #2 {
|
||||||
; AVX-RECIP: # %bb.0:
|
; AVX-RECIP: # %bb.0:
|
||||||
; AVX-RECIP-NEXT: vrcpps %ymm0, %ymm1
|
; AVX-RECIP-NEXT: vrcpps %ymm0, %ymm1
|
||||||
; AVX-RECIP-NEXT: vmulps %ymm1, %ymm0, %ymm2
|
; AVX-RECIP-NEXT: vmulps %ymm1, %ymm0, %ymm2
|
||||||
; AVX-RECIP-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; AVX-RECIP-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1]
|
||||||
; AVX-RECIP-NEXT: vsubps %ymm2, %ymm3, %ymm2
|
; AVX-RECIP-NEXT: vsubps %ymm2, %ymm3, %ymm2
|
||||||
; AVX-RECIP-NEXT: vmulps %ymm2, %ymm1, %ymm2
|
; AVX-RECIP-NEXT: vmulps %ymm2, %ymm1, %ymm2
|
||||||
; AVX-RECIP-NEXT: vaddps %ymm2, %ymm1, %ymm1
|
; AVX-RECIP-NEXT: vaddps %ymm2, %ymm1, %ymm1
|
||||||
|
|
@ -747,7 +747,7 @@ define <8 x float> @v8f32_two_step(<8 x float> %x) #2 {
|
||||||
; FMA-RECIP-LABEL: v8f32_two_step:
|
; FMA-RECIP-LABEL: v8f32_two_step:
|
||||||
; FMA-RECIP: # %bb.0:
|
; FMA-RECIP: # %bb.0:
|
||||||
; FMA-RECIP-NEXT: vrcpps %ymm0, %ymm1
|
; FMA-RECIP-NEXT: vrcpps %ymm0, %ymm1
|
||||||
; FMA-RECIP-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; FMA-RECIP-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1]
|
||||||
; FMA-RECIP-NEXT: vmovaps %ymm1, %ymm3
|
; FMA-RECIP-NEXT: vmovaps %ymm1, %ymm3
|
||||||
; FMA-RECIP-NEXT: vfnmadd213ps {{.*#+}} ymm3 = -(ymm0 * ymm3) + ymm2
|
; FMA-RECIP-NEXT: vfnmadd213ps {{.*#+}} ymm3 = -(ymm0 * ymm3) + ymm2
|
||||||
; FMA-RECIP-NEXT: vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm1) + ymm1
|
; FMA-RECIP-NEXT: vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm1) + ymm1
|
||||||
|
|
@ -757,7 +757,7 @@ define <8 x float> @v8f32_two_step(<8 x float> %x) #2 {
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: v8f32_two_step:
|
; BTVER2-LABEL: v8f32_two_step:
|
||||||
; BTVER2: # %bb.0:
|
; BTVER2: # %bb.0:
|
||||||
; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [5:1.00]
|
; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
|
||||||
; BTVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [2:2.00]
|
; BTVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [2:2.00]
|
||||||
; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [2:2.00]
|
; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [2:2.00]
|
||||||
; BTVER2-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:2.00]
|
; BTVER2-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:2.00]
|
||||||
|
|
@ -773,7 +773,7 @@ define <8 x float> @v8f32_two_step(<8 x float> %x) #2 {
|
||||||
; SANDY: # %bb.0:
|
; SANDY: # %bb.0:
|
||||||
; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
|
; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
|
||||||
; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [5:1.00]
|
; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50]
|
; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
|
||||||
; SANDY-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:1.00]
|
; SANDY-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [5:1.00]
|
; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vaddps %ymm2, %ymm1, %ymm1 # sched: [3:1.00]
|
; SANDY-NEXT: vaddps %ymm2, %ymm1, %ymm1 # sched: [3:1.00]
|
||||||
|
|
@ -836,7 +836,7 @@ define <8 x float> @v8f32_two_step(<8 x float> %x) #2 {
|
||||||
define <16 x float> @v16f32_no_estimate(<16 x float> %x) #0 {
|
define <16 x float> @v16f32_no_estimate(<16 x float> %x) #0 {
|
||||||
; SSE-LABEL: v16f32_no_estimate:
|
; SSE-LABEL: v16f32_no_estimate:
|
||||||
; SSE: # %bb.0:
|
; SSE: # %bb.0:
|
||||||
; SSE-NEXT: movaps {{.*#+}} xmm4 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; SSE-NEXT: movaps {{.*#+}} xmm4 = [1,1,1,1]
|
||||||
; SSE-NEXT: movaps %xmm4, %xmm5
|
; SSE-NEXT: movaps %xmm4, %xmm5
|
||||||
; SSE-NEXT: divps %xmm0, %xmm5
|
; SSE-NEXT: divps %xmm0, %xmm5
|
||||||
; SSE-NEXT: movaps %xmm4, %xmm6
|
; SSE-NEXT: movaps %xmm4, %xmm6
|
||||||
|
|
@ -852,28 +852,28 @@ define <16 x float> @v16f32_no_estimate(<16 x float> %x) #0 {
|
||||||
;
|
;
|
||||||
; AVX-RECIP-LABEL: v16f32_no_estimate:
|
; AVX-RECIP-LABEL: v16f32_no_estimate:
|
||||||
; AVX-RECIP: # %bb.0:
|
; AVX-RECIP: # %bb.0:
|
||||||
; AVX-RECIP-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; AVX-RECIP-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1]
|
||||||
; AVX-RECIP-NEXT: vdivps %ymm0, %ymm2, %ymm0
|
; AVX-RECIP-NEXT: vdivps %ymm0, %ymm2, %ymm0
|
||||||
; AVX-RECIP-NEXT: vdivps %ymm1, %ymm2, %ymm1
|
; AVX-RECIP-NEXT: vdivps %ymm1, %ymm2, %ymm1
|
||||||
; AVX-RECIP-NEXT: retq
|
; AVX-RECIP-NEXT: retq
|
||||||
;
|
;
|
||||||
; FMA-RECIP-LABEL: v16f32_no_estimate:
|
; FMA-RECIP-LABEL: v16f32_no_estimate:
|
||||||
; FMA-RECIP: # %bb.0:
|
; FMA-RECIP: # %bb.0:
|
||||||
; FMA-RECIP-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; FMA-RECIP-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1]
|
||||||
; FMA-RECIP-NEXT: vdivps %ymm0, %ymm2, %ymm0
|
; FMA-RECIP-NEXT: vdivps %ymm0, %ymm2, %ymm0
|
||||||
; FMA-RECIP-NEXT: vdivps %ymm1, %ymm2, %ymm1
|
; FMA-RECIP-NEXT: vdivps %ymm1, %ymm2, %ymm1
|
||||||
; FMA-RECIP-NEXT: retq
|
; FMA-RECIP-NEXT: retq
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: v16f32_no_estimate:
|
; BTVER2-LABEL: v16f32_no_estimate:
|
||||||
; BTVER2: # %bb.0:
|
; BTVER2: # %bb.0:
|
||||||
; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [5:1.00]
|
; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
|
||||||
; BTVER2-NEXT: vdivps %ymm0, %ymm2, %ymm0 # sched: [38:38.00]
|
; BTVER2-NEXT: vdivps %ymm0, %ymm2, %ymm0 # sched: [38:38.00]
|
||||||
; BTVER2-NEXT: vdivps %ymm1, %ymm2, %ymm1 # sched: [38:38.00]
|
; BTVER2-NEXT: vdivps %ymm1, %ymm2, %ymm1 # sched: [38:38.00]
|
||||||
; BTVER2-NEXT: retq # sched: [4:1.00]
|
; BTVER2-NEXT: retq # sched: [4:1.00]
|
||||||
;
|
;
|
||||||
; SANDY-LABEL: v16f32_no_estimate:
|
; SANDY-LABEL: v16f32_no_estimate:
|
||||||
; SANDY: # %bb.0:
|
; SANDY: # %bb.0:
|
||||||
; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50]
|
; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
|
||||||
; SANDY-NEXT: vdivps %ymm0, %ymm2, %ymm0 # sched: [29:28.00]
|
; SANDY-NEXT: vdivps %ymm0, %ymm2, %ymm0 # sched: [29:28.00]
|
||||||
; SANDY-NEXT: vdivps %ymm1, %ymm2, %ymm1 # sched: [29:28.00]
|
; SANDY-NEXT: vdivps %ymm1, %ymm2, %ymm1 # sched: [29:28.00]
|
||||||
; SANDY-NEXT: retq # sched: [1:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
|
|
@ -914,7 +914,7 @@ define <16 x float> @v16f32_one_step(<16 x float> %x) #1 {
|
||||||
; SSE-NEXT: movaps %xmm0, %xmm5
|
; SSE-NEXT: movaps %xmm0, %xmm5
|
||||||
; SSE-NEXT: rcpps %xmm0, %xmm6
|
; SSE-NEXT: rcpps %xmm0, %xmm6
|
||||||
; SSE-NEXT: mulps %xmm6, %xmm5
|
; SSE-NEXT: mulps %xmm6, %xmm5
|
||||||
; SSE-NEXT: movaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; SSE-NEXT: movaps {{.*#+}} xmm3 = [1,1,1,1]
|
||||||
; SSE-NEXT: movaps %xmm3, %xmm0
|
; SSE-NEXT: movaps %xmm3, %xmm0
|
||||||
; SSE-NEXT: subps %xmm5, %xmm0
|
; SSE-NEXT: subps %xmm5, %xmm0
|
||||||
; SSE-NEXT: mulps %xmm6, %xmm0
|
; SSE-NEXT: mulps %xmm6, %xmm0
|
||||||
|
|
@ -944,7 +944,7 @@ define <16 x float> @v16f32_one_step(<16 x float> %x) #1 {
|
||||||
; AVX-RECIP: # %bb.0:
|
; AVX-RECIP: # %bb.0:
|
||||||
; AVX-RECIP-NEXT: vrcpps %ymm0, %ymm2
|
; AVX-RECIP-NEXT: vrcpps %ymm0, %ymm2
|
||||||
; AVX-RECIP-NEXT: vmulps %ymm2, %ymm0, %ymm0
|
; AVX-RECIP-NEXT: vmulps %ymm2, %ymm0, %ymm0
|
||||||
; AVX-RECIP-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; AVX-RECIP-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1]
|
||||||
; AVX-RECIP-NEXT: vsubps %ymm0, %ymm3, %ymm0
|
; AVX-RECIP-NEXT: vsubps %ymm0, %ymm3, %ymm0
|
||||||
; AVX-RECIP-NEXT: vmulps %ymm0, %ymm2, %ymm0
|
; AVX-RECIP-NEXT: vmulps %ymm0, %ymm2, %ymm0
|
||||||
; AVX-RECIP-NEXT: vaddps %ymm0, %ymm2, %ymm0
|
; AVX-RECIP-NEXT: vaddps %ymm0, %ymm2, %ymm0
|
||||||
|
|
@ -958,7 +958,7 @@ define <16 x float> @v16f32_one_step(<16 x float> %x) #1 {
|
||||||
; FMA-RECIP-LABEL: v16f32_one_step:
|
; FMA-RECIP-LABEL: v16f32_one_step:
|
||||||
; FMA-RECIP: # %bb.0:
|
; FMA-RECIP: # %bb.0:
|
||||||
; FMA-RECIP-NEXT: vrcpps %ymm0, %ymm2
|
; FMA-RECIP-NEXT: vrcpps %ymm0, %ymm2
|
||||||
; FMA-RECIP-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; FMA-RECIP-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1]
|
||||||
; FMA-RECIP-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm2 * ymm0) + ymm3
|
; FMA-RECIP-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm2 * ymm0) + ymm3
|
||||||
; FMA-RECIP-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm2
|
; FMA-RECIP-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm2
|
||||||
; FMA-RECIP-NEXT: vrcpps %ymm1, %ymm2
|
; FMA-RECIP-NEXT: vrcpps %ymm1, %ymm2
|
||||||
|
|
@ -968,7 +968,7 @@ define <16 x float> @v16f32_one_step(<16 x float> %x) #1 {
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: v16f32_one_step:
|
; BTVER2-LABEL: v16f32_one_step:
|
||||||
; BTVER2: # %bb.0:
|
; BTVER2: # %bb.0:
|
||||||
; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [5:1.00]
|
; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
|
||||||
; BTVER2-NEXT: vrcpps %ymm0, %ymm2 # sched: [2:2.00]
|
; BTVER2-NEXT: vrcpps %ymm0, %ymm2 # sched: [2:2.00]
|
||||||
; BTVER2-NEXT: vrcpps %ymm1, %ymm4 # sched: [2:2.00]
|
; BTVER2-NEXT: vrcpps %ymm1, %ymm4 # sched: [2:2.00]
|
||||||
; BTVER2-NEXT: vmulps %ymm2, %ymm0, %ymm0 # sched: [2:2.00]
|
; BTVER2-NEXT: vmulps %ymm2, %ymm0, %ymm0 # sched: [2:2.00]
|
||||||
|
|
@ -985,7 +985,7 @@ define <16 x float> @v16f32_one_step(<16 x float> %x) #1 {
|
||||||
; SANDY: # %bb.0:
|
; SANDY: # %bb.0:
|
||||||
; SANDY-NEXT: vrcpps %ymm0, %ymm2 # sched: [7:2.00]
|
; SANDY-NEXT: vrcpps %ymm0, %ymm2 # sched: [7:2.00]
|
||||||
; SANDY-NEXT: vmulps %ymm2, %ymm0, %ymm0 # sched: [5:1.00]
|
; SANDY-NEXT: vmulps %ymm2, %ymm0, %ymm0 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50]
|
; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
|
||||||
; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00]
|
; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: vmulps %ymm0, %ymm2, %ymm0 # sched: [5:1.00]
|
; SANDY-NEXT: vmulps %ymm0, %ymm2, %ymm0 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vaddps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
|
; SANDY-NEXT: vaddps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
|
||||||
|
|
@ -1048,7 +1048,7 @@ define <16 x float> @v16f32_two_step(<16 x float> %x) #2 {
|
||||||
; SSE-NEXT: rcpps %xmm0, %xmm0
|
; SSE-NEXT: rcpps %xmm0, %xmm0
|
||||||
; SSE-NEXT: movaps %xmm1, %xmm6
|
; SSE-NEXT: movaps %xmm1, %xmm6
|
||||||
; SSE-NEXT: mulps %xmm0, %xmm6
|
; SSE-NEXT: mulps %xmm0, %xmm6
|
||||||
; SSE-NEXT: movaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; SSE-NEXT: movaps {{.*#+}} xmm3 = [1,1,1,1]
|
||||||
; SSE-NEXT: movaps %xmm3, %xmm7
|
; SSE-NEXT: movaps %xmm3, %xmm7
|
||||||
; SSE-NEXT: subps %xmm6, %xmm7
|
; SSE-NEXT: subps %xmm6, %xmm7
|
||||||
; SSE-NEXT: mulps %xmm0, %xmm7
|
; SSE-NEXT: mulps %xmm0, %xmm7
|
||||||
|
|
@ -1100,7 +1100,7 @@ define <16 x float> @v16f32_two_step(<16 x float> %x) #2 {
|
||||||
; AVX-RECIP: # %bb.0:
|
; AVX-RECIP: # %bb.0:
|
||||||
; AVX-RECIP-NEXT: vrcpps %ymm0, %ymm2
|
; AVX-RECIP-NEXT: vrcpps %ymm0, %ymm2
|
||||||
; AVX-RECIP-NEXT: vmulps %ymm2, %ymm0, %ymm3
|
; AVX-RECIP-NEXT: vmulps %ymm2, %ymm0, %ymm3
|
||||||
; AVX-RECIP-NEXT: vmovaps {{.*#+}} ymm4 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; AVX-RECIP-NEXT: vmovaps {{.*#+}} ymm4 = [1,1,1,1,1,1,1,1]
|
||||||
; AVX-RECIP-NEXT: vsubps %ymm3, %ymm4, %ymm3
|
; AVX-RECIP-NEXT: vsubps %ymm3, %ymm4, %ymm3
|
||||||
; AVX-RECIP-NEXT: vmulps %ymm3, %ymm2, %ymm3
|
; AVX-RECIP-NEXT: vmulps %ymm3, %ymm2, %ymm3
|
||||||
; AVX-RECIP-NEXT: vaddps %ymm3, %ymm2, %ymm2
|
; AVX-RECIP-NEXT: vaddps %ymm3, %ymm2, %ymm2
|
||||||
|
|
@ -1122,7 +1122,7 @@ define <16 x float> @v16f32_two_step(<16 x float> %x) #2 {
|
||||||
; FMA-RECIP-LABEL: v16f32_two_step:
|
; FMA-RECIP-LABEL: v16f32_two_step:
|
||||||
; FMA-RECIP: # %bb.0:
|
; FMA-RECIP: # %bb.0:
|
||||||
; FMA-RECIP-NEXT: vrcpps %ymm0, %ymm2
|
; FMA-RECIP-NEXT: vrcpps %ymm0, %ymm2
|
||||||
; FMA-RECIP-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; FMA-RECIP-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1]
|
||||||
; FMA-RECIP-NEXT: vmovaps %ymm2, %ymm4
|
; FMA-RECIP-NEXT: vmovaps %ymm2, %ymm4
|
||||||
; FMA-RECIP-NEXT: vfnmadd213ps {{.*#+}} ymm4 = -(ymm0 * ymm4) + ymm3
|
; FMA-RECIP-NEXT: vfnmadd213ps {{.*#+}} ymm4 = -(ymm0 * ymm4) + ymm3
|
||||||
; FMA-RECIP-NEXT: vfmadd132ps {{.*#+}} ymm4 = (ymm4 * ymm2) + ymm2
|
; FMA-RECIP-NEXT: vfmadd132ps {{.*#+}} ymm4 = (ymm4 * ymm2) + ymm2
|
||||||
|
|
@ -1138,7 +1138,7 @@ define <16 x float> @v16f32_two_step(<16 x float> %x) #2 {
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: v16f32_two_step:
|
; BTVER2-LABEL: v16f32_two_step:
|
||||||
; BTVER2: # %bb.0:
|
; BTVER2: # %bb.0:
|
||||||
; BTVER2-NEXT: vmovaps {{.*#+}} ymm4 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [5:1.00]
|
; BTVER2-NEXT: vmovaps {{.*#+}} ymm4 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
|
||||||
; BTVER2-NEXT: vrcpps %ymm0, %ymm2 # sched: [2:2.00]
|
; BTVER2-NEXT: vrcpps %ymm0, %ymm2 # sched: [2:2.00]
|
||||||
; BTVER2-NEXT: vmulps %ymm2, %ymm0, %ymm3 # sched: [2:2.00]
|
; BTVER2-NEXT: vmulps %ymm2, %ymm0, %ymm3 # sched: [2:2.00]
|
||||||
; BTVER2-NEXT: vsubps %ymm3, %ymm4, %ymm3 # sched: [3:2.00]
|
; BTVER2-NEXT: vsubps %ymm3, %ymm4, %ymm3 # sched: [3:2.00]
|
||||||
|
|
@ -1163,7 +1163,7 @@ define <16 x float> @v16f32_two_step(<16 x float> %x) #2 {
|
||||||
; SANDY: # %bb.0:
|
; SANDY: # %bb.0:
|
||||||
; SANDY-NEXT: vrcpps %ymm0, %ymm2 # sched: [7:2.00]
|
; SANDY-NEXT: vrcpps %ymm0, %ymm2 # sched: [7:2.00]
|
||||||
; SANDY-NEXT: vmulps %ymm2, %ymm0, %ymm3 # sched: [5:1.00]
|
; SANDY-NEXT: vmulps %ymm2, %ymm0, %ymm3 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vmovaps {{.*#+}} ymm4 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50]
|
; SANDY-NEXT: vmovaps {{.*#+}} ymm4 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
|
||||||
; SANDY-NEXT: vsubps %ymm3, %ymm4, %ymm3 # sched: [3:1.00]
|
; SANDY-NEXT: vsubps %ymm3, %ymm4, %ymm3 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: vmulps %ymm3, %ymm2, %ymm3 # sched: [5:1.00]
|
; SANDY-NEXT: vmulps %ymm3, %ymm2, %ymm3 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vaddps %ymm3, %ymm2, %ymm2 # sched: [3:1.00]
|
; SANDY-NEXT: vaddps %ymm3, %ymm2, %ymm2 # sched: [3:1.00]
|
||||||
|
|
|
||||||
|
|
@ -398,7 +398,7 @@ define <4 x float> @v4f32_one_step2(<4 x float> %x) #1 {
|
||||||
; SSE: # %bb.0:
|
; SSE: # %bb.0:
|
||||||
; SSE-NEXT: rcpps %xmm0, %xmm2
|
; SSE-NEXT: rcpps %xmm0, %xmm2
|
||||||
; SSE-NEXT: mulps %xmm2, %xmm0
|
; SSE-NEXT: mulps %xmm2, %xmm0
|
||||||
; SSE-NEXT: movaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; SSE-NEXT: movaps {{.*#+}} xmm1 = [1,1,1,1]
|
||||||
; SSE-NEXT: subps %xmm0, %xmm1
|
; SSE-NEXT: subps %xmm0, %xmm1
|
||||||
; SSE-NEXT: mulps %xmm2, %xmm1
|
; SSE-NEXT: mulps %xmm2, %xmm1
|
||||||
; SSE-NEXT: addps %xmm2, %xmm1
|
; SSE-NEXT: addps %xmm2, %xmm1
|
||||||
|
|
@ -410,7 +410,7 @@ define <4 x float> @v4f32_one_step2(<4 x float> %x) #1 {
|
||||||
; AVX-RECIP: # %bb.0:
|
; AVX-RECIP: # %bb.0:
|
||||||
; AVX-RECIP-NEXT: vrcpps %xmm0, %xmm1
|
; AVX-RECIP-NEXT: vrcpps %xmm0, %xmm1
|
||||||
; AVX-RECIP-NEXT: vmulps %xmm1, %xmm0, %xmm0
|
; AVX-RECIP-NEXT: vmulps %xmm1, %xmm0, %xmm0
|
||||||
; AVX-RECIP-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; AVX-RECIP-NEXT: vmovaps {{.*#+}} xmm2 = [1,1,1,1]
|
||||||
; AVX-RECIP-NEXT: vsubps %xmm0, %xmm2, %xmm0
|
; AVX-RECIP-NEXT: vsubps %xmm0, %xmm2, %xmm0
|
||||||
; AVX-RECIP-NEXT: vmulps %xmm0, %xmm1, %xmm0
|
; AVX-RECIP-NEXT: vmulps %xmm0, %xmm1, %xmm0
|
||||||
; AVX-RECIP-NEXT: vaddps %xmm0, %xmm1, %xmm0
|
; AVX-RECIP-NEXT: vaddps %xmm0, %xmm1, %xmm0
|
||||||
|
|
@ -427,7 +427,7 @@ define <4 x float> @v4f32_one_step2(<4 x float> %x) #1 {
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: v4f32_one_step2:
|
; BTVER2-LABEL: v4f32_one_step2:
|
||||||
; BTVER2: # %bb.0:
|
; BTVER2: # %bb.0:
|
||||||
; BTVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [5:1.00]
|
; BTVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1,1,1,1] sched: [5:1.00]
|
||||||
; BTVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [2:1.00]
|
; BTVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [2:1.00]
|
||||||
; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
|
; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
|
||||||
; BTVER2-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
|
; BTVER2-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
|
||||||
|
|
@ -440,7 +440,7 @@ define <4 x float> @v4f32_one_step2(<4 x float> %x) #1 {
|
||||||
; SANDY: # %bb.0:
|
; SANDY: # %bb.0:
|
||||||
; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
|
; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
|
; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50]
|
; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1,1,1,1] sched: [6:0.50]
|
||||||
; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
|
; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
|
; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
|
; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
|
||||||
|
|
@ -492,11 +492,11 @@ define <4 x float> @v4f32_one_step_2_divs(<4 x float> %x) #1 {
|
||||||
; SSE: # %bb.0:
|
; SSE: # %bb.0:
|
||||||
; SSE-NEXT: rcpps %xmm0, %xmm1
|
; SSE-NEXT: rcpps %xmm0, %xmm1
|
||||||
; SSE-NEXT: mulps %xmm1, %xmm0
|
; SSE-NEXT: mulps %xmm1, %xmm0
|
||||||
; SSE-NEXT: movaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; SSE-NEXT: movaps {{.*#+}} xmm2 = [1,1,1,1]
|
||||||
; SSE-NEXT: subps %xmm0, %xmm2
|
; SSE-NEXT: subps %xmm0, %xmm2
|
||||||
; SSE-NEXT: mulps %xmm1, %xmm2
|
; SSE-NEXT: mulps %xmm1, %xmm2
|
||||||
; SSE-NEXT: addps %xmm1, %xmm2
|
; SSE-NEXT: addps %xmm1, %xmm2
|
||||||
; SSE-NEXT: movaps {{.*#+}} xmm0 = [1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00]
|
; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,2,3,4]
|
||||||
; SSE-NEXT: mulps %xmm2, %xmm0
|
; SSE-NEXT: mulps %xmm2, %xmm0
|
||||||
; SSE-NEXT: mulps %xmm2, %xmm0
|
; SSE-NEXT: mulps %xmm2, %xmm0
|
||||||
; SSE-NEXT: retq
|
; SSE-NEXT: retq
|
||||||
|
|
@ -505,7 +505,7 @@ define <4 x float> @v4f32_one_step_2_divs(<4 x float> %x) #1 {
|
||||||
; AVX-RECIP: # %bb.0:
|
; AVX-RECIP: # %bb.0:
|
||||||
; AVX-RECIP-NEXT: vrcpps %xmm0, %xmm1
|
; AVX-RECIP-NEXT: vrcpps %xmm0, %xmm1
|
||||||
; AVX-RECIP-NEXT: vmulps %xmm1, %xmm0, %xmm0
|
; AVX-RECIP-NEXT: vmulps %xmm1, %xmm0, %xmm0
|
||||||
; AVX-RECIP-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; AVX-RECIP-NEXT: vmovaps {{.*#+}} xmm2 = [1,1,1,1]
|
||||||
; AVX-RECIP-NEXT: vsubps %xmm0, %xmm2, %xmm0
|
; AVX-RECIP-NEXT: vsubps %xmm0, %xmm2, %xmm0
|
||||||
; AVX-RECIP-NEXT: vmulps %xmm0, %xmm1, %xmm0
|
; AVX-RECIP-NEXT: vmulps %xmm0, %xmm1, %xmm0
|
||||||
; AVX-RECIP-NEXT: vaddps %xmm0, %xmm1, %xmm0
|
; AVX-RECIP-NEXT: vaddps %xmm0, %xmm1, %xmm0
|
||||||
|
|
@ -524,7 +524,7 @@ define <4 x float> @v4f32_one_step_2_divs(<4 x float> %x) #1 {
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: v4f32_one_step_2_divs:
|
; BTVER2-LABEL: v4f32_one_step_2_divs:
|
||||||
; BTVER2: # %bb.0:
|
; BTVER2: # %bb.0:
|
||||||
; BTVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [5:1.00]
|
; BTVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1,1,1,1] sched: [5:1.00]
|
||||||
; BTVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [2:1.00]
|
; BTVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [2:1.00]
|
||||||
; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
|
; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
|
||||||
; BTVER2-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
|
; BTVER2-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
|
||||||
|
|
@ -538,7 +538,7 @@ define <4 x float> @v4f32_one_step_2_divs(<4 x float> %x) #1 {
|
||||||
; SANDY: # %bb.0:
|
; SANDY: # %bb.0:
|
||||||
; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
|
; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
|
; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50]
|
; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1,1,1,1] sched: [6:0.50]
|
||||||
; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
|
; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
|
; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
|
; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
|
||||||
|
|
@ -597,7 +597,7 @@ define <4 x float> @v4f32_two_step2(<4 x float> %x) #2 {
|
||||||
; SSE-NEXT: rcpps %xmm0, %xmm2
|
; SSE-NEXT: rcpps %xmm0, %xmm2
|
||||||
; SSE-NEXT: movaps %xmm0, %xmm3
|
; SSE-NEXT: movaps %xmm0, %xmm3
|
||||||
; SSE-NEXT: mulps %xmm2, %xmm3
|
; SSE-NEXT: mulps %xmm2, %xmm3
|
||||||
; SSE-NEXT: movaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; SSE-NEXT: movaps {{.*#+}} xmm1 = [1,1,1,1]
|
||||||
; SSE-NEXT: movaps %xmm1, %xmm4
|
; SSE-NEXT: movaps %xmm1, %xmm4
|
||||||
; SSE-NEXT: subps %xmm3, %xmm4
|
; SSE-NEXT: subps %xmm3, %xmm4
|
||||||
; SSE-NEXT: mulps %xmm2, %xmm4
|
; SSE-NEXT: mulps %xmm2, %xmm4
|
||||||
|
|
@ -614,7 +614,7 @@ define <4 x float> @v4f32_two_step2(<4 x float> %x) #2 {
|
||||||
; AVX-RECIP: # %bb.0:
|
; AVX-RECIP: # %bb.0:
|
||||||
; AVX-RECIP-NEXT: vrcpps %xmm0, %xmm1
|
; AVX-RECIP-NEXT: vrcpps %xmm0, %xmm1
|
||||||
; AVX-RECIP-NEXT: vmulps %xmm1, %xmm0, %xmm2
|
; AVX-RECIP-NEXT: vmulps %xmm1, %xmm0, %xmm2
|
||||||
; AVX-RECIP-NEXT: vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; AVX-RECIP-NEXT: vmovaps {{.*#+}} xmm3 = [1,1,1,1]
|
||||||
; AVX-RECIP-NEXT: vsubps %xmm2, %xmm3, %xmm2
|
; AVX-RECIP-NEXT: vsubps %xmm2, %xmm3, %xmm2
|
||||||
; AVX-RECIP-NEXT: vmulps %xmm2, %xmm1, %xmm2
|
; AVX-RECIP-NEXT: vmulps %xmm2, %xmm1, %xmm2
|
||||||
; AVX-RECIP-NEXT: vaddps %xmm2, %xmm1, %xmm1
|
; AVX-RECIP-NEXT: vaddps %xmm2, %xmm1, %xmm1
|
||||||
|
|
@ -628,7 +628,7 @@ define <4 x float> @v4f32_two_step2(<4 x float> %x) #2 {
|
||||||
; FMA-RECIP-LABEL: v4f32_two_step2:
|
; FMA-RECIP-LABEL: v4f32_two_step2:
|
||||||
; FMA-RECIP: # %bb.0:
|
; FMA-RECIP: # %bb.0:
|
||||||
; FMA-RECIP-NEXT: vrcpps %xmm0, %xmm1
|
; FMA-RECIP-NEXT: vrcpps %xmm0, %xmm1
|
||||||
; FMA-RECIP-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; FMA-RECIP-NEXT: vmovaps {{.*#+}} xmm2 = [1,1,1,1]
|
||||||
; FMA-RECIP-NEXT: vmovaps %xmm1, %xmm3
|
; FMA-RECIP-NEXT: vmovaps %xmm1, %xmm3
|
||||||
; FMA-RECIP-NEXT: vfnmadd213ps {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2
|
; FMA-RECIP-NEXT: vfnmadd213ps {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2
|
||||||
; FMA-RECIP-NEXT: vfmadd132ps {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1
|
; FMA-RECIP-NEXT: vfmadd132ps {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1
|
||||||
|
|
@ -639,7 +639,7 @@ define <4 x float> @v4f32_two_step2(<4 x float> %x) #2 {
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: v4f32_two_step2:
|
; BTVER2-LABEL: v4f32_two_step2:
|
||||||
; BTVER2: # %bb.0:
|
; BTVER2: # %bb.0:
|
||||||
; BTVER2-NEXT: vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [5:1.00]
|
; BTVER2-NEXT: vmovaps {{.*#+}} xmm3 = [1,1,1,1] sched: [5:1.00]
|
||||||
; BTVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [2:1.00]
|
; BTVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [2:1.00]
|
||||||
; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [2:1.00]
|
; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [2:1.00]
|
||||||
; BTVER2-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
|
; BTVER2-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
|
||||||
|
|
@ -656,7 +656,7 @@ define <4 x float> @v4f32_two_step2(<4 x float> %x) #2 {
|
||||||
; SANDY: # %bb.0:
|
; SANDY: # %bb.0:
|
||||||
; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
|
; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [5:1.00]
|
; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50]
|
; SANDY-NEXT: vmovaps {{.*#+}} xmm3 = [1,1,1,1] sched: [6:0.50]
|
||||||
; SANDY-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
|
; SANDY-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: vmulps %xmm2, %xmm1, %xmm2 # sched: [5:1.00]
|
; SANDY-NEXT: vmulps %xmm2, %xmm1, %xmm2 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vaddps %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
|
; SANDY-NEXT: vaddps %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
|
||||||
|
|
@ -726,7 +726,7 @@ define <8 x float> @v8f32_one_step2(<8 x float> %x) #1 {
|
||||||
; SSE: # %bb.0:
|
; SSE: # %bb.0:
|
||||||
; SSE-NEXT: rcpps %xmm1, %xmm4
|
; SSE-NEXT: rcpps %xmm1, %xmm4
|
||||||
; SSE-NEXT: mulps %xmm4, %xmm1
|
; SSE-NEXT: mulps %xmm4, %xmm1
|
||||||
; SSE-NEXT: movaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; SSE-NEXT: movaps {{.*#+}} xmm2 = [1,1,1,1]
|
||||||
; SSE-NEXT: movaps %xmm2, %xmm3
|
; SSE-NEXT: movaps %xmm2, %xmm3
|
||||||
; SSE-NEXT: subps %xmm1, %xmm3
|
; SSE-NEXT: subps %xmm1, %xmm3
|
||||||
; SSE-NEXT: mulps %xmm4, %xmm3
|
; SSE-NEXT: mulps %xmm4, %xmm3
|
||||||
|
|
@ -746,7 +746,7 @@ define <8 x float> @v8f32_one_step2(<8 x float> %x) #1 {
|
||||||
; AVX-RECIP: # %bb.0:
|
; AVX-RECIP: # %bb.0:
|
||||||
; AVX-RECIP-NEXT: vrcpps %ymm0, %ymm1
|
; AVX-RECIP-NEXT: vrcpps %ymm0, %ymm1
|
||||||
; AVX-RECIP-NEXT: vmulps %ymm1, %ymm0, %ymm0
|
; AVX-RECIP-NEXT: vmulps %ymm1, %ymm0, %ymm0
|
||||||
; AVX-RECIP-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; AVX-RECIP-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1]
|
||||||
; AVX-RECIP-NEXT: vsubps %ymm0, %ymm2, %ymm0
|
; AVX-RECIP-NEXT: vsubps %ymm0, %ymm2, %ymm0
|
||||||
; AVX-RECIP-NEXT: vmulps %ymm0, %ymm1, %ymm0
|
; AVX-RECIP-NEXT: vmulps %ymm0, %ymm1, %ymm0
|
||||||
; AVX-RECIP-NEXT: vaddps %ymm0, %ymm1, %ymm0
|
; AVX-RECIP-NEXT: vaddps %ymm0, %ymm1, %ymm0
|
||||||
|
|
@ -763,7 +763,7 @@ define <8 x float> @v8f32_one_step2(<8 x float> %x) #1 {
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: v8f32_one_step2:
|
; BTVER2-LABEL: v8f32_one_step2:
|
||||||
; BTVER2: # %bb.0:
|
; BTVER2: # %bb.0:
|
||||||
; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [5:1.00]
|
; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
|
||||||
; BTVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [2:2.00]
|
; BTVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [2:2.00]
|
||||||
; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
|
; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
|
||||||
; BTVER2-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:2.00]
|
; BTVER2-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:2.00]
|
||||||
|
|
@ -776,7 +776,7 @@ define <8 x float> @v8f32_one_step2(<8 x float> %x) #1 {
|
||||||
; SANDY: # %bb.0:
|
; SANDY: # %bb.0:
|
||||||
; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
|
; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
|
||||||
; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
|
; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50]
|
; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
|
||||||
; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
|
; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
|
; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
|
; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
|
||||||
|
|
@ -828,7 +828,7 @@ define <8 x float> @v8f32_one_step_2_divs(<8 x float> %x) #1 {
|
||||||
; SSE: # %bb.0:
|
; SSE: # %bb.0:
|
||||||
; SSE-NEXT: rcpps %xmm0, %xmm2
|
; SSE-NEXT: rcpps %xmm0, %xmm2
|
||||||
; SSE-NEXT: mulps %xmm2, %xmm0
|
; SSE-NEXT: mulps %xmm2, %xmm0
|
||||||
; SSE-NEXT: movaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; SSE-NEXT: movaps {{.*#+}} xmm3 = [1,1,1,1]
|
||||||
; SSE-NEXT: movaps %xmm3, %xmm4
|
; SSE-NEXT: movaps %xmm3, %xmm4
|
||||||
; SSE-NEXT: subps %xmm0, %xmm4
|
; SSE-NEXT: subps %xmm0, %xmm4
|
||||||
; SSE-NEXT: mulps %xmm2, %xmm4
|
; SSE-NEXT: mulps %xmm2, %xmm4
|
||||||
|
|
@ -838,9 +838,9 @@ define <8 x float> @v8f32_one_step_2_divs(<8 x float> %x) #1 {
|
||||||
; SSE-NEXT: subps %xmm1, %xmm3
|
; SSE-NEXT: subps %xmm1, %xmm3
|
||||||
; SSE-NEXT: mulps %xmm0, %xmm3
|
; SSE-NEXT: mulps %xmm0, %xmm3
|
||||||
; SSE-NEXT: addps %xmm0, %xmm3
|
; SSE-NEXT: addps %xmm0, %xmm3
|
||||||
; SSE-NEXT: movaps {{.*#+}} xmm1 = [5.000000e+00,6.000000e+00,7.000000e+00,8.000000e+00]
|
; SSE-NEXT: movaps {{.*#+}} xmm1 = [5,6,7,8]
|
||||||
; SSE-NEXT: mulps %xmm3, %xmm1
|
; SSE-NEXT: mulps %xmm3, %xmm1
|
||||||
; SSE-NEXT: movaps {{.*#+}} xmm0 = [1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00]
|
; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,2,3,4]
|
||||||
; SSE-NEXT: mulps %xmm4, %xmm0
|
; SSE-NEXT: mulps %xmm4, %xmm0
|
||||||
; SSE-NEXT: mulps %xmm4, %xmm0
|
; SSE-NEXT: mulps %xmm4, %xmm0
|
||||||
; SSE-NEXT: mulps %xmm3, %xmm1
|
; SSE-NEXT: mulps %xmm3, %xmm1
|
||||||
|
|
@ -850,7 +850,7 @@ define <8 x float> @v8f32_one_step_2_divs(<8 x float> %x) #1 {
|
||||||
; AVX-RECIP: # %bb.0:
|
; AVX-RECIP: # %bb.0:
|
||||||
; AVX-RECIP-NEXT: vrcpps %ymm0, %ymm1
|
; AVX-RECIP-NEXT: vrcpps %ymm0, %ymm1
|
||||||
; AVX-RECIP-NEXT: vmulps %ymm1, %ymm0, %ymm0
|
; AVX-RECIP-NEXT: vmulps %ymm1, %ymm0, %ymm0
|
||||||
; AVX-RECIP-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; AVX-RECIP-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1]
|
||||||
; AVX-RECIP-NEXT: vsubps %ymm0, %ymm2, %ymm0
|
; AVX-RECIP-NEXT: vsubps %ymm0, %ymm2, %ymm0
|
||||||
; AVX-RECIP-NEXT: vmulps %ymm0, %ymm1, %ymm0
|
; AVX-RECIP-NEXT: vmulps %ymm0, %ymm1, %ymm0
|
||||||
; AVX-RECIP-NEXT: vaddps %ymm0, %ymm1, %ymm0
|
; AVX-RECIP-NEXT: vaddps %ymm0, %ymm1, %ymm0
|
||||||
|
|
@ -869,7 +869,7 @@ define <8 x float> @v8f32_one_step_2_divs(<8 x float> %x) #1 {
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: v8f32_one_step_2_divs:
|
; BTVER2-LABEL: v8f32_one_step_2_divs:
|
||||||
; BTVER2: # %bb.0:
|
; BTVER2: # %bb.0:
|
||||||
; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [5:1.00]
|
; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
|
||||||
; BTVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [2:2.00]
|
; BTVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [2:2.00]
|
||||||
; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
|
; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
|
||||||
; BTVER2-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:2.00]
|
; BTVER2-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:2.00]
|
||||||
|
|
@ -883,7 +883,7 @@ define <8 x float> @v8f32_one_step_2_divs(<8 x float> %x) #1 {
|
||||||
; SANDY: # %bb.0:
|
; SANDY: # %bb.0:
|
||||||
; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
|
; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
|
||||||
; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
|
; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50]
|
; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
|
||||||
; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
|
; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
|
; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
|
; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
|
||||||
|
|
@ -943,7 +943,7 @@ define <8 x float> @v8f32_two_step2(<8 x float> %x) #2 {
|
||||||
; SSE-NEXT: rcpps %xmm1, %xmm3
|
; SSE-NEXT: rcpps %xmm1, %xmm3
|
||||||
; SSE-NEXT: movaps %xmm1, %xmm4
|
; SSE-NEXT: movaps %xmm1, %xmm4
|
||||||
; SSE-NEXT: mulps %xmm3, %xmm4
|
; SSE-NEXT: mulps %xmm3, %xmm4
|
||||||
; SSE-NEXT: movaps {{.*#+}} xmm0 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,1,1,1]
|
||||||
; SSE-NEXT: movaps %xmm0, %xmm5
|
; SSE-NEXT: movaps %xmm0, %xmm5
|
||||||
; SSE-NEXT: subps %xmm4, %xmm5
|
; SSE-NEXT: subps %xmm4, %xmm5
|
||||||
; SSE-NEXT: mulps %xmm3, %xmm5
|
; SSE-NEXT: mulps %xmm3, %xmm5
|
||||||
|
|
@ -973,7 +973,7 @@ define <8 x float> @v8f32_two_step2(<8 x float> %x) #2 {
|
||||||
; AVX-RECIP: # %bb.0:
|
; AVX-RECIP: # %bb.0:
|
||||||
; AVX-RECIP-NEXT: vrcpps %ymm0, %ymm1
|
; AVX-RECIP-NEXT: vrcpps %ymm0, %ymm1
|
||||||
; AVX-RECIP-NEXT: vmulps %ymm1, %ymm0, %ymm2
|
; AVX-RECIP-NEXT: vmulps %ymm1, %ymm0, %ymm2
|
||||||
; AVX-RECIP-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; AVX-RECIP-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1]
|
||||||
; AVX-RECIP-NEXT: vsubps %ymm2, %ymm3, %ymm2
|
; AVX-RECIP-NEXT: vsubps %ymm2, %ymm3, %ymm2
|
||||||
; AVX-RECIP-NEXT: vmulps %ymm2, %ymm1, %ymm2
|
; AVX-RECIP-NEXT: vmulps %ymm2, %ymm1, %ymm2
|
||||||
; AVX-RECIP-NEXT: vaddps %ymm2, %ymm1, %ymm1
|
; AVX-RECIP-NEXT: vaddps %ymm2, %ymm1, %ymm1
|
||||||
|
|
@ -987,7 +987,7 @@ define <8 x float> @v8f32_two_step2(<8 x float> %x) #2 {
|
||||||
; FMA-RECIP-LABEL: v8f32_two_step2:
|
; FMA-RECIP-LABEL: v8f32_two_step2:
|
||||||
; FMA-RECIP: # %bb.0:
|
; FMA-RECIP: # %bb.0:
|
||||||
; FMA-RECIP-NEXT: vrcpps %ymm0, %ymm1
|
; FMA-RECIP-NEXT: vrcpps %ymm0, %ymm1
|
||||||
; FMA-RECIP-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; FMA-RECIP-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1]
|
||||||
; FMA-RECIP-NEXT: vmovaps %ymm1, %ymm3
|
; FMA-RECIP-NEXT: vmovaps %ymm1, %ymm3
|
||||||
; FMA-RECIP-NEXT: vfnmadd213ps {{.*#+}} ymm3 = -(ymm0 * ymm3) + ymm2
|
; FMA-RECIP-NEXT: vfnmadd213ps {{.*#+}} ymm3 = -(ymm0 * ymm3) + ymm2
|
||||||
; FMA-RECIP-NEXT: vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm1) + ymm1
|
; FMA-RECIP-NEXT: vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm1) + ymm1
|
||||||
|
|
@ -998,7 +998,7 @@ define <8 x float> @v8f32_two_step2(<8 x float> %x) #2 {
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: v8f32_two_step2:
|
; BTVER2-LABEL: v8f32_two_step2:
|
||||||
; BTVER2: # %bb.0:
|
; BTVER2: # %bb.0:
|
||||||
; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [5:1.00]
|
; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
|
||||||
; BTVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [2:2.00]
|
; BTVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [2:2.00]
|
||||||
; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [2:2.00]
|
; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [2:2.00]
|
||||||
; BTVER2-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:2.00]
|
; BTVER2-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:2.00]
|
||||||
|
|
@ -1015,7 +1015,7 @@ define <8 x float> @v8f32_two_step2(<8 x float> %x) #2 {
|
||||||
; SANDY: # %bb.0:
|
; SANDY: # %bb.0:
|
||||||
; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
|
; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
|
||||||
; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [5:1.00]
|
; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50]
|
; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
|
||||||
; SANDY-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:1.00]
|
; SANDY-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [5:1.00]
|
; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vaddps %ymm2, %ymm1, %ymm1 # sched: [3:1.00]
|
; SANDY-NEXT: vaddps %ymm2, %ymm1, %ymm1 # sched: [3:1.00]
|
||||||
|
|
@ -1198,7 +1198,7 @@ define <16 x float> @v16f32_one_step2(<16 x float> %x) #1 {
|
||||||
; SSE-NEXT: movaps %xmm0, %xmm6
|
; SSE-NEXT: movaps %xmm0, %xmm6
|
||||||
; SSE-NEXT: rcpps %xmm3, %xmm2
|
; SSE-NEXT: rcpps %xmm3, %xmm2
|
||||||
; SSE-NEXT: mulps %xmm2, %xmm4
|
; SSE-NEXT: mulps %xmm2, %xmm4
|
||||||
; SSE-NEXT: movaps {{.*#+}} xmm0 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,1,1,1]
|
||||||
; SSE-NEXT: movaps %xmm0, %xmm3
|
; SSE-NEXT: movaps %xmm0, %xmm3
|
||||||
; SSE-NEXT: subps %xmm4, %xmm3
|
; SSE-NEXT: subps %xmm4, %xmm3
|
||||||
; SSE-NEXT: mulps %xmm2, %xmm3
|
; SSE-NEXT: mulps %xmm2, %xmm3
|
||||||
|
|
@ -1231,7 +1231,7 @@ define <16 x float> @v16f32_one_step2(<16 x float> %x) #1 {
|
||||||
; AVX-RECIP: # %bb.0:
|
; AVX-RECIP: # %bb.0:
|
||||||
; AVX-RECIP-NEXT: vrcpps %ymm1, %ymm2
|
; AVX-RECIP-NEXT: vrcpps %ymm1, %ymm2
|
||||||
; AVX-RECIP-NEXT: vmulps %ymm2, %ymm1, %ymm1
|
; AVX-RECIP-NEXT: vmulps %ymm2, %ymm1, %ymm1
|
||||||
; AVX-RECIP-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; AVX-RECIP-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1]
|
||||||
; AVX-RECIP-NEXT: vsubps %ymm1, %ymm3, %ymm1
|
; AVX-RECIP-NEXT: vsubps %ymm1, %ymm3, %ymm1
|
||||||
; AVX-RECIP-NEXT: vmulps %ymm1, %ymm2, %ymm1
|
; AVX-RECIP-NEXT: vmulps %ymm1, %ymm2, %ymm1
|
||||||
; AVX-RECIP-NEXT: vaddps %ymm1, %ymm2, %ymm1
|
; AVX-RECIP-NEXT: vaddps %ymm1, %ymm2, %ymm1
|
||||||
|
|
@ -1247,7 +1247,7 @@ define <16 x float> @v16f32_one_step2(<16 x float> %x) #1 {
|
||||||
; FMA-RECIP-LABEL: v16f32_one_step2:
|
; FMA-RECIP-LABEL: v16f32_one_step2:
|
||||||
; FMA-RECIP: # %bb.0:
|
; FMA-RECIP: # %bb.0:
|
||||||
; FMA-RECIP-NEXT: vrcpps %ymm1, %ymm2
|
; FMA-RECIP-NEXT: vrcpps %ymm1, %ymm2
|
||||||
; FMA-RECIP-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; FMA-RECIP-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1]
|
||||||
; FMA-RECIP-NEXT: vfnmadd213ps {{.*#+}} ymm1 = -(ymm2 * ymm1) + ymm3
|
; FMA-RECIP-NEXT: vfnmadd213ps {{.*#+}} ymm1 = -(ymm2 * ymm1) + ymm3
|
||||||
; FMA-RECIP-NEXT: vfmadd132ps {{.*#+}} ymm1 = (ymm1 * ymm2) + ymm2
|
; FMA-RECIP-NEXT: vfmadd132ps {{.*#+}} ymm1 = (ymm1 * ymm2) + ymm2
|
||||||
; FMA-RECIP-NEXT: vrcpps %ymm0, %ymm2
|
; FMA-RECIP-NEXT: vrcpps %ymm0, %ymm2
|
||||||
|
|
@ -1259,7 +1259,7 @@ define <16 x float> @v16f32_one_step2(<16 x float> %x) #1 {
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: v16f32_one_step2:
|
; BTVER2-LABEL: v16f32_one_step2:
|
||||||
; BTVER2: # %bb.0:
|
; BTVER2: # %bb.0:
|
||||||
; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [5:1.00]
|
; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
|
||||||
; BTVER2-NEXT: vrcpps %ymm1, %ymm2 # sched: [2:2.00]
|
; BTVER2-NEXT: vrcpps %ymm1, %ymm2 # sched: [2:2.00]
|
||||||
; BTVER2-NEXT: vrcpps %ymm0, %ymm4 # sched: [2:2.00]
|
; BTVER2-NEXT: vrcpps %ymm0, %ymm4 # sched: [2:2.00]
|
||||||
; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm1 # sched: [2:2.00]
|
; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm1 # sched: [2:2.00]
|
||||||
|
|
@ -1278,7 +1278,7 @@ define <16 x float> @v16f32_one_step2(<16 x float> %x) #1 {
|
||||||
; SANDY: # %bb.0:
|
; SANDY: # %bb.0:
|
||||||
; SANDY-NEXT: vrcpps %ymm1, %ymm2 # sched: [7:2.00]
|
; SANDY-NEXT: vrcpps %ymm1, %ymm2 # sched: [7:2.00]
|
||||||
; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm1 # sched: [5:1.00]
|
; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm1 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50]
|
; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
|
||||||
; SANDY-NEXT: vsubps %ymm1, %ymm3, %ymm1 # sched: [3:1.00]
|
; SANDY-NEXT: vsubps %ymm1, %ymm3, %ymm1 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: vmulps %ymm1, %ymm2, %ymm1 # sched: [5:1.00]
|
; SANDY-NEXT: vmulps %ymm1, %ymm2, %ymm1 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vaddps %ymm1, %ymm2, %ymm1 # sched: [3:1.00]
|
; SANDY-NEXT: vaddps %ymm1, %ymm2, %ymm1 # sched: [3:1.00]
|
||||||
|
|
@ -1345,7 +1345,7 @@ define <16 x float> @v16f32_one_step_2_divs(<16 x float> %x) #1 {
|
||||||
; SSE: # %bb.0:
|
; SSE: # %bb.0:
|
||||||
; SSE-NEXT: rcpps %xmm0, %xmm6
|
; SSE-NEXT: rcpps %xmm0, %xmm6
|
||||||
; SSE-NEXT: mulps %xmm6, %xmm0
|
; SSE-NEXT: mulps %xmm6, %xmm0
|
||||||
; SSE-NEXT: movaps {{.*#+}} xmm4 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; SSE-NEXT: movaps {{.*#+}} xmm4 = [1,1,1,1]
|
||||||
; SSE-NEXT: movaps %xmm4, %xmm5
|
; SSE-NEXT: movaps %xmm4, %xmm5
|
||||||
; SSE-NEXT: subps %xmm0, %xmm5
|
; SSE-NEXT: subps %xmm0, %xmm5
|
||||||
; SSE-NEXT: mulps %xmm6, %xmm5
|
; SSE-NEXT: mulps %xmm6, %xmm5
|
||||||
|
|
@ -1367,13 +1367,13 @@ define <16 x float> @v16f32_one_step_2_divs(<16 x float> %x) #1 {
|
||||||
; SSE-NEXT: subps %xmm3, %xmm4
|
; SSE-NEXT: subps %xmm3, %xmm4
|
||||||
; SSE-NEXT: mulps %xmm0, %xmm4
|
; SSE-NEXT: mulps %xmm0, %xmm4
|
||||||
; SSE-NEXT: addps %xmm0, %xmm4
|
; SSE-NEXT: addps %xmm0, %xmm4
|
||||||
; SSE-NEXT: movaps {{.*#+}} xmm3 = [1.300000e+01,1.400000e+01,1.500000e+01,1.600000e+01]
|
; SSE-NEXT: movaps {{.*#+}} xmm3 = [13,14,15,16]
|
||||||
; SSE-NEXT: mulps %xmm4, %xmm3
|
; SSE-NEXT: mulps %xmm4, %xmm3
|
||||||
; SSE-NEXT: movaps {{.*#+}} xmm2 = [9.000000e+00,1.000000e+01,1.100000e+01,1.200000e+01]
|
; SSE-NEXT: movaps {{.*#+}} xmm2 = [9,10,11,12]
|
||||||
; SSE-NEXT: mulps %xmm7, %xmm2
|
; SSE-NEXT: mulps %xmm7, %xmm2
|
||||||
; SSE-NEXT: movaps {{.*#+}} xmm1 = [5.000000e+00,6.000000e+00,7.000000e+00,8.000000e+00]
|
; SSE-NEXT: movaps {{.*#+}} xmm1 = [5,6,7,8]
|
||||||
; SSE-NEXT: mulps %xmm6, %xmm1
|
; SSE-NEXT: mulps %xmm6, %xmm1
|
||||||
; SSE-NEXT: movaps {{.*#+}} xmm0 = [1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00]
|
; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,2,3,4]
|
||||||
; SSE-NEXT: mulps %xmm5, %xmm0
|
; SSE-NEXT: mulps %xmm5, %xmm0
|
||||||
; SSE-NEXT: mulps %xmm5, %xmm0
|
; SSE-NEXT: mulps %xmm5, %xmm0
|
||||||
; SSE-NEXT: mulps %xmm6, %xmm1
|
; SSE-NEXT: mulps %xmm6, %xmm1
|
||||||
|
|
@ -1385,7 +1385,7 @@ define <16 x float> @v16f32_one_step_2_divs(<16 x float> %x) #1 {
|
||||||
; AVX-RECIP: # %bb.0:
|
; AVX-RECIP: # %bb.0:
|
||||||
; AVX-RECIP-NEXT: vrcpps %ymm0, %ymm2
|
; AVX-RECIP-NEXT: vrcpps %ymm0, %ymm2
|
||||||
; AVX-RECIP-NEXT: vmulps %ymm2, %ymm0, %ymm0
|
; AVX-RECIP-NEXT: vmulps %ymm2, %ymm0, %ymm0
|
||||||
; AVX-RECIP-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; AVX-RECIP-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1]
|
||||||
; AVX-RECIP-NEXT: vsubps %ymm0, %ymm3, %ymm0
|
; AVX-RECIP-NEXT: vsubps %ymm0, %ymm3, %ymm0
|
||||||
; AVX-RECIP-NEXT: vmulps %ymm0, %ymm2, %ymm0
|
; AVX-RECIP-NEXT: vmulps %ymm0, %ymm2, %ymm0
|
||||||
; AVX-RECIP-NEXT: vaddps %ymm0, %ymm2, %ymm0
|
; AVX-RECIP-NEXT: vaddps %ymm0, %ymm2, %ymm0
|
||||||
|
|
@ -1403,7 +1403,7 @@ define <16 x float> @v16f32_one_step_2_divs(<16 x float> %x) #1 {
|
||||||
; FMA-RECIP-LABEL: v16f32_one_step_2_divs:
|
; FMA-RECIP-LABEL: v16f32_one_step_2_divs:
|
||||||
; FMA-RECIP: # %bb.0:
|
; FMA-RECIP: # %bb.0:
|
||||||
; FMA-RECIP-NEXT: vrcpps %ymm0, %ymm2
|
; FMA-RECIP-NEXT: vrcpps %ymm0, %ymm2
|
||||||
; FMA-RECIP-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; FMA-RECIP-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1]
|
||||||
; FMA-RECIP-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm2 * ymm0) + ymm3
|
; FMA-RECIP-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm2 * ymm0) + ymm3
|
||||||
; FMA-RECIP-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm2
|
; FMA-RECIP-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm2
|
||||||
; FMA-RECIP-NEXT: vrcpps %ymm1, %ymm2
|
; FMA-RECIP-NEXT: vrcpps %ymm1, %ymm2
|
||||||
|
|
@ -1417,7 +1417,7 @@ define <16 x float> @v16f32_one_step_2_divs(<16 x float> %x) #1 {
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: v16f32_one_step_2_divs:
|
; BTVER2-LABEL: v16f32_one_step_2_divs:
|
||||||
; BTVER2: # %bb.0:
|
; BTVER2: # %bb.0:
|
||||||
; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [5:1.00]
|
; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
|
||||||
; BTVER2-NEXT: vrcpps %ymm0, %ymm2 # sched: [2:2.00]
|
; BTVER2-NEXT: vrcpps %ymm0, %ymm2 # sched: [2:2.00]
|
||||||
; BTVER2-NEXT: vmulps %ymm2, %ymm0, %ymm0 # sched: [2:2.00]
|
; BTVER2-NEXT: vmulps %ymm2, %ymm0, %ymm0 # sched: [2:2.00]
|
||||||
; BTVER2-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:2.00]
|
; BTVER2-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:2.00]
|
||||||
|
|
@ -1438,7 +1438,7 @@ define <16 x float> @v16f32_one_step_2_divs(<16 x float> %x) #1 {
|
||||||
; SANDY: # %bb.0:
|
; SANDY: # %bb.0:
|
||||||
; SANDY-NEXT: vrcpps %ymm0, %ymm2 # sched: [7:2.00]
|
; SANDY-NEXT: vrcpps %ymm0, %ymm2 # sched: [7:2.00]
|
||||||
; SANDY-NEXT: vmulps %ymm2, %ymm0, %ymm0 # sched: [5:1.00]
|
; SANDY-NEXT: vmulps %ymm2, %ymm0, %ymm0 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50]
|
; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
|
||||||
; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00]
|
; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: vrcpps %ymm1, %ymm4 # sched: [7:2.00]
|
; SANDY-NEXT: vrcpps %ymm1, %ymm4 # sched: [7:2.00]
|
||||||
; SANDY-NEXT: vmulps %ymm0, %ymm2, %ymm0 # sched: [5:1.00]
|
; SANDY-NEXT: vmulps %ymm0, %ymm2, %ymm0 # sched: [5:1.00]
|
||||||
|
|
@ -1517,7 +1517,7 @@ define <16 x float> @v16f32_two_step2(<16 x float> %x) #2 {
|
||||||
; SSE-NEXT: movaps %xmm0, %xmm4
|
; SSE-NEXT: movaps %xmm0, %xmm4
|
||||||
; SSE-NEXT: rcpps %xmm3, %xmm2
|
; SSE-NEXT: rcpps %xmm3, %xmm2
|
||||||
; SSE-NEXT: mulps %xmm2, %xmm3
|
; SSE-NEXT: mulps %xmm2, %xmm3
|
||||||
; SSE-NEXT: movaps {{.*#+}} xmm0 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,1,1,1]
|
||||||
; SSE-NEXT: movaps %xmm0, %xmm7
|
; SSE-NEXT: movaps %xmm0, %xmm7
|
||||||
; SSE-NEXT: subps %xmm3, %xmm7
|
; SSE-NEXT: subps %xmm3, %xmm7
|
||||||
; SSE-NEXT: mulps %xmm2, %xmm7
|
; SSE-NEXT: mulps %xmm2, %xmm7
|
||||||
|
|
@ -1573,7 +1573,7 @@ define <16 x float> @v16f32_two_step2(<16 x float> %x) #2 {
|
||||||
; AVX-RECIP: # %bb.0:
|
; AVX-RECIP: # %bb.0:
|
||||||
; AVX-RECIP-NEXT: vrcpps %ymm1, %ymm2
|
; AVX-RECIP-NEXT: vrcpps %ymm1, %ymm2
|
||||||
; AVX-RECIP-NEXT: vmulps %ymm2, %ymm1, %ymm3
|
; AVX-RECIP-NEXT: vmulps %ymm2, %ymm1, %ymm3
|
||||||
; AVX-RECIP-NEXT: vmovaps {{.*#+}} ymm4 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; AVX-RECIP-NEXT: vmovaps {{.*#+}} ymm4 = [1,1,1,1,1,1,1,1]
|
||||||
; AVX-RECIP-NEXT: vsubps %ymm3, %ymm4, %ymm3
|
; AVX-RECIP-NEXT: vsubps %ymm3, %ymm4, %ymm3
|
||||||
; AVX-RECIP-NEXT: vmulps %ymm3, %ymm2, %ymm3
|
; AVX-RECIP-NEXT: vmulps %ymm3, %ymm2, %ymm3
|
||||||
; AVX-RECIP-NEXT: vaddps %ymm3, %ymm2, %ymm2
|
; AVX-RECIP-NEXT: vaddps %ymm3, %ymm2, %ymm2
|
||||||
|
|
@ -1597,7 +1597,7 @@ define <16 x float> @v16f32_two_step2(<16 x float> %x) #2 {
|
||||||
; FMA-RECIP-LABEL: v16f32_two_step2:
|
; FMA-RECIP-LABEL: v16f32_two_step2:
|
||||||
; FMA-RECIP: # %bb.0:
|
; FMA-RECIP: # %bb.0:
|
||||||
; FMA-RECIP-NEXT: vrcpps %ymm1, %ymm2
|
; FMA-RECIP-NEXT: vrcpps %ymm1, %ymm2
|
||||||
; FMA-RECIP-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; FMA-RECIP-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1]
|
||||||
; FMA-RECIP-NEXT: vmovaps %ymm2, %ymm4
|
; FMA-RECIP-NEXT: vmovaps %ymm2, %ymm4
|
||||||
; FMA-RECIP-NEXT: vfnmadd213ps {{.*#+}} ymm4 = -(ymm1 * ymm4) + ymm3
|
; FMA-RECIP-NEXT: vfnmadd213ps {{.*#+}} ymm4 = -(ymm1 * ymm4) + ymm3
|
||||||
; FMA-RECIP-NEXT: vfmadd132ps {{.*#+}} ymm4 = (ymm4 * ymm2) + ymm2
|
; FMA-RECIP-NEXT: vfmadd132ps {{.*#+}} ymm4 = (ymm4 * ymm2) + ymm2
|
||||||
|
|
@ -1615,7 +1615,7 @@ define <16 x float> @v16f32_two_step2(<16 x float> %x) #2 {
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: v16f32_two_step2:
|
; BTVER2-LABEL: v16f32_two_step2:
|
||||||
; BTVER2: # %bb.0:
|
; BTVER2: # %bb.0:
|
||||||
; BTVER2-NEXT: vmovaps {{.*#+}} ymm4 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [5:1.00]
|
; BTVER2-NEXT: vmovaps {{.*#+}} ymm4 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
|
||||||
; BTVER2-NEXT: vrcpps %ymm1, %ymm2 # sched: [2:2.00]
|
; BTVER2-NEXT: vrcpps %ymm1, %ymm2 # sched: [2:2.00]
|
||||||
; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm3 # sched: [2:2.00]
|
; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm3 # sched: [2:2.00]
|
||||||
; BTVER2-NEXT: vsubps %ymm3, %ymm4, %ymm3 # sched: [3:2.00]
|
; BTVER2-NEXT: vsubps %ymm3, %ymm4, %ymm3 # sched: [3:2.00]
|
||||||
|
|
@ -1642,7 +1642,7 @@ define <16 x float> @v16f32_two_step2(<16 x float> %x) #2 {
|
||||||
; SANDY: # %bb.0:
|
; SANDY: # %bb.0:
|
||||||
; SANDY-NEXT: vrcpps %ymm1, %ymm2 # sched: [7:2.00]
|
; SANDY-NEXT: vrcpps %ymm1, %ymm2 # sched: [7:2.00]
|
||||||
; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm3 # sched: [5:1.00]
|
; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm3 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vmovaps {{.*#+}} ymm4 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50]
|
; SANDY-NEXT: vmovaps {{.*#+}} ymm4 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
|
||||||
; SANDY-NEXT: vsubps %ymm3, %ymm4, %ymm3 # sched: [3:1.00]
|
; SANDY-NEXT: vsubps %ymm3, %ymm4, %ymm3 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: vmulps %ymm3, %ymm2, %ymm3 # sched: [5:1.00]
|
; SANDY-NEXT: vmulps %ymm3, %ymm2, %ymm3 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vaddps %ymm3, %ymm2, %ymm2 # sched: [3:1.00]
|
; SANDY-NEXT: vaddps %ymm3, %ymm2, %ymm2 # sched: [3:1.00]
|
||||||
|
|
|
||||||
|
|
@ -468,10 +468,10 @@ define <2 x double> @sel_constants_fmul_constant_vec(i1 %cond) {
|
||||||
; CHECK-NEXT: testb $1, %dil
|
; CHECK-NEXT: testb $1, %dil
|
||||||
; CHECK-NEXT: jne .LBB37_1
|
; CHECK-NEXT: jne .LBB37_1
|
||||||
; CHECK-NEXT: # %bb.2:
|
; CHECK-NEXT: # %bb.2:
|
||||||
; CHECK-NEXT: movaps {{.*#+}} xmm0 = [1.188300e+02,3.454000e+01]
|
; CHECK-NEXT: movaps {{.*#+}} xmm0 = [118.83,34.539999999999999]
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
; CHECK-NEXT: .LBB37_1:
|
; CHECK-NEXT: .LBB37_1:
|
||||||
; CHECK-NEXT: movaps {{.*#+}} xmm0 = [-2.040000e+01,3.768000e+01]
|
; CHECK-NEXT: movaps {{.*#+}} xmm0 = [-20.399999999999999,37.68]
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
%sel = select i1 %cond, <2 x double> <double -4.0, double 12.0>, <2 x double> <double 23.3, double 11.0>
|
%sel = select i1 %cond, <2 x double> <double -4.0, double 12.0>, <2 x double> <double 23.3, double 11.0>
|
||||||
%bo = fmul <2 x double> %sel, <double 5.1, double 3.14>
|
%bo = fmul <2 x double> %sel, <double 5.1, double 3.14>
|
||||||
|
|
|
||||||
|
|
@ -178,13 +178,13 @@ define <4 x float> @sqrt_v4f32_check_denorms(<4 x float> %x) #3 {
|
||||||
; SSE-NEXT: rsqrtps %xmm0, %xmm2
|
; SSE-NEXT: rsqrtps %xmm0, %xmm2
|
||||||
; SSE-NEXT: movaps %xmm0, %xmm1
|
; SSE-NEXT: movaps %xmm0, %xmm1
|
||||||
; SSE-NEXT: mulps %xmm2, %xmm1
|
; SSE-NEXT: mulps %xmm2, %xmm1
|
||||||
; SSE-NEXT: movaps {{.*#+}} xmm3 = [-5.000000e-01,-5.000000e-01,-5.000000e-01,-5.000000e-01]
|
; SSE-NEXT: movaps {{.*#+}} xmm3 = [-0.5,-0.5,-0.5,-0.5]
|
||||||
; SSE-NEXT: mulps %xmm1, %xmm3
|
; SSE-NEXT: mulps %xmm1, %xmm3
|
||||||
; SSE-NEXT: mulps %xmm2, %xmm1
|
; SSE-NEXT: mulps %xmm2, %xmm1
|
||||||
; SSE-NEXT: addps {{.*}}(%rip), %xmm1
|
; SSE-NEXT: addps {{.*}}(%rip), %xmm1
|
||||||
; SSE-NEXT: mulps %xmm3, %xmm1
|
; SSE-NEXT: mulps %xmm3, %xmm1
|
||||||
; SSE-NEXT: andps {{.*}}(%rip), %xmm0
|
; SSE-NEXT: andps {{.*}}(%rip), %xmm0
|
||||||
; SSE-NEXT: movaps {{.*#+}} xmm2 = [1.175494e-38,1.175494e-38,1.175494e-38,1.175494e-38]
|
; SSE-NEXT: movaps {{.*#+}} xmm2 = [1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38]
|
||||||
; SSE-NEXT: cmpleps %xmm0, %xmm2
|
; SSE-NEXT: cmpleps %xmm0, %xmm2
|
||||||
; SSE-NEXT: andps %xmm2, %xmm1
|
; SSE-NEXT: andps %xmm2, %xmm1
|
||||||
; SSE-NEXT: movaps %xmm1, %xmm0
|
; SSE-NEXT: movaps %xmm1, %xmm0
|
||||||
|
|
@ -199,7 +199,7 @@ define <4 x float> @sqrt_v4f32_check_denorms(<4 x float> %x) #3 {
|
||||||
; AVX1-NEXT: vaddps {{.*}}(%rip), %xmm1, %xmm1
|
; AVX1-NEXT: vaddps {{.*}}(%rip), %xmm1, %xmm1
|
||||||
; AVX1-NEXT: vmulps %xmm1, %xmm3, %xmm1
|
; AVX1-NEXT: vmulps %xmm1, %xmm3, %xmm1
|
||||||
; AVX1-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
|
; AVX1-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
|
||||||
; AVX1-NEXT: vmovaps {{.*#+}} xmm2 = [1.175494e-38,1.175494e-38,1.175494e-38,1.175494e-38]
|
; AVX1-NEXT: vmovaps {{.*#+}} xmm2 = [1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38]
|
||||||
; AVX1-NEXT: vcmpleps %xmm0, %xmm2, %xmm0
|
; AVX1-NEXT: vcmpleps %xmm0, %xmm2, %xmm0
|
||||||
; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
|
; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
|
||||||
; AVX1-NEXT: retq
|
; AVX1-NEXT: retq
|
||||||
|
|
@ -282,14 +282,14 @@ define <4 x float> @v4f32_no_estimate(<4 x float> %x) #0 {
|
||||||
; SSE-LABEL: v4f32_no_estimate:
|
; SSE-LABEL: v4f32_no_estimate:
|
||||||
; SSE: # %bb.0:
|
; SSE: # %bb.0:
|
||||||
; SSE-NEXT: sqrtps %xmm0, %xmm1
|
; SSE-NEXT: sqrtps %xmm0, %xmm1
|
||||||
; SSE-NEXT: movaps {{.*#+}} xmm0 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,1,1,1]
|
||||||
; SSE-NEXT: divps %xmm1, %xmm0
|
; SSE-NEXT: divps %xmm1, %xmm0
|
||||||
; SSE-NEXT: retq
|
; SSE-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX1-LABEL: v4f32_no_estimate:
|
; AVX1-LABEL: v4f32_no_estimate:
|
||||||
; AVX1: # %bb.0:
|
; AVX1: # %bb.0:
|
||||||
; AVX1-NEXT: vsqrtps %xmm0, %xmm0
|
; AVX1-NEXT: vsqrtps %xmm0, %xmm0
|
||||||
; AVX1-NEXT: vmovaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; AVX1-NEXT: vmovaps {{.*#+}} xmm1 = [1,1,1,1]
|
||||||
; AVX1-NEXT: vdivps %xmm0, %xmm1, %xmm0
|
; AVX1-NEXT: vdivps %xmm0, %xmm1, %xmm0
|
||||||
; AVX1-NEXT: retq
|
; AVX1-NEXT: retq
|
||||||
;
|
;
|
||||||
|
|
@ -347,7 +347,7 @@ define <8 x float> @v8f32_no_estimate(<8 x float> %x) #0 {
|
||||||
; SSE: # %bb.0:
|
; SSE: # %bb.0:
|
||||||
; SSE-NEXT: sqrtps %xmm1, %xmm2
|
; SSE-NEXT: sqrtps %xmm1, %xmm2
|
||||||
; SSE-NEXT: sqrtps %xmm0, %xmm3
|
; SSE-NEXT: sqrtps %xmm0, %xmm3
|
||||||
; SSE-NEXT: movaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; SSE-NEXT: movaps {{.*#+}} xmm1 = [1,1,1,1]
|
||||||
; SSE-NEXT: movaps %xmm1, %xmm0
|
; SSE-NEXT: movaps %xmm1, %xmm0
|
||||||
; SSE-NEXT: divps %xmm3, %xmm0
|
; SSE-NEXT: divps %xmm3, %xmm0
|
||||||
; SSE-NEXT: divps %xmm2, %xmm1
|
; SSE-NEXT: divps %xmm2, %xmm1
|
||||||
|
|
@ -356,7 +356,7 @@ define <8 x float> @v8f32_no_estimate(<8 x float> %x) #0 {
|
||||||
; AVX1-LABEL: v8f32_no_estimate:
|
; AVX1-LABEL: v8f32_no_estimate:
|
||||||
; AVX1: # %bb.0:
|
; AVX1: # %bb.0:
|
||||||
; AVX1-NEXT: vsqrtps %ymm0, %ymm0
|
; AVX1-NEXT: vsqrtps %ymm0, %ymm0
|
||||||
; AVX1-NEXT: vmovaps {{.*#+}} ymm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; AVX1-NEXT: vmovaps {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1]
|
||||||
; AVX1-NEXT: vdivps %ymm0, %ymm1, %ymm0
|
; AVX1-NEXT: vdivps %ymm0, %ymm1, %ymm0
|
||||||
; AVX1-NEXT: retq
|
; AVX1-NEXT: retq
|
||||||
;
|
;
|
||||||
|
|
@ -375,11 +375,11 @@ define <8 x float> @v8f32_estimate(<8 x float> %x) #1 {
|
||||||
; SSE-LABEL: v8f32_estimate:
|
; SSE-LABEL: v8f32_estimate:
|
||||||
; SSE: # %bb.0:
|
; SSE: # %bb.0:
|
||||||
; SSE-NEXT: rsqrtps %xmm0, %xmm3
|
; SSE-NEXT: rsqrtps %xmm0, %xmm3
|
||||||
; SSE-NEXT: movaps {{.*#+}} xmm4 = [-5.000000e-01,-5.000000e-01,-5.000000e-01,-5.000000e-01]
|
; SSE-NEXT: movaps {{.*#+}} xmm4 = [-0.5,-0.5,-0.5,-0.5]
|
||||||
; SSE-NEXT: movaps %xmm3, %xmm2
|
; SSE-NEXT: movaps %xmm3, %xmm2
|
||||||
; SSE-NEXT: mulps %xmm3, %xmm2
|
; SSE-NEXT: mulps %xmm3, %xmm2
|
||||||
; SSE-NEXT: mulps %xmm0, %xmm2
|
; SSE-NEXT: mulps %xmm0, %xmm2
|
||||||
; SSE-NEXT: movaps {{.*#+}} xmm0 = [-3.000000e+00,-3.000000e+00,-3.000000e+00,-3.000000e+00]
|
; SSE-NEXT: movaps {{.*#+}} xmm0 = [-3,-3,-3,-3]
|
||||||
; SSE-NEXT: addps %xmm0, %xmm2
|
; SSE-NEXT: addps %xmm0, %xmm2
|
||||||
; SSE-NEXT: mulps %xmm4, %xmm2
|
; SSE-NEXT: mulps %xmm4, %xmm2
|
||||||
; SSE-NEXT: mulps %xmm3, %xmm2
|
; SSE-NEXT: mulps %xmm3, %xmm2
|
||||||
|
|
@ -426,7 +426,7 @@ define <16 x float> @v16f32_no_estimate(<16 x float> %x) #0 {
|
||||||
; SSE-NEXT: sqrtps %xmm2, %xmm5
|
; SSE-NEXT: sqrtps %xmm2, %xmm5
|
||||||
; SSE-NEXT: sqrtps %xmm1, %xmm2
|
; SSE-NEXT: sqrtps %xmm1, %xmm2
|
||||||
; SSE-NEXT: sqrtps %xmm0, %xmm1
|
; SSE-NEXT: sqrtps %xmm0, %xmm1
|
||||||
; SSE-NEXT: movaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; SSE-NEXT: movaps {{.*#+}} xmm3 = [1,1,1,1]
|
||||||
; SSE-NEXT: movaps %xmm3, %xmm0
|
; SSE-NEXT: movaps %xmm3, %xmm0
|
||||||
; SSE-NEXT: divps %xmm1, %xmm0
|
; SSE-NEXT: divps %xmm1, %xmm0
|
||||||
; SSE-NEXT: movaps %xmm3, %xmm1
|
; SSE-NEXT: movaps %xmm3, %xmm1
|
||||||
|
|
@ -440,7 +440,7 @@ define <16 x float> @v16f32_no_estimate(<16 x float> %x) #0 {
|
||||||
; AVX1: # %bb.0:
|
; AVX1: # %bb.0:
|
||||||
; AVX1-NEXT: vsqrtps %ymm1, %ymm1
|
; AVX1-NEXT: vsqrtps %ymm1, %ymm1
|
||||||
; AVX1-NEXT: vsqrtps %ymm0, %ymm0
|
; AVX1-NEXT: vsqrtps %ymm0, %ymm0
|
||||||
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1]
|
||||||
; AVX1-NEXT: vdivps %ymm0, %ymm2, %ymm0
|
; AVX1-NEXT: vdivps %ymm0, %ymm2, %ymm0
|
||||||
; AVX1-NEXT: vdivps %ymm1, %ymm2, %ymm1
|
; AVX1-NEXT: vdivps %ymm1, %ymm2, %ymm1
|
||||||
; AVX1-NEXT: retq
|
; AVX1-NEXT: retq
|
||||||
|
|
@ -462,11 +462,11 @@ define <16 x float> @v16f32_estimate(<16 x float> %x) #1 {
|
||||||
; SSE-NEXT: movaps %xmm1, %xmm4
|
; SSE-NEXT: movaps %xmm1, %xmm4
|
||||||
; SSE-NEXT: movaps %xmm0, %xmm1
|
; SSE-NEXT: movaps %xmm0, %xmm1
|
||||||
; SSE-NEXT: rsqrtps %xmm0, %xmm5
|
; SSE-NEXT: rsqrtps %xmm0, %xmm5
|
||||||
; SSE-NEXT: movaps {{.*#+}} xmm6 = [-5.000000e-01,-5.000000e-01,-5.000000e-01,-5.000000e-01]
|
; SSE-NEXT: movaps {{.*#+}} xmm6 = [-0.5,-0.5,-0.5,-0.5]
|
||||||
; SSE-NEXT: movaps %xmm5, %xmm0
|
; SSE-NEXT: movaps %xmm5, %xmm0
|
||||||
; SSE-NEXT: mulps %xmm5, %xmm0
|
; SSE-NEXT: mulps %xmm5, %xmm0
|
||||||
; SSE-NEXT: mulps %xmm1, %xmm0
|
; SSE-NEXT: mulps %xmm1, %xmm0
|
||||||
; SSE-NEXT: movaps {{.*#+}} xmm7 = [-3.000000e+00,-3.000000e+00,-3.000000e+00,-3.000000e+00]
|
; SSE-NEXT: movaps {{.*#+}} xmm7 = [-3,-3,-3,-3]
|
||||||
; SSE-NEXT: addps %xmm7, %xmm0
|
; SSE-NEXT: addps %xmm7, %xmm0
|
||||||
; SSE-NEXT: mulps %xmm6, %xmm0
|
; SSE-NEXT: mulps %xmm6, %xmm0
|
||||||
; SSE-NEXT: mulps %xmm5, %xmm0
|
; SSE-NEXT: mulps %xmm5, %xmm0
|
||||||
|
|
@ -498,10 +498,10 @@ define <16 x float> @v16f32_estimate(<16 x float> %x) #1 {
|
||||||
; AVX1-LABEL: v16f32_estimate:
|
; AVX1-LABEL: v16f32_estimate:
|
||||||
; AVX1: # %bb.0:
|
; AVX1: # %bb.0:
|
||||||
; AVX1-NEXT: vrsqrtps %ymm0, %ymm2
|
; AVX1-NEXT: vrsqrtps %ymm0, %ymm2
|
||||||
; AVX1-NEXT: vmovaps {{.*#+}} ymm3 = [-5.000000e-01,-5.000000e-01,-5.000000e-01,-5.000000e-01,-5.000000e-01,-5.000000e-01,-5.000000e-01,-5.000000e-01]
|
; AVX1-NEXT: vmovaps {{.*#+}} ymm3 = [-0.5,-0.5,-0.5,-0.5,-0.5,-0.5,-0.5,-0.5]
|
||||||
; AVX1-NEXT: vmulps %ymm2, %ymm2, %ymm4
|
; AVX1-NEXT: vmulps %ymm2, %ymm2, %ymm4
|
||||||
; AVX1-NEXT: vmulps %ymm4, %ymm0, %ymm0
|
; AVX1-NEXT: vmulps %ymm4, %ymm0, %ymm0
|
||||||
; AVX1-NEXT: vmovaps {{.*#+}} ymm4 = [-3.000000e+00,-3.000000e+00,-3.000000e+00,-3.000000e+00,-3.000000e+00,-3.000000e+00,-3.000000e+00,-3.000000e+00]
|
; AVX1-NEXT: vmovaps {{.*#+}} ymm4 = [-3,-3,-3,-3,-3,-3,-3,-3]
|
||||||
; AVX1-NEXT: vaddps %ymm4, %ymm0, %ymm0
|
; AVX1-NEXT: vaddps %ymm4, %ymm0, %ymm0
|
||||||
; AVX1-NEXT: vmulps %ymm0, %ymm3, %ymm0
|
; AVX1-NEXT: vmulps %ymm0, %ymm3, %ymm0
|
||||||
; AVX1-NEXT: vmulps %ymm0, %ymm2, %ymm0
|
; AVX1-NEXT: vmulps %ymm0, %ymm2, %ymm0
|
||||||
|
|
|
||||||
|
|
@ -395,7 +395,7 @@ define void @test12() nounwind {
|
||||||
; SSE-LABEL: test12:
|
; SSE-LABEL: test12:
|
||||||
; SSE: # %bb.0:
|
; SSE: # %bb.0:
|
||||||
; SSE-NEXT: movapd 0, %xmm0
|
; SSE-NEXT: movapd 0, %xmm0
|
||||||
; SSE-NEXT: movapd {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
|
; SSE-NEXT: movapd {{.*#+}} xmm1 = [1,1,1,1]
|
||||||
; SSE-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
; SSE-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||||
; SSE-NEXT: xorps %xmm2, %xmm2
|
; SSE-NEXT: xorps %xmm2, %xmm2
|
||||||
; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
|
; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
|
||||||
|
|
|
||||||
|
|
@ -949,7 +949,7 @@ entry:
|
||||||
define void @fallback_broadcast_v4f64_to_v8f64(<4 x double> %a, <8 x double> %b) {
|
define void @fallback_broadcast_v4f64_to_v8f64(<4 x double> %a, <8 x double> %b) {
|
||||||
; X32-AVX-LABEL: fallback_broadcast_v4f64_to_v8f64:
|
; X32-AVX-LABEL: fallback_broadcast_v4f64_to_v8f64:
|
||||||
; X32-AVX: # %bb.0: # %entry
|
; X32-AVX: # %bb.0: # %entry
|
||||||
; X32-AVX-NEXT: vmovapd {{.*#+}} ymm3 = [1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00]
|
; X32-AVX-NEXT: vmovapd {{.*#+}} ymm3 = [1,2,3,4]
|
||||||
; X32-AVX-NEXT: vaddpd %ymm3, %ymm0, %ymm0
|
; X32-AVX-NEXT: vaddpd %ymm3, %ymm0, %ymm0
|
||||||
; X32-AVX-NEXT: vaddpd %ymm3, %ymm2, %ymm2
|
; X32-AVX-NEXT: vaddpd %ymm3, %ymm2, %ymm2
|
||||||
; X32-AVX-NEXT: vaddpd %ymm3, %ymm1, %ymm1
|
; X32-AVX-NEXT: vaddpd %ymm3, %ymm1, %ymm1
|
||||||
|
|
@ -963,7 +963,7 @@ define void @fallback_broadcast_v4f64_to_v8f64(<4 x double> %a, <8 x double> %b)
|
||||||
;
|
;
|
||||||
; X32-AVX512-LABEL: fallback_broadcast_v4f64_to_v8f64:
|
; X32-AVX512-LABEL: fallback_broadcast_v4f64_to_v8f64:
|
||||||
; X32-AVX512: # %bb.0: # %entry
|
; X32-AVX512: # %bb.0: # %entry
|
||||||
; X32-AVX512-NEXT: vmovapd {{.*#+}} ymm2 = [1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00]
|
; X32-AVX512-NEXT: vmovapd {{.*#+}} ymm2 = [1,2,3,4]
|
||||||
; X32-AVX512-NEXT: vaddpd %ymm2, %ymm0, %ymm0
|
; X32-AVX512-NEXT: vaddpd %ymm2, %ymm0, %ymm0
|
||||||
; X32-AVX512-NEXT: vinsertf64x4 $1, %ymm2, %zmm2, %zmm2
|
; X32-AVX512-NEXT: vinsertf64x4 $1, %ymm2, %zmm2, %zmm2
|
||||||
; X32-AVX512-NEXT: vaddpd %zmm2, %zmm1, %zmm1
|
; X32-AVX512-NEXT: vaddpd %zmm2, %zmm1, %zmm1
|
||||||
|
|
@ -975,7 +975,7 @@ define void @fallback_broadcast_v4f64_to_v8f64(<4 x double> %a, <8 x double> %b)
|
||||||
;
|
;
|
||||||
; X64-AVX-LABEL: fallback_broadcast_v4f64_to_v8f64:
|
; X64-AVX-LABEL: fallback_broadcast_v4f64_to_v8f64:
|
||||||
; X64-AVX: # %bb.0: # %entry
|
; X64-AVX: # %bb.0: # %entry
|
||||||
; X64-AVX-NEXT: vmovapd {{.*#+}} ymm3 = [1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00]
|
; X64-AVX-NEXT: vmovapd {{.*#+}} ymm3 = [1,2,3,4]
|
||||||
; X64-AVX-NEXT: vaddpd %ymm3, %ymm0, %ymm0
|
; X64-AVX-NEXT: vaddpd %ymm3, %ymm0, %ymm0
|
||||||
; X64-AVX-NEXT: vaddpd %ymm3, %ymm2, %ymm2
|
; X64-AVX-NEXT: vaddpd %ymm3, %ymm2, %ymm2
|
||||||
; X64-AVX-NEXT: vaddpd %ymm3, %ymm1, %ymm1
|
; X64-AVX-NEXT: vaddpd %ymm3, %ymm1, %ymm1
|
||||||
|
|
@ -989,7 +989,7 @@ define void @fallback_broadcast_v4f64_to_v8f64(<4 x double> %a, <8 x double> %b)
|
||||||
;
|
;
|
||||||
; X64-AVX512-LABEL: fallback_broadcast_v4f64_to_v8f64:
|
; X64-AVX512-LABEL: fallback_broadcast_v4f64_to_v8f64:
|
||||||
; X64-AVX512: # %bb.0: # %entry
|
; X64-AVX512: # %bb.0: # %entry
|
||||||
; X64-AVX512-NEXT: vmovapd {{.*#+}} ymm2 = [1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00]
|
; X64-AVX512-NEXT: vmovapd {{.*#+}} ymm2 = [1,2,3,4]
|
||||||
; X64-AVX512-NEXT: vaddpd %ymm2, %ymm0, %ymm0
|
; X64-AVX512-NEXT: vaddpd %ymm2, %ymm0, %ymm0
|
||||||
; X64-AVX512-NEXT: vinsertf64x4 $1, %ymm2, %zmm2, %zmm2
|
; X64-AVX512-NEXT: vinsertf64x4 $1, %ymm2, %zmm2, %zmm2
|
||||||
; X64-AVX512-NEXT: vaddpd %zmm2, %zmm1, %zmm1
|
; X64-AVX512-NEXT: vaddpd %zmm2, %zmm1, %zmm1
|
||||||
|
|
|
||||||
|
|
@ -12,7 +12,7 @@ define <4 x i32> @out_constant_varx_mone(<4 x i32> *%px, <4 x i32> *%py, <4 x i3
|
||||||
; CHECK-SSE1: # %bb.0:
|
; CHECK-SSE1: # %bb.0:
|
||||||
; CHECK-SSE1-NEXT: movq %rdi, %rax
|
; CHECK-SSE1-NEXT: movq %rdi, %rax
|
||||||
; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
|
; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
|
||||||
; CHECK-SSE1-NEXT: movaps {{.*#+}} xmm1 = [nan,nan,nan,nan]
|
; CHECK-SSE1-NEXT: movaps {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
|
||||||
; CHECK-SSE1-NEXT: xorps %xmm0, %xmm1
|
; CHECK-SSE1-NEXT: xorps %xmm0, %xmm1
|
||||||
; CHECK-SSE1-NEXT: andps (%rsi), %xmm0
|
; CHECK-SSE1-NEXT: andps (%rsi), %xmm0
|
||||||
; CHECK-SSE1-NEXT: orps %xmm1, %xmm0
|
; CHECK-SSE1-NEXT: orps %xmm1, %xmm0
|
||||||
|
|
@ -122,7 +122,7 @@ define <4 x i32> @in_constant_varx_mone_invmask(<4 x i32> *%px, <4 x i32> *%py,
|
||||||
; CHECK-SSE1: # %bb.0:
|
; CHECK-SSE1: # %bb.0:
|
||||||
; CHECK-SSE1-NEXT: movq %rdi, %rax
|
; CHECK-SSE1-NEXT: movq %rdi, %rax
|
||||||
; CHECK-SSE1-NEXT: movaps (%rsi), %xmm0
|
; CHECK-SSE1-NEXT: movaps (%rsi), %xmm0
|
||||||
; CHECK-SSE1-NEXT: movaps {{.*#+}} xmm1 = [nan,nan,nan,nan]
|
; CHECK-SSE1-NEXT: movaps {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
|
||||||
; CHECK-SSE1-NEXT: movaps (%rcx), %xmm2
|
; CHECK-SSE1-NEXT: movaps (%rcx), %xmm2
|
||||||
; CHECK-SSE1-NEXT: xorps %xmm1, %xmm2
|
; CHECK-SSE1-NEXT: xorps %xmm1, %xmm2
|
||||||
; CHECK-SSE1-NEXT: andnps %xmm2, %xmm0
|
; CHECK-SSE1-NEXT: andnps %xmm2, %xmm0
|
||||||
|
|
@ -382,7 +382,7 @@ define <4 x i32> @out_constant_mone_vary_invmask(<4 x i32> *%px, <4 x i32> *%py,
|
||||||
; CHECK-SSE1: # %bb.0:
|
; CHECK-SSE1: # %bb.0:
|
||||||
; CHECK-SSE1-NEXT: movq %rdi, %rax
|
; CHECK-SSE1-NEXT: movq %rdi, %rax
|
||||||
; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
|
; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
|
||||||
; CHECK-SSE1-NEXT: movaps {{.*#+}} xmm1 = [nan,nan,nan,nan]
|
; CHECK-SSE1-NEXT: movaps {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
|
||||||
; CHECK-SSE1-NEXT: xorps %xmm0, %xmm1
|
; CHECK-SSE1-NEXT: xorps %xmm0, %xmm1
|
||||||
; CHECK-SSE1-NEXT: andps (%rdx), %xmm0
|
; CHECK-SSE1-NEXT: andps (%rdx), %xmm0
|
||||||
; CHECK-SSE1-NEXT: orps %xmm1, %xmm0
|
; CHECK-SSE1-NEXT: orps %xmm1, %xmm0
|
||||||
|
|
@ -422,7 +422,7 @@ define <4 x i32> @in_constant_mone_vary_invmask(<4 x i32> *%px, <4 x i32> *%py,
|
||||||
; CHECK-SSE1: # %bb.0:
|
; CHECK-SSE1: # %bb.0:
|
||||||
; CHECK-SSE1-NEXT: movq %rdi, %rax
|
; CHECK-SSE1-NEXT: movq %rdi, %rax
|
||||||
; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
|
; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
|
||||||
; CHECK-SSE1-NEXT: movaps {{.*#+}} xmm1 = [nan,nan,nan,nan]
|
; CHECK-SSE1-NEXT: movaps {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
|
||||||
; CHECK-SSE1-NEXT: xorps %xmm0, %xmm1
|
; CHECK-SSE1-NEXT: xorps %xmm0, %xmm1
|
||||||
; CHECK-SSE1-NEXT: andps (%rdx), %xmm0
|
; CHECK-SSE1-NEXT: andps (%rdx), %xmm0
|
||||||
; CHECK-SSE1-NEXT: orps %xmm1, %xmm0
|
; CHECK-SSE1-NEXT: orps %xmm1, %xmm0
|
||||||
|
|
@ -461,7 +461,7 @@ define <4 x i32> @out_constant_42_vary(<4 x i32> *%px, <4 x i32> *%py, <4 x i32>
|
||||||
; CHECK-SSE1: # %bb.0:
|
; CHECK-SSE1: # %bb.0:
|
||||||
; CHECK-SSE1-NEXT: movq %rdi, %rax
|
; CHECK-SSE1-NEXT: movq %rdi, %rax
|
||||||
; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
|
; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
|
||||||
; CHECK-SSE1-NEXT: movaps {{.*#+}} xmm1 = [5.885454e-44,5.885454e-44,5.885454e-44,5.885454e-44]
|
; CHECK-SSE1-NEXT: movaps {{.*#+}} xmm1 = [5.88545355E-44,5.88545355E-44,5.88545355E-44,5.88545355E-44]
|
||||||
; CHECK-SSE1-NEXT: andps %xmm0, %xmm1
|
; CHECK-SSE1-NEXT: andps %xmm0, %xmm1
|
||||||
; CHECK-SSE1-NEXT: andnps (%rdx), %xmm0
|
; CHECK-SSE1-NEXT: andnps (%rdx), %xmm0
|
||||||
; CHECK-SSE1-NEXT: orps %xmm1, %xmm0
|
; CHECK-SSE1-NEXT: orps %xmm1, %xmm0
|
||||||
|
|
|
||||||
|
|
@ -5,12 +5,12 @@
|
||||||
define <4 x float> @foo() {
|
define <4 x float> @foo() {
|
||||||
; X32-LABEL: foo:
|
; X32-LABEL: foo:
|
||||||
; X32: # %bb.0:
|
; X32: # %bb.0:
|
||||||
; X32-NEXT: movaps {{.*#+}} xmm0 = [3.223542e+00,2.300000e+00,1.200000e+00,1.000000e-01]
|
; X32-NEXT: movaps {{.*#+}} xmm0 = [3.22354245,2.29999995,1.20000005,0.100000001]
|
||||||
; X32-NEXT: retl
|
; X32-NEXT: retl
|
||||||
;
|
;
|
||||||
; X64-LABEL: foo:
|
; X64-LABEL: foo:
|
||||||
; X64: # %bb.0:
|
; X64: # %bb.0:
|
||||||
; X64-NEXT: movaps {{.*#+}} xmm0 = [3.223542e+00,2.300000e+00,1.200000e+00,1.000000e-01]
|
; X64-NEXT: movaps {{.*#+}} xmm0 = [3.22354245,2.29999995,1.20000005,0.100000001]
|
||||||
; X64-NEXT: retq
|
; X64-NEXT: retq
|
||||||
ret <4 x float> <float 0x4009C9D0A0000000, float 0x4002666660000000, float 0x3FF3333340000000, float 0x3FB99999A0000000>
|
ret <4 x float> <float 0x4009C9D0A0000000, float 0x4002666660000000, float 0x3FF3333340000000, float 0x3FB99999A0000000>
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -90,7 +90,7 @@ define <2 x float> @cvt_v2u32_v2f32(<2 x i32> %src) {
|
||||||
; CHECK: ## %bb.0:
|
; CHECK: ## %bb.0:
|
||||||
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||||
; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
|
; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
|
||||||
; CHECK-NEXT: vmovaps {{.*#+}} xmm1 = [4.503600e+15,4.503600e+15]
|
; CHECK-NEXT: vmovaps {{.*#+}} xmm1 = [4503599627370496,4503599627370496]
|
||||||
; CHECK-NEXT: vorps %xmm1, %xmm0, %xmm0
|
; CHECK-NEXT: vorps %xmm1, %xmm0, %xmm0
|
||||||
; CHECK-NEXT: vsubpd %xmm1, %xmm0, %xmm0
|
; CHECK-NEXT: vsubpd %xmm1, %xmm0, %xmm0
|
||||||
; CHECK-NEXT: vcvtpd2ps %xmm0, %xmm0
|
; CHECK-NEXT: vcvtpd2ps %xmm0, %xmm0
|
||||||
|
|
@ -99,7 +99,7 @@ define <2 x float> @cvt_v2u32_v2f32(<2 x i32> %src) {
|
||||||
; CHECK-WIDE-LABEL: cvt_v2u32_v2f32:
|
; CHECK-WIDE-LABEL: cvt_v2u32_v2f32:
|
||||||
; CHECK-WIDE: ## %bb.0:
|
; CHECK-WIDE: ## %bb.0:
|
||||||
; CHECK-WIDE-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
|
; CHECK-WIDE-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
|
||||||
; CHECK-WIDE-NEXT: vmovdqa {{.*#+}} xmm1 = [4.503600e+15,4.503600e+15]
|
; CHECK-WIDE-NEXT: vmovdqa {{.*#+}} xmm1 = [4503599627370496,4503599627370496]
|
||||||
; CHECK-WIDE-NEXT: vpor %xmm1, %xmm0, %xmm0
|
; CHECK-WIDE-NEXT: vpor %xmm1, %xmm0, %xmm0
|
||||||
; CHECK-WIDE-NEXT: vsubpd %xmm1, %xmm0, %xmm0
|
; CHECK-WIDE-NEXT: vsubpd %xmm1, %xmm0, %xmm0
|
||||||
; CHECK-WIDE-NEXT: vcvtpd2ps %xmm0, %xmm0
|
; CHECK-WIDE-NEXT: vcvtpd2ps %xmm0, %xmm0
|
||||||
|
|
|
||||||
|
|
@ -6,9 +6,6 @@
|
||||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=X64 --check-prefix=X64_AVX512VL
|
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=X64 --check-prefix=X64_AVX512VL
|
||||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefix=X64 --check-prefix=X64_AVX512VLDQ
|
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefix=X64 --check-prefix=X64_AVX512VLDQ
|
||||||
|
|
||||||
; FIXME: Drop the regex pattern matching of 'nan' once we drop support for MSVC
|
|
||||||
; 2013.
|
|
||||||
|
|
||||||
define <2 x double> @fabs_v2f64(<2 x double> %p) {
|
define <2 x double> @fabs_v2f64(<2 x double> %p) {
|
||||||
; X32-LABEL: fabs_v2f64:
|
; X32-LABEL: fabs_v2f64:
|
||||||
; X32: # %bb.0:
|
; X32: # %bb.0:
|
||||||
|
|
@ -132,7 +129,7 @@ declare <8 x float> @llvm.fabs.v8f32(<8 x float> %p)
|
||||||
define <8 x double> @fabs_v8f64(<8 x double> %p) {
|
define <8 x double> @fabs_v8f64(<8 x double> %p) {
|
||||||
; X32_AVX-LABEL: fabs_v8f64:
|
; X32_AVX-LABEL: fabs_v8f64:
|
||||||
; X32_AVX: # %bb.0:
|
; X32_AVX: # %bb.0:
|
||||||
; X32_AVX-NEXT: vmovaps {{.*#+}} ymm2 = [{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}}]
|
; X32_AVX-NEXT: vmovaps {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN]
|
||||||
; X32_AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
|
; X32_AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
|
||||||
; X32_AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
|
; X32_AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
|
||||||
; X32_AVX-NEXT: retl
|
; X32_AVX-NEXT: retl
|
||||||
|
|
@ -149,7 +146,7 @@ define <8 x double> @fabs_v8f64(<8 x double> %p) {
|
||||||
;
|
;
|
||||||
; X64_AVX-LABEL: fabs_v8f64:
|
; X64_AVX-LABEL: fabs_v8f64:
|
||||||
; X64_AVX: # %bb.0:
|
; X64_AVX: # %bb.0:
|
||||||
; X64_AVX-NEXT: vmovaps {{.*#+}} ymm2 = [{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}}]
|
; X64_AVX-NEXT: vmovaps {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN]
|
||||||
; X64_AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
|
; X64_AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
|
||||||
; X64_AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
|
; X64_AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
|
||||||
; X64_AVX-NEXT: retq
|
; X64_AVX-NEXT: retq
|
||||||
|
|
@ -171,7 +168,7 @@ declare <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
|
||||||
define <16 x float> @fabs_v16f32(<16 x float> %p) {
|
define <16 x float> @fabs_v16f32(<16 x float> %p) {
|
||||||
; X32_AVX-LABEL: fabs_v16f32:
|
; X32_AVX-LABEL: fabs_v16f32:
|
||||||
; X32_AVX: # %bb.0:
|
; X32_AVX: # %bb.0:
|
||||||
; X32_AVX-NEXT: vmovaps {{.*#+}} ymm2 = [{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}}]
|
; X32_AVX-NEXT: vmovaps {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
|
||||||
; X32_AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
|
; X32_AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
|
||||||
; X32_AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
|
; X32_AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
|
||||||
; X32_AVX-NEXT: retl
|
; X32_AVX-NEXT: retl
|
||||||
|
|
@ -188,7 +185,7 @@ define <16 x float> @fabs_v16f32(<16 x float> %p) {
|
||||||
;
|
;
|
||||||
; X64_AVX-LABEL: fabs_v16f32:
|
; X64_AVX-LABEL: fabs_v16f32:
|
||||||
; X64_AVX: # %bb.0:
|
; X64_AVX: # %bb.0:
|
||||||
; X64_AVX-NEXT: vmovaps {{.*#+}} ymm2 = [{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}}]
|
; X64_AVX-NEXT: vmovaps {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
|
||||||
; X64_AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
|
; X64_AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
|
||||||
; X64_AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
|
; X64_AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
|
||||||
; X64_AVX-NEXT: retq
|
; X64_AVX-NEXT: retq
|
||||||
|
|
|
||||||
|
|
@ -703,17 +703,17 @@ declare <16 x float> @llvm.nearbyint.v16f32(<16 x float> %p)
|
||||||
define <2 x double> @const_floor_v2f64() {
|
define <2 x double> @const_floor_v2f64() {
|
||||||
; SSE41-LABEL: const_floor_v2f64:
|
; SSE41-LABEL: const_floor_v2f64:
|
||||||
; SSE41: ## %bb.0:
|
; SSE41: ## %bb.0:
|
||||||
; SSE41-NEXT: movaps {{.*#+}} xmm0 = [-2.000000e+00,2.000000e+00]
|
; SSE41-NEXT: movaps {{.*#+}} xmm0 = [-2,2]
|
||||||
; SSE41-NEXT: retq
|
; SSE41-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX-LABEL: const_floor_v2f64:
|
; AVX-LABEL: const_floor_v2f64:
|
||||||
; AVX: ## %bb.0:
|
; AVX: ## %bb.0:
|
||||||
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [-2.000000e+00,2.000000e+00]
|
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [-2,2]
|
||||||
; AVX-NEXT: retq
|
; AVX-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX512-LABEL: const_floor_v2f64:
|
; AVX512-LABEL: const_floor_v2f64:
|
||||||
; AVX512: ## %bb.0:
|
; AVX512: ## %bb.0:
|
||||||
; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [-2.000000e+00,2.000000e+00]
|
; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [-2,2]
|
||||||
; AVX512-NEXT: retq
|
; AVX512-NEXT: retq
|
||||||
%t = call <2 x double> @llvm.floor.v2f64(<2 x double> <double -1.5, double 2.5>)
|
%t = call <2 x double> @llvm.floor.v2f64(<2 x double> <double -1.5, double 2.5>)
|
||||||
ret <2 x double> %t
|
ret <2 x double> %t
|
||||||
|
|
@ -722,17 +722,17 @@ define <2 x double> @const_floor_v2f64() {
|
||||||
define <4 x float> @const_floor_v4f32() {
|
define <4 x float> @const_floor_v4f32() {
|
||||||
; SSE41-LABEL: const_floor_v4f32:
|
; SSE41-LABEL: const_floor_v4f32:
|
||||||
; SSE41: ## %bb.0:
|
; SSE41: ## %bb.0:
|
||||||
; SSE41-NEXT: movaps {{.*#+}} xmm0 = [-4.000000e+00,6.000000e+00,-9.000000e+00,2.000000e+00]
|
; SSE41-NEXT: movaps {{.*#+}} xmm0 = [-4,6,-9,2]
|
||||||
; SSE41-NEXT: retq
|
; SSE41-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX-LABEL: const_floor_v4f32:
|
; AVX-LABEL: const_floor_v4f32:
|
||||||
; AVX: ## %bb.0:
|
; AVX: ## %bb.0:
|
||||||
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [-4.000000e+00,6.000000e+00,-9.000000e+00,2.000000e+00]
|
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [-4,6,-9,2]
|
||||||
; AVX-NEXT: retq
|
; AVX-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX512-LABEL: const_floor_v4f32:
|
; AVX512-LABEL: const_floor_v4f32:
|
||||||
; AVX512: ## %bb.0:
|
; AVX512: ## %bb.0:
|
||||||
; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [-4.000000e+00,6.000000e+00,-9.000000e+00,2.000000e+00]
|
; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [-4,6,-9,2]
|
||||||
; AVX512-NEXT: retq
|
; AVX512-NEXT: retq
|
||||||
%t = call <4 x float> @llvm.floor.v4f32(<4 x float> <float -3.5, float 6.0, float -9.0, float 2.5>)
|
%t = call <4 x float> @llvm.floor.v4f32(<4 x float> <float -3.5, float 6.0, float -9.0, float 2.5>)
|
||||||
ret <4 x float> %t
|
ret <4 x float> %t
|
||||||
|
|
@ -741,17 +741,17 @@ define <4 x float> @const_floor_v4f32() {
|
||||||
define <2 x double> @const_ceil_v2f64() {
|
define <2 x double> @const_ceil_v2f64() {
|
||||||
; SSE41-LABEL: const_ceil_v2f64:
|
; SSE41-LABEL: const_ceil_v2f64:
|
||||||
; SSE41: ## %bb.0:
|
; SSE41: ## %bb.0:
|
||||||
; SSE41-NEXT: movaps {{.*#+}} xmm0 = [-1.000000e+00,3.000000e+00]
|
; SSE41-NEXT: movaps {{.*#+}} xmm0 = [-1,3]
|
||||||
; SSE41-NEXT: retq
|
; SSE41-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX-LABEL: const_ceil_v2f64:
|
; AVX-LABEL: const_ceil_v2f64:
|
||||||
; AVX: ## %bb.0:
|
; AVX: ## %bb.0:
|
||||||
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [-1.000000e+00,3.000000e+00]
|
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [-1,3]
|
||||||
; AVX-NEXT: retq
|
; AVX-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX512-LABEL: const_ceil_v2f64:
|
; AVX512-LABEL: const_ceil_v2f64:
|
||||||
; AVX512: ## %bb.0:
|
; AVX512: ## %bb.0:
|
||||||
; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [-1.000000e+00,3.000000e+00]
|
; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [-1,3]
|
||||||
; AVX512-NEXT: retq
|
; AVX512-NEXT: retq
|
||||||
%t = call <2 x double> @llvm.ceil.v2f64(<2 x double> <double -1.5, double 2.5>)
|
%t = call <2 x double> @llvm.ceil.v2f64(<2 x double> <double -1.5, double 2.5>)
|
||||||
ret <2 x double> %t
|
ret <2 x double> %t
|
||||||
|
|
@ -760,17 +760,17 @@ define <2 x double> @const_ceil_v2f64() {
|
||||||
define <4 x float> @const_ceil_v4f32() {
|
define <4 x float> @const_ceil_v4f32() {
|
||||||
; SSE41-LABEL: const_ceil_v4f32:
|
; SSE41-LABEL: const_ceil_v4f32:
|
||||||
; SSE41: ## %bb.0:
|
; SSE41: ## %bb.0:
|
||||||
; SSE41-NEXT: movaps {{.*#+}} xmm0 = [-3.000000e+00,6.000000e+00,-9.000000e+00,3.000000e+00]
|
; SSE41-NEXT: movaps {{.*#+}} xmm0 = [-3,6,-9,3]
|
||||||
; SSE41-NEXT: retq
|
; SSE41-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX-LABEL: const_ceil_v4f32:
|
; AVX-LABEL: const_ceil_v4f32:
|
||||||
; AVX: ## %bb.0:
|
; AVX: ## %bb.0:
|
||||||
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [-3.000000e+00,6.000000e+00,-9.000000e+00,3.000000e+00]
|
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [-3,6,-9,3]
|
||||||
; AVX-NEXT: retq
|
; AVX-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX512-LABEL: const_ceil_v4f32:
|
; AVX512-LABEL: const_ceil_v4f32:
|
||||||
; AVX512: ## %bb.0:
|
; AVX512: ## %bb.0:
|
||||||
; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [-3.000000e+00,6.000000e+00,-9.000000e+00,3.000000e+00]
|
; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [-3,6,-9,3]
|
||||||
; AVX512-NEXT: retq
|
; AVX512-NEXT: retq
|
||||||
%t = call <4 x float> @llvm.ceil.v4f32(<4 x float> <float -3.5, float 6.0, float -9.0, float 2.5>)
|
%t = call <4 x float> @llvm.ceil.v4f32(<4 x float> <float -3.5, float 6.0, float -9.0, float 2.5>)
|
||||||
ret <4 x float> %t
|
ret <4 x float> %t
|
||||||
|
|
@ -779,17 +779,17 @@ define <4 x float> @const_ceil_v4f32() {
|
||||||
define <2 x double> @const_trunc_v2f64() {
|
define <2 x double> @const_trunc_v2f64() {
|
||||||
; SSE41-LABEL: const_trunc_v2f64:
|
; SSE41-LABEL: const_trunc_v2f64:
|
||||||
; SSE41: ## %bb.0:
|
; SSE41: ## %bb.0:
|
||||||
; SSE41-NEXT: movaps {{.*#+}} xmm0 = [-1.000000e+00,2.000000e+00]
|
; SSE41-NEXT: movaps {{.*#+}} xmm0 = [-1,2]
|
||||||
; SSE41-NEXT: retq
|
; SSE41-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX-LABEL: const_trunc_v2f64:
|
; AVX-LABEL: const_trunc_v2f64:
|
||||||
; AVX: ## %bb.0:
|
; AVX: ## %bb.0:
|
||||||
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [-1.000000e+00,2.000000e+00]
|
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [-1,2]
|
||||||
; AVX-NEXT: retq
|
; AVX-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX512-LABEL: const_trunc_v2f64:
|
; AVX512-LABEL: const_trunc_v2f64:
|
||||||
; AVX512: ## %bb.0:
|
; AVX512: ## %bb.0:
|
||||||
; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [-1.000000e+00,2.000000e+00]
|
; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [-1,2]
|
||||||
; AVX512-NEXT: retq
|
; AVX512-NEXT: retq
|
||||||
%t = call <2 x double> @llvm.trunc.v2f64(<2 x double> <double -1.5, double 2.5>)
|
%t = call <2 x double> @llvm.trunc.v2f64(<2 x double> <double -1.5, double 2.5>)
|
||||||
ret <2 x double> %t
|
ret <2 x double> %t
|
||||||
|
|
@ -798,17 +798,17 @@ define <2 x double> @const_trunc_v2f64() {
|
||||||
define <4 x float> @const_trunc_v4f32() {
|
define <4 x float> @const_trunc_v4f32() {
|
||||||
; SSE41-LABEL: const_trunc_v4f32:
|
; SSE41-LABEL: const_trunc_v4f32:
|
||||||
; SSE41: ## %bb.0:
|
; SSE41: ## %bb.0:
|
||||||
; SSE41-NEXT: movaps {{.*#+}} xmm0 = [-3.000000e+00,6.000000e+00,-9.000000e+00,2.000000e+00]
|
; SSE41-NEXT: movaps {{.*#+}} xmm0 = [-3,6,-9,2]
|
||||||
; SSE41-NEXT: retq
|
; SSE41-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX-LABEL: const_trunc_v4f32:
|
; AVX-LABEL: const_trunc_v4f32:
|
||||||
; AVX: ## %bb.0:
|
; AVX: ## %bb.0:
|
||||||
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [-3.000000e+00,6.000000e+00,-9.000000e+00,2.000000e+00]
|
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [-3,6,-9,2]
|
||||||
; AVX-NEXT: retq
|
; AVX-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX512-LABEL: const_trunc_v4f32:
|
; AVX512-LABEL: const_trunc_v4f32:
|
||||||
; AVX512: ## %bb.0:
|
; AVX512: ## %bb.0:
|
||||||
; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [-3.000000e+00,6.000000e+00,-9.000000e+00,2.000000e+00]
|
; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [-3,6,-9,2]
|
||||||
; AVX512-NEXT: retq
|
; AVX512-NEXT: retq
|
||||||
%t = call <4 x float> @llvm.trunc.v4f32(<4 x float> <float -3.5, float 6.0, float -9.0, float 2.5>)
|
%t = call <4 x float> @llvm.trunc.v4f32(<4 x float> <float -3.5, float 6.0, float -9.0, float 2.5>)
|
||||||
ret <4 x float> %t
|
ret <4 x float> %t
|
||||||
|
|
|
||||||
|
|
@ -253,42 +253,42 @@ entry:
|
||||||
define <2 x double> @fpext_fromconst() {
|
define <2 x double> @fpext_fromconst() {
|
||||||
; X32-SSE-LABEL: fpext_fromconst:
|
; X32-SSE-LABEL: fpext_fromconst:
|
||||||
; X32-SSE: # %bb.0: # %entry
|
; X32-SSE: # %bb.0: # %entry
|
||||||
; X32-SSE-NEXT: movaps {{.*#+}} xmm0 = [1.000000e+00,-2.000000e+00]
|
; X32-SSE-NEXT: movaps {{.*#+}} xmm0 = [1,-2]
|
||||||
; X32-SSE-NEXT: # encoding: [0x0f,0x28,0x05,A,A,A,A]
|
; X32-SSE-NEXT: # encoding: [0x0f,0x28,0x05,A,A,A,A]
|
||||||
; X32-SSE-NEXT: # fixup A - offset: 3, value: {{\.LCPI.*}}, kind: FK_Data_4
|
; X32-SSE-NEXT: # fixup A - offset: 3, value: {{\.LCPI.*}}, kind: FK_Data_4
|
||||||
; X32-SSE-NEXT: retl # encoding: [0xc3]
|
; X32-SSE-NEXT: retl # encoding: [0xc3]
|
||||||
;
|
;
|
||||||
; X32-AVX-LABEL: fpext_fromconst:
|
; X32-AVX-LABEL: fpext_fromconst:
|
||||||
; X32-AVX: # %bb.0: # %entry
|
; X32-AVX: # %bb.0: # %entry
|
||||||
; X32-AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1.000000e+00,-2.000000e+00]
|
; X32-AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1,-2]
|
||||||
; X32-AVX-NEXT: # encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
|
; X32-AVX-NEXT: # encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
|
||||||
; X32-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
|
; X32-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
|
||||||
; X32-AVX-NEXT: retl # encoding: [0xc3]
|
; X32-AVX-NEXT: retl # encoding: [0xc3]
|
||||||
;
|
;
|
||||||
; X32-AVX512VL-LABEL: fpext_fromconst:
|
; X32-AVX512VL-LABEL: fpext_fromconst:
|
||||||
; X32-AVX512VL: # %bb.0: # %entry
|
; X32-AVX512VL: # %bb.0: # %entry
|
||||||
; X32-AVX512VL-NEXT: vmovaps {{\.LCPI.*}}, %xmm0 # EVEX TO VEX Compression xmm0 = [1.000000e+00,-2.000000e+00]
|
; X32-AVX512VL-NEXT: vmovaps {{\.LCPI.*}}, %xmm0 # EVEX TO VEX Compression xmm0 = [1,-2]
|
||||||
; X32-AVX512VL-NEXT: # encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
|
; X32-AVX512VL-NEXT: # encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
|
||||||
; X32-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
|
; X32-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
|
||||||
; X32-AVX512VL-NEXT: retl # encoding: [0xc3]
|
; X32-AVX512VL-NEXT: retl # encoding: [0xc3]
|
||||||
;
|
;
|
||||||
; X64-SSE-LABEL: fpext_fromconst:
|
; X64-SSE-LABEL: fpext_fromconst:
|
||||||
; X64-SSE: # %bb.0: # %entry
|
; X64-SSE: # %bb.0: # %entry
|
||||||
; X64-SSE-NEXT: movaps {{.*#+}} xmm0 = [1.000000e+00,-2.000000e+00]
|
; X64-SSE-NEXT: movaps {{.*#+}} xmm0 = [1,-2]
|
||||||
; X64-SSE-NEXT: # encoding: [0x0f,0x28,0x05,A,A,A,A]
|
; X64-SSE-NEXT: # encoding: [0x0f,0x28,0x05,A,A,A,A]
|
||||||
; X64-SSE-NEXT: # fixup A - offset: 3, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
|
; X64-SSE-NEXT: # fixup A - offset: 3, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
|
||||||
; X64-SSE-NEXT: retq # encoding: [0xc3]
|
; X64-SSE-NEXT: retq # encoding: [0xc3]
|
||||||
;
|
;
|
||||||
; X64-AVX-LABEL: fpext_fromconst:
|
; X64-AVX-LABEL: fpext_fromconst:
|
||||||
; X64-AVX: # %bb.0: # %entry
|
; X64-AVX: # %bb.0: # %entry
|
||||||
; X64-AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1.000000e+00,-2.000000e+00]
|
; X64-AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1,-2]
|
||||||
; X64-AVX-NEXT: # encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
|
; X64-AVX-NEXT: # encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
|
||||||
; X64-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
|
; X64-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
|
||||||
; X64-AVX-NEXT: retq # encoding: [0xc3]
|
; X64-AVX-NEXT: retq # encoding: [0xc3]
|
||||||
;
|
;
|
||||||
; X64-AVX512VL-LABEL: fpext_fromconst:
|
; X64-AVX512VL-LABEL: fpext_fromconst:
|
||||||
; X64-AVX512VL: # %bb.0: # %entry
|
; X64-AVX512VL: # %bb.0: # %entry
|
||||||
; X64-AVX512VL-NEXT: vmovaps {{.*}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = [1.000000e+00,-2.000000e+00]
|
; X64-AVX512VL-NEXT: vmovaps {{.*}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = [1,-2]
|
||||||
; X64-AVX512VL-NEXT: # encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
|
; X64-AVX512VL-NEXT: # encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
|
||||||
; X64-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
|
; X64-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
|
||||||
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
|
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
|
||||||
|
|
|
||||||
|
|
@ -415,7 +415,7 @@ define <2 x double> @uitofp_2i64_to_2f64(<2 x i64> %a) {
|
||||||
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [1127219200,1160773632,0,0]
|
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [1127219200,1160773632,0,0]
|
||||||
; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
|
; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
|
||||||
; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
|
; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
|
||||||
; SSE-NEXT: movapd {{.*#+}} xmm4 = [4.503600e+15,1.934281e+25]
|
; SSE-NEXT: movapd {{.*#+}} xmm4 = [4503599627370496,1.9342813113834067E+25]
|
||||||
; SSE-NEXT: subpd %xmm4, %xmm0
|
; SSE-NEXT: subpd %xmm4, %xmm0
|
||||||
; SSE-NEXT: movapd %xmm0, %xmm1
|
; SSE-NEXT: movapd %xmm0, %xmm1
|
||||||
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
|
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
|
||||||
|
|
@ -433,7 +433,7 @@ define <2 x double> @uitofp_2i64_to_2f64(<2 x i64> %a) {
|
||||||
; VEX: # %bb.0:
|
; VEX: # %bb.0:
|
||||||
; VEX-NEXT: vmovapd {{.*#+}} xmm1 = [1127219200,1160773632,0,0]
|
; VEX-NEXT: vmovapd {{.*#+}} xmm1 = [1127219200,1160773632,0,0]
|
||||||
; VEX-NEXT: vunpcklps {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
; VEX-NEXT: vunpcklps {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
||||||
; VEX-NEXT: vmovapd {{.*#+}} xmm3 = [4.503600e+15,1.934281e+25]
|
; VEX-NEXT: vmovapd {{.*#+}} xmm3 = [4503599627370496,1.9342813113834067E+25]
|
||||||
; VEX-NEXT: vsubpd %xmm3, %xmm2, %xmm2
|
; VEX-NEXT: vsubpd %xmm3, %xmm2, %xmm2
|
||||||
; VEX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
; VEX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||||
; VEX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
; VEX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
||||||
|
|
@ -698,7 +698,7 @@ define <4 x double> @uitofp_4i64_to_4f64(<4 x i64> %a) {
|
||||||
; SSE-NEXT: movdqa {{.*#+}} xmm3 = [1127219200,1160773632,0,0]
|
; SSE-NEXT: movdqa {{.*#+}} xmm3 = [1127219200,1160773632,0,0]
|
||||||
; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
|
; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
|
||||||
; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
|
; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
|
||||||
; SSE-NEXT: movapd {{.*#+}} xmm5 = [4.503600e+15,1.934281e+25]
|
; SSE-NEXT: movapd {{.*#+}} xmm5 = [4503599627370496,1.9342813113834067E+25]
|
||||||
; SSE-NEXT: subpd %xmm5, %xmm2
|
; SSE-NEXT: subpd %xmm5, %xmm2
|
||||||
; SSE-NEXT: movapd %xmm2, %xmm0
|
; SSE-NEXT: movapd %xmm2, %xmm0
|
||||||
; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm2[1]
|
; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm2[1]
|
||||||
|
|
@ -729,7 +729,7 @@ define <4 x double> @uitofp_4i64_to_4f64(<4 x i64> %a) {
|
||||||
; VEX-NEXT: vextractf128 $1, %ymm0, %xmm1
|
; VEX-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||||
; VEX-NEXT: vmovapd {{.*#+}} xmm2 = [1127219200,1160773632,0,0]
|
; VEX-NEXT: vmovapd {{.*#+}} xmm2 = [1127219200,1160773632,0,0]
|
||||||
; VEX-NEXT: vunpcklps {{.*#+}} xmm3 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
|
; VEX-NEXT: vunpcklps {{.*#+}} xmm3 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
|
||||||
; VEX-NEXT: vmovapd {{.*#+}} xmm4 = [4.503600e+15,1.934281e+25]
|
; VEX-NEXT: vmovapd {{.*#+}} xmm4 = [4503599627370496,1.9342813113834067E+25]
|
||||||
; VEX-NEXT: vsubpd %xmm4, %xmm3, %xmm3
|
; VEX-NEXT: vsubpd %xmm4, %xmm3, %xmm3
|
||||||
; VEX-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
; VEX-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||||
; VEX-NEXT: vunpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
|
; VEX-NEXT: vunpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
|
||||||
|
|
@ -797,7 +797,7 @@ define <4 x double> @uitofp_4i32_to_4f64(<4 x i32> %a) {
|
||||||
; SSE-NEXT: movdqa %xmm0, %xmm1
|
; SSE-NEXT: movdqa %xmm0, %xmm1
|
||||||
; SSE-NEXT: psrld $16, %xmm1
|
; SSE-NEXT: psrld $16, %xmm1
|
||||||
; SSE-NEXT: cvtdq2pd %xmm1, %xmm1
|
; SSE-NEXT: cvtdq2pd %xmm1, %xmm1
|
||||||
; SSE-NEXT: movapd {{.*#+}} xmm2 = [6.553600e+04,6.553600e+04]
|
; SSE-NEXT: movapd {{.*#+}} xmm2 = [65536,65536]
|
||||||
; SSE-NEXT: mulpd %xmm2, %xmm1
|
; SSE-NEXT: mulpd %xmm2, %xmm1
|
||||||
; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
|
; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
|
||||||
; SSE-NEXT: pand {{.*}}(%rip), %xmm0
|
; SSE-NEXT: pand {{.*}}(%rip), %xmm0
|
||||||
|
|
@ -2329,7 +2329,7 @@ define <8 x float> @uitofp_8i32_to_8f32(<8 x i32> %a) {
|
||||||
; SSE-NEXT: psrld $16, %xmm0
|
; SSE-NEXT: psrld $16, %xmm0
|
||||||
; SSE-NEXT: movdqa {{.*#+}} xmm5 = [1392508928,1392508928,1392508928,1392508928]
|
; SSE-NEXT: movdqa {{.*#+}} xmm5 = [1392508928,1392508928,1392508928,1392508928]
|
||||||
; SSE-NEXT: por %xmm5, %xmm0
|
; SSE-NEXT: por %xmm5, %xmm0
|
||||||
; SSE-NEXT: movaps {{.*#+}} xmm6 = [-5.497642e+11,-5.497642e+11,-5.497642e+11,-5.497642e+11]
|
; SSE-NEXT: movaps {{.*#+}} xmm6 = [-5.49764202E+11,-5.49764202E+11,-5.49764202E+11,-5.49764202E+11]
|
||||||
; SSE-NEXT: addps %xmm6, %xmm0
|
; SSE-NEXT: addps %xmm6, %xmm0
|
||||||
; SSE-NEXT: addps %xmm3, %xmm0
|
; SSE-NEXT: addps %xmm3, %xmm0
|
||||||
; SSE-NEXT: pand %xmm1, %xmm2
|
; SSE-NEXT: pand %xmm1, %xmm2
|
||||||
|
|
@ -2804,7 +2804,7 @@ define <2 x double> @uitofp_load_2i64_to_2f64(<2 x i64> *%a) {
|
||||||
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [1127219200,1160773632,0,0]
|
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [1127219200,1160773632,0,0]
|
||||||
; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
|
; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
|
||||||
; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
|
; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
|
||||||
; SSE-NEXT: movapd {{.*#+}} xmm4 = [4.503600e+15,1.934281e+25]
|
; SSE-NEXT: movapd {{.*#+}} xmm4 = [4503599627370496,1.9342813113834067E+25]
|
||||||
; SSE-NEXT: subpd %xmm4, %xmm1
|
; SSE-NEXT: subpd %xmm4, %xmm1
|
||||||
; SSE-NEXT: movapd %xmm1, %xmm0
|
; SSE-NEXT: movapd %xmm1, %xmm0
|
||||||
; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
|
; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
|
||||||
|
|
@ -2822,7 +2822,7 @@ define <2 x double> @uitofp_load_2i64_to_2f64(<2 x i64> *%a) {
|
||||||
; VEX-NEXT: vmovapd (%rdi), %xmm0
|
; VEX-NEXT: vmovapd (%rdi), %xmm0
|
||||||
; VEX-NEXT: vmovapd {{.*#+}} xmm1 = [1127219200,1160773632,0,0]
|
; VEX-NEXT: vmovapd {{.*#+}} xmm1 = [1127219200,1160773632,0,0]
|
||||||
; VEX-NEXT: vunpcklps {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
; VEX-NEXT: vunpcklps {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
||||||
; VEX-NEXT: vmovapd {{.*#+}} xmm3 = [4.503600e+15,1.934281e+25]
|
; VEX-NEXT: vmovapd {{.*#+}} xmm3 = [4503599627370496,1.9342813113834067E+25]
|
||||||
; VEX-NEXT: vsubpd %xmm3, %xmm2, %xmm2
|
; VEX-NEXT: vsubpd %xmm3, %xmm2, %xmm2
|
||||||
; VEX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
; VEX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||||
; VEX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
; VEX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
||||||
|
|
@ -2972,7 +2972,7 @@ define <4 x double> @uitofp_load_4i64_to_4f64(<4 x i64> *%a) {
|
||||||
; SSE-NEXT: movdqa {{.*#+}} xmm3 = [1127219200,1160773632,0,0]
|
; SSE-NEXT: movdqa {{.*#+}} xmm3 = [1127219200,1160773632,0,0]
|
||||||
; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm1[2,3,0,1]
|
; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm1[2,3,0,1]
|
||||||
; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
|
; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
|
||||||
; SSE-NEXT: movapd {{.*#+}} xmm5 = [4.503600e+15,1.934281e+25]
|
; SSE-NEXT: movapd {{.*#+}} xmm5 = [4503599627370496,1.9342813113834067E+25]
|
||||||
; SSE-NEXT: subpd %xmm5, %xmm1
|
; SSE-NEXT: subpd %xmm5, %xmm1
|
||||||
; SSE-NEXT: movapd %xmm1, %xmm0
|
; SSE-NEXT: movapd %xmm1, %xmm0
|
||||||
; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
|
; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
|
||||||
|
|
@ -3003,7 +3003,7 @@ define <4 x double> @uitofp_load_4i64_to_4f64(<4 x i64> *%a) {
|
||||||
; VEX-NEXT: vextractf128 $1, %ymm0, %xmm1
|
; VEX-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||||
; VEX-NEXT: vmovapd {{.*#+}} xmm2 = [1127219200,1160773632,0,0]
|
; VEX-NEXT: vmovapd {{.*#+}} xmm2 = [1127219200,1160773632,0,0]
|
||||||
; VEX-NEXT: vunpcklps {{.*#+}} xmm3 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
|
; VEX-NEXT: vunpcklps {{.*#+}} xmm3 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
|
||||||
; VEX-NEXT: vmovapd {{.*#+}} xmm4 = [4.503600e+15,1.934281e+25]
|
; VEX-NEXT: vmovapd {{.*#+}} xmm4 = [4503599627370496,1.9342813113834067E+25]
|
||||||
; VEX-NEXT: vsubpd %xmm4, %xmm3, %xmm3
|
; VEX-NEXT: vsubpd %xmm4, %xmm3, %xmm3
|
||||||
; VEX-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
; VEX-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[2,3,0,1]
|
||||||
; VEX-NEXT: vunpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
|
; VEX-NEXT: vunpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
|
||||||
|
|
@ -3075,7 +3075,7 @@ define <4 x double> @uitofp_load_4i32_to_4f64(<4 x i32> *%a) {
|
||||||
; SSE-NEXT: movdqa %xmm0, %xmm1
|
; SSE-NEXT: movdqa %xmm0, %xmm1
|
||||||
; SSE-NEXT: psrld $16, %xmm1
|
; SSE-NEXT: psrld $16, %xmm1
|
||||||
; SSE-NEXT: cvtdq2pd %xmm1, %xmm1
|
; SSE-NEXT: cvtdq2pd %xmm1, %xmm1
|
||||||
; SSE-NEXT: movapd {{.*#+}} xmm2 = [6.553600e+04,6.553600e+04]
|
; SSE-NEXT: movapd {{.*#+}} xmm2 = [65536,65536]
|
||||||
; SSE-NEXT: mulpd %xmm2, %xmm1
|
; SSE-NEXT: mulpd %xmm2, %xmm1
|
||||||
; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
|
; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
|
||||||
; SSE-NEXT: pand {{.*}}(%rip), %xmm0
|
; SSE-NEXT: pand {{.*}}(%rip), %xmm0
|
||||||
|
|
@ -4480,7 +4480,7 @@ define <8 x float> @uitofp_load_8i32_to_8f32(<8 x i32> *%a) {
|
||||||
; SSE-NEXT: psrld $16, %xmm0
|
; SSE-NEXT: psrld $16, %xmm0
|
||||||
; SSE-NEXT: movdqa {{.*#+}} xmm5 = [1392508928,1392508928,1392508928,1392508928]
|
; SSE-NEXT: movdqa {{.*#+}} xmm5 = [1392508928,1392508928,1392508928,1392508928]
|
||||||
; SSE-NEXT: por %xmm5, %xmm0
|
; SSE-NEXT: por %xmm5, %xmm0
|
||||||
; SSE-NEXT: movaps {{.*#+}} xmm6 = [-5.497642e+11,-5.497642e+11,-5.497642e+11,-5.497642e+11]
|
; SSE-NEXT: movaps {{.*#+}} xmm6 = [-5.49764202E+11,-5.49764202E+11,-5.49764202E+11,-5.49764202E+11]
|
||||||
; SSE-NEXT: addps %xmm6, %xmm0
|
; SSE-NEXT: addps %xmm6, %xmm0
|
||||||
; SSE-NEXT: addps %xmm3, %xmm0
|
; SSE-NEXT: addps %xmm3, %xmm0
|
||||||
; SSE-NEXT: pand %xmm1, %xmm2
|
; SSE-NEXT: pand %xmm1, %xmm2
|
||||||
|
|
|
||||||
|
|
@ -247,22 +247,22 @@ define <4 x float> @test4(<4 x float> %A, float *%b, i32 %C) nounwind {
|
||||||
define <2 x double> @test5() nounwind uwtable readnone noinline {
|
define <2 x double> @test5() nounwind uwtable readnone noinline {
|
||||||
; X32-LABEL: test5:
|
; X32-LABEL: test5:
|
||||||
; X32: ## %bb.0: ## %entry
|
; X32: ## %bb.0: ## %entry
|
||||||
; X32-NEXT: movaps {{.*#+}} xmm0 = [1.280000e+02,1.233210e+02]
|
; X32-NEXT: movaps {{.*#+}} xmm0 = [128,123.321]
|
||||||
; X32-NEXT: retl
|
; X32-NEXT: retl
|
||||||
;
|
;
|
||||||
; X64-LABEL: test5:
|
; X64-LABEL: test5:
|
||||||
; X64: ## %bb.0: ## %entry
|
; X64: ## %bb.0: ## %entry
|
||||||
; X64-NEXT: movaps {{.*#+}} xmm0 = [1.280000e+02,1.233210e+02]
|
; X64-NEXT: movaps {{.*#+}} xmm0 = [128,123.321]
|
||||||
; X64-NEXT: retq
|
; X64-NEXT: retq
|
||||||
;
|
;
|
||||||
; X32_AVX-LABEL: test5:
|
; X32_AVX-LABEL: test5:
|
||||||
; X32_AVX: ## %bb.0: ## %entry
|
; X32_AVX: ## %bb.0: ## %entry
|
||||||
; X32_AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1.280000e+02,1.233210e+02]
|
; X32_AVX-NEXT: vmovaps {{.*#+}} xmm0 = [128,123.321]
|
||||||
; X32_AVX-NEXT: retl
|
; X32_AVX-NEXT: retl
|
||||||
;
|
;
|
||||||
; X64_AVX-LABEL: test5:
|
; X64_AVX-LABEL: test5:
|
||||||
; X64_AVX: ## %bb.0: ## %entry
|
; X64_AVX: ## %bb.0: ## %entry
|
||||||
; X64_AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1.280000e+02,1.233210e+02]
|
; X64_AVX-NEXT: vmovaps {{.*#+}} xmm0 = [128,123.321]
|
||||||
; X64_AVX-NEXT: retq
|
; X64_AVX-NEXT: retq
|
||||||
entry:
|
entry:
|
||||||
%0 = tail call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> <double 4.569870e+02, double 1.233210e+02>, i32 128) nounwind readnone
|
%0 = tail call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> <double 4.569870e+02, double 1.233210e+02>, i32 128) nounwind readnone
|
||||||
|
|
|
||||||
|
|
@ -109,7 +109,7 @@ define <8 x float> @test_uitofp_v8i32_to_v8f32(<8 x i32> %arg) {
|
||||||
; SSE2-NEXT: movdqa %xmm0, %xmm2
|
; SSE2-NEXT: movdqa %xmm0, %xmm2
|
||||||
; SSE2-NEXT: psrld $16, %xmm2
|
; SSE2-NEXT: psrld $16, %xmm2
|
||||||
; SSE2-NEXT: cvtdq2ps %xmm2, %xmm2
|
; SSE2-NEXT: cvtdq2ps %xmm2, %xmm2
|
||||||
; SSE2-NEXT: movaps {{.*#+}} xmm3 = [6.553600e+04,6.553600e+04,6.553600e+04,6.553600e+04]
|
; SSE2-NEXT: movaps {{.*#+}} xmm3 = [65536,65536,65536,65536]
|
||||||
; SSE2-NEXT: mulps %xmm3, %xmm2
|
; SSE2-NEXT: mulps %xmm3, %xmm2
|
||||||
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [65535,65535,65535,65535]
|
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [65535,65535,65535,65535]
|
||||||
; SSE2-NEXT: pand %xmm4, %xmm0
|
; SSE2-NEXT: pand %xmm4, %xmm0
|
||||||
|
|
@ -129,7 +129,7 @@ define <8 x float> @test_uitofp_v8i32_to_v8f32(<8 x i32> %arg) {
|
||||||
; SSE41-NEXT: movdqa %xmm0, %xmm2
|
; SSE41-NEXT: movdqa %xmm0, %xmm2
|
||||||
; SSE41-NEXT: psrld $16, %xmm2
|
; SSE41-NEXT: psrld $16, %xmm2
|
||||||
; SSE41-NEXT: cvtdq2ps %xmm2, %xmm2
|
; SSE41-NEXT: cvtdq2ps %xmm2, %xmm2
|
||||||
; SSE41-NEXT: movaps {{.*#+}} xmm3 = [6.553600e+04,6.553600e+04,6.553600e+04,6.553600e+04]
|
; SSE41-NEXT: movaps {{.*#+}} xmm3 = [65536,65536,65536,65536]
|
||||||
; SSE41-NEXT: mulps %xmm3, %xmm2
|
; SSE41-NEXT: mulps %xmm3, %xmm2
|
||||||
; SSE41-NEXT: pxor %xmm4, %xmm4
|
; SSE41-NEXT: pxor %xmm4, %xmm4
|
||||||
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1],xmm0[2],xmm4[3],xmm0[4],xmm4[5],xmm0[6],xmm4[7]
|
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1],xmm0[2],xmm4[3],xmm0[4],xmm4[5],xmm0[6],xmm4[7]
|
||||||
|
|
|
||||||
|
|
@ -107,7 +107,7 @@ define <8 x float> @test2(<8 x i32> %A) nounwind {
|
||||||
; SSE-NEXT: psrld $16, %xmm0
|
; SSE-NEXT: psrld $16, %xmm0
|
||||||
; SSE-NEXT: movdqa {{.*#+}} [[HIGHCST:xmm[0-9]+]] = [1392508928,1392508928,1392508928,1392508928]
|
; SSE-NEXT: movdqa {{.*#+}} [[HIGHCST:xmm[0-9]+]] = [1392508928,1392508928,1392508928,1392508928]
|
||||||
; SSE-NEXT: por %[[HIGHCST]], %xmm0
|
; SSE-NEXT: por %[[HIGHCST]], %xmm0
|
||||||
; SSE-NEXT: movaps {{.*#+}} [[MAGICCST:xmm[0-9]+]] = [-5.497642e+11,-5.497642e+11,-5.497642e+11,-5.497642e+11]
|
; SSE-NEXT: movaps {{.*#+}} [[MAGICCST:xmm[0-9]+]] = [-5.49764202E+11,-5.49764202E+11,-5.49764202E+11,-5.49764202E+11]
|
||||||
; SSE-NEXT: addps %[[MAGICCST]], %xmm0
|
; SSE-NEXT: addps %[[MAGICCST]], %xmm0
|
||||||
; SSE-NEXT: addps [[VECLOW]], %xmm0
|
; SSE-NEXT: addps [[VECLOW]], %xmm0
|
||||||
; MASK is the low vector of the second part after this point.
|
; MASK is the low vector of the second part after this point.
|
||||||
|
|
@ -125,7 +125,7 @@ define <8 x float> @test2(<8 x i32> %A) nounwind {
|
||||||
; SSE41-NEXT: psrld $16, %xmm0
|
; SSE41-NEXT: psrld $16, %xmm0
|
||||||
; SSE41-NEXT: movdqa {{.*#+}} [[HIGHCST:xmm[0-9]+]] = [1392508928,1392508928,1392508928,1392508928]
|
; SSE41-NEXT: movdqa {{.*#+}} [[HIGHCST:xmm[0-9]+]] = [1392508928,1392508928,1392508928,1392508928]
|
||||||
; SSE41-NEXT: pblendw $170, %[[HIGHCST]], %xmm0
|
; SSE41-NEXT: pblendw $170, %[[HIGHCST]], %xmm0
|
||||||
; SSE41-NEXT: movaps {{.*#+}} [[MAGICCST:xmm[0-9]+]] = [-5.497642e+11,-5.497642e+11,-5.497642e+11,-5.497642e+11]
|
; SSE41-NEXT: movaps {{.*#+}} [[MAGICCST:xmm[0-9]+]] = [-5.49764202E+11,-5.49764202E+11,-5.49764202E+11,-5.49764202E+11]
|
||||||
; SSE41-NEXT: addps %[[MAGICCST]], %xmm0
|
; SSE41-NEXT: addps %[[MAGICCST]], %xmm0
|
||||||
; SSE41-NEXT: addps [[VECLOW]], %xmm0
|
; SSE41-NEXT: addps [[VECLOW]], %xmm0
|
||||||
; LOWCST is the low vector of the second part after this point.
|
; LOWCST is the low vector of the second part after this point.
|
||||||
|
|
|
||||||
|
|
@ -26,13 +26,13 @@ entry:
|
||||||
define <2 x double> @constrained_vector_fdiv_v2f64() {
|
define <2 x double> @constrained_vector_fdiv_v2f64() {
|
||||||
; NO-FMA-LABEL: constrained_vector_fdiv_v2f64:
|
; NO-FMA-LABEL: constrained_vector_fdiv_v2f64:
|
||||||
; NO-FMA: # %bb.0: # %entry
|
; NO-FMA: # %bb.0: # %entry
|
||||||
; NO-FMA-NEXT: movapd {{.*#+}} xmm0 = [1.000000e+00,2.000000e+00]
|
; NO-FMA-NEXT: movapd {{.*#+}} xmm0 = [1,2]
|
||||||
; NO-FMA-NEXT: divpd {{.*}}(%rip), %xmm0
|
; NO-FMA-NEXT: divpd {{.*}}(%rip), %xmm0
|
||||||
; NO-FMA-NEXT: retq
|
; NO-FMA-NEXT: retq
|
||||||
;
|
;
|
||||||
; HAS-FMA-LABEL: constrained_vector_fdiv_v2f64:
|
; HAS-FMA-LABEL: constrained_vector_fdiv_v2f64:
|
||||||
; HAS-FMA: # %bb.0: # %entry
|
; HAS-FMA: # %bb.0: # %entry
|
||||||
; HAS-FMA-NEXT: vmovapd {{.*#+}} xmm0 = [1.000000e+00,2.000000e+00]
|
; HAS-FMA-NEXT: vmovapd {{.*#+}} xmm0 = [1,2]
|
||||||
; HAS-FMA-NEXT: vdivpd {{.*}}(%rip), %xmm0, %xmm0
|
; HAS-FMA-NEXT: vdivpd {{.*}}(%rip), %xmm0, %xmm0
|
||||||
; HAS-FMA-NEXT: retq
|
; HAS-FMA-NEXT: retq
|
||||||
entry:
|
entry:
|
||||||
|
|
@ -82,7 +82,7 @@ entry:
|
||||||
define <3 x double> @constrained_vector_fdiv_v3f64() {
|
define <3 x double> @constrained_vector_fdiv_v3f64() {
|
||||||
; NO-FMA-LABEL: constrained_vector_fdiv_v3f64:
|
; NO-FMA-LABEL: constrained_vector_fdiv_v3f64:
|
||||||
; NO-FMA: # %bb.0: # %entry
|
; NO-FMA: # %bb.0: # %entry
|
||||||
; NO-FMA-NEXT: movapd {{.*#+}} xmm0 = [1.000000e+00,2.000000e+00]
|
; NO-FMA-NEXT: movapd {{.*#+}} xmm0 = [1,2]
|
||||||
; NO-FMA-NEXT: divpd {{.*}}(%rip), %xmm0
|
; NO-FMA-NEXT: divpd {{.*}}(%rip), %xmm0
|
||||||
; NO-FMA-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
|
; NO-FMA-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
|
||||||
; NO-FMA-NEXT: divsd {{.*}}(%rip), %xmm1
|
; NO-FMA-NEXT: divsd {{.*}}(%rip), %xmm1
|
||||||
|
|
@ -96,7 +96,7 @@ define <3 x double> @constrained_vector_fdiv_v3f64() {
|
||||||
; HAS-FMA: # %bb.0: # %entry
|
; HAS-FMA: # %bb.0: # %entry
|
||||||
; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
||||||
; HAS-FMA-NEXT: vdivsd {{.*}}(%rip), %xmm0, %xmm0
|
; HAS-FMA-NEXT: vdivsd {{.*}}(%rip), %xmm0, %xmm0
|
||||||
; HAS-FMA-NEXT: vmovapd {{.*#+}} xmm1 = [1.000000e+00,2.000000e+00]
|
; HAS-FMA-NEXT: vmovapd {{.*#+}} xmm1 = [1,2]
|
||||||
; HAS-FMA-NEXT: vdivpd {{.*}}(%rip), %xmm1, %xmm1
|
; HAS-FMA-NEXT: vdivpd {{.*}}(%rip), %xmm1, %xmm1
|
||||||
; HAS-FMA-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
; HAS-FMA-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||||
; HAS-FMA-NEXT: retq
|
; HAS-FMA-NEXT: retq
|
||||||
|
|
@ -112,16 +112,16 @@ entry:
|
||||||
define <4 x double> @constrained_vector_fdiv_v4f64() {
|
define <4 x double> @constrained_vector_fdiv_v4f64() {
|
||||||
; NO-FMA-LABEL: constrained_vector_fdiv_v4f64:
|
; NO-FMA-LABEL: constrained_vector_fdiv_v4f64:
|
||||||
; NO-FMA: # %bb.0: # %entry
|
; NO-FMA: # %bb.0: # %entry
|
||||||
; NO-FMA-NEXT: movapd {{.*#+}} xmm2 = [1.000000e+01,1.000000e+01]
|
; NO-FMA-NEXT: movapd {{.*#+}} xmm2 = [10,10]
|
||||||
; NO-FMA-NEXT: movapd {{.*#+}} xmm0 = [1.000000e+00,2.000000e+00]
|
; NO-FMA-NEXT: movapd {{.*#+}} xmm0 = [1,2]
|
||||||
; NO-FMA-NEXT: divpd %xmm2, %xmm0
|
; NO-FMA-NEXT: divpd %xmm2, %xmm0
|
||||||
; NO-FMA-NEXT: movapd {{.*#+}} xmm1 = [3.000000e+00,4.000000e+00]
|
; NO-FMA-NEXT: movapd {{.*#+}} xmm1 = [3,4]
|
||||||
; NO-FMA-NEXT: divpd %xmm2, %xmm1
|
; NO-FMA-NEXT: divpd %xmm2, %xmm1
|
||||||
; NO-FMA-NEXT: retq
|
; NO-FMA-NEXT: retq
|
||||||
;
|
;
|
||||||
; HAS-FMA-LABEL: constrained_vector_fdiv_v4f64:
|
; HAS-FMA-LABEL: constrained_vector_fdiv_v4f64:
|
||||||
; HAS-FMA: # %bb.0: # %entry
|
; HAS-FMA: # %bb.0: # %entry
|
||||||
; HAS-FMA-NEXT: vmovapd {{.*#+}} ymm0 = [1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00]
|
; HAS-FMA-NEXT: vmovapd {{.*#+}} ymm0 = [1,2,3,4]
|
||||||
; HAS-FMA-NEXT: vdivpd {{.*}}(%rip), %ymm0, %ymm0
|
; HAS-FMA-NEXT: vdivpd {{.*}}(%rip), %ymm0, %ymm0
|
||||||
; HAS-FMA-NEXT: retq
|
; HAS-FMA-NEXT: retq
|
||||||
entry:
|
entry:
|
||||||
|
|
@ -414,13 +414,13 @@ entry:
|
||||||
define <2 x double> @constrained_vector_fmul_v2f64() {
|
define <2 x double> @constrained_vector_fmul_v2f64() {
|
||||||
; NO-FMA-LABEL: constrained_vector_fmul_v2f64:
|
; NO-FMA-LABEL: constrained_vector_fmul_v2f64:
|
||||||
; NO-FMA: # %bb.0: # %entry
|
; NO-FMA: # %bb.0: # %entry
|
||||||
; NO-FMA-NEXT: movapd {{.*#+}} xmm0 = [1.797693e+308,1.797693e+308]
|
; NO-FMA-NEXT: movapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308]
|
||||||
; NO-FMA-NEXT: mulpd {{.*}}(%rip), %xmm0
|
; NO-FMA-NEXT: mulpd {{.*}}(%rip), %xmm0
|
||||||
; NO-FMA-NEXT: retq
|
; NO-FMA-NEXT: retq
|
||||||
;
|
;
|
||||||
; HAS-FMA-LABEL: constrained_vector_fmul_v2f64:
|
; HAS-FMA-LABEL: constrained_vector_fmul_v2f64:
|
||||||
; HAS-FMA: # %bb.0: # %entry
|
; HAS-FMA: # %bb.0: # %entry
|
||||||
; HAS-FMA-NEXT: vmovapd {{.*#+}} xmm0 = [1.797693e+308,1.797693e+308]
|
; HAS-FMA-NEXT: vmovapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308]
|
||||||
; HAS-FMA-NEXT: vmulpd {{.*}}(%rip), %xmm0, %xmm0
|
; HAS-FMA-NEXT: vmulpd {{.*}}(%rip), %xmm0, %xmm0
|
||||||
; HAS-FMA-NEXT: retq
|
; HAS-FMA-NEXT: retq
|
||||||
entry:
|
entry:
|
||||||
|
|
@ -467,7 +467,7 @@ entry:
|
||||||
define <3 x double> @constrained_vector_fmul_v3f64() {
|
define <3 x double> @constrained_vector_fmul_v3f64() {
|
||||||
; NO-FMA-LABEL: constrained_vector_fmul_v3f64:
|
; NO-FMA-LABEL: constrained_vector_fmul_v3f64:
|
||||||
; NO-FMA: # %bb.0: # %entry
|
; NO-FMA: # %bb.0: # %entry
|
||||||
; NO-FMA-NEXT: movapd {{.*#+}} xmm0 = [1.797693e+308,1.797693e+308]
|
; NO-FMA-NEXT: movapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308]
|
||||||
; NO-FMA-NEXT: mulpd {{.*}}(%rip), %xmm0
|
; NO-FMA-NEXT: mulpd {{.*}}(%rip), %xmm0
|
||||||
; NO-FMA-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
|
; NO-FMA-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
|
||||||
; NO-FMA-NEXT: mulsd {{.*}}(%rip), %xmm1
|
; NO-FMA-NEXT: mulsd {{.*}}(%rip), %xmm1
|
||||||
|
|
@ -481,7 +481,7 @@ define <3 x double> @constrained_vector_fmul_v3f64() {
|
||||||
; HAS-FMA: # %bb.0: # %entry
|
; HAS-FMA: # %bb.0: # %entry
|
||||||
; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
||||||
; HAS-FMA-NEXT: vmulsd {{.*}}(%rip), %xmm0, %xmm0
|
; HAS-FMA-NEXT: vmulsd {{.*}}(%rip), %xmm0, %xmm0
|
||||||
; HAS-FMA-NEXT: vmovapd {{.*#+}} xmm1 = [1.797693e+308,1.797693e+308]
|
; HAS-FMA-NEXT: vmovapd {{.*#+}} xmm1 = [1.7976931348623157E+308,1.7976931348623157E+308]
|
||||||
; HAS-FMA-NEXT: vmulpd {{.*}}(%rip), %xmm1, %xmm1
|
; HAS-FMA-NEXT: vmulpd {{.*}}(%rip), %xmm1, %xmm1
|
||||||
; HAS-FMA-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
; HAS-FMA-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||||
; HAS-FMA-NEXT: retq
|
; HAS-FMA-NEXT: retq
|
||||||
|
|
@ -498,15 +498,15 @@ entry:
|
||||||
define <4 x double> @constrained_vector_fmul_v4f64() {
|
define <4 x double> @constrained_vector_fmul_v4f64() {
|
||||||
; NO-FMA-LABEL: constrained_vector_fmul_v4f64:
|
; NO-FMA-LABEL: constrained_vector_fmul_v4f64:
|
||||||
; NO-FMA: # %bb.0: # %entry
|
; NO-FMA: # %bb.0: # %entry
|
||||||
; NO-FMA-NEXT: movapd {{.*#+}} xmm1 = [1.797693e+308,1.797693e+308]
|
; NO-FMA-NEXT: movapd {{.*#+}} xmm1 = [1.7976931348623157E+308,1.7976931348623157E+308]
|
||||||
; NO-FMA-NEXT: movapd {{.*#+}} xmm0 = [2.000000e+00,3.000000e+00]
|
; NO-FMA-NEXT: movapd {{.*#+}} xmm0 = [2,3]
|
||||||
; NO-FMA-NEXT: mulpd %xmm1, %xmm0
|
; NO-FMA-NEXT: mulpd %xmm1, %xmm0
|
||||||
; NO-FMA-NEXT: mulpd {{.*}}(%rip), %xmm1
|
; NO-FMA-NEXT: mulpd {{.*}}(%rip), %xmm1
|
||||||
; NO-FMA-NEXT: retq
|
; NO-FMA-NEXT: retq
|
||||||
;
|
;
|
||||||
; HAS-FMA-LABEL: constrained_vector_fmul_v4f64:
|
; HAS-FMA-LABEL: constrained_vector_fmul_v4f64:
|
||||||
; HAS-FMA: # %bb.0: # %entry
|
; HAS-FMA: # %bb.0: # %entry
|
||||||
; HAS-FMA-NEXT: vmovapd {{.*#+}} ymm0 = [1.797693e+308,1.797693e+308,1.797693e+308,1.797693e+308]
|
; HAS-FMA-NEXT: vmovapd {{.*#+}} ymm0 = [1.7976931348623157E+308,1.7976931348623157E+308,1.7976931348623157E+308,1.7976931348623157E+308]
|
||||||
; HAS-FMA-NEXT: vmulpd {{.*}}(%rip), %ymm0, %ymm0
|
; HAS-FMA-NEXT: vmulpd {{.*}}(%rip), %ymm0, %ymm0
|
||||||
; HAS-FMA-NEXT: retq
|
; HAS-FMA-NEXT: retq
|
||||||
entry:
|
entry:
|
||||||
|
|
@ -544,13 +544,13 @@ entry:
|
||||||
define <2 x double> @constrained_vector_fadd_v2f64() {
|
define <2 x double> @constrained_vector_fadd_v2f64() {
|
||||||
; NO-FMA-LABEL: constrained_vector_fadd_v2f64:
|
; NO-FMA-LABEL: constrained_vector_fadd_v2f64:
|
||||||
; NO-FMA: # %bb.0: # %entry
|
; NO-FMA: # %bb.0: # %entry
|
||||||
; NO-FMA-NEXT: movapd {{.*#+}} xmm0 = [1.797693e+308,1.797693e+308]
|
; NO-FMA-NEXT: movapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308]
|
||||||
; NO-FMA-NEXT: addpd {{.*}}(%rip), %xmm0
|
; NO-FMA-NEXT: addpd {{.*}}(%rip), %xmm0
|
||||||
; NO-FMA-NEXT: retq
|
; NO-FMA-NEXT: retq
|
||||||
;
|
;
|
||||||
; HAS-FMA-LABEL: constrained_vector_fadd_v2f64:
|
; HAS-FMA-LABEL: constrained_vector_fadd_v2f64:
|
||||||
; HAS-FMA: # %bb.0: # %entry
|
; HAS-FMA: # %bb.0: # %entry
|
||||||
; HAS-FMA-NEXT: vmovapd {{.*#+}} xmm0 = [1.797693e+308,1.797693e+308]
|
; HAS-FMA-NEXT: vmovapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308]
|
||||||
; HAS-FMA-NEXT: vaddpd {{.*}}(%rip), %xmm0, %xmm0
|
; HAS-FMA-NEXT: vaddpd {{.*}}(%rip), %xmm0, %xmm0
|
||||||
; HAS-FMA-NEXT: retq
|
; HAS-FMA-NEXT: retq
|
||||||
entry:
|
entry:
|
||||||
|
|
@ -598,7 +598,7 @@ entry:
|
||||||
define <3 x double> @constrained_vector_fadd_v3f64() {
|
define <3 x double> @constrained_vector_fadd_v3f64() {
|
||||||
; NO-FMA-LABEL: constrained_vector_fadd_v3f64:
|
; NO-FMA-LABEL: constrained_vector_fadd_v3f64:
|
||||||
; NO-FMA: # %bb.0: # %entry
|
; NO-FMA: # %bb.0: # %entry
|
||||||
; NO-FMA-NEXT: movapd {{.*#+}} xmm0 = [1.797693e+308,1.797693e+308]
|
; NO-FMA-NEXT: movapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308]
|
||||||
; NO-FMA-NEXT: addpd {{.*}}(%rip), %xmm0
|
; NO-FMA-NEXT: addpd {{.*}}(%rip), %xmm0
|
||||||
; NO-FMA-NEXT: xorpd %xmm1, %xmm1
|
; NO-FMA-NEXT: xorpd %xmm1, %xmm1
|
||||||
; NO-FMA-NEXT: addsd {{.*}}(%rip), %xmm1
|
; NO-FMA-NEXT: addsd {{.*}}(%rip), %xmm1
|
||||||
|
|
@ -612,7 +612,7 @@ define <3 x double> @constrained_vector_fadd_v3f64() {
|
||||||
; HAS-FMA: # %bb.0: # %entry
|
; HAS-FMA: # %bb.0: # %entry
|
||||||
; HAS-FMA-NEXT: vxorpd %xmm0, %xmm0, %xmm0
|
; HAS-FMA-NEXT: vxorpd %xmm0, %xmm0, %xmm0
|
||||||
; HAS-FMA-NEXT: vaddsd {{.*}}(%rip), %xmm0, %xmm0
|
; HAS-FMA-NEXT: vaddsd {{.*}}(%rip), %xmm0, %xmm0
|
||||||
; HAS-FMA-NEXT: vmovapd {{.*#+}} xmm1 = [1.797693e+308,1.797693e+308]
|
; HAS-FMA-NEXT: vmovapd {{.*#+}} xmm1 = [1.7976931348623157E+308,1.7976931348623157E+308]
|
||||||
; HAS-FMA-NEXT: vaddpd {{.*}}(%rip), %xmm1, %xmm1
|
; HAS-FMA-NEXT: vaddpd {{.*}}(%rip), %xmm1, %xmm1
|
||||||
; HAS-FMA-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
; HAS-FMA-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||||
; HAS-FMA-NEXT: retq
|
; HAS-FMA-NEXT: retq
|
||||||
|
|
@ -629,15 +629,15 @@ entry:
|
||||||
define <4 x double> @constrained_vector_fadd_v4f64() {
|
define <4 x double> @constrained_vector_fadd_v4f64() {
|
||||||
; NO-FMA-LABEL: constrained_vector_fadd_v4f64:
|
; NO-FMA-LABEL: constrained_vector_fadd_v4f64:
|
||||||
; NO-FMA: # %bb.0: # %entry
|
; NO-FMA: # %bb.0: # %entry
|
||||||
; NO-FMA-NEXT: movapd {{.*#+}} xmm1 = [1.797693e+308,1.797693e+308]
|
; NO-FMA-NEXT: movapd {{.*#+}} xmm1 = [1.7976931348623157E+308,1.7976931348623157E+308]
|
||||||
; NO-FMA-NEXT: movapd {{.*#+}} xmm0 = [1.000000e+00,1.000000e-01]
|
; NO-FMA-NEXT: movapd {{.*#+}} xmm0 = [1,0.10000000000000001]
|
||||||
; NO-FMA-NEXT: addpd %xmm1, %xmm0
|
; NO-FMA-NEXT: addpd %xmm1, %xmm0
|
||||||
; NO-FMA-NEXT: addpd {{.*}}(%rip), %xmm1
|
; NO-FMA-NEXT: addpd {{.*}}(%rip), %xmm1
|
||||||
; NO-FMA-NEXT: retq
|
; NO-FMA-NEXT: retq
|
||||||
;
|
;
|
||||||
; HAS-FMA-LABEL: constrained_vector_fadd_v4f64:
|
; HAS-FMA-LABEL: constrained_vector_fadd_v4f64:
|
||||||
; HAS-FMA: # %bb.0: # %entry
|
; HAS-FMA: # %bb.0: # %entry
|
||||||
; HAS-FMA-NEXT: vmovapd {{.*#+}} ymm0 = [1.797693e+308,1.797693e+308,1.797693e+308,1.797693e+308]
|
; HAS-FMA-NEXT: vmovapd {{.*#+}} ymm0 = [1.7976931348623157E+308,1.7976931348623157E+308,1.7976931348623157E+308,1.7976931348623157E+308]
|
||||||
; HAS-FMA-NEXT: vaddpd {{.*}}(%rip), %ymm0, %ymm0
|
; HAS-FMA-NEXT: vaddpd {{.*}}(%rip), %ymm0, %ymm0
|
||||||
; HAS-FMA-NEXT: retq
|
; HAS-FMA-NEXT: retq
|
||||||
entry:
|
entry:
|
||||||
|
|
@ -675,13 +675,13 @@ entry:
|
||||||
define <2 x double> @constrained_vector_fsub_v2f64() {
|
define <2 x double> @constrained_vector_fsub_v2f64() {
|
||||||
; NO-FMA-LABEL: constrained_vector_fsub_v2f64:
|
; NO-FMA-LABEL: constrained_vector_fsub_v2f64:
|
||||||
; NO-FMA: # %bb.0: # %entry
|
; NO-FMA: # %bb.0: # %entry
|
||||||
; NO-FMA-NEXT: movapd {{.*#+}} xmm0 = [-1.797693e+308,-1.797693e+308]
|
; NO-FMA-NEXT: movapd {{.*#+}} xmm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308]
|
||||||
; NO-FMA-NEXT: subpd {{.*}}(%rip), %xmm0
|
; NO-FMA-NEXT: subpd {{.*}}(%rip), %xmm0
|
||||||
; NO-FMA-NEXT: retq
|
; NO-FMA-NEXT: retq
|
||||||
;
|
;
|
||||||
; HAS-FMA-LABEL: constrained_vector_fsub_v2f64:
|
; HAS-FMA-LABEL: constrained_vector_fsub_v2f64:
|
||||||
; HAS-FMA: # %bb.0: # %entry
|
; HAS-FMA: # %bb.0: # %entry
|
||||||
; HAS-FMA-NEXT: vmovapd {{.*#+}} xmm0 = [-1.797693e+308,-1.797693e+308]
|
; HAS-FMA-NEXT: vmovapd {{.*#+}} xmm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308]
|
||||||
; HAS-FMA-NEXT: vsubpd {{.*}}(%rip), %xmm0, %xmm0
|
; HAS-FMA-NEXT: vsubpd {{.*}}(%rip), %xmm0, %xmm0
|
||||||
; HAS-FMA-NEXT: retq
|
; HAS-FMA-NEXT: retq
|
||||||
entry:
|
entry:
|
||||||
|
|
@ -733,7 +733,7 @@ define <3 x double> @constrained_vector_fsub_v3f64() {
|
||||||
; NO-FMA-NEXT: xorpd %xmm0, %xmm0
|
; NO-FMA-NEXT: xorpd %xmm0, %xmm0
|
||||||
; NO-FMA-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
|
; NO-FMA-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
|
||||||
; NO-FMA-NEXT: subsd %xmm0, %xmm1
|
; NO-FMA-NEXT: subsd %xmm0, %xmm1
|
||||||
; NO-FMA-NEXT: movapd {{.*#+}} xmm0 = [-1.797693e+308,-1.797693e+308]
|
; NO-FMA-NEXT: movapd {{.*#+}} xmm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308]
|
||||||
; NO-FMA-NEXT: subpd {{.*}}(%rip), %xmm0
|
; NO-FMA-NEXT: subpd {{.*}}(%rip), %xmm0
|
||||||
; NO-FMA-NEXT: movsd %xmm1, -{{[0-9]+}}(%rsp)
|
; NO-FMA-NEXT: movsd %xmm1, -{{[0-9]+}}(%rsp)
|
||||||
; NO-FMA-NEXT: movapd %xmm0, %xmm1
|
; NO-FMA-NEXT: movapd %xmm0, %xmm1
|
||||||
|
|
@ -746,7 +746,7 @@ define <3 x double> @constrained_vector_fsub_v3f64() {
|
||||||
; HAS-FMA-NEXT: vxorpd %xmm0, %xmm0, %xmm0
|
; HAS-FMA-NEXT: vxorpd %xmm0, %xmm0, %xmm0
|
||||||
; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
|
; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
|
||||||
; HAS-FMA-NEXT: vsubsd %xmm0, %xmm1, %xmm0
|
; HAS-FMA-NEXT: vsubsd %xmm0, %xmm1, %xmm0
|
||||||
; HAS-FMA-NEXT: vmovapd {{.*#+}} xmm1 = [-1.797693e+308,-1.797693e+308]
|
; HAS-FMA-NEXT: vmovapd {{.*#+}} xmm1 = [-1.7976931348623157E+308,-1.7976931348623157E+308]
|
||||||
; HAS-FMA-NEXT: vsubpd {{.*}}(%rip), %xmm1, %xmm1
|
; HAS-FMA-NEXT: vsubpd {{.*}}(%rip), %xmm1, %xmm1
|
||||||
; HAS-FMA-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
; HAS-FMA-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||||
; HAS-FMA-NEXT: retq
|
; HAS-FMA-NEXT: retq
|
||||||
|
|
@ -763,7 +763,7 @@ entry:
|
||||||
define <4 x double> @constrained_vector_fsub_v4f64() {
|
define <4 x double> @constrained_vector_fsub_v4f64() {
|
||||||
; NO-FMA-LABEL: constrained_vector_fsub_v4f64:
|
; NO-FMA-LABEL: constrained_vector_fsub_v4f64:
|
||||||
; NO-FMA: # %bb.0: # %entry
|
; NO-FMA: # %bb.0: # %entry
|
||||||
; NO-FMA-NEXT: movapd {{.*#+}} xmm1 = [-1.797693e+308,-1.797693e+308]
|
; NO-FMA-NEXT: movapd {{.*#+}} xmm1 = [-1.7976931348623157E+308,-1.7976931348623157E+308]
|
||||||
; NO-FMA-NEXT: movapd %xmm1, %xmm0
|
; NO-FMA-NEXT: movapd %xmm1, %xmm0
|
||||||
; NO-FMA-NEXT: subpd {{.*}}(%rip), %xmm0
|
; NO-FMA-NEXT: subpd {{.*}}(%rip), %xmm0
|
||||||
; NO-FMA-NEXT: subpd {{.*}}(%rip), %xmm1
|
; NO-FMA-NEXT: subpd {{.*}}(%rip), %xmm1
|
||||||
|
|
@ -771,7 +771,7 @@ define <4 x double> @constrained_vector_fsub_v4f64() {
|
||||||
;
|
;
|
||||||
; HAS-FMA-LABEL: constrained_vector_fsub_v4f64:
|
; HAS-FMA-LABEL: constrained_vector_fsub_v4f64:
|
||||||
; HAS-FMA: # %bb.0: # %entry
|
; HAS-FMA: # %bb.0: # %entry
|
||||||
; HAS-FMA-NEXT: vmovapd {{.*#+}} ymm0 = [-1.797693e+308,-1.797693e+308,-1.797693e+308,-1.797693e+308]
|
; HAS-FMA-NEXT: vmovapd {{.*#+}} ymm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308,-1.7976931348623157E+308,-1.7976931348623157E+308]
|
||||||
; HAS-FMA-NEXT: vsubpd {{.*}}(%rip), %ymm0, %ymm0
|
; HAS-FMA-NEXT: vsubpd {{.*}}(%rip), %ymm0, %ymm0
|
||||||
; HAS-FMA-NEXT: retq
|
; HAS-FMA-NEXT: retq
|
||||||
entry:
|
entry:
|
||||||
|
|
@ -836,8 +836,8 @@ define <2 x double> @constrained_vector_fma_v2f64() {
|
||||||
;
|
;
|
||||||
; HAS-FMA-LABEL: constrained_vector_fma_v2f64:
|
; HAS-FMA-LABEL: constrained_vector_fma_v2f64:
|
||||||
; HAS-FMA: # %bb.0: # %entry
|
; HAS-FMA: # %bb.0: # %entry
|
||||||
; HAS-FMA-NEXT: vmovapd {{.*#+}} xmm1 = [1.500000e+00,5.000000e-01]
|
; HAS-FMA-NEXT: vmovapd {{.*#+}} xmm1 = [1.5,0.5]
|
||||||
; HAS-FMA-NEXT: vmovapd {{.*#+}} xmm0 = [3.500000e+00,2.500000e+00]
|
; HAS-FMA-NEXT: vmovapd {{.*#+}} xmm0 = [3.5,2.5]
|
||||||
; HAS-FMA-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem
|
; HAS-FMA-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem
|
||||||
; HAS-FMA-NEXT: retq
|
; HAS-FMA-NEXT: retq
|
||||||
entry:
|
entry:
|
||||||
|
|
@ -936,8 +936,8 @@ define <3 x double> @constrained_vector_fma_v3f64() {
|
||||||
; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
||||||
; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
|
; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
|
||||||
; HAS-FMA-NEXT: vfmadd213sd {{.*#+}} xmm1 = (xmm0 * xmm1) + mem
|
; HAS-FMA-NEXT: vfmadd213sd {{.*#+}} xmm1 = (xmm0 * xmm1) + mem
|
||||||
; HAS-FMA-NEXT: vmovapd {{.*#+}} xmm0 = [2.500000e+00,1.500000e+00]
|
; HAS-FMA-NEXT: vmovapd {{.*#+}} xmm0 = [2.5,1.5]
|
||||||
; HAS-FMA-NEXT: vmovapd {{.*#+}} xmm2 = [5.500000e+00,4.500000e+00]
|
; HAS-FMA-NEXT: vmovapd {{.*#+}} xmm2 = [5.5,4.5]
|
||||||
; HAS-FMA-NEXT: vfmadd213pd {{.*#+}} xmm2 = (xmm0 * xmm2) + mem
|
; HAS-FMA-NEXT: vfmadd213pd {{.*#+}} xmm2 = (xmm0 * xmm2) + mem
|
||||||
; HAS-FMA-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm0
|
; HAS-FMA-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm0
|
||||||
; HAS-FMA-NEXT: retq
|
; HAS-FMA-NEXT: retq
|
||||||
|
|
@ -987,8 +987,8 @@ define <4 x double> @constrained_vector_fma_v4f64() {
|
||||||
;
|
;
|
||||||
; HAS-FMA-LABEL: constrained_vector_fma_v4f64:
|
; HAS-FMA-LABEL: constrained_vector_fma_v4f64:
|
||||||
; HAS-FMA: # %bb.0: # %entry
|
; HAS-FMA: # %bb.0: # %entry
|
||||||
; HAS-FMA-NEXT: vmovapd {{.*#+}} ymm1 = [3.500000e+00,2.500000e+00,1.500000e+00,5.000000e-01]
|
; HAS-FMA-NEXT: vmovapd {{.*#+}} ymm1 = [3.5,2.5,1.5,0.5]
|
||||||
; HAS-FMA-NEXT: vmovapd {{.*#+}} ymm0 = [7.500000e+00,6.500000e+00,5.500000e+00,4.500000e+00]
|
; HAS-FMA-NEXT: vmovapd {{.*#+}} ymm0 = [7.5,6.5,5.5,4.5]
|
||||||
; HAS-FMA-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem
|
; HAS-FMA-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem
|
||||||
; HAS-FMA-NEXT: retq
|
; HAS-FMA-NEXT: retq
|
||||||
entry:
|
entry:
|
||||||
|
|
@ -1037,8 +1037,8 @@ define <4 x float> @constrained_vector_fma_v4f32() {
|
||||||
;
|
;
|
||||||
; HAS-FMA-LABEL: constrained_vector_fma_v4f32:
|
; HAS-FMA-LABEL: constrained_vector_fma_v4f32:
|
||||||
; HAS-FMA: # %bb.0: # %entry
|
; HAS-FMA: # %bb.0: # %entry
|
||||||
; HAS-FMA-NEXT: vmovaps {{.*#+}} xmm1 = [3.500000e+00,2.500000e+00,1.500000e+00,5.000000e-01]
|
; HAS-FMA-NEXT: vmovaps {{.*#+}} xmm1 = [3.5,2.5,1.5,0.5]
|
||||||
; HAS-FMA-NEXT: vmovaps {{.*#+}} xmm0 = [7.500000e+00,6.500000e+00,5.500000e+00,4.500000e+00]
|
; HAS-FMA-NEXT: vmovaps {{.*#+}} xmm0 = [7.5,6.5,5.5,4.5]
|
||||||
; HAS-FMA-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem
|
; HAS-FMA-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem
|
||||||
; HAS-FMA-NEXT: retq
|
; HAS-FMA-NEXT: retq
|
||||||
entry:
|
entry:
|
||||||
|
|
@ -1115,8 +1115,8 @@ define <8 x float> @constrained_vector_fma_v8f32() {
|
||||||
;
|
;
|
||||||
; HAS-FMA-LABEL: constrained_vector_fma_v8f32:
|
; HAS-FMA-LABEL: constrained_vector_fma_v8f32:
|
||||||
; HAS-FMA: # %bb.0: # %entry
|
; HAS-FMA: # %bb.0: # %entry
|
||||||
; HAS-FMA-NEXT: vmovaps {{.*#+}} ymm1 = [3.500000e+00,2.500000e+00,1.500000e+00,5.000000e-01,7.500000e+00,6.500000e+00,5.500000e+00,4.500000e+00]
|
; HAS-FMA-NEXT: vmovaps {{.*#+}} ymm1 = [3.5,2.5,1.5,0.5,7.5,6.5,5.5,4.5]
|
||||||
; HAS-FMA-NEXT: vmovaps {{.*#+}} ymm0 = [7.500000e+00,6.500000e+00,5.500000e+00,4.500000e+00,1.150000e+01,1.050000e+01,9.500000e+00,8.500000e+00]
|
; HAS-FMA-NEXT: vmovaps {{.*#+}} ymm0 = [7.5,6.5,5.5,4.5,11.5,10.5,9.5,8.5]
|
||||||
; HAS-FMA-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem
|
; HAS-FMA-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem
|
||||||
; HAS-FMA-NEXT: retq
|
; HAS-FMA-NEXT: retq
|
||||||
entry:
|
entry:
|
||||||
|
|
|
||||||
|
|
@ -541,7 +541,7 @@ define <8 x float> @expand14(<4 x float> %a) {
|
||||||
define <8 x float> @expand15(<4 x float> %a) {
|
define <8 x float> @expand15(<4 x float> %a) {
|
||||||
; SKX64-LABEL: expand15:
|
; SKX64-LABEL: expand15:
|
||||||
; SKX64: # %bb.0:
|
; SKX64: # %bb.0:
|
||||||
; SKX64-NEXT: vmovaps {{.*#+}} xmm1 = [0.000000e+00,2.000000e+00,0.000000e+00,0.000000e+00]
|
; SKX64-NEXT: vmovaps {{.*#+}} xmm1 = [0,2,0,0]
|
||||||
; SKX64-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3]
|
; SKX64-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3]
|
||||||
; SKX64-NEXT: vmovaps {{.*#+}} ymm0 = [0,1,8,3,10,3,2,3]
|
; SKX64-NEXT: vmovaps {{.*#+}} ymm0 = [0,1,8,3,10,3,2,3]
|
||||||
; SKX64-NEXT: vpermi2ps %ymm2, %ymm1, %ymm0
|
; SKX64-NEXT: vpermi2ps %ymm2, %ymm1, %ymm0
|
||||||
|
|
@ -549,7 +549,7 @@ define <8 x float> @expand15(<4 x float> %a) {
|
||||||
;
|
;
|
||||||
; KNL64-LABEL: expand15:
|
; KNL64-LABEL: expand15:
|
||||||
; KNL64: # %bb.0:
|
; KNL64: # %bb.0:
|
||||||
; KNL64-NEXT: vmovaps {{.*#+}} xmm1 = [0.000000e+00,2.000000e+00,0.000000e+00,0.000000e+00]
|
; KNL64-NEXT: vmovaps {{.*#+}} xmm1 = [0,2,0,0]
|
||||||
; KNL64-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,1,1,1]
|
; KNL64-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,1,1,1]
|
||||||
; KNL64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3]
|
; KNL64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3]
|
||||||
; KNL64-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,3]
|
; KNL64-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,3]
|
||||||
|
|
@ -558,7 +558,7 @@ define <8 x float> @expand15(<4 x float> %a) {
|
||||||
;
|
;
|
||||||
; SKX32-LABEL: expand15:
|
; SKX32-LABEL: expand15:
|
||||||
; SKX32: # %bb.0:
|
; SKX32: # %bb.0:
|
||||||
; SKX32-NEXT: vmovaps {{.*#+}} xmm1 = [0.000000e+00,2.000000e+00,0.000000e+00,0.000000e+00]
|
; SKX32-NEXT: vmovaps {{.*#+}} xmm1 = [0,2,0,0]
|
||||||
; SKX32-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3]
|
; SKX32-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3]
|
||||||
; SKX32-NEXT: vmovaps {{.*#+}} ymm0 = [0,1,8,3,10,3,2,3]
|
; SKX32-NEXT: vmovaps {{.*#+}} ymm0 = [0,1,8,3,10,3,2,3]
|
||||||
; SKX32-NEXT: vpermi2ps %ymm2, %ymm1, %ymm0
|
; SKX32-NEXT: vpermi2ps %ymm2, %ymm1, %ymm0
|
||||||
|
|
@ -566,7 +566,7 @@ define <8 x float> @expand15(<4 x float> %a) {
|
||||||
;
|
;
|
||||||
; KNL32-LABEL: expand15:
|
; KNL32-LABEL: expand15:
|
||||||
; KNL32: # %bb.0:
|
; KNL32: # %bb.0:
|
||||||
; KNL32-NEXT: vmovaps {{.*#+}} xmm1 = [0.000000e+00,2.000000e+00,0.000000e+00,0.000000e+00]
|
; KNL32-NEXT: vmovaps {{.*#+}} xmm1 = [0,2,0,0]
|
||||||
; KNL32-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,1,1,1]
|
; KNL32-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,1,1,1]
|
||||||
; KNL32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3]
|
; KNL32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3]
|
||||||
; KNL32-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,3]
|
; KNL32-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,3]
|
||||||
|
|
|
||||||
|
|
@ -383,12 +383,12 @@ define <4 x float> @combine_vpermilvar_4f32_as_insertps(<4 x float> %a0) {
|
||||||
define <2 x double> @constant_fold_vpermilvar_pd() {
|
define <2 x double> @constant_fold_vpermilvar_pd() {
|
||||||
; X32-LABEL: constant_fold_vpermilvar_pd:
|
; X32-LABEL: constant_fold_vpermilvar_pd:
|
||||||
; X32: # %bb.0:
|
; X32: # %bb.0:
|
||||||
; X32-NEXT: vmovaps {{.*#+}} xmm0 = [2.000000e+00,1.000000e+00]
|
; X32-NEXT: vmovaps {{.*#+}} xmm0 = [2,1]
|
||||||
; X32-NEXT: retl
|
; X32-NEXT: retl
|
||||||
;
|
;
|
||||||
; X64-LABEL: constant_fold_vpermilvar_pd:
|
; X64-LABEL: constant_fold_vpermilvar_pd:
|
||||||
; X64: # %bb.0:
|
; X64: # %bb.0:
|
||||||
; X64-NEXT: vmovaps {{.*#+}} xmm0 = [2.000000e+00,1.000000e+00]
|
; X64-NEXT: vmovaps {{.*#+}} xmm0 = [2,1]
|
||||||
; X64-NEXT: retq
|
; X64-NEXT: retq
|
||||||
%1 = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> <double 1.0, double 2.0>, <2 x i64> <i64 2, i64 0>)
|
%1 = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> <double 1.0, double 2.0>, <2 x i64> <i64 2, i64 0>)
|
||||||
ret <2 x double> %1
|
ret <2 x double> %1
|
||||||
|
|
@ -397,12 +397,12 @@ define <2 x double> @constant_fold_vpermilvar_pd() {
|
||||||
define <4 x double> @constant_fold_vpermilvar_pd_256() {
|
define <4 x double> @constant_fold_vpermilvar_pd_256() {
|
||||||
; X32-LABEL: constant_fold_vpermilvar_pd_256:
|
; X32-LABEL: constant_fold_vpermilvar_pd_256:
|
||||||
; X32: # %bb.0:
|
; X32: # %bb.0:
|
||||||
; X32-NEXT: vmovaps {{.*#+}} ymm0 = [2.000000e+00,1.000000e+00,3.000000e+00,4.000000e+00]
|
; X32-NEXT: vmovaps {{.*#+}} ymm0 = [2,1,3,4]
|
||||||
; X32-NEXT: retl
|
; X32-NEXT: retl
|
||||||
;
|
;
|
||||||
; X64-LABEL: constant_fold_vpermilvar_pd_256:
|
; X64-LABEL: constant_fold_vpermilvar_pd_256:
|
||||||
; X64: # %bb.0:
|
; X64: # %bb.0:
|
||||||
; X64-NEXT: vmovaps {{.*#+}} ymm0 = [2.000000e+00,1.000000e+00,3.000000e+00,4.000000e+00]
|
; X64-NEXT: vmovaps {{.*#+}} ymm0 = [2,1,3,4]
|
||||||
; X64-NEXT: retq
|
; X64-NEXT: retq
|
||||||
%1 = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, <4 x i64> <i64 2, i64 0, i64 0, i64 2>)
|
%1 = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, <4 x i64> <i64 2, i64 0, i64 0, i64 2>)
|
||||||
ret <4 x double> %1
|
ret <4 x double> %1
|
||||||
|
|
@ -411,12 +411,12 @@ define <4 x double> @constant_fold_vpermilvar_pd_256() {
|
||||||
define <4 x float> @constant_fold_vpermilvar_ps() {
|
define <4 x float> @constant_fold_vpermilvar_ps() {
|
||||||
; X32-LABEL: constant_fold_vpermilvar_ps:
|
; X32-LABEL: constant_fold_vpermilvar_ps:
|
||||||
; X32: # %bb.0:
|
; X32: # %bb.0:
|
||||||
; X32-NEXT: vmovaps {{.*#+}} xmm0 = [4.000000e+00,1.000000e+00,3.000000e+00,2.000000e+00]
|
; X32-NEXT: vmovaps {{.*#+}} xmm0 = [4,1,3,2]
|
||||||
; X32-NEXT: retl
|
; X32-NEXT: retl
|
||||||
;
|
;
|
||||||
; X64-LABEL: constant_fold_vpermilvar_ps:
|
; X64-LABEL: constant_fold_vpermilvar_ps:
|
||||||
; X64: # %bb.0:
|
; X64: # %bb.0:
|
||||||
; X64-NEXT: vmovaps {{.*#+}} xmm0 = [4.000000e+00,1.000000e+00,3.000000e+00,2.000000e+00]
|
; X64-NEXT: vmovaps {{.*#+}} xmm0 = [4,1,3,2]
|
||||||
; X64-NEXT: retq
|
; X64-NEXT: retq
|
||||||
%1 = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, <4 x i32> <i32 3, i32 0, i32 2, i32 1>)
|
%1 = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, <4 x i32> <i32 3, i32 0, i32 2, i32 1>)
|
||||||
ret <4 x float> %1
|
ret <4 x float> %1
|
||||||
|
|
@ -425,12 +425,12 @@ define <4 x float> @constant_fold_vpermilvar_ps() {
|
||||||
define <8 x float> @constant_fold_vpermilvar_ps_256() {
|
define <8 x float> @constant_fold_vpermilvar_ps_256() {
|
||||||
; X32-LABEL: constant_fold_vpermilvar_ps_256:
|
; X32-LABEL: constant_fold_vpermilvar_ps_256:
|
||||||
; X32: # %bb.0:
|
; X32: # %bb.0:
|
||||||
; X32-NEXT: vmovaps {{.*#+}} ymm0 = [1.000000e+00,1.000000e+00,3.000000e+00,2.000000e+00,5.000000e+00,6.000000e+00,6.000000e+00,6.000000e+00]
|
; X32-NEXT: vmovaps {{.*#+}} ymm0 = [1,1,3,2,5,6,6,6]
|
||||||
; X32-NEXT: retl
|
; X32-NEXT: retl
|
||||||
;
|
;
|
||||||
; X64-LABEL: constant_fold_vpermilvar_ps_256:
|
; X64-LABEL: constant_fold_vpermilvar_ps_256:
|
||||||
; X64: # %bb.0:
|
; X64: # %bb.0:
|
||||||
; X64-NEXT: vmovaps {{.*#+}} ymm0 = [1.000000e+00,1.000000e+00,3.000000e+00,2.000000e+00,5.000000e+00,6.000000e+00,6.000000e+00,6.000000e+00]
|
; X64-NEXT: vmovaps {{.*#+}} ymm0 = [1,1,3,2,5,6,6,6]
|
||||||
; X64-NEXT: retq
|
; X64-NEXT: retq
|
||||||
%1 = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, <8 x i32> <i32 4, i32 0, i32 2, i32 1, i32 0, i32 1, i32 1, i32 1>)
|
%1 = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, <8 x i32> <i32 4, i32 0, i32 2, i32 1, i32 0, i32 1, i32 1, i32 1>)
|
||||||
ret <8 x float> %1
|
ret <8 x float> %1
|
||||||
|
|
|
||||||
|
|
@ -691,7 +691,7 @@ define <8 x i32> @constant_fold_permd() {
|
||||||
define <8 x float> @constant_fold_permps() {
|
define <8 x float> @constant_fold_permps() {
|
||||||
; CHECK-LABEL: constant_fold_permps:
|
; CHECK-LABEL: constant_fold_permps:
|
||||||
; CHECK: # %bb.0:
|
; CHECK: # %bb.0:
|
||||||
; CHECK-NEXT: vmovaps {{.*#+}} ymm0 = [5.000000e+00,7.000000e+00,3.000000e+00,2.000000e+00,8.000000e+00,2.000000e+00,6.000000e+00,1.000000e+00]
|
; CHECK-NEXT: vmovaps {{.*#+}} ymm0 = [5,7,3,2,8,2,6,1]
|
||||||
; CHECK-NEXT: ret{{[l|q]}}
|
; CHECK-NEXT: ret{{[l|q]}}
|
||||||
%1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, <8 x i32> <i32 4, i32 6, i32 2, i32 1, i32 7, i32 1, i32 5, i32 0>)
|
%1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, <8 x i32> <i32 4, i32 6, i32 2, i32 1, i32 7, i32 1, i32 5, i32 0>)
|
||||||
ret <8 x float> %1
|
ret <8 x float> %1
|
||||||
|
|
|
||||||
|
|
@ -384,12 +384,12 @@ define void @buildvector_v4f32_07z6(float %a, <4 x float> %b, <4 x float>* %ptr)
|
||||||
define <2 x double> @constant_fold_vpermil2pd() {
|
define <2 x double> @constant_fold_vpermil2pd() {
|
||||||
; X32-LABEL: constant_fold_vpermil2pd:
|
; X32-LABEL: constant_fold_vpermil2pd:
|
||||||
; X32: # %bb.0:
|
; X32: # %bb.0:
|
||||||
; X32-NEXT: vmovaps {{.*#+}} xmm0 = [-2.000000e+00,2.000000e+00]
|
; X32-NEXT: vmovaps {{.*#+}} xmm0 = [-2,2]
|
||||||
; X32-NEXT: retl
|
; X32-NEXT: retl
|
||||||
;
|
;
|
||||||
; X64-LABEL: constant_fold_vpermil2pd:
|
; X64-LABEL: constant_fold_vpermil2pd:
|
||||||
; X64: # %bb.0:
|
; X64: # %bb.0:
|
||||||
; X64-NEXT: vmovaps {{.*#+}} xmm0 = [-2.000000e+00,2.000000e+00]
|
; X64-NEXT: vmovaps {{.*#+}} xmm0 = [-2,2]
|
||||||
; X64-NEXT: retq
|
; X64-NEXT: retq
|
||||||
%1 = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> <double 1.0, double 2.0>, <2 x double> <double -2.0, double -1.0>, <2 x i64> <i64 4, i64 2>, i8 2)
|
%1 = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> <double 1.0, double 2.0>, <2 x double> <double -2.0, double -1.0>, <2 x i64> <i64 4, i64 2>, i8 2)
|
||||||
ret <2 x double> %1
|
ret <2 x double> %1
|
||||||
|
|
@ -398,12 +398,12 @@ define <2 x double> @constant_fold_vpermil2pd() {
|
||||||
define <4 x double> @constant_fold_vpermil2pd_256() {
|
define <4 x double> @constant_fold_vpermil2pd_256() {
|
||||||
; X32-LABEL: constant_fold_vpermil2pd_256:
|
; X32-LABEL: constant_fold_vpermil2pd_256:
|
||||||
; X32: # %bb.0:
|
; X32: # %bb.0:
|
||||||
; X32-NEXT: vmovaps {{.*#+}} ymm0 = [-4.000000e+00,0.000000e+00,4.000000e+00,3.000000e+00]
|
; X32-NEXT: vmovaps {{.*#+}} ymm0 = [-4,0,4,3]
|
||||||
; X32-NEXT: retl
|
; X32-NEXT: retl
|
||||||
;
|
;
|
||||||
; X64-LABEL: constant_fold_vpermil2pd_256:
|
; X64-LABEL: constant_fold_vpermil2pd_256:
|
||||||
; X64: # %bb.0:
|
; X64: # %bb.0:
|
||||||
; X64-NEXT: vmovaps {{.*#+}} ymm0 = [-4.000000e+00,0.000000e+00,4.000000e+00,3.000000e+00]
|
; X64-NEXT: vmovaps {{.*#+}} ymm0 = [-4,0,4,3]
|
||||||
; X64-NEXT: retq
|
; X64-NEXT: retq
|
||||||
%1 = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, <4 x double> <double -4.0, double -3.0, double -2.0, double -1.0>, <4 x i64> <i64 4, i64 8, i64 2, i64 0>, i8 2)
|
%1 = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, <4 x double> <double -4.0, double -3.0, double -2.0, double -1.0>, <4 x i64> <i64 4, i64 8, i64 2, i64 0>, i8 2)
|
||||||
ret <4 x double> %1
|
ret <4 x double> %1
|
||||||
|
|
@ -412,12 +412,12 @@ define <4 x double> @constant_fold_vpermil2pd_256() {
|
||||||
define <4 x float> @constant_fold_vpermil2ps() {
|
define <4 x float> @constant_fold_vpermil2ps() {
|
||||||
; X32-LABEL: constant_fold_vpermil2ps:
|
; X32-LABEL: constant_fold_vpermil2ps:
|
||||||
; X32: # %bb.0:
|
; X32: # %bb.0:
|
||||||
; X32-NEXT: vmovaps {{.*#+}} xmm0 = [-4.000000e+00,1.000000e+00,3.000000e+00,0.000000e+00]
|
; X32-NEXT: vmovaps {{.*#+}} xmm0 = [-4,1,3,0]
|
||||||
; X32-NEXT: retl
|
; X32-NEXT: retl
|
||||||
;
|
;
|
||||||
; X64-LABEL: constant_fold_vpermil2ps:
|
; X64-LABEL: constant_fold_vpermil2ps:
|
||||||
; X64: # %bb.0:
|
; X64: # %bb.0:
|
||||||
; X64-NEXT: vmovaps {{.*#+}} xmm0 = [-4.000000e+00,1.000000e+00,3.000000e+00,0.000000e+00]
|
; X64-NEXT: vmovaps {{.*#+}} xmm0 = [-4,1,3,0]
|
||||||
; X64-NEXT: retq
|
; X64-NEXT: retq
|
||||||
%1 = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, <4 x float> <float -4.0, float -3.0, float -2.0, float -1.0>, <4 x i32> <i32 4, i32 0, i32 2, i32 8>, i8 2)
|
%1 = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, <4 x float> <float -4.0, float -3.0, float -2.0, float -1.0>, <4 x i32> <i32 4, i32 0, i32 2, i32 8>, i8 2)
|
||||||
ret <4 x float> %1
|
ret <4 x float> %1
|
||||||
|
|
@ -426,12 +426,12 @@ define <4 x float> @constant_fold_vpermil2ps() {
|
||||||
define <8 x float> @constant_fold_vpermil2ps_256() {
|
define <8 x float> @constant_fold_vpermil2ps_256() {
|
||||||
; X32-LABEL: constant_fold_vpermil2ps_256:
|
; X32-LABEL: constant_fold_vpermil2ps_256:
|
||||||
; X32: # %bb.0:
|
; X32: # %bb.0:
|
||||||
; X32-NEXT: vmovaps {{.*#+}} ymm0 = [-8.000000e+00,1.000000e+00,3.000000e+00,0.000000e+00,5.000000e+00,0.000000e+00,5.000000e+00,7.000000e+00]
|
; X32-NEXT: vmovaps {{.*#+}} ymm0 = [-8,1,3,0,5,0,5,7]
|
||||||
; X32-NEXT: retl
|
; X32-NEXT: retl
|
||||||
;
|
;
|
||||||
; X64-LABEL: constant_fold_vpermil2ps_256:
|
; X64-LABEL: constant_fold_vpermil2ps_256:
|
||||||
; X64: # %bb.0:
|
; X64: # %bb.0:
|
||||||
; X64-NEXT: vmovaps {{.*#+}} ymm0 = [-8.000000e+00,1.000000e+00,3.000000e+00,0.000000e+00,5.000000e+00,0.000000e+00,5.000000e+00,7.000000e+00]
|
; X64-NEXT: vmovaps {{.*#+}} ymm0 = [-8,1,3,0,5,0,5,7]
|
||||||
; X64-NEXT: retq
|
; X64-NEXT: retq
|
||||||
%1 = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, <8 x float> <float -8.0, float -7.0, float -6.0, float -5.0, float -4.0, float -3.0, float -2.0, float -1.0>, <8 x i32> <i32 4, i32 0, i32 2, i32 8, i32 0, i32 8, i32 0, i32 2>, i8 2)
|
%1 = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, <8 x float> <float -8.0, float -7.0, float -6.0, float -5.0, float -4.0, float -3.0, float -2.0, float -1.0>, <8 x i32> <i32 4, i32 0, i32 2, i32 8, i32 0, i32 8, i32 0, i32 2>, i8 2)
|
||||||
ret <8 x float> %1
|
ret <8 x float> %1
|
||||||
|
|
|
||||||
|
|
@ -47,7 +47,7 @@ define void @test2(double** %call1559, i64 %indvars.iv4198, <4 x i1> %tmp1895) {
|
||||||
; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0
|
; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0
|
||||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||||
; AVX1-NEXT: movq (%rdi,%rsi,8), %rax
|
; AVX1-NEXT: movq (%rdi,%rsi,8), %rax
|
||||||
; AVX1-NEXT: vmovapd {{.*#+}} ymm1 = [5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01]
|
; AVX1-NEXT: vmovapd {{.*#+}} ymm1 = [0.5,0.5,0.5,0.5]
|
||||||
; AVX1-NEXT: vblendvpd %ymm0, {{.*}}(%rip), %ymm1, %ymm0
|
; AVX1-NEXT: vblendvpd %ymm0, {{.*}}(%rip), %ymm1, %ymm0
|
||||||
; AVX1-NEXT: vmovupd %ymm0, (%rax)
|
; AVX1-NEXT: vmovupd %ymm0, (%rax)
|
||||||
; AVX1-NEXT: vzeroupper
|
; AVX1-NEXT: vzeroupper
|
||||||
|
|
|
||||||
|
|
@ -52,7 +52,7 @@ define void @foo2(<4 x float>* noalias %result) nounwind {
|
||||||
; CHECK-NEXT: .long 1088421888 ## float 7
|
; CHECK-NEXT: .long 1088421888 ## float 7
|
||||||
; CHECK-LABEL: foo2:
|
; CHECK-LABEL: foo2:
|
||||||
; CHECK: ## %bb.0:
|
; CHECK: ## %bb.0:
|
||||||
; CHECK-NEXT: movaps {{.*#+}} xmm0 = [4.000000e+00,5.000000e+00,6.000000e+00,7.000000e+00]
|
; CHECK-NEXT: movaps {{.*#+}} xmm0 = [4,5,6,7]
|
||||||
; CHECK-NEXT: movaps %xmm0, (%rdi)
|
; CHECK-NEXT: movaps %xmm0, (%rdi)
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
%val = uitofp <4 x i32> <i32 4, i32 5, i32 6, i32 7> to <4 x float>
|
%val = uitofp <4 x i32> <i32 4, i32 5, i32 6, i32 7> to <4 x float>
|
||||||
|
|
@ -89,7 +89,7 @@ define void @foo4(<4 x float>* noalias %result) nounwind {
|
||||||
; CHECK-NEXT: .long 1132396544 ## float 255
|
; CHECK-NEXT: .long 1132396544 ## float 255
|
||||||
; CHECK-LABEL: foo4:
|
; CHECK-LABEL: foo4:
|
||||||
; CHECK: ## %bb.0:
|
; CHECK: ## %bb.0:
|
||||||
; CHECK-NEXT: movaps {{.*#+}} xmm0 = [1.000000e+00,1.270000e+02,1.280000e+02,2.550000e+02]
|
; CHECK-NEXT: movaps {{.*#+}} xmm0 = [1,127,128,255]
|
||||||
; CHECK-NEXT: movaps %xmm0, (%rdi)
|
; CHECK-NEXT: movaps %xmm0, (%rdi)
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
%val = uitofp <4 x i8> <i8 1, i8 127, i8 -128, i8 -1> to <4 x float>
|
%val = uitofp <4 x i8> <i8 1, i8 127, i8 -128, i8 -1> to <4 x float>
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue