diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index dd4249fd6626..22fe0e61cbad 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -326,6 +326,7 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
     case ISD::FADD:
     case ISD::FSUB:
     case ISD::FMUL:
+    case ISD::FDIV:
       return LT.first;
       break;
     }
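The functional change is this one added `case`: for the size and latency cost kinds, FADD/FSUB/FMUL already returned `LT.first`, the number of legal-width pieces the type legalizer splits the operand into, and FDIV now takes the same path instead of falling through to the generic handling, which appears to charge fdiv a flat "expensive" cost of 4 (`TCC_Expensive`). A toy sketch of that dispatch follows; the enum and helper are made up for illustration, and the surrounding guard in the real `X86TTIImpl::getArithmeticInstrCost` is assumed, not shown in this hunk:

```cpp
// Toy model of the dispatch this hunk changes (assumption: in the real
// code the switch only runs for non-throughput cost kinds, and LT.first
// is the number of legal-width pieces the operand type splits into,
// i.e. 1 for an already-legal type).
enum class ISDOp { FADD, FSUB, FMUL, FDIV, Other };

int arithSizeCost(ISDOp Op, int LTFirst) {
  switch (Op) {
  case ISDOp::FADD:
  case ISDOp::FSUB:
  case ISDOp::FMUL:
  case ISDOp::FDIV: // new in this patch: one divide per legal piece
    return LTFirst;
  default:
    break;
  }
  // Before the patch, fdiv fell through to generic handling, which
  // charged a flat cost of 4 regardless of how the vector type
  // legalizes -- hence the old all-4 CHECK lines below.
  return 4;
}
```

All of the test deltas below follow from this one-line change.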
diff --git a/llvm/test/Analysis/CostModel/X86/arith-fp-codesize.ll b/llvm/test/Analysis/CostModel/X86/arith-fp-codesize.ll
index 15b423137da4..b3991a8c4e12 100644
--- a/llvm/test/Analysis/CostModel/X86/arith-fp-codesize.ll
+++ b/llvm/test/Analysis/CostModel/X86/arith-fp-codesize.ll
@@ -269,16 +269,49 @@ define i32 @fmul(i32 %arg) {
 }
 
 define i32 @fdiv(i32 %arg) {
-; CHECK-LABEL: 'fdiv'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %F32 = fdiv float undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fdiv <4 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fdiv <8 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fdiv <16 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %F64 = fdiv double undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fdiv <2 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fdiv <4 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fdiv <8 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; SSE1-LABEL: 'fdiv'
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fdiv float undef, undef
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fdiv <4 x float> undef, undef
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fdiv <8 x float> undef, undef
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fdiv <16 x float> undef, undef
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fdiv double undef, undef
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fdiv <2 x double> undef, undef
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fdiv <4 x double> undef, undef
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fdiv <8 x double> undef, undef
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; SSE2-LABEL: 'fdiv'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fdiv float undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fdiv <4 x float> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fdiv <8 x float> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fdiv <16 x float> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fdiv double undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fdiv <2 x double> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fdiv <4 x double> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fdiv <8 x double> undef, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX-LABEL: 'fdiv'
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fdiv float undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fdiv <4 x float> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fdiv <8 x float> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16F32 = fdiv <16 x float> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fdiv double undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fdiv <2 x double> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fdiv <4 x double> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F64 = fdiv <8 x double> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX512-LABEL: 'fdiv'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fdiv float undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fdiv <4 x float> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fdiv <8 x float> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = fdiv <16 x float> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fdiv double undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fdiv <2 x double> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fdiv <4 x double> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = fdiv <8 x double> undef, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %F32 = fdiv float undef, undef
   %V4F32 = fdiv <4 x float> undef, undef
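The per-prefix numbers above are exactly those legalization split counts: a 512-bit `<16 x float>` is four 128-bit pieces under SSE, two 256-bit pieces under AVX, and a single register under AVX-512; SSE1, which has no legal `<2 x double>`, splits f64 vectors down to scalars, hence its 2/4/8 column. A minimal worked model of that arithmetic (`legalPieces` is a hypothetical helper, not an LLVM API; the real values come from X86's type legalizer):

```cpp
#include <algorithm>
#include <cassert>

// Toy model of LT.first for the tables above: the cost equals the
// number of widest-legal-register pieces the vector splits into.
static int legalPieces(int vectorBits, int widestLegalBits) {
  return std::max(1, vectorBits / widestLegalBits);
}

int main() {
  assert(legalPieces(512, 128) == 4); // SSE2:   %V16F32 -> cost 4
  assert(legalPieces(512, 256) == 2); // AVX:    %V16F32 -> cost 2
  assert(legalPieces(512, 512) == 1); // AVX512: %V16F32 -> cost 1
  assert(legalPieces(128, 64) == 2);  // SSE1:   %V2F64 scalarizes -> cost 2
  return 0;
}
```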
diff --git a/llvm/test/Analysis/CostModel/X86/size-cost.ll b/llvm/test/Analysis/CostModel/X86/size-cost.ll
index dd37fb8cdb81..733e164ac7e7 100644
--- a/llvm/test/Analysis/CostModel/X86/size-cost.ll
+++ b/llvm/test/Analysis/CostModel/X86/size-cost.ll
@@ -216,11 +216,11 @@ define double @fmul_f64(double %x, double %y) {
   ret double %r
 }
 
-; FIXME: divsd is 1 instruction.
+; divsd is 1 instruction.
 define double @fdiv_f64(double %x, double %y) {
 ; CHECK-LABEL: 'fdiv_f64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %r = fdiv double %x, %y
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r = fdiv double %x, %y
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret double %r
 ;
   %r = fdiv double %x, %y
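The last test is a phase-ordering test, so the large diff below is a second-order effect of the cost change: with `fdiv` now costed at 1 instead of 4 for the size/latency kinds, the loop bodies look cheaper to the unroller, which now unrolls the 16-wide vector loop by 2 (adding a `vector.body.epil` epilogue) and the scalar remainder loop by 8 instead of 4. The IR being checked corresponds to a loop of roughly this shape (a reconstructed sketch inferred from the IR, not part of the patch):

```cpp
// Assumed source shape of the @vdiv test function.
void vdiv(double *x, double *y, double a, int N) {
  for (int i = 0; i < N; ++i)
    x[i] = y[i] / a; // fast-math lets this become x[i] = y[i] * (1.0 / a)
}
```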
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll b/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll
index 9eb24fefb022..8b1915b2d588 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll
@@ -35,107 +35,212 @@ define void @vdiv(double* %x, double* %y, double %a, i32 %N) #0 {
 ; CHECK-NEXT:    [[BROADCAST_SPLAT12:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT11]], <4 x double> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT13:%.*]] = insertelement <4 x double> poison, double [[A]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT14:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT13]], <4 x double> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP1:%.*]] = fdiv fast <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, [[BROADCAST_SPLAT]]
-; CHECK-NEXT:    [[TMP2:%.*]] = fdiv fast <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, [[BROADCAST_SPLAT10]]
-; CHECK-NEXT:    [[TMP3:%.*]] = fdiv fast <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, [[BROADCAST_SPLAT12]]
-; CHECK-NEXT:    [[TMP4:%.*]] = fdiv fast <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, [[BROADCAST_SPLAT14]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add nsw i64 [[WIDE_TRIP_COUNT]], -16
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr i64 [[TMP1]], 4
+; CHECK-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
+; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[TMP3]], 1
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp ult i64 [[TMP1]], 16
+; CHECK-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK_UNR_LCSSA:%.*]], label [[VECTOR_PH_NEW:%.*]]
+; CHECK:       vector.ph.new:
+; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = and i64 [[TMP3]], 2305843009213693950
+; CHECK-NEXT:    [[TMP5:%.*]] = fdiv fast <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    [[TMP6:%.*]] = fdiv fast <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, [[BROADCAST_SPLAT10]]
+; CHECK-NEXT:    [[TMP7:%.*]] = fdiv fast <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, [[BROADCAST_SPLAT12]]
+; CHECK-NEXT:    [[TMP8:%.*]] = fdiv fast <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, [[BROADCAST_SPLAT14]]
+; CHECK-NEXT:    [[TMP9:%.*]] = fdiv fast <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    [[TMP10:%.*]] = fdiv fast <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, [[BROADCAST_SPLAT10]]
+; CHECK-NEXT:    [[TMP11:%.*]] = fdiv fast <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, [[BROADCAST_SPLAT12]]
+; CHECK-NEXT:    [[TMP12:%.*]] = fdiv fast <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, [[BROADCAST_SPLAT14]]
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds double, double* [[Y]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast double* [[TMP5]] to <4 x double>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x double>, <4 x double>* [[TMP6]], align 8, !tbaa [[TBAA3:![0-9]+]]
-; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds double, double* [[TMP5]], i64 4
-; CHECK-NEXT:    [[TMP8:%.*]] = bitcast double* [[TMP7]] to <4 x double>*
-; CHECK-NEXT:    [[WIDE_LOAD6:%.*]] = load <4 x double>, <4 x double>* [[TMP8]], align 8, !tbaa [[TBAA3]]
-; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds double, double* [[TMP5]], i64 8
-; CHECK-NEXT:    [[TMP10:%.*]] = bitcast double* [[TMP9]] to <4 x double>*
-; CHECK-NEXT:    [[WIDE_LOAD7:%.*]] = load <4 x double>, <4 x double>* [[TMP10]], align 8, !tbaa [[TBAA3]]
-; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds double, double* [[TMP5]], i64 12
-; CHECK-NEXT:    [[TMP12:%.*]] = bitcast double* [[TMP11]] to <4 x double>*
-; CHECK-NEXT:    [[WIDE_LOAD8:%.*]] = load <4 x double>, <4 x double>* [[TMP12]], align 8, !tbaa [[TBAA3]]
-; CHECK-NEXT:    [[TMP13:%.*]] = fmul fast <4 x double> [[WIDE_LOAD]], [[TMP1]]
-; CHECK-NEXT:    [[TMP14:%.*]] = fmul fast <4 x double> [[WIDE_LOAD6]], [[TMP2]]
-; CHECK-NEXT:    [[TMP15:%.*]] = fmul fast <4 x double> [[WIDE_LOAD7]], [[TMP3]]
-; CHECK-NEXT:    [[TMP16:%.*]] = fmul fast <4 x double> [[WIDE_LOAD8]], [[TMP4]]
-; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds double, double* [[X]], i64 [[INDEX]]
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH_NEW]] ], [ [[INDEX_NEXT_1:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[NITER:%.*]] = phi i64 [ 0, [[VECTOR_PH_NEW]] ], [ [[NITER_NEXT_1:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds double, double* [[Y]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP14:%.*]] = bitcast double* [[TMP13]] to <4 x double>*
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x double>, <4 x double>* [[TMP14]], align 8, !tbaa [[TBAA3:![0-9]+]]
+; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds double, double* [[TMP13]], i64 4
+; CHECK-NEXT:    [[TMP16:%.*]] = bitcast double* [[TMP15]] to <4 x double>*
+; CHECK-NEXT:    [[WIDE_LOAD6:%.*]] = load <4 x double>, <4 x double>* [[TMP16]], align 8, !tbaa [[TBAA3]]
+; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds double, double* [[TMP13]], i64 8
 ; CHECK-NEXT:    [[TMP18:%.*]] = bitcast double* [[TMP17]] to <4 x double>*
-; CHECK-NEXT:    store <4 x double> [[TMP13]], <4 x double>* [[TMP18]], align 8, !tbaa [[TBAA3]]
-; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds double, double* [[TMP17]], i64 4
+; CHECK-NEXT:    [[WIDE_LOAD7:%.*]] = load <4 x double>, <4 x double>* [[TMP18]], align 8, !tbaa [[TBAA3]]
+; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds double, double* [[TMP13]], i64 12
 ; CHECK-NEXT:    [[TMP20:%.*]] = bitcast double* [[TMP19]] to <4 x double>*
-; CHECK-NEXT:    store <4 x double> [[TMP14]], <4 x double>* [[TMP20]], align 8, !tbaa [[TBAA3]]
-; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds double, double* [[TMP17]], i64 8
-; CHECK-NEXT:    [[TMP22:%.*]] = bitcast double* [[TMP21]] to <4 x double>*
-; CHECK-NEXT:    store <4 x double> [[TMP15]], <4 x double>* [[TMP22]], align 8, !tbaa [[TBAA3]]
-; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds double, double* [[TMP17]], i64 12
-; CHECK-NEXT:    [[TMP24:%.*]] = bitcast double* [[TMP23]] to <4 x double>*
-; CHECK-NEXT:    store <4 x double> [[TMP16]], <4 x double>* [[TMP24]], align 8, !tbaa [[TBAA3]]
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
-; CHECK-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK-NEXT:    [[WIDE_LOAD8:%.*]] = load <4 x double>, <4 x double>* [[TMP20]], align 8, !tbaa [[TBAA3]]
+; CHECK-NEXT:    [[TMP21:%.*]] = fmul fast <4 x double> [[WIDE_LOAD]], [[TMP5]]
+; CHECK-NEXT:    [[TMP22:%.*]] = fmul fast <4 x double> [[WIDE_LOAD6]], [[TMP6]]
+; CHECK-NEXT:    [[TMP23:%.*]] = fmul fast <4 x double> [[WIDE_LOAD7]], [[TMP7]]
+; CHECK-NEXT:    [[TMP24:%.*]] = fmul fast <4 x double> [[WIDE_LOAD8]], [[TMP8]]
+; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds double, double* [[X]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP26:%.*]] = bitcast double* [[TMP25]] to <4 x double>*
+; CHECK-NEXT:    store <4 x double> [[TMP21]], <4 x double>* [[TMP26]], align 8, !tbaa [[TBAA3]]
+; CHECK-NEXT:    [[TMP27:%.*]] = getelementptr inbounds double, double* [[TMP25]], i64 4
+; CHECK-NEXT:    [[TMP28:%.*]] = bitcast double* [[TMP27]] to <4 x double>*
+; CHECK-NEXT:    store <4 x double> [[TMP22]], <4 x double>* [[TMP28]], align 8, !tbaa [[TBAA3]]
+; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr inbounds double, double* [[TMP25]], i64 8
+; CHECK-NEXT:    [[TMP30:%.*]] = bitcast double* [[TMP29]] to <4 x double>*
+; CHECK-NEXT:    store <4 x double> [[TMP23]], <4 x double>* [[TMP30]], align 8, !tbaa [[TBAA3]]
+; CHECK-NEXT:    [[TMP31:%.*]] = getelementptr inbounds double, double* [[TMP25]], i64 12
+; CHECK-NEXT:    [[TMP32:%.*]] = bitcast double* [[TMP31]] to <4 x double>*
+; CHECK-NEXT:    store <4 x double> [[TMP24]], <4 x double>* [[TMP32]], align 8, !tbaa [[TBAA3]]
+; CHECK-NEXT:    [[INDEX_NEXT:%.*]] = or i64 [[INDEX]], 16
+; CHECK-NEXT:    [[TMP33:%.*]] = getelementptr inbounds double, double* [[Y]], i64 [[INDEX_NEXT]]
+; CHECK-NEXT:    [[TMP34:%.*]] = bitcast double* [[TMP33]] to <4 x double>*
+; CHECK-NEXT:    [[WIDE_LOAD_1:%.*]] = load <4 x double>, <4 x double>* [[TMP34]], align 8, !tbaa [[TBAA3]]
+; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds double, double* [[TMP33]], i64 4
+; CHECK-NEXT:    [[TMP36:%.*]] = bitcast double* [[TMP35]] to <4 x double>*
+; CHECK-NEXT:    [[WIDE_LOAD6_1:%.*]] = load <4 x double>, <4 x double>* [[TMP36]], align 8, !tbaa [[TBAA3]]
+; CHECK-NEXT:    [[TMP37:%.*]] = getelementptr inbounds double, double* [[TMP33]], i64 8
+; CHECK-NEXT:    [[TMP38:%.*]] = bitcast double* [[TMP37]] to <4 x double>*
+; CHECK-NEXT:    [[WIDE_LOAD7_1:%.*]] = load <4 x double>, <4 x double>* [[TMP38]], align 8, !tbaa [[TBAA3]]
+; CHECK-NEXT:    [[TMP39:%.*]] = getelementptr inbounds double, double* [[TMP33]], i64 12
+; CHECK-NEXT:    [[TMP40:%.*]] = bitcast double* [[TMP39]] to <4 x double>*
+; CHECK-NEXT:    [[WIDE_LOAD8_1:%.*]] = load <4 x double>, <4 x double>* [[TMP40]], align 8, !tbaa [[TBAA3]]
+; CHECK-NEXT:    [[TMP41:%.*]] = fmul fast <4 x double> [[WIDE_LOAD_1]], [[TMP9]]
+; CHECK-NEXT:    [[TMP42:%.*]] = fmul fast <4 x double> [[WIDE_LOAD6_1]], [[TMP10]]
+; CHECK-NEXT:    [[TMP43:%.*]] = fmul fast <4 x double> [[WIDE_LOAD7_1]], [[TMP11]]
+; CHECK-NEXT:    [[TMP44:%.*]] = fmul fast <4 x double> [[WIDE_LOAD8_1]], [[TMP12]]
+; CHECK-NEXT:    [[TMP45:%.*]] = getelementptr inbounds double, double* [[X]], i64 [[INDEX_NEXT]]
+; CHECK-NEXT:    [[TMP46:%.*]] = bitcast double* [[TMP45]] to <4 x double>*
+; CHECK-NEXT:    store <4 x double> [[TMP41]], <4 x double>* [[TMP46]], align 8, !tbaa [[TBAA3]]
+; CHECK-NEXT:    [[TMP47:%.*]] = getelementptr inbounds double, double* [[TMP45]], i64 4
+; CHECK-NEXT:    [[TMP48:%.*]] = bitcast double* [[TMP47]] to <4 x double>*
+; CHECK-NEXT:    store <4 x double> [[TMP42]], <4 x double>* [[TMP48]], align 8, !tbaa [[TBAA3]]
+; CHECK-NEXT:    [[TMP49:%.*]] = getelementptr inbounds double, double* [[TMP45]], i64 8
+; CHECK-NEXT:    [[TMP50:%.*]] = bitcast double* [[TMP49]] to <4 x double>*
+; CHECK-NEXT:    store <4 x double> [[TMP43]], <4 x double>* [[TMP50]], align 8, !tbaa [[TBAA3]]
+; CHECK-NEXT:    [[TMP51:%.*]] = getelementptr inbounds double, double* [[TMP45]], i64 12
+; CHECK-NEXT:    [[TMP52:%.*]] = bitcast double* [[TMP51]] to <4 x double>*
+; CHECK-NEXT:    store <4 x double> [[TMP44]], <4 x double>* [[TMP52]], align 8, !tbaa [[TBAA3]]
+; CHECK-NEXT:    [[INDEX_NEXT_1]] = add nuw i64 [[INDEX]], 32
+; CHECK-NEXT:    [[NITER_NEXT_1]] = add i64 [[NITER]], 2
+; CHECK-NEXT:    [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
+; CHECK-NEXT:    br i1 [[NITER_NCMP_1]], label [[MIDDLE_BLOCK_UNR_LCSSA]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK:       middle.block.unr-lcssa:
+; CHECK-NEXT:    [[INDEX_UNR:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT_1]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[LCMP_MOD_NOT:%.*]] = icmp eq i64 [[XTRAITER]], 0
+; CHECK-NEXT:    br i1 [[LCMP_MOD_NOT]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY_EPIL:%.*]]
+; CHECK:       vector.body.epil:
+; CHECK-NEXT:    [[TMP53:%.*]] = getelementptr inbounds double, double* [[Y]], i64 [[INDEX_UNR]]
+; CHECK-NEXT:    [[TMP54:%.*]] = bitcast double* [[TMP53]] to <4 x double>*
+; CHECK-NEXT:    [[WIDE_LOAD_EPIL:%.*]] = load <4 x double>, <4 x double>* [[TMP54]], align 8, !tbaa [[TBAA3]]
+; CHECK-NEXT:    [[TMP55:%.*]] = getelementptr inbounds double, double* [[TMP53]], i64 4
+; CHECK-NEXT:    [[TMP56:%.*]] = bitcast double* [[TMP55]] to <4 x double>*
+; CHECK-NEXT:    [[WIDE_LOAD6_EPIL:%.*]] = load <4 x double>, <4 x double>* [[TMP56]], align 8, !tbaa [[TBAA3]]
+; CHECK-NEXT:    [[TMP57:%.*]] = getelementptr inbounds double, double* [[TMP53]], i64 8
+; CHECK-NEXT:    [[TMP58:%.*]] = bitcast double* [[TMP57]] to <4 x double>*
+; CHECK-NEXT:    [[WIDE_LOAD7_EPIL:%.*]] = load <4 x double>, <4 x double>* [[TMP58]], align 8, !tbaa [[TBAA3]]
+; CHECK-NEXT:    [[TMP59:%.*]] = getelementptr inbounds double, double* [[TMP53]], i64 12
+; CHECK-NEXT:    [[TMP60:%.*]] = bitcast double* [[TMP59]] to <4 x double>*
+; CHECK-NEXT:    [[WIDE_LOAD8_EPIL:%.*]] = load <4 x double>, <4 x double>* [[TMP60]], align 8, !tbaa [[TBAA3]]
+; CHECK-NEXT:    [[TMP61:%.*]] = fdiv fast <4 x double> [[WIDE_LOAD_EPIL]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    [[TMP62:%.*]] = fdiv fast <4 x double> [[WIDE_LOAD6_EPIL]], [[BROADCAST_SPLAT10]]
+; CHECK-NEXT:    [[TMP63:%.*]] = fdiv fast <4 x double> [[WIDE_LOAD7_EPIL]], [[BROADCAST_SPLAT12]]
+; CHECK-NEXT:    [[TMP64:%.*]] = fdiv fast <4 x double> [[WIDE_LOAD8_EPIL]], [[BROADCAST_SPLAT14]]
+; CHECK-NEXT:    [[TMP65:%.*]] = getelementptr inbounds double, double* [[X]], i64 [[INDEX_UNR]]
+; CHECK-NEXT:    [[TMP66:%.*]] = bitcast double* [[TMP65]] to <4 x double>*
+; CHECK-NEXT:    store <4 x double> [[TMP61]], <4 x double>* [[TMP66]], align 8, !tbaa [[TBAA3]]
+; CHECK-NEXT:    [[TMP67:%.*]] = getelementptr inbounds double, double* [[TMP65]], i64 4
+; CHECK-NEXT:    [[TMP68:%.*]] = bitcast double* [[TMP67]] to <4 x double>*
+; CHECK-NEXT:    store <4 x double> [[TMP62]], <4 x double>* [[TMP68]], align 8, !tbaa [[TBAA3]]
+; CHECK-NEXT:    [[TMP69:%.*]] = getelementptr inbounds double, double* [[TMP65]], i64 8
+; CHECK-NEXT:    [[TMP70:%.*]] = bitcast double* [[TMP69]] to <4 x double>*
+; CHECK-NEXT:    store <4 x double> [[TMP63]], <4 x double>* [[TMP70]], align 8, !tbaa [[TBAA3]]
+; CHECK-NEXT:    [[TMP71:%.*]] = getelementptr inbounds double, double* [[TMP65]], i64 12
+; CHECK-NEXT:    [[TMP72:%.*]] = bitcast double* [[TMP71]] to <4 x double>*
+; CHECK-NEXT:    store <4 x double> [[TMP64]], <4 x double>* [[TMP72]], align 8, !tbaa [[TBAA3]]
+; CHECK-NEXT:    br label [[MIDDLE_BLOCK]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[WIDE_TRIP_COUNT]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY_PREHEADER15]]
 ; CHECK:       for.body.preheader15:
 ; CHECK-NEXT:    [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
-; CHECK-NEXT:    [[TMP26:%.*]] = xor i64 [[INDVARS_IV_PH]], -1
-; CHECK-NEXT:    [[TMP27:%.*]] = add nsw i64 [[TMP26]], [[WIDE_TRIP_COUNT]]
-; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 3
-; CHECK-NEXT:    [[LCMP_MOD_NOT:%.*]] = icmp eq i64 [[XTRAITER]], 0
-; CHECK-NEXT:    br i1 [[LCMP_MOD_NOT]], label [[FOR_BODY_PROL_LOOPEXIT:%.*]], label [[FOR_BODY_PROL_PREHEADER:%.*]]
+; CHECK-NEXT:    [[TMP73:%.*]] = xor i64 [[INDVARS_IV_PH]], -1
+; CHECK-NEXT:    [[TMP74:%.*]] = add nsw i64 [[TMP73]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEXT:    [[XTRAITER16:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 7
+; CHECK-NEXT:    [[LCMP_MOD17_NOT:%.*]] = icmp eq i64 [[XTRAITER16]], 0
+; CHECK-NEXT:    br i1 [[LCMP_MOD17_NOT]], label [[FOR_BODY_PROL_LOOPEXIT:%.*]], label [[FOR_BODY_PROL_PREHEADER:%.*]]
 ; CHECK:       for.body.prol.preheader:
-; CHECK-NEXT:    [[TMP28:%.*]] = fdiv fast double 1.000000e+00, [[A]]
+; CHECK-NEXT:    [[TMP75:%.*]] = fdiv fast double 1.000000e+00, [[A]]
 ; CHECK-NEXT:    br label [[FOR_BODY_PROL:%.*]]
 ; CHECK:       for.body.prol:
 ; CHECK-NEXT:    [[INDVARS_IV_PROL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PROL:%.*]], [[FOR_BODY_PROL]] ], [ [[INDVARS_IV_PH]], [[FOR_BODY_PROL_PREHEADER]] ]
 ; CHECK-NEXT:    [[PROL_ITER:%.*]] = phi i64 [ [[PROL_ITER_NEXT:%.*]], [[FOR_BODY_PROL]] ], [ 0, [[FOR_BODY_PROL_PREHEADER]] ]
 ; CHECK-NEXT:    [[ARRAYIDX_PROL:%.*]] = getelementptr inbounds double, double* [[Y]], i64 [[INDVARS_IV_PROL]]
 ; CHECK-NEXT:    [[T0_PROL:%.*]] = load double, double* [[ARRAYIDX_PROL]], align 8, !tbaa [[TBAA3]]
-; CHECK-NEXT:    [[TMP29:%.*]] = fmul fast double [[T0_PROL]], [[TMP28]]
+; CHECK-NEXT:    [[TMP76:%.*]] = fmul fast double [[T0_PROL]], [[TMP75]]
 ; CHECK-NEXT:    [[ARRAYIDX2_PROL:%.*]] = getelementptr inbounds double, double* [[X]], i64 [[INDVARS_IV_PROL]]
-; CHECK-NEXT:    store double [[TMP29]], double* [[ARRAYIDX2_PROL]], align 8, !tbaa [[TBAA3]]
+; CHECK-NEXT:    store double [[TMP76]], double* [[ARRAYIDX2_PROL]], align 8, !tbaa [[TBAA3]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_PROL]] = add nuw nsw i64 [[INDVARS_IV_PROL]], 1
 ; CHECK-NEXT:    [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1
-; CHECK-NEXT:    [[PROL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[PROL_ITER_NEXT]], [[XTRAITER]]
+; CHECK-NEXT:    [[PROL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[PROL_ITER_NEXT]], [[XTRAITER16]]
 ; CHECK-NEXT:    br i1 [[PROL_ITER_CMP_NOT]], label [[FOR_BODY_PROL_LOOPEXIT]], label [[FOR_BODY_PROL]], !llvm.loop [[LOOP9:![0-9]+]]
 ; CHECK:       for.body.prol.loopexit:
 ; CHECK-NEXT:    [[INDVARS_IV_UNR:%.*]] = phi i64 [ [[INDVARS_IV_PH]], [[FOR_BODY_PREHEADER15]] ], [ [[INDVARS_IV_NEXT_PROL]], [[FOR_BODY_PROL]] ]
-; CHECK-NEXT:    [[TMP30:%.*]] = icmp ult i64 [[TMP27]], 3
-; CHECK-NEXT:    br i1 [[TMP30]], label [[FOR_END]], label [[FOR_BODY_PREHEADER15_NEW:%.*]]
+; CHECK-NEXT:    [[TMP77:%.*]] = icmp ult i64 [[TMP74]], 7
+; CHECK-NEXT:    br i1 [[TMP77]], label [[FOR_END]], label [[FOR_BODY_PREHEADER15_NEW:%.*]]
 ; CHECK:       for.body.preheader15.new:
-; CHECK-NEXT:    [[TMP31:%.*]] = fdiv fast double 1.000000e+00, [[A]]
-; CHECK-NEXT:    [[TMP32:%.*]] = fdiv fast double 1.000000e+00, [[A]]
-; CHECK-NEXT:    [[TMP33:%.*]] = fdiv fast double 1.000000e+00, [[A]]
-; CHECK-NEXT:    [[TMP34:%.*]] = fdiv fast double 1.000000e+00, [[A]]
+; CHECK-NEXT:    [[TMP78:%.*]] = fdiv fast double 1.000000e+00, [[A]]
+; CHECK-NEXT:    [[TMP79:%.*]] = fdiv fast double 1.000000e+00, [[A]]
+; CHECK-NEXT:    [[TMP80:%.*]] = fdiv fast double 1.000000e+00, [[A]]
+; CHECK-NEXT:    [[TMP81:%.*]] = fdiv fast double 1.000000e+00, [[A]]
+; CHECK-NEXT:    [[TMP82:%.*]] = fdiv fast double 1.000000e+00, [[A]]
+; CHECK-NEXT:    [[TMP83:%.*]] = fdiv fast double 1.000000e+00, [[A]]
+; CHECK-NEXT:    [[TMP84:%.*]] = fdiv fast double 1.000000e+00, [[A]]
+; CHECK-NEXT:    [[TMP85:%.*]] = fdiv fast double 1.000000e+00, [[A]]
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_UNR]], [[FOR_BODY_PREHEADER15_NEW]] ], [ [[INDVARS_IV_NEXT_3:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_UNR]], [[FOR_BODY_PREHEADER15_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[Y]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    [[T0:%.*]] = load double, double* [[ARRAYIDX]], align 8, !tbaa [[TBAA3]]
-; CHECK-NEXT:    [[TMP35:%.*]] = fmul fast double [[T0]], [[TMP31]]
+; CHECK-NEXT:    [[TMP86:%.*]] = fmul fast double [[T0]], [[TMP78]]
 ; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[X]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    store double [[TMP35]], double* [[ARRAYIDX2]], align 8, !tbaa [[TBAA3]]
+; CHECK-NEXT:    store double [[TMP86]], double* [[ARRAYIDX2]], align 8, !tbaa [[TBAA3]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds double, double* [[Y]], i64 [[INDVARS_IV_NEXT]]
 ; CHECK-NEXT:    [[T0_1:%.*]] = load double, double* [[ARRAYIDX_1]], align 8, !tbaa [[TBAA3]]
-; CHECK-NEXT:    [[TMP36:%.*]] = fmul fast double [[T0_1]], [[TMP32]]
+; CHECK-NEXT:    [[TMP87:%.*]] = fmul fast double [[T0_1]], [[TMP79]]
 ; CHECK-NEXT:    [[ARRAYIDX2_1:%.*]] = getelementptr inbounds double, double* [[X]], i64 [[INDVARS_IV_NEXT]]
-; CHECK-NEXT:    store double [[TMP36]], double* [[ARRAYIDX2_1]], align 8, !tbaa [[TBAA3]]
+; CHECK-NEXT:    store double [[TMP87]], double* [[ARRAYIDX2_1]], align 8, !tbaa [[TBAA3]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_1:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 2
 ; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds double, double* [[Y]], i64 [[INDVARS_IV_NEXT_1]]
 ; CHECK-NEXT:    [[T0_2:%.*]] = load double, double* [[ARRAYIDX_2]], align 8, !tbaa [[TBAA3]]
-; CHECK-NEXT:    [[TMP37:%.*]] = fmul fast double [[T0_2]], [[TMP33]]
+; CHECK-NEXT:    [[TMP88:%.*]] = fmul fast double [[T0_2]], [[TMP80]]
 ; CHECK-NEXT:    [[ARRAYIDX2_2:%.*]] = getelementptr inbounds double, double* [[X]], i64 [[INDVARS_IV_NEXT_1]]
-; CHECK-NEXT:    store double [[TMP37]], double* [[ARRAYIDX2_2]], align 8, !tbaa [[TBAA3]]
+; CHECK-NEXT:    store double [[TMP88]], double* [[ARRAYIDX2_2]], align 8, !tbaa [[TBAA3]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_2:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 3
 ; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds double, double* [[Y]], i64 [[INDVARS_IV_NEXT_2]]
 ; CHECK-NEXT:    [[T0_3:%.*]] = load double, double* [[ARRAYIDX_3]], align 8, !tbaa [[TBAA3]]
-; CHECK-NEXT:    [[TMP38:%.*]] = fmul fast double [[T0_3]], [[TMP34]]
+; CHECK-NEXT:    [[TMP89:%.*]] = fmul fast double [[T0_3]], [[TMP81]]
 ; CHECK-NEXT:    [[ARRAYIDX2_3:%.*]] = getelementptr inbounds double, double* [[X]], i64 [[INDVARS_IV_NEXT_2]]
-; CHECK-NEXT:    store double [[TMP38]], double* [[ARRAYIDX2_3]], align 8, !tbaa [[TBAA3]]
-; CHECK-NEXT:    [[INDVARS_IV_NEXT_3]] = add nuw nsw i64 [[INDVARS_IV]], 4
-; CHECK-NEXT:    [[EXITCOND_NOT_3:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_3]], [[WIDE_TRIP_COUNT]]
-; CHECK-NEXT:    br i1 [[EXITCOND_NOT_3]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK-NEXT:    store double [[TMP89]], double* [[ARRAYIDX2_3]], align 8, !tbaa [[TBAA3]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_3:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 4
+; CHECK-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds double, double* [[Y]], i64 [[INDVARS_IV_NEXT_3]]
+; CHECK-NEXT:    [[T0_4:%.*]] = load double, double* [[ARRAYIDX_4]], align 8, !tbaa [[TBAA3]]
+; CHECK-NEXT:    [[TMP90:%.*]] = fmul fast double [[T0_4]], [[TMP82]]
+; CHECK-NEXT:    [[ARRAYIDX2_4:%.*]] = getelementptr inbounds double, double* [[X]], i64 [[INDVARS_IV_NEXT_3]]
+; CHECK-NEXT:    store double [[TMP90]], double* [[ARRAYIDX2_4]], align 8, !tbaa [[TBAA3]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_4:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 5
+; CHECK-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds double, double* [[Y]], i64 [[INDVARS_IV_NEXT_4]]
+; CHECK-NEXT:    [[T0_5:%.*]] = load double, double* [[ARRAYIDX_5]], align 8, !tbaa [[TBAA3]]
+; CHECK-NEXT:    [[TMP91:%.*]] = fmul fast double [[T0_5]], [[TMP83]]
+; CHECK-NEXT:    [[ARRAYIDX2_5:%.*]] = getelementptr inbounds double, double* [[X]], i64 [[INDVARS_IV_NEXT_4]]
+; CHECK-NEXT:    store double [[TMP91]], double* [[ARRAYIDX2_5]], align 8, !tbaa [[TBAA3]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_5:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 6
+; CHECK-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds double, double* [[Y]], i64 [[INDVARS_IV_NEXT_5]]
+; CHECK-NEXT:    [[T0_6:%.*]] = load double, double* [[ARRAYIDX_6]], align 8, !tbaa [[TBAA3]]
+; CHECK-NEXT:    [[TMP92:%.*]] = fmul fast double [[T0_6]], [[TMP84]]
+; CHECK-NEXT:    [[ARRAYIDX2_6:%.*]] = getelementptr inbounds double, double* [[X]], i64 [[INDVARS_IV_NEXT_5]]
+; CHECK-NEXT:    store double [[TMP92]], double* [[ARRAYIDX2_6]], align 8, !tbaa [[TBAA3]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_6:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 7
+; CHECK-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds double, double* [[Y]], i64 [[INDVARS_IV_NEXT_6]]
+; CHECK-NEXT:    [[T0_7:%.*]] = load double, double* [[ARRAYIDX_7]], align 8, !tbaa [[TBAA3]]
+; CHECK-NEXT:    [[TMP93:%.*]] = fmul fast double [[T0_7]], [[TMP85]]
+; CHECK-NEXT:    [[ARRAYIDX2_7:%.*]] = getelementptr inbounds double, double* [[X]], i64 [[INDVARS_IV_NEXT_6]]
+; CHECK-NEXT:    store double [[TMP93]], double* [[ARRAYIDX2_7]], align 8, !tbaa [[TBAA3]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_7]] = add nuw nsw i64 [[INDVARS_IV]], 8
+; CHECK-NEXT:    [[EXITCOND_NOT_7:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_7]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT_7]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
 ; CHECK:       for.end:
 ; CHECK-NEXT:    ret void
 ;
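For readers decoding the new bookkeeping in the checks above: `TMP1`..`TMP3` compute how many 16-element vector steps remain, `XTRAITER` is the odd step peeled into `vector.body.epil`, and the constant `2305843009213693950` (`0x1FFFFFFFFFFFFFFE`) acts as `~1` for any value that fits after the `lshr` by 4, rounding the step count down to an even number. A small sketch of that arithmetic (names mirror the IR values; explanatory only, not LLVM code):

```cpp
#include <cstdint>

// Runtime-unroll bookkeeping for a vector loop stepping 16 elements
// per iteration, unrolled by 2 (mirrors [[TMP1]]..[[TMP3]],
// [[XTRAITER]], and [[UNROLL_ITER]] above).
void tripCounts(uint64_t wideTripCount) {
  uint64_t tmp1 = wideTripCount - 16;   // [[TMP1]]
  uint64_t tmp2 = tmp1 >> 4;            // [[TMP2]]
  uint64_t steps = tmp2 + 1;            // [[TMP3]]: # of 16-wide steps
  uint64_t xtraiter = steps & 1;        // [[XTRAITER]]: odd step -> epilogue
  uint64_t unrollIter = steps & ~1ULL;  // [[UNROLL_ITER]]: the literal mask
                                        // 2305843009213693950 equals ~1 for
                                        // any value that fits after the >> 4
  (void)xtraiter;
  (void)unrollIter;
}
```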