107 lines
4.4 KiB
LLVM
107 lines
4.4 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
|
; The expectations in this file match the vectorizer's debug trace (the
; "LV: Checking a loop" line and the VPlan dumps) as well as the final IR.
; NOTE(review): the assertion-build requirement below is presumably there
; because the -debug trace is only produced by such builds - confirm.
; REQUIRES: asserts
|
|
|
|
; opt runs only the loop vectorizer with interleaving forced to 1, passes
; -vectorizer-maximize-bandwidth and -debug, and merges stderr into stdout
; (2>&1) so that both the trace and the printed IR reach FileCheck.
; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -vectorizer-maximize-bandwidth -mtriple=arm64-apple-ios -debug -S %s 2>&1 | FileCheck %s
|
|
|
|
; Same triple as the -mtriple option passed to opt above.
target triple = "arm64-apple-ios"
|
|
|
|
; CHECK-LABEL: LV: Checking a loop in 'test'
|
|
; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' {
|
|
; CHECK-NEXT: Live-in vp<%1> = vector-trip-count
|
|
; CHECK-EMPTY:
|
|
; CHECK-NEXT: vector.ph:
|
|
; CHECK-NEXT: Successor(s): vector loop
|
|
; CHECK-EMPTY:
|
|
; CHECK-NEXT: <x1> vector loop: {
|
|
; CHECK-NEXT: vector.body:
|
|
; CHECK-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION
|
|
; CHECK-NEXT: vp<%3> = SCALAR-STEPS vp<%2>, ir<0>, ir<1>
|
|
; CHECK-NEXT: CLONE ir<%gep.src> = getelementptr ir<%src>, vp<%3>
|
|
; CHECK-NEXT: WIDEN ir<%l> = load ir<%gep.src>
|
|
; CHECK-NEXT: WIDEN ir<%conv> = fpext ir<%l>
|
|
; CHECK-NEXT: WIDEN-CALL ir<%s> = call @llvm.sin.f64(ir<%conv>) (using library function)
|
|
; CHECK-NEXT: REPLICATE ir<%gep.dst> = getelementptr ir<%dst>, vp<%3>
|
|
; CHECK-NEXT: REPLICATE store ir<%s>, ir<%gep.dst>
|
|
; CHECK-NEXT: EMIT vp<%10> = VF * UF +(nuw) vp<%2>
|
|
; CHECK-NEXT: EMIT branch-on-count vp<%10> vp<%1>
|
|
; CHECK-NEXT: No successors
|
|
; CHECK-NEXT: }
|
|
; CHECK-NEXT: Successor(s): middle.block
|
|
; CHECK-EMPTY:
|
|
; CHECK-NEXT: middle.block:
|
|
; CHECK-NEXT: No successors
|
|
; CHECK-NEXT: }
|
|
|
|
; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
|
|
; CHECK-NEXT: Live-in vp<%1> = vector-trip-count
|
|
; CHECK-EMPTY:
|
|
; CHECK-NEXT: vector.ph:
|
|
; CHECK-NEXT: Successor(s): vector loop
|
|
; CHECK-EMPTY:
|
|
; CHECK-NEXT: <x1> vector loop: {
|
|
; CHECK-NEXT: vector.body:
|
|
; CHECK-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION
|
|
; CHECK-NEXT: vp<%3> = SCALAR-STEPS vp<%2>, ir<0>, ir<1>
|
|
; CHECK-NEXT: CLONE ir<%gep.src> = getelementptr ir<%src>, vp<%3>
|
|
; CHECK-NEXT: WIDEN ir<%l> = load ir<%gep.src>
|
|
; CHECK-NEXT: WIDEN ir<%conv> = fpext ir<%l>
|
|
; CHECK-NEXT: WIDEN-CALL ir<%s> = call @llvm.sin.f64(ir<%conv>) (using vector intrinsic)
|
|
; CHECK-NEXT: REPLICATE ir<%gep.dst> = getelementptr ir<%dst>, vp<%3>
|
|
; CHECK-NEXT: REPLICATE store ir<%s>, ir<%gep.dst>
|
|
; CHECK-NEXT: EMIT vp<%10> = VF * UF +(nuw) vp<%2>
|
|
; CHECK-NEXT: EMIT branch-on-count vp<%10> vp<%1>
|
|
; CHECK-NEXT: No successors
|
|
; CHECK-NEXT: }
|
|
; CHECK-NEXT: Successor(s): middle.block
|
|
; CHECK-EMPTY:
|
|
; CHECK-NEXT: middle.block:
|
|
; CHECK-NEXT: No successors
|
|
; CHECK-NEXT: }
|
|
;
|
|
;
|
|
; Scalar input loop for the vectorizer. For each of 1024 iterations it loads a
; float from %src, extends it to double, calls llvm.sin.f64 (call site tagged
; with attribute group #0) and stores the double result through %dst.
; Both pointer arguments are marked noalias.
;
; The expectations embedded at the top of the body pin the vector.body block of
; the generated code: the index advances by 2, a <2 x float> wide load is
; extended to <2 x double>, the sine is computed by a call to
; @__simd_sin_v2f64, and each lane is written with its own extractelement +
; scalar double store.
define void @test(ptr noalias %src, ptr noalias %dst) {
|
|
; CHECK-LABEL: @test(
|
|
; CHECK: vector.body:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
|
|
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
|
|
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i64 [[TMP0]]
|
|
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0
|
|
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP3]], align 4
|
|
; CHECK-NEXT: [[TMP4:%.*]] = fpext <2 x float> [[WIDE_LOAD]] to <2 x double>
|
|
; CHECK-NEXT: [[TMP5:%.*]] = call fast <2 x double> @__simd_sin_v2f64(<2 x double> [[TMP4]])
|
|
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 [[TMP0]]
|
|
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[TMP1]]
|
|
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP5]], i32 0
|
|
; CHECK-NEXT: store double [[TMP8]], ptr [[TMP6]], align 8
|
|
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP5]], i32 1
|
|
; CHECK-NEXT: store double [[TMP9]], ptr [[TMP7]], align 8
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
|
|
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
|
|
; CHECK-NEXT: br i1 [[TMP10]], label %middle.block, label %vector.body
|
|
;
|
|
entry:
|
|
br label %loop
|
|
|
|
loop:
|
|
; Induction variable: 0 on entry from %entry, %iv.next on the back edge.
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
|
|
; Address of the %iv-th float in %src.
%gep.src = getelementptr inbounds float, ptr %src, i64 %iv
|
|
%l = load float, ptr %gep.src, align 4
|
|
; Widen the loaded float to double so it can be passed to the f64 intrinsic.
%conv = fpext float %l to double
|
|
; The call carries fast-math flags and attribute group #0.
%s = call fast double @llvm.sin.f64(double %conv) #0
|
|
; The destination address is indexed with a float element type although a
; double is stored through it below.
; NOTE(review): presumably deliberate, so the store is not treated as
; consecutive and ends up replicated per lane as the expectations show - confirm.
%gep.dst = getelementptr inbounds float, ptr %dst, i64 %iv
|
|
; No explicit alignment is given on this store.
store double %s, ptr %gep.dst
|
|
%iv.next = add nsw i64 %iv, 1
|
|
; Keep looping until %iv.next reaches 1024.
%cmp = icmp ne i64 %iv.next, 1024
|
|
br i1 %cmp, label %loop, label %exit
|
|
|
|
exit:
|
|
ret void
|
|
}
|
|
|
|
; Scalar sine intrinsic called from the loop body of @test.
declare double @llvm.sin.f64(double)
|
|
|
|
; Vector routine named inside attribute group #0 below; it takes and returns
; <2 x double>.
declare <2 x double> @__simd_sin_v2f64(<2 x double>)
|
|
|
|
; Attribute group attached to the sin call in @test. The string pairs
; llvm.sin.f64 with @__simd_sin_v2f64.
; NOTE(review): the "_ZGV_LLVM_N2v_" prefix presumably encodes an unmasked,
; 2-lane variant taking one vector parameter - confirm against the LLVM
; vector-function ABI mangling rules.
attributes #0 = { "vector-function-abi-variant"="_ZGV_LLVM_N2v_llvm.sin.f64(__simd_sin_v2f64)" }
|