forked from OSchip/llvm-project
[SLPVectorizer][X86] Regenerate some tests. NFCI
llvm-svn: 329196
This commit is contained in:
parent
1728fee6c3
commit
f1e668830f
|
|
@ -1,20 +1,25 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-apple-macosx10.8.0"
|
||||
|
||||
; int foo(double * restrict A, double * restrict B, double G) {
|
||||
; A[0] = (B[10] ? G : 1);
|
||||
; A[1] = (B[11] ? G : 1);
|
||||
; }
|
||||
|
||||
;CHECK-LABEL: @foo(
|
||||
;CHECK: load <2 x double>
|
||||
;CHECK: fcmp une <2 x double>
|
||||
;CHECK: select <2 x i1>
|
||||
;CHECK: store <2 x double>
|
||||
;CHECK: ret i32 undef
|
||||
define i32 @foo(double* noalias nocapture %A, double* noalias nocapture %B, double %G) {
|
||||
; CHECK-LABEL: @foo(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[B:%.*]], i64 10
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>*
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = fcmp une <2 x double> [[TMP1]], zeroinitializer
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> undef, double [[G:%.*]], i32 0
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[G]], i32 1
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP2]], <2 x double> [[TMP4]], <2 x double> <double 1.000000e+00, double 1.000000e+00>
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*
|
||||
; CHECK-NEXT: store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 8
|
||||
; CHECK-NEXT: ret i32 undef
|
||||
;
|
||||
entry:
|
||||
%arrayidx = getelementptr inbounds double, double* %B, i64 10
|
||||
%0 = load double, double* %arrayidx, align 8
|
||||
|
|
|
|||
|
|
@ -1,14 +1,10 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt < %s -slp-vectorizer -S -mtriple=x86_64-unknown-linux -mattr=+sse4.2 | FileCheck %s
|
||||
; RUN: opt < %s -slp-vectorizer -S -mtriple=x86_64-unknown-linux -mattr=+sse4.2 | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
@a = common local_unnamed_addr global [4 x i32] zeroinitializer, align 4
|
||||
@b = common local_unnamed_addr global [4 x i32] zeroinitializer, align 4
|
||||
|
||||
@a = common local_unnamed_addr global [4 x i32] zeroinitializer, align 4
|
||||
@b = common local_unnamed_addr global [4 x i32] zeroinitializer, align 4
|
||||
|
||||
define i32 @fn1() {
|
||||
define i32 @fn1() {
|
||||
; CHECK-LABEL: @fn1(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([4 x i32]* @b to <4 x i32>*), align 4
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt < %s -S -mtriple=x86_64-unknown -mattr=+avx -slp-vectorizer | FileCheck %s
|
||||
|
||||
|
||||
;void jumble (int * restrict A, int * restrict B) {
|
||||
; int tmp0 = A[10]*A[0];
|
||||
; int tmp1 = A[11]*A[1];
|
||||
|
|
@ -13,9 +12,8 @@
|
|||
; B[3] = tmp3;
|
||||
;}
|
||||
|
||||
|
||||
; Function Attrs: norecurse nounwind uwtable
|
||||
define void @jumble1(i32* noalias nocapture readonly %A, i32* noalias nocapture %B) {
|
||||
; Function Attrs: norecurse nounwind uwtable
|
||||
define void @jumble1(i32* noalias nocapture readonly %A, i32* noalias nocapture %B) {
|
||||
; CHECK-LABEL: @jumble1(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 10
|
||||
|
|
@ -29,13 +27,13 @@
|
|||
; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 2
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[A]] to <4 x i32>*
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 3, i32 2>
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = mul nsw <4 x i32> [[TMP1]], [[TMP4]]
|
||||
; CHECK-NEXT: [[REORDER_SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 3, i32 2>
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = mul nsw <4 x i32> [[TMP1]], [[REORDER_SHUFFLE]]
|
||||
; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 1
|
||||
; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 2
|
||||
; CHECK-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 3
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[B]] to <4 x i32>*
|
||||
; CHECK-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP6]], align 4
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[B]] to <4 x i32>*
|
||||
; CHECK-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[TMP5]], align 4
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
|
|
@ -66,11 +64,12 @@ entry:
|
|||
%arrayidx14 = getelementptr inbounds i32, i32* %B, i64 3
|
||||
store i32 %mul10, i32* %arrayidx14, align 4
|
||||
ret void
|
||||
}
|
||||
}
|
||||
|
||||
;Reversing the operands of MUL
|
||||
; Function Attrs: norecurse nounwind uwtable
|
||||
define void @jumble2(i32* noalias nocapture readonly %A, i32* noalias nocapture %B) {
|
||||
|
||||
; Function Attrs: norecurse nounwind uwtable
|
||||
define void @jumble2(i32* noalias nocapture readonly %A, i32* noalias nocapture %B) {
|
||||
; CHECK-LABEL: @jumble2(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 10
|
||||
|
|
@ -84,13 +83,13 @@ entry:
|
|||
; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 2
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[A]] to <4 x i32>*
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 3, i32 2>
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = mul nsw <4 x i32> [[TMP4]], [[TMP1]]
|
||||
; CHECK-NEXT: [[REORDER_SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 3, i32 2>
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = mul nsw <4 x i32> [[REORDER_SHUFFLE]], [[TMP1]]
|
||||
; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 1
|
||||
; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 2
|
||||
; CHECK-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 3
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[B]] to <4 x i32>*
|
||||
; CHECK-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP6]], align 4
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[B]] to <4 x i32>*
|
||||
; CHECK-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[TMP5]], align 4
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
|
|
@ -121,5 +120,5 @@ entry:
|
|||
%arrayidx14 = getelementptr inbounds i32, i32* %B, i64 3
|
||||
store i32 %mul10, i32* %arrayidx14, align 4
|
||||
ret void
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,23 +1,39 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-apple-macosx10.8.0"
|
||||
|
||||
;int foo(char * restrict A, float * restrict B, float T) {
|
||||
; A[0] = (T * B[10] + 4.0);
|
||||
; A[1] = (T * B[11] + 5.0);
|
||||
; A[2] = (T * B[12] + 6.0);
|
||||
;}
|
||||
|
||||
;CHECK-LABEL: @foo(
|
||||
;CHECK-NOT: load <3 x float>
|
||||
;CHECK-NOT: fmul <3 x float>
|
||||
;CHECK-NOT: fpext <3 x float>
|
||||
;CHECK-NOT: fadd <3 x double>
|
||||
;CHECK-NOT: fptosi <3 x double>
|
||||
;CHECK-NOT: store <3 x i8>
|
||||
;CHECK: ret
|
||||
define i32 @foo(i8* noalias nocapture %A, float* noalias nocapture %B, float %T) {
|
||||
; CHECK-LABEL: @foo(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 10
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = load float, float* [[TMP1]], align 4
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = fmul float [[TMP2]], [[T:%.*]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = fpext float [[TMP3]] to double
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = fadd double [[TMP4]], 4.000000e+00
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = fptosi double [[TMP5]] to i8
|
||||
; CHECK-NEXT: store i8 [[TMP6]], i8* [[A:%.*]], align 1
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, float* [[B]], i64 11
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = load float, float* [[TMP7]], align 4
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = fmul float [[TMP8]], [[T]]
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = fpext float [[TMP9]] to double
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = fadd double [[TMP10]], 5.000000e+00
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = fptosi double [[TMP11]] to i8
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 1
|
||||
; CHECK-NEXT: store i8 [[TMP12]], i8* [[TMP13]], align 1
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, float* [[B]], i64 12
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = load float, float* [[TMP14]], align 4
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = fmul float [[TMP15]], [[T]]
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = fpext float [[TMP16]] to double
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = fadd double [[TMP17]], 6.000000e+00
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = fptosi double [[TMP18]] to i8
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 2
|
||||
; CHECK-NEXT: store i8 [[TMP19]], i8* [[TMP20]], align 1
|
||||
; CHECK-NEXT: ret i32 undef
|
||||
;
|
||||
%1 = getelementptr inbounds float, float* %B, i64 10
|
||||
%2 = load float, float* %1, align 4
|
||||
%3 = fmul float %2, %T
|
||||
|
|
|
|||
|
|
@ -1,14 +1,29 @@
|
|||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-apple-macosx10.8.0"
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 | FileCheck %s
|
||||
|
||||
|
||||
; CHECK: tiny_tree_fully_vectorizable
|
||||
; CHECK: load <2 x double>
|
||||
; CHECK: store <2 x double>
|
||||
; CHECK: ret
|
||||
|
||||
define void @tiny_tree_fully_vectorizable(double* noalias nocapture %dst, double* noalias nocapture readonly %src, i64 %count) #0 {
|
||||
; CHECK-LABEL: @tiny_tree_fully_vectorizable(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[CMP12:%.*]] = icmp eq i64 [[COUNT:%.*]], 0
|
||||
; CHECK-NEXT: br i1 [[CMP12]], label [[FOR_END:%.*]], label [[FOR_BODY:%.*]]
|
||||
; CHECK: for.body:
|
||||
; CHECK-NEXT: [[I_015:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
|
||||
; CHECK-NEXT: [[DST_ADDR_014:%.*]] = phi double* [ [[ADD_PTR4:%.*]], [[FOR_BODY]] ], [ [[DST:%.*]], [[ENTRY]] ]
|
||||
; CHECK-NEXT: [[SRC_ADDR_013:%.*]] = phi double* [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[SRC:%.*]], [[ENTRY]] ]
|
||||
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[SRC_ADDR_013]], i64 1
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[SRC_ADDR_013]] to <2 x double>*
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
|
||||
; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[DST_ADDR_014]], i64 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = bitcast double* [[DST_ADDR_014]] to <2 x double>*
|
||||
; CHECK-NEXT: store <2 x double> [[TMP1]], <2 x double>* [[TMP2]], align 8
|
||||
; CHECK-NEXT: [[ADD_PTR]] = getelementptr inbounds double, double* [[SRC_ADDR_013]], i64 [[I_015]]
|
||||
; CHECK-NEXT: [[ADD_PTR4]] = getelementptr inbounds double, double* [[DST_ADDR_014]], i64 [[I_015]]
|
||||
; CHECK-NEXT: [[INC]] = add i64 [[I_015]], 1
|
||||
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[COUNT]]
|
||||
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]]
|
||||
; CHECK: for.end:
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
%cmp12 = icmp eq i64 %count, 0
|
||||
br i1 %cmp12, label %for.end, label %for.body
|
||||
|
|
@ -33,12 +48,33 @@ for.end: ; preds = %for.body, %entry
|
|||
ret void
|
||||
}
|
||||
|
||||
; CHECK: tiny_tree_fully_vectorizable2
|
||||
; CHECK: load <4 x float>
|
||||
; CHECK: store <4 x float>
|
||||
; CHECK: ret
|
||||
|
||||
define void @tiny_tree_fully_vectorizable2(float* noalias nocapture %dst, float* noalias nocapture readonly %src, i64 %count) #0 {
|
||||
; CHECK-LABEL: @tiny_tree_fully_vectorizable2(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[CMP20:%.*]] = icmp eq i64 [[COUNT:%.*]], 0
|
||||
; CHECK-NEXT: br i1 [[CMP20]], label [[FOR_END:%.*]], label [[FOR_BODY:%.*]]
|
||||
; CHECK: for.body:
|
||||
; CHECK-NEXT: [[I_023:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
|
||||
; CHECK-NEXT: [[DST_ADDR_022:%.*]] = phi float* [ [[ADD_PTR8:%.*]], [[FOR_BODY]] ], [ [[DST:%.*]], [[ENTRY]] ]
|
||||
; CHECK-NEXT: [[SRC_ADDR_021:%.*]] = phi float* [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[SRC:%.*]], [[ENTRY]] ]
|
||||
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[SRC_ADDR_021]], i64 1
|
||||
; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[DST_ADDR_022]], i64 1
|
||||
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[SRC_ADDR_021]], i64 2
|
||||
; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[DST_ADDR_022]], i64 2
|
||||
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[SRC_ADDR_021]], i64 3
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[SRC_ADDR_021]] to <4 x float>*
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
|
||||
; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[DST_ADDR_022]], i64 3
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[DST_ADDR_022]] to <4 x float>*
|
||||
; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float>* [[TMP2]], align 4
|
||||
; CHECK-NEXT: [[ADD_PTR]] = getelementptr inbounds float, float* [[SRC_ADDR_021]], i64 [[I_023]]
|
||||
; CHECK-NEXT: [[ADD_PTR8]] = getelementptr inbounds float, float* [[DST_ADDR_022]], i64 [[I_023]]
|
||||
; CHECK-NEXT: [[INC]] = add i64 [[I_023]], 1
|
||||
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[COUNT]]
|
||||
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]]
|
||||
; CHECK: for.end:
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
%cmp20 = icmp eq i64 %count, 0
|
||||
br i1 %cmp20, label %for.end, label %for.body
|
||||
|
|
@ -71,12 +107,31 @@ for.end: ; preds = %for.body, %entry
|
|||
ret void
|
||||
}
|
||||
|
||||
; We do not vectorize the tiny tree which is not fully vectorizable.
|
||||
; CHECK: tiny_tree_not_fully_vectorizable
|
||||
; CHECK-NOT: <2 x double>
|
||||
; CHECK: ret
|
||||
; We do not vectorize the tiny tree which is not fully vectorizable.
|
||||
|
||||
define void @tiny_tree_not_fully_vectorizable(double* noalias nocapture %dst, double* noalias nocapture readonly %src, i64 %count) #0 {
|
||||
; CHECK-LABEL: @tiny_tree_not_fully_vectorizable(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[CMP12:%.*]] = icmp eq i64 [[COUNT:%.*]], 0
|
||||
; CHECK-NEXT: br i1 [[CMP12]], label [[FOR_END:%.*]], label [[FOR_BODY:%.*]]
|
||||
; CHECK: for.body:
|
||||
; CHECK-NEXT: [[I_015:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
|
||||
; CHECK-NEXT: [[DST_ADDR_014:%.*]] = phi double* [ [[ADD_PTR4:%.*]], [[FOR_BODY]] ], [ [[DST:%.*]], [[ENTRY]] ]
|
||||
; CHECK-NEXT: [[SRC_ADDR_013:%.*]] = phi double* [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[SRC:%.*]], [[ENTRY]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = load double, double* [[SRC_ADDR_013]], align 8
|
||||
; CHECK-NEXT: store double [[TMP0]], double* [[DST_ADDR_014]], align 8
|
||||
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[SRC_ADDR_013]], i64 2
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load double, double* [[ARRAYIDX2]], align 8
|
||||
; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[DST_ADDR_014]], i64 1
|
||||
; CHECK-NEXT: store double [[TMP1]], double* [[ARRAYIDX3]], align 8
|
||||
; CHECK-NEXT: [[ADD_PTR]] = getelementptr inbounds double, double* [[SRC_ADDR_013]], i64 [[I_015]]
|
||||
; CHECK-NEXT: [[ADD_PTR4]] = getelementptr inbounds double, double* [[DST_ADDR_014]], i64 [[I_015]]
|
||||
; CHECK-NEXT: [[INC]] = add i64 [[I_015]], 1
|
||||
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[COUNT]]
|
||||
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]]
|
||||
; CHECK: for.end:
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
%cmp12 = icmp eq i64 %count, 0
|
||||
br i1 %cmp12, label %for.end, label %for.body
|
||||
|
|
@ -89,7 +144,7 @@ for.body: ; preds = %entry, %for.body
|
|||
store double %0, double* %dst.addr.014, align 8
|
||||
%arrayidx2 = getelementptr inbounds double, double* %src.addr.013, i64 2
|
||||
%1 = load double, double* %arrayidx2, align 8
|
||||
%arrayidx3 = getelementptr inbounds double, double* %dst.addr.014, i64 1
|
||||
%arrayidx3 = getelementptr inbounds double, double* %dst.addr.014, i64 1
|
||||
store double %1, double* %arrayidx3, align 8
|
||||
%add.ptr = getelementptr inbounds double, double* %src.addr.013, i64 %i.015
|
||||
%add.ptr4 = getelementptr inbounds double, double* %dst.addr.014, i64 %i.015
|
||||
|
|
@ -101,12 +156,37 @@ for.end: ; preds = %for.body, %entry
|
|||
ret void
|
||||
}
|
||||
|
||||
|
||||
; CHECK: tiny_tree_not_fully_vectorizable2
|
||||
; CHECK-NOT: <2 x double>
|
||||
; CHECK: ret
|
||||
|
||||
define void @tiny_tree_not_fully_vectorizable2(float* noalias nocapture %dst, float* noalias nocapture readonly %src, i64 %count) #0 {
|
||||
; CHECK-LABEL: @tiny_tree_not_fully_vectorizable2(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[CMP20:%.*]] = icmp eq i64 [[COUNT:%.*]], 0
|
||||
; CHECK-NEXT: br i1 [[CMP20]], label [[FOR_END:%.*]], label [[FOR_BODY:%.*]]
|
||||
; CHECK: for.body:
|
||||
; CHECK-NEXT: [[I_023:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
|
||||
; CHECK-NEXT: [[DST_ADDR_022:%.*]] = phi float* [ [[ADD_PTR8:%.*]], [[FOR_BODY]] ], [ [[DST:%.*]], [[ENTRY]] ]
|
||||
; CHECK-NEXT: [[SRC_ADDR_021:%.*]] = phi float* [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[SRC:%.*]], [[ENTRY]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[SRC_ADDR_021]], align 4
|
||||
; CHECK-NEXT: store float [[TMP0]], float* [[DST_ADDR_022]], align 4
|
||||
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[SRC_ADDR_021]], i64 4
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX2]], align 4
|
||||
; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[DST_ADDR_022]], i64 1
|
||||
; CHECK-NEXT: store float [[TMP1]], float* [[ARRAYIDX3]], align 4
|
||||
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[SRC_ADDR_021]], i64 2
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = load float, float* [[ARRAYIDX4]], align 4
|
||||
; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[DST_ADDR_022]], i64 2
|
||||
; CHECK-NEXT: store float [[TMP2]], float* [[ARRAYIDX5]], align 4
|
||||
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[SRC_ADDR_021]], i64 3
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = load float, float* [[ARRAYIDX6]], align 4
|
||||
; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[DST_ADDR_022]], i64 3
|
||||
; CHECK-NEXT: store float [[TMP3]], float* [[ARRAYIDX7]], align 4
|
||||
; CHECK-NEXT: [[ADD_PTR]] = getelementptr inbounds float, float* [[SRC_ADDR_021]], i64 [[I_023]]
|
||||
; CHECK-NEXT: [[ADD_PTR8]] = getelementptr inbounds float, float* [[DST_ADDR_022]], i64 [[I_023]]
|
||||
; CHECK-NEXT: [[INC]] = add i64 [[I_023]], 1
|
||||
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[COUNT]]
|
||||
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]]
|
||||
; CHECK: for.end:
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
%cmp20 = icmp eq i64 %count, 0
|
||||
br i1 %cmp20, label %for.end, label %for.body
|
||||
|
|
@ -117,7 +197,7 @@ for.body: ; preds = %entry, %for.body
|
|||
%src.addr.021 = phi float* [ %add.ptr, %for.body ], [ %src, %entry ]
|
||||
%0 = load float, float* %src.addr.021, align 4
|
||||
store float %0, float* %dst.addr.022, align 4
|
||||
%arrayidx2 = getelementptr inbounds float, float* %src.addr.021, i64 4
|
||||
%arrayidx2 = getelementptr inbounds float, float* %src.addr.021, i64 4
|
||||
%1 = load float, float* %arrayidx2, align 4
|
||||
%arrayidx3 = getelementptr inbounds float, float* %dst.addr.022, i64 1
|
||||
store float %1, float* %arrayidx3, align 4
|
||||
|
|
@ -139,10 +219,20 @@ for.end: ; preds = %for.body, %entry
|
|||
ret void
|
||||
}
|
||||
|
||||
|
||||
; CHECK-LABEL: store_splat
|
||||
; CHECK: store <4 x float>
|
||||
define void @store_splat(float*, float) {
|
||||
; CHECK-LABEL: @store_splat(
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, float* [[TMP0:%.*]], i64 0
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, float* [[TMP0]], i64 1
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, float* [[TMP0]], i64 2
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, float* [[TMP0]], i64 3
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> undef, float [[TMP1:%.*]], i32 0
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[TMP7]], float [[TMP1]], i32 1
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x float> [[TMP8]], float [[TMP1]], i32 2
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x float> [[TMP9]], float [[TMP1]], i32 3
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = bitcast float* [[TMP3]] to <4 x float>*
|
||||
; CHECK-NEXT: store <4 x float> [[TMP10]], <4 x float>* [[TMP11]], align 4
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
%3 = getelementptr inbounds float, float* %0, i64 0
|
||||
store float %1, float* %3, align 4
|
||||
%4 = getelementptr inbounds float, float* %0, i64 1
|
||||
|
|
@ -154,10 +244,17 @@ define void @store_splat(float*, float) {
|
|||
ret void
|
||||
}
|
||||
|
||||
|
||||
; CHECK-LABEL: store_const
|
||||
; CHECK: store <4 x i32>
|
||||
define void @store_const(i32* %a) {
|
||||
; CHECK-LABEL: @store_const(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[PTR0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 0
|
||||
; CHECK-NEXT: [[PTR1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 1
|
||||
; CHECK-NEXT: [[PTR2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 2
|
||||
; CHECK-NEXT: [[PTR3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 3
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR0]] to <4 x i32>*
|
||||
; CHECK-NEXT: store <4 x i32> <i32 10, i32 30, i32 20, i32 40>, <4 x i32>* [[TMP0]], align 4
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
%ptr0 = getelementptr inbounds i32, i32* %a, i64 0
|
||||
store i32 10, i32* %ptr0, align 4
|
||||
|
|
|
|||
Loading…
Reference in New Issue