[SLP][X86] Add common check prefix for horizontal reduction tests
This commit is contained in:
parent
478c237e21
commit
ea071884b0
|
|
@ -1,6 +1,6 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -slp-vectorizer -S < %s -mtriple=x86_64-apple-macosx -mcpu=corei7-avx | FileCheck %s
|
||||
; RUN: opt -slp-vectorizer -slp-vectorize-hor -slp-vectorize-hor-store -S < %s -mtriple=x86_64-apple-macosx -mcpu=corei7-avx | FileCheck %s --check-prefix=STORE
|
||||
; RUN: opt -slp-vectorizer -S < %s -mtriple=x86_64-apple-macosx -mcpu=corei7-avx | FileCheck %s --check-prefixes=ALL,CHECK
|
||||
; RUN: opt -slp-vectorizer -slp-vectorize-hor -slp-vectorize-hor-store -S < %s -mtriple=x86_64-apple-macosx -mcpu=corei7-avx | FileCheck %s --check-prefixes=ALL,STORE
|
||||
|
||||
; #include <stdint.h>
|
||||
;
|
||||
|
|
@ -16,71 +16,38 @@
|
|||
; }
|
||||
|
||||
define i32 @add_red(float* %A, i32 %n) {
|
||||
; CHECK-LABEL: @add_red(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[CMP31:%.*]] = icmp sgt i32 [[N:%.*]], 0
|
||||
; CHECK-NEXT: br i1 [[CMP31]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
|
||||
; CHECK: for.body.lr.ph:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[N]] to i64
|
||||
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
||||
; CHECK: for.body:
|
||||
; CHECK-NEXT: [[I_033:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
|
||||
; CHECK-NEXT: [[SUM_032:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[ADD17:%.*]], [[FOR_BODY]] ]
|
||||
; CHECK-NEXT: [[MUL:%.*]] = shl nsw i64 [[I_033]], 2
|
||||
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]]
|
||||
; CHECK-NEXT: [[ADD28:%.*]] = or i64 [[MUL]], 1
|
||||
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD28]]
|
||||
; CHECK-NEXT: [[ADD829:%.*]] = or i64 [[MUL]], 2
|
||||
; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD829]]
|
||||
; CHECK-NEXT: [[ADD1330:%.*]] = or i64 [[MUL]], 3
|
||||
; CHECK-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1330]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[ARRAYIDX]] to <4 x float>*
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = fmul <4 x float> [[TMP2]], <float 7.000000e+00, float 7.000000e+00, float 7.000000e+00, float 7.000000e+00>
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP3]])
|
||||
; CHECK-NEXT: [[ADD17]] = fadd fast float [[SUM_032]], [[TMP4]]
|
||||
; CHECK-NEXT: [[INC]] = add nsw i64 [[I_033]], 1
|
||||
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP0]]
|
||||
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
|
||||
; CHECK: for.cond.for.end_crit_edge:
|
||||
; CHECK-NEXT: [[PHITMP:%.*]] = fptosi float [[ADD17]] to i32
|
||||
; CHECK-NEXT: br label [[FOR_END]]
|
||||
; CHECK: for.end:
|
||||
; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[PHITMP]], [[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ]
|
||||
; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]]
|
||||
;
|
||||
; STORE-LABEL: @add_red(
|
||||
; STORE-NEXT: entry:
|
||||
; STORE-NEXT: [[CMP31:%.*]] = icmp sgt i32 [[N:%.*]], 0
|
||||
; STORE-NEXT: br i1 [[CMP31]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
|
||||
; STORE: for.body.lr.ph:
|
||||
; STORE-NEXT: [[TMP0:%.*]] = sext i32 [[N]] to i64
|
||||
; STORE-NEXT: br label [[FOR_BODY:%.*]]
|
||||
; STORE: for.body:
|
||||
; STORE-NEXT: [[I_033:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
|
||||
; STORE-NEXT: [[SUM_032:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[ADD17:%.*]], [[FOR_BODY]] ]
|
||||
; STORE-NEXT: [[MUL:%.*]] = shl nsw i64 [[I_033]], 2
|
||||
; STORE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]]
|
||||
; STORE-NEXT: [[ADD28:%.*]] = or i64 [[MUL]], 1
|
||||
; STORE-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD28]]
|
||||
; STORE-NEXT: [[ADD829:%.*]] = or i64 [[MUL]], 2
|
||||
; STORE-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD829]]
|
||||
; STORE-NEXT: [[ADD1330:%.*]] = or i64 [[MUL]], 3
|
||||
; STORE-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1330]]
|
||||
; STORE-NEXT: [[TMP1:%.*]] = bitcast float* [[ARRAYIDX]] to <4 x float>*
|
||||
; STORE-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
|
||||
; STORE-NEXT: [[TMP3:%.*]] = fmul <4 x float> [[TMP2]], <float 7.000000e+00, float 7.000000e+00, float 7.000000e+00, float 7.000000e+00>
|
||||
; STORE-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP3]])
|
||||
; STORE-NEXT: [[ADD17]] = fadd fast float [[SUM_032]], [[TMP4]]
|
||||
; STORE-NEXT: [[INC]] = add nsw i64 [[I_033]], 1
|
||||
; STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP0]]
|
||||
; STORE-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
|
||||
; STORE: for.cond.for.end_crit_edge:
|
||||
; STORE-NEXT: [[PHITMP:%.*]] = fptosi float [[ADD17]] to i32
|
||||
; STORE-NEXT: br label [[FOR_END]]
|
||||
; STORE: for.end:
|
||||
; STORE-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[PHITMP]], [[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ]
|
||||
; STORE-NEXT: ret i32 [[SUM_0_LCSSA]]
|
||||
; ALL-LABEL: @add_red(
|
||||
; ALL-NEXT: entry:
|
||||
; ALL-NEXT: [[CMP31:%.*]] = icmp sgt i32 [[N:%.*]], 0
|
||||
; ALL-NEXT: br i1 [[CMP31]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
|
||||
; ALL: for.body.lr.ph:
|
||||
; ALL-NEXT: [[TMP0:%.*]] = sext i32 [[N]] to i64
|
||||
; ALL-NEXT: br label [[FOR_BODY:%.*]]
|
||||
; ALL: for.body:
|
||||
; ALL-NEXT: [[I_033:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
|
||||
; ALL-NEXT: [[SUM_032:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[ADD17:%.*]], [[FOR_BODY]] ]
|
||||
; ALL-NEXT: [[MUL:%.*]] = shl nsw i64 [[I_033]], 2
|
||||
; ALL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]]
|
||||
; ALL-NEXT: [[ADD28:%.*]] = or i64 [[MUL]], 1
|
||||
; ALL-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD28]]
|
||||
; ALL-NEXT: [[ADD829:%.*]] = or i64 [[MUL]], 2
|
||||
; ALL-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD829]]
|
||||
; ALL-NEXT: [[ADD1330:%.*]] = or i64 [[MUL]], 3
|
||||
; ALL-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1330]]
|
||||
; ALL-NEXT: [[TMP1:%.*]] = bitcast float* [[ARRAYIDX]] to <4 x float>*
|
||||
; ALL-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
|
||||
; ALL-NEXT: [[TMP3:%.*]] = fmul <4 x float> [[TMP2]], <float 7.000000e+00, float 7.000000e+00, float 7.000000e+00, float 7.000000e+00>
|
||||
; ALL-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP3]])
|
||||
; ALL-NEXT: [[ADD17]] = fadd fast float [[SUM_032]], [[TMP4]]
|
||||
; ALL-NEXT: [[INC]] = add nsw i64 [[I_033]], 1
|
||||
; ALL-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP0]]
|
||||
; ALL-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
|
||||
; ALL: for.cond.for.end_crit_edge:
|
||||
; ALL-NEXT: [[PHITMP:%.*]] = fptosi float [[ADD17]] to i32
|
||||
; ALL-NEXT: br label [[FOR_END]]
|
||||
; ALL: for.end:
|
||||
; ALL-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[PHITMP]], [[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ]
|
||||
; ALL-NEXT: ret i32 [[SUM_0_LCSSA]]
|
||||
;
|
||||
entry:
|
||||
%cmp31 = icmp sgt i32 %n, 0
|
||||
|
|
@ -138,81 +105,43 @@ for.end:
|
|||
; }
|
||||
|
||||
define i32 @mul_red(float* noalias %A, float* noalias %B, i32 %n) {
|
||||
; CHECK-LABEL: @mul_red(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[CMP38:%.*]] = icmp sgt i32 [[N:%.*]], 0
|
||||
; CHECK-NEXT: br i1 [[CMP38]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
|
||||
; CHECK: for.body.lr.ph:
|
||||
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 1
|
||||
; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[B]], i64 2
|
||||
; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds float, float* [[B]], i64 3
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[B]] to <4 x float>*
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[N]] to i64
|
||||
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
||||
; CHECK: for.body:
|
||||
; CHECK-NEXT: [[I_040:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
|
||||
; CHECK-NEXT: [[SUM_039:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[MUL21:%.*]], [[FOR_BODY]] ]
|
||||
; CHECK-NEXT: [[MUL:%.*]] = shl nsw i64 [[I_040]], 2
|
||||
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]]
|
||||
; CHECK-NEXT: [[ADD35:%.*]] = or i64 [[MUL]], 1
|
||||
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD35]]
|
||||
; CHECK-NEXT: [[ADD1136:%.*]] = or i64 [[MUL]], 2
|
||||
; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1136]]
|
||||
; CHECK-NEXT: [[ADD1737:%.*]] = or i64 [[MUL]], 3
|
||||
; CHECK-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1737]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x float>*
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = fmul <4 x float> [[TMP1]], [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP5]])
|
||||
; CHECK-NEXT: [[MUL21]] = fmul float [[SUM_039]], [[TMP6]]
|
||||
; CHECK-NEXT: [[INC]] = add nsw i64 [[I_040]], 1
|
||||
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]]
|
||||
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
|
||||
; CHECK: for.cond.for.end_crit_edge:
|
||||
; CHECK-NEXT: [[PHITMP:%.*]] = fptosi float [[MUL21]] to i32
|
||||
; CHECK-NEXT: br label [[FOR_END]]
|
||||
; CHECK: for.end:
|
||||
; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[PHITMP]], [[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ]
|
||||
; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]]
|
||||
;
|
||||
; STORE-LABEL: @mul_red(
|
||||
; STORE-NEXT: entry:
|
||||
; STORE-NEXT: [[CMP38:%.*]] = icmp sgt i32 [[N:%.*]], 0
|
||||
; STORE-NEXT: br i1 [[CMP38]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
|
||||
; STORE: for.body.lr.ph:
|
||||
; STORE-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 1
|
||||
; STORE-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[B]], i64 2
|
||||
; STORE-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds float, float* [[B]], i64 3
|
||||
; STORE-NEXT: [[TMP0:%.*]] = bitcast float* [[B]] to <4 x float>*
|
||||
; STORE-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
|
||||
; STORE-NEXT: [[TMP2:%.*]] = sext i32 [[N]] to i64
|
||||
; STORE-NEXT: br label [[FOR_BODY:%.*]]
|
||||
; STORE: for.body:
|
||||
; STORE-NEXT: [[I_040:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
|
||||
; STORE-NEXT: [[SUM_039:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[MUL21:%.*]], [[FOR_BODY]] ]
|
||||
; STORE-NEXT: [[MUL:%.*]] = shl nsw i64 [[I_040]], 2
|
||||
; STORE-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]]
|
||||
; STORE-NEXT: [[ADD35:%.*]] = or i64 [[MUL]], 1
|
||||
; STORE-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD35]]
|
||||
; STORE-NEXT: [[ADD1136:%.*]] = or i64 [[MUL]], 2
|
||||
; STORE-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1136]]
|
||||
; STORE-NEXT: [[ADD1737:%.*]] = or i64 [[MUL]], 3
|
||||
; STORE-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1737]]
|
||||
; STORE-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x float>*
|
||||
; STORE-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
|
||||
; STORE-NEXT: [[TMP5:%.*]] = fmul <4 x float> [[TMP1]], [[TMP4]]
|
||||
; STORE-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP5]])
|
||||
; STORE-NEXT: [[MUL21]] = fmul float [[SUM_039]], [[TMP6]]
|
||||
; STORE-NEXT: [[INC]] = add nsw i64 [[I_040]], 1
|
||||
; STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]]
|
||||
; STORE-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
|
||||
; STORE: for.cond.for.end_crit_edge:
|
||||
; STORE-NEXT: [[PHITMP:%.*]] = fptosi float [[MUL21]] to i32
|
||||
; STORE-NEXT: br label [[FOR_END]]
|
||||
; STORE: for.end:
|
||||
; STORE-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[PHITMP]], [[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ]
|
||||
; STORE-NEXT: ret i32 [[SUM_0_LCSSA]]
|
||||
; ALL-LABEL: @mul_red(
|
||||
; ALL-NEXT: entry:
|
||||
; ALL-NEXT: [[CMP38:%.*]] = icmp sgt i32 [[N:%.*]], 0
|
||||
; ALL-NEXT: br i1 [[CMP38]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
|
||||
; ALL: for.body.lr.ph:
|
||||
; ALL-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 1
|
||||
; ALL-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[B]], i64 2
|
||||
; ALL-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds float, float* [[B]], i64 3
|
||||
; ALL-NEXT: [[TMP0:%.*]] = bitcast float* [[B]] to <4 x float>*
|
||||
; ALL-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
|
||||
; ALL-NEXT: [[TMP2:%.*]] = sext i32 [[N]] to i64
|
||||
; ALL-NEXT: br label [[FOR_BODY:%.*]]
|
||||
; ALL: for.body:
|
||||
; ALL-NEXT: [[I_040:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
|
||||
; ALL-NEXT: [[SUM_039:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[MUL21:%.*]], [[FOR_BODY]] ]
|
||||
; ALL-NEXT: [[MUL:%.*]] = shl nsw i64 [[I_040]], 2
|
||||
; ALL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]]
|
||||
; ALL-NEXT: [[ADD35:%.*]] = or i64 [[MUL]], 1
|
||||
; ALL-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD35]]
|
||||
; ALL-NEXT: [[ADD1136:%.*]] = or i64 [[MUL]], 2
|
||||
; ALL-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1136]]
|
||||
; ALL-NEXT: [[ADD1737:%.*]] = or i64 [[MUL]], 3
|
||||
; ALL-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1737]]
|
||||
; ALL-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x float>*
|
||||
; ALL-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
|
||||
; ALL-NEXT: [[TMP5:%.*]] = fmul <4 x float> [[TMP1]], [[TMP4]]
|
||||
; ALL-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP5]])
|
||||
; ALL-NEXT: [[MUL21]] = fmul float [[SUM_039]], [[TMP6]]
|
||||
; ALL-NEXT: [[INC]] = add nsw i64 [[I_040]], 1
|
||||
; ALL-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]]
|
||||
; ALL-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
|
||||
; ALL: for.cond.for.end_crit_edge:
|
||||
; ALL-NEXT: [[PHITMP:%.*]] = fptosi float [[MUL21]] to i32
|
||||
; ALL-NEXT: br label [[FOR_END]]
|
||||
; ALL: for.end:
|
||||
; ALL-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[PHITMP]], [[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ]
|
||||
; ALL-NEXT: ret i32 [[SUM_0_LCSSA]]
|
||||
;
|
||||
entry:
|
||||
%cmp38 = icmp sgt i32 %n, 0
|
||||
|
|
@ -282,119 +211,62 @@ for.end:
|
|||
; }
|
||||
|
||||
define i32 @long_red(float* noalias %A, float* noalias %B, i32 %n) {
|
||||
; CHECK-LABEL: @long_red(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[CMP81:%.*]] = icmp sgt i32 [[N:%.*]], 0
|
||||
; CHECK-NEXT: br i1 [[CMP81]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
|
||||
; CHECK: for.body.lr.ph:
|
||||
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 1
|
||||
; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[B]], i64 2
|
||||
; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds float, float* [[B]], i64 3
|
||||
; CHECK-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds float, float* [[B]], i64 4
|
||||
; CHECK-NEXT: [[ARRAYIDX27:%.*]] = getelementptr inbounds float, float* [[B]], i64 5
|
||||
; CHECK-NEXT: [[ARRAYIDX33:%.*]] = getelementptr inbounds float, float* [[B]], i64 6
|
||||
; CHECK-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds float, float* [[B]], i64 7
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[B]] to <8 x float>*
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4
|
||||
; CHECK-NEXT: [[ARRAYIDX45:%.*]] = getelementptr inbounds float, float* [[B]], i64 8
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = load float, float* [[ARRAYIDX45]], align 4
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = sext i32 [[N]] to i64
|
||||
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
||||
; CHECK: for.body:
|
||||
; CHECK-NEXT: [[I_083:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
|
||||
; CHECK-NEXT: [[SUM_082:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[ADD51:%.*]], [[FOR_BODY]] ]
|
||||
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i64 [[I_083]], 6
|
||||
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]]
|
||||
; CHECK-NEXT: [[ADD80:%.*]] = or i64 [[MUL]], 1
|
||||
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD80]]
|
||||
; CHECK-NEXT: [[ADD11:%.*]] = add nsw i64 [[MUL]], 2
|
||||
; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD11]]
|
||||
; CHECK-NEXT: [[ADD17:%.*]] = add nsw i64 [[MUL]], 3
|
||||
; CHECK-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD17]]
|
||||
; CHECK-NEXT: [[ADD23:%.*]] = add nsw i64 [[MUL]], 4
|
||||
; CHECK-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD23]]
|
||||
; CHECK-NEXT: [[ADD29:%.*]] = add nsw i64 [[MUL]], 5
|
||||
; CHECK-NEXT: [[ARRAYIDX30:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD29]]
|
||||
; CHECK-NEXT: [[ADD35:%.*]] = add nsw i64 [[MUL]], 6
|
||||
; CHECK-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD35]]
|
||||
; CHECK-NEXT: [[ADD41:%.*]] = add nsw i64 [[MUL]], 7
|
||||
; CHECK-NEXT: [[ARRAYIDX42:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD41]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[ARRAYIDX2]] to <8 x float>*
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = load <8 x float>, <8 x float>* [[TMP4]], align 4
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = fmul fast <8 x float> [[TMP1]], [[TMP5]]
|
||||
; CHECK-NEXT: [[ADD47:%.*]] = add nsw i64 [[MUL]], 8
|
||||
; CHECK-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD47]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = load float, float* [[ARRAYIDX48]], align 4
|
||||
; CHECK-NEXT: [[MUL49:%.*]] = fmul fast float [[TMP2]], [[TMP7]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP6]])
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = fadd fast float [[TMP8]], [[MUL49]]
|
||||
; CHECK-NEXT: [[ADD51]] = fadd fast float [[SUM_082]], [[TMP9]]
|
||||
; CHECK-NEXT: [[INC]] = add nsw i64 [[I_083]], 1
|
||||
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP3]]
|
||||
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
|
||||
; CHECK: for.cond.for.end_crit_edge:
|
||||
; CHECK-NEXT: [[PHITMP:%.*]] = fptosi float [[ADD51]] to i32
|
||||
; CHECK-NEXT: br label [[FOR_END]]
|
||||
; CHECK: for.end:
|
||||
; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[PHITMP]], [[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ]
|
||||
; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]]
|
||||
;
|
||||
; STORE-LABEL: @long_red(
|
||||
; STORE-NEXT: entry:
|
||||
; STORE-NEXT: [[CMP81:%.*]] = icmp sgt i32 [[N:%.*]], 0
|
||||
; STORE-NEXT: br i1 [[CMP81]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
|
||||
; STORE: for.body.lr.ph:
|
||||
; STORE-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 1
|
||||
; STORE-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[B]], i64 2
|
||||
; STORE-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds float, float* [[B]], i64 3
|
||||
; STORE-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds float, float* [[B]], i64 4
|
||||
; STORE-NEXT: [[ARRAYIDX27:%.*]] = getelementptr inbounds float, float* [[B]], i64 5
|
||||
; STORE-NEXT: [[ARRAYIDX33:%.*]] = getelementptr inbounds float, float* [[B]], i64 6
|
||||
; STORE-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds float, float* [[B]], i64 7
|
||||
; STORE-NEXT: [[TMP0:%.*]] = bitcast float* [[B]] to <8 x float>*
|
||||
; STORE-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4
|
||||
; STORE-NEXT: [[ARRAYIDX45:%.*]] = getelementptr inbounds float, float* [[B]], i64 8
|
||||
; STORE-NEXT: [[TMP2:%.*]] = load float, float* [[ARRAYIDX45]], align 4
|
||||
; STORE-NEXT: [[TMP3:%.*]] = sext i32 [[N]] to i64
|
||||
; STORE-NEXT: br label [[FOR_BODY:%.*]]
|
||||
; STORE: for.body:
|
||||
; STORE-NEXT: [[I_083:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
|
||||
; STORE-NEXT: [[SUM_082:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[ADD51:%.*]], [[FOR_BODY]] ]
|
||||
; STORE-NEXT: [[MUL:%.*]] = mul nsw i64 [[I_083]], 6
|
||||
; STORE-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]]
|
||||
; STORE-NEXT: [[ADD80:%.*]] = or i64 [[MUL]], 1
|
||||
; STORE-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD80]]
|
||||
; STORE-NEXT: [[ADD11:%.*]] = add nsw i64 [[MUL]], 2
|
||||
; STORE-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD11]]
|
||||
; STORE-NEXT: [[ADD17:%.*]] = add nsw i64 [[MUL]], 3
|
||||
; STORE-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD17]]
|
||||
; STORE-NEXT: [[ADD23:%.*]] = add nsw i64 [[MUL]], 4
|
||||
; STORE-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD23]]
|
||||
; STORE-NEXT: [[ADD29:%.*]] = add nsw i64 [[MUL]], 5
|
||||
; STORE-NEXT: [[ARRAYIDX30:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD29]]
|
||||
; STORE-NEXT: [[ADD35:%.*]] = add nsw i64 [[MUL]], 6
|
||||
; STORE-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD35]]
|
||||
; STORE-NEXT: [[ADD41:%.*]] = add nsw i64 [[MUL]], 7
|
||||
; STORE-NEXT: [[ARRAYIDX42:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD41]]
|
||||
; STORE-NEXT: [[TMP4:%.*]] = bitcast float* [[ARRAYIDX2]] to <8 x float>*
|
||||
; STORE-NEXT: [[TMP5:%.*]] = load <8 x float>, <8 x float>* [[TMP4]], align 4
|
||||
; STORE-NEXT: [[TMP6:%.*]] = fmul fast <8 x float> [[TMP1]], [[TMP5]]
|
||||
; STORE-NEXT: [[ADD47:%.*]] = add nsw i64 [[MUL]], 8
|
||||
; STORE-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD47]]
|
||||
; STORE-NEXT: [[TMP7:%.*]] = load float, float* [[ARRAYIDX48]], align 4
|
||||
; STORE-NEXT: [[MUL49:%.*]] = fmul fast float [[TMP2]], [[TMP7]]
|
||||
; STORE-NEXT: [[TMP8:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP6]])
|
||||
; STORE-NEXT: [[TMP9:%.*]] = fadd fast float [[TMP8]], [[MUL49]]
|
||||
; STORE-NEXT: [[ADD51]] = fadd fast float [[SUM_082]], [[TMP9]]
|
||||
; STORE-NEXT: [[INC]] = add nsw i64 [[I_083]], 1
|
||||
; STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP3]]
|
||||
; STORE-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
|
||||
; STORE: for.cond.for.end_crit_edge:
|
||||
; STORE-NEXT: [[PHITMP:%.*]] = fptosi float [[ADD51]] to i32
|
||||
; STORE-NEXT: br label [[FOR_END]]
|
||||
; STORE: for.end:
|
||||
; STORE-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[PHITMP]], [[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ]
|
||||
; STORE-NEXT: ret i32 [[SUM_0_LCSSA]]
|
||||
; ALL-LABEL: @long_red(
|
||||
; ALL-NEXT: entry:
|
||||
; ALL-NEXT: [[CMP81:%.*]] = icmp sgt i32 [[N:%.*]], 0
|
||||
; ALL-NEXT: br i1 [[CMP81]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
|
||||
; ALL: for.body.lr.ph:
|
||||
; ALL-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 1
|
||||
; ALL-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[B]], i64 2
|
||||
; ALL-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds float, float* [[B]], i64 3
|
||||
; ALL-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds float, float* [[B]], i64 4
|
||||
; ALL-NEXT: [[ARRAYIDX27:%.*]] = getelementptr inbounds float, float* [[B]], i64 5
|
||||
; ALL-NEXT: [[ARRAYIDX33:%.*]] = getelementptr inbounds float, float* [[B]], i64 6
|
||||
; ALL-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds float, float* [[B]], i64 7
|
||||
; ALL-NEXT: [[TMP0:%.*]] = bitcast float* [[B]] to <8 x float>*
|
||||
; ALL-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4
|
||||
; ALL-NEXT: [[ARRAYIDX45:%.*]] = getelementptr inbounds float, float* [[B]], i64 8
|
||||
; ALL-NEXT: [[TMP2:%.*]] = load float, float* [[ARRAYIDX45]], align 4
|
||||
; ALL-NEXT: [[TMP3:%.*]] = sext i32 [[N]] to i64
|
||||
; ALL-NEXT: br label [[FOR_BODY:%.*]]
|
||||
; ALL: for.body:
|
||||
; ALL-NEXT: [[I_083:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
|
||||
; ALL-NEXT: [[SUM_082:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[ADD51:%.*]], [[FOR_BODY]] ]
|
||||
; ALL-NEXT: [[MUL:%.*]] = mul nsw i64 [[I_083]], 6
|
||||
; ALL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]]
|
||||
; ALL-NEXT: [[ADD80:%.*]] = or i64 [[MUL]], 1
|
||||
; ALL-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD80]]
|
||||
; ALL-NEXT: [[ADD11:%.*]] = add nsw i64 [[MUL]], 2
|
||||
; ALL-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD11]]
|
||||
; ALL-NEXT: [[ADD17:%.*]] = add nsw i64 [[MUL]], 3
|
||||
; ALL-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD17]]
|
||||
; ALL-NEXT: [[ADD23:%.*]] = add nsw i64 [[MUL]], 4
|
||||
; ALL-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD23]]
|
||||
; ALL-NEXT: [[ADD29:%.*]] = add nsw i64 [[MUL]], 5
|
||||
; ALL-NEXT: [[ARRAYIDX30:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD29]]
|
||||
; ALL-NEXT: [[ADD35:%.*]] = add nsw i64 [[MUL]], 6
|
||||
; ALL-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD35]]
|
||||
; ALL-NEXT: [[ADD41:%.*]] = add nsw i64 [[MUL]], 7
|
||||
; ALL-NEXT: [[ARRAYIDX42:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD41]]
|
||||
; ALL-NEXT: [[TMP4:%.*]] = bitcast float* [[ARRAYIDX2]] to <8 x float>*
|
||||
; ALL-NEXT: [[TMP5:%.*]] = load <8 x float>, <8 x float>* [[TMP4]], align 4
|
||||
; ALL-NEXT: [[TMP6:%.*]] = fmul fast <8 x float> [[TMP1]], [[TMP5]]
|
||||
; ALL-NEXT: [[ADD47:%.*]] = add nsw i64 [[MUL]], 8
|
||||
; ALL-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD47]]
|
||||
; ALL-NEXT: [[TMP7:%.*]] = load float, float* [[ARRAYIDX48]], align 4
|
||||
; ALL-NEXT: [[MUL49:%.*]] = fmul fast float [[TMP2]], [[TMP7]]
|
||||
; ALL-NEXT: [[TMP8:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP6]])
|
||||
; ALL-NEXT: [[TMP9:%.*]] = fadd fast float [[TMP8]], [[MUL49]]
|
||||
; ALL-NEXT: [[ADD51]] = fadd fast float [[SUM_082]], [[TMP9]]
|
||||
; ALL-NEXT: [[INC]] = add nsw i64 [[I_083]], 1
|
||||
; ALL-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP3]]
|
||||
; ALL-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
|
||||
; ALL: for.cond.for.end_crit_edge:
|
||||
; ALL-NEXT: [[PHITMP:%.*]] = fptosi float [[ADD51]] to i32
|
||||
; ALL-NEXT: br label [[FOR_END]]
|
||||
; ALL: for.end:
|
||||
; ALL-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[PHITMP]], [[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ]
|
||||
; ALL-NEXT: ret i32 [[SUM_0_LCSSA]]
|
||||
;
|
||||
entry:
|
||||
%cmp81 = icmp sgt i32 %n, 0
|
||||
|
|
@@ -494,81 +366,43 @@ for.end:
|
|||
; }
|
||||
|
||||
define i32 @chain_red(float* noalias %A, float* noalias %B, i32 %n) {
|
||||
; CHECK-LABEL: @chain_red(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[CMP41:%.*]] = icmp sgt i32 [[N:%.*]], 0
|
||||
; CHECK-NEXT: br i1 [[CMP41]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
|
||||
; CHECK: for.body.lr.ph:
|
||||
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 1
|
||||
; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[B]], i64 2
|
||||
; CHECK-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds float, float* [[B]], i64 3
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[B]] to <4 x float>*
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[N]] to i64
|
||||
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
||||
; CHECK: for.body:
|
||||
; CHECK-NEXT: [[I_043:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
|
||||
; CHECK-NEXT: [[SUM_042:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ]
|
||||
; CHECK-NEXT: [[MUL:%.*]] = shl nsw i64 [[I_043]], 2
|
||||
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]]
|
||||
; CHECK-NEXT: [[ADD638:%.*]] = or i64 [[MUL]], 1
|
||||
; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD638]]
|
||||
; CHECK-NEXT: [[ADD1239:%.*]] = or i64 [[MUL]], 2
|
||||
; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1239]]
|
||||
; CHECK-NEXT: [[ADD1840:%.*]] = or i64 [[MUL]], 3
|
||||
; CHECK-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1840]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x float>*
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP5]])
|
||||
; CHECK-NEXT: [[OP_EXTRA]] = fadd fast float [[TMP6]], [[SUM_042]]
|
||||
; CHECK-NEXT: [[INC]] = add nsw i64 [[I_043]], 1
|
||||
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]]
|
||||
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
|
||||
; CHECK: for.cond.for.end_crit_edge:
|
||||
; CHECK-NEXT: [[PHITMP:%.*]] = fptosi float [[OP_EXTRA]] to i32
|
||||
; CHECK-NEXT: br label [[FOR_END]]
|
||||
; CHECK: for.end:
|
||||
; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[PHITMP]], [[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ]
|
||||
; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]]
|
||||
;
|
||||
; STORE-LABEL: @chain_red(
|
||||
; STORE-NEXT: entry:
|
||||
; STORE-NEXT: [[CMP41:%.*]] = icmp sgt i32 [[N:%.*]], 0
|
||||
; STORE-NEXT: br i1 [[CMP41]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
|
||||
; STORE: for.body.lr.ph:
|
||||
; STORE-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 1
|
||||
; STORE-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[B]], i64 2
|
||||
; STORE-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds float, float* [[B]], i64 3
|
||||
; STORE-NEXT: [[TMP0:%.*]] = bitcast float* [[B]] to <4 x float>*
|
||||
; STORE-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
|
||||
; STORE-NEXT: [[TMP2:%.*]] = sext i32 [[N]] to i64
|
||||
; STORE-NEXT: br label [[FOR_BODY:%.*]]
|
||||
; STORE: for.body:
|
||||
; STORE-NEXT: [[I_043:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
|
||||
; STORE-NEXT: [[SUM_042:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ]
|
||||
; STORE-NEXT: [[MUL:%.*]] = shl nsw i64 [[I_043]], 2
|
||||
; STORE-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]]
|
||||
; STORE-NEXT: [[ADD638:%.*]] = or i64 [[MUL]], 1
|
||||
; STORE-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD638]]
|
||||
; STORE-NEXT: [[ADD1239:%.*]] = or i64 [[MUL]], 2
|
||||
; STORE-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1239]]
|
||||
; STORE-NEXT: [[ADD1840:%.*]] = or i64 [[MUL]], 3
|
||||
; STORE-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1840]]
|
||||
; STORE-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x float>*
|
||||
; STORE-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
|
||||
; STORE-NEXT: [[TMP5:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP4]]
|
||||
; STORE-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP5]])
|
||||
; STORE-NEXT: [[OP_EXTRA]] = fadd fast float [[TMP6]], [[SUM_042]]
|
||||
; STORE-NEXT: [[INC]] = add nsw i64 [[I_043]], 1
|
||||
; STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]]
|
||||
; STORE-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
|
||||
; STORE: for.cond.for.end_crit_edge:
|
||||
; STORE-NEXT: [[PHITMP:%.*]] = fptosi float [[OP_EXTRA]] to i32
|
||||
; STORE-NEXT: br label [[FOR_END]]
|
||||
; STORE: for.end:
|
||||
; STORE-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[PHITMP]], [[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ]
|
||||
; STORE-NEXT: ret i32 [[SUM_0_LCSSA]]
|
||||
; ALL-LABEL: @chain_red(
|
||||
; ALL-NEXT: entry:
|
||||
; ALL-NEXT: [[CMP41:%.*]] = icmp sgt i32 [[N:%.*]], 0
|
||||
; ALL-NEXT: br i1 [[CMP41]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
|
||||
; ALL: for.body.lr.ph:
|
||||
; ALL-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 1
|
||||
; ALL-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[B]], i64 2
|
||||
; ALL-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds float, float* [[B]], i64 3
|
||||
; ALL-NEXT: [[TMP0:%.*]] = bitcast float* [[B]] to <4 x float>*
|
||||
; ALL-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
|
||||
; ALL-NEXT: [[TMP2:%.*]] = sext i32 [[N]] to i64
|
||||
; ALL-NEXT: br label [[FOR_BODY:%.*]]
|
||||
; ALL: for.body:
|
||||
; ALL-NEXT: [[I_043:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
|
||||
; ALL-NEXT: [[SUM_042:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ]
|
||||
; ALL-NEXT: [[MUL:%.*]] = shl nsw i64 [[I_043]], 2
|
||||
; ALL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]]
|
||||
; ALL-NEXT: [[ADD638:%.*]] = or i64 [[MUL]], 1
|
||||
; ALL-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD638]]
|
||||
; ALL-NEXT: [[ADD1239:%.*]] = or i64 [[MUL]], 2
|
||||
; ALL-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1239]]
|
||||
; ALL-NEXT: [[ADD1840:%.*]] = or i64 [[MUL]], 3
|
||||
; ALL-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1840]]
|
||||
; ALL-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x float>*
|
||||
; ALL-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
|
||||
; ALL-NEXT: [[TMP5:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP4]]
|
||||
; ALL-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP5]])
|
||||
; ALL-NEXT: [[OP_EXTRA]] = fadd fast float [[TMP6]], [[SUM_042]]
|
||||
; ALL-NEXT: [[INC]] = add nsw i64 [[I_043]], 1
|
||||
; ALL-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]]
|
||||
; ALL-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
|
||||
; ALL: for.cond.for.end_crit_edge:
|
||||
; ALL-NEXT: [[PHITMP:%.*]] = fptosi float [[OP_EXTRA]] to i32
|
||||
; ALL-NEXT: br label [[FOR_END]]
|
||||
; ALL: for.end:
|
||||
; ALL-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[PHITMP]], [[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ]
|
||||
; ALL-NEXT: ret i32 [[SUM_0_LCSSA]]
|
||||
;
|
||||
entry:
|
||||
%cmp41 = icmp sgt i32 %n, 0
|
||||
|
|
@@ -648,125 +482,65 @@ for.end:
|
|||
; }
|
||||
|
||||
define void @foo(float* nocapture readonly %arg_A, i32 %arg_B, float* nocapture %array) {
|
||||
; CHECK-LABEL: @foo(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[CMP1495:%.*]] = icmp eq i32 [[ARG_B:%.*]], 0
|
||||
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
||||
; CHECK: for.cond.cleanup:
|
||||
; CHECK-NEXT: ret void
|
||||
; CHECK: for.body:
|
||||
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_COND_CLEANUP15:%.*]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[INDVARS_IV]], 2
|
||||
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[ARRAY:%.*]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX]], align 4
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = or i64 [[TMP0]], 1
|
||||
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[ARRAY]], i64 [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = load float, float* [[ARRAYIDX4]], align 4
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP0]], 2
|
||||
; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, float* [[ARRAY]], i64 [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = load float, float* [[ARRAYIDX8]], align 4
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = or i64 [[TMP0]], 3
|
||||
; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[ARRAY]], i64 [[TMP6]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = load float, float* [[ARRAYIDX12]], align 4
|
||||
; CHECK-NEXT: br i1 [[CMP1495]], label [[FOR_COND_CLEANUP15]], label [[FOR_BODY16_LR_PH:%.*]]
|
||||
; CHECK: for.body16.lr.ph:
|
||||
; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds float, float* [[ARG_A:%.*]], i64 [[INDVARS_IV]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = load float, float* [[ADD_PTR]], align 4
|
||||
; CHECK-NEXT: br label [[FOR_BODY16:%.*]]
|
||||
; CHECK: for.cond.cleanup15:
|
||||
; CHECK-NEXT: [[W2_0_LCSSA:%.*]] = phi float [ [[TMP5]], [[FOR_BODY]] ], [ [[SUB28:%.*]], [[FOR_BODY16]] ]
|
||||
; CHECK-NEXT: [[W3_0_LCSSA:%.*]] = phi float [ [[TMP7]], [[FOR_BODY]] ], [ [[W2_096:%.*]], [[FOR_BODY16]] ]
|
||||
; CHECK-NEXT: [[W1_0_LCSSA:%.*]] = phi float [ [[TMP3]], [[FOR_BODY]] ], [ [[W0_0100:%.*]], [[FOR_BODY16]] ]
|
||||
; CHECK-NEXT: [[W0_0_LCSSA:%.*]] = phi float [ [[TMP1]], [[FOR_BODY]] ], [ [[SUB19:%.*]], [[FOR_BODY16]] ]
|
||||
; CHECK-NEXT: store float [[W0_0_LCSSA]], float* [[ARRAYIDX]], align 4
|
||||
; CHECK-NEXT: store float [[W1_0_LCSSA]], float* [[ARRAYIDX4]], align 4
|
||||
; CHECK-NEXT: store float [[W2_0_LCSSA]], float* [[ARRAYIDX8]], align 4
|
||||
; CHECK-NEXT: store float [[W3_0_LCSSA]], float* [[ARRAYIDX12]], align 4
|
||||
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
|
||||
; CHECK-NEXT: [[EXITCOND109:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 6
|
||||
; CHECK-NEXT: br i1 [[EXITCOND109]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
|
||||
; CHECK: for.body16:
|
||||
; CHECK-NEXT: [[W0_0100]] = phi float [ [[TMP1]], [[FOR_BODY16_LR_PH]] ], [ [[SUB19]], [[FOR_BODY16]] ]
|
||||
; CHECK-NEXT: [[W1_099:%.*]] = phi float [ [[TMP3]], [[FOR_BODY16_LR_PH]] ], [ [[W0_0100]], [[FOR_BODY16]] ]
|
||||
; CHECK-NEXT: [[J_098:%.*]] = phi i32 [ 0, [[FOR_BODY16_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY16]] ]
|
||||
; CHECK-NEXT: [[W3_097:%.*]] = phi float [ [[TMP7]], [[FOR_BODY16_LR_PH]] ], [ [[W2_096]], [[FOR_BODY16]] ]
|
||||
; CHECK-NEXT: [[W2_096]] = phi float [ [[TMP5]], [[FOR_BODY16_LR_PH]] ], [ [[SUB28]], [[FOR_BODY16]] ]
|
||||
; CHECK-NEXT: [[MUL17:%.*]] = fmul fast float [[W0_0100]], 0x3FF19999A0000000
|
||||
; CHECK-NEXT: [[MUL18_NEG:%.*]] = fmul fast float [[W1_099]], 0xBFF3333340000000
|
||||
; CHECK-NEXT: [[SUB92:%.*]] = fadd fast float [[MUL17]], [[MUL18_NEG]]
|
||||
; CHECK-NEXT: [[SUB19]] = fadd fast float [[SUB92]], [[TMP8]]
|
||||
; CHECK-NEXT: [[MUL20:%.*]] = fmul fast float [[SUB19]], 0x4000CCCCC0000000
|
||||
; CHECK-NEXT: [[MUL21_NEG:%.*]] = fmul fast float [[W0_0100]], 0xC0019999A0000000
|
||||
; CHECK-NEXT: [[MUL23:%.*]] = fmul fast float [[W1_099]], 0x4002666660000000
|
||||
; CHECK-NEXT: [[MUL25:%.*]] = fmul fast float [[W2_096]], 0x4008CCCCC0000000
|
||||
; CHECK-NEXT: [[MUL27_NEG:%.*]] = fmul fast float [[W3_097]], 0xC0099999A0000000
|
||||
; CHECK-NEXT: [[ADD2293:%.*]] = fadd fast float [[MUL27_NEG]], [[MUL25]]
|
||||
; CHECK-NEXT: [[ADD24:%.*]] = fadd fast float [[ADD2293]], [[MUL23]]
|
||||
; CHECK-NEXT: [[SUB2694:%.*]] = fadd fast float [[ADD24]], [[MUL21_NEG]]
|
||||
; CHECK-NEXT: [[SUB28]] = fadd fast float [[SUB2694]], [[MUL20]]
|
||||
; CHECK-NEXT: [[INC]] = add nuw i32 [[J_098]], 1
|
||||
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[ARG_B]]
|
||||
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP15]], label [[FOR_BODY16]]
|
||||
;
|
||||
; STORE-LABEL: @foo(
|
||||
; STORE-NEXT: entry:
|
||||
; STORE-NEXT: [[CMP1495:%.*]] = icmp eq i32 [[ARG_B:%.*]], 0
|
||||
; STORE-NEXT: br label [[FOR_BODY:%.*]]
|
||||
; STORE: for.cond.cleanup:
|
||||
; STORE-NEXT: ret void
|
||||
; STORE: for.body:
|
||||
; STORE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_COND_CLEANUP15:%.*]] ]
|
||||
; STORE-NEXT: [[TMP0:%.*]] = shl i64 [[INDVARS_IV]], 2
|
||||
; STORE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[ARRAY:%.*]], i64 [[TMP0]]
|
||||
; STORE-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX]], align 4
|
||||
; STORE-NEXT: [[TMP2:%.*]] = or i64 [[TMP0]], 1
|
||||
; STORE-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[ARRAY]], i64 [[TMP2]]
|
||||
; STORE-NEXT: [[TMP3:%.*]] = load float, float* [[ARRAYIDX4]], align 4
|
||||
; STORE-NEXT: [[TMP4:%.*]] = or i64 [[TMP0]], 2
|
||||
; STORE-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, float* [[ARRAY]], i64 [[TMP4]]
|
||||
; STORE-NEXT: [[TMP5:%.*]] = load float, float* [[ARRAYIDX8]], align 4
|
||||
; STORE-NEXT: [[TMP6:%.*]] = or i64 [[TMP0]], 3
|
||||
; STORE-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[ARRAY]], i64 [[TMP6]]
|
||||
; STORE-NEXT: [[TMP7:%.*]] = load float, float* [[ARRAYIDX12]], align 4
|
||||
; STORE-NEXT: br i1 [[CMP1495]], label [[FOR_COND_CLEANUP15]], label [[FOR_BODY16_LR_PH:%.*]]
|
||||
; STORE: for.body16.lr.ph:
|
||||
; STORE-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds float, float* [[ARG_A:%.*]], i64 [[INDVARS_IV]]
|
||||
; STORE-NEXT: [[TMP8:%.*]] = load float, float* [[ADD_PTR]], align 4
|
||||
; STORE-NEXT: br label [[FOR_BODY16:%.*]]
|
||||
; STORE: for.cond.cleanup15:
|
||||
; STORE-NEXT: [[W2_0_LCSSA:%.*]] = phi float [ [[TMP5]], [[FOR_BODY]] ], [ [[SUB28:%.*]], [[FOR_BODY16]] ]
|
||||
; STORE-NEXT: [[W3_0_LCSSA:%.*]] = phi float [ [[TMP7]], [[FOR_BODY]] ], [ [[W2_096:%.*]], [[FOR_BODY16]] ]
|
||||
; STORE-NEXT: [[W1_0_LCSSA:%.*]] = phi float [ [[TMP3]], [[FOR_BODY]] ], [ [[W0_0100:%.*]], [[FOR_BODY16]] ]
|
||||
; STORE-NEXT: [[W0_0_LCSSA:%.*]] = phi float [ [[TMP1]], [[FOR_BODY]] ], [ [[SUB19:%.*]], [[FOR_BODY16]] ]
|
||||
; STORE-NEXT: store float [[W0_0_LCSSA]], float* [[ARRAYIDX]], align 4
|
||||
; STORE-NEXT: store float [[W1_0_LCSSA]], float* [[ARRAYIDX4]], align 4
|
||||
; STORE-NEXT: store float [[W2_0_LCSSA]], float* [[ARRAYIDX8]], align 4
|
||||
; STORE-NEXT: store float [[W3_0_LCSSA]], float* [[ARRAYIDX12]], align 4
|
||||
; STORE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
|
||||
; STORE-NEXT: [[EXITCOND109:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 6
|
||||
; STORE-NEXT: br i1 [[EXITCOND109]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
|
||||
; STORE: for.body16:
|
||||
; STORE-NEXT: [[W0_0100]] = phi float [ [[TMP1]], [[FOR_BODY16_LR_PH]] ], [ [[SUB19]], [[FOR_BODY16]] ]
|
||||
; STORE-NEXT: [[W1_099:%.*]] = phi float [ [[TMP3]], [[FOR_BODY16_LR_PH]] ], [ [[W0_0100]], [[FOR_BODY16]] ]
|
||||
; STORE-NEXT: [[J_098:%.*]] = phi i32 [ 0, [[FOR_BODY16_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY16]] ]
|
||||
; STORE-NEXT: [[W3_097:%.*]] = phi float [ [[TMP7]], [[FOR_BODY16_LR_PH]] ], [ [[W2_096]], [[FOR_BODY16]] ]
|
||||
; STORE-NEXT: [[W2_096]] = phi float [ [[TMP5]], [[FOR_BODY16_LR_PH]] ], [ [[SUB28]], [[FOR_BODY16]] ]
|
||||
; STORE-NEXT: [[MUL17:%.*]] = fmul fast float [[W0_0100]], 0x3FF19999A0000000
|
||||
; STORE-NEXT: [[MUL18_NEG:%.*]] = fmul fast float [[W1_099]], 0xBFF3333340000000
|
||||
; STORE-NEXT: [[SUB92:%.*]] = fadd fast float [[MUL17]], [[MUL18_NEG]]
|
||||
; STORE-NEXT: [[SUB19]] = fadd fast float [[SUB92]], [[TMP8]]
|
||||
; STORE-NEXT: [[MUL20:%.*]] = fmul fast float [[SUB19]], 0x4000CCCCC0000000
|
||||
; STORE-NEXT: [[MUL21_NEG:%.*]] = fmul fast float [[W0_0100]], 0xC0019999A0000000
|
||||
; STORE-NEXT: [[MUL23:%.*]] = fmul fast float [[W1_099]], 0x4002666660000000
|
||||
; STORE-NEXT: [[MUL25:%.*]] = fmul fast float [[W2_096]], 0x4008CCCCC0000000
|
||||
; STORE-NEXT: [[MUL27_NEG:%.*]] = fmul fast float [[W3_097]], 0xC0099999A0000000
|
||||
; STORE-NEXT: [[ADD2293:%.*]] = fadd fast float [[MUL27_NEG]], [[MUL25]]
|
||||
; STORE-NEXT: [[ADD24:%.*]] = fadd fast float [[ADD2293]], [[MUL23]]
|
||||
; STORE-NEXT: [[SUB2694:%.*]] = fadd fast float [[ADD24]], [[MUL21_NEG]]
|
||||
; STORE-NEXT: [[SUB28]] = fadd fast float [[SUB2694]], [[MUL20]]
|
||||
; STORE-NEXT: [[INC]] = add nuw i32 [[J_098]], 1
|
||||
; STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[ARG_B]]
|
||||
; STORE-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP15]], label [[FOR_BODY16]]
|
||||
; ALL-LABEL: @foo(
|
||||
; ALL-NEXT: entry:
|
||||
; ALL-NEXT: [[CMP1495:%.*]] = icmp eq i32 [[ARG_B:%.*]], 0
|
||||
; ALL-NEXT: br label [[FOR_BODY:%.*]]
|
||||
; ALL: for.cond.cleanup:
|
||||
; ALL-NEXT: ret void
|
||||
; ALL: for.body:
|
||||
; ALL-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_COND_CLEANUP15:%.*]] ]
|
||||
; ALL-NEXT: [[TMP0:%.*]] = shl i64 [[INDVARS_IV]], 2
|
||||
; ALL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[ARRAY:%.*]], i64 [[TMP0]]
|
||||
; ALL-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX]], align 4
|
||||
; ALL-NEXT: [[TMP2:%.*]] = or i64 [[TMP0]], 1
|
||||
; ALL-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[ARRAY]], i64 [[TMP2]]
|
||||
; ALL-NEXT: [[TMP3:%.*]] = load float, float* [[ARRAYIDX4]], align 4
|
||||
; ALL-NEXT: [[TMP4:%.*]] = or i64 [[TMP0]], 2
|
||||
; ALL-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, float* [[ARRAY]], i64 [[TMP4]]
|
||||
; ALL-NEXT: [[TMP5:%.*]] = load float, float* [[ARRAYIDX8]], align 4
|
||||
; ALL-NEXT: [[TMP6:%.*]] = or i64 [[TMP0]], 3
|
||||
; ALL-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[ARRAY]], i64 [[TMP6]]
|
||||
; ALL-NEXT: [[TMP7:%.*]] = load float, float* [[ARRAYIDX12]], align 4
|
||||
; ALL-NEXT: br i1 [[CMP1495]], label [[FOR_COND_CLEANUP15]], label [[FOR_BODY16_LR_PH:%.*]]
|
||||
; ALL: for.body16.lr.ph:
|
||||
; ALL-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds float, float* [[ARG_A:%.*]], i64 [[INDVARS_IV]]
|
||||
; ALL-NEXT: [[TMP8:%.*]] = load float, float* [[ADD_PTR]], align 4
|
||||
; ALL-NEXT: br label [[FOR_BODY16:%.*]]
|
||||
; ALL: for.cond.cleanup15:
|
||||
; ALL-NEXT: [[W2_0_LCSSA:%.*]] = phi float [ [[TMP5]], [[FOR_BODY]] ], [ [[SUB28:%.*]], [[FOR_BODY16]] ]
|
||||
; ALL-NEXT: [[W3_0_LCSSA:%.*]] = phi float [ [[TMP7]], [[FOR_BODY]] ], [ [[W2_096:%.*]], [[FOR_BODY16]] ]
|
||||
; ALL-NEXT: [[W1_0_LCSSA:%.*]] = phi float [ [[TMP3]], [[FOR_BODY]] ], [ [[W0_0100:%.*]], [[FOR_BODY16]] ]
|
||||
; ALL-NEXT: [[W0_0_LCSSA:%.*]] = phi float [ [[TMP1]], [[FOR_BODY]] ], [ [[SUB19:%.*]], [[FOR_BODY16]] ]
|
||||
; ALL-NEXT: store float [[W0_0_LCSSA]], float* [[ARRAYIDX]], align 4
|
||||
; ALL-NEXT: store float [[W1_0_LCSSA]], float* [[ARRAYIDX4]], align 4
|
||||
; ALL-NEXT: store float [[W2_0_LCSSA]], float* [[ARRAYIDX8]], align 4
|
||||
; ALL-NEXT: store float [[W3_0_LCSSA]], float* [[ARRAYIDX12]], align 4
|
||||
; ALL-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
|
||||
; ALL-NEXT: [[EXITCOND109:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 6
|
||||
; ALL-NEXT: br i1 [[EXITCOND109]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
|
||||
; ALL: for.body16:
|
||||
; ALL-NEXT: [[W0_0100]] = phi float [ [[TMP1]], [[FOR_BODY16_LR_PH]] ], [ [[SUB19]], [[FOR_BODY16]] ]
|
||||
; ALL-NEXT: [[W1_099:%.*]] = phi float [ [[TMP3]], [[FOR_BODY16_LR_PH]] ], [ [[W0_0100]], [[FOR_BODY16]] ]
|
||||
; ALL-NEXT: [[J_098:%.*]] = phi i32 [ 0, [[FOR_BODY16_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY16]] ]
|
||||
; ALL-NEXT: [[W3_097:%.*]] = phi float [ [[TMP7]], [[FOR_BODY16_LR_PH]] ], [ [[W2_096]], [[FOR_BODY16]] ]
|
||||
; ALL-NEXT: [[W2_096]] = phi float [ [[TMP5]], [[FOR_BODY16_LR_PH]] ], [ [[SUB28]], [[FOR_BODY16]] ]
|
||||
; ALL-NEXT: [[MUL17:%.*]] = fmul fast float [[W0_0100]], 0x3FF19999A0000000
|
||||
; ALL-NEXT: [[MUL18_NEG:%.*]] = fmul fast float [[W1_099]], 0xBFF3333340000000
|
||||
; ALL-NEXT: [[SUB92:%.*]] = fadd fast float [[MUL17]], [[MUL18_NEG]]
|
||||
; ALL-NEXT: [[SUB19]] = fadd fast float [[SUB92]], [[TMP8]]
|
||||
; ALL-NEXT: [[MUL20:%.*]] = fmul fast float [[SUB19]], 0x4000CCCCC0000000
|
||||
; ALL-NEXT: [[MUL21_NEG:%.*]] = fmul fast float [[W0_0100]], 0xC0019999A0000000
|
||||
; ALL-NEXT: [[MUL23:%.*]] = fmul fast float [[W1_099]], 0x4002666660000000
|
||||
; ALL-NEXT: [[MUL25:%.*]] = fmul fast float [[W2_096]], 0x4008CCCCC0000000
|
||||
; ALL-NEXT: [[MUL27_NEG:%.*]] = fmul fast float [[W3_097]], 0xC0099999A0000000
|
||||
; ALL-NEXT: [[ADD2293:%.*]] = fadd fast float [[MUL27_NEG]], [[MUL25]]
|
||||
; ALL-NEXT: [[ADD24:%.*]] = fadd fast float [[ADD2293]], [[MUL23]]
|
||||
; ALL-NEXT: [[SUB2694:%.*]] = fadd fast float [[ADD24]], [[MUL21_NEG]]
|
||||
; ALL-NEXT: [[SUB28]] = fadd fast float [[SUB2694]], [[MUL20]]
|
||||
; ALL-NEXT: [[INC]] = add nuw i32 [[J_098]], 1
|
||||
; ALL-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[ARG_B]]
|
||||
; ALL-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP15]], label [[FOR_BODY16]]
|
||||
;
|
||||
entry:
|
||||
%cmp1495 = icmp eq i32 %arg_B, 0
|
||||
|
|
@@ -1541,19 +1315,12 @@ entry:
|
|||
declare i32 @foobar(i32)
|
||||
|
||||
define void @i32_red_call(i32 %val) {
|
||||
; CHECK-LABEL: @i32_red_call(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr_i32 to <8 x i32>*), align 16
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP0]])
|
||||
; CHECK-NEXT: [[RES:%.*]] = call i32 @foobar(i32 [[TMP1]])
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
; STORE-LABEL: @i32_red_call(
|
||||
; STORE-NEXT: entry:
|
||||
; STORE-NEXT: [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr_i32 to <8 x i32>*), align 16
|
||||
; STORE-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP0]])
|
||||
; STORE-NEXT: [[RES:%.*]] = call i32 @foobar(i32 [[TMP1]])
|
||||
; STORE-NEXT: ret void
|
||||
; ALL-LABEL: @i32_red_call(
|
||||
; ALL-NEXT: entry:
|
||||
; ALL-NEXT: [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr_i32 to <8 x i32>*), align 16
|
||||
; ALL-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP0]])
|
||||
; ALL-NEXT: [[RES:%.*]] = call i32 @foobar(i32 [[TMP1]])
|
||||
; ALL-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
%0 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 0), align 16
|
||||
|
|
@@ -1576,31 +1343,18 @@ entry:
|
|||
}
|
||||
|
||||
define void @i32_red_invoke(i32 %val) personality i32 (...)* @__gxx_personality_v0 {
|
||||
; CHECK-LABEL: @i32_red_invoke(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr_i32 to <8 x i32>*), align 16
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP0]])
|
||||
; CHECK-NEXT: [[RES:%.*]] = invoke i32 @foobar(i32 [[TMP1]])
|
||||
; CHECK-NEXT: to label [[NORMAL:%.*]] unwind label [[EXCEPTION:%.*]]
|
||||
; CHECK: exception:
|
||||
; CHECK-NEXT: [[CLEANUP:%.*]] = landingpad i8
|
||||
; CHECK-NEXT: cleanup
|
||||
; CHECK-NEXT: br label [[NORMAL]]
|
||||
; CHECK: normal:
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
; STORE-LABEL: @i32_red_invoke(
|
||||
; STORE-NEXT: entry:
|
||||
; STORE-NEXT: [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr_i32 to <8 x i32>*), align 16
|
||||
; STORE-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP0]])
|
||||
; STORE-NEXT: [[RES:%.*]] = invoke i32 @foobar(i32 [[TMP1]])
|
||||
; STORE-NEXT: to label [[NORMAL:%.*]] unwind label [[EXCEPTION:%.*]]
|
||||
; STORE: exception:
|
||||
; STORE-NEXT: [[CLEANUP:%.*]] = landingpad i8
|
||||
; STORE-NEXT: cleanup
|
||||
; STORE-NEXT: br label [[NORMAL]]
|
||||
; STORE: normal:
|
||||
; STORE-NEXT: ret void
|
||||
; ALL-LABEL: @i32_red_invoke(
|
||||
; ALL-NEXT: entry:
|
||||
; ALL-NEXT: [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr_i32 to <8 x i32>*), align 16
|
||||
; ALL-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP0]])
|
||||
; ALL-NEXT: [[RES:%.*]] = invoke i32 @foobar(i32 [[TMP1]])
|
||||
; ALL-NEXT: to label [[NORMAL:%.*]] unwind label [[EXCEPTION:%.*]]
|
||||
; ALL: exception:
|
||||
; ALL-NEXT: [[CLEANUP:%.*]] = landingpad i8
|
||||
; ALL-NEXT: cleanup
|
||||
; ALL-NEXT: br label [[NORMAL]]
|
||||
; ALL: normal:
|
||||
; ALL-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
%0 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 0), align 16
|
||||
|
|
@@ -1628,35 +1382,20 @@ normal:
|
|||
|
||||
; Test case from PR47670. Reduction result is used as incoming value in phi.
|
||||
define i32 @reduction_result_used_in_phi(i32* nocapture readonly %data, i1 zeroext %b) {
|
||||
; CHECK-LABEL: @reduction_result_used_in_phi(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: br i1 [[B:%.*]], label [[BB:%.*]], label [[EXIT:%.*]]
|
||||
; CHECK: bb:
|
||||
; CHECK-NEXT: [[IDX_1:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i64 1
|
||||
; CHECK-NEXT: [[IDX_2:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 2
|
||||
; CHECK-NEXT: [[IDX_3:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 3
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[DATA]] to <4 x i32>*
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1]])
|
||||
; CHECK-NEXT: br label [[EXIT]]
|
||||
; CHECK: exit:
|
||||
; CHECK-NEXT: [[SUM_1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP2]], [[BB]] ]
|
||||
; CHECK-NEXT: ret i32 [[SUM_1]]
|
||||
;
|
||||
; STORE-LABEL: @reduction_result_used_in_phi(
|
||||
; STORE-NEXT: entry:
|
||||
; STORE-NEXT: br i1 [[B:%.*]], label [[BB:%.*]], label [[EXIT:%.*]]
|
||||
; STORE: bb:
|
||||
; STORE-NEXT: [[IDX_1:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i64 1
|
||||
; STORE-NEXT: [[IDX_2:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 2
|
||||
; STORE-NEXT: [[IDX_3:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 3
|
||||
; STORE-NEXT: [[TMP0:%.*]] = bitcast i32* [[DATA]] to <4 x i32>*
|
||||
; STORE-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
|
||||
; STORE-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1]])
|
||||
; STORE-NEXT: br label [[EXIT]]
|
||||
; STORE: exit:
|
||||
; STORE-NEXT: [[SUM_1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP2]], [[BB]] ]
|
||||
; STORE-NEXT: ret i32 [[SUM_1]]
|
||||
; ALL-LABEL: @reduction_result_used_in_phi(
|
||||
; ALL-NEXT: entry:
|
||||
; ALL-NEXT: br i1 [[B:%.*]], label [[BB:%.*]], label [[EXIT:%.*]]
|
||||
; ALL: bb:
|
||||
; ALL-NEXT: [[IDX_1:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i64 1
|
||||
; ALL-NEXT: [[IDX_2:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 2
|
||||
; ALL-NEXT: [[IDX_3:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 3
|
||||
; ALL-NEXT: [[TMP0:%.*]] = bitcast i32* [[DATA]] to <4 x i32>*
|
||||
; ALL-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
|
||||
; ALL-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1]])
|
||||
; ALL-NEXT: br label [[EXIT]]
|
||||
; ALL: exit:
|
||||
; ALL-NEXT: [[SUM_1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP2]], [[BB]] ]
|
||||
; ALL-NEXT: ret i32 [[SUM_1]]
|
||||
;
|
||||
entry:
|
||||
br i1 %b, label %bb, label %exit
|
||||
|
|
@@ -1680,35 +1419,20 @@ exit:
|
|||
}
|
||||
|
||||
define i32 @reduction_result_used_in_phi_loop(i32* nocapture readonly %data, i1 zeroext %b) {
|
||||
; CHECK-LABEL: @reduction_result_used_in_phi_loop(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: br i1 [[B:%.*]], label [[BB:%.*]], label [[EXIT:%.*]]
|
||||
; CHECK: bb:
|
||||
; CHECK-NEXT: [[IDX_1:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i64 1
|
||||
; CHECK-NEXT: [[IDX_2:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 2
|
||||
; CHECK-NEXT: [[IDX_3:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 3
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[DATA]] to <4 x i32>*
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1]])
|
||||
; CHECK-NEXT: br label [[EXIT]]
|
||||
; CHECK: exit:
|
||||
; CHECK-NEXT: [[SUM_1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP2]], [[BB]] ]
|
||||
; CHECK-NEXT: ret i32 [[SUM_1]]
|
||||
;
|
||||
; STORE-LABEL: @reduction_result_used_in_phi_loop(
|
||||
; STORE-NEXT: entry:
|
||||
; STORE-NEXT: br i1 [[B:%.*]], label [[BB:%.*]], label [[EXIT:%.*]]
|
||||
; STORE: bb:
|
||||
; STORE-NEXT: [[IDX_1:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i64 1
|
||||
; STORE-NEXT: [[IDX_2:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 2
|
||||
; STORE-NEXT: [[IDX_3:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 3
|
||||
; STORE-NEXT: [[TMP0:%.*]] = bitcast i32* [[DATA]] to <4 x i32>*
|
||||
; STORE-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
|
||||
; STORE-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1]])
|
||||
; STORE-NEXT: br label [[EXIT]]
|
||||
; STORE: exit:
|
||||
; STORE-NEXT: [[SUM_1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP2]], [[BB]] ]
|
||||
; STORE-NEXT: ret i32 [[SUM_1]]
|
||||
; ALL-LABEL: @reduction_result_used_in_phi_loop(
|
||||
; ALL-NEXT: entry:
|
||||
; ALL-NEXT: br i1 [[B:%.*]], label [[BB:%.*]], label [[EXIT:%.*]]
|
||||
; ALL: bb:
|
||||
; ALL-NEXT: [[IDX_1:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i64 1
|
||||
; ALL-NEXT: [[IDX_2:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 2
|
||||
; ALL-NEXT: [[IDX_3:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 3
|
||||
; ALL-NEXT: [[TMP0:%.*]] = bitcast i32* [[DATA]] to <4 x i32>*
|
||||
; ALL-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
|
||||
; ALL-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1]])
|
||||
; ALL-NEXT: br label [[EXIT]]
|
||||
; ALL: exit:
|
||||
; ALL-NEXT: [[SUM_1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP2]], [[BB]] ]
|
||||
; ALL-NEXT: ret i32 [[SUM_1]]
|
||||
;
|
||||
entry:
|
||||
br i1 %b, label %bb, label %exit
|
||||
|
|
@ -1734,25 +1458,15 @@ exit:
|
|||
; Make sure we neither crash nor loop infinitely on ill-formed IR.
|
||||
|
||||
define void @unreachable_block() {
|
||||
; CHECK-LABEL: @unreachable_block(
|
||||
; CHECK-NEXT: bb.0:
|
||||
; CHECK-NEXT: br label [[BB_1:%.*]]
|
||||
; CHECK: dead:
|
||||
; CHECK-NEXT: [[T0:%.*]] = add i16 [[T0]], undef
|
||||
; CHECK-NEXT: br label [[BB_1]]
|
||||
; CHECK: bb.1:
|
||||
; CHECK-NEXT: [[T1:%.*]] = phi i16 [ undef, [[BB_0:%.*]] ], [ [[T0]], [[DEAD:%.*]] ]
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
; STORE-LABEL: @unreachable_block(
|
||||
; STORE-NEXT: bb.0:
|
||||
; STORE-NEXT: br label [[BB_1:%.*]]
|
||||
; STORE: dead:
|
||||
; STORE-NEXT: [[T0:%.*]] = add i16 [[T0]], undef
|
||||
; STORE-NEXT: br label [[BB_1]]
|
||||
; STORE: bb.1:
|
||||
; STORE-NEXT: [[T1:%.*]] = phi i16 [ undef, [[BB_0:%.*]] ], [ [[T0]], [[DEAD:%.*]] ]
|
||||
; STORE-NEXT: ret void
|
||||
; ALL-LABEL: @unreachable_block(
|
||||
; ALL-NEXT: bb.0:
|
||||
; ALL-NEXT: br label [[BB_1:%.*]]
|
||||
; ALL: dead:
|
||||
; ALL-NEXT: [[T0:%.*]] = add i16 [[T0]], undef
|
||||
; ALL-NEXT: br label [[BB_1]]
|
||||
; ALL: bb.1:
|
||||
; ALL-NEXT: [[T1:%.*]] = phi i16 [ undef, [[BB_0:%.*]] ], [ [[T0]], [[DEAD:%.*]] ]
|
||||
; ALL-NEXT: ret void
|
||||
;
|
||||
bb.0:
|
||||
br label %bb.1
|
||||
|
|
@ -1769,23 +1483,14 @@ bb.1:
|
|||
; The fast-math flags (FMF) on the reduction should match those of the incoming instructions.
|
||||
|
||||
define float @fadd_v4f32_fmf(float* %p) {
|
||||
; CHECK-LABEL: @fadd_v4f32_fmf(
|
||||
; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1
|
||||
; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2
|
||||
; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P]] to <4 x float>*
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = call reassoc nsz float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP2]])
|
||||
; CHECK-NEXT: ret float [[TMP3]]
|
||||
;
|
||||
; STORE-LABEL: @fadd_v4f32_fmf(
|
||||
; STORE-NEXT: [[P1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1
|
||||
; STORE-NEXT: [[P2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2
|
||||
; STORE-NEXT: [[P3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3
|
||||
; STORE-NEXT: [[TMP1:%.*]] = bitcast float* [[P]] to <4 x float>*
|
||||
; STORE-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
|
||||
; STORE-NEXT: [[TMP3:%.*]] = call reassoc nsz float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP2]])
|
||||
; STORE-NEXT: ret float [[TMP3]]
|
||||
; ALL-LABEL: @fadd_v4f32_fmf(
|
||||
; ALL-NEXT: [[P1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1
|
||||
; ALL-NEXT: [[P2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2
|
||||
; ALL-NEXT: [[P3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3
|
||||
; ALL-NEXT: [[TMP1:%.*]] = bitcast float* [[P]] to <4 x float>*
|
||||
; ALL-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
|
||||
; ALL-NEXT: [[TMP3:%.*]] = call reassoc nsz float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP2]])
|
||||
; ALL-NEXT: ret float [[TMP3]]
|
||||
;
|
||||
%p1 = getelementptr inbounds float, float* %p, i64 1
|
||||
%p2 = getelementptr inbounds float, float* %p, i64 2
|
||||
|
|
@ -1805,23 +1510,14 @@ define float @fadd_v4f32_fmf(float* %p) {
|
|||
; In this example, "contract nnan arcp" are dropped, but "ninf" is carried over together with the required flags, so the reduction is emitted as "reassoc ninf nsz".
|
||||
|
||||
define float @fadd_v4f32_fmf_intersect(float* %p) {
|
||||
; CHECK-LABEL: @fadd_v4f32_fmf_intersect(
|
||||
; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1
|
||||
; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2
|
||||
; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P]] to <4 x float>*
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = call reassoc ninf nsz float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP2]])
|
||||
; CHECK-NEXT: ret float [[TMP3]]
|
||||
;
|
||||
; STORE-LABEL: @fadd_v4f32_fmf_intersect(
|
||||
; STORE-NEXT: [[P1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1
|
||||
; STORE-NEXT: [[P2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2
|
||||
; STORE-NEXT: [[P3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3
|
||||
; STORE-NEXT: [[TMP1:%.*]] = bitcast float* [[P]] to <4 x float>*
|
||||
; STORE-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
|
||||
; STORE-NEXT: [[TMP3:%.*]] = call reassoc ninf nsz float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP2]])
|
||||
; STORE-NEXT: ret float [[TMP3]]
|
||||
; ALL-LABEL: @fadd_v4f32_fmf_intersect(
|
||||
; ALL-NEXT: [[P1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1
|
||||
; ALL-NEXT: [[P2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2
|
||||
; ALL-NEXT: [[P3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3
|
||||
; ALL-NEXT: [[TMP1:%.*]] = bitcast float* [[P]] to <4 x float>*
|
||||
; ALL-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
|
||||
; ALL-NEXT: [[TMP3:%.*]] = call reassoc ninf nsz float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP2]])
|
||||
; ALL-NEXT: ret float [[TMP3]]
|
||||
;
|
||||
%p1 = getelementptr inbounds float, float* %p, i64 1
|
||||
%p2 = getelementptr inbounds float, float* %p, i64 2
|
||||
|
|
|
|||
Loading…
Reference in New Issue