[SLP]Fix an assertion for the size of user nodes.
For nodes with reused scalars, a user may have not only the size of the final shuffle but also the size of the scalars themselves, so we need to check for this. It is safe to just modify the check here, since the order of the scalars themselves is preserved; only indices of the reused scalars are changed. So users with the same size as the number of scalars in the node will not be affected — they still get the operands in the required order. Reported by @mstorsjo in D105020. Differential Revision: https://reviews.llvm.org/D107080
This commit is contained in:
parent
f4fb854811
commit
4b25c11321
|
|
@ -2660,12 +2660,14 @@ void BoUpSLP::reorderTopToBottom(bool FreeReorder) {
|
|||
if (TE->Scalars.size() != VF) {
|
||||
if (TE->ReuseShuffleIndices.size() == VF) {
|
||||
// Need to reorder the reuses masks of the operands with smaller VF to
|
||||
// be able to find the math between the graph nodes and scalar
|
||||
// be able to find the match between the graph nodes and scalar
|
||||
// operands of the given node during vectorization/cost estimation.
|
||||
// Build a list of such operands for future reordering.
|
||||
assert(all_of(TE->UserTreeIndices,
|
||||
[VF](const EdgeInfo &EI) {
|
||||
return EI.UserTE->Scalars.size() == VF;
|
||||
[VF, &TE](const EdgeInfo &EI) {
|
||||
return EI.UserTE->Scalars.size() == VF ||
|
||||
EI.UserTE->Scalars.size() ==
|
||||
TE->Scalars.size();
|
||||
}) &&
|
||||
"All users must be of VF size.");
|
||||
SmallOperandsToReorder.insert(TE.get());
|
||||
|
|
|
|||
|
|
@ -0,0 +1,103 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; ModuleID = 'repro1.ll'
; RUN: opt < %s -basic-aa -slp-vectorizer -S -mtriple=aarch64-w32-windows-gnu | FileCheck %s

define i32 @foo() {
; CHECK-LABEL: @foo(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[FOR_COND15_PREHEADER:%.*]]
; CHECK:       for.cond15.preheader:
; CHECK-NEXT:    br label [[IF_END:%.*]]
; CHECK:       for.cond15:
; CHECK-NEXT:    br label [[IF_END_1:%.*]]
; CHECK:       if.end:
; CHECK-NEXT:    br label [[FOR_COND15:%.*]]
; CHECK:       for.end39:
; CHECK-NEXT:    switch i32 undef, label [[DO_BODY:%.*]] [
; CHECK-NEXT:    i32 0, label [[SW_BB:%.*]]
; CHECK-NEXT:    i32 1, label [[SW_BB195:%.*]]
; CHECK-NEXT:    ]
; CHECK:       sw.bb:
; CHECK-NEXT:    [[ARRAYIDX43:%.*]] = getelementptr inbounds [4 x [2 x double]], [4 x [2 x double]]* undef, i32 0, i64 1, i64 0
; CHECK-NEXT:    [[ARRAYIDX45:%.*]] = getelementptr inbounds [4 x [2 x double]], [4 x [2 x double]]* undef, i32 0, i64 2, i64 0
; CHECK-NEXT:    [[ARRAYIDX51:%.*]] = getelementptr inbounds [4 x [2 x double]], [4 x [2 x double]]* undef, i32 0, i64 2, i64 1
; CHECK-NEXT:    [[ARRAYIDX58:%.*]] = getelementptr inbounds [4 x [2 x double]], [4 x [2 x double]]* undef, i32 0, i64 1, i64 1
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[ARRAYIDX43]] to <4 x double>*
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x double>, <4 x double>* [[TMP0]], align 8
; CHECK-NEXT:    [[TMP2:%.*]] = fmul <4 x double> [[TMP1]], <double 0x7FF8000000000000, double 0x7FF8000000000000, double 0x7FF8000000000000, double 0x7FF8000000000000>
; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x double> @llvm.fmuladd.v4f64(<4 x double> poison, <4 x double> zeroinitializer, <4 x double> [[TMP2]])
; CHECK-NEXT:    br label [[SW_EPILOG:%.*]]
; CHECK:       sw.bb195:
; CHECK-NEXT:    br label [[SW_EPILOG]]
; CHECK:       do.body:
; CHECK-NEXT:    unreachable
; CHECK:       sw.epilog:
; CHECK-NEXT:    [[TMP4:%.*]] = phi <4 x double> [ poison, [[SW_BB195]] ], [ [[TMP3]], [[SW_BB]] ]
; CHECK-NEXT:    ret i32 undef
; CHECK:       if.end.1:
; CHECK-NEXT:    br label [[FOR_COND15_1:%.*]]
; CHECK:       for.cond15.1:
; CHECK-NEXT:    br i1 undef, label [[FOR_END39:%.*]], label [[FOR_COND15_PREHEADER]]
;
entry:
  %conv = sitofp i32 undef to double
  %conv2 = sitofp i32 undef to double
  br label %for.cond15.preheader

for.cond15.preheader:                             ; preds = %for.cond15.1, %entry
  br label %if.end

for.cond15:                                       ; preds = %if.end
  br label %if.end.1

if.end:                                           ; preds = %for.cond15.preheader
  br label %for.cond15

for.end39:                                        ; preds = %for.cond15.1
  switch i32 undef, label %do.body [
  i32 0, label %sw.bb
  i32 1, label %sw.bb195
  ]

sw.bb:                                            ; preds = %for.end39
  %arrayidx43 = getelementptr inbounds [4 x [2 x double]], [4 x [2 x double]]* undef, i32 0, i64 1, i64 0
  %0 = load double, double* %arrayidx43, align 8
  %arrayidx45 = getelementptr inbounds [4 x [2 x double]], [4 x [2 x double]]* undef, i32 0, i64 2, i64 0
  %1 = load double, double* %arrayidx45, align 8
  %arrayidx51 = getelementptr inbounds [4 x [2 x double]], [4 x [2 x double]]* undef, i32 0, i64 2, i64 1
  %2 = load double, double* %arrayidx51, align 8
  %arrayidx58 = getelementptr inbounds [4 x [2 x double]], [4 x [2 x double]]* undef, i32 0, i64 1, i64 1
  %3 = load double, double* %arrayidx58, align 8
  %mul = fmul double undef, %conv2
  %mul109 = fmul double undef, %conv
  %mul143 = fmul double %0, %mul
  %4 = call double @llvm.fmuladd.f64(double undef, double %conv2, double %mul143)
  %mul154 = fmul double %1, %mul109
  %5 = call double @llvm.fmuladd.f64(double undef, double %conv, double %mul154)
  %mul172 = fmul double %3, %mul
  %6 = call double @llvm.fmuladd.f64(double undef, double %conv2, double %mul172)
  %mul183 = fmul double %2, %mul109
  %7 = call double @llvm.fmuladd.f64(double undef, double %conv, double %mul183)
  br label %sw.epilog

sw.bb195:                                         ; preds = %for.end39
  br label %sw.epilog

do.body:                                          ; preds = %for.end39
  unreachable

sw.epilog:                                        ; preds = %sw.bb195, %sw.bb
  %x4.0 = phi double [ undef, %sw.bb195 ], [ %7, %sw.bb ]
  %x3.0 = phi double [ undef, %sw.bb195 ], [ %6, %sw.bb ]
  %x1.0 = phi double [ undef, %sw.bb195 ], [ %5, %sw.bb ]
  %x0.0 = phi double [ undef, %sw.bb195 ], [ %4, %sw.bb ]
  ret i32 undef

if.end.1:                                         ; preds = %for.cond15
  br label %for.cond15.1

for.cond15.1:                                     ; preds = %if.end.1
  br i1 undef, label %for.end39, label %for.cond15.preheader
}

declare double @llvm.fmuladd.f64(double, double, double)
|
||||
Loading…
Reference in New Issue