[SLP] Fix an assertion for the size of user nodes.

For nodes with reused scalars, a user may be not only of the size of the
final shuffle but also of the size of the scalars themselves, so the
assertion needs to account for both. It is safe to simply relax the
check here, since the order of the scalars themselves is preserved and
only the indices of the reused scalars change. Thus, users whose size
matches the number of scalars in the node are not affected: they still
receive their operands in the required order.
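
To illustrate the relaxed invariant, here is a small standalone sketch.
The Node struct and the usersHaveValidSize helper are hypothetical
stand-ins for LLVM's TreeEntry and the lambda in the assert below; only
the fields the check actually touches are modeled.

#include <cassert>
#include <cstddef>
#include <vector>

// Simplified stand-in for an SLP graph node (TreeEntry).
struct Node {
  std::vector<int> Scalars;             // scalars grouped in this node
  std::vector<int> ReuseShuffleIndices; // non-empty when scalars are reused
  std::vector<Node *> Users;            // user nodes (EdgeInfo::UserTE)
};

// The relaxed check: a user of a node with reused scalars may be of the
// final shuffle width (VF) or of the node's own scalar count, because
// reordering only permutes the reuse indices, never the scalars themselves.
bool usersHaveValidSize(const Node &TE, std::size_t VF) {
  for (const Node *User : TE.Users)
    if (User->Scalars.size() != VF &&
        User->Scalars.size() != TE.Scalars.size())
      return false;
  return true;
}

int main() {
  // A node with 2 scalars reused to form a 4-wide shuffle (VF = 4).
  Node TE{{10, 11}, {0, 1, 0, 1}, {}};
  Node WideUser{{1, 2, 3, 4}, {}, {}};  // user sized like the shuffle
  Node NarrowUser{{5, 6}, {}, {}};      // user sized like the scalars
  TE.Users = {&WideUser, &NarrowUser};
  assert(usersHaveValidSize(TE, /*VF=*/4)); // both user sizes are accepted
  return 0;
}

Under the old check, NarrowUser would have tripped the assertion even
though its operand order is unaffected by the reuse-mask reordering.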

Reported by @mstorsjo in D105020.

Differential Revision: https://reviews.llvm.org/D107080
commit 4b25c11321
parent f4fb854811
Author: Alexey Bataev
Date:   2021-07-29 07:12:15 -07:00

2 changed files with 108 additions and 3 deletions


@@ -2660,12 +2660,14 @@ void BoUpSLP::reorderTopToBottom(bool FreeReorder) {
     if (TE->Scalars.size() != VF) {
       if (TE->ReuseShuffleIndices.size() == VF) {
         // Need to reorder the reuses masks of the operands with smaller VF to
-        // be able to find the math between the graph nodes and scalar
+        // be able to find the match between the graph nodes and scalar
         // operands of the given node during vectorization/cost estimation.
         // Build a list of such operands for future reordering.
         assert(all_of(TE->UserTreeIndices,
-                      [VF](const EdgeInfo &EI) {
-                        return EI.UserTE->Scalars.size() == VF;
+                      [VF, &TE](const EdgeInfo &EI) {
+                        return EI.UserTE->Scalars.size() == VF ||
+                               EI.UserTE->Scalars.size() ==
+                                   TE->Scalars.size();
                       }) &&
                "All users must be of VF size.");
         SmallOperandsToReorder.insert(TE.get());


@@ -0,0 +1,103 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; ModuleID = 'repro1.ll'
; RUN: opt < %s -basic-aa -slp-vectorizer -S -mtriple=aarch64-w32-windows-gnu | FileCheck %s
define i32 @foo() {
; CHECK-LABEL: @foo(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[FOR_COND15_PREHEADER:%.*]]
; CHECK: for.cond15.preheader:
; CHECK-NEXT: br label [[IF_END:%.*]]
; CHECK: for.cond15:
; CHECK-NEXT: br label [[IF_END_1:%.*]]
; CHECK: if.end:
; CHECK-NEXT: br label [[FOR_COND15:%.*]]
; CHECK: for.end39:
; CHECK-NEXT: switch i32 undef, label [[DO_BODY:%.*]] [
; CHECK-NEXT: i32 0, label [[SW_BB:%.*]]
; CHECK-NEXT: i32 1, label [[SW_BB195:%.*]]
; CHECK-NEXT: ]
; CHECK: sw.bb:
; CHECK-NEXT: [[ARRAYIDX43:%.*]] = getelementptr inbounds [4 x [2 x double]], [4 x [2 x double]]* undef, i32 0, i64 1, i64 0
; CHECK-NEXT: [[ARRAYIDX45:%.*]] = getelementptr inbounds [4 x [2 x double]], [4 x [2 x double]]* undef, i32 0, i64 2, i64 0
; CHECK-NEXT: [[ARRAYIDX51:%.*]] = getelementptr inbounds [4 x [2 x double]], [4 x [2 x double]]* undef, i32 0, i64 2, i64 1
; CHECK-NEXT: [[ARRAYIDX58:%.*]] = getelementptr inbounds [4 x [2 x double]], [4 x [2 x double]]* undef, i32 0, i64 1, i64 1
; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[ARRAYIDX43]] to <4 x double>*
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x double>, <4 x double>* [[TMP0]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = fmul <4 x double> [[TMP1]], <double 0x7FF8000000000000, double 0x7FF8000000000000, double 0x7FF8000000000000, double 0x7FF8000000000000>
; CHECK-NEXT: [[TMP3:%.*]] = call <4 x double> @llvm.fmuladd.v4f64(<4 x double> poison, <4 x double> zeroinitializer, <4 x double> [[TMP2]])
; CHECK-NEXT: br label [[SW_EPILOG:%.*]]
; CHECK: sw.bb195:
; CHECK-NEXT: br label [[SW_EPILOG]]
; CHECK: do.body:
; CHECK-NEXT: unreachable
; CHECK: sw.epilog:
; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x double> [ poison, [[SW_BB195]] ], [ [[TMP3]], [[SW_BB]] ]
; CHECK-NEXT: ret i32 undef
; CHECK: if.end.1:
; CHECK-NEXT: br label [[FOR_COND15_1:%.*]]
; CHECK: for.cond15.1:
; CHECK-NEXT: br i1 undef, label [[FOR_END39:%.*]], label [[FOR_COND15_PREHEADER]]
;
entry:
%conv = sitofp i32 undef to double
%conv2 = sitofp i32 undef to double
br label %for.cond15.preheader
for.cond15.preheader: ; preds = %for.cond15.1, %entry
br label %if.end
for.cond15: ; preds = %if.end
br label %if.end.1
if.end: ; preds = %for.cond15.preheader
br label %for.cond15
for.end39: ; preds = %for.cond15.1
switch i32 undef, label %do.body [
i32 0, label %sw.bb
i32 1, label %sw.bb195
]
sw.bb: ; preds = %for.end39
%arrayidx43 = getelementptr inbounds [4 x [2 x double]], [4 x [2 x double]]* undef, i32 0, i64 1, i64 0
%0 = load double, double* %arrayidx43, align 8
%arrayidx45 = getelementptr inbounds [4 x [2 x double]], [4 x [2 x double]]* undef, i32 0, i64 2, i64 0
%1 = load double, double* %arrayidx45, align 8
%arrayidx51 = getelementptr inbounds [4 x [2 x double]], [4 x [2 x double]]* undef, i32 0, i64 2, i64 1
%2 = load double, double* %arrayidx51, align 8
%arrayidx58 = getelementptr inbounds [4 x [2 x double]], [4 x [2 x double]]* undef, i32 0, i64 1, i64 1
%3 = load double, double* %arrayidx58, align 8
%mul = fmul double undef, %conv2
%mul109 = fmul double undef, %conv
%mul143 = fmul double %0, %mul
%4 = call double @llvm.fmuladd.f64(double undef, double %conv2, double %mul143)
%mul154 = fmul double %1, %mul109
%5 = call double @llvm.fmuladd.f64(double undef, double %conv, double %mul154)
%mul172 = fmul double %3, %mul
%6 = call double @llvm.fmuladd.f64(double undef, double %conv2, double %mul172)
%mul183 = fmul double %2, %mul109
%7 = call double @llvm.fmuladd.f64(double undef, double %conv, double %mul183)
br label %sw.epilog
sw.bb195: ; preds = %for.end39
br label %sw.epilog
do.body: ; preds = %for.end39
unreachable
sw.epilog: ; preds = %sw.bb195, %sw.bb
%x4.0 = phi double [ undef, %sw.bb195 ], [ %7, %sw.bb ]
%x3.0 = phi double [ undef, %sw.bb195 ], [ %6, %sw.bb ]
%x1.0 = phi double [ undef, %sw.bb195 ], [ %5, %sw.bb ]
%x0.0 = phi double [ undef, %sw.bb195 ], [ %4, %sw.bb ]
ret i32 undef
if.end.1: ; preds = %for.cond15
br label %for.cond15.1
for.cond15.1: ; preds = %if.end.1
br i1 undef, label %for.end39, label %for.cond15.preheader
}
declare double @llvm.fmuladd.f64(double, double, double)