From c908196e100a5ac1b25cba859094bfc28cffb5f3 Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen@arm.com>
Date: Mon, 7 Jun 2021 13:02:38 +0100
Subject: [PATCH] [CostModel] Return Invalid cost in getArithmeticInstrCost
 instead of crashing for scalable vectors.

This fixes an issue in BasicTTIImpl.h where getArithmeticInstrCost tries
to do a cast<FixedVectorType> on a scalable vector type in order to get
the scalarization cost. Because scalarization of scalable vectors is not
supported, we return an Invalid cost instead.

Reviewed By: RKSimon

Differential Revision: https://reviews.llvm.org/D103798
---
 llvm/include/llvm/CodeGen/BasicTTIImpl.h      | 10 ++-
 .../CostModel/AArch64/sve-remainder.ll        | 61 +++++++++++++++++++
 2 files changed, 68 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/Analysis/CostModel/AArch64/sve-remainder.ll
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index e46df8d7132c..c6d92ad7f99d 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -776,18 +776,22 @@ public:
       return LT.first * 2 * OpCost;
     }
 
+    // We cannot scalarize scalable vectors, so return Invalid.
+    if (isa<ScalableVectorType>(Ty))
+      return InstructionCost::getInvalid();
+
     // Else, assume that we need to scalarize this op.
     // TODO: If one of the types get legalized by splitting, handle this
     // similarly to what getCastInstrCost() does.
-    if (auto *VTy = dyn_cast<VectorType>(Ty)) {
-      unsigned Num = cast<FixedVectorType>(VTy)->getNumElements();
+    if (auto *VTy = dyn_cast<FixedVectorType>(Ty)) {
       InstructionCost Cost = thisT()->getArithmeticInstrCost(
           Opcode, VTy->getScalarType(), CostKind, Opd1Info, Opd2Info,
           Opd1PropInfo, Opd2PropInfo, Args, CxtI);
       // Return the cost of multiple scalar invocation plus the cost of
       // inserting and extracting the values.
       SmallVector<Type *> Tys(Args.size(), Ty);
-      return getScalarizationOverhead(VTy, Args, Tys) + Num * Cost;
+      return getScalarizationOverhead(VTy, Args, Tys) +
+             VTy->getNumElements() * Cost;
     }
 
     // We don't know anything about this scalar instruction.
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-remainder.ll b/llvm/test/Analysis/CostModel/AArch64/sve-remainder.ll
new file mode 100644
index 000000000000..5105978a4ad9
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-remainder.ll
@@ -0,0 +1,61 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt -cost-model -analyze -mtriple aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+define void @test_urem_srem_expand() {
+; CHECK-LABEL: 'test_urem_srem_expand'
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %legal_type_urem_0 = urem <vscale x 16 x i8> undef, undef
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %legal_type_urem_1 = urem <vscale x 8 x i16> undef, undef
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %legal_type_urem_2 = urem <vscale x 4 x i32> undef, undef
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %legal_type_urem_3 = urem <vscale x 2 x i64> undef, undef
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %legal_type_srem_0 = srem <vscale x 16 x i8> undef, undef
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %legal_type_srem_1 = srem <vscale x 8 x i16> undef, undef
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %legal_type_srem_2 = srem <vscale x 4 x i32> undef, undef
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %legal_type_srem_3 = srem <vscale x 2 x i64> undef, undef
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %split_type_urem_0 = urem <vscale x 32 x i8> undef, undef
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %split_type_urem_1 = urem <vscale x 16 x i16> undef, undef
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %split_type_urem_2 = urem <vscale x 8 x i32> undef, undef
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %split_type_urem_3 = urem <vscale x 4 x i64> undef, undef
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %split_type_srem_0 = srem <vscale x 32 x i8> undef, undef
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %split_type_srem_1 = srem <vscale x 16 x i16> undef, undef
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %split_type_srem_2 = srem <vscale x 8 x i32> undef, undef
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %split_type_srem_3 = srem <vscale x 4 x i64> undef, undef
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %widen_type_urem_0 = urem <vscale x 31 x i8> undef, undef
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %widen_type_urem_1 = urem <vscale x 15 x i16> undef, undef
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %widen_type_urem_2 = urem <vscale x 7 x i32> undef, undef
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %widen_type_urem_3 = urem <vscale x 3 x i64> undef, undef
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %widen_type_srem_0 = srem <vscale x 31 x i8> undef, undef
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %widen_type_srem_1 = srem <vscale x 15 x i16> undef, undef
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %widen_type_srem_2 = srem <vscale x 7 x i32> undef, undef
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %widen_type_srem_3 = srem <vscale x 3 x i64> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+entry:
+  %legal_type_urem_0 = urem <vscale x 16 x i8> undef, undef
+  %legal_type_urem_1 = urem <vscale x 8 x i16> undef, undef
+  %legal_type_urem_2 = urem <vscale x 4 x i32> undef, undef
+  %legal_type_urem_3 = urem <vscale x 2 x i64> undef, undef
+  %legal_type_srem_0 = srem <vscale x 16 x i8> undef, undef
+  %legal_type_srem_1 = srem <vscale x 8 x i16> undef, undef
+  %legal_type_srem_2 = srem <vscale x 4 x i32> undef, undef
+  %legal_type_srem_3 = srem <vscale x 2 x i64> undef, undef
+
+  %split_type_urem_0 = urem <vscale x 32 x i8> undef, undef
+  %split_type_urem_1 = urem <vscale x 16 x i16> undef, undef
+  %split_type_urem_2 = urem <vscale x 8 x i32> undef, undef
+  %split_type_urem_3 = urem <vscale x 4 x i64> undef, undef
+  %split_type_srem_0 = srem <vscale x 32 x i8> undef, undef
+  %split_type_srem_1 = srem <vscale x 16 x i16> undef, undef
+  %split_type_srem_2 = srem <vscale x 8 x i32> undef, undef
+  %split_type_srem_3 = srem <vscale x 4 x i64> undef, undef
+
+  %widen_type_urem_0 = urem <vscale x 31 x i8> undef, undef
+  %widen_type_urem_1 = urem <vscale x 15 x i16> undef, undef
+  %widen_type_urem_2 = urem <vscale x 7 x i32> undef, undef
+  %widen_type_urem_3 = urem <vscale x 3 x i64> undef, undef
+  %widen_type_srem_0 = srem <vscale x 31 x i8> undef, undef
+  %widen_type_srem_1 = srem <vscale x 15 x i16> undef, undef
+  %widen_type_srem_2 = srem <vscale x 7 x i32> undef, undef
+  %widen_type_srem_3 = srem <vscale x 3 x i64> undef, undef
+
+  ret void
+}