[AArch64] Add tests for masked.gather costs.

This commit is contained in:
Florian Hahn 2020-11-23 17:33:27 +00:00
parent b23e84ffcf
commit 3a1c6cec15
No known key found for this signature in database
GPG Key ID: 61D7554B5CECDC0D
2 changed files with 68 additions and 1 deletion

View File

@ -86,3 +86,27 @@ define <8 x i64> @load_512(<8 x i64>* %ptr) {
%out = load <8 x i64>, <8 x i64>* %ptr
ret <8 x i64> %out
}
; Cost-model test: gather of four i8 lanes through llvm.masked.gather with an
; all-true mask and align 1. The CHECK lines pin the estimated cost (currently 1)
; under the NEON and SVE 128/256/512-bit vector-width configurations.
define <4 x i8> @gather_load_4xi8(<4 x i8*> %ptrs) {
; CHECK: gather_load_4xi8
; CHECK-NEON: Cost Model: Found an estimated cost of 1 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8
; CHECK-SVE-128: Cost Model: Found an estimated cost of 1 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8
; CHECK-SVE-256: Cost Model: Found an estimated cost of 1 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8
; CHECK-SVE-512: Cost Model: Found an estimated cost of 1 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8
;
  %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
  ret <4 x i8> %lv
}
declare <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*>, i32 immarg, <4 x i1>, <4 x i8>)
; Cost-model test: same shape as gather_load_4xi8 but with i32 element type,
; exercising llvm.masked.gather.v4i32 with an all-true mask and align 1.
; The CHECK lines pin the estimated cost (currently 1) for each configuration.
define <4 x i32> @gather_load_4xi32(<4 x i32*> %ptrs) {
; CHECK: gather_load_4xi32
; CHECK-NEON: Cost Model: Found an estimated cost of 1 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32
; CHECK-SVE-128: Cost Model: Found an estimated cost of 1 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32
; CHECK-SVE-256: Cost Model: Found an estimated cost of 1 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32
; CHECK-SVE-512: Cost Model: Found an estimated cost of 1 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32
;
  %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
  ret <4 x i32> %lv
}
declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32 immarg, <4 x i1>, <4 x i32>)

View File

@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -S -slp-vectorizer -instcombine -pass-remarks-output=%t | FileCheck %s
; RUN: cat %t | FileCheck -check-prefix=REMARK %s
; RUN: opt < %s -S -passes='slp-vectorizer,instcombine' -pass-remarks-output=%t | FileCheck %s
; RUN: opt < %s -S -aa-pipeline=basic-aa -passes='slp-vectorizer,instcombine' -pass-remarks-output=%t | FileCheck %s
; RUN: cat %t | FileCheck -check-prefix=REMARK %s
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
@ -12,6 +12,11 @@ target triple = "aarch64--linux-gnu"
; REMARK-NEXT: - String: 'Vectorized horizontal reduction with cost '
; REMARK-NEXT: - Cost: '-7'
;
; REMARK-LABEL: Function: gather_load
; REMARK: Args:
; REMARK-NEXT: - String: 'Stores SLP vectorized with cost
; REMARK-NEXT: - Cost: '-2'
define internal i32 @gather_multiple_use(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: @gather_multiple_use(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> undef, i32 [[C:%.*]], i32 0
@ -51,3 +56,41 @@ define internal i32 @gather_multiple_use(i32 %a, i32 %b, i32 %c, i32 %d) {
%tmp22 = add i32 %tmp21, %tmp19
ret i32 %tmp22
}
@data = global [6 x [258 x i8]] zeroinitializer, align 1
; SLP test: four scalar i8 loads from non-consecutive addresses inside @data
; (constant GEPs at [1][0], [2][1], [3][2], [4][3]), each zero-extended to i16,
; added to a distinct constant (10/20/30/40), and stored to four consecutive
; i16 slots starting at %ptr + 1. The CHECK lines expect the SLP vectorizer
; (with instcombine cleanup) to turn the loads into a single
; llvm.masked.gather.v4i8 and the stores into one <4 x i16> vector store.
define void @gather_load(i16* noalias %ptr) {
; CHECK-LABEL: @gather_load(
; CHECK-NEXT: [[ARRAYIDX182:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i64 1
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> <i8* getelementptr inbounds ([6 x [258 x i8]], [6 x [258 x i8]]* @data, i64 0, i64 1, i64 0), i8* getelementptr inbounds ([6 x [258 x i8]], [6 x [258 x i8]]* @data, i64 0, i64 2, i64 1), i8* getelementptr inbounds ([6 x [258 x i8]], [6 x [258 x i8]]* @data, i64 0, i64 3, i64 2), i8* getelementptr inbounds ([6 x [258 x i8]], [6 x [258 x i8]]* @data, i64 0, i64 4, i64 3)>, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i8> [[TMP1]] to <4 x i16>
; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw <4 x i16> [[TMP2]], <i16 10, i16 20, i16 30, i16 40>
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16* [[ARRAYIDX182]] to <4 x i16>*
; CHECK-NEXT: store <4 x i16> [[TMP3]], <4 x i16>* [[TMP4]], align 2
; CHECK-NEXT: ret void
;
  ; Four consecutive i16 destinations: %ptr + 1 .. %ptr + 4.
  %arrayidx182 = getelementptr inbounds i16, i16* %ptr, i64 1
  %arrayidx183 = getelementptr inbounds i16, i16* %ptr, i64 2
  %arrayidx184 = getelementptr inbounds i16, i16* %ptr, i64 3
  %arrayidx185 = getelementptr inbounds i16, i16* %ptr, i64 4
  ; Lane 0: load @data[1][0], zext, add 10.
  %arrayidx149 = getelementptr inbounds [6 x [258 x i8]], [6 x [258 x i8]]* @data, i64 0, i64 1, i64 0
  %l0 = load i8, i8* %arrayidx149, align 1
  %conv150 = zext i8 %l0 to i16
  %add152 = add i16 10, %conv150
  ; Lane 1: load @data[2][1], zext, add 20.
  %arrayidx155 = getelementptr inbounds [6 x [258 x i8]], [6 x [258 x i8]]* @data, i64 0, i64 2, i64 1
  %l1 = load i8, i8* %arrayidx155, align 1
  %conv156 = zext i8 %l1 to i16
  %add158 = add i16 20, %conv156
  ; Lane 2: load @data[3][2], zext, add 30.
  %arrayidx161 = getelementptr inbounds [6 x [258 x i8]], [6 x [258 x i8]]* @data, i64 0, i64 3, i64 2
  %l2 = load i8, i8* %arrayidx161, align 1
  %conv162 = zext i8 %l2 to i16
  %add164 = add i16 30, %conv162
  ; Lane 3: load @data[4][3], zext, add 40.
  %arrayidx167 = getelementptr inbounds [6 x [258 x i8]], [6 x [258 x i8]]* @data, i64 0, i64 4, i64 3
  %l3 = load i8, i8* %arrayidx167, align 1
  %conv168 = zext i8 %l3 to i16
  %add170 = add i16 40, %conv168
  ; Consecutive scalar stores — the seed for SLP store vectorization.
  store i16 %add152, i16* %arrayidx182, align 2
  store i16 %add158, i16* %arrayidx183, align 2
  store i16 %add164, i16* %arrayidx184, align 2
  store i16 %add170, i16* %arrayidx185, align 2
  ret void
}