[AArch64] Add tests for masked.gather costs.
This commit is contained in:
parent
b23e84ffcf
commit
3a1c6cec15
|
|
@ -86,3 +86,27 @@ define <8 x i64> @load_512(<8 x i64>* %ptr) {
|
|||
%out = load <8 x i64>, <8 x i64>* %ptr
|
||||
ret <8 x i64> %out
|
||||
}
|
||||
|
||||
; Cost-model test: gathers four i8 values through a vector of four pointers
; using llvm.masked.gather and returns the gathered vector.
; The check lines below expect an estimated cost of 1 for the gather call
; under the NEON prefix and under each of the SVE 128/256/512 prefixes.
; NOTE(review): the RUN lines that define those prefixes are outside this
; hunk - confirm the prefix names against the top of the file.
define <4 x i8> @gather_load_4xi8(<4 x i8*> %ptrs) {
|
||||
; CHECK: gather_load_4xi8
|
||||
; CHECK-NEON: Cost Model: Found an estimated cost of 1 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8
|
||||
; CHECK-SVE-128: Cost Model: Found an estimated cost of 1 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8
|
||||
; CHECK-SVE-256: Cost Model: Found an estimated cost of 1 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8
|
||||
; CHECK-SVE-512: Cost Model: Found an estimated cost of 1 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8
|
||||
;
|
||||
; Operands: pointer vector, alignment 1, all-true mask (every lane is
; loaded), undef passthru.
%lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
|
||||
ret <4 x i8> %lv
|
||||
}
|
||||
; Declaration of the gather intrinsic called by @gather_load_4xi8.
declare <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*>, i32 immarg, <4 x i1>, <4 x i8>)
|
||||
|
||||
; Cost-model test: gathers four i32 values through a vector of four pointers
; using llvm.masked.gather and returns the gathered vector.
; The check lines below expect an estimated cost of 1 for the gather call
; under the NEON prefix and under each of the SVE 128/256/512 prefixes.
; NOTE(review): the RUN lines that define those prefixes are outside this
; hunk - confirm the prefix names against the top of the file.
define <4 x i32> @gather_load_4xi32(<4 x i32*> %ptrs) {
|
||||
; CHECK: gather_load_4xi32
|
||||
; CHECK-NEON: Cost Model: Found an estimated cost of 1 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32
|
||||
; CHECK-SVE-128: Cost Model: Found an estimated cost of 1 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32
|
||||
; CHECK-SVE-256: Cost Model: Found an estimated cost of 1 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32
|
||||
; CHECK-SVE-512: Cost Model: Found an estimated cost of 1 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32
|
||||
;
|
||||
; Operands: pointer vector, alignment 1, all-true mask (every lane is
; loaded), undef passthru.
%lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
|
||||
ret <4 x i32> %lv
|
||||
}
|
||||
; Declaration of the gather intrinsic called by @gather_load_4xi32.
declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32 immarg, <4 x i1>, <4 x i32>)
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt < %s -S -slp-vectorizer -instcombine -pass-remarks-output=%t | FileCheck %s
|
||||
; RUN: cat %t | FileCheck -check-prefix=REMARK %s
|
||||
; RUN: opt < %s -S -passes='slp-vectorizer,instcombine' -pass-remarks-output=%t | FileCheck %s
|
||||
; RUN: opt < %s -S -aa-pipeline=basic-aa -passes='slp-vectorizer,instcombine' -pass-remarks-output=%t | FileCheck %s
|
||||
; RUN: cat %t | FileCheck -check-prefix=REMARK %s
|
||||
|
||||
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
|
||||
|
|
@ -12,6 +12,11 @@ target triple = "aarch64--linux-gnu"
|
|||
; REMARK-NEXT: - String: 'Vectorized horizontal reduction with cost '
|
||||
; REMARK-NEXT: - Cost: '-7'
|
||||
;
|
||||
; REMARK-LABEL: Function: gather_load
|
||||
; REMARK: Args:
|
||||
; REMARK-NEXT: - String: 'Stores SLP vectorized with cost '
|
||||
; REMARK-NEXT: - Cost: '-2'
|
||||
|
||||
define internal i32 @gather_multiple_use(i32 %a, i32 %b, i32 %c, i32 %d) {
|
||||
; CHECK-LABEL: @gather_multiple_use(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> undef, i32 [[C:%.*]], i32 0
|
||||
|
|
@ -51,3 +56,41 @@ define internal i32 @gather_multiple_use(i32 %a, i32 %b, i32 %c, i32 %d) {
|
|||
%tmp22 = add i32 %tmp21, %tmp19
|
||||
ret i32 %tmp22
|
||||
}
|
||||
|
||||
@data = global [6 x [258 x i8]] zeroinitializer, align 1
|
||||
; SLP vectorizer test for non-consecutive loads. The scalar input loads four
; i8 values from scattered elements of @data, zero-extends each to i16, adds
; a per-lane constant (10, 20, 30, 40) and stores the results to the four
; adjacent i16 slots %ptr[1] .. %ptr[4].
; The expected output below replaces the four scalar loads with one
; llvm.masked.gather of <4 x i8>, followed by a vector zext, a vector add and
; a single <4 x i16> store.
define void @gather_load(i16* noalias %ptr) {
|
||||
; CHECK-LABEL: @gather_load(
|
||||
; CHECK-NEXT: [[ARRAYIDX182:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i64 1
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> <i8* getelementptr inbounds ([6 x [258 x i8]], [6 x [258 x i8]]* @data, i64 0, i64 1, i64 0), i8* getelementptr inbounds ([6 x [258 x i8]], [6 x [258 x i8]]* @data, i64 0, i64 2, i64 1), i8* getelementptr inbounds ([6 x [258 x i8]], [6 x [258 x i8]]* @data, i64 0, i64 3, i64 2), i8* getelementptr inbounds ([6 x [258 x i8]], [6 x [258 x i8]]* @data, i64 0, i64 4, i64 3)>, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i8> [[TMP1]] to <4 x i16>
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw <4 x i16> [[TMP2]], <i16 10, i16 20, i16 30, i16 40>
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16* [[ARRAYIDX182]] to <4 x i16>*
|
||||
; CHECK-NEXT: store <4 x i16> [[TMP3]], <4 x i16>* [[TMP4]], align 2
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
; Destination addresses: four adjacent i16 slots starting at %ptr + 1.
%arrayidx182 = getelementptr inbounds i16, i16* %ptr, i64 1
|
||||
%arrayidx183 = getelementptr inbounds i16, i16* %ptr, i64 2
|
||||
|
||||
%arrayidx184 = getelementptr inbounds i16, i16* %ptr, i64 3
|
||||
%arrayidx185 = getelementptr inbounds i16, i16* %ptr, i64 4
|
||||
; Lane 0: zext(@data[1][0]) + 10
%arrayidx149 = getelementptr inbounds [6 x [258 x i8]], [6 x [258 x i8]]* @data, i64 0, i64 1, i64 0
|
||||
%l0 = load i8, i8* %arrayidx149, align 1
|
||||
%conv150 = zext i8 %l0 to i16
|
||||
%add152 = add i16 10, %conv150
|
||||
; Lane 1: zext(@data[2][1]) + 20
%arrayidx155 = getelementptr inbounds [6 x [258 x i8]], [6 x [258 x i8]]* @data, i64 0, i64 2, i64 1
|
||||
%l1 = load i8, i8* %arrayidx155, align 1
|
||||
%conv156 = zext i8 %l1 to i16
|
||||
%add158 = add i16 20, %conv156
|
||||
; Lane 2: zext(@data[3][2]) + 30
%arrayidx161 = getelementptr inbounds [6 x [258 x i8]], [6 x [258 x i8]]* @data, i64 0, i64 3, i64 2
|
||||
%l2 = load i8, i8* %arrayidx161, align 1
|
||||
%conv162 = zext i8 %l2 to i16
|
||||
%add164 = add i16 30, %conv162
|
||||
; Lane 3: zext(@data[4][3]) + 40
%arrayidx167 = getelementptr inbounds [6 x [258 x i8]], [6 x [258 x i8]]* @data, i64 0, i64 4, i64 3
|
||||
%l3 = load i8, i8* %arrayidx167, align 1
|
||||
%conv168 = zext i8 %l3 to i16
|
||||
%add170 = add i16 40, %conv168
|
||||
; Write the four results to the adjacent destination slots.
store i16 %add152, i16* %arrayidx182, align 2
|
||||
store i16 %add158, i16* %arrayidx183, align 2
|
||||
store i16 %add164, i16* %arrayidx184, align 2
|
||||
store i16 %add170, i16* %arrayidx185, align 2
|
||||
ret void
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue