[AArch64][SVE][InstCombine] Combine contiguous gather/scatter to load/store
Contiguous gather => masked load: (sve.ld1.gather.index Mask BasePtr (sve.index IndexBase 1)) => (masked.load (gep BasePtr IndexBase) Align Mask undef) Contiguous scatter => masked store: (sve.st1.scatter.index Value Mask BasePtr (sve.index IndexBase 1)) => (masked.store Value (gep BasePtr IndexBase) Align Mask) Tests with <vscale x 2 x double>: [Gather, Scatter] for each [Positive test (index=1), Negative test (index=2), Alignment propagation]. Differential Revision: https://reviews.llvm.org/D112076
This commit is contained in:
parent
d36dd1f842
commit
1febf42f03
|
@ -864,6 +864,77 @@ static Optional<Instruction *> instCombineSVEZip(InstCombiner &IC,
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Fold a contiguous SVE gather into an ordinary masked load:
//   (sve.ld1.gather.index Mask BasePtr (sve.index IndexBase 1))
//     => (masked.load (gep BasePtr IndexBase) Align Mask zeroinitializer)
// Returns the replacement on success, None if the pattern does not match.
static Optional<Instruction *> instCombineLD1GatherIndex(InstCombiner &IC,
                                                         IntrinsicInst &II) {
  Value *Pred = II.getOperand(0);
  Value *Base = II.getOperand(1);
  Value *Indices = II.getOperand(2);
  Type *RetTy = II.getType();

  // Only a unit-stride index vector (IndexBase, IndexBase+1, ...) describes
  // a contiguous access; anything else keeps the gather.
  Value *Start;
  if (!match(Indices, m_Intrinsic<Intrinsic::aarch64_sve_index>(
                          m_Value(Start), m_SpecificInt(1))))
    return None;

  IRBuilder<> Builder(&II);

  // Propagate whatever alignment is known on the base pointer.
  Align Alignment =
      Base->getPointerAlignment(II.getModule()->getDataLayout());

  Value *Ptr = Builder.CreateGEP(Base->getType()->getPointerElementType(),
                                 Base, Start);
  Ptr = Builder.CreateBitCast(Ptr, PointerType::getUnqual(RetTy));

  // Inactive lanes become zero via the pass-through operand.
  Value *PassThru = ConstantAggregateZero::get(RetTy);
  CallInst *Load =
      Builder.CreateMaskedLoad(RetTy, Ptr, Alignment, Pred, PassThru);
  Load->takeName(&II);
  return IC.replaceInstUsesWith(II, Load);
}
|
||||||
|
|
||||||
|
// Fold a contiguous SVE scatter into an ordinary masked store:
//   (sve.st1.scatter.index Value Mask BasePtr (sve.index IndexBase 1))
//     => (masked.store Value (gep BasePtr IndexBase) Align Mask)
// Returns the (erased) instruction marker on success, None otherwise.
static Optional<Instruction *> instCombineST1ScatterIndex(InstCombiner &IC,
                                                          IntrinsicInst &II) {
  Value *Val = II.getOperand(0);
  Value *Mask = II.getOperand(1);
  Value *BasePtr = II.getOperand(2);
  Value *Index = II.getOperand(3);
  Type *Ty = Val->getType();
  Type *BasePtrTy = BasePtr->getType();

  // Contiguous scatter => masked store.
  // Only a unit-stride index vector (IndexBase, IndexBase+1, ...) describes
  // a contiguous access.
  Value *IndexBase;
  if (match(Index, m_Intrinsic<Intrinsic::aarch64_sve_index>(
                       m_Value(IndexBase), m_SpecificInt(1)))) {
    IRBuilder<> Builder(II.getContext());
    Builder.SetInsertPoint(&II);

    // Propagate whatever alignment is known on the base pointer.
    Align Alignment =
        BasePtr->getPointerAlignment(II.getModule()->getDataLayout());

    Value *Ptr = Builder.CreateGEP(BasePtrTy->getPointerElementType(), BasePtr,
                                   IndexBase);
    Type *VecPtrTy = PointerType::getUnqual(Ty);
    Ptr = Builder.CreateBitCast(Ptr, VecPtrTy);

    CallInst *MaskedStore =
        Builder.CreateMaskedStore(Val, Ptr, Alignment, Mask);
    MaskedStore->takeName(&II);

    // The scatter intrinsic returns void, so there are no uses to replace.
    // Erase it through the InstCombiner so the worklist stays consistent.
    // (The previous II.eraseFromParent() followed by replaceInstUsesWith(II,
    // ...) touched II after deletion — a use-after-free.)
    return IC.eraseInstFromFunction(II);
  }

  return None;
}
|
||||||
|
|
||||||
Optional<Instruction *>
|
Optional<Instruction *>
|
||||||
AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
|
AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
|
||||||
IntrinsicInst &II) const {
|
IntrinsicInst &II) const {
|
||||||
|
@ -915,6 +986,10 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
|
||||||
case Intrinsic::aarch64_sve_zip1:
|
case Intrinsic::aarch64_sve_zip1:
|
||||||
case Intrinsic::aarch64_sve_zip2:
|
case Intrinsic::aarch64_sve_zip2:
|
||||||
return instCombineSVEZip(IC, II);
|
return instCombineSVEZip(IC, II);
|
||||||
|
case Intrinsic::aarch64_sve_ld1_gather_index:
|
||||||
|
return instCombineLD1GatherIndex(IC, II);
|
||||||
|
case Intrinsic::aarch64_sve_st1_scatter_index:
|
||||||
|
return instCombineST1ScatterIndex(IC, II);
|
||||||
}
|
}
|
||||||
|
|
||||||
return None;
|
return None;
|
||||||
|
|
|
@ -0,0 +1,88 @@
|
||||||
|
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -instcombine -dce < %s | FileCheck %s

target triple = "aarch64-unknown-linux-gnu"

;;
;; Gathers.
;;

; Positive test: a stride-1 sve.index makes the gather contiguous, so it is
; combined into llvm.masked.load (with the default alignment of 1).
define <vscale x 2 x double> @test_ld1_gather_index_nxv2f64_stride1(<vscale x 2 x i1> %pred, double* %x, i64 %base) #0 {
; CHECK-LABEL: @test_ld1_gather_index_nxv2f64_stride1(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr double, double* [[X:%.*]], i64 [[BASE:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = bitcast double* [[TMP1]] to <vscale x 2 x double>*
; CHECK-NEXT: [[LD:%.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0nxv2f64(<vscale x 2 x double>* [[TMP2]], i32 1, <vscale x 2 x i1> [[PRED:%.*]], <vscale x 2 x double> zeroinitializer)
; CHECK-NEXT: ret <vscale x 2 x double> [[LD]]
;
  %idx = tail call <vscale x 2 x i64> @llvm.aarch64.sve.index.nxv2i64(i64 %base, i64 1)
  %ld = tail call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.index.nxv2f64(<vscale x 2 x i1> %pred, double* %x, <vscale x 2 x i64> %idx)
  ret <vscale x 2 x double> %ld
}

; Negative test: stride 2 is not contiguous, so the gather intrinsic remains.
define <vscale x 2 x double> @test_ld1_gather_index_nxv2f64_stride2_negtest(<vscale x 2 x i1> %pred, double* %x, i64 %base) #0 {
; CHECK-LABEL: @test_ld1_gather_index_nxv2f64_stride2_negtest(
; CHECK-NEXT: [[IDX:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.index.nxv2i64(i64 [[BASE:%.*]], i64 2)
; CHECK-NEXT: [[LD:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.index.nxv2f64(<vscale x 2 x i1> [[PRED:%.*]], double* [[X:%.*]], <vscale x 2 x i64> [[IDX]])
; CHECK-NEXT: ret <vscale x 2 x double> [[LD]]
;
  %idx = tail call <vscale x 2 x i64> @llvm.aarch64.sve.index.nxv2i64(i64 %base, i64 2)
  %ld = tail call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.index.nxv2f64(<vscale x 2 x i1> %pred, double* %x, <vscale x 2 x i64> %idx)
  ret <vscale x 2 x double> %ld
}

; Alignment propagation: the `align 8` on %x shows up as i32 8 on the
; resulting masked load.
define <vscale x 2 x double> @test_ld1_gather_index_nxv2f64_stride1_align8(<vscale x 2 x i1> %pred, double* align 8 %x, i64 %base) #0 {
; CHECK-LABEL: @test_ld1_gather_index_nxv2f64_stride1_align8(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr double, double* [[X:%.*]], i64 [[BASE:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = bitcast double* [[TMP1]] to <vscale x 2 x double>*
; CHECK-NEXT: [[LD:%.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0nxv2f64(<vscale x 2 x double>* [[TMP2]], i32 8, <vscale x 2 x i1> [[PRED:%.*]], <vscale x 2 x double> zeroinitializer)
; CHECK-NEXT: ret <vscale x 2 x double> [[LD]]
;
  %idx = tail call <vscale x 2 x i64> @llvm.aarch64.sve.index.nxv2i64(i64 %base, i64 1)
  %ld = tail call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.index.nxv2f64(<vscale x 2 x i1> %pred, double* %x, <vscale x 2 x i64> %idx)
  ret <vscale x 2 x double> %ld
}

;;
;; Scatters.
;;

; Positive test: a stride-1 sve.index makes the scatter contiguous, so it is
; combined into llvm.masked.store (with the default alignment of 1).
define void @test_st1_scatter_index_nxv2f64_stride1(<vscale x 2 x i1> %pred, double* %x, i64 %base, <vscale x 2 x double> %val) #0 {
; CHECK-LABEL: @test_st1_scatter_index_nxv2f64_stride1(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr double, double* [[X:%.*]], i64 [[BASE:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = bitcast double* [[TMP1]] to <vscale x 2 x double>*
; CHECK-NEXT: call void @llvm.masked.store.nxv2f64.p0nxv2f64(<vscale x 2 x double> [[VAL:%.*]], <vscale x 2 x double>* [[TMP2]], i32 1, <vscale x 2 x i1> [[PRED:%.*]])
; CHECK-NEXT: ret void
;
  %idx = tail call <vscale x 2 x i64> @llvm.aarch64.sve.index.nxv2i64(i64 %base, i64 1)
  tail call void @llvm.aarch64.sve.st1.scatter.index.nxv2f64(<vscale x 2 x double> %val, <vscale x 2 x i1> %pred, double* %x, <vscale x 2 x i64> %idx)
  ret void
}

; Negative test: stride 2 is not contiguous, so the scatter intrinsic remains.
define void @test_st1_scatter_index_nxv2f64_stride2_negtest(<vscale x 2 x i1> %pred, double* %x, i64 %base, <vscale x 2 x double> %val) #0 {
; CHECK-LABEL: @test_st1_scatter_index_nxv2f64_stride2_negtest(
; CHECK-NEXT: [[IDX:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.index.nxv2i64(i64 [[BASE:%.*]], i64 2)
; CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.scatter.index.nxv2f64(<vscale x 2 x double> [[VAL:%.*]], <vscale x 2 x i1> [[PRED:%.*]], double* [[X:%.*]], <vscale x 2 x i64> [[IDX]])
; CHECK-NEXT: ret void
;
  %idx = tail call <vscale x 2 x i64> @llvm.aarch64.sve.index.nxv2i64(i64 %base, i64 2)
  tail call void @llvm.aarch64.sve.st1.scatter.index.nxv2f64(<vscale x 2 x double> %val, <vscale x 2 x i1> %pred, double* %x, <vscale x 2 x i64> %idx)
  ret void
}

; Alignment propagation: the `align 8` on %x shows up as i32 8 on the
; resulting masked store.
define void @test_st1_scatter_index_nxv2f64_stride1_align8(<vscale x 2 x i1> %pred, double* align 8 %x, i64 %base, <vscale x 2 x double> %val) #0 {
; CHECK-LABEL: @test_st1_scatter_index_nxv2f64_stride1_align8(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr double, double* [[X:%.*]], i64 [[BASE:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = bitcast double* [[TMP1]] to <vscale x 2 x double>*
; CHECK-NEXT: call void @llvm.masked.store.nxv2f64.p0nxv2f64(<vscale x 2 x double> [[VAL:%.*]], <vscale x 2 x double>* [[TMP2]], i32 8, <vscale x 2 x i1> [[PRED:%.*]])
; CHECK-NEXT: ret void
;
  %idx = tail call <vscale x 2 x i64> @llvm.aarch64.sve.index.nxv2i64(i64 %base, i64 1)
  tail call void @llvm.aarch64.sve.st1.scatter.index.nxv2f64(<vscale x 2 x double> %val, <vscale x 2 x i1> %pred, double* %x, <vscale x 2 x i64> %idx)
  ret void
}

declare <vscale x 2 x i64> @llvm.aarch64.sve.index.nxv2i64(i64, i64)
declare <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.index.nxv2f64(<vscale x 2 x i1>, double*, <vscale x 2 x i64>)
declare void @llvm.aarch64.sve.st1.scatter.index.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, double*, <vscale x 2 x i64>)

attributes #0 = { "target-features"="+sve" }
|
Loading…
Reference in New Issue