forked from OSchip/llvm-project
[AArch64] Improve code generation of constant vectors
Use the whole gamut of constant immediates available to set up a vector. Instead of using, for example, `mov w0, #0xffff; dup v0.4s, w0`, which transfers between register files, use the more efficient `movi v0.4s, #-1`. This is not limited to just a few values: it covers any immediate value that can be encoded by any of the variants of `FMOV`, `MOVI`, and `MVNI`, thus eliminating the need for patterns that optimize special cases. Differential revision: https://reviews.llvm.org/D42133 llvm-svn: 326718
This commit is contained in:
parent
3402e876ef
commit
cd855f70c5
|
|
@ -6339,8 +6339,8 @@ static SDValue tryAdvSIMDModImm16(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
|
|||
|
||||
if (LHS)
|
||||
Mov = DAG.getNode(NewOp, dl, MovTy, *LHS,
|
||||
DAG.getConstant(Value, dl, MVT::i32),
|
||||
DAG.getConstant(Shift, dl, MVT::i32));
|
||||
DAG.getConstant(Value, dl, MVT::i32),
|
||||
DAG.getConstant(Shift, dl, MVT::i32));
|
||||
else
|
||||
Mov = DAG.getNode(NewOp, dl, MovTy,
|
||||
DAG.getConstant(Value, dl, MVT::i32),
|
||||
|
|
@ -6583,9 +6583,9 @@ SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
|
|||
return Res;
|
||||
}
|
||||
|
||||
SDValue LHS = Op.getOperand(0);
|
||||
EVT VT = Op.getValueType();
|
||||
|
||||
SDValue LHS = Op.getOperand(0);
|
||||
BuildVectorSDNode *BVN =
|
||||
dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode());
|
||||
if (!BVN) {
|
||||
|
|
@ -6642,25 +6642,13 @@ static SDValue NormalizeBuildVector(SDValue Op,
|
|||
return DAG.getBuildVector(VT, dl, Ops);
|
||||
}
|
||||
|
||||
SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
SDLoc dl(Op);
|
||||
static SDValue ConstantBuildVector(SDValue Op, SelectionDAG &DAG) {
|
||||
EVT VT = Op.getValueType();
|
||||
|
||||
Op = NormalizeBuildVector(Op, DAG);
|
||||
BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
|
||||
APInt DefBits(VT.getSizeInBits(), 0);
|
||||
APInt UndefBits(VT.getSizeInBits(), 0);
|
||||
BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
|
||||
if (resolveBuildVector(BVN, DefBits, UndefBits)) {
|
||||
// Certain magic vector constants (used to express things like NOT
|
||||
// and NEG) are passed through unmodified. This allows codegen patterns
|
||||
// for these operations to match. Special-purpose patterns will lower
|
||||
// these immediates to MOVI if it proves necessary.
|
||||
uint64_t DefVal = DefBits.zextOrTrunc(64).getZExtValue();
|
||||
if (DefBits.getHiBits(64) == DefBits.getLoBits(64) &&
|
||||
VT.isInteger() && (DefVal == 0 || DefVal == UINT64_MAX))
|
||||
return Op;
|
||||
|
||||
SDValue NewOp;
|
||||
if ((NewOp = tryAdvSIMDModImm64(AArch64ISD::MOVIedit, Op, DAG, DefBits)) ||
|
||||
(NewOp = tryAdvSIMDModImm32(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
|
||||
|
|
@ -6692,6 +6680,34 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
|
|||
return NewOp;
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
EVT VT = Op.getValueType();
|
||||
|
||||
// Try to build a simple constant vector.
|
||||
Op = NormalizeBuildVector(Op, DAG);
|
||||
if (VT.isInteger()) {
|
||||
// Certain vector constants, used to express things like logical NOT and
|
||||
// arithmetic NEG, are passed through unmodified. This allows special
|
||||
// patterns for these operations to match, which will lower these constants
|
||||
// to whatever is proven necessary.
|
||||
BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
|
||||
if (BVN->isConstant())
|
||||
if (ConstantSDNode *Const = BVN->getConstantSplatNode()) {
|
||||
unsigned BitSize = VT.getVectorElementType().getSizeInBits();
|
||||
APInt Val(BitSize,
|
||||
Const->getAPIntValue().zextOrTrunc(BitSize).getZExtValue());
|
||||
if (Val.isNullValue() || Val.isAllOnesValue())
|
||||
return Op;
|
||||
}
|
||||
}
|
||||
|
||||
if (SDValue V = ConstantBuildVector(Op, DAG))
|
||||
return V;
|
||||
|
||||
// Scan through the operands to find some interesting properties we can
|
||||
// exploit:
|
||||
// 1) If only one value is used, we can use a DUP, or
|
||||
|
|
@ -6704,6 +6720,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
|
|||
// lanes such that we can directly materialize the vector
|
||||
// some other way (MOVI, e.g.), we can be sneaky.
|
||||
// 5) if all operands are EXTRACT_VECTOR_ELT, check for VUZP.
|
||||
SDLoc dl(Op);
|
||||
unsigned NumElts = VT.getVectorNumElements();
|
||||
bool isOnlyLowElement = true;
|
||||
bool usesOnlyOneValue = true;
|
||||
|
|
@ -6855,16 +6872,23 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
|
|||
// is better than the default, which will perform a separate initialization
|
||||
// for each lane.
|
||||
if (NumConstantLanes > 0 && usesOnlyOneConstantValue) {
|
||||
SDValue Val = DAG.getNode(AArch64ISD::DUP, dl, VT, ConstantValue);
|
||||
// Firstly, try to materialize the splat constant.
|
||||
SDValue Vec = DAG.getSplatBuildVector(VT, dl, ConstantValue),
|
||||
Val = ConstantBuildVector(Vec, DAG);
|
||||
if (!Val) {
|
||||
// Otherwise, materialize the constant and splat it.
|
||||
Val = DAG.getNode(AArch64ISD::DUP, dl, VT, ConstantValue);
|
||||
DAG.ReplaceAllUsesWith(Vec.getNode(), &Val);
|
||||
}
|
||||
|
||||
// Now insert the non-constant lanes.
|
||||
for (unsigned i = 0; i < NumElts; ++i) {
|
||||
SDValue V = Op.getOperand(i);
|
||||
SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i64);
|
||||
if (!isa<ConstantSDNode>(V) && !isa<ConstantFPSDNode>(V)) {
|
||||
if (!isa<ConstantSDNode>(V) && !isa<ConstantFPSDNode>(V))
|
||||
// Note that type legalization likely mucked about with the VT of the
|
||||
// source operand, so we may have to convert it here before inserting.
|
||||
Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Val, V, LaneIdx);
|
||||
}
|
||||
}
|
||||
return Val;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4644,21 +4644,6 @@ def MOVIv16b_ns : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1110, V128, imm0_255,
|
|||
[(set (v16i8 V128:$Rd), (AArch64movi imm0_255:$imm8))]>;
|
||||
}
|
||||
|
||||
// Use the more efficient MOVI instead of DUP from ZR to zero up vectors
|
||||
def : Pat<(v2f32 (AArch64dup (f32 fpimm0))), (MOVIv2i32 (i32 0), (i32 0))>;
|
||||
|
||||
def : Pat<(v2i32 (AArch64dup (i32 0))), (MOVIv2i32 (i32 0), (i32 0))>;
|
||||
def : Pat<(v4i16 (AArch64dup (i32 0))), (MOVIv4i16 (i32 0), (i32 0))>;
|
||||
def : Pat<(v8i8 (AArch64dup (i32 0))), (MOVIv8b_ns (i32 0))>;
|
||||
|
||||
def : Pat<(v2f64 (AArch64dup (f64 fpimm0))), (MOVIv2d_ns (i32 0))>;
|
||||
def : Pat<(v4f32 (AArch64dup (f32 fpimm0))), (MOVIv4i32 (i32 0), (i32 0))>;
|
||||
|
||||
def : Pat<(v2i64 (AArch64dup (i64 0))), (MOVIv2d_ns (i32 0))>;
|
||||
def : Pat<(v4i32 (AArch64dup (i32 0))), (MOVIv4i32 (i32 0), (i32 0))>;
|
||||
def : Pat<(v8i16 (AArch64dup (i32 0))), (MOVIv8i16 (i32 0), (i32 0))>;
|
||||
def : Pat<(v16i8 (AArch64dup (i32 0))), (MOVIv16b_ns (i32 0))>;
|
||||
|
||||
// AdvSIMD MVNI
|
||||
|
||||
// EDIT per word & halfword: 2s, 4h, 4s, & 8h
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ entry:
|
|||
ret void
|
||||
|
||||
; CHECK-LABEL: test0f
|
||||
; CHECK: movi.4s v[[TEMP:[0-9]+]], #0
|
||||
; CHECK: movi.2d v[[TEMP:[0-9]+]], #0
|
||||
; CHECK: mov.s v[[TEMP]][0], v{{[0-9]+}}[0]
|
||||
; CHECK: str q[[TEMP]], [x0]
|
||||
; CHECK: ret
|
||||
|
|
@ -24,9 +24,8 @@ entry:
|
|||
ret void
|
||||
|
||||
; CHECK-LABEL: test1f
|
||||
; CHECK: fmov s[[TEMP:[0-9]+]], #1.0000000
|
||||
; CHECK: dup.4s v[[TEMP2:[0-9]+]], v[[TEMP]][0]
|
||||
; CHECK: mov.s v[[TEMP2]][0], v0[0]
|
||||
; CHECK: str q[[TEMP2]], [x0]
|
||||
; CHECK: fmov.4s v[[TEMP:[0-9]+]], #1.0
|
||||
; CHECK: mov.s v[[TEMP]][0], v0[0]
|
||||
; CHECK: str q[[TEMP]], [x0]
|
||||
; CHECK: ret
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,174 +1,272 @@
|
|||
; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s
|
||||
|
||||
; Check that building up a vector w/ only one non-zero lane initializes
|
||||
; intelligently.
|
||||
; efficiently.
|
||||
|
||||
define <8 x i8> @v8i8(i8 %t, i8 %s) nounwind {
|
||||
define <8 x i8> @v8i8z(i8 %t, i8 %s) nounwind {
|
||||
%v = insertelement <8 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef>, i8 %s, i32 7
|
||||
ret <8 x i8> %v
|
||||
|
||||
; CHECK-LABEL: v8i8:
|
||||
; CHECK: movi v[[R:[0-9]+]].8b, #0
|
||||
; CHECK-LABEL: v8i8z
|
||||
; CHECK: movi d[[R:[0-9]+]], #0
|
||||
; CHECK: mov v[[R]].b[7], w{{[0-9]+}}
|
||||
}
|
||||
|
||||
define <16 x i8> @v16i8(i8 %t, i8 %s) nounwind {
|
||||
define <16 x i8> @v16i8z(i8 %t, i8 %s) nounwind {
|
||||
%v = insertelement <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef>, i8 %s, i32 15
|
||||
ret <16 x i8> %v
|
||||
|
||||
; CHECK-LABEL: v16i8:
|
||||
; CHECK: movi v[[R:[0-9]+]].16b, #0
|
||||
; CHECK-LABEL: v16i8z:
|
||||
; CHECK: movi v[[R:[0-9]+]].2d, #0
|
||||
; CHECK: mov v[[R]].b[15], w{{[0-9]+}}
|
||||
}
|
||||
|
||||
define <4 x i16> @v4i16(i16 %t, i16 %s) nounwind {
|
||||
define <4 x i16> @v4i16z(i16 %t, i16 %s) nounwind {
|
||||
%v = insertelement <4 x i16> <i16 0, i16 0, i16 0, i16 undef>, i16 %s, i32 3
|
||||
ret <4 x i16> %v
|
||||
|
||||
; CHECK-LABEL: v4i16:
|
||||
; CHECK: movi v[[R:[0-9]+]].4h, #0
|
||||
; CHECK-LABEL: v4i16z:
|
||||
; CHECK: movi d[[R:[0-9]+]], #0
|
||||
; CHECK: mov v[[R]].h[3], w{{[0-9]+}}
|
||||
}
|
||||
|
||||
define <8 x i16> @v8i16(i16 %t, i16 %s) nounwind {
|
||||
define <8 x i16> @v8i16z(i16 %t, i16 %s) nounwind {
|
||||
%v = insertelement <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef>, i16 %s, i32 7
|
||||
ret <8 x i16> %v
|
||||
|
||||
; CHECK-LABEL: v8i16:
|
||||
; CHECK: movi v[[R:[0-9]+]].8h, #0
|
||||
; CHECK-LABEL: v8i16z:
|
||||
; CHECK: movi v[[R:[0-9]+]].2d, #0
|
||||
; CHECK: mov v[[R]].h[7], w{{[0-9]+}}
|
||||
}
|
||||
|
||||
define <2 x i32> @v2i32(i32 %t, i32 %s) nounwind {
|
||||
define <2 x i32> @v2i32z(i32 %t, i32 %s) nounwind {
|
||||
%v = insertelement <2 x i32> <i32 0, i32 undef>, i32 %s, i32 1
|
||||
ret <2 x i32> %v
|
||||
|
||||
; CHECK-LABEL: v2i32:
|
||||
; CHECK: movi v[[R:[0-9]+]].2s, #0
|
||||
; CHECK-LABEL: v2i32z:
|
||||
; CHECK: movi d[[R:[0-9]+]], #0
|
||||
; CHECK: mov v[[R]].s[1], w{{[0-9]+}}
|
||||
}
|
||||
|
||||
define <4 x i32> @v4i32(i32 %t, i32 %s) nounwind {
|
||||
define <4 x i32> @v4i32z(i32 %t, i32 %s) nounwind {
|
||||
%v = insertelement <4 x i32> <i32 0, i32 0, i32 0, i32 undef>, i32 %s, i32 3
|
||||
ret <4 x i32> %v
|
||||
|
||||
; CHECK-LABEL: v4i32:
|
||||
; CHECK: movi v[[R:[0-9]+]].4s, #0
|
||||
; CHECK-LABEL: v4i32z:
|
||||
; CHECK: movi v[[R:[0-9]+]].2d, #0
|
||||
; CHECK: mov v[[R]].s[3], w{{[0-9]+}}
|
||||
}
|
||||
|
||||
define <2 x i64> @v2i64(i64 %t, i64 %s) nounwind {
|
||||
define <2 x i64> @v2i64z(i64 %t, i64 %s) nounwind {
|
||||
%v = insertelement <2 x i64> <i64 0, i64 undef>, i64 %s, i32 1
|
||||
ret <2 x i64> %v
|
||||
|
||||
; CHECK-LABEL: v2i64:
|
||||
; CHECK-LABEL: v2i64z:
|
||||
; CHECK: movi v[[R:[0-9]+]].2d, #0
|
||||
; CHECK: mov v[[R]].d[1], x{{[0-9]+}}
|
||||
}
|
||||
|
||||
define <2 x float> @v2f32(float %t, float %s) nounwind {
|
||||
define <2 x float> @v2f32z(float %t, float %s) nounwind {
|
||||
%v = insertelement <2 x float> <float 0.0, float undef>, float %s, i32 1
|
||||
ret <2 x float> %v
|
||||
|
||||
; CHECK-LABEL: v2f32:
|
||||
; CHECK: movi v[[R:[0-9]+]].2s, #0
|
||||
; CHECK-LABEL: v2f32z:
|
||||
; CHECK: movi d[[R:[0-9]+]], #0
|
||||
; CHECK: mov v[[R]].s[1], v{{[0-9]+}}.s[0]
|
||||
}
|
||||
|
||||
define <4 x float> @v4f32(float %t, float %s) nounwind {
|
||||
define <4 x float> @v4f32z(float %t, float %s) nounwind {
|
||||
%v = insertelement <4 x float> <float 0.0, float 0.0, float 0.0, float undef>, float %s, i32 3
|
||||
ret <4 x float> %v
|
||||
|
||||
; CHECK-LABEL: v4f32:
|
||||
; CHECK: movi v[[R:[0-9]+]].4s, #0
|
||||
; CHECK-LABEL: v4f32z:
|
||||
; CHECK: movi v[[R:[0-9]+]].2d, #0
|
||||
; CHECK: mov v[[R]].s[3], v{{[0-9]+}}.s[0]
|
||||
}
|
||||
|
||||
define <2 x double> @v2f64(double %t, double %s) nounwind {
|
||||
define <2 x double> @v2f64z(double %t, double %s) nounwind {
|
||||
%v = insertelement <2 x double> <double 0.0, double undef>, double %s, i32 1
|
||||
ret <2 x double> %v
|
||||
|
||||
; CHECK-LABEL: v2f64:
|
||||
; CHECK-LABEL: v2f64z:
|
||||
; CHECK: movi v[[R:[0-9]+]].2d, #0
|
||||
; CHECK: mov v[[R]].d[1], v{{[0-9]+}}.d[0]
|
||||
}
|
||||
|
||||
define void @v8i8st(<8 x i8>* %p, <8 x i8> %s) nounwind {
|
||||
store <8 x i8> <i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64>, <8 x i8>* %p, align 8
|
||||
; Check that building up a vector w/ only one non-ones lane initializes
|
||||
; efficiently.
|
||||
|
||||
define <8 x i8> @v8i8m(i8 %t, i8 %s) nounwind {
|
||||
%v = insertelement <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 undef>, i8 %s, i32 7
|
||||
ret <8 x i8> %v
|
||||
|
||||
; CHECK-LABEL: v8i8m
|
||||
; CHECK: movi d{{[0-9]+}}, #0xffffffffffffffff
|
||||
; CHECK: mov v[[R]].b[7], w{{[0-9]+}}
|
||||
}
|
||||
|
||||
define <16 x i8> @v16i8m(i8 %t, i8 %s) nounwind {
|
||||
%v = insertelement <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 undef>, i8 %s, i32 15
|
||||
ret <16 x i8> %v
|
||||
|
||||
; CHECK-LABEL: v16i8m
|
||||
; CHECK: movi v[[R:[0-9]+]].2d, #0xffffffffffffffff
|
||||
; CHECK: mov v[[R]].b[15], w{{[0-9]+}}
|
||||
}
|
||||
|
||||
define <4 x i16> @v4i16m(i16 %t, i16 %s) nounwind {
|
||||
%v = insertelement <4 x i16> <i16 -1, i16 -1, i16 -1, i16 undef>, i16 %s, i32 3
|
||||
ret <4 x i16> %v
|
||||
|
||||
; CHECK-LABEL: v4i16m
|
||||
; CHECK: movi d{{[0-9]+}}, #0xffffffffffffffff
|
||||
; CHECK: mov v[[R]].h[3], w{{[0-9]+}}
|
||||
}
|
||||
|
||||
define <8 x i16> @v8i16m(i16 %t, i16 %s) nounwind {
|
||||
%v = insertelement <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 undef>, i16 %s, i32 7
|
||||
ret <8 x i16> %v
|
||||
|
||||
; CHECK-LABEL: v8i16m
|
||||
; CHECK: movi v[[R:[0-9]+]].2d, #0xffffffffffffffff
|
||||
; CHECK: mov v[[R]].h[7], w{{[0-9]+}}
|
||||
}
|
||||
|
||||
define <2 x i32> @v2i32m(i32 %t, i32 %s) nounwind {
|
||||
%v = insertelement <2 x i32> <i32 -1, i32 undef>, i32 %s, i32 1
|
||||
ret <2 x i32> %v
|
||||
|
||||
; CHECK-LABEL: v2i32m
|
||||
; CHECK: movi d{{[0-9]+}}, #0xffffffffffffffff
|
||||
; CHECK: mov v[[R]].s[1], w{{[0-9]+}}
|
||||
}
|
||||
|
||||
define <4 x i32> @v4i32m(i32 %t, i32 %s) nounwind {
|
||||
%v = insertelement <4 x i32> <i32 -1, i32 -1, i32 -1, i32 undef>, i32 %s, i32 3
|
||||
ret <4 x i32> %v
|
||||
|
||||
; CHECK-LABEL: v4i32m
|
||||
; CHECK: movi v[[R:[0-9]+]].2d, #0xffffffffffffffff
|
||||
; CHECK: mov v[[R]].s[3], w{{[0-9]+}}
|
||||
}
|
||||
|
||||
define <2 x i64> @v2i64m(i64 %t, i64 %s) nounwind {
|
||||
%v = insertelement <2 x i64> <i64 -1, i64 undef>, i64 %s, i32 1
|
||||
ret <2 x i64> %v
|
||||
|
||||
; CHECK-LABEL: v2i64m
|
||||
; CHECK: movi v[[R:[0-9]+]].2d, #0xffffffffffffffff
|
||||
; CHECK: mov v[[R]].d[1], x{{[0-9]+}}
|
||||
}
|
||||
|
||||
; Check that building up a vector w/ some constants initializes efficiently.
|
||||
|
||||
define void @v8i8st(<8 x i8>* %p, i8 %s) nounwind {
|
||||
%v = insertelement <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 undef>, i8 %s, i32 7
|
||||
store <8 x i8> %v, <8 x i8>* %p, align 8
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: v8i8st:
|
||||
; CHECK: movi v[[R:[0-9]+]].8b, #64
|
||||
; CHECK: movi v[[R:[0-9]+]].8b, #1
|
||||
; CHECK: mov v[[R]].b[7], w{{[0-9]+}}
|
||||
; CHECK: str d[[R]], [x{{[0-9]+}}]
|
||||
}
|
||||
|
||||
define void @v16i8st(<16 x i8>* %p, <16 x i8> %s) nounwind {
|
||||
store <16 x i8> <i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64>, <16 x i8>* %p, align 16
|
||||
define void @v16i8st(<16 x i8>* %p, i8 %s) nounwind {
|
||||
%v = insertelement <16 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 undef>, i8 %s, i32 15
|
||||
store <16 x i8> %v, <16 x i8>* %p, align 16
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: v16i8st:
|
||||
; CHECK: movi v[[R:[0-9]+]].16b, #64
|
||||
; CHECK: movi v[[R:[0-9]+]].16b, #128
|
||||
; CHECK: mov v[[R]].b[15], w{{[0-9]+}}
|
||||
; CHECK: str q[[R]], [x{{[0-9]+}}]
|
||||
}
|
||||
|
||||
define void @v4i16st(<4 x i16>* %p, <4 x i16> %s) nounwind {
|
||||
store <4 x i16> <i16 16384, i16 16384, i16 16384, i16 16384>, <4 x i16>* %p, align 8
|
||||
define void @v4i16st(<4 x i16>* %p, i16 %s) nounwind {
|
||||
%v = insertelement <4 x i16> <i16 21760, i16 21760, i16 21760, i16 undef>, i16 %s, i32 3
|
||||
store <4 x i16> %v, <4 x i16>* %p, align 8
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: v4i16st:
|
||||
; CHECK: movi v[[R:[0-9]+]].4h, #64, lsl #8
|
||||
; CHECK: movi v[[R:[0-9]+]].4h, #85, lsl #8
|
||||
; CHECK: mov v[[R]].h[3], w{{[0-9]+}}
|
||||
; CHECK: str d[[R]], [x{{[0-9]+}}]
|
||||
}
|
||||
|
||||
define void @v8i16st(<8 x i16>* %p, <8 x i16> %s) nounwind {
|
||||
store <8 x i16> <i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384>, <8 x i16>* %p, align 16
|
||||
define void @v8i16st(<8 x i16>* %p, i16 %s) nounwind {
|
||||
%v = insertelement <8 x i16> <i16 -21761, i16 -21761, i16 -21761, i16 -21761, i16 -21761, i16 -21761, i16 -21761, i16 undef>, i16 %s, i32 7
|
||||
store <8 x i16> %v, <8 x i16>* %p, align 16
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: v8i16st:
|
||||
; CHECK: movi v[[R:[0-9]+]].8h, #64, lsl #8
|
||||
; CHECK: mvni v[[R:[0-9]+]].8h, #85, lsl #8
|
||||
; CHECK: mov v[[R]].h[7], w{{[0-9]+}}
|
||||
; CHECK: str q[[R]], [x{{[0-9]+}}]
|
||||
}
|
||||
|
||||
define void @v2i32st(<2 x i32>* %p, <2 x i32> %s) nounwind {
|
||||
store <2 x i32> <i32 1073741824, i32 1073741824>, <2 x i32>* %p, align 8
|
||||
define void @v2i32st(<2 x i32>* %p, i32 %s) nounwind {
|
||||
%v = insertelement <2 x i32> <i32 983040, i32 undef>, i32 %s, i32 1
|
||||
store <2 x i32> %v, <2 x i32>* %p, align 8
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: v2i32st:
|
||||
; CHECK: movi v[[R:[0-9]+]].2s, #64, lsl #24
|
||||
; CHECK: movi v[[R:[0-9]+]].2s, #15, lsl #16
|
||||
; CHECK: mov v[[R]].s[1], w{{[0-9]+}}
|
||||
; CHECK: str d[[R]], [x{{[0-9]+}}]
|
||||
}
|
||||
|
||||
define void @v4i32st(<4 x i32>* %p, <4 x i32> %s) nounwind {
|
||||
store <4 x i32> <i32 1073741824, i32 1073741824, i32 1073741824, i32 1073741824>, <4 x i32>* %p, align 16
|
||||
define void @v4i32st(<4 x i32>* %p, i32 %s) nounwind {
|
||||
%v = insertelement <4 x i32> <i32 16318463, i32 16318463, i32 16318463, i32 undef>, i32 %s, i32 3
|
||||
store <4 x i32> %v, <4 x i32>* %p, align 16
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: v4i32st:
|
||||
; CHECK: movi v[[R:[0-9]+]].4s, #64, lsl #24
|
||||
; CHECK: movi v[[R:[0-9]+]].4s, #248, msl #16
|
||||
; CHECK: mov v[[R]].s[3], w{{[0-9]+}}
|
||||
; CHECK: str q[[R]], [x{{[0-9]+}}]
|
||||
}
|
||||
|
||||
define void @v2i64st(<2 x i64>* %p, <2 x i64> %s) nounwind {
|
||||
store <2 x i64> <i64 4611686018427387904, i64 4611686018427387904>, <2 x i64>* %p, align 16
|
||||
define void @v2i64st(<2 x i64>* %p, i64 %s) nounwind {
|
||||
%v = insertelement <2 x i64> <i64 13835058055282163712, i64 undef>, i64 %s, i32 1
|
||||
store <2 x i64> %v, <2 x i64>* %p, align 16
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: v2i64st
|
||||
; CHECK: fmov v[[R:[0-9]+]].2d, #2.0
|
||||
; CHECK-LABEL: v2i64st:
|
||||
; CHECK: fmov v[[R:[0-9]+]].2d, #-2.0
|
||||
; CHECK: mov v[[R]].d[1], x{{[0-9]+}}
|
||||
; CHECK: str q[[R]], [x{{[0-9]+}}]
|
||||
}
|
||||
|
||||
define void @v2f32st(<2 x float>* %p, <2 x float> %s) nounwind {
|
||||
store <2 x float> <float 2.0, float 2.0>, <2 x float>* %p, align 8
|
||||
define void @v2f32st(<2 x float>* %p, float %s) nounwind {
|
||||
%v = insertelement <2 x float> <float 2.0, float undef>, float %s, i32 1
|
||||
store <2 x float> %v, <2 x float>* %p, align 8
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: v2f32st
|
||||
; CHECK-LABEL: v2f32st:
|
||||
; CHECK: movi v[[R:[0-9]+]].2s, #64, lsl #24
|
||||
; CHECK: mov v[[R]].s[1], v{{[0-9]+}}.s[0]
|
||||
; CHECK: str d[[R]], [x{{[0-9]+}}]
|
||||
}
|
||||
|
||||
define void @v4f32st(<4 x float>* %p, <4 x float> %s) nounwind {
|
||||
store <4 x float> <float 2.0, float 2.0, float 2.0, float 2.0>, <4 x float>* %p, align 16
|
||||
define void @v4f32st(<4 x float>* %p, float %s) nounwind {
|
||||
%v = insertelement <4 x float> <float -2.0, float -2.0, float -2.0, float undef>, float %s, i32 3
|
||||
store <4 x float> %v, <4 x float>* %p, align 16
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: v4f32st:
|
||||
; CHECK: movi v[[R:[0-9]+]].4s, #64, lsl #24
|
||||
; CHECK: movi v[[R:[0-9]+]].4s, #192, lsl #24
|
||||
; CHECK: mov v[[R]].s[3], v{{[0-9]+}}.s[0]
|
||||
; CHECK: str q[[R]], [x{{[0-9]+}}]
|
||||
}
|
||||
|
||||
define void @v2f64st(<2 x double>* %p, <2 x double> %s) nounwind {
|
||||
store <2 x double> <double 2.0, double 2.0>, <2 x double>* %p, align 16
|
||||
define void @v2f64st(<2 x double>* %p, double %s) nounwind {
|
||||
%v = insertelement <2 x double> <double 2.0, double undef>, double %s, i32 1
|
||||
store <2 x double> %v, <2 x double>* %p, align 16
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: v2f64st:
|
||||
; CHECK: fmov v[[R:[0-9]+]].2d, #2.0
|
||||
; CHECK: mov v[[R]].d[1], v{{[0-9]+}}.d[0]
|
||||
; CHECK: str q[[R]], [x{{[0-9]+}}]
|
||||
}
|
||||
|
|
|
|||
|
|
@ -537,8 +537,8 @@ define <4 x i16> @bsl4xi16_const(<4 x i16> %a, <4 x i16> %b) {
|
|||
define <1 x i64> @bsl1xi64_const(<1 x i64> %a, <1 x i64> %b) {
|
||||
; CHECK-LABEL: bsl1xi64_const:
|
||||
; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
|
||||
%tmp1 = and <1 x i64> %a, < i64 -16 >
|
||||
%tmp2 = and <1 x i64> %b, < i64 15 >
|
||||
%tmp1 = and <1 x i64> %a, < i64 -256 >
|
||||
%tmp2 = and <1 x i64> %b, < i64 255 >
|
||||
%tmp3 = or <1 x i64> %tmp1, %tmp2
|
||||
ret <1 x i64> %tmp3
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue