AVX512: Implemented encoding and intrinsics for vcmpss/sd.
Added tests for intrinsics and encoding.

Differential Revision: http://reviews.llvm.org/D12593

llvm-svn: 248121
commit b7e1f9d680
parent 06c57b594c
include/llvm/IR/IntrinsicsX86.td
@@ -6973,6 +6973,14 @@ let TargetPrefix = "x86" in {
           GCCBuiltin<"__builtin_ia32_cmppd128_mask">,
           Intrinsic<[llvm_i8_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
                      llvm_i32_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_cmp_ss :
+        GCCBuiltin<"__builtin_ia32_cmpss_mask">,
+        Intrinsic<[llvm_i8_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
+                   llvm_i32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_cmp_sd :
+        GCCBuiltin<"__builtin_ia32_cmpsd_mask">,
+        Intrinsic<[llvm_i8_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
+                   llvm_i32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
   def int_x86_avx512_movntdqa :
           GCCBuiltin<"__builtin_ia32_movntdqa512">,
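The new intrinsics take the two vector operands, an i32 comparison predicate, an i8 write-mask, and a trailing i32 rounding-mode operand. A minimal usage sketch in LLVM IR (illustrative, not part of the commit): predicate 1 is LT_OS in the CMPSS immediate encoding, mask -1 leaves the result unmasked, and rounding 4 (CUR_DIRECTION) selects the non-{sae} form.

    ; Illustrative sketch -- not part of this commit.
    declare i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float>, <4 x float>, i32, i8, i32)

    define i8 @cmp_lt_ss(<4 x float> %a, <4 x float> %b) {
      ; predicate 1 = LT_OS, mask -1 = no masking, rounding 4 = CUR_DIRECTION
      %r = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %a, <4 x float> %b,
                                                i32 1, i8 -1, i32 4)
      ret i8 %r
    }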
lib/Target/X86/X86ISelLowering.cpp
@@ -15753,17 +15753,20 @@ static SDValue getScalarMaskingNode(SDValue Op, SDValue Mask,
                                     SDValue PreservedSrc,
                                     const X86Subtarget *Subtarget,
                                     SelectionDAG &DAG) {
   if (isAllOnes(Mask))
     return Op;

   EVT VT = Op.getValueType();
   SDLoc dl(Op);
   // The mask should be of type MVT::i1
   SDValue IMask = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Mask);

-  if (PreservedSrc.getOpcode() == ISD::UNDEF)
-    PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl);
-  return DAG.getNode(X86ISD::SELECT, dl, VT, IMask, Op, PreservedSrc);
+  if (Op.getOpcode() == X86ISD::FSETCC)
+    return DAG.getNode(ISD::AND, dl, VT, Op, IMask);
+
+  if (PreservedSrc.getOpcode() == ISD::UNDEF)
+    PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl);
+  return DAG.getNode(X86ISD::SELECT, dl, VT, IMask, Op, PreservedSrc);
 }

 static int getSEHRegistrationNodeSize(const Function *Fn) {
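The change above adds an early-out for scalar compares: an X86ISD::FSETCC node already produces an i1 result, so applying the write-mask reduces to ANDing with the i1 mask instead of building an X86ISD::SELECT against PreservedSrc. The identity it relies on, as an illustrative IR sketch:

    ; For i1 values, select(mask, cmp, 0) is just and(cmp, mask).
    define i1 @mask_scalar_cmp(i1 %cmp, i1 %mask) {
      %r = and i1 %cmp, %mask
      ret i1 %r
    }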
@@ -16124,6 +16127,32 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
                                   DAG.getIntPtrConstant(0, dl));
       return DAG.getBitcast(Op.getValueType(), Res);
     }
+    case CMP_MASK_SCALAR_CC: {
+      SDValue Src1 = Op.getOperand(1);
+      SDValue Src2 = Op.getOperand(2);
+      SDValue CC = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op.getOperand(3));
+      SDValue Mask = Op.getOperand(4);
+
+      SDValue Cmp;
+      if (IntrData->Opc1 != 0) {
+        SDValue Rnd = Op.getOperand(5);
+        if (cast<ConstantSDNode>(Rnd)->getZExtValue() !=
+            X86::STATIC_ROUNDING::CUR_DIRECTION)
+          Cmp = DAG.getNode(IntrData->Opc1, dl, MVT::i1, Src1, Src2, CC, Rnd);
+      }
+      // default rounding mode
+      if (!Cmp.getNode())
+        Cmp = DAG.getNode(IntrData->Opc0, dl, MVT::i1, Src1, Src2, CC);
+
+      SDValue CmpMask = getScalarMaskingNode(Cmp, Mask,
+                                             DAG.getTargetConstant(0, dl,
+                                                                   MVT::i1),
+                                             Subtarget, DAG);
+
+      return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i8,
+                         DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i8, CmpMask),
+                         DAG.getValueType(MVT::i1));
+    }
     case COMI: { // Comparison intrinsics
       ISD::CondCode CC = (ISD::CondCode)IntrData->Opc1;
       SDValue LHS = Op.getOperand(1);
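The CMP_MASK_SCALAR_CC case dispatches on the trailing rounding operand: any value other than CUR_DIRECTION routes through Opc1 and keeps the rounding operand (producing the {sae} form), otherwise it falls back to Opc0. An illustrative IR sketch pairing the two paths (predicate 2 is LE_OS):

    ; Illustrative sketch -- not part of this commit.
    declare i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double>, <2 x double>, i32, i8, i32)

    define i8 @cmp_le_sd_both_paths(<2 x double> %a, <2 x double> %b, i8 %m) {
      %plain = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %a, <2 x double> %b,
                                                    i32 2, i8 %m, i32 4)  ; vcmplesd
      %sae   = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %a, <2 x double> %b,
                                                    i32 2, i8 %m, i32 8)  ; vcmplesd {sae}
      %r = or i8 %plain, %sae
      ret i8 %r
    }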
lib/Target/X86/X86InstrAVX512.td
@@ -1363,41 +1363,85 @@ def : Pat<(v8i32 (vselect (v8i1 VK8WM:$mask), (v8i32 VR256X:$src1),
 //===----------------------------------------------------------------------===//

 // avx512_cmp_scalar - AVX512 CMPSS and CMPSD
-multiclass avx512_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
-                             SDNode OpNode, ValueType VT,
-                             PatFrag ld_frag, string Suffix> {
-  def rr : AVX512Ii8<0xC2, MRMSrcReg,
-                (outs VK1:$dst), (ins RC:$src1, RC:$src2, AVXCC:$cc),
-                !strconcat("vcmp${cc}", Suffix,
-                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-                [(set VK1:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))],
-                IIC_SSE_ALU_F32S_RR>, EVEX_4V;
-  def rm : AVX512Ii8<0xC2, MRMSrcMem,
-                (outs VK1:$dst), (ins RC:$src1, x86memop:$src2, AVXCC:$cc),
-                !strconcat("vcmp${cc}", Suffix,
-                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-                [(set VK1:$dst, (OpNode (VT RC:$src1),
-                                 (ld_frag addr:$src2), imm:$cc))], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
+multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd>{
+  defm  rr_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
+                      (outs _.KRC:$dst),
+                      (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
+                      "vcmp${cc}"#_.Suffix,
+                      "$src2, $src1", "$src1, $src2",
+                      (OpNode (_.VT _.RC:$src1),
+                              (_.VT _.RC:$src2),
+                              imm:$cc)>, EVEX_4V;
+  let mayLoad = 1 in
+    defm  rm_Int  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
+                      (outs _.KRC:$dst),
+                      (ins _.RC:$src1, _.MemOp:$src2, AVXCC:$cc),
+                      "vcmp${cc}"#_.Suffix,
+                      "$src2, $src1", "$src1, $src2",
+                      (OpNode (_.VT _.RC:$src1),
+                          (_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))),
+                          imm:$cc)>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>;
+
+  defm  rrb_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
+                     (outs _.KRC:$dst),
+                     (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
+                     "vcmp${cc}"#_.Suffix,
+                     "{sae}, $src2, $src1", "$src1, $src2,{sae}",
+                     (OpNodeRnd (_.VT _.RC:$src1),
+                                (_.VT _.RC:$src2),
+                                imm:$cc,
+                                (i32 FROUND_NO_EXC))>, EVEX_4V, EVEX_B;
   // Accept explicit immediate argument form instead of comparison code.
   let isAsmParserOnly = 1, hasSideEffects = 0 in {
-    def rri_alt : AVX512Ii8<0xC2, MRMSrcReg,
-                (outs VK1:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc),
-                !strconcat("vcmp", Suffix,
-                           "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
-                [], IIC_SSE_ALU_F32S_RR>, EVEX_4V;
+    defm  rri_alt  : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
+                         (outs VK1:$dst),
+                         (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
+                         "vcmp"#_.Suffix,
+                         "$cc, $src2, $src1", "$src1, $src2, $cc">, EVEX_4V;
+    defm  rmi_alt  : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
+                         (outs _.KRC:$dst),
+                         (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
+                         "vcmp"#_.Suffix,
+                         "$cc, $src2, $src1", "$src1, $src2, $cc">,
+                         EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>;
+
+    defm  rrb_alt  : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
+                        (outs _.KRC:$dst),
+                        (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
+                        "vcmp"#_.Suffix,
+                        "$cc,{sae}, $src2, $src1","$src1, $src2,{sae}, $cc">,
+                        EVEX_4V, EVEX_B;
+  }// let isAsmParserOnly = 1, hasSideEffects = 0
+
+  let isCodeGenOnly = 1 in {
+    def rr : AVX512Ii8<0xC2, MRMSrcReg,
+                (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, AVXCC:$cc),
+                !strconcat("vcmp${cc}", _.Suffix,
+                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+                [(set _.KRC:$dst, (OpNode _.FRC:$src1,
+                                          _.FRC:$src2,
+                                          imm:$cc))],
+                IIC_SSE_ALU_F32S_RR>, EVEX_4V;
     let mayLoad = 1 in
-    def rmi_alt : AVX512Ii8<0xC2, MRMSrcMem,
-                (outs VK1:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc),
-                !strconcat("vcmp", Suffix,
-                           "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
-                [], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
+    def rm : AVX512Ii8<0xC2, MRMSrcMem,
+              (outs _.KRC:$dst),
+              (ins _.FRC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
+              !strconcat("vcmp${cc}", _.Suffix,
+                         "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+              [(set _.KRC:$dst, (OpNode _.FRC:$src1,
+                                        (_.ScalarLdFrag addr:$src2),
+                                        imm:$cc))],
+              IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>;
   }
 }

 let Predicates = [HasAVX512] in {
-  defm VCMPSSZ : avx512_cmp_scalar<FR32X, f32mem, X86cmpms, f32, loadf32, "ss">,
-                 XS;
-  defm VCMPSDZ : avx512_cmp_scalar<FR64X, f64mem, X86cmpms, f64, loadf64, "sd">,
-                 XD, VEX_W;
+  defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsRnd>,
+                   AVX512XSIi8Base;
+  defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsRnd>,
+                   AVX512XDIi8Base, VEX_W;
 }

 multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
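In short, the rewritten multiclass now emits three groups of definitions: the intrinsic-facing forms (rr_Int, rm_Int, and the EVEX.B-encoded rrb_Int for the {sae} variant), the asm-parser-only *_alt aliases that accept a raw immediate instead of a named comparison code, and isCodeGenOnly rr/rm forms that pattern-match X86cmpms on scalar FRC registers for ordinary IR compares.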
@@ -2023,10 +2067,15 @@ let Predicates = [HasAVX512] in {
             (AND32ri (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1))>;
   def : Pat<(i32 (anyext VK1:$src)),
             (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16))>;

   def : Pat<(i8 (zext VK1:$src)),
             (EXTRACT_SUBREG
              (AND32ri (KMOVWrk
                        (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)), sub_8bit)>;
+  def : Pat<(i8 (anyext VK1:$src)),
+              (EXTRACT_SUBREG
+                (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), sub_8bit)>;
+
   def : Pat<(i64 (zext VK1:$src)),
             (AND64ri8 (SUBREG_TO_REG (i64 0),
              (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), sub_32bit), (i64 1))>;
lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -189,10 +189,15 @@ def X86CmpMaskCCRound :
 def X86CmpMaskCCScalar :
       SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;

-def X86cmpm    : SDNode<"X86ISD::CMPM",     X86CmpMaskCC>;
-def X86cmpmRnd : SDNode<"X86ISD::CMPM_RND", X86CmpMaskCCRound>;
-def X86cmpmu   : SDNode<"X86ISD::CMPMU",    X86CmpMaskCC>;
-def X86cmpms   : SDNode<"X86ISD::FSETCC",   X86CmpMaskCCScalar>;
+def X86CmpMaskCCScalarRound :
+      SDTypeProfile<1, 4, [SDTCisInt<0>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>,
+                           SDTCisInt<4>]>;
+
+def X86cmpm     : SDNode<"X86ISD::CMPM",     X86CmpMaskCC>;
+def X86cmpmRnd  : SDNode<"X86ISD::CMPM_RND", X86CmpMaskCCRound>;
+def X86cmpmu    : SDNode<"X86ISD::CMPMU",    X86CmpMaskCC>;
+def X86cmpms    : SDNode<"X86ISD::FSETCC",   X86CmpMaskCCScalar>;
+def X86cmpmsRnd : SDNode<"X86ISD::FSETCC",   X86CmpMaskCCScalarRound>;

 def X86vshl    : SDNode<"X86ISD::VSHL",
                  SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
lib/Target/X86/X86IntrinsicsInfo.h
@@ -20,7 +20,7 @@ enum IntrinsicType {
   INTR_NO_TYPE,
   GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, ADX, FPCLASS,
   INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_2OP_IMM8, INTR_TYPE_3OP, INTR_TYPE_4OP,
-  CMP_MASK, CMP_MASK_CC, VSHIFT, VSHIFT_MASK, COMI,
+  CMP_MASK, CMP_MASK_CC, CMP_MASK_SCALAR_CC, VSHIFT, VSHIFT_MASK, COMI,
   INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM,
   INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM,
   INTR_TYPE_3OP_MASK, INTR_TYPE_3OP_MASK_RM, INTR_TYPE_3OP_IMM8_MASK,
@@ -436,6 +436,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx512_mask_cmp_q_128, CMP_MASK_CC, X86ISD::CMPM, 0),
   X86_INTRINSIC_DATA(avx512_mask_cmp_q_256, CMP_MASK_CC, X86ISD::CMPM, 0),
   X86_INTRINSIC_DATA(avx512_mask_cmp_q_512, CMP_MASK_CC, X86ISD::CMPM, 0),
+  X86_INTRINSIC_DATA(avx512_mask_cmp_sd,    CMP_MASK_SCALAR_CC, X86ISD::FSETCC,
+                     X86ISD::FSETCC),
+  X86_INTRINSIC_DATA(avx512_mask_cmp_ss,    CMP_MASK_SCALAR_CC, X86ISD::FSETCC,
+                     X86ISD::FSETCC),
   X86_INTRINSIC_DATA(avx512_mask_cmp_w_128, CMP_MASK_CC, X86ISD::CMPM, 0),
   X86_INTRINSIC_DATA(avx512_mask_cmp_w_256, CMP_MASK_CC, X86ISD::CMPM, 0),
   X86_INTRINSIC_DATA(avx512_mask_cmp_w_512, CMP_MASK_CC, X86ISD::CMPM, 0),
|
@ -4065,6 +4065,96 @@ define <2 x double> @test_getexp_sd(<2 x double> %a0, <2 x double> %a1, <2 x dou
|
||||||
ret <2 x double> %res
|
ret <2 x double> %res
|
||||||
}
|
}
|
||||||
|
|
||||||
|
declare i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double>, <2 x double>, i32, i8, i32)
|
||||||
|
|
||||||
|
define i8@test_int_x86_avx512_mask_cmp_sd(<2 x double> %x0, <2 x double> %x1, i8 %x3, i32 %x4) {
|
||||||
|
; CHECK-LABEL: test_int_x86_avx512_mask_cmp_sd:
|
||||||
|
; CHECK: ## BB#0:
|
||||||
|
; CHECK-NEXT: andl $1, %edi
|
||||||
|
; CHECK-NEXT: kmovw %edi, %k1
|
||||||
|
; CHECK-NEXT: vcmpnltsd {sae}, %xmm1, %xmm0, %k0 {%k1}
|
||||||
|
; CHECK-NEXT: kmovw %k0, %eax
|
||||||
|
; CHECK-NEXT: shlb $7, %al
|
||||||
|
; CHECK-NEXT: sarb $7, %al
|
||||||
|
; CHECK-NEXT: retq
|
||||||
|
|
||||||
|
%res4 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 5, i8 %x3, i32 8)
|
||||||
|
ret i8 %res4
|
||||||
|
}
|
||||||
|
|
||||||
|
define i8@test_int_x86_avx512_mask_cmp_sd_all(<2 x double> %x0, <2 x double> %x1, i8 %x3, i32 %x4) {
|
||||||
|
; CHECK-LABEL: test_int_x86_avx512_mask_cmp_sd_all:
|
||||||
|
; CHECK: ## BB#0:
|
||||||
|
; CHECK-NEXT: vcmpunordsd {sae}, %xmm1, %xmm0, %k0
|
||||||
|
; CHECK-NEXT: vcmplesd %xmm1, %xmm0, %k1
|
||||||
|
; CHECK-NEXT: korw %k0, %k1, %k0
|
||||||
|
; CHECK-NEXT: vcmpnltsd {sae}, %xmm1, %xmm0, %k1
|
||||||
|
; CHECK-NEXT: vcmpneqsd %xmm1, %xmm0, %k2
|
||||||
|
; CHECK-NEXT: korw %k1, %k2, %k1
|
||||||
|
; CHECK-NEXT: andl $1, %edi
|
||||||
|
; CHECK-NEXT: kmovw %edi, %k2
|
||||||
|
; CHECK-NEXT: kandw %k2, %k1, %k1
|
||||||
|
; CHECK-NEXT: korw %k1, %k0, %k0
|
||||||
|
; CHECK-NEXT: kmovw %k0, %eax
|
||||||
|
; CHECK-NEXT: shlb $7, %al
|
||||||
|
; CHECK-NEXT: sarb $7, %al
|
||||||
|
; CHECK-NEXT: retq
|
||||||
|
|
||||||
|
%res1 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 2, i8 -1, i32 4)
|
||||||
|
%res2 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 3, i8 -1, i32 8)
|
||||||
|
%res3 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 4, i8 %x3, i32 4)
|
||||||
|
%res4 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 5, i8 %x3, i32 8)
|
||||||
|
|
||||||
|
%res11 = or i8 %res1, %res2
|
||||||
|
%res12 = or i8 %res3, %res4
|
||||||
|
%res13 = or i8 %res11, %res12
|
||||||
|
ret i8 %res13
|
||||||
|
}
|
||||||
|
|
||||||
|
declare i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float>, <4 x float>, i32, i8, i32)
|
||||||
|
|
||||||
|
define i8@test_int_x86_avx512_mask_cmp_ss(<4 x float> %x0, <4 x float> %x1, i8 %x3, i32 %x4) {
|
||||||
|
; CHECK-LABEL: test_int_x86_avx512_mask_cmp_ss:
|
||||||
|
; CHECK: ## BB#0:
|
||||||
|
; CHECK-NEXT: andl $1, %edi
|
||||||
|
; CHECK-NEXT: kmovw %edi, %k1
|
||||||
|
; CHECK-NEXT: vcmpunordss %xmm1, %xmm0, %k0 {%k1}
|
||||||
|
; CHECK-NEXT: kmovw %k0, %eax
|
||||||
|
; CHECK-NEXT: shlb $7, %al
|
||||||
|
; CHECK-NEXT: sarb $7, %al
|
||||||
|
; CHECK-NEXT: retq
|
||||||
|
|
||||||
|
%res2 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 3, i8 %x3, i32 4)
|
||||||
|
ret i8 %res2
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
define i8@test_int_x86_avx512_mask_cmp_ss_all(<4 x float> %x0, <4 x float> %x1, i8 %x3, i32 %x4) {
|
||||||
|
; CHECK-LABEL: test_int_x86_avx512_mask_cmp_ss_all:
|
||||||
|
; CHECK: ## BB#0:
|
||||||
|
; CHECK-NEXT: vcmpless %xmm1, %xmm0, %k1
|
||||||
|
; CHECK-NEXT: vcmpunordss {sae}, %xmm1, %xmm0, %k0 {%k1}
|
||||||
|
; CHECK-NEXT: vcmpneqss %xmm1, %xmm0, %k1
|
||||||
|
; CHECK-NEXT: vcmpnltss {sae}, %xmm1, %xmm0, %k1 {%k1}
|
||||||
|
; CHECK-NEXT: andl $1, %edi
|
||||||
|
; CHECK-NEXT: kmovw %edi, %k2
|
||||||
|
; CHECK-NEXT: kandw %k2, %k1, %k1
|
||||||
|
; CHECK-NEXT: kandw %k1, %k0, %k0
|
||||||
|
; CHECK-NEXT: kmovw %k0, %eax
|
||||||
|
; CHECK-NEXT: shlb $7, %al
|
||||||
|
; CHECK-NEXT: sarb $7, %al
|
||||||
|
; CHECK-NEXT: retq
|
||||||
|
%res1 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 2, i8 -1, i32 4)
|
||||||
|
%res2 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 3, i8 -1, i32 8)
|
||||||
|
%res3 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 4, i8 %x3, i32 4)
|
||||||
|
%res4 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 5, i8 %x3, i32 8)
|
||||||
|
|
||||||
|
%res11 = and i8 %res1, %res2
|
||||||
|
%res12 = and i8 %res3, %res4
|
||||||
|
%res13 = and i8 %res11, %res12
|
||||||
|
ret i8 %res13
|
||||||
|
}
|
||||||
|
|
||||||
declare <16 x float> @llvm.x86.avx512.mask.shuf.f32x4(<16 x float>, <16 x float>, i32, <16 x float>, i16)
|
declare <16 x float> @llvm.x86.avx512.mask.shuf.f32x4(<16 x float>, <16 x float>, i32, <16 x float>, i16)
|
||||||
|
|
||||||
define <16 x float>@test_int_x86_avx512_mask_shuf_f32x4(<16 x float> %x0, <16 x float> %x1, <16 x float> %x3, i16 %x4) {
|
define <16 x float>@test_int_x86_avx512_mask_shuf_f32x4(<16 x float> %x0, <16 x float> %x1, <16 x float> %x3, i16 %x4) {
|
||||||
|
|
|
||||||
|
|
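A note on the CHECK lines above: the shlb $7 / sarb $7 pair is the SIGN_EXTEND_INREG from the lowering, applied to bit 0 of the i8 result, so the intrinsic returns 0 or -1 rather than 0 or 1. Equivalent IR, as an illustrative sketch:

    ; shlb $7 / sarb $7 == sign-extend bit 0 into the whole i8
    define i8 @sext_low_bit(i8 %x) {
      %shl = shl i8 %x, 7
      %r = ashr i8 %shl, 7
      ret i8 %r
    }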
test/MC/X86/avx512-encodings.s
@@ -14958,6 +14958,94 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2
 // CHECK: encoding: [0x62,0xf2,0xc5,0x08,0x43,0x92,0xf8,0xfb,0xff,0xff]
           vgetexpsd -1032(%rdx), %xmm7, %xmm2

+// CHECK: vcmpss $171, %xmm12, %xmm15, %k4
+// CHECK: encoding: [0x62,0xd1,0x06,0x08,0xc2,0xe4,0xab]
+          vcmpss $0xab, %xmm12, %xmm15, %k4
+
+// CHECK: vcmpss $171, %xmm12, %xmm15, %k4 {%k5}
+// CHECK: encoding: [0x62,0xd1,0x06,0x0d,0xc2,0xe4,0xab]
+          vcmpss $0xab, %xmm12, %xmm15, %k4 {%k5}
+
+// CHECK: vcmpss $171,{sae}, %xmm12, %xmm15, %k4
+// CHECK: encoding: [0x62,0xd1,0x06,0x18,0xc2,0xe4,0xab]
+          vcmpss $0xab,{sae}, %xmm12, %xmm15, %k4
+
+// CHECK: vcmpss $123, %xmm12, %xmm15, %k4
+// CHECK: encoding: [0x62,0xd1,0x06,0x08,0xc2,0xe4,0x7b]
+          vcmpss $0x7b, %xmm12, %xmm15, %k4
+
+// CHECK: vcmpss $123,{sae}, %xmm12, %xmm15, %k4
+// CHECK: encoding: [0x62,0xd1,0x06,0x18,0xc2,0xe4,0x7b]
+          vcmpss $0x7b,{sae}, %xmm12, %xmm15, %k4
+
+// CHECK: vcmpss $123, (%rcx), %xmm15, %k4
+// CHECK: encoding: [0x62,0xf1,0x06,0x08,0xc2,0x21,0x7b]
+          vcmpss $0x7b, (%rcx), %xmm15, %k4
+
+// CHECK: vcmpss $123, 291(%rax,%r14,8), %xmm15, %k4
+// CHECK: encoding: [0x62,0xb1,0x06,0x08,0xc2,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vcmpss $0x7b, 291(%rax,%r14,8), %xmm15, %k4
+
+// CHECK: vcmpss $123, 508(%rdx), %xmm15, %k4
+// CHECK: encoding: [0x62,0xf1,0x06,0x08,0xc2,0x62,0x7f,0x7b]
+          vcmpss $0x7b, 508(%rdx), %xmm15, %k4
+
+// CHECK: vcmpss $123, 512(%rdx), %xmm15, %k4
+// CHECK: encoding: [0x62,0xf1,0x06,0x08,0xc2,0xa2,0x00,0x02,0x00,0x00,0x7b]
+          vcmpss $0x7b, 512(%rdx), %xmm15, %k4
+
+// CHECK: vcmpss $123, -512(%rdx), %xmm15, %k4
+// CHECK: encoding: [0x62,0xf1,0x06,0x08,0xc2,0x62,0x80,0x7b]
+          vcmpss $0x7b, -512(%rdx), %xmm15, %k4
+
+// CHECK: vcmpss $123, -516(%rdx), %xmm15, %k4
+// CHECK: encoding: [0x62,0xf1,0x06,0x08,0xc2,0xa2,0xfc,0xfd,0xff,0xff,0x7b]
+          vcmpss $0x7b, -516(%rdx), %xmm15, %k4
+
+// CHECK: vcmpsd $171, %xmm4, %xmm19, %k5
+// CHECK: encoding: [0x62,0xf1,0xe7,0x00,0xc2,0xec,0xab]
+          vcmpsd $0xab, %xmm4, %xmm19, %k5
+
+// CHECK: vcmpsd $171, %xmm4, %xmm19, %k5 {%k1}
+// CHECK: encoding: [0x62,0xf1,0xe7,0x01,0xc2,0xec,0xab]
+          vcmpsd $0xab, %xmm4, %xmm19, %k5 {%k1}
+
+// CHECK: vcmpsd $171,{sae}, %xmm4, %xmm19, %k5
+// CHECK: encoding: [0x62,0xf1,0xe7,0x10,0xc2,0xec,0xab]
+          vcmpsd $0xab,{sae}, %xmm4, %xmm19, %k5
+
+// CHECK: vcmpsd $123, %xmm4, %xmm19, %k5
+// CHECK: encoding: [0x62,0xf1,0xe7,0x00,0xc2,0xec,0x7b]
+          vcmpsd $0x7b, %xmm4, %xmm19, %k5
+
+// CHECK: vcmpsd $123,{sae}, %xmm4, %xmm19, %k5
+// CHECK: encoding: [0x62,0xf1,0xe7,0x10,0xc2,0xec,0x7b]
+          vcmpsd $0x7b,{sae}, %xmm4, %xmm19, %k5
+
+// CHECK: vcmpsd $123, (%rcx), %xmm19, %k5
+// CHECK: encoding: [0x62,0xf1,0xe7,0x00,0xc2,0x29,0x7b]
+          vcmpsd $0x7b, (%rcx), %xmm19, %k5
+
+// CHECK: vcmpsd $123, 291(%rax,%r14,8), %xmm19, %k5
+// CHECK: encoding: [0x62,0xb1,0xe7,0x00,0xc2,0xac,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vcmpsd $0x7b, 291(%rax,%r14,8), %xmm19, %k5
+
+// CHECK: vcmpsd $123, 1016(%rdx), %xmm19, %k5
+// CHECK: encoding: [0x62,0xf1,0xe7,0x00,0xc2,0x6a,0x7f,0x7b]
+          vcmpsd $0x7b, 1016(%rdx), %xmm19, %k5
+
+// CHECK: vcmpsd $123, 1024(%rdx), %xmm19, %k5
+// CHECK: encoding: [0x62,0xf1,0xe7,0x00,0xc2,0xaa,0x00,0x04,0x00,0x00,0x7b]
+          vcmpsd $0x7b, 1024(%rdx), %xmm19, %k5
+
+// CHECK: vcmpsd $123, -1024(%rdx), %xmm19, %k5
+// CHECK: encoding: [0x62,0xf1,0xe7,0x00,0xc2,0x6a,0x80,0x7b]
+          vcmpsd $0x7b, -1024(%rdx), %xmm19, %k5
+
+// CHECK: vcmpsd $123, -1032(%rdx), %xmm19, %k5
+// CHECK: encoding: [0x62,0xf1,0xe7,0x00,0xc2,0xaa,0xf8,0xfb,0xff,0xff,0x7b]
+          vcmpsd $0x7b, -1032(%rdx), %xmm19, %k5
+
 // CHECK: vsqrtss %xmm8, %xmm19, %xmm22
 // CHECK: encoding: [0x62,0xc1,0x66,0x00,0x51,0xf0]
           vsqrtss %xmm8, %xmm19, %xmm22
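Worth noting from the encodings above: the register forms differ only in the EVEX P2 byte, where {sae} sets EVEX.b (0x08 becomes 0x18 for the vcmpss cases, 0x00 becomes 0x10 for vcmpsd) and the write-mask register lands in the EVEX.aaa field (0x08 becomes 0x0d for {%k5}, 0x00 becomes 0x01 for {%k1}); the comparison predicate is always the trailing immediate byte.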