[X86][AVX512DQ] add scalar fpclass

Differential Revision: http://reviews.llvm.org/D13769

llvm-svn: 250650
This commit is contained in:
Asaf Badouh 2015-10-18 11:04:38 +00:00
parent cbb9550537
commit 696e8e0bb7
7 changed files with 247 additions and 6 deletions

View File

@ -1705,6 +1705,14 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
GCCBuiltin<"__builtin_ia32_fpclassps512_mask">,
Intrinsic<[llvm_i16_ty], [llvm_v16f32_ty, llvm_i32_ty, llvm_i16_ty],
[IntrNoMem]>;
// Masked scalar fpclass on a <2 x double> source.
// Operands: (v2f64 source, i32 immediate, i8 mask); result: i8.
// Declared IntrNoMem.
def int_x86_avx512_mask_fpclass_sd :
GCCBuiltin<"__builtin_ia32_fpclasssd">,
Intrinsic<[llvm_i8_ty], [llvm_v2f64_ty, llvm_i32_ty, llvm_i8_ty],
[IntrNoMem]>;
// Masked scalar fpclass on a <4 x float> source.
// Operands: (v4f32 source, i32 immediate, i8 mask); result: i8.
// Declared IntrNoMem.
def int_x86_avx512_mask_fpclass_ss :
GCCBuiltin<"__builtin_ia32_fpclassss">,
Intrinsic<[llvm_i8_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i8_ty],
[IntrNoMem]>;
}
// Vector extract sign mask

View File

@ -16012,6 +16012,8 @@ static SDValue getScalarMaskingNode(SDValue Op, SDValue Mask,
if (Op.getOpcode() == X86ISD::FSETCC)
return DAG.getNode(ISD::AND, dl, VT, Op, IMask);
if (Op.getOpcode() == X86ISD::VFPCLASS)
return DAG.getNode(ISD::OR, dl, VT, Op, IMask);
if (PreservedSrc.getOpcode() == ISD::UNDEF)
PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl);
@ -16357,6 +16359,15 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
DAG.getIntPtrConstant(0, dl));
return DAG.getBitcast(Op.getValueType(), Res);
}
case FPCLASSS: {
  // Scalar fpclass intrinsic. Operand layout of the intrinsic node:
  //   1: source vector, 2: immediate, 3: i8 mask.
  SDValue Src1 = Op.getOperand(1);
  SDValue Imm = Op.getOperand(2);
  SDValue Mask = Op.getOperand(3);
  // The classification node produces a single i1.
  SDValue FPclass = DAG.getNode(IntrData->Opc0, dl, MVT::i1, Src1, Imm);
  // Combine the i1 result with the caller-supplied mask.
  SDValue FPclassMask = getScalarMaskingNode(FPclass, Mask,
      DAG.getTargetConstant(0, dl, MVT::i1), Subtarget, DAG);
  // Widen to the i8 the intrinsic returns. Only bit 0 of the result is
  // meaningful, so the upper bits must be zero: use ZERO_EXTEND.
  // SIGN_EXTEND would turn a true i1 into 0xFF instead of 0x01.
  return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, FPclassMask);
}
case CMP_MASK:
case CMP_MASK_CC: {
// Comparison intrinsics with masks.

View File

@ -1803,6 +1803,42 @@ def : Pat<(v8i1 (X86cmpmu (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)),
// ----------------------------------------------------------------
// FPClass
// Scalar fpclass instruction forms:
//   mask = op(reg_scalar, imm)
//   mask = op(mem_scalar, imm)
// Instantiates four records, all guarded by predicate `prd`:
//   rr  - register source, no write-mask
//   rrk - register source, with write-mask ($mask, EVEX_K)
//   rm  - memory source, no write-mask
//   rmk - memory source, with write-mask ($mask, EVEX_K)
// The destination is always a mask register (_.KRC); $src2 is the immediate.
multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _, Predicate prd> {
let Predicates = [prd] in {
// Unmasked register form: $dst = OpNode($src1, $src2).
def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
(ins _.RC:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst | $dst, $src1, $src2}",
[(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1),
(i32 imm:$src2)))], NoItinerary>;
// Masked register form. The selection pattern matches
// (or $mask, OpNode(...)).
// NOTE(review): the instruction's write-mask is believed to zero the result
// bit when the mask bit is clear (AND-like behaviour) - confirm that matching
// `or` here is intended; it must stay in sync with the node emitted by the
// scalar masking lowering for X86ISD::VFPCLASS.
def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix#
"\t{$src2, $src1, $dst {${mask}} | $dst {${mask}}, $src1, $src2}",
[(set _.KRC:$dst,(or _.KRCWM:$mask,
(OpNode (_.VT _.RC:$src1),
(i32 imm:$src2))))], NoItinerary>, EVEX_K;
// Memory-source forms: the source is loaded via _.LdFrag and bitconverted to
// _.VT. AddedComplexity is raised, presumably so these folded-load patterns
// are preferred over the register forms - TODO confirm.
let mayLoad = 1, AddedComplexity = 20 in {
// Unmasked memory form.
def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.MemOp:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix##
"\t{$src2, $src1, $dst | $dst, $src1, $src2}",
[(set _.KRC:$dst,
(OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
(i32 imm:$src2)))], NoItinerary>;
// Masked memory form; same (or $mask, ...) pattern shape as rrk, so the
// review note on rrk applies here as well.
def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix##
"\t{$src2, $src1, $dst {${mask}} | $dst {${mask}}, $src1, $src2}",
[(set _.KRC:$dst,(or _.KRCWM:$mask,
(OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
(i32 imm:$src2))))], NoItinerary>, EVEX_K;
}
}
}
//handle fpclass instruction mask = fpclass(reg_vec, reg_vec, imm)
// fpclass(reg_vec, mem_vec, imm)
// fpclass(reg_vec, broadcast(eltVt), imm)
@ -1873,15 +1909,19 @@ multiclass avx512_vector_fpclass_all<string OpcodeStr,
}
multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
SDNode OpNode, Predicate prd>{
bits<8> opcScalar, SDNode VecOpNode, SDNode ScalarOpNode, Predicate prd>{
defm PS : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f32_info, opcVec,
OpNode, prd, "{l}">, EVEX_CD8<32, CD8VF>;
VecOpNode, prd, "{l}">, EVEX_CD8<32, CD8VF>;
defm PD : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f64_info, opcVec,
OpNode, prd, "{q}">,EVEX_CD8<64, CD8VF> , VEX_W;
VecOpNode, prd, "{q}">,EVEX_CD8<64, CD8VF> , VEX_W;
defm SS : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode,
f32x_info, prd>, EVEX_CD8<32, CD8VT1>;
defm SD : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode,
f64x_info, prd>, EVEX_CD8<64, CD8VT1>, VEX_W;
}
defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, X86Vfpclass, HasDQI>,
AVX512AIi8Base,EVEX;
defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, X86Vfpclass,
X86Vfpclasss, HasDQI>, AVX512AIi8Base,EVEX;
//-----------------------------------------------------------------
// Mask register copy, including

View File

@ -376,6 +376,8 @@ def X86VGetMant : SDNode<"X86ISD::VGETMANT", SDTFPUnaryOpImmRound>;
def X86Vfpclass : SDNode<"X86ISD::VFPCLASS",
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCVecEltisVT<0, i1>,
SDTCisVec<1>, SDTCisInt<2>]>, []>;
// Second type profile for the "X86ISD::VFPCLASS" opcode, used by the scalar
// fpclass patterns: one result and two operands, where the result (0) is
// integer-typed, operand 1 is floating-point-typed and operand 2 (the
// immediate) is integer-typed. It names the same opcode string as X86Vfpclass
// and differs only in the type constraints.
def X86Vfpclasss : SDNode<"X86ISD::VFPCLASS", SDTypeProfile<1, 2, [SDTCisInt<0>,
SDTCisFP<1>, SDTCisInt<2>]>,[]>;
def X86SubVBroadcast : SDNode<"X86ISD::SUBV_BROADCAST",
SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,

View File

@ -18,7 +18,7 @@ namespace llvm {
enum IntrinsicType {
INTR_NO_TYPE,
GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, ADX, FPCLASS,
GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, ADX, FPCLASS, FPCLASSS,
INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_2OP_IMM8, INTR_TYPE_3OP, INTR_TYPE_4OP,
CMP_MASK, CMP_MASK_CC,CMP_MASK_SCALAR_CC, VSHIFT, VSHIFT_MASK, COMI,
INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM,
@ -688,6 +688,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_fpclass_ps_128, FPCLASS, X86ISD::VFPCLASS, 0),
X86_INTRINSIC_DATA(avx512_mask_fpclass_ps_256, FPCLASS, X86ISD::VFPCLASS, 0),
X86_INTRINSIC_DATA(avx512_mask_fpclass_ps_512, FPCLASS, X86ISD::VFPCLASS, 0),
X86_INTRINSIC_DATA(avx512_mask_fpclass_sd, FPCLASSS, X86ISD::VFPCLASS, 0),
X86_INTRINSIC_DATA(avx512_mask_fpclass_ss, FPCLASSS, X86ISD::VFPCLASS, 0),
X86_INTRINSIC_DATA(avx512_mask_getexp_pd_128, INTR_TYPE_1OP_MASK_RM,
X86ISD::FGETEXP_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_getexp_pd_256, INTR_TYPE_1OP_MASK_RM,

View File

@ -467,3 +467,37 @@ define i16@test_int_x86_avx512_mask_fpclass_ps_512(<16 x float> %x0, i16 %x1) {
%res2 = add i16 %res, %res1
ret i16 %res2
}
; Scalar double-precision fpclass intrinsic.
; The first call passes the mask argument %x1 and is expected to produce a
; vfpclasssd with a {%k1} write-mask; the second call passes an all-ones mask
; (i8 -1) and is expected to produce the unmasked form. Neither call may
; survive as a call in the output.
declare i8 @llvm.x86.avx512.mask.fpclass.sd(<2 x double>, i32, i8)
; CHECK-LABEL: @test_int_x86_avx512_mask_fpclass_sd
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vfpclasssd
; CHECK: %k0 {%k1}
; CHECK: vfpclasssd
; CHECK: %k0
define i8 @test_int_x86_avx512_mask_fpclass_sd(<2 x double> %x0, i8 %x1) {
%res = call i8 @llvm.x86.avx512.mask.fpclass.sd(<2 x double> %x0, i32 2, i8 %x1)
%res1 = call i8 @llvm.x86.avx512.mask.fpclass.sd(<2 x double> %x0, i32 4, i8 -1)
%res2 = add i8 %res, %res1
ret i8 %res2
}
; Scalar single-precision fpclass intrinsic.
; The first call passes the mask argument %x1 (masked vfpclassss expected, with
; {%k1}); the second passes an all-ones mask (i8 -1, unmasked form expected).
; Neither call may survive as a call in the output.
; NOTE(review): both calls use the same immediate (4), whereas the sd test uses
; two different immediates (2 and 4) - confirm this is intentional.
declare i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float>, i32, i8)
; CHECK-LABEL: @test_int_x86_avx512_mask_fpclass_ss
; CHECK-NOT: call
; CHECK: kmovw
; CHECK: vfpclassss
; CHECK: %k0
; CHECK: {%k1}
; CHECK: kmovw
; CHECK: vfpclassss
; CHECK: %k0
define i8 @test_int_x86_avx512_mask_fpclass_ss(<4 x float> %x0, i8 %x1) {
%res = call i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float> %x0, i32 4, i8 %x1)
%res1 = call i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float> %x0, i32 4, i8 -1)
%res2 = add i8 %res, %res1
ret i8 %res2
}

View File

@ -3847,3 +3847,147 @@
// CHECK: vcvtuqq2ps -1032(%rdx){1to8}, %ymm25
// CHECK: encoding: [0x62,0x61,0xff,0x58,0x7a,0x8a,0xf8,0xfb,0xff,0xff]
vcvtuqq2ps -1032(%rdx){1to8}, %ymm25
// vfpclasssd encodings, destination %k4.
// Register source %xmm28 (unmasked and with {%k3}), then memory sources:
// base only, base+index*scale+disp, and displacements that encode as one
// byte (1016, -1024) and as four bytes (1024, -1032).
// CHECK: vfpclasssd $171, %xmm28, %k4
// CHECK: encoding: [0x62,0x93,0xfd,0x08,0x67,0xe4,0xab]
vfpclasssd $0xab, %xmm28, %k4
// CHECK: vfpclasssd $171, %xmm28, %k4 {%k3}
// CHECK: encoding: [0x62,0x93,0xfd,0x0b,0x67,0xe4,0xab]
vfpclasssd $0xab, %xmm28, %k4 {%k3}
// CHECK: vfpclasssd $123, %xmm28, %k4
// CHECK: encoding: [0x62,0x93,0xfd,0x08,0x67,0xe4,0x7b]
vfpclasssd $0x7b, %xmm28, %k4
// CHECK: vfpclasssd $123, (%rcx), %k4
// CHECK: encoding: [0x62,0xf3,0xfd,0x08,0x67,0x21,0x7b]
vfpclasssd $0x7b,(%rcx), %k4
// CHECK: vfpclasssd $123, 291(%rax,%r14,8), %k4
// CHECK: encoding: [0x62,0xb3,0xfd,0x08,0x67,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
vfpclasssd $0x7b,291(%rax,%r14,8), %k4
// CHECK: vfpclasssd $123, 1016(%rdx), %k4
// CHECK: encoding: [0x62,0xf3,0xfd,0x08,0x67,0x62,0x7f,0x7b]
vfpclasssd $0x7b,1016(%rdx), %k4
// CHECK: vfpclasssd $123, 1024(%rdx), %k4
// CHECK: encoding: [0x62,0xf3,0xfd,0x08,0x67,0xa2,0x00,0x04,0x00,0x00,0x7b]
vfpclasssd $0x7b,1024(%rdx), %k4
// CHECK: vfpclasssd $123, -1024(%rdx), %k4
// CHECK: encoding: [0x62,0xf3,0xfd,0x08,0x67,0x62,0x80,0x7b]
vfpclasssd $0x7b,-1024(%rdx), %k4
// CHECK: vfpclasssd $123, -1032(%rdx), %k4
// CHECK: encoding: [0x62,0xf3,0xfd,0x08,0x67,0xa2,0xf8,0xfb,0xff,0xff,0x7b]
vfpclasssd $0x7b,-1032(%rdx), %k4
// vfpclassss encodings, destination %k5.
// Register source %xmm26 (unmasked and with {%k4}), then memory sources:
// base only, base+index*scale+disp, and displacements that encode as one
// byte (508, -512) and as four bytes (512, -516).
// CHECK: vfpclassss $171, %xmm26, %k5
// CHECK: encoding: [0x62,0x93,0x7d,0x08,0x67,0xea,0xab]
vfpclassss $0xab, %xmm26, %k5
// CHECK: vfpclassss $171, %xmm26, %k5 {%k4}
// CHECK: encoding: [0x62,0x93,0x7d,0x0c,0x67,0xea,0xab]
vfpclassss $0xab, %xmm26, %k5 {%k4}
// CHECK: vfpclassss $123, %xmm26, %k5
// CHECK: encoding: [0x62,0x93,0x7d,0x08,0x67,0xea,0x7b]
vfpclassss $0x7b, %xmm26, %k5
// CHECK: vfpclassss $123, (%rcx), %k5
// CHECK: encoding: [0x62,0xf3,0x7d,0x08,0x67,0x29,0x7b]
vfpclassss $0x7b,(%rcx), %k5
// CHECK: vfpclassss $123, 291(%rax,%r14,8), %k5
// CHECK: encoding: [0x62,0xb3,0x7d,0x08,0x67,0xac,0xf0,0x23,0x01,0x00,0x00,0x7b]
vfpclassss $0x7b,291(%rax,%r14,8), %k5
// CHECK: vfpclassss $123, 508(%rdx), %k5
// CHECK: encoding: [0x62,0xf3,0x7d,0x08,0x67,0x6a,0x7f,0x7b]
vfpclassss $0x7b,508(%rdx), %k5
// CHECK: vfpclassss $123, 512(%rdx), %k5
// CHECK: encoding: [0x62,0xf3,0x7d,0x08,0x67,0xaa,0x00,0x02,0x00,0x00,0x7b]
vfpclassss $0x7b,512(%rdx), %k5
// CHECK: vfpclassss $123, -512(%rdx), %k5
// CHECK: encoding: [0x62,0xf3,0x7d,0x08,0x67,0x6a,0x80,0x7b]
vfpclassss $0x7b,-512(%rdx), %k5
// CHECK: vfpclassss $123, -516(%rdx), %k5
// CHECK: encoding: [0x62,0xf3,0x7d,0x08,0x67,0xaa,0xfc,0xfd,0xff,0xff,0x7b]
vfpclassss $0x7b,-516(%rdx), %k5
// vfpclasssd encodings, destination %k3.
// Register source %xmm20 (unmasked and with {%k6}), then memory sources:
// base only, base+index*scale+disp, and displacements that encode as one
// byte (1016, -1024) and as four bytes (1024, -1032).
// CHECK: vfpclasssd $171, %xmm20, %k3
// CHECK: encoding: [0x62,0xb3,0xfd,0x08,0x67,0xdc,0xab]
vfpclasssd $0xab, %xmm20, %k3
// CHECK: vfpclasssd $171, %xmm20, %k3 {%k6}
// CHECK: encoding: [0x62,0xb3,0xfd,0x0e,0x67,0xdc,0xab]
vfpclasssd $0xab, %xmm20, %k3 {%k6}
// CHECK: vfpclasssd $123, %xmm20, %k3
// CHECK: encoding: [0x62,0xb3,0xfd,0x08,0x67,0xdc,0x7b]
vfpclasssd $0x7b, %xmm20, %k3
// CHECK: vfpclasssd $123, (%rcx), %k3
// CHECK: encoding: [0x62,0xf3,0xfd,0x08,0x67,0x19,0x7b]
vfpclasssd $0x7b,(%rcx), %k3
// CHECK: vfpclasssd $123, 4660(%rax,%r14,8), %k3
// CHECK: encoding: [0x62,0xb3,0xfd,0x08,0x67,0x9c,0xf0,0x34,0x12,0x00,0x00,0x7b]
vfpclasssd $0x7b,4660(%rax,%r14,8), %k3
// CHECK: vfpclasssd $123, 1016(%rdx), %k3
// CHECK: encoding: [0x62,0xf3,0xfd,0x08,0x67,0x5a,0x7f,0x7b]
vfpclasssd $0x7b,1016(%rdx), %k3
// CHECK: vfpclasssd $123, 1024(%rdx), %k3
// CHECK: encoding: [0x62,0xf3,0xfd,0x08,0x67,0x9a,0x00,0x04,0x00,0x00,0x7b]
vfpclasssd $0x7b,1024(%rdx), %k3
// CHECK: vfpclasssd $123, -1024(%rdx), %k3
// CHECK: encoding: [0x62,0xf3,0xfd,0x08,0x67,0x5a,0x80,0x7b]
vfpclasssd $0x7b,-1024(%rdx), %k3
// CHECK: vfpclasssd $123, -1032(%rdx), %k3
// CHECK: encoding: [0x62,0xf3,0xfd,0x08,0x67,0x9a,0xf8,0xfb,0xff,0xff,0x7b]
vfpclasssd $0x7b,-1032(%rdx), %k3
// vfpclassss encodings, destination %k4.
// Register source %xmm28 (unmasked and with {%k6}), then memory sources:
// base only, base+index*scale+disp, and displacements that encode as one
// byte (508, -512) and as four bytes (512, -516).
// CHECK: vfpclassss $171, %xmm28, %k4
// CHECK: encoding: [0x62,0x93,0x7d,0x08,0x67,0xe4,0xab]
vfpclassss $0xab, %xmm28, %k4
// CHECK: vfpclassss $171, %xmm28, %k4 {%k6}
// CHECK: encoding: [0x62,0x93,0x7d,0x0e,0x67,0xe4,0xab]
vfpclassss $0xab, %xmm28, %k4 {%k6}
// CHECK: vfpclassss $123, %xmm28, %k4
// CHECK: encoding: [0x62,0x93,0x7d,0x08,0x67,0xe4,0x7b]
vfpclassss $0x7b, %xmm28, %k4
// CHECK: vfpclassss $123, (%rcx), %k4
// CHECK: encoding: [0x62,0xf3,0x7d,0x08,0x67,0x21,0x7b]
vfpclassss $0x7b,(%rcx), %k4
// CHECK: vfpclassss $123, 4660(%rax,%r14,8), %k4
// CHECK: encoding: [0x62,0xb3,0x7d,0x08,0x67,0xa4,0xf0,0x34,0x12,0x00,0x00,0x7b]
vfpclassss $0x7b,4660(%rax,%r14,8), %k4
// CHECK: vfpclassss $123, 508(%rdx), %k4
// CHECK: encoding: [0x62,0xf3,0x7d,0x08,0x67,0x62,0x7f,0x7b]
vfpclassss $0x7b,508(%rdx), %k4
// CHECK: vfpclassss $123, 512(%rdx), %k4
// CHECK: encoding: [0x62,0xf3,0x7d,0x08,0x67,0xa2,0x00,0x02,0x00,0x00,0x7b]
vfpclassss $0x7b,512(%rdx), %k4
// CHECK: vfpclassss $123, -512(%rdx), %k4
// CHECK: encoding: [0x62,0xf3,0x7d,0x08,0x67,0x62,0x80,0x7b]
vfpclassss $0x7b,-512(%rdx), %k4
// CHECK: vfpclassss $123, -516(%rdx), %k4
// CHECK: encoding: [0x62,0xf3,0x7d,0x08,0x67,0xa2,0xfc,0xfd,0xff,0xff,0x7b]
vfpclassss $0x7b,-516(%rdx), %k4