mirror of https://github.com/dotnet/runtime
Merge b6fb8599e1
into 02596ba8d9
This commit is contained in:
commit
7879c8ecf0
|
@ -21310,19 +21310,133 @@ GenTree* Compiler::gtNewSimdBinOpNode(
|
|||
#if defined(TARGET_XARCH) && defined(FEATURE_HW_INTRINSICS)
|
||||
case GT_DIV:
|
||||
{
|
||||
if (simdBaseType == TYP_INT)
|
||||
if (varTypeIsIntegral(simdBaseType))
|
||||
{
|
||||
assert(compIsaSupportedDebugOnly(InstructionSet_AVX) ||
|
||||
assert(!varTypeIsLong(simdBaseType));
|
||||
if (((varTypeIsShort(simdBaseType) || varTypeIsByte(simdBaseType) ||
|
||||
(varTypeIsInt(simdBaseType) && !compOpportunisticallyDependsOn(InstructionSet_AVX512))) &&
|
||||
simdSize > 16) ||
|
||||
(varTypeIsInt(simdBaseType) &&
|
||||
(simdSize == 64 || (simdSize == 32 && !compOpportunisticallyDependsOn(InstructionSet_AVX)))))
|
||||
{
|
||||
var_types divType = simdSize == 64 ? TYP_SIMD32 : TYP_SIMD16;
|
||||
GenTree* op1Dup = fgMakeMultiUse(&op1);
|
||||
GenTree* op2Dup = fgMakeMultiUse(&op2);
|
||||
GenTree* op1Lower = gtNewSimdGetLowerNode(divType, op1, simdBaseJitType, simdSize);
|
||||
GenTree* op2Lower = gtNewSimdGetLowerNode(divType, op2, simdBaseJitType, simdSize);
|
||||
GenTree* divLower =
|
||||
gtNewSimdBinOpNode(GT_DIV, divType, op1Lower, op2Lower, simdBaseJitType, simdSize / 2);
|
||||
GenTree* op1Upper = gtNewSimdGetUpperNode(divType, op1Dup, simdBaseJitType, simdSize);
|
||||
GenTree* op2Upper = gtNewSimdGetUpperNode(divType, op2Dup, simdBaseJitType, simdSize);
|
||||
GenTree* divUpper =
|
||||
gtNewSimdBinOpNode(GT_DIV, divType, op1Upper, op2Upper, simdBaseJitType, simdSize / 2);
|
||||
GenTree* divResult = gtNewSimdWithUpperNode(type, divLower, divUpper, simdBaseJitType, simdSize);
|
||||
return divResult;
|
||||
}
|
||||
|
||||
if (varTypeIsShort(simdBaseType) && compOpportunisticallyDependsOn(InstructionSet_AVX2))
|
||||
{
|
||||
assert(simdSize == 16);
|
||||
CorInfoType cvtType =
|
||||
varTypeIsUnsigned(simdBaseType) && compOpportunisticallyDependsOn(InstructionSet_AVX512)
|
||||
? CORINFO_TYPE_UINT
|
||||
: CORINFO_TYPE_INT;
|
||||
NamedIntrinsic cvtIntrinsic = NI_AVX2_ConvertToVector256Int32;
|
||||
op1 = gtNewSimdHWIntrinsicNode(TYP_SIMD32, op1, cvtIntrinsic, simdBaseJitType, simdSize);
|
||||
op2 = gtNewSimdHWIntrinsicNode(TYP_SIMD32, op2, cvtIntrinsic, simdBaseJitType, simdSize);
|
||||
GenTree* divResult = gtNewSimdBinOpNode(GT_DIV, TYP_SIMD32, op1, op2, cvtType, simdSize * 2);
|
||||
if (compOpportunisticallyDependsOn(InstructionSet_AVX512))
|
||||
{
|
||||
return gtNewSimdHWIntrinsicNode(type, divResult,
|
||||
varTypeIsSigned(simdBaseType)
|
||||
? NI_AVX512_ConvertToVector128Int16
|
||||
: NI_AVX512_ConvertToVector128UInt16,
|
||||
cvtType, simdSize * 2);
|
||||
}
|
||||
GenTree* divResultDup = fgMakeMultiUse(&divResult);
|
||||
GenTree* divResultLower = gtNewSimdGetLowerNode(type, divResult, cvtType, simdSize * 2);
|
||||
GenTree* divResultUpper = gtNewSimdGetUpperNode(type, divResultDup, cvtType, simdSize * 2);
|
||||
return gtNewSimdNarrowNode(type, divResultLower, divResultUpper, simdBaseJitType, simdSize);
|
||||
}
|
||||
|
||||
if (varTypeIsByte(simdBaseType) && compOpportunisticallyDependsOn(InstructionSet_AVX512))
|
||||
{
|
||||
assert(simdSize == 16);
|
||||
NamedIntrinsic cvtIntrinsic = varTypeIsSigned(simdBaseType) ? NI_AVX512_ConvertToVector512Int32
|
||||
: NI_AVX512_ConvertToVector512UInt32;
|
||||
CorInfoType cvtType = varTypeIsSigned(simdBaseType) ? CORINFO_TYPE_INT : CORINFO_TYPE_UINT;
|
||||
op1 = gtNewSimdHWIntrinsicNode(TYP_SIMD64, op1, cvtIntrinsic, simdBaseJitType, simdSize * 4);
|
||||
op2 = gtNewSimdHWIntrinsicNode(TYP_SIMD64, op2, cvtIntrinsic, simdBaseJitType, simdSize * 4);
|
||||
|
||||
GenTree* op1Dup = fgMakeMultiUse(&op1);
|
||||
GenTree* op2Dup = fgMakeMultiUse(&op2);
|
||||
GenTree* op1Lower = gtNewSimdGetLowerNode(TYP_SIMD32, op1, cvtType, simdSize * 4);
|
||||
GenTree* op2Lower = gtNewSimdGetLowerNode(TYP_SIMD32, op2, cvtType, simdSize * 4);
|
||||
GenTree* divLower =
|
||||
gtNewSimdBinOpNode(GT_DIV, TYP_SIMD32, op1Lower, op2Lower, cvtType, simdSize * 2);
|
||||
GenTree* op1Upper = gtNewSimdGetUpperNode(TYP_SIMD32, op1Dup, cvtType, simdSize * 4);
|
||||
GenTree* op2Upper = gtNewSimdGetUpperNode(TYP_SIMD32, op2Dup, cvtType, simdSize * 4);
|
||||
GenTree* divUpper =
|
||||
gtNewSimdBinOpNode(GT_DIV, TYP_SIMD32, op1Upper, op2Upper, cvtType, simdSize * 2);
|
||||
|
||||
GenTree* divResult = gtNewSimdWithUpperNode(TYP_SIMD64, divLower, divUpper, cvtType, simdSize * 4);
|
||||
return gtNewSimdHWIntrinsicNode(TYP_SIMD16, divResult,
|
||||
varTypeIsSigned(simdBaseType) ? NI_AVX512_ConvertToVector128SByte
|
||||
: NI_AVX512_ConvertToVector128Byte,
|
||||
cvtType, simdSize * 4);
|
||||
}
|
||||
|
||||
if (varTypeIsShort(simdBaseType) || varTypeIsByte(simdBaseType))
|
||||
{
|
||||
assert(simdSize == 16);
|
||||
CorInfoType cvtType = varTypeIsShort(simdBaseType) ? CORINFO_TYPE_INT : CORINFO_TYPE_SHORT;
|
||||
if (compOpportunisticallyDependsOn(InstructionSet_AVX512))
|
||||
{
|
||||
CorInfoType signedType = varTypeIsShort(simdBaseType) ? CORINFO_TYPE_INT : CORINFO_TYPE_SHORT;
|
||||
CorInfoType unsignedType =
|
||||
varTypeIsShort(simdBaseType) ? CORINFO_TYPE_UINT : CORINFO_TYPE_USHORT;
|
||||
cvtType = varTypeIsSigned(simdBaseType) ? signedType : unsignedType;
|
||||
}
|
||||
GenTree* op1Dup = fgMakeMultiUse(&op1);
|
||||
GenTree* op2Dup = fgMakeMultiUse(&op2);
|
||||
GenTree* op1LowerWiden = gtNewSimdWidenLowerNode(type, op1, simdBaseJitType, simdSize);
|
||||
GenTree* op2LowerWiden = gtNewSimdWidenLowerNode(type, op2, simdBaseJitType, simdSize);
|
||||
GenTree* divLower =
|
||||
gtNewSimdBinOpNode(GT_DIV, type, op1LowerWiden, op2LowerWiden, cvtType, simdSize);
|
||||
GenTree* op1UpperWiden = gtNewSimdWidenUpperNode(type, op1Dup, simdBaseJitType, simdSize);
|
||||
GenTree* op2UpperWiden = gtNewSimdWidenUpperNode(type, op2Dup, simdBaseJitType, simdSize);
|
||||
GenTree* divUpper =
|
||||
gtNewSimdBinOpNode(GT_DIV, type, op1UpperWiden, op2UpperWiden, cvtType, simdSize);
|
||||
return gtNewSimdNarrowNode(type, divLower, divUpper, simdBaseJitType, simdSize);
|
||||
}
|
||||
|
||||
assert((varTypeIsSigned(simdBaseType) && compIsaSupportedDebugOnly(InstructionSet_SSE42)) ||
|
||||
compIsaSupportedDebugOnly(InstructionSet_AVX512));
|
||||
assert(varTypeIsInt(simdBaseType));
|
||||
|
||||
assert(simdSize == 16 || simdSize == 32);
|
||||
if (compOpportunisticallyDependsOn(InstructionSet_AVX))
|
||||
{
|
||||
NamedIntrinsic divIntrinsic = simdSize == 16 ? NI_Vector128_op_Division : NI_Vector256_op_Division;
|
||||
return gtNewSimdHWIntrinsicNode(op1->TypeGet(), op1, op2, divIntrinsic, simdBaseJitType, simdSize);
|
||||
}
|
||||
|
||||
NamedIntrinsic divIntrinsic = simdSize == 16 ? NI_Vector128_op_Division : NI_Vector256_op_Division;
|
||||
unsigned int divideOpSimdSize = simdSize * 2;
|
||||
|
||||
GenTree* divOp =
|
||||
gtNewSimdHWIntrinsicNode(op1->TypeGet(), op1, op2, divIntrinsic, simdBaseJitType, divideOpSimdSize);
|
||||
return divOp;
|
||||
assert(simdSize == 16);
|
||||
NamedIntrinsic divIntrinsic = NI_Vector128_op_Division;
|
||||
GenTree* op1Dup = fgMakeMultiUse(&op1);
|
||||
GenTree* op2Dup = fgMakeMultiUse(&op2);
|
||||
GenTree* op1Hi =
|
||||
gtNewSimdHWIntrinsicNode(type, op1, op1Dup, NI_X86Base_MoveHighToLow, CORINFO_TYPE_FLOAT, simdSize);
|
||||
GenTree* op2Hi =
|
||||
gtNewSimdHWIntrinsicNode(type, op2, op2Dup, NI_X86Base_MoveHighToLow, CORINFO_TYPE_FLOAT, simdSize);
|
||||
GenTree* op1Dup2 = fgMakeMultiUse(&op1Dup);
|
||||
GenTree* op2Dup2 = fgMakeMultiUse(&op2Dup);
|
||||
GenTree* divHi = gtNewSimdHWIntrinsicNode(type, op1Hi, op2Hi, divIntrinsic, simdBaseJitType, simdSize);
|
||||
GenTree* divLo =
|
||||
gtNewSimdHWIntrinsicNode(type, op1Dup2, op2Dup2, divIntrinsic, simdBaseJitType, simdSize);
|
||||
GenTree* div = gtNewSimdHWIntrinsicNode(type, divHi, divLo, NI_X86Base_MoveLowToHigh,
|
||||
CORINFO_TYPE_FLOAT, simdSize);
|
||||
return gtNewSimdHWIntrinsicNode(type, div, gtNewIconNode(0x4E), NI_X86Base_Shuffle, CORINFO_TYPE_INT,
|
||||
simdSize);
|
||||
}
|
||||
unreached();
|
||||
}
|
||||
|
@ -30121,7 +30235,8 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp,
|
|||
case GT_DIV:
|
||||
{
|
||||
#if defined(TARGET_XARCH)
|
||||
assert(varTypeIsFloating(simdBaseType) || varTypeIsInt(simdBaseType));
|
||||
assert(varTypeIsFloating(simdBaseType) ||
|
||||
(varTypeIsIntegral(simdBaseType) && !varTypeIsLong(simdBaseType)));
|
||||
#else
|
||||
assert(varTypeIsFloating(simdBaseType));
|
||||
#endif
|
||||
|
|
|
@ -2363,19 +2363,17 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions)
|
|||
regNumber op2Reg = op2->GetRegNum();
|
||||
regNumber tmpReg1 = internalRegisters.Extract(node, RBM_ALLFLOAT);
|
||||
regNumber tmpReg2 = internalRegisters.Extract(node, RBM_ALLFLOAT);
|
||||
emitAttr typeSize = emitTypeSize(node->TypeGet());
|
||||
var_types nodeType = node->TypeGet();
|
||||
emitAttr typeSize = emitTypeSize(nodeType);
|
||||
noway_assert(typeSize == EA_16BYTE || typeSize == EA_32BYTE);
|
||||
emitAttr divTypeSize = typeSize == EA_16BYTE ? EA_32BYTE : EA_64BYTE;
|
||||
emitAttr divTypeSize = typeSize;
|
||||
|
||||
simd_t negOneIntVec = simd_t::AllBitsSet();
|
||||
simd_t minValueInt{};
|
||||
int numElements = genTypeSize(node->TypeGet()) / 4;
|
||||
for (int i = 0; i < numElements; i++)
|
||||
if (compiler->compOpportunisticallyDependsOn(InstructionSet_AVX))
|
||||
{
|
||||
minValueInt.i32[i] = INT_MIN;
|
||||
divTypeSize = typeSize == EA_16BYTE ? EA_32BYTE : EA_64BYTE;
|
||||
}
|
||||
CORINFO_FIELD_HANDLE minValueFld = emit->emitSimdConst(&minValueInt, typeSize);
|
||||
CORINFO_FIELD_HANDLE negOneFld = emit->emitSimdConst(&negOneIntVec, typeSize);
|
||||
simd_t negOneIntVec = simd_t::AllBitsSet();
|
||||
CORINFO_FIELD_HANDLE negOneFld = emit->emitSimdConst(&negOneIntVec, typeSize);
|
||||
|
||||
// div-by-zero check
|
||||
emit->emitIns_SIMD_R_R_R(INS_xorpd, typeSize, tmpReg1, tmpReg1, tmpReg1, instOptions);
|
||||
|
@ -2384,16 +2382,31 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions)
|
|||
genJumpToThrowHlpBlk(EJ_jne, SCK_DIV_BY_ZERO);
|
||||
|
||||
// overflow check
|
||||
emit->emitIns_SIMD_R_R_C(INS_pcmpeqd, typeSize, tmpReg1, op1Reg, minValueFld, 0, instOptions);
|
||||
emit->emitIns_SIMD_R_R_C(INS_pcmpeqd, typeSize, tmpReg2, op2Reg, negOneFld, 0, instOptions);
|
||||
emit->emitIns_SIMD_R_R_R(INS_pandd, typeSize, tmpReg1, tmpReg1, tmpReg2, instOptions);
|
||||
emit->emitIns_R_R(INS_ptest, typeSize, tmpReg1, tmpReg1, instOptions);
|
||||
genJumpToThrowHlpBlk(EJ_jne, SCK_OVERFLOW);
|
||||
if (varTypeIsSigned(baseType))
|
||||
{
|
||||
simd_t minValueInt{};
|
||||
int numElements = genTypeSize(nodeType) / 4;
|
||||
for (int i = 0; i < numElements; i++)
|
||||
{
|
||||
minValueInt.i32[i] = INT_MIN;
|
||||
}
|
||||
CORINFO_FIELD_HANDLE minValueFld = emit->emitSimdConst(&minValueInt, typeSize);
|
||||
|
||||
emit->emitIns_R_R(INS_cvtdq2pd, divTypeSize, tmpReg1, op1Reg, instOptions);
|
||||
emit->emitIns_R_R(INS_cvtdq2pd, divTypeSize, tmpReg2, op2Reg, instOptions);
|
||||
emit->emitIns_SIMD_R_R_C(INS_pcmpeqd, typeSize, tmpReg1, op1Reg, minValueFld, 0, instOptions);
|
||||
emit->emitIns_SIMD_R_R_C(INS_pcmpeqd, typeSize, tmpReg2, op2Reg, negOneFld, 0, instOptions);
|
||||
emit->emitIns_SIMD_R_R_R(INS_pandd, typeSize, tmpReg1, tmpReg1, tmpReg2, instOptions);
|
||||
emit->emitIns_R_R(INS_ptest, typeSize, tmpReg1, tmpReg1, instOptions);
|
||||
genJumpToThrowHlpBlk(EJ_jne, SCK_OVERFLOW);
|
||||
}
|
||||
|
||||
emit->emitIns_R_R(varTypeIsSigned(baseType) ? INS_cvtdq2pd : INS_vcvtudq2pd, divTypeSize, tmpReg1, op1Reg,
|
||||
instOptions);
|
||||
emit->emitIns_R_R(varTypeIsSigned(baseType) ? INS_cvtdq2pd : INS_vcvtudq2pd, divTypeSize, tmpReg2, op2Reg,
|
||||
instOptions);
|
||||
emit->emitIns_SIMD_R_R_R(INS_divpd, divTypeSize, targetReg, tmpReg1, tmpReg2, instOptions);
|
||||
emit->emitIns_R_R(INS_cvttpd2dq, divTypeSize, targetReg, targetReg, instOptions);
|
||||
emit->emitIns_R_R(varTypeIsSigned(baseType) ? INS_cvttpd2dq : INS_vcvttpd2udq, divTypeSize, targetReg,
|
||||
targetReg, instOptions);
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
|
@ -2280,9 +2280,15 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
|
|||
{
|
||||
#if defined(TARGET_XARCH) && defined(FEATURE_HW_INTRINSICS)
|
||||
// Check to see if it is possible to emulate the integer division
|
||||
if (!(simdBaseType == TYP_INT &&
|
||||
((simdSize == 16 && compOpportunisticallyDependsOn(InstructionSet_AVX)) ||
|
||||
(simdSize == 32 && compOpportunisticallyDependsOn(InstructionSet_AVX512)))))
|
||||
if (varTypeIsLong(simdBaseType))
|
||||
{
|
||||
break;
|
||||
}
|
||||
if (!compOpportunisticallyDependsOn(InstructionSet_SSE42))
|
||||
{
|
||||
break;
|
||||
}
|
||||
if (simdBaseType == TYP_UINT && !compOpportunisticallyDependsOn(InstructionSet_AVX512))
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue