This commit is contained in:
Alex Covington 2025-07-30 22:32:07 +08:00 committed by GitHub
commit 7879c8ecf0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 164 additions and 30 deletions

View File

@@ -21310,19 +21310,133 @@ GenTree* Compiler::gtNewSimdBinOpNode(
#if defined(TARGET_XARCH) && defined(FEATURE_HW_INTRINSICS)
case GT_DIV:
{
if (simdBaseType == TYP_INT)
if (varTypeIsIntegral(simdBaseType))
{
assert(compIsaSupportedDebugOnly(InstructionSet_AVX) ||
assert(!varTypeIsLong(simdBaseType));
if (((varTypeIsShort(simdBaseType) || varTypeIsByte(simdBaseType) ||
(varTypeIsInt(simdBaseType) && !compOpportunisticallyDependsOn(InstructionSet_AVX512))) &&
simdSize > 16) ||
(varTypeIsInt(simdBaseType) &&
(simdSize == 64 || (simdSize == 32 && !compOpportunisticallyDependsOn(InstructionSet_AVX)))))
{
var_types divType = simdSize == 64 ? TYP_SIMD32 : TYP_SIMD16;
GenTree* op1Dup = fgMakeMultiUse(&op1);
GenTree* op2Dup = fgMakeMultiUse(&op2);
GenTree* op1Lower = gtNewSimdGetLowerNode(divType, op1, simdBaseJitType, simdSize);
GenTree* op2Lower = gtNewSimdGetLowerNode(divType, op2, simdBaseJitType, simdSize);
GenTree* divLower =
gtNewSimdBinOpNode(GT_DIV, divType, op1Lower, op2Lower, simdBaseJitType, simdSize / 2);
GenTree* op1Upper = gtNewSimdGetUpperNode(divType, op1Dup, simdBaseJitType, simdSize);
GenTree* op2Upper = gtNewSimdGetUpperNode(divType, op2Dup, simdBaseJitType, simdSize);
GenTree* divUpper =
gtNewSimdBinOpNode(GT_DIV, divType, op1Upper, op2Upper, simdBaseJitType, simdSize / 2);
GenTree* divResult = gtNewSimdWithUpperNode(type, divLower, divUpper, simdBaseJitType, simdSize);
return divResult;
}
if (varTypeIsShort(simdBaseType) && compOpportunisticallyDependsOn(InstructionSet_AVX2))
{
assert(simdSize == 16);
CorInfoType cvtType =
varTypeIsUnsigned(simdBaseType) && compOpportunisticallyDependsOn(InstructionSet_AVX512)
? CORINFO_TYPE_UINT
: CORINFO_TYPE_INT;
NamedIntrinsic cvtIntrinsic = NI_AVX2_ConvertToVector256Int32;
op1 = gtNewSimdHWIntrinsicNode(TYP_SIMD32, op1, cvtIntrinsic, simdBaseJitType, simdSize);
op2 = gtNewSimdHWIntrinsicNode(TYP_SIMD32, op2, cvtIntrinsic, simdBaseJitType, simdSize);
GenTree* divResult = gtNewSimdBinOpNode(GT_DIV, TYP_SIMD32, op1, op2, cvtType, simdSize * 2);
if (compOpportunisticallyDependsOn(InstructionSet_AVX512))
{
return gtNewSimdHWIntrinsicNode(type, divResult,
varTypeIsSigned(simdBaseType)
? NI_AVX512_ConvertToVector128Int16
: NI_AVX512_ConvertToVector128UInt16,
cvtType, simdSize * 2);
}
GenTree* divResultDup = fgMakeMultiUse(&divResult);
GenTree* divResultLower = gtNewSimdGetLowerNode(type, divResult, cvtType, simdSize * 2);
GenTree* divResultUpper = gtNewSimdGetUpperNode(type, divResultDup, cvtType, simdSize * 2);
return gtNewSimdNarrowNode(type, divResultLower, divResultUpper, simdBaseJitType, simdSize);
}
if (varTypeIsByte(simdBaseType) && compOpportunisticallyDependsOn(InstructionSet_AVX512))
{
assert(simdSize == 16);
NamedIntrinsic cvtIntrinsic = varTypeIsSigned(simdBaseType) ? NI_AVX512_ConvertToVector512Int32
: NI_AVX512_ConvertToVector512UInt32;
CorInfoType cvtType = varTypeIsSigned(simdBaseType) ? CORINFO_TYPE_INT : CORINFO_TYPE_UINT;
op1 = gtNewSimdHWIntrinsicNode(TYP_SIMD64, op1, cvtIntrinsic, simdBaseJitType, simdSize * 4);
op2 = gtNewSimdHWIntrinsicNode(TYP_SIMD64, op2, cvtIntrinsic, simdBaseJitType, simdSize * 4);
GenTree* op1Dup = fgMakeMultiUse(&op1);
GenTree* op2Dup = fgMakeMultiUse(&op2);
GenTree* op1Lower = gtNewSimdGetLowerNode(TYP_SIMD32, op1, cvtType, simdSize * 4);
GenTree* op2Lower = gtNewSimdGetLowerNode(TYP_SIMD32, op2, cvtType, simdSize * 4);
GenTree* divLower =
gtNewSimdBinOpNode(GT_DIV, TYP_SIMD32, op1Lower, op2Lower, cvtType, simdSize * 2);
GenTree* op1Upper = gtNewSimdGetUpperNode(TYP_SIMD32, op1Dup, cvtType, simdSize * 4);
GenTree* op2Upper = gtNewSimdGetUpperNode(TYP_SIMD32, op2Dup, cvtType, simdSize * 4);
GenTree* divUpper =
gtNewSimdBinOpNode(GT_DIV, TYP_SIMD32, op1Upper, op2Upper, cvtType, simdSize * 2);
GenTree* divResult = gtNewSimdWithUpperNode(TYP_SIMD64, divLower, divUpper, cvtType, simdSize * 4);
return gtNewSimdHWIntrinsicNode(TYP_SIMD16, divResult,
varTypeIsSigned(simdBaseType) ? NI_AVX512_ConvertToVector128SByte
: NI_AVX512_ConvertToVector128Byte,
cvtType, simdSize * 4);
}
if (varTypeIsShort(simdBaseType) || varTypeIsByte(simdBaseType))
{
assert(simdSize == 16);
CorInfoType cvtType = varTypeIsShort(simdBaseType) ? CORINFO_TYPE_INT : CORINFO_TYPE_SHORT;
if (compOpportunisticallyDependsOn(InstructionSet_AVX512))
{
CorInfoType signedType = varTypeIsShort(simdBaseType) ? CORINFO_TYPE_INT : CORINFO_TYPE_SHORT;
CorInfoType unsignedType =
varTypeIsShort(simdBaseType) ? CORINFO_TYPE_UINT : CORINFO_TYPE_USHORT;
cvtType = varTypeIsSigned(simdBaseType) ? signedType : unsignedType;
}
GenTree* op1Dup = fgMakeMultiUse(&op1);
GenTree* op2Dup = fgMakeMultiUse(&op2);
GenTree* op1LowerWiden = gtNewSimdWidenLowerNode(type, op1, simdBaseJitType, simdSize);
GenTree* op2LowerWiden = gtNewSimdWidenLowerNode(type, op2, simdBaseJitType, simdSize);
GenTree* divLower =
gtNewSimdBinOpNode(GT_DIV, type, op1LowerWiden, op2LowerWiden, cvtType, simdSize);
GenTree* op1UpperWiden = gtNewSimdWidenUpperNode(type, op1Dup, simdBaseJitType, simdSize);
GenTree* op2UpperWiden = gtNewSimdWidenUpperNode(type, op2Dup, simdBaseJitType, simdSize);
GenTree* divUpper =
gtNewSimdBinOpNode(GT_DIV, type, op1UpperWiden, op2UpperWiden, cvtType, simdSize);
return gtNewSimdNarrowNode(type, divLower, divUpper, simdBaseJitType, simdSize);
}
assert((varTypeIsSigned(simdBaseType) && compIsaSupportedDebugOnly(InstructionSet_SSE42)) ||
compIsaSupportedDebugOnly(InstructionSet_AVX512));
assert(varTypeIsInt(simdBaseType));
assert(simdSize == 16 || simdSize == 32);
if (compOpportunisticallyDependsOn(InstructionSet_AVX))
{
NamedIntrinsic divIntrinsic = simdSize == 16 ? NI_Vector128_op_Division : NI_Vector256_op_Division;
return gtNewSimdHWIntrinsicNode(op1->TypeGet(), op1, op2, divIntrinsic, simdBaseJitType, simdSize);
}
NamedIntrinsic divIntrinsic = simdSize == 16 ? NI_Vector128_op_Division : NI_Vector256_op_Division;
unsigned int divideOpSimdSize = simdSize * 2;
GenTree* divOp =
gtNewSimdHWIntrinsicNode(op1->TypeGet(), op1, op2, divIntrinsic, simdBaseJitType, divideOpSimdSize);
return divOp;
assert(simdSize == 16);
NamedIntrinsic divIntrinsic = NI_Vector128_op_Division;
GenTree* op1Dup = fgMakeMultiUse(&op1);
GenTree* op2Dup = fgMakeMultiUse(&op2);
GenTree* op1Hi =
gtNewSimdHWIntrinsicNode(type, op1, op1Dup, NI_X86Base_MoveHighToLow, CORINFO_TYPE_FLOAT, simdSize);
GenTree* op2Hi =
gtNewSimdHWIntrinsicNode(type, op2, op2Dup, NI_X86Base_MoveHighToLow, CORINFO_TYPE_FLOAT, simdSize);
GenTree* op1Dup2 = fgMakeMultiUse(&op1Dup);
GenTree* op2Dup2 = fgMakeMultiUse(&op2Dup);
GenTree* divHi = gtNewSimdHWIntrinsicNode(type, op1Hi, op2Hi, divIntrinsic, simdBaseJitType, simdSize);
GenTree* divLo =
gtNewSimdHWIntrinsicNode(type, op1Dup2, op2Dup2, divIntrinsic, simdBaseJitType, simdSize);
GenTree* div = gtNewSimdHWIntrinsicNode(type, divHi, divLo, NI_X86Base_MoveLowToHigh,
CORINFO_TYPE_FLOAT, simdSize);
return gtNewSimdHWIntrinsicNode(type, div, gtNewIconNode(0x4E), NI_X86Base_Shuffle, CORINFO_TYPE_INT,
simdSize);
}
unreached();
}
@@ -30121,7 +30235,8 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp,
case GT_DIV:
{
#if defined(TARGET_XARCH)
assert(varTypeIsFloating(simdBaseType) || varTypeIsInt(simdBaseType));
assert(varTypeIsFloating(simdBaseType) ||
(varTypeIsIntegral(simdBaseType) && !varTypeIsLong(simdBaseType)));
#else
assert(varTypeIsFloating(simdBaseType));
#endif

View File

@@ -2363,19 +2363,17 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions)
regNumber op2Reg = op2->GetRegNum();
regNumber tmpReg1 = internalRegisters.Extract(node, RBM_ALLFLOAT);
regNumber tmpReg2 = internalRegisters.Extract(node, RBM_ALLFLOAT);
emitAttr typeSize = emitTypeSize(node->TypeGet());
var_types nodeType = node->TypeGet();
emitAttr typeSize = emitTypeSize(nodeType);
noway_assert(typeSize == EA_16BYTE || typeSize == EA_32BYTE);
emitAttr divTypeSize = typeSize == EA_16BYTE ? EA_32BYTE : EA_64BYTE;
emitAttr divTypeSize = typeSize;
simd_t negOneIntVec = simd_t::AllBitsSet();
simd_t minValueInt{};
int numElements = genTypeSize(node->TypeGet()) / 4;
for (int i = 0; i < numElements; i++)
if (compiler->compOpportunisticallyDependsOn(InstructionSet_AVX))
{
minValueInt.i32[i] = INT_MIN;
divTypeSize = typeSize == EA_16BYTE ? EA_32BYTE : EA_64BYTE;
}
CORINFO_FIELD_HANDLE minValueFld = emit->emitSimdConst(&minValueInt, typeSize);
CORINFO_FIELD_HANDLE negOneFld = emit->emitSimdConst(&negOneIntVec, typeSize);
simd_t negOneIntVec = simd_t::AllBitsSet();
CORINFO_FIELD_HANDLE negOneFld = emit->emitSimdConst(&negOneIntVec, typeSize);
// div-by-zero check
emit->emitIns_SIMD_R_R_R(INS_xorpd, typeSize, tmpReg1, tmpReg1, tmpReg1, instOptions);
@@ -2384,16 +2382,31 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions)
genJumpToThrowHlpBlk(EJ_jne, SCK_DIV_BY_ZERO);
// overflow check
emit->emitIns_SIMD_R_R_C(INS_pcmpeqd, typeSize, tmpReg1, op1Reg, minValueFld, 0, instOptions);
emit->emitIns_SIMD_R_R_C(INS_pcmpeqd, typeSize, tmpReg2, op2Reg, negOneFld, 0, instOptions);
emit->emitIns_SIMD_R_R_R(INS_pandd, typeSize, tmpReg1, tmpReg1, tmpReg2, instOptions);
emit->emitIns_R_R(INS_ptest, typeSize, tmpReg1, tmpReg1, instOptions);
genJumpToThrowHlpBlk(EJ_jne, SCK_OVERFLOW);
if (varTypeIsSigned(baseType))
{
simd_t minValueInt{};
int numElements = genTypeSize(nodeType) / 4;
for (int i = 0; i < numElements; i++)
{
minValueInt.i32[i] = INT_MIN;
}
CORINFO_FIELD_HANDLE minValueFld = emit->emitSimdConst(&minValueInt, typeSize);
emit->emitIns_R_R(INS_cvtdq2pd, divTypeSize, tmpReg1, op1Reg, instOptions);
emit->emitIns_R_R(INS_cvtdq2pd, divTypeSize, tmpReg2, op2Reg, instOptions);
emit->emitIns_SIMD_R_R_C(INS_pcmpeqd, typeSize, tmpReg1, op1Reg, minValueFld, 0, instOptions);
emit->emitIns_SIMD_R_R_C(INS_pcmpeqd, typeSize, tmpReg2, op2Reg, negOneFld, 0, instOptions);
emit->emitIns_SIMD_R_R_R(INS_pandd, typeSize, tmpReg1, tmpReg1, tmpReg2, instOptions);
emit->emitIns_R_R(INS_ptest, typeSize, tmpReg1, tmpReg1, instOptions);
genJumpToThrowHlpBlk(EJ_jne, SCK_OVERFLOW);
}
emit->emitIns_R_R(varTypeIsSigned(baseType) ? INS_cvtdq2pd : INS_vcvtudq2pd, divTypeSize, tmpReg1, op1Reg,
instOptions);
emit->emitIns_R_R(varTypeIsSigned(baseType) ? INS_cvtdq2pd : INS_vcvtudq2pd, divTypeSize, tmpReg2, op2Reg,
instOptions);
emit->emitIns_SIMD_R_R_R(INS_divpd, divTypeSize, targetReg, tmpReg1, tmpReg2, instOptions);
emit->emitIns_R_R(INS_cvttpd2dq, divTypeSize, targetReg, targetReg, instOptions);
emit->emitIns_R_R(varTypeIsSigned(baseType) ? INS_cvttpd2dq : INS_vcvttpd2udq, divTypeSize, targetReg,
targetReg, instOptions);
break;
}

View File

@@ -2280,9 +2280,15 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
{
#if defined(TARGET_XARCH) && defined(FEATURE_HW_INTRINSICS)
// Check to see if it is possible to emulate the integer division
if (!(simdBaseType == TYP_INT &&
((simdSize == 16 && compOpportunisticallyDependsOn(InstructionSet_AVX)) ||
(simdSize == 32 && compOpportunisticallyDependsOn(InstructionSet_AVX512)))))
if (varTypeIsLong(simdBaseType))
{
break;
}
if (!compOpportunisticallyDependsOn(InstructionSet_SSE42))
{
break;
}
if (simdBaseType == TYP_UINT && !compOpportunisticallyDependsOn(InstructionSet_AVX512))
{
break;
}