mirror of https://github.com/dotnet/runtime
Merge 92e66137c9 into 02596ba8d9
This commit is contained in commit 3d0bf88e75.
@@ -40,8 +40,8 @@ For AOT compilation, the situation is far more complex. This is due to the follo
## Crossgen2 model of hardware intrinsic usage

There are two sets of instruction sets known to the compiler.

- The baseline instruction set which defaults to (Sse, Sse2), but may be adjusted via compiler option.
- The optimistic instruction set which defaults to (Sse3, Ssse3, Sse41, Sse42, Popcnt, Pclmulqdq, and Lzcnt).
- The baseline instruction set which defaults to x86-64-v2 (SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, and POPCNT), but may be adjusted via compiler option.
- The optimistic instruction set which defaults to (AES, GFNI, SHA, WAITPKG, and X86SERIALIZE).

Code will be compiled using the optimistic instruction set to drive compilation, but any use of an instruction set beyond the baseline instruction set will be recorded, as will any attempt to use an instruction set beyond the optimistic set if that attempted use has a semantic effect. If the baseline instruction set includes `Avx2` then the size and characteristics of `Vector<T>` are known. Any other decisions about ABI may also be encoded. For instance, it is likely that the ABI of `Vector256<T>` and `Vector512<T>` will vary based on the presence/absence of `Avx` support.
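The recording model above is easiest to see from the managed side. Below is a minimal C# sketch (illustrative, not part of this diff; assumes .NET 8 `Vector256` APIs) of code crossgen2 would compile optimistically: the AVX2 branch sits beyond the x86-64-v2 baseline, so its use is recorded, and the `IsSupported` guard is the semantic effect that keeps the scalar path valid on baseline hardware.

```csharp
// Minimal sketch (assumption: not from this PR) of a guarded intrinsic pattern
// that interacts with the baseline/optimistic model described above.
using System;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

static class VectorSum
{
    public static int Sum(ReadOnlySpan<int> values)
    {
        int i = 0, total = 0;

        if (Avx2.IsSupported) // beyond the x86-64-v2 baseline: compiled, but recorded
        {
            var acc = Vector256<int>.Zero;
            for (; i + Vector256<int>.Count <= values.Length; i += Vector256<int>.Count)
            {
                acc = Avx2.Add(acc, Vector256.Create(values.Slice(i, Vector256<int>.Count)));
            }
            total = Vector256.Sum(acc);
        }

        for (; i < values.Length; i++) // scalar fallback, valid on baseline hardware
        {
            total += values[i];
        }
        return total;
    }
}
```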
@@ -359,74 +359,31 @@ jobs:
- jitstress_random_2
${{ if in(parameters.testGroup, 'jitstress-isas-arm') }}:
scenarios:
- jitstress_isas_incompletehwintrinsic
- jitstress_isas_nohwintrinsic
- jitstress_isas_nohwintrinsic_nosimd
- jitstress_isas_nosimd
${{ if in(parameters.testGroup, 'jitstress-isas-x86') }}:
scenarios:
- jitstress_isas_incompletehwintrinsic
- jitstress_isas_nohwintrinsic
- jitstress_isas_nohwintrinsic_nosimd
- jitstress_isas_nosimd
- jitstress_isas_x86_evex
- jitstress_isas_x86_noaes
- jitstress_isas_x86_noavx
- jitstress_isas_x86_noavx2
- jitstress_isas_x86_noavx512
- jitstress_isas_x86_nobmi1
- jitstress_isas_x86_nobmi2
- jitstress_isas_x86_nofma
- jitstress_isas_x86_nohwintrinsic
- jitstress_isas_x86_nolzcnt
- jitstress_isas_x86_nopclmulqdq
- jitstress_isas_x86_nopopcnt
- jitstress_isas_x86_nosse
- jitstress_isas_x86_nosse2
- jitstress_isas_x86_nosse3
- jitstress_isas_x86_nosse3_4
- jitstress_isas_x86_nosse41
- jitstress_isas_x86_nosse42
- jitstress_isas_x86_nossse3
- jitstress_isas_x86_vectort128
- jitstress_isas_x86_vectort512
- jitstress_isas_x86_noavx512_vectort128
- jitstress_isas_1_x86_noaes
- jitstress_isas_1_x86_evex
- jitstress_isas_1_x86_noavx
- jitstress_isas_1_x86_noavx2
- jitstress_isas_1_x86_noavx512
- jitstress_isas_1_x86_nobmi1
- jitstress_isas_1_x86_nobmi2
- jitstress_isas_1_x86_nofma
- jitstress_isas_1_x86_nohwintrinsic
- jitstress_isas_1_x86_nolzcnt
- jitstress_isas_1_x86_nopclmulqdq
- jitstress_isas_1_x86_nopopcnt
- jitstress_isas_1_x86_nosse
- jitstress_isas_1_x86_nosse2
- jitstress_isas_1_x86_nosse3
- jitstress_isas_1_x86_nosse3_4
- jitstress_isas_1_x86_nosse41
- jitstress_isas_1_x86_nosse42
- jitstress_isas_1_x86_nossse3
- jitstress_isas_2_x86_noaes
- jitstress_isas_1_x86_vectort128
- jitstress_isas_1_x86_vectort512
- jitstress_isas_1_x86_noavx512_vectort128
- jitstress_isas_2_x86_evex
- jitstress_isas_2_x86_noavx
- jitstress_isas_2_x86_noavx2
- jitstress_isas_2_x86_noavx512
- jitstress_isas_2_x86_nobmi1
- jitstress_isas_2_x86_nobmi2
- jitstress_isas_2_x86_nofma
- jitstress_isas_2_x86_nohwintrinsic
- jitstress_isas_2_x86_nolzcnt
- jitstress_isas_2_x86_nopclmulqdq
- jitstress_isas_2_x86_nopopcnt
- jitstress_isas_2_x86_nosse
- jitstress_isas_2_x86_nosse2
- jitstress_isas_2_x86_nosse3
- jitstress_isas_2_x86_nosse3_4
- jitstress_isas_2_x86_nosse41
- jitstress_isas_2_x86_nosse42
- jitstress_isas_2_x86_nossse3
- jitstress_isas_2_x86_vectort128
- jitstress_isas_2_x86_vectort512
- jitstress_isas_2_x86_noavx512_vectort128
${{ if in(parameters.testGroup, 'jitstress-isas-avx512') }}:
scenarios:
- jitstress_isas_x86_evex
@@ -669,7 +669,6 @@ RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableHWIntrinsic, W("EnableHWIntri
#endif // defined(TARGET_LOONGARCH64)

#if defined(TARGET_AMD64) || defined(TARGET_X86)
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableSSE42, W("EnableSSE42"), 1, "Allows SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, and dependent hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX, W("EnableAVX"), 1, "Allows AVX and dependent hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX2, W("EnableAVX2"), 1, "Allows AVX2, BMI1, BMI2, F16C, FMA, LZCNT, MOVBE and dependent hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512, W("EnableAVX512"), 1, "Allows AVX512 F+BW+CD+DQ+VL and dependent hardware intrinsics to be disabled")
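These knobs surface to managed code through the `IsSupported` properties. Going by the updated `EnableSSE42` description above, setting `DOTNET_EnableSSE42=0` should make the whole SSE3-through-POPCNT group report as unsupported; a small sanity-check program (an assumption about the knob's observable effect, not part of this diff):

```csharp
// Hedged sanity check: run once normally and once with DOTNET_EnableSSE42=0 set
// in the environment. Per the description above, the second run should print
// False for all three properties.
using System;
using System.Runtime.Intrinsics.X86;

class IsaProbe
{
    static void Main()
    {
        Console.WriteLine($"Sse42:  {Sse42.IsSupported}");
        Console.WriteLine($"Ssse3:  {Ssse3.IsSupported}");
        Console.WriteLine($"Popcnt: {Popcnt.IsSupported}");
    }
}
```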
@@ -51,101 +51,97 @@ enum CORINFO_InstructionSet
#endif // TARGET_RISCV64
#ifdef TARGET_AMD64
InstructionSet_X86Base=1,
InstructionSet_SSE42=2,
InstructionSet_AVX=3,
InstructionSet_AVX2=4,
InstructionSet_AVX512=5,
InstructionSet_AVX512v2=6,
InstructionSet_AVX512v3=7,
InstructionSet_AVX10v1=8,
InstructionSet_AVX10v2=9,
InstructionSet_APX=10,
InstructionSet_AES=11,
InstructionSet_AES_V256=12,
InstructionSet_AES_V512=13,
InstructionSet_AVX512VP2INTERSECT=14,
InstructionSet_AVXIFMA=15,
InstructionSet_AVXVNNI=16,
InstructionSet_GFNI=17,
InstructionSet_GFNI_V256=18,
InstructionSet_GFNI_V512=19,
InstructionSet_SHA=20,
InstructionSet_WAITPKG=21,
InstructionSet_X86Serialize=22,
InstructionSet_Vector128=23,
InstructionSet_Vector256=24,
InstructionSet_Vector512=25,
InstructionSet_VectorT128=26,
InstructionSet_VectorT256=27,
InstructionSet_VectorT512=28,
InstructionSet_AVXVNNIINT=29,
InstructionSet_AVXVNNIINT_V512=30,
InstructionSet_X86Base_X64=31,
InstructionSet_SSE42_X64=32,
InstructionSet_AVX_X64=33,
InstructionSet_AVX2_X64=34,
InstructionSet_AVX512_X64=35,
InstructionSet_AVX512v2_X64=36,
InstructionSet_AVX512v3_X64=37,
InstructionSet_AVX10v1_X64=38,
InstructionSet_AVX10v2_X64=39,
InstructionSet_AES_X64=40,
InstructionSet_AVX512VP2INTERSECT_X64=41,
InstructionSet_AVXIFMA_X64=42,
InstructionSet_AVXVNNI_X64=43,
InstructionSet_GFNI_X64=44,
InstructionSet_SHA_X64=45,
InstructionSet_WAITPKG_X64=46,
InstructionSet_X86Serialize_X64=47,
InstructionSet_AVX=2,
InstructionSet_AVX2=3,
InstructionSet_AVX512=4,
InstructionSet_AVX512v2=5,
InstructionSet_AVX512v3=6,
InstructionSet_AVX10v1=7,
InstructionSet_AVX10v2=8,
InstructionSet_APX=9,
InstructionSet_AES=10,
InstructionSet_AES_V256=11,
InstructionSet_AES_V512=12,
InstructionSet_AVX512VP2INTERSECT=13,
InstructionSet_AVXIFMA=14,
InstructionSet_AVXVNNI=15,
InstructionSet_GFNI=16,
InstructionSet_GFNI_V256=17,
InstructionSet_GFNI_V512=18,
InstructionSet_SHA=19,
InstructionSet_WAITPKG=20,
InstructionSet_X86Serialize=21,
InstructionSet_Vector128=22,
InstructionSet_Vector256=23,
InstructionSet_Vector512=24,
InstructionSet_VectorT128=25,
InstructionSet_VectorT256=26,
InstructionSet_VectorT512=27,
InstructionSet_AVXVNNIINT=28,
InstructionSet_AVXVNNIINT_V512=29,
InstructionSet_X86Base_X64=30,
InstructionSet_AVX_X64=31,
InstructionSet_AVX2_X64=32,
InstructionSet_AVX512_X64=33,
InstructionSet_AVX512v2_X64=34,
InstructionSet_AVX512v3_X64=35,
InstructionSet_AVX10v1_X64=36,
InstructionSet_AVX10v2_X64=37,
InstructionSet_AES_X64=38,
InstructionSet_AVX512VP2INTERSECT_X64=39,
InstructionSet_AVXIFMA_X64=40,
InstructionSet_AVXVNNI_X64=41,
InstructionSet_GFNI_X64=42,
InstructionSet_SHA_X64=43,
InstructionSet_WAITPKG_X64=44,
InstructionSet_X86Serialize_X64=45,
#endif // TARGET_AMD64
#ifdef TARGET_X86
InstructionSet_X86Base=1,
InstructionSet_SSE42=2,
InstructionSet_AVX=3,
InstructionSet_AVX2=4,
InstructionSet_AVX512=5,
InstructionSet_AVX512v2=6,
InstructionSet_AVX512v3=7,
InstructionSet_AVX10v1=8,
InstructionSet_AVX10v2=9,
InstructionSet_APX=10,
InstructionSet_AES=11,
InstructionSet_AES_V256=12,
InstructionSet_AES_V512=13,
InstructionSet_AVX512VP2INTERSECT=14,
InstructionSet_AVXIFMA=15,
InstructionSet_AVXVNNI=16,
InstructionSet_GFNI=17,
InstructionSet_GFNI_V256=18,
InstructionSet_GFNI_V512=19,
InstructionSet_SHA=20,
InstructionSet_WAITPKG=21,
InstructionSet_X86Serialize=22,
InstructionSet_Vector128=23,
InstructionSet_Vector256=24,
InstructionSet_Vector512=25,
InstructionSet_VectorT128=26,
InstructionSet_VectorT256=27,
InstructionSet_VectorT512=28,
InstructionSet_AVXVNNIINT=29,
InstructionSet_AVXVNNIINT_V512=30,
InstructionSet_X86Base_X64=31,
InstructionSet_SSE42_X64=32,
InstructionSet_AVX_X64=33,
InstructionSet_AVX2_X64=34,
InstructionSet_AVX512_X64=35,
InstructionSet_AVX512v2_X64=36,
InstructionSet_AVX512v3_X64=37,
InstructionSet_AVX10v1_X64=38,
InstructionSet_AVX10v2_X64=39,
InstructionSet_AES_X64=40,
InstructionSet_AVX512VP2INTERSECT_X64=41,
InstructionSet_AVXIFMA_X64=42,
InstructionSet_AVXVNNI_X64=43,
InstructionSet_GFNI_X64=44,
InstructionSet_SHA_X64=45,
InstructionSet_WAITPKG_X64=46,
InstructionSet_X86Serialize_X64=47,
InstructionSet_AVX=2,
InstructionSet_AVX2=3,
InstructionSet_AVX512=4,
InstructionSet_AVX512v2=5,
InstructionSet_AVX512v3=6,
InstructionSet_AVX10v1=7,
InstructionSet_AVX10v2=8,
InstructionSet_APX=9,
InstructionSet_AES=10,
InstructionSet_AES_V256=11,
InstructionSet_AES_V512=12,
InstructionSet_AVX512VP2INTERSECT=13,
InstructionSet_AVXIFMA=14,
InstructionSet_AVXVNNI=15,
InstructionSet_GFNI=16,
InstructionSet_GFNI_V256=17,
InstructionSet_GFNI_V512=18,
InstructionSet_SHA=19,
InstructionSet_WAITPKG=20,
InstructionSet_X86Serialize=21,
InstructionSet_Vector128=22,
InstructionSet_Vector256=23,
InstructionSet_Vector512=24,
InstructionSet_VectorT128=25,
InstructionSet_VectorT256=26,
InstructionSet_VectorT512=27,
InstructionSet_AVXVNNIINT=28,
InstructionSet_AVXVNNIINT_V512=29,
InstructionSet_X86Base_X64=30,
InstructionSet_AVX_X64=31,
InstructionSet_AVX2_X64=32,
InstructionSet_AVX512_X64=33,
InstructionSet_AVX512v2_X64=34,
InstructionSet_AVX512v3_X64=35,
InstructionSet_AVX10v1_X64=36,
InstructionSet_AVX10v2_X64=37,
InstructionSet_AES_X64=38,
InstructionSet_AVX512VP2INTERSECT_X64=39,
InstructionSet_AVXIFMA_X64=40,
InstructionSet_AVXVNNI_X64=41,
InstructionSet_GFNI_X64=42,
InstructionSet_SHA_X64=43,
InstructionSet_WAITPKG_X64=44,
InstructionSet_X86Serialize_X64=45,
#endif // TARGET_X86

};
@@ -267,8 +263,6 @@ public:
#ifdef TARGET_AMD64
if (HasInstructionSet(InstructionSet_X86Base))
AddInstructionSet(InstructionSet_X86Base_X64);
if (HasInstructionSet(InstructionSet_SSE42))
AddInstructionSet(InstructionSet_SSE42_X64);
if (HasInstructionSet(InstructionSet_AVX))
AddInstructionSet(InstructionSet_AVX_X64);
if (HasInstructionSet(InstructionSet_AVX2))

@@ -395,10 +389,6 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins
resultflags.RemoveInstructionSet(InstructionSet_X86Base);
if (resultflags.HasInstructionSet(InstructionSet_X86Base_X64) && !resultflags.HasInstructionSet(InstructionSet_X86Base))
resultflags.RemoveInstructionSet(InstructionSet_X86Base_X64);
if (resultflags.HasInstructionSet(InstructionSet_SSE42) && !resultflags.HasInstructionSet(InstructionSet_SSE42_X64))
resultflags.RemoveInstructionSet(InstructionSet_SSE42);
if (resultflags.HasInstructionSet(InstructionSet_SSE42_X64) && !resultflags.HasInstructionSet(InstructionSet_SSE42))
resultflags.RemoveInstructionSet(InstructionSet_SSE42_X64);
if (resultflags.HasInstructionSet(InstructionSet_AVX) && !resultflags.HasInstructionSet(InstructionSet_AVX_X64))
resultflags.RemoveInstructionSet(InstructionSet_AVX);
if (resultflags.HasInstructionSet(InstructionSet_AVX_X64) && !resultflags.HasInstructionSet(InstructionSet_AVX))

@@ -459,9 +449,7 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins
resultflags.RemoveInstructionSet(InstructionSet_X86Serialize);
if (resultflags.HasInstructionSet(InstructionSet_X86Serialize_X64) && !resultflags.HasInstructionSet(InstructionSet_X86Serialize))
resultflags.RemoveInstructionSet(InstructionSet_X86Serialize_X64);
if (resultflags.HasInstructionSet(InstructionSet_SSE42) && !resultflags.HasInstructionSet(InstructionSet_X86Base))
resultflags.RemoveInstructionSet(InstructionSet_SSE42);
if (resultflags.HasInstructionSet(InstructionSet_AVX) && !resultflags.HasInstructionSet(InstructionSet_SSE42))
if (resultflags.HasInstructionSet(InstructionSet_AVX) && !resultflags.HasInstructionSet(InstructionSet_X86Base))
resultflags.RemoveInstructionSet(InstructionSet_AVX);
if (resultflags.HasInstructionSet(InstructionSet_AVX2) && !resultflags.HasInstructionSet(InstructionSet_AVX))
resultflags.RemoveInstructionSet(InstructionSet_AVX2);

@@ -491,7 +479,7 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins
resultflags.RemoveInstructionSet(InstructionSet_AVXIFMA);
if (resultflags.HasInstructionSet(InstructionSet_AVXVNNI) && !resultflags.HasInstructionSet(InstructionSet_AVX2))
resultflags.RemoveInstructionSet(InstructionSet_AVXVNNI);
if (resultflags.HasInstructionSet(InstructionSet_GFNI) && !resultflags.HasInstructionSet(InstructionSet_SSE42))
if (resultflags.HasInstructionSet(InstructionSet_GFNI) && !resultflags.HasInstructionSet(InstructionSet_X86Base))
resultflags.RemoveInstructionSet(InstructionSet_GFNI);
if (resultflags.HasInstructionSet(InstructionSet_GFNI_V256) && !resultflags.HasInstructionSet(InstructionSet_GFNI))
resultflags.RemoveInstructionSet(InstructionSet_GFNI_V256);

@@ -525,9 +513,7 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins
resultflags.RemoveInstructionSet(InstructionSet_VectorT512);
#endif // TARGET_AMD64
#ifdef TARGET_X86
if (resultflags.HasInstructionSet(InstructionSet_SSE42) && !resultflags.HasInstructionSet(InstructionSet_X86Base))
resultflags.RemoveInstructionSet(InstructionSet_SSE42);
if (resultflags.HasInstructionSet(InstructionSet_AVX) && !resultflags.HasInstructionSet(InstructionSet_SSE42))
if (resultflags.HasInstructionSet(InstructionSet_AVX) && !resultflags.HasInstructionSet(InstructionSet_X86Base))
resultflags.RemoveInstructionSet(InstructionSet_AVX);
if (resultflags.HasInstructionSet(InstructionSet_AVX2) && !resultflags.HasInstructionSet(InstructionSet_AVX))
resultflags.RemoveInstructionSet(InstructionSet_AVX2);

@@ -557,7 +543,7 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins
resultflags.RemoveInstructionSet(InstructionSet_AVXIFMA);
if (resultflags.HasInstructionSet(InstructionSet_AVXVNNI) && !resultflags.HasInstructionSet(InstructionSet_AVX2))
resultflags.RemoveInstructionSet(InstructionSet_AVXVNNI);
if (resultflags.HasInstructionSet(InstructionSet_GFNI) && !resultflags.HasInstructionSet(InstructionSet_SSE42))
if (resultflags.HasInstructionSet(InstructionSet_GFNI) && !resultflags.HasInstructionSet(InstructionSet_X86Base))
resultflags.RemoveInstructionSet(InstructionSet_GFNI);
if (resultflags.HasInstructionSet(InstructionSet_GFNI_V256) && !resultflags.HasInstructionSet(InstructionSet_GFNI))
resultflags.RemoveInstructionSet(InstructionSet_GFNI_V256);
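Each of these `EnsureInstructionSetFlagsAreValid` hunks applies the same rule, just with `AVX` and `GFNI` now depending directly on `X86Base` instead of the deleted `SSE42` set: any instruction set whose prerequisite is absent is removed. A hypothetical C# sketch of that closure rule (invented names, not the generated C++):

```csharp
// Hypothetical sketch of the rule these hunks encode: drop any ISA whose
// prerequisite is missing, repeating until the flag set reaches a fixed point.
using System.Collections.Generic;

static class IsaClosure
{
    public static HashSet<string> EnsureValid(
        HashSet<string> enabled, IReadOnlyDictionary<string, string> prerequisiteOf)
    {
        bool changed;
        do
        {
            changed = false;
            foreach (var (isa, prereq) in prerequisiteOf)
            {
                if (enabled.Contains(isa) && !enabled.Contains(prereq))
                {
                    enabled.Remove(isa); // e.g. remove AVX when X86Base is absent
                    changed = true;
                }
            }
        } while (changed);
        return enabled;
    }
}
```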
@@ -673,10 +659,6 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet)
return "X86Base";
case InstructionSet_X86Base_X64 :
return "X86Base_X64";
case InstructionSet_SSE42 :
return "SSE42";
case InstructionSet_SSE42_X64 :
return "SSE42_X64";
case InstructionSet_AVX :
return "AVX";
case InstructionSet_AVX_X64 :

@@ -767,8 +749,6 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet)
#ifdef TARGET_X86
case InstructionSet_X86Base :
return "X86Base";
case InstructionSet_SSE42 :
return "SSE42";
case InstructionSet_AVX :
return "AVX";
case InstructionSet_AVX2 :

@@ -869,11 +849,11 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst
case READYTORUN_INSTRUCTION_X86Base: return InstructionSet_X86Base;
case READYTORUN_INSTRUCTION_Sse: return InstructionSet_X86Base;
case READYTORUN_INSTRUCTION_Sse2: return InstructionSet_X86Base;
case READYTORUN_INSTRUCTION_Sse42: return InstructionSet_SSE42;
case READYTORUN_INSTRUCTION_Sse3: return InstructionSet_SSE42;
case READYTORUN_INSTRUCTION_Ssse3: return InstructionSet_SSE42;
case READYTORUN_INSTRUCTION_Sse41: return InstructionSet_SSE42;
case READYTORUN_INSTRUCTION_Popcnt: return InstructionSet_SSE42;
case READYTORUN_INSTRUCTION_Sse42: return InstructionSet_X86Base;
case READYTORUN_INSTRUCTION_Sse3: return InstructionSet_X86Base;
case READYTORUN_INSTRUCTION_Ssse3: return InstructionSet_X86Base;
case READYTORUN_INSTRUCTION_Sse41: return InstructionSet_X86Base;
case READYTORUN_INSTRUCTION_Popcnt: return InstructionSet_X86Base;
case READYTORUN_INSTRUCTION_Avx: return InstructionSet_AVX;
case READYTORUN_INSTRUCTION_Avx2: return InstructionSet_AVX2;
case READYTORUN_INSTRUCTION_Bmi1: return InstructionSet_AVX2;

@@ -938,11 +918,11 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst
case READYTORUN_INSTRUCTION_X86Base: return InstructionSet_X86Base;
case READYTORUN_INSTRUCTION_Sse: return InstructionSet_X86Base;
case READYTORUN_INSTRUCTION_Sse2: return InstructionSet_X86Base;
case READYTORUN_INSTRUCTION_Sse42: return InstructionSet_SSE42;
case READYTORUN_INSTRUCTION_Sse3: return InstructionSet_SSE42;
case READYTORUN_INSTRUCTION_Ssse3: return InstructionSet_SSE42;
case READYTORUN_INSTRUCTION_Sse41: return InstructionSet_SSE42;
case READYTORUN_INSTRUCTION_Popcnt: return InstructionSet_SSE42;
case READYTORUN_INSTRUCTION_Sse42: return InstructionSet_X86Base;
case READYTORUN_INSTRUCTION_Sse3: return InstructionSet_X86Base;
case READYTORUN_INSTRUCTION_Ssse3: return InstructionSet_X86Base;
case READYTORUN_INSTRUCTION_Sse41: return InstructionSet_X86Base;
case READYTORUN_INSTRUCTION_Popcnt: return InstructionSet_X86Base;
case READYTORUN_INSTRUCTION_Avx: return InstructionSet_AVX;
case READYTORUN_INSTRUCTION_Avx2: return InstructionSet_AVX2;
case READYTORUN_INSTRUCTION_Bmi1: return InstructionSet_AVX2;
@@ -37,11 +37,11 @@

#include <minipal/guid.h>

constexpr GUID JITEEVersionIdentifier = { /* 2d40ec46-2e41-4a8b-8349-3c1267b95821 */
0x2d40ec46,
0x2e41,
0x4a8b,
{0x83, 0x49, 0x3c, 0x12, 0x67, 0xb9, 0x58, 0x21}
constexpr GUID JITEEVersionIdentifier = { /* 4c03a921-f305-47db-a9bb-c7ec4a1b83d8 */
0x4c03a921,
0xf305,
0x47db,
{0xa9, 0xbb, 0xc7, 0xec, 0x4a, 0x1b, 0x83, 0xd8}
};

#endif // JIT_EE_VERSIONING_GUID_H
@@ -251,17 +251,16 @@ bool IntegralRange::Contains(int64_t value) const
case NI_X86Base_CompareScalarUnorderedLessThan:
case NI_X86Base_CompareScalarUnorderedGreaterThanOrEqual:
case NI_X86Base_CompareScalarUnorderedGreaterThan:
case NI_SSE42_TestC:
case NI_SSE42_TestZ:
case NI_SSE42_TestNotZAndNotC:
case NI_X86Base_TestC:
case NI_X86Base_TestZ:
case NI_X86Base_TestNotZAndNotC:
case NI_AVX_TestC:
case NI_AVX_TestZ:
case NI_AVX_TestNotZAndNotC:
return {SymbolicIntegerValue::Zero, SymbolicIntegerValue::One};

case NI_X86Base_Extract:
case NI_SSE42_Extract:
case NI_SSE42_X64_Extract:
case NI_X86Base_X64_Extract:
case NI_Vector128_ToScalar:
case NI_Vector256_ToScalar:
case NI_Vector512_ToScalar:

@@ -278,8 +277,8 @@ bool IntegralRange::Contains(int64_t value) const
case NI_AVX2_TrailingZeroCount:
case NI_AVX2_X64_LeadingZeroCount:
case NI_AVX2_X64_TrailingZeroCount:
case NI_SSE42_PopCount:
case NI_SSE42_X64_PopCount:
case NI_X86Base_PopCount:
case NI_X86Base_X64_PopCount:
// Note: No advantage in using a precise range for IntegralRange.
// Example: IntCns = 42 gives [0..127] with a non-precise range, [42,42] with a precise range.
return {SymbolicIntegerValue::Zero, SymbolicIntegerValue::ByteMax};
@@ -47,11 +47,11 @@ public:

private:
#if defined(TARGET_XARCH)
// Generates SSE2 code for the given tree as "Operand BitWiseOp BitMask"
void genSSE2BitwiseOp(GenTree* treeNode);
// Generates intrinsic code for the given tree as "Operand BitWiseOp BitMask"
void genIntrinsicBitwiseOp(GenTree* treeNode);

// Generates SSE42 code for the given tree as a round operation
void genSSE42RoundOp(GenTreeOp* treeNode);
// Generates intrinsic code for the given tree as a round operation
void genIntrinsicRoundOp(GenTreeOp* treeNode);

instruction simdAlignedMovIns()
{

@@ -941,7 +941,6 @@ protected:

void genBaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions);
void genX86BaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions);
void genSse42Intrinsic(GenTreeHWIntrinsic* node, insOpts instOptions);
void genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions);
void genFmaIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions);
void genPermuteVar2x(GenTreeHWIntrinsic* node, insOpts instOptions);

@@ -707,7 +707,7 @@ void CodeGen::genCodeForNegNot(GenTree* tree)
if (varTypeIsFloating(targetType))
{
assert(tree->OperIs(GT_NEG));
genSSE2BitwiseOp(tree);
genIntrinsicBitwiseOp(tree);
}
else
{
@@ -1447,18 +1447,7 @@ void CodeGen::genSIMDSplitReturn(GenTree* src, const ReturnTypeDesc* retTypeDesc
inst_Mov(TYP_INT, reg0, opReg, /* canSkip */ false);

// reg1 = opRef[63:32]
if (compiler->compOpportunisticallyDependsOn(InstructionSet_SSE42))
{
inst_RV_TT_IV(INS_pextrd, EA_4BYTE, reg1, src, 1, INS_OPTS_NONE);
}
else
{
bool isRMW = !compiler->canUseVexEncoding();
int8_t shuffleMask = 1; // we only need [63:32]->[31:0], the rest is not read.

inst_RV_RV_TT_IV(INS_pshufd, EA_8BYTE, opReg, opReg, src, shuffleMask, isRMW, INS_OPTS_NONE);
inst_Mov(TYP_INT, reg1, opReg, /* canSkip */ false);
}
inst_RV_TT_IV(INS_pextrd, EA_4BYTE, reg1, src, 1, INS_OPTS_NONE);
#endif // TARGET_X86
}
@@ -2474,17 +2463,7 @@ void CodeGen::genMultiRegStoreToSIMDLocal(GenTreeLclVar* lclNode)

inst_Mov(TYP_FLOAT, targetReg, reg0, /* canSkip */ false);
const emitAttr size = emitTypeSize(TYP_SIMD8);
if (compiler->compOpportunisticallyDependsOn(InstructionSet_SSE42))
{
GetEmitter()->emitIns_SIMD_R_R_R_I(INS_pinsrd, size, targetReg, targetReg, reg1, 1, INS_OPTS_NONE);
}
else
{
regNumber tempXmm = internalRegisters.GetSingle(lclNode);
assert(tempXmm != targetReg);
inst_Mov(TYP_FLOAT, tempXmm, reg1, /* canSkip */ false);
GetEmitter()->emitIns_SIMD_R_R_R(INS_punpckldq, size, targetReg, targetReg, tempXmm, INS_OPTS_NONE);
}
GetEmitter()->emitIns_SIMD_R_R_R_I(INS_pinsrd, size, targetReg, targetReg, reg1, 1, INS_OPTS_NONE);
genProduceReg(lclNode);
}
#elif defined(TARGET_AMD64)

@@ -5805,8 +5784,7 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree)
}

case NI_X86Base_Extract:
case NI_SSE42_Extract:
case NI_SSE42_X64_Extract:
case NI_X86Base_X64_Extract:
case NI_AVX_ExtractVector128:
case NI_AVX2_ExtractVector128:
case NI_AVX512_ExtractVector128:

@@ -5822,15 +5800,6 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree)

switch (ins)
{
case INS_pextrw:
{
// The encoding which supports containment is SSE4.1+ only
assert(compiler->compIsaSupportedDebugOnly(InstructionSet_SSE42));

ins = INS_pextrw_sse42;
break;
}

case INS_vextractf64x2:
{
ins = INS_vextractf32x4;

@@ -7757,7 +7726,7 @@ int CodeGenInterface::genCallerSPtoInitialSPdelta() const
#endif // TARGET_AMD64

//-----------------------------------------------------------------------------------------
// genSSE2BitwiseOp - generate SSE2 code for the given oper as "Operand BitWiseOp BitMask"
// genIntrinsicBitwiseOp - generate intrinsic code for the given oper as "Operand BitWiseOp BitMask"
//
// Arguments:
// treeNode - tree node

@@ -7769,7 +7738,7 @@ int CodeGenInterface::genCallerSPtoInitialSPdelta() const
// i) tree oper is one of GT_NEG or GT_INTRINSIC Abs()
// ii) tree type is floating point type.
// iii) caller of this routine needs to call genProduceReg()
void CodeGen::genSSE2BitwiseOp(GenTree* treeNode)
void CodeGen::genIntrinsicBitwiseOp(GenTree* treeNode)
{
regNumber targetReg = treeNode->GetRegNum();
regNumber operandReg = genConsumeReg(treeNode->gtGetOp1());

@@ -7800,7 +7769,7 @@ void CodeGen::genSSE2BitwiseOp(GenTree* treeNode)
}
else
{
assert(!"genSSE2BitwiseOp: unsupported oper");
assert(!"genIntrinsicBitwiseOp: unsupported oper");
}

simd16_t constValue;

@@ -7816,7 +7785,7 @@ void CodeGen::genSSE2BitwiseOp(GenTree* treeNode)
}

//-----------------------------------------------------------------------------------------
// genSSE42RoundOp - generate SSE42 code for the given tree as a round operation
// genIntrinsicRoundOp - generate intrinsic code for the given tree as a round operation
//
// Arguments:
// treeNode - tree node

@@ -7825,17 +7794,13 @@ void CodeGen::genSSE2BitwiseOp(GenTree* treeNode)
// None
//
// Assumptions:
// i) SSE4.2 is supported by the underlying hardware
// ii) treeNode oper is a GT_INTRINSIC
// iii) treeNode type is a floating point type
// iv) treeNode is not used from memory
// v) tree oper is NI_System_Math{F}_Round, _Ceiling, _Floor, or _Truncate
// vi) caller of this routine needs to call genProduceReg()
void CodeGen::genSSE42RoundOp(GenTreeOp* treeNode)
// i) treeNode oper is a GT_INTRINSIC
// ii) treeNode type is a floating point type
// iii) treeNode is not used from memory
// iv) tree oper is NI_System_Math{F}_Round, _Ceiling, _Floor, or _Truncate
// v) caller of this routine needs to call genProduceReg()
void CodeGen::genIntrinsicRoundOp(GenTreeOp* treeNode)
{
// i) SSE4.2 is supported by the underlying hardware
assert(compiler->compIsaSupportedDebugOnly(InstructionSet_SSE42));

// ii) treeNode oper is a GT_INTRINSIC
assert(treeNode->OperIs(GT_INTRINSIC));

@@ -7878,7 +7843,7 @@ void CodeGen::genSSE42RoundOp(GenTreeOp* treeNode)

default:
ins = INS_invalid;
assert(!"genSSE42RoundOp: unsupported intrinsic");
assert(!"genRoundOp: unsupported intrinsic");
unreached();
}

@@ -7901,14 +7866,14 @@ void CodeGen::genIntrinsic(GenTreeIntrinsic* treeNode)
switch (treeNode->gtIntrinsicName)
{
case NI_System_Math_Abs:
genSSE2BitwiseOp(treeNode);
genIntrinsicBitwiseOp(treeNode);
break;

case NI_System_Math_Ceiling:
case NI_System_Math_Floor:
case NI_System_Math_Truncate:
case NI_System_Math_Round:
genSSE42RoundOp(treeNode->AsOp());
genIntrinsicRoundOp(treeNode->AsOp());
break;

case NI_System_Math_Sqrt:
@@ -6061,11 +6061,6 @@ int Compiler::compCompile(CORINFO_MODULE_HANDLE classPtr,

instructionSetFlags.AddInstructionSet(InstructionSet_X86Base);

if (JitConfig.EnableSSE42() != 0)
{
instructionSetFlags.AddInstructionSet(InstructionSet_SSE42);
}

if (JitConfig.EnableAVX() != 0)
{
instructionSetFlags.AddInstructionSet(InstructionSet_AVX);

@@ -1960,24 +1960,10 @@ GenTree* DecomposeLongs::DecomposeHWIntrinsicToScalar(LIR::Use& use, GenTreeHWIn
simdTmpVar = m_compiler->gtNewLclLNode(simdTmpVarNum, simdTmpVar->TypeGet());
Range().InsertAfter(loResult, simdTmpVar);

GenTree* hiResult;
if (m_compiler->compOpportunisticallyDependsOn(InstructionSet_SSE42))
{
GenTree* one = m_compiler->gtNewIconNode(1);
hiResult = m_compiler->gtNewSimdGetElementNode(TYP_INT, simdTmpVar, one, CORINFO_TYPE_INT, simdSize);

Range().InsertAfter(simdTmpVar, one, hiResult);
}
else
{
GenTree* thirtyTwo = m_compiler->gtNewIconNode(32);
GenTree* shift = m_compiler->gtNewSimdBinOpNode(GT_RSZ, op1->TypeGet(), simdTmpVar, thirtyTwo,
node->GetSimdBaseJitType(), simdSize);
hiResult = m_compiler->gtNewSimdToScalarNode(TYP_INT, shift, CORINFO_TYPE_INT, simdSize);

Range().InsertAfter(simdTmpVar, thirtyTwo, shift, hiResult);
}
GenTree* one = m_compiler->gtNewIconNode(1);
GenTree* hiResult = m_compiler->gtNewSimdGetElementNode(TYP_INT, simdTmpVar, one, CORINFO_TYPE_INT, simdSize);

Range().InsertAfter(simdTmpVar, one, hiResult);
Range().Remove(node);

return FinalizeDecomposition(use, loResult, hiResult, hiResult);

@@ -8292,12 +8292,8 @@ void emitter::emitSimdConstCompressedLoad(simd_t* constValue, emitAttr attr, reg

if ((dataSize == 16) && (constValue->u64[1] == constValue->u64[0]))
{
if (((cnsSize == 16) && emitComp->compOpportunisticallyDependsOn(InstructionSet_SSE42)) ||
emitComp->compOpportunisticallyDependsOn(InstructionSet_AVX))
{
dataSize = 8;
ins = (cnsSize == 16) ? INS_movddup : INS_vbroadcastsd;
}
dataSize = 8;
ins = (cnsSize == 16) ? INS_movddup : INS_vbroadcastsd;
}

// `vbroadcastss` fills the full SIMD register, so we can't do this last step if the

@@ -4024,7 +4024,6 @@ bool emitter::emitInsCanOnlyWriteSSE2OrAVXReg(instrDesc* id)
case INS_pextrd:
case INS_pextrq:
case INS_pextrw:
case INS_pextrw_sse42:
case INS_rorx:
case INS_shlx:
case INS_sarx:

@@ -7003,35 +7002,8 @@ void emitter::emitStoreSimd12ToLclOffset(unsigned varNum, unsigned offset, regNu
// Store lower 8 bytes
emitIns_S_R(INS_movsd_simd, EA_8BYTE, dataReg, varNum, offset);

if (emitComp->compOpportunisticallyDependsOn(InstructionSet_SSE42))
{
// Extract and store upper 4 bytes
emitIns_S_R_I(INS_extractps, EA_16BYTE, varNum, offset + 8, dataReg, 2);
}
else if (tmpRegProvider != nullptr)
{
regNumber tmpReg = codeGen->internalRegisters.GetSingle(tmpRegProvider);
assert(isFloatReg(tmpReg));

// Extract upper 4 bytes from data
emitIns_R_R(INS_movhlps, EA_16BYTE, tmpReg, dataReg);

// Store upper 4 bytes
emitIns_S_R(INS_movss, EA_4BYTE, tmpReg, varNum, offset + 8);
}
else
{
// We don't have temp regs - let's do two shuffles then

// [0,1,2,3] -> [2,3,0,1]
emitIns_R_R_I(INS_pshufd, EA_16BYTE, dataReg, dataReg, 78);

// Store upper 4 bytes
emitIns_S_R(INS_movss, EA_4BYTE, dataReg, varNum, offset + 8);

// Restore dataReg to its previous state: [2,3,0,1] -> [0,1,2,3]
emitIns_R_R_I(INS_pshufd, EA_16BYTE, dataReg, dataReg, 78);
}
// Extract and store upper 4 bytes
emitIns_S_R_I(INS_extractps, EA_16BYTE, varNum, offset + 8, dataReg, 2);
}
#endif // FEATURE_SIMD

@@ -13628,7 +13600,6 @@ void emitter::emitDispIns(
case INS_extractps:
case INS_pextrb:
case INS_pextrw:
case INS_pextrw_sse42:
case INS_pextrd:
{
tgtAttr = EA_4BYTE;

@@ -1182,8 +1182,8 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed
case NI_AVX2_TrailingZeroCount:
case NI_AVX2_X64_LeadingZeroCount:
case NI_AVX2_X64_TrailingZeroCount:
case NI_SSE42_PopCount:
case NI_SSE42_X64_PopCount:
case NI_X86Base_PopCount:
case NI_X86Base_X64_PopCount:
case NI_Vector256_Create:
case NI_Vector512_Create:
case NI_Vector256_CreateScalar:
File diff suppressed because it is too large.
@@ -942,7 +942,6 @@ static const HWIntrinsicIsaRange hwintrinsicIsaRangeArray[] = {
// clang-format off
#if defined(TARGET_XARCH)
{ FIRST_NI_X86Base, LAST_NI_X86Base }, // X86Base
{ FIRST_NI_SSE42, LAST_NI_SSE42 }, // SSE42
{ FIRST_NI_AVX, LAST_NI_AVX }, // AVX
{ FIRST_NI_AVX2, LAST_NI_AVX2 }, // AVX2
{ FIRST_NI_AVX512, LAST_NI_AVX512 }, // AVX512

@@ -973,7 +972,6 @@ static const HWIntrinsicIsaRange hwintrinsicIsaRangeArray[] = {
{ FIRST_NI_AVXVNNIINT_V512, LAST_NI_AVXVNNIINT_V512 }, // AVXVNNIINT_V512

{ FIRST_NI_X86Base_X64, LAST_NI_X86Base_X64 }, // X86Base_X64
{ FIRST_NI_SSE42_X64, LAST_NI_SSE42_X64 }, // SSE42_X64
{ NI_Illegal, NI_Illegal }, // AVX_X64
{ FIRST_NI_AVX2_X64, LAST_NI_AVX2_X64 }, // AVX2_X64
{ FIRST_NI_AVX512_X64, LAST_NI_AVX512_X64 }, // AVX512_X64

@@ -2265,9 +2263,9 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,
#if defined(TARGET_XARCH)
switch (intrinsic)
{
case NI_SSE42_ConvertToVector128Int16:
case NI_SSE42_ConvertToVector128Int32:
case NI_SSE42_ConvertToVector128Int64:
case NI_X86Base_ConvertToVector128Int16:
case NI_X86Base_ConvertToVector128Int32:
case NI_X86Base_ConvertToVector128Int64:
case NI_AVX2_BroadcastScalarToVector128:
case NI_AVX2_BroadcastScalarToVector256:
case NI_AVX2_ConvertToVector256Int16:

@@ -2323,7 +2321,7 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,
: gtNewSimdHWIntrinsicNode(nodeRetType, op1, op2, intrinsic, simdBaseJitType, simdSize);

#ifdef TARGET_XARCH
if ((intrinsic == NI_SSE42_Crc32) || (intrinsic == NI_SSE42_X64_Crc32))
if ((intrinsic == NI_X86Base_Crc32) || (intrinsic == NI_X86Base_X64_Crc32))
{
// TODO-XArch-Cleanup: currently we use the simdBaseJitType to bring the type of the second argument
// to the code generator. May encode the overload info in other way.

@@ -869,7 +869,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
{
switch (intrinsicId)
{
case NI_SSE42_BlendVariable:
case NI_X86Base_BlendVariable:
case NI_AVX_BlendVariable:
case NI_AVX2_BlendVariable:
case NI_AVX512_BlendVariableMask:

@@ -1005,13 +1005,6 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
break;
}

case InstructionSet_SSE42:
case InstructionSet_SSE42_X64:
{
genSse42Intrinsic(node, instOptions);
break;
}

case InstructionSet_AVX:
case InstructionSet_AVX2:
case InstructionSet_AVX2_X64:

@@ -1908,19 +1901,9 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions)

if (!canCombineLoad)
{
if (compiler->compOpportunisticallyDependsOn(InstructionSet_SSE42))
{
genHWIntrinsic_R_RM(node, ins, baseAttr, targetReg, loPart, instOptions);
inst_RV_RV_TT_IV(INS_pinsrd, EA_16BYTE, targetReg, targetReg, hiPart, 0x01,
!compiler->canUseVexEncoding(), instOptions);
}
else
{
regNumber tmpReg = internalRegisters.GetSingle(node);
genHWIntrinsic_R_RM(node, ins, baseAttr, targetReg, loPart, instOptions);
genHWIntrinsic_R_RM(node, ins, baseAttr, tmpReg, hiPart, instOptions);
emit->emitIns_R_R(INS_punpckldq, EA_16BYTE, targetReg, tmpReg, instOptions);
}
genHWIntrinsic_R_RM(node, ins, baseAttr, targetReg, loPart, instOptions);
inst_RV_RV_TT_IV(INS_pinsrd, EA_16BYTE, targetReg, targetReg, hiPart, 0x01,
!compiler->canUseVexEncoding(), instOptions);
break;
}

@@ -1961,26 +1944,17 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions)

if (baseType == TYP_FLOAT)
{
if (compiler->compOpportunisticallyDependsOn(InstructionSet_SSE42))
{
// insertps imm8 is:
// * Bits 0-3: zmask
// * Bits 4-5: count_d
// * Bits 6-7: count_s (register form only)
//
// We want zmask 0b1110 (0xE) to zero elements 1/2/3
// We want count_d 0b00 (0x0) to insert the value to element 0
// We want count_s 0b00 (0x0) as we're just taking element 0 of the source
// insertps imm8 is:
// * Bits 0-3: zmask
// * Bits 4-5: count_d
// * Bits 6-7: count_s (register form only)
//
// We want zmask 0b1110 (0xE) to zero elements 1/2/3
// We want count_d 0b00 (0x0) to insert the value to element 0
// We want count_s 0b00 (0x0) as we're just taking element 0 of the source

emit->emitIns_SIMD_R_R_R_I(INS_insertps, attr, targetReg, targetReg, op1Reg, 0x0E,
instOptions);
}
else
{
assert(targetReg != op1Reg);
emit->emitIns_SIMD_R_R_R(INS_xorps, attr, targetReg, targetReg, targetReg, instOptions);
emit->emitIns_Mov(INS_movss, attr, targetReg, op1Reg, /* canSkip */ false);
}
emit->emitIns_SIMD_R_R_R_I(INS_insertps, attr, targetReg, targetReg, op1Reg, 0x0E,
instOptions);
}
else
{
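As a quick check of the insertps imm8 layout described in the comments above, the three fields assemble into the 0x0E immediate the code passes (a sketch using the values from that comment, not code from this PR):

```csharp
// Assembling the insertps imm8 from the fields described above:
// count_s = 0 (source element 0), count_d = 0 (destination element 0),
// zmask = 0b1110 (zero destination elements 1/2/3).
const byte countS = 0b00, countD = 0b00, zmask = 0b1110;
byte imm8 = (byte)((countS << 6) | (countD << 4) | zmask); // == 0x0E
```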
@ -2145,15 +2119,7 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions)
|
|||
{
|
||||
if (ival == 1)
|
||||
{
|
||||
if (compiler->compOpportunisticallyDependsOn(InstructionSet_SSE42))
|
||||
{
|
||||
emit->emitIns_R_R(INS_movshdup, attr, targetReg, op1Reg);
|
||||
}
|
||||
else
|
||||
{
|
||||
emit->emitIns_SIMD_R_R_R_I(INS_shufps, attr, targetReg, op1Reg, op1Reg,
|
||||
static_cast<int8_t>(0x55), instOptions);
|
||||
}
|
||||
emit->emitIns_R_R(INS_movshdup, attr, targetReg, op1Reg);
|
||||
}
|
||||
else if (ival == 2)
|
||||
{
|
||||
|
@ -2564,40 +2530,11 @@ void CodeGen::genX86BaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions)
|
|||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
unreached();
|
||||
break;
|
||||
}
|
||||
|
||||
genProduceReg(node);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
// genSse42Intrinsic: Generates the code for an SSE4.2 hardware intrinsic node
|
||||
//
|
||||
// Arguments:
|
||||
// node - The hardware intrinsic node
|
||||
//
|
||||
void CodeGen::genSse42Intrinsic(GenTreeHWIntrinsic* node, insOpts instOptions)
|
||||
{
|
||||
NamedIntrinsic intrinsicId = node->GetHWIntrinsicId();
|
||||
regNumber targetReg = node->GetRegNum();
|
||||
GenTree* op1 = node->Op(1);
|
||||
var_types baseType = node->GetSimdBaseType();
|
||||
var_types targetType = node->TypeGet();
|
||||
emitter* emit = GetEmitter();
|
||||
|
||||
assert(targetReg != REG_NA);
|
||||
assert(!node->OperIsCommutative());
|
||||
|
||||
genConsumeMultiOpOperands(node);
|
||||
|
||||
switch (intrinsicId)
|
||||
{
|
||||
case NI_SSE42_ConvertToVector128Int16:
|
||||
case NI_SSE42_ConvertToVector128Int32:
|
||||
case NI_SSE42_ConvertToVector128Int64:
|
||||
case NI_X86Base_ConvertToVector128Int16:
|
||||
case NI_X86Base_ConvertToVector128Int32:
|
||||
case NI_X86Base_ConvertToVector128Int64:
|
||||
{
|
||||
GenTree* op1 = node->Op(1);
|
||||
instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType, compiler);
|
||||
|
||||
if (!varTypeIsSIMD(op1->TypeGet()))
|
||||
|
@ -2614,12 +2551,13 @@ void CodeGen::genSse42Intrinsic(GenTreeHWIntrinsic* node, insOpts instOptions)
|
|||
break;
|
||||
}
|
||||
|
||||
case NI_SSE42_Crc32:
|
||||
case NI_SSE42_X64_Crc32:
|
||||
case NI_X86Base_Crc32:
|
||||
case NI_X86Base_X64_Crc32:
|
||||
{
|
||||
assert(instOptions == INS_OPTS_NONE);
|
||||
|
||||
instruction ins = INS_crc32;
|
||||
GenTree* op1 = node->Op(1);
|
||||
regNumber op1Reg = op1->GetRegNum();
|
||||
GenTree* op2 = node->Op(2);
|
||||
|
||||
|
@ -2671,12 +2609,11 @@ void CodeGen::genSse42Intrinsic(GenTreeHWIntrinsic* node, insOpts instOptions)
|
|||
break;
|
||||
}
|
||||
|
||||
case NI_SSE42_Extract:
|
||||
case NI_SSE42_X64_Extract:
|
||||
case NI_X86Base_Extract:
|
||||
case NI_X86Base_X64_Extract:
|
||||
{
|
||||
assert(!varTypeIsFloating(baseType));
|
||||
|
||||
instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType, compiler);
|
||||
GenTree* op1 = node->Op(1);
|
||||
GenTree* op2 = node->Op(2);
|
||||
emitAttr attr = emitActualTypeSize(targetType);
|
||||
|
||||
|
@ -2703,18 +2640,16 @@ void CodeGen::genSse42Intrinsic(GenTreeHWIntrinsic* node, insOpts instOptions)
|
|||
break;
|
||||
}
|
||||
|
||||
case NI_SSE42_PopCount:
|
||||
case NI_SSE42_X64_PopCount:
|
||||
case NI_X86Base_PopCount:
|
||||
case NI_X86Base_X64_PopCount:
|
||||
{
|
||||
genXCNTIntrinsic(node, INS_popcnt);
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
{
|
||||
unreached();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
genProduceReg(node);
|
||||
|
|
|
@ -419,20 +419,27 @@ HARDWARE_INTRINSIC(Vector512, op_UnsignedRightShift,
|
|||
// ISA Function name SIMD size NumArg Instructions Category Flags
|
||||
// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE}
|
||||
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
|
||||
// Intrinsics for X86Base, SSE, SSE2
|
||||
#define FIRST_NI_X86Base NI_X86Base_Add
|
||||
// Intrinsics for X86Base, SSE, SSE2, SSE3, SSSE3, SSE41, SSE42, POPCNT
|
||||
#define FIRST_NI_X86Base NI_X86Base_Abs
|
||||
HARDWARE_INTRINSIC(X86Base, Abs, 16, 1, {INS_pabsb, INS_invalid, INS_pabsw, INS_invalid, INS_pabsd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
|
||||
HARDWARE_INTRINSIC(X86Base, Add, 16, 2, {INS_paddb, INS_paddb, INS_paddw, INS_paddw, INS_paddd, INS_paddd, INS_paddq, INS_paddq, INS_addps, INS_addpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
|
||||
HARDWARE_INTRINSIC(X86Base, AddSaturate, 16, 2, {INS_paddsb, INS_paddusb, INS_paddsw, INS_paddusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
|
||||
HARDWARE_INTRINSIC(X86Base, AddScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addss, INS_addsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
|
||||
HARDWARE_INTRINSIC(X86Base, AddSubtract, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addsubps, INS_addsubpd}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics)
|
||||
HARDWARE_INTRINSIC(X86Base, AlignRight, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt)
|
||||
HARDWARE_INTRINSIC(X86Base, And, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pandd, INS_pandd, INS_pandd, INS_pandd, INS_andps, INS_andpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_NormalizeSmallTypeToInt)
|
||||
HARDWARE_INTRINSIC(X86Base, AndNot, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pandnd, INS_pandnd, INS_pandnd, INS_pandnd, INS_andnps, INS_andnpd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_NormalizeSmallTypeToInt)
|
||||
HARDWARE_INTRINSIC(X86Base, Average, 16, 2, {INS_invalid, INS_pavgb, INS_invalid, INS_pavgw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
|
||||
HARDWARE_INTRINSIC(X86Base, BitScanForward, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bsf, INS_bsf, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
|
||||
HARDWARE_INTRINSIC(X86Base, BitScanReverse, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bsr, INS_bsr, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
|
||||
HARDWARE_INTRINSIC(X86Base, CompareEqual, 16, 2, {INS_pcmpeqb, INS_pcmpeqb, INS_pcmpeqw, INS_pcmpeqw, INS_pcmpeqd, INS_pcmpeqd, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport)
|
||||
HARDWARE_INTRINSIC(X86Base, CompareGreaterThan, 16, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport)
|
||||
HARDWARE_INTRINSIC(X86Base, Blend, 16, 3, {INS_invalid, INS_invalid, INS_pblendw, INS_pblendw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_blendps, INS_blendpd}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics)
|
||||
HARDWARE_INTRINSIC(X86Base, BlendVariable, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pblendvb, INS_pblendvb, INS_pblendvb, INS_pblendvb, INS_blendvps, INS_blendvpd}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics|HW_Flag_NormalizeSmallTypeToInt|HW_Flag_SpecialImport)
|
||||
HARDWARE_INTRINSIC(X86Base, Ceiling, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics)
|
||||
HARDWARE_INTRINSIC(X86Base, CeilingScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundss, INS_roundsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
|
||||
HARDWARE_INTRINSIC(X86Base, CompareEqual, 16, 2, {INS_pcmpeqb, INS_pcmpeqb, INS_pcmpeqw, INS_pcmpeqw, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqq, INS_pcmpeqq, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport)
|
||||
HARDWARE_INTRINSIC(X86Base, CompareGreaterThan, 16, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_pcmpgtq, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport)
|
||||
HARDWARE_INTRINSIC(X86Base, CompareGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport)
|
||||
HARDWARE_INTRINSIC(X86Base, CompareLessThan, 16, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport)
|
||||
HARDWARE_INTRINSIC(X86Base, CompareLessThan, 16, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_pcmpgtq, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport)
|
||||
HARDWARE_INTRINSIC(X86Base, CompareLessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport)
|
||||
HARDWARE_INTRINSIC(X86Base, CompareNotEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport)
|
||||
HARDWARE_INTRINSIC(X86Base, CompareNotGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport)
|
||||
|
@ -473,39 +480,59 @@ HARDWARE_INTRINSIC(X86Base, ConvertToInt32,
|
|||
HARDWARE_INTRINSIC(X86Base, ConvertToInt32WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttss2si32, INS_cvttsd2si32}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
|
||||
HARDWARE_INTRINSIC(X86Base, ConvertToUInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd32, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
|
||||
HARDWARE_INTRINSIC(X86Base, ConvertToVector128Double, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2pd, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2pd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
|
||||
HARDWARE_INTRINSIC(X86Base, ConvertToVector128Int32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2dq, INS_cvtpd2dq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
|
||||
HARDWARE_INTRINSIC(X86Base, ConvertToVector128Int16, 16, 1, {INS_pmovsxbw, INS_pmovzxbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_MaybeMemoryLoad)
|
||||
HARDWARE_INTRINSIC(X86Base, ConvertToVector128Int32, 16, 1, {INS_pmovsxbd, INS_pmovzxbd, INS_pmovsxwd, INS_pmovzxwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2dq, INS_cvtpd2dq}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_MaybeMemoryLoad)
|
||||
HARDWARE_INTRINSIC(X86Base, ConvertToVector128Int32WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttps2dq, INS_cvttpd2dq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
|
||||
HARDWARE_INTRINSIC(X86Base, ConvertToVector128Int64, 16, 1, {INS_pmovsxbq, INS_pmovzxbq, INS_pmovsxwq, INS_pmovzxwq, INS_pmovsxdq, INS_pmovzxdq, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_MaybeMemoryLoad)
|
||||
HARDWARE_INTRINSIC(X86Base, ConvertToVector128Single, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2ps, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtpd2ps}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
|
||||
HARDWARE_INTRINSIC(X86Base, Crc32, 0, 2, {INS_invalid, INS_crc32, INS_invalid, INS_crc32, INS_invalid, INS_crc32, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_RmwIntrinsic)
|
||||
HARDWARE_INTRINSIC(X86Base, DivRem, 0, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_idiv, INS_div, INS_idiv, INS_div, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_BaseTypeFromSecondArg|HW_Flag_MultiReg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_RmwIntrinsic)
|
||||
HARDWARE_INTRINSIC(X86Base, Divide, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divps, INS_divpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
|
||||
HARDWARE_INTRINSIC(X86Base, DivideScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divss, INS_divsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
|
||||
HARDWARE_INTRINSIC(X86Base, Extract, 16, 2, {INS_invalid, INS_invalid, INS_pextrw, INS_pextrw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
|
||||
HARDWARE_INTRINSIC(X86Base, Insert, 16, 3, {INS_invalid, INS_invalid, INS_pinsrw, INS_pinsrw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_CanBenefitFromConstantProp)
|
||||
HARDWARE_INTRINSIC(X86Base, DotProduct, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_dpps, INS_dppd}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics)
|
||||
HARDWARE_INTRINSIC(X86Base, Extract, 16, 2, {INS_pextrb, INS_pextrb, INS_pextrw, INS_pextrw, INS_pextrd, INS_pextrd, INS_invalid, INS_invalid, INS_extractps, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
|
||||
HARDWARE_INTRINSIC(X86Base, Floor, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics)
|
||||
HARDWARE_INTRINSIC(X86Base, FloorScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundss, INS_roundsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
|
||||
HARDWARE_INTRINSIC(X86Base, HorizontalAdd, 16, 2, {INS_invalid, INS_invalid, INS_phaddw, INS_phaddw, INS_phaddd, INS_phaddd, INS_invalid, INS_invalid, INS_haddps, INS_haddpd}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics)
HARDWARE_INTRINSIC(X86Base, HorizontalAddSaturate, 16, 2, {INS_invalid, INS_invalid, INS_phaddsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics)
HARDWARE_INTRINSIC(X86Base, HorizontalSubtract, 16, 2, {INS_invalid, INS_invalid, INS_phsubw, INS_invalid, INS_phsubd, INS_invalid, INS_invalid, INS_invalid, INS_hsubps, INS_hsubpd}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics)
HARDWARE_INTRINSIC(X86Base, HorizontalSubtractSaturate, 16, 2, {INS_invalid, INS_invalid, INS_phsubsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics)
HARDWARE_INTRINSIC(X86Base, Insert, 16, 3, {INS_pinsrb, INS_pinsrb, INS_pinsrw, INS_pinsrw, INS_pinsrd, INS_pinsrd, INS_invalid, INS_invalid, INS_insertps, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_CanBenefitFromConstantProp)
HARDWARE_INTRINSIC(X86Base, LoadAlignedVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movaps, INS_movapd}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics|HW_Flag_NormalizeSmallTypeToInt)
HARDWARE_INTRINSIC(X86Base, LoadAlignedVector128NonTemporal, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics|HW_Flag_NormalizeSmallTypeToInt)
HARDWARE_INTRINSIC(X86Base, LoadAndDuplicateToVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movddup}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(X86Base, LoadDquVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_lddqu, INS_lddqu, INS_lddqu, INS_lddqu, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics|HW_Flag_NoEvexSemantics|HW_Flag_NormalizeSmallTypeToInt)
HARDWARE_INTRINSIC(X86Base, LoadFence, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Barrier)
HARDWARE_INTRINSIC(X86Base, LoadHigh, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movhps, INS_movhpd}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(X86Base, LoadLow, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movlps, INS_movlpd}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(X86Base, LoadScalarVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd32, INS_movd32, INS_movq, INS_movq, INS_movss, INS_movsd_simd}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(X86Base, LoadVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(X86Base, MaskMove, 16, 3, {INS_maskmovdqu, INS_maskmovdqu, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg)
HARDWARE_INTRINSIC(X86Base, Max, 16, 2, {INS_invalid, INS_pmaxub, INS_pmaxsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_maxps, INS_maxpd}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative)
HARDWARE_INTRINSIC(X86Base, Max, 16, 2, {INS_pmaxsb, INS_pmaxub, INS_pmaxsw, INS_pmaxuw, INS_pmaxsd, INS_pmaxud, INS_invalid, INS_invalid, INS_maxps, INS_maxpd}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative)
HARDWARE_INTRINSIC(X86Base, MaxScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_maxss, INS_maxsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(X86Base, MemoryFence, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Barrier)
HARDWARE_INTRINSIC(X86Base, Min, 16, 2, {INS_invalid, INS_pminub, INS_pminsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_minps, INS_minpd}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative)
HARDWARE_INTRINSIC(X86Base, Min, 16, 2, {INS_pminsb, INS_pminub, INS_pminsw, INS_pminuw, INS_pminsd, INS_pminud, INS_invalid, INS_invalid, INS_minps, INS_minpd}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative)
HARDWARE_INTRINSIC(X86Base, MinHorizontal, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_phminposuw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics|HW_Flag_NoEvexSemantics)
HARDWARE_INTRINSIC(X86Base, MinScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_minss, INS_minsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(X86Base, MoveAndDuplicate, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movddup}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(X86Base, MoveHighAndDuplicate, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movshdup, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(X86Base, MoveHighToLow, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movhlps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment)
HARDWARE_INTRINSIC(X86Base, MoveLowAndDuplicate, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movsldup, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(X86Base, MoveLowToHigh, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movlhps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment)
HARDWARE_INTRINSIC(X86Base, MoveMask, 16, 1, {INS_pmovmskb, INS_pmovmskb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movmskps, INS_movmskpd}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics)
HARDWARE_INTRINSIC(X86Base, MoveScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movq, INS_movq, INS_movss, INS_movsd_simd}, HW_Category_SIMDScalar, HW_Flag_NoContainment)
HARDWARE_INTRINSIC(X86Base, Multiply, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmuludq, INS_mulps, INS_mulpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
HARDWARE_INTRINSIC(X86Base, MultiplyAddAdjacent, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmaddwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
HARDWARE_INTRINSIC(X86Base, MultipleSumAbsoluteDifferences, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_mpsadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics)
HARDWARE_INTRINSIC(X86Base, Multiply, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmuldq, INS_pmuludq, INS_mulps, INS_mulpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
HARDWARE_INTRINSIC(X86Base, MultiplyAddAdjacent, 16, 2, {INS_invalid, INS_invalid, INS_pmaddubsw, INS_invalid, INS_pmaddwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative)
HARDWARE_INTRINSIC(X86Base, MultiplyHigh, 16, 2, {INS_invalid, INS_invalid, INS_pmulhw, INS_pmulhuw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
HARDWARE_INTRINSIC(X86Base, MultiplyLow, 16, 2, {INS_invalid, INS_invalid, INS_pmullw, INS_pmullw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
HARDWARE_INTRINSIC(X86Base, MultiplyHighRoundScale, 16, 2, {INS_invalid, INS_invalid, INS_pmulhrsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(X86Base, MultiplyLow, 16, 2, {INS_invalid, INS_invalid, INS_pmullw, INS_pmullw, INS_pmulld, INS_pmulld, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
HARDWARE_INTRINSIC(X86Base, MultiplyScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulss, INS_mulsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(X86Base, Or, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pord, INS_pord, INS_pord, INS_pord, INS_orps, INS_orpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_NormalizeSmallTypeToInt)
HARDWARE_INTRINSIC(X86Base, PackSignedSaturate, 16, 2, {INS_packsswb, INS_invalid, INS_packssdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(X86Base, PackUnsignedSaturate, 16, 2, {INS_invalid, INS_packuswb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(X86Base, PackUnsignedSaturate, 16, 2, {INS_invalid, INS_packuswb, INS_invalid, INS_packusdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(X86Base, Pause, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Other)
HARDWARE_INTRINSIC(X86Base, PopCount, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_popcnt, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(X86Base, Prefetch0, 0, 1, {INS_invalid, INS_prefetcht0, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Other)
HARDWARE_INTRINSIC(X86Base, Prefetch1, 0, 1, {INS_invalid, INS_prefetcht1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Other)
HARDWARE_INTRINSIC(X86Base, Prefetch2, 0, 1, {INS_invalid, INS_prefetcht2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Other)
@@ -514,14 +541,25 @@ HARDWARE_INTRINSIC(X86Base, Reciprocal,
HARDWARE_INTRINSIC(X86Base, ReciprocalScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rcpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics)
HARDWARE_INTRINSIC(X86Base, ReciprocalSqrt, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rsqrtps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics|HW_Flag_NoEvexSemantics)
HARDWARE_INTRINSIC(X86Base, ReciprocalSqrtScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rsqrtss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics)
HARDWARE_INTRINSIC(X86Base, RoundCurrentDirection, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(X86Base, RoundCurrentDirectionScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundss, INS_roundsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(X86Base, RoundToNearestInteger, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(X86Base, RoundToNearestIntegerScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundss, INS_roundsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(X86Base, RoundToNegativeInfinity, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(X86Base, RoundToNegativeInfinityScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundss, INS_roundsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(X86Base, RoundToPositiveInfinity, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(X86Base, RoundToPositiveInfinityScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundss, INS_roundsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(X86Base, RoundToZero, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(X86Base, RoundToZeroScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundss, INS_roundsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(X86Base, ShiftLeftLogical, 16, 2, {INS_invalid, INS_invalid, INS_psllw, INS_psllw, INS_pslld, INS_pslld, INS_psllq, INS_psllq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(X86Base, ShiftLeftLogical128BitLane, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pslldq, INS_pslldq, INS_pslldq, INS_pslldq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt)
HARDWARE_INTRINSIC(X86Base, ShiftRightArithmetic, 16, 2, {INS_invalid, INS_invalid, INS_psraw, INS_invalid, INS_psrad, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(X86Base, ShiftRightLogical, 16, 2, {INS_invalid, INS_invalid, INS_psrlw, INS_psrlw, INS_psrld, INS_psrld, INS_psrlq, INS_psrlq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(X86Base, ShiftRightLogical128BitLane, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_psrldq, INS_psrldq, INS_psrldq, INS_psrldq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt)
HARDWARE_INTRINSIC(X86Base, Shuffle, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pshufd, INS_pshufd, INS_invalid, INS_invalid, INS_shufps, INS_shufpd}, HW_Category_IMM, HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(X86Base, Shuffle, 16, -1, {INS_pshufb, INS_pshufb, INS_invalid, INS_invalid, INS_pshufd, INS_pshufd, INS_invalid, INS_invalid, INS_shufps, INS_shufpd}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(X86Base, ShuffleHigh, 16, 2, {INS_invalid, INS_invalid, INS_pshufhw, INS_pshufhw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(X86Base, ShuffleLow, 16, 2, {INS_invalid, INS_invalid, INS_pshuflw, INS_pshuflw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(X86Base, Sign, 16, 2, {INS_psignb, INS_invalid, INS_psignw, INS_invalid, INS_psignd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics)
HARDWARE_INTRINSIC(X86Base, Sqrt, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtps, INS_sqrtpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(X86Base, SqrtScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtss, INS_sqrtsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(X86Base, Store, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromSecondArg)
@@ -536,6 +574,9 @@ HARDWARE_INTRINSIC(X86Base, Subtract,
HARDWARE_INTRINSIC(X86Base, SubtractSaturate, 16, 2, {INS_psubsb, INS_psubusb, INS_psubsw, INS_psubusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(X86Base, SubtractScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_subss, INS_subsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(X86Base, SumAbsoluteDifferences, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_psadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(X86Base, TestC, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics|HW_Flag_NormalizeSmallTypeToInt)
HARDWARE_INTRINSIC(X86Base, TestNotZAndNotC, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics|HW_Flag_NormalizeSmallTypeToInt)
HARDWARE_INTRINSIC(X86Base, TestZ, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics|HW_Flag_NormalizeSmallTypeToInt)
HARDWARE_INTRINSIC(X86Base, UnpackHigh, 16, 2, {INS_punpckhbw, INS_punpckhbw, INS_punpckhwd, INS_punpckhwd, INS_punpckhdq, INS_punpckhdq, INS_punpckhqdq, INS_punpckhqdq, INS_unpckhps, INS_unpckhpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(X86Base, UnpackLow, 16, 2, {INS_punpcklbw, INS_punpcklbw, INS_punpcklwd, INS_punpcklwd, INS_punpckldq, INS_punpckldq, INS_punpcklqdq, INS_punpcklqdq, INS_unpcklps, INS_unpcklpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(X86Base, Xor, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pxord, INS_pxord, INS_pxord, INS_pxord, INS_xorps, INS_xorpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp|HW_Flag_NormalizeSmallTypeToInt)
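Each HARDWARE_INTRINSIC row above binds one managed intrinsic to a ten-entry instruction array indexed by base type ({TYP_BYTE ... TYP_DOUBLE}), plus a SIMD size, argument count, category, and flags. As a reading aid, here is a minimal C# sketch (real .NET APIs, illustrative values) exercising two of the scalar rows above: DivRem (3-arg, HW_Flag_MultiReg idiv/div) and Crc32 (HW_Flag_RmwIntrinsic crc32).

using System;
using System.Runtime.Intrinsics.X86;

class TableRowDemo
{
    static void Main()
    {
        if (X86Base.IsSupported)
        {
            // 100 / 7 with a zero upper half of the dividend -> (14, 2).
            (uint q, uint r) = X86Base.DivRem(100u, 0u, 7u);
            Console.WriteLine($"{q} rem {r}");
        }
        if (Sse42.IsSupported)
        {
            // One accumulating CRC-32C step; maps to the crc32 row.
            Console.WriteLine(Sse42.Crc32(0u, 0x12345678u).ToString("x8"));
        }
    }
}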
@@ -545,7 +586,7 @@ HARDWARE_INTRINSIC(X86Base, Xor,
// ISA Function name SIMD size NumArg Instructions Category Flags
// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE}
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// 64-bit only Intrinsics for X86Base, SSE, SSE2
// 64-bit only Intrinsics for X86Base, SSE, SSE2, SSE3, SSSE3, SSE41, SSE42, POPCNT
#define FIRST_NI_X86Base_X64 NI_X86Base_X64_BitScanForward
HARDWARE_INTRINSIC(X86Base_X64, BitScanForward, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bsf, INS_bsf, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(X86Base_X64, BitScanReverse, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bsr, INS_bsr, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
@@ -556,88 +597,14 @@ HARDWARE_INTRINSIC(X86Base_X64, ConvertScalarToVector128UInt64,
HARDWARE_INTRINSIC(X86Base_X64, ConvertToInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd64, INS_invalid, INS_cvtss2si64, INS_cvtsd2si64}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(X86Base_X64, ConvertToInt64WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttss2si64, INS_cvttsd2si64}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(X86Base_X64, ConvertToUInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd64, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(X86Base_X64, Crc32, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_crc32, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_RmwIntrinsic)
HARDWARE_INTRINSIC(X86Base_X64, DivRem, 0, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_idiv, INS_div, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_BaseTypeFromSecondArg|HW_Flag_MultiReg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_RmwIntrinsic)
HARDWARE_INTRINSIC(X86Base_X64, Extract, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pextrq, INS_pextrq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(X86Base_X64, Insert, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pinsrq, INS_pinsrq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_CanBenefitFromConstantProp)
HARDWARE_INTRINSIC(X86Base_X64, PopCount, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_popcnt, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(X86Base_X64, StoreNonTemporal, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movnti64, INS_movnti64, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromSecondArg)
#define LAST_NI_X86Base_X64 NI_X86Base_X64_StoreNonTemporal
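The X86Base_X64 rows above are reachable only from a 64-bit process, via the nested X64 classes on the managed side. A small sketch (illustrative values) of the 64-bit CRC row, which this change maps to NI_X86Base_X64_Crc32:

using System;
using System.Runtime.Intrinsics.X86;

class Crc64Demo
{
    static void Main()
    {
        if (Sse42.X64.IsSupported)
        {
            // crc32 r64, r/m64: consumes 8 bytes per step; only the low
            // 32 bits of the result carry CRC state.
            ulong crc = Sse42.X64.Crc32(0UL, 0x0123456789ABCDEFUL);
            Console.WriteLine(crc.ToString("x16"));
        }
    }
}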
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// ISA Function name SIMD size NumArg Instructions Category Flags
// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE}
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// Intrinsics for SSE3, SSSE3, SSE41, SSE42, POPCNT
#define FIRST_NI_SSE42 NI_SSE42_Abs
HARDWARE_INTRINSIC(SSE42, Abs, 16, 1, {INS_pabsb, INS_invalid, INS_pabsw, INS_invalid, INS_pabsd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE42, AddSubtract, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addsubps, INS_addsubpd}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics)
HARDWARE_INTRINSIC(SSE42, AlignRight, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt)
HARDWARE_INTRINSIC(SSE42, Blend, 16, 3, {INS_invalid, INS_invalid, INS_pblendw, INS_pblendw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_blendps, INS_blendpd}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics)
HARDWARE_INTRINSIC(SSE42, BlendVariable, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pblendvb, INS_pblendvb, INS_pblendvb, INS_pblendvb, INS_blendvps, INS_blendvpd}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics|HW_Flag_NormalizeSmallTypeToInt|HW_Flag_SpecialImport)
HARDWARE_INTRINSIC(SSE42, Ceiling, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE42, CeilingScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundss, INS_roundsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE42, CompareEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pcmpeqq, INS_pcmpeqq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport)
HARDWARE_INTRINSIC(SSE42, CompareGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pcmpgtq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport)
HARDWARE_INTRINSIC(SSE42, CompareLessThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pcmpgtq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport)
HARDWARE_INTRINSIC(SSE42, ConvertToVector128Int16, 16, 1, {INS_pmovsxbw, INS_pmovzxbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_MaybeMemoryLoad)
HARDWARE_INTRINSIC(SSE42, ConvertToVector128Int32, 16, 1, {INS_pmovsxbd, INS_pmovzxbd, INS_pmovsxwd, INS_pmovzxwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_MaybeMemoryLoad)
HARDWARE_INTRINSIC(SSE42, ConvertToVector128Int64, 16, 1, {INS_pmovsxbq, INS_pmovzxbq, INS_pmovsxwq, INS_pmovzxwq, INS_pmovsxdq, INS_pmovzxdq, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_MaybeMemoryLoad)
HARDWARE_INTRINSIC(SSE42, Crc32, 0, 2, {INS_invalid, INS_crc32, INS_invalid, INS_crc32, INS_invalid, INS_crc32, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_RmwIntrinsic)
HARDWARE_INTRINSIC(SSE42, DotProduct, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_dpps, INS_dppd}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics)
HARDWARE_INTRINSIC(SSE42, Extract, 16, 2, {INS_pextrb, INS_pextrb, INS_invalid, INS_invalid, INS_pextrd, INS_pextrd, INS_invalid, INS_invalid, INS_extractps, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE42, Floor, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE42, FloorScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundss, INS_roundsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE42, HorizontalAdd, 16, 2, {INS_invalid, INS_invalid, INS_phaddw, INS_phaddw, INS_phaddd, INS_phaddd, INS_invalid, INS_invalid, INS_haddps, INS_haddpd}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics)
HARDWARE_INTRINSIC(SSE42, HorizontalAddSaturate, 16, 2, {INS_invalid, INS_invalid, INS_phaddsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics)
HARDWARE_INTRINSIC(SSE42, HorizontalSubtract, 16, 2, {INS_invalid, INS_invalid, INS_phsubw, INS_invalid, INS_phsubd, INS_invalid, INS_invalid, INS_invalid, INS_hsubps, INS_hsubpd}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics)
HARDWARE_INTRINSIC(SSE42, HorizontalSubtractSaturate, 16, 2, {INS_invalid, INS_invalid, INS_phsubsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics)
HARDWARE_INTRINSIC(SSE42, Insert, 16, 3, {INS_pinsrb, INS_pinsrb, INS_invalid, INS_invalid, INS_pinsrd, INS_pinsrd, INS_invalid, INS_invalid, INS_insertps, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_CanBenefitFromConstantProp)
HARDWARE_INTRINSIC(SSE42, LoadAlignedVector128NonTemporal, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics|HW_Flag_NormalizeSmallTypeToInt)
HARDWARE_INTRINSIC(SSE42, LoadAndDuplicateToVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movddup}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE42, LoadDquVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_lddqu, INS_lddqu, INS_lddqu, INS_lddqu, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics|HW_Flag_NoEvexSemantics|HW_Flag_NormalizeSmallTypeToInt)
HARDWARE_INTRINSIC(SSE42, Max, 16, 2, {INS_pmaxsb, INS_invalid, INS_invalid, INS_pmaxuw, INS_pmaxsd, INS_pmaxud, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
HARDWARE_INTRINSIC(SSE42, Min, 16, 2, {INS_pminsb, INS_invalid, INS_invalid, INS_pminuw, INS_pminsd, INS_pminud, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
HARDWARE_INTRINSIC(SSE42, MinHorizontal, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_phminposuw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics|HW_Flag_NoEvexSemantics)
HARDWARE_INTRINSIC(SSE42, MoveAndDuplicate, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movddup}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE42, MoveHighAndDuplicate, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movshdup, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE42, MoveLowAndDuplicate, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movsldup, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE42, MultipleSumAbsoluteDifferences, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_mpsadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics)
HARDWARE_INTRINSIC(SSE42, Multiply, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmuldq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
HARDWARE_INTRINSIC(SSE42, MultiplyAddAdjacent, 16, 2, {INS_invalid, INS_invalid, INS_pmaddubsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE42, MultiplyHighRoundScale, 16, 2, {INS_invalid, INS_invalid, INS_pmulhrsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE42, MultiplyLow, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmulld, INS_pmulld, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
HARDWARE_INTRINSIC(SSE42, PackUnsignedSaturate, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_packusdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE42, PopCount, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_popcnt, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(SSE42, RoundCurrentDirection, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE42, RoundCurrentDirectionScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundss, INS_roundsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE42, RoundToNearestInteger, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE42, RoundToNearestIntegerScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundss, INS_roundsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE42, RoundToNegativeInfinity, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE42, RoundToNegativeInfinityScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundss, INS_roundsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE42, RoundToPositiveInfinity, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE42, RoundToPositiveInfinityScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundss, INS_roundsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE42, RoundToZero, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE42, RoundToZeroScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundss, INS_roundsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE42, Shuffle, 16, 2, {INS_pshufb, INS_pshufb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE42, Sign, 16, 2, {INS_psignb, INS_invalid, INS_psignw, INS_invalid, INS_psignd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics)
HARDWARE_INTRINSIC(SSE42, TestC, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics|HW_Flag_NormalizeSmallTypeToInt)
HARDWARE_INTRINSIC(SSE42, TestNotZAndNotC, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics|HW_Flag_NormalizeSmallTypeToInt)
HARDWARE_INTRINSIC(SSE42, TestZ, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics|HW_Flag_NormalizeSmallTypeToInt)
#define LAST_NI_SSE42 NI_SSE42_TestZ
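The SSE42 section above carries the rows formerly split across the Sse3/Ssse3/Sse41/Sse42 ISAs, so existing guarded call sites keep resolving as before. A short sketch (illustrative values) of the pshufb row, NI_SSE42_Shuffle, through its long-standing managed surface:

using System;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

class ShuffleDemo
{
    static void Main()
    {
        if (Ssse3.IsSupported)
        {
            Vector128<byte> data = Vector128.Create(
                (byte)0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
            Vector128<byte> reversed = Vector128.Create(
                (byte)15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
            // Byte-wise table lookup: each result lane picks data[mask[i]].
            Console.WriteLine(Ssse3.Shuffle(data, reversed));
        }
    }
}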
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// ISA Function name SIMD size NumArg Instructions Category Flags
// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE}
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// 64-bit only Intrinsics for SSE3, SSSE3, SSE41, SSE42, POPCNT
#define FIRST_NI_SSE42_X64 NI_SSE42_X64_Crc32
HARDWARE_INTRINSIC(SSE42_X64, Crc32, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_crc32, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_RmwIntrinsic)
HARDWARE_INTRINSIC(SSE42_X64, Extract, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pextrq, INS_pextrq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE42_X64, Insert, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pinsrq, INS_pinsrq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_CanBenefitFromConstantProp)
HARDWARE_INTRINSIC(SSE42_X64, PopCount, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_popcnt, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen)
#define LAST_NI_SSE42_X64 NI_SSE42_X64_PopCount
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// ISA Function name SIMD size NumArg Instructions Category Flags
// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE}
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// Intrinsics for AVX
#define FIRST_NI_AVX NI_AVX_Add
HARDWARE_INTRINSIC(AVX, Add, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addps, INS_addpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
@@ -1208,8 +1175,8 @@ HARDWARE_INTRINSIC(GFNI_V512, GaloisFieldMultiply,
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// Special intrinsics that are generated during lowering
HARDWARE_INTRINSIC(X86Base, COMIS, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(X86Base, PTEST, 16, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics|HW_Flag_NoEvexSemantics)
HARDWARE_INTRINSIC(X86Base, UCOMIS, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE42, PTEST, 16, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics|HW_Flag_NoEvexSemantics)
HARDWARE_INTRINSIC(AVX, PTEST, 0, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_vtestps, INS_vtestpd}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics)
HARDWARE_INTRINSIC(AVX2, AndNotVector, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pandnd, INS_pandnd, INS_pandnd, INS_pandnd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NormalizeSmallTypeToInt)
HARDWARE_INTRINSIC(AVX2, AndNotScalar, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andn, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoEvexSemantics)
@@ -20,8 +20,6 @@ static CORINFO_InstructionSet X64VersionOfIsa(CORINFO_InstructionSet isa)
{
case InstructionSet_X86Base:
return InstructionSet_X86Base_X64;
case InstructionSet_SSE42:
return InstructionSet_SSE42_X64;
case InstructionSet_AVX:
return InstructionSet_AVX_X64;
case InstructionSet_AVX2:
@@ -333,7 +331,7 @@ CORINFO_InstructionSet Compiler::lookupInstructionSet(const char* className)
}
else if (strcmp(className + 1, "opcnt") == 0)
{
return InstructionSet_SSE42;
return InstructionSet_X86Base;
}
}
else if (className[0] == 'S')
@@ -350,20 +348,20 @@ CORINFO_InstructionSet Compiler::lookupInstructionSet(const char* className)
}
else if (strcmp(className + 3, "3") == 0)
{
return InstructionSet_SSE42;
return InstructionSet_X86Base;
}
else if (strcmp(className + 3, "41") == 0)
{
return InstructionSet_SSE42;
return InstructionSet_X86Base;
}
else if (strcmp(className + 3, "42") == 0)
{
return InstructionSet_SSE42;
return InstructionSet_X86Base;
}
}
else if (strcmp(className + 1, "sse3") == 0)
{
return InstructionSet_SSE42;
return InstructionSet_X86Base;
}
}
else if (className[0] == 'V')
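The strcmp ladder above resolves managed class names to JIT instruction sets; after this change "Popcnt", "Sse3", "Sse41", "Sse42", and "Ssse3" all collapse to InstructionSet_X86Base. A hypothetical C# restatement of just that mapping (illustrative only, not JIT source):

using System;
using System.Collections.Generic;

class IsaLookupSketch
{
    // Mirrors the new return values in the ladder above.
    static readonly Dictionary<string, string> ClassToIsa = new()
    {
        ["Popcnt"] = "X86Base",
        ["Sse3"]   = "X86Base",
        ["Sse41"]  = "X86Base",
        ["Sse42"]  = "X86Base",
        ["Ssse3"]  = "X86Base",
    };

    static void Main() => Console.WriteLine(ClassToIsa["Sse41"]); // X86Base
}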
@@ -1054,54 +1052,54 @@ int HWIntrinsicInfo::lookupIval(Compiler* comp, NamedIntrinsic id, var_types sim
return static_cast<int>(FloatComparisonMode::UnorderedNonSignaling);
}

case NI_SSE42_Ceiling:
case NI_SSE42_CeilingScalar:
case NI_X86Base_Ceiling:
case NI_X86Base_CeilingScalar:
case NI_AVX_Ceiling:
{
FALLTHROUGH;
}

case NI_SSE42_RoundToPositiveInfinity:
case NI_SSE42_RoundToPositiveInfinityScalar:
case NI_X86Base_RoundToPositiveInfinity:
case NI_X86Base_RoundToPositiveInfinityScalar:
case NI_AVX_RoundToPositiveInfinity:
{
assert(varTypeIsFloating(simdBaseType));
return static_cast<int>(FloatRoundingMode::ToPositiveInfinity);
}

case NI_SSE42_Floor:
case NI_SSE42_FloorScalar:
case NI_X86Base_Floor:
case NI_X86Base_FloorScalar:
case NI_AVX_Floor:
{
FALLTHROUGH;
}

case NI_SSE42_RoundToNegativeInfinity:
case NI_SSE42_RoundToNegativeInfinityScalar:
case NI_X86Base_RoundToNegativeInfinity:
case NI_X86Base_RoundToNegativeInfinityScalar:
case NI_AVX_RoundToNegativeInfinity:
{
assert(varTypeIsFloating(simdBaseType));
return static_cast<int>(FloatRoundingMode::ToNegativeInfinity);
}

case NI_SSE42_RoundCurrentDirection:
case NI_SSE42_RoundCurrentDirectionScalar:
case NI_X86Base_RoundCurrentDirection:
case NI_X86Base_RoundCurrentDirectionScalar:
case NI_AVX_RoundCurrentDirection:
{
assert(varTypeIsFloating(simdBaseType));
return static_cast<int>(FloatRoundingMode::CurrentDirection);
}

case NI_SSE42_RoundToNearestInteger:
case NI_SSE42_RoundToNearestIntegerScalar:
case NI_X86Base_RoundToNearestInteger:
case NI_X86Base_RoundToNearestIntegerScalar:
case NI_AVX_RoundToNearestInteger:
{
assert(varTypeIsFloating(simdBaseType));
return static_cast<int>(FloatRoundingMode::ToNearestInteger);
}

case NI_SSE42_RoundToZero:
case NI_SSE42_RoundToZeroScalar:
case NI_X86Base_RoundToZero:
case NI_X86Base_RoundToZeroScalar:
case NI_AVX_RoundToZero:
{
assert(varTypeIsFloating(simdBaseType));
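lookupIval folds each Ceiling/Floor family into the matching RoundTo* case and returns the rounding immediate for roundps/roundpd. As a sketch of the encoding (assuming FloatRoundingMode follows the standard SSE4.1 imm8 values; the actual enum lives elsewhere in the JIT):

// Hypothetical restatement of the imm8 values behind FloatRoundingMode.
enum FloatRoundingModeSketch : byte
{
    CurrentDirection   = 0x04, // honor MXCSR.RC, suppress precision faults
    ToNearestInteger   = 0x08,
    ToNegativeInfinity = 0x09, // Floor falls through to this case
    ToPositiveInfinity = 0x0A, // Ceiling falls through to this case
    ToZero             = 0x0B, // Truncate
}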
@@ -1803,11 +1801,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
break;
}

if ((simdSize < 32) && !compOpportunisticallyDependsOn(InstructionSet_SSE42))
{
break;
}

op1 = impSIMDPopStack();
retNode = gtNewSimdCeilNode(retType, op1, simdBaseJitType, simdSize);
break;
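The hunk above deletes the SSE4.2 gate from the 128-bit Ceiling importer, so the cross-platform helper is now unconditionally accelerated on xarch. For example:

using System;
using System.Runtime.Intrinsics;

class CeilingDemo
{
    static void Main()
    {
        // Formerly bailed to a software fallback without SSE4.1/4.2;
        // per this change it always lowers to roundps on xarch.
        Vector128<float> v = Vector128.Create(1.2f, -1.2f, 2.5f, -2.5f);
        Console.WriteLine(Vector128.Ceiling(v)); // <2, -1, 3, -2>
    }
}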
@@ -1862,11 +1855,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
assert(sig->numArgs == 1);
assert(simdBaseType == TYP_FLOAT);

if (compOpportunisticallyDependsOn(InstructionSet_SSE42))
{
op1 = impSIMDPopStack();
retNode = gtNewSimdCvtNode(retType, op1, CORINFO_TYPE_INT, simdBaseJitType, simdSize);
}
op1 = impSIMDPopStack();
retNode = gtNewSimdCvtNode(retType, op1, CORINFO_TYPE_INT, simdBaseJitType, simdSize);
break;
}
@@ -2326,8 +2316,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
op2 = impSIMDPopStack();
op1 = impSIMDPopStack();

if ((simdSize == 64) || varTypeIsByte(simdBaseType) || varTypeIsLong(simdBaseType) ||
(varTypeIsInt(simdBaseType) && !compOpportunisticallyDependsOn(InstructionSet_SSE42)))
if ((simdSize == 64) || varTypeIsByte(simdBaseType) || varTypeIsLong(simdBaseType))
{
// The lowering for Dot doesn't handle these cases, so import as Sum(left * right)
retNode = gtNewSimdBinOpNode(GT_MUL, simdType, op1, op2, simdBaseJitType, simdSize);
@@ -2467,14 +2456,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
assert(op1 != nullptr);

retNode = gtNewSimdHWIntrinsicNode(retType, op1, moveMaskIntrinsic, simdBaseJitType, simdSize);

if ((simdSize == 16) && varTypeIsShort(simdBaseType))
{
if (!compOpportunisticallyDependsOn(InstructionSet_SSE42))
{
retNode->AsHWIntrinsic()->SetMethodHandle(this, method R2RARG(*entryPoint));
}
}
}
break;
}
@@ -2491,11 +2472,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
break;
}

if ((simdSize < 32) && !compOpportunisticallyDependsOn(InstructionSet_SSE42))
{
break;
}

op1 = impSIMDPopStack();
retNode = gtNewSimdFloorNode(retType, op1, simdBaseJitType, simdSize);
break;
@@ -2561,41 +2537,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
{
assert(sig->numArgs == 2);

op2 = impStackTop(0).val;

switch (simdBaseType)
{
case TYP_BYTE:
case TYP_UBYTE:
case TYP_INT:
case TYP_UINT:
case TYP_LONG:
case TYP_ULONG:
{
if (!op2->IsIntegralConst(0) && !compOpportunisticallyDependsOn(InstructionSet_SSE42))
{
// Using software fallback if simdBaseType is not supported by hardware
return nullptr;
}
break;
}

case TYP_DOUBLE:
case TYP_FLOAT:
case TYP_SHORT:
case TYP_USHORT:
{
// short/ushort/float/double is supported by SSE2
break;
}

default:
{
unreached();
}
}

impPopStack();
op2 = impPopStack().val;
op1 = impSIMDPopStack();

retNode = gtNewSimdGetElementNode(retType, op1, op2, simdBaseJitType, simdSize);
@@ -2752,10 +2694,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
{
assert(sig->numArgs == 1);

if ((simdSize == 16) && !compOpportunisticallyDependsOn(InstructionSet_SSE42))
{
break;
}
if ((simdSize == 32) && !compOpportunisticallyDependsOn(InstructionSet_AVX2))
{
break;
@@ -3586,11 +3524,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
break;
}

if ((simdSize < 32) && !compOpportunisticallyDependsOn(InstructionSet_SSE42))
{
break;
}

op1 = impSIMDPopStack();
retNode = gtNewSimdRoundNode(retType, op1, simdBaseJitType, simdSize);
break;
@@ -3981,11 +3914,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
break;
}

if ((simdSize < 32) && !compOpportunisticallyDependsOn(InstructionSet_SSE42))
{
break;
}

op1 = impSIMDPopStack();
retNode = gtNewSimdTruncNode(retType, op1, simdBaseJitType, simdSize);
break;
@@ -4040,42 +3968,17 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
case NI_Vector512_WithElement:
{
assert(sig->numArgs == 3);
GenTree* indexOp = impStackTop(1).val;

switch (simdBaseType)
if (varTypeIsLong(simdBaseType))
{
// Using software fallback if simdBaseType is not supported by hardware
case TYP_BYTE:
case TYP_UBYTE:
case TYP_INT:
case TYP_UINT:
if (!compOpportunisticallyDependsOn(InstructionSet_SSE42))
{
return nullptr;
}
break;

case TYP_LONG:
case TYP_ULONG:
if (!compOpportunisticallyDependsOn(InstructionSet_SSE42_X64))
{
return nullptr;
}
break;

case TYP_DOUBLE:
case TYP_FLOAT:
case TYP_SHORT:
case TYP_USHORT:
// short/ushort/float/double is supported by SSE2
break;

default:
unreached();
if (!compOpportunisticallyDependsOn(InstructionSet_X86Base_X64))
{
return nullptr;
}
}

GenTree* valueOp = impPopStack().val;
impPopStack(); // Pop the indexOp now that we know it's valid
GenTree* valueOp = impPopStack().val;
GenTree* indexOp = impPopStack().val;
GenTree* vectorOp = impSIMDPopStack();

retNode = gtNewSimdWithElementNode(retType, vectorOp, indexOp, valueOp, simdBaseJitType, simdSize);
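WithElement's import above now needs only one check: long/ulong element inserts require a 64-bit process (pinsrq via X86Base_X64); every other base type is baseline. For example:

using System;
using System.Runtime.Intrinsics;

class WithElementDemo
{
    static void Main()
    {
        Vector128<int> v = Vector128.Create(10, 20, 30, 40);
        // int elements lower to pinsrd from the baseline table.
        Console.WriteLine(v.WithElement(2, 99)); // <10, 20, 99, 40>
    }
}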
@@ -4914,7 +4817,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
break;
}

case NI_SSE42_BlendVariable:
case NI_X86Base_BlendVariable:
case NI_AVX_BlendVariable:
case NI_AVX2_BlendVariable:
case NI_AVX512_BlendVariable:
@@ -5021,7 +4924,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
}

case NI_X86Base_CompareEqual:
case NI_SSE42_CompareEqual:
case NI_AVX_CompareEqual:
case NI_AVX2_CompareEqual:
case NI_AVX512_CompareEqual:
@@ -5042,7 +4944,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
}

case NI_X86Base_CompareGreaterThan:
case NI_SSE42_CompareGreaterThan:
case NI_AVX_CompareGreaterThan:
case NI_AVX2_CompareGreaterThan:
case NI_AVX512_CompareGreaterThan:
@@ -5082,7 +4983,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
}

case NI_X86Base_CompareLessThan:
case NI_SSE42_CompareLessThan:
case NI_AVX_CompareLessThan:
case NI_AVX2_CompareLessThan:
case NI_AVX512_CompareLessThan:
@@ -5908,27 +5908,24 @@ GenTree* Compiler::impPrimitiveNamedIntrinsic(NamedIntrinsic intrinsic,

#if defined(FEATURE_HW_INTRINSICS)
#if defined(TARGET_XARCH)
if (compOpportunisticallyDependsOn(InstructionSet_SSE42))
GenTree* op2 = impPopStack().val;
GenTree* op1 = impPopStack().val;

if (varTypeIsLong(baseType))
{
GenTree* op2 = impPopStack().val;
GenTree* op1 = impPopStack().val;

if (varTypeIsLong(baseType))
{
hwintrinsic = NI_SSE42_X64_Crc32;
op1 = gtFoldExpr(gtNewCastNode(baseType, op1, /* unsigned */ true, baseType));
}
else
{
hwintrinsic = NI_SSE42_Crc32;
baseType = genActualType(baseType);
}

result = gtNewScalarHWIntrinsicNode(baseType, op1, op2, hwintrinsic);

// We use the simdBaseJitType to bring the type of the second argument to codegen
result->AsHWIntrinsic()->SetSimdBaseJitType(baseJitType);
hwintrinsic = NI_X86Base_X64_Crc32;
op1 = gtFoldExpr(gtNewCastNode(baseType, op1, /* unsigned */ true, baseType));
}
else
{
hwintrinsic = NI_X86Base_Crc32;
baseType = genActualType(baseType);
}

result = gtNewScalarHWIntrinsicNode(baseType, op1, op2, hwintrinsic);

// We use the simdBaseJitType to bring the type of the second argument to codegen
result->AsHWIntrinsic()->SetSimdBaseJitType(baseJitType);
#elif defined(TARGET_ARM64)
if (compOpportunisticallyDependsOn(InstructionSet_Crc32))
{
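Since the importer can now pick the CRC32 intrinsic unconditionally on xarch, a standalone sketch of the instruction it maps to may help (illustrative, assumes -msse4.2; widening sub-int operands mirrors the genActualType call above):

```cpp
#include <nmmintrin.h> // SSE4.2: crc32
#include <stdint.h>
#include <string.h>

// CRC-32C over a buffer: 4 bytes per crc32 instruction, plus a byte-wise tail.
uint32_t Crc32C(uint32_t crc, const uint8_t* data, size_t len)
{
    while (len >= 4)
    {
        uint32_t chunk;
        memcpy(&chunk, data, sizeof(chunk)); // unaligned-safe load
        crc = _mm_crc32_u32(crc, chunk);
        data += 4;
        len -= 4;
    }
    while (len-- != 0)
    {
        crc = _mm_crc32_u8(crc, *data++);
    }
    return crc;
}
```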
@@ -6173,14 +6170,11 @@ GenTree* Compiler::impPrimitiveNamedIntrinsic(NamedIntrinsic intrinsic,
}
#elif defined(FEATURE_HW_INTRINSICS)
#if defined(TARGET_XARCH)
if (compOpportunisticallyDependsOn(InstructionSet_SSE42))
{
// Pop the value from the stack
impPopStack();
// Pop the value from the stack
impPopStack();

hwintrinsic = varTypeIsLong(baseType) ? NI_SSE42_X64_PopCount : NI_SSE42_PopCount;
result = gtNewScalarHWIntrinsicNode(baseType, op1, hwintrinsic);
}
hwintrinsic = varTypeIsLong(baseType) ? NI_X86Base_X64_PopCount : NI_X86Base_PopCount;
result = gtNewScalarHWIntrinsicNode(baseType, op1, hwintrinsic);
#elif defined(TARGET_ARM64)
// TODO-ARM64-CQ: PopCount should be handled as an intrinsic for non-constant cases
#endif // TARGET_*
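With POPCNT in the baseline, the SSE42 guard above goes away and the scalar node is created unconditionally. A hedged sketch of what the hardware path buys over the software one (assumes -mpopcnt on an x64 target):

```cpp
#include <nmmintrin.h> // popcnt
#include <stdint.h>

// Hardware path: a single popcnt instruction.
int PopCount64(uint64_t value)
{
    return (int)_mm_popcnt_u64(value);
}

// SWAR fallback the intrinsic replaces on pre-POPCNT hardware.
int PopCount64Fallback(uint64_t v)
{
    v = v - ((v >> 1) & 0x5555555555555555ull);
    v = (v & 0x3333333333333333ull) + ((v >> 2) & 0x3333333333333333ull);
    v = (v + (v >> 4)) & 0x0F0F0F0F0F0F0F0Full;
    return (int)((v * 0x0101010101010101ull) >> 56);
}
```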
@@ -8153,6 +8147,8 @@ bool Compiler::IsTargetIntrinsic(NamedIntrinsic intrinsicName)
// instructions to directly compute round/ceiling/floor/truncate.

case NI_System_Math_Abs:
case NI_System_Math_Ceiling:
case NI_System_Math_Floor:
case NI_System_Math_Max:
case NI_System_Math_MaxMagnitude:
case NI_System_Math_MaxMagnitudeNumber:
@@ -8166,14 +8162,10 @@ bool Compiler::IsTargetIntrinsic(NamedIntrinsic intrinsicName)
case NI_System_Math_MultiplyAddEstimate:
case NI_System_Math_ReciprocalEstimate:
case NI_System_Math_ReciprocalSqrtEstimate:
case NI_System_Math_Sqrt:
    return true;

case NI_System_Math_Ceiling:
case NI_System_Math_Floor:
case NI_System_Math_Round:
case NI_System_Math_Sqrt:
case NI_System_Math_Truncate:
    return compOpportunisticallyDependsOn(InstructionSet_SSE42);
    return true;

case NI_System_Math_FusedMultiplyAdd:
    return compOpportunisticallyDependsOn(InstructionSet_AVX2);
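The rounding entries now return true unconditionally because the SSE4.1 roundsd/roundss instructions are part of the baseline. A hedged sketch of the two rounding immediates involved (assumes -msse4.1):

```cpp
#include <smmintrin.h> // SSE4.1: roundsd

// Math.Round-style round-half-to-even.
double RoundToEven(double x)
{
    __m128d v = _mm_set_sd(x);
    v = _mm_round_sd(v, v, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    return _mm_cvtsd_f64(v);
}

// Math.Truncate: round toward zero.
double Truncate(double x)
{
    __m128d v = _mm_set_sd(x);
    v = _mm_round_sd(v, v, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    return _mm_cvtsd_f64(v);
}
```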
@@ -1103,7 +1103,7 @@ CodeGen::OperandDesc CodeGen::genOperandDesc(instruction ins, GenTree* op)
var_types simdBaseType = hwintrinsic->GetSimdBaseType();
switch (intrinsicId)
{
    case NI_SSE42_LoadAndDuplicateToVector128:
    case NI_X86Base_LoadAndDuplicateToVector128:
    case NI_AVX_BroadcastScalarToVector128:
    case NI_AVX_BroadcastScalarToVector256:
    {
@@ -1127,13 +1127,13 @@ CodeGen::OperandDesc CodeGen::genOperandDesc(instruction ins, GenTree* op)
    }
}

case NI_SSE42_MoveAndDuplicate:
case NI_X86Base_MoveAndDuplicate:
case NI_AVX2_BroadcastScalarToVector128:
case NI_AVX2_BroadcastScalarToVector256:
case NI_AVX512_BroadcastScalarToVector512:
{
    assert(hwintrinsic->isContained());
    if (intrinsicId == NI_SSE42_MoveAndDuplicate)
    if (intrinsicId == NI_X86Base_MoveAndDuplicate)
    {
        assert(simdBaseType == TYP_DOUBLE);
    }
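Why the assert pins simdBaseType to TYP_DOUBLE: MoveAndDuplicate maps to movddup (SSE3), which broadcasts exactly one double, so that is the only base type codegen can treat as a contained broadcast here. A minimal sketch (assumes -msse3):

```cpp
#include <pmmintrin.h> // SSE3: movddup

// movddup loads one double and duplicates it into both lanes, which is what
// lets the scalar memory operand act as a contained broadcast.
__m128d BroadcastDouble(const double* p)
{
    return _mm_loaddup_pd(p);
}
```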
@@ -217,15 +217,21 @@ INSTMUL(imul_31, "imul", IUM_RD, BAD_CODE, 0xD54400003868
#define VEX3FLT(c1,c2) PACK4(c1, 0xc5, 0x02, c2)

#define FIRST_SSE_INSTRUCTION INS_addpd
// Instructions for SSE, SSE2
// Instructions for SSE, SSE2, SSE3, SSSE3, SSE41, SSE42, POPCNT
INST3(addpd, "vaddpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x58), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Add packed doubles
INST3(addps, "vaddps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x58), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Add packed singles
INST3(addsd, "vaddsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x58), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Add scalar doubles
INST3(addss, "vaddss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x58), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Add scalar singles
INST3(addsubpd, "vaddsubpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD0), 4C, 1C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Add/Subtract packed doubles
INST3(addsubps, "vaddsubps", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0xD0), 4C, 1C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Add/Subtract packed singles
INST3(andnpd, "vandnpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x55), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // And-Not packed doubles
INST3(andnps, "vandnps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x55), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // And-Not packed singles
INST3(andpd, "vandpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x54), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // AND packed doubles
INST3(andps, "vandps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x54), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // AND packed singles
INST3(blendpd, "vblendpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0D), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Blend Packed Double Precision Floating-Point Values
INST3(blendps, "vblendps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0C), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Blend Packed Single Precision Floating-Point Values
INST3(blendvpd, "blendvpd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x15), 1C, 2X, INS_TT_FULL_MEM, REX_W0) // Variable Blend Packed Doubles
INST3(blendvps, "blendvps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x14), 1C, 2X, INS_TT_FULL_MEM, REX_W0) // Variable Blend Packed Singles
INST3(cmppd, "vcmppd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xC2), 4C, 2X, INS_TT_FULL, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_FLAGS_HasPseudoName) // compare packed doubles
INST3(cmpps, "vcmpps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0xC2), 4C, 2X, INS_TT_FULL, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_FLAGS_HasPseudoName) // compare packed singles
INST3(cmpsd, "vcmpsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0xC2), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_FLAGS_HasPseudoName) // compare scalar doubles
@@ -258,6 +264,15 @@ INST3(divpd, "vdivpd", IUM_WR, BAD_CODE, BAD_CODE,
INST3(divps, "vdivps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5E), 11C, 3C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Divide packed singles
INST3(divsd, "vdivsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x5E), 13C, 4C, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Divide scalar doubles
INST3(divss, "vdivss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5E), 11C, 3C, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Divide scalar singles
INST3(dppd, "vdppd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x41), 9C, 1C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed dot product of two double vector regs
INST3(dpps, "vdpps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x40), 13C, 2C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed dot product of two float vector regs
INST3(extractps, "vextractps", IUM_WR, SSE3A(0x17), BAD_CODE, BAD_CODE, 4C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_WIG | Encoding_VEX | Encoding_EVEX) // Extract Packed Floating-Point Values
INST3(haddpd, "vhaddpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x7C), 6C, 2C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Horizontal add packed doubles
INST3(haddps, "vhaddps", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x7C), 6C, 2C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Horizontal add packed floats
INST3(hsubpd, "vhsubpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x7D), 6C, 2C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Horizontal subtract packed doubles
INST3(hsubps, "vhsubps", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x7D), 6C, 2C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Horizontal subtract packed floats
INST3(insertps, "vinsertps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x21), 1C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert packed single precision float value
INST3(lddqu, "vlddqu", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0xF0), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX) // Load Unaligned integer
INST3(lfence, "lfence", IUM_RD, 0x000FE8AE, BAD_CODE, BAD_CODE, ZERO, 4C, INS_TT_NONE, REX_WIG)
INST3(maskmovdqu, "vmaskmovdqu", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF7), 400C, 6C, INS_TT_NONE, REX_WIG | Encoding_VEX)
INST3(maxpd, "vmaxpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5F), 4C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Return Maximum packed doubles
@@ -273,6 +288,7 @@ INST3(movapd, "vmovapd", IUM_WR, PCKDBL(0x29), BAD_CODE,
INST3(movaps, "vmovaps", IUM_WR, PCKFLT(0x29), BAD_CODE, PCKFLT(0x28), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_W0_EVEX | Encoding_VEX | Encoding_EVEX)
INST3(movd32, "vmovd", IUM_WR, PCKDBL(0x7E), BAD_CODE, PCKDBL(0x6E), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | Encoding_REX2) // Move DWORD between xmm regs <-> memory/r32 regs
INST3(movd64, "vmovq", IUM_WR, PCKDBL(0x7E), BAD_CODE, PCKDBL(0x6E), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | Encoding_REX2) // Move QWORD between xmm regs <-> memory/r64 regs
INST3(movddup, "vmovddup", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x12), ILLEGAL, ILLEGAL, INS_TT_MOVDDUP, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX) // Replicate Double FP Values
INST3(movdqa32, "vmovdqa", IUM_WR, PCKDBL(0x7F), BAD_CODE, PCKDBL(0x6F), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | Encoding_REX2 | INS_FLAGS_HasPseudoName)
INST3(movdqu32, "vmovdqu", IUM_WR, SSEFLT(0x7F), BAD_CODE, SSEFLT(0x6F), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | Encoding_REX2 | INS_FLAGS_HasPseudoName)
INST3(movhlps, "vmovhlps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x12), 1C, 1C, INS_TT_NONE, REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction)
@@ -284,23 +300,31 @@ INST3(movlps, "vmovlps", IUM_WR, PCKFLT(0x13), BAD_CODE,
INST3(movmskpd, "vmovmskpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x50), ILLEGAL, ILLEGAL, INS_TT_NONE, REX_WIG | Encoding_VEX) // Extract 2-bit sign mask from xmm and store in reg. The upper bits of r32 or r64 are filled with zeros.
INST3(movmskps, "vmovmskps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x50), ILLEGAL, ILLEGAL, INS_TT_NONE, REX_WIG | Encoding_VEX)
INST3(movntdq, "vmovntdq", IUM_WR, PCKDBL(0xE7), BAD_CODE, BAD_CODE, 400C, 1C, INS_TT_FULL_MEM, REX_W0_EVEX | Encoding_VEX | Encoding_EVEX)
INST3(movntdqa, "vmovntdqa", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x2A), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // Load Double Quadword Non-Temporal Aligned Hint
INST3(movnti32, "movnti", IUM_WR, PCKFLT(0xC3), BAD_CODE, BAD_CODE, 400C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_REX2)
INST3(movnti64, "movnti", IUM_WR, PCKFLT(0xC3), BAD_CODE, BAD_CODE, 400C, 1C, INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_REX2)
INST3(movntpd, "vmovntpd", IUM_WR, PCKDBL(0x2B), BAD_CODE, BAD_CODE, 400C, 1C, INS_TT_FULL_MEM, REX_W1_EVEX | Encoding_VEX | Encoding_EVEX)
INST3(movntps, "vmovntps", IUM_WR, PCKFLT(0x2B), BAD_CODE, BAD_CODE, 400C, 1C, INS_TT_FULL_MEM, REX_W0_EVEX | Encoding_VEX | Encoding_EVEX)
INST3(movq, "vmovq", IUM_WR, PCKDBL(0xD6), BAD_CODE, SSEFLT(0x7E), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | Encoding_REX2) // Move Quadword between memory/mm <-> regs
INST3(movsd_simd, "vmovsd", IUM_WR, SSEDBL(0x11), BAD_CODE, SSEDBL(0x10), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction)
INST3(movshdup, "vmovshdup", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x16), 1C, 1C, INS_TT_FULL_MEM, KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // Replicate odd-indexed Single FP Values
INST3(movsldup, "vmovsldup", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x12), 1C, 1C, INS_TT_FULL_MEM, KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // Replicate even-indexed Single FP Values
INST3(movss, "vmovss", IUM_WR, SSEFLT(0x11), BAD_CODE, SSEFLT(0x10), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction)
INST3(movupd, "vmovupd", IUM_WR, PCKDBL(0x11), BAD_CODE, PCKDBL(0x10), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_W1_EVEX | Encoding_VEX | Encoding_EVEX)
INST3(movups, "vmovups", IUM_WR, PCKFLT(0x11), BAD_CODE, PCKFLT(0x10), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_W0_EVEX | Encoding_VEX | Encoding_EVEX)
INST3(mpsadbw, "vmpsadbw", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x42), 4C, 2C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Compute Multiple Packed Sums of Absolute Difference
INST3(mulpd, "vmulpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x59), 4C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply packed doubles
INST3(mulps, "vmulps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x59), 4C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply packed singles
INST3(mulsd, "vmulsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x59), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply scalar doubles
INST3(mulss, "vmulss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x59), 4C, 2X, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply scalar single
INST3(orpd, "vorpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x56), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Or packed doubles
INST3(orps, "vorps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x56), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Or packed singles
INST3(pabsb, "vpabsb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1C), 1C, 2X, INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed absolute value of bytes
INST3(pabsd, "vpabsd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1E), 1C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // Packed absolute value of 32-bit integers
INST3(pabsw, "vpabsw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1D), 1C, 2X, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed absolute value of 16-bit integers
INST3(packssdw, "vpackssdw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x6B), 1C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base8 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Pack (narrow) int to short with saturation
INST3(packsswb, "vpacksswb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x63), 1C, 1C, INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Pack (narrow) short to byte with saturation
INST3(packusdw, "vpackusdw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x2B), 1C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base8 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Pack (narrow) int to unsigned short with saturation
INST3(packuswb, "vpackuswb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x67), 1C, 1C, INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Pack (narrow) short to unsigned byte with saturation
INST3(paddb, "vpaddb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xFC), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Add packed byte integers
INST3(paddd, "vpaddd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xFE), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Add packed double-word (32-bit) integers
@@ -310,26 +334,68 @@ INST3(paddsw, "vpaddsw", IUM_WR, BAD_CODE, BAD_CODE,
INST3(paddusb, "vpaddusb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDC), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Add packed unsigned byte integers and saturate the results
INST3(paddusw, "vpaddusw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDD), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Add packed unsigned word integers and saturate the results
INST3(paddw, "vpaddw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xFD), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Add packed word (16-bit) integers
INST3(palignr, "vpalignr", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0F), 1C, 1C, INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed Align Right
INST3(pandd, "vpand", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDB), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_FLAGS_HasPseudoName) // Packed bit-wise AND of two xmm regs
INST3(pandnd, "vpandn", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDF), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_FLAGS_HasPseudoName) // Packed bit-wise AND NOT of two xmm regs
INST3(pavgb, "vpavgb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE0), 1C, 2X, INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Average of packed byte integers
INST3(pavgw, "vpavgw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE3), 1C, 2X, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Average of packed word integers
INST3(pblendvb, "pblendvb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x10), 1C, 2X, INS_TT_FULL_MEM, REX_W0) // Variable Blend Packed Bytes
INST3(pblendw, "vpblendw", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0E), 1C, 1C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Blend Packed Words
INST3(pcmpeqb, "vpcmpeqb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x74), 1C, 2X, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 8-bit integers for equality
INST3(pcmpeqd, "vpcmpeqd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x76), 1C, 2X, INS_TT_FULL, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 32-bit integers for equality
INST3(pcmpeqq, "vpcmpeqq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x29), 1C, 2X, INS_TT_FULL, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 64-bit integers for equality
INST3(pcmpeqw, "vpcmpeqw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x75), 1C, 2X, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 16-bit integers for equality
INST3(pcmpgtb, "vpcmpgtb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x64), 1C, 2X, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 8-bit signed integers for greater than
INST3(pcmpgtd, "vpcmpgtd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x66), 1C, 2X, INS_TT_FULL, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 32-bit signed integers for greater than
INST3(pcmpgtq, "vpcmpgtq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x37), 3C, 1C, INS_TT_FULL, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 64-bit signed integers for greater than
INST3(pcmpgtw, "vpcmpgtw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x65), 1C, 2X, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 16-bit signed integers for greater than
INST3(pextrw, "vpextrw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xC5), 4C, 1C, INS_TT_TUPLE1_SCALAR, Input_16Bit | REX_W0 | Encoding_VEX | Encoding_EVEX) // Extract 16-bit value into a r32 with zero extended to 32-bits
INST3(pextrb, "vpextrb", IUM_WR, SSE3A(0x14), BAD_CODE, BAD_CODE, 4C, 1C, INS_TT_TUPLE1_SCALAR, Input_8Bit | REX_W0 | Encoding_VEX | Encoding_EVEX) // Extract Byte
INST3(pextrd, "vpextrd", IUM_WR, SSE3A(0x16), BAD_CODE, BAD_CODE, 4C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX) // Extract Dword
INST3(pextrq, "vpextrq", IUM_WR, SSE3A(0x16), BAD_CODE, BAD_CODE, 4C, 1C, INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX) // Extract Qword
INST3(phaddd, "vphaddd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x02), 3C, 2C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed horizontal add
INST3(pextrw, "vpextrw", IUM_WR, SSE3A(0x15), BAD_CODE, BAD_CODE, 4C, 1C, INS_TT_TUPLE1_SCALAR, Input_16Bit | REX_W0 | Encoding_VEX | Encoding_EVEX) // Extract Word
INST3(phaddsw, "vphaddsw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x03), 3C, 2C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed horizontal add of 16-bit integers with saturation
INST3(phaddw, "vphaddw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x01), 3C, 2C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed horizontal add of 16-bit integers
INST3(phminposuw, "vphminposuw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x41), 4C, 1C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX) // Packed Horizontal Word Minimum
INST3(phsubd, "vphsubd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x06), 3C, 2C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed horizontal subtract of 32-bit integers
INST3(phsubsw, "vphsubsw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x07), 3C, 2C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed horizontal subtract of 16-bit integers with saturation
INST3(phsubw, "vphsubw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x05), 3C, 2C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed horizontal subtract of 16-bit integers
INST3(pinsrb, "vpinsrb", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x20), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_8Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert Byte
INST3(pinsrd, "vpinsrd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x22), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert Dword
INST3(pinsrq, "vpinsrq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x22), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert Qword
INST3(pinsrw, "vpinsrw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xC4), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert word at index
INST3(pmaddubsw, "vpmaddubsw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x04), 5C, 2X, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply and Add Packed Signed and Unsigned Bytes
INST3(pmaddwd, "vpmaddwd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF5), 5C, 2X, INS_TT_FULL_MEM, KMask_Base4 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst
INST3(pmaxsb, "vpmaxsb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3C), 1C, 2X, INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed maximum signed bytes
INST3(pmaxsd, "vpmaxsd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3D), 1C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed maximum 32-bit signed integers
INST3(pmaxsw, "vpmaxsw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEE), 1C, 2X, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed maximum signed words
INST3(pmaxub, "vpmaxub", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDE), 1C, 2X, INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed maximum unsigned bytes
INST3(pmaxud, "vpmaxud", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3F), 1C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed maximum 32-bit unsigned integers
INST3(pmaxuw, "vpmaxuw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3E), 1C, 2X, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed maximum 16-bit unsigned integers
INST3(pminsb, "vpminsb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x38), 1C, 2X, INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed minimum signed bytes
INST3(pminsd, "vpminsd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x39), 1C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed minimum 32-bit signed integers
INST3(pminsw, "vpminsw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEA), 1C, 2X, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed minimum signed words
INST3(pminub, "vpminub", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDA), 1C, 2X, INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed minimum unsigned bytes
INST3(pminud, "vpminud", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3B), 1C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed minimum 32-bit unsigned integers
INST3(pminuw, "vpminuw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3A), 1C, 2X, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed minimum 16-bit unsigned integers
INST3(pmovmskb, "vpmovmskb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD7), ILLEGAL, ILLEGAL, INS_TT_NONE, REX_WIG | Encoding_VEX) // Move the MSB bits of all bytes in a xmm reg to an int reg
INST3(pmovsxbd, "vpmovsxbd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x21), ILLEGAL, ILLEGAL, INS_TT_QUARTER_MEM, Input_8Bit | KMask_Base4 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed sign extend byte to int
INST3(pmovsxbq, "vpmovsxbq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x22), ILLEGAL, ILLEGAL, INS_TT_EIGHTH_MEM, Input_8Bit | KMask_Base2 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed sign extend byte to long
INST3(pmovsxbw, "vpmovsxbw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x20), ILLEGAL, ILLEGAL, INS_TT_HALF_MEM, Input_8Bit | KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed sign extend byte to short
INST3(pmovsxdq, "vpmovsxdq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x25), ILLEGAL, ILLEGAL, INS_TT_HALF_MEM, Input_32Bit | KMask_Base2 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // Packed sign extend int to long
INST3(pmovsxwd, "vpmovsxwd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x23), ILLEGAL, ILLEGAL, INS_TT_HALF_MEM, Input_16Bit | KMask_Base4 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed sign extend short to int
INST3(pmovsxwq, "vpmovsxwq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x24), ILLEGAL, ILLEGAL, INS_TT_QUARTER_MEM, Input_16Bit | KMask_Base2 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed sign extend short to long
INST3(pmovzxbd, "vpmovzxbd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x31), ILLEGAL, ILLEGAL, INS_TT_QUARTER_MEM, Input_8Bit | KMask_Base4 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed zero extend byte to int
INST3(pmovzxbq, "vpmovzxbq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x32), ILLEGAL, ILLEGAL, INS_TT_EIGHTH_MEM, Input_8Bit | KMask_Base2 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed zero extend byte to long
INST3(pmovzxbw, "vpmovzxbw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x30), ILLEGAL, ILLEGAL, INS_TT_HALF_MEM, Input_8Bit | KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed zero extend byte to short
INST3(pmovzxdq, "vpmovzxdq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x35), ILLEGAL, ILLEGAL, INS_TT_HALF_MEM, Input_32Bit | KMask_Base2 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // Packed zero extend int to long
INST3(pmovzxwd, "vpmovzxwd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x33), ILLEGAL, ILLEGAL, INS_TT_HALF_MEM, Input_16Bit | KMask_Base4 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed zero extend short to int
INST3(pmovzxwq, "vpmovzxwq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x34), ILLEGAL, ILLEGAL, INS_TT_QUARTER_MEM, Input_16Bit | KMask_Base2 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed zero extend short to long
INST3(pmuldq, "vpmuldq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x28), 5C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed multiply 32-bit signed integers and store 64-bit result
INST3(pmulhrsw, "vpmulhrsw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x0B), 5C, 2X, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed Multiply High with Round and Scale
INST3(pmulhuw, "vpmulhuw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE4), 5C, 2X, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply high the packed 16-bit unsigned integers
INST3(pmulhw, "vpmulhw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE5), 5C, 2X, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply high the packed 16-bit signed integers
INST3(pmulld, "vpmulld", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x40), 10C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed multiply 32 bit unsigned integers and store lower 32 bits of each result
INST3(pmullw, "vpmullw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD5), 5C, 2X, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed multiply 16 bit unsigned integers and store lower 16 bits of each result
INST3(pmuludq, "vpmuludq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF4), 5C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed multiply 32-bit unsigned integers and store 64-bit result
INST3(pord, "vpor", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEB), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_FLAGS_HasPseudoName) // Packed bit-wise OR of two xmm regs
@@ -338,9 +404,13 @@ INST3(prefetcht0, "prefetcht0", IUM_RD, 0x000F0818, BAD_CODE,
INST3(prefetcht1, "prefetcht1", IUM_RD, 0x000F1018, BAD_CODE, BAD_CODE, ZERO, 2X, INS_TT_TUPLE1_FIXED, Input_8Bit | REX_WIG | Encoding_REX2)
INST3(prefetcht2, "prefetcht2", IUM_RD, 0x000F1818, BAD_CODE, BAD_CODE, ZERO, 2X, INS_TT_TUPLE1_FIXED, Input_8Bit | REX_WIG | Encoding_REX2)
INST3(psadbw, "vpsadbw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF6), 3C, 1C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Compute the sum of absolute differences of packed unsigned 8-bit integers
INST3(pshufb, "vpshufb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x00), 1C, 1C, INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed Shuffle Bytes
INST3(pshufd, "vpshufd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x70), 1C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // Packed shuffle of 32-bit integers
INST3(pshufhw, "vpshufhw", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x70), 1C, 1C, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Shuffle the high words in xmm2/m128 based on the encoding in imm8 and store the result in xmm1.
INST3(pshuflw, "vpshuflw", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x70), 1C, 1C, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Shuffle the low words in xmm2/m128 based on the encoding in imm8 and store the result in xmm1.
INST3(psignb, "vpsignb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x08), 1C, 2X, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed SIGN
INST3(psignd, "vpsignd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x0A), 1C, 2X, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed SIGN
INST3(psignw, "vpsignw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x09), 1C, 2X, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed SIGN
INST3(pslld, "vpslld", IUM_WR, BAD_CODE, PCKDBL(0x72), PCKDBL(0xF2), ILLEGAL, ILLEGAL, INS_TT_FULL | INS_TT_MEM128, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift left logical of 32-bit integers
INST3(pslldq, "vpslldq", IUM_WR, BAD_CODE, PCKDBL(0x73), BAD_CODE, 1C, 1C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Shift left logical of xmm reg by given number of bytes
INST3(psllq, "vpsllq", IUM_WR, BAD_CODE, PCKDBL(0x73), PCKDBL(0xF3), ILLEGAL, ILLEGAL, INS_TT_FULL | INS_TT_MEM128, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift left logical of 64-bit integers
@@ -359,6 +429,7 @@ INST3(psubsw, "vpsubsw", IUM_WR, BAD_CODE, BAD_CODE,
INST3(psubusb, "vpsubusb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD8), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation
INST3(psubusw, "vpsubusw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD9), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation
INST3(psubw, "vpsubw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF9), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed word (16-bit) integers
INST3(ptest, "vptest", IUM_RD, BAD_CODE, BAD_CODE, SSE38(0x17), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF) // Packed logical compare
INST3(punpckhbw, "vpunpckhbw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x68), 1C, 1C, INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed logical (unsigned) widen ubyte to ushort (hi)
INST3(punpckhdq, "vpunpckhdq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x6A), 1C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction)
INST3(punpckhqdq, "vpunpckhqdq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x6D), 1C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed logical (unsigned) widen uint to ulong (hi)
@@ -370,6 +441,10 @@ INST3(punpcklwd, "vpunpcklwd", IUM_WR, BAD_CODE, BAD_CODE,
INST3(pxord, "vpxor", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEF), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_FLAGS_HasPseudoName) // Packed bit-wise XOR of two xmm regs
INST3(rcpps, "vrcpps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x53), 4C, 1C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX) // Reciprocal of packed singles
INST3(rcpss, "vrcpss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x53), 4C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Reciprocal of scalar single
INST3(roundpd, "vroundpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x09), 8C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_FLAGS_HasPseudoName) // Round packed double precision floating-point values
INST3(roundps, "vroundps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x08), 8C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_FLAGS_HasPseudoName) // Round packed single precision floating-point values
INST3(roundsd, "vroundsd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0B), 8C, 1C, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction | INS_FLAGS_HasPseudoName) // Round scalar double precision floating-point values
INST3(roundss, "vroundss", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0A), 8C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction | INS_FLAGS_HasPseudoName) // Round scalar single precision floating-point values
INST3(rsqrtps, "vrsqrtps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x52), 4C, 1C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX) // Reciprocal Sqrt of packed singles
INST3(rsqrtss, "vrsqrtss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x52), 4C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Reciprocal Sqrt of scalar single
INST3(sfence, "sfence", IUM_RD, 0x000FF8AE, BAD_CODE, BAD_CODE, ZERO, 6C, INS_TT_NONE, REX_WIG)
@@ -392,84 +467,6 @@ INST3(unpcklps, "vunpcklps", IUM_WR, BAD_CODE, BAD_CODE,
INST3(xorpd, "vxorpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x57), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // XOR packed doubles
INST3(xorps, "vxorps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x57), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // XOR packed singles

// Instructions for SSE3, SSSE3, SSE41, SSE42, POPCNT
INST3(addsubpd, "vaddsubpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD0), 4C, 1C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Add/Subtract packed doubles
INST3(addsubps, "vaddsubps", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0xD0), 4C, 1C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Add/Subtract packed singles
INST3(blendpd, "vblendpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0D), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Blend Packed Double Precision Floating-Point Values
INST3(blendps, "vblendps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0C), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Blend Packed Single Precision Floating-Point Values
INST3(blendvpd, "blendvpd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x15), 1C, 2X, INS_TT_FULL_MEM, REX_W0) // Variable Blend Packed Doubles
INST3(blendvps, "blendvps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x14), 1C, 2X, INS_TT_FULL_MEM, REX_W0) // Variable Blend Packed Singles
INST3(dppd, "vdppd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x41), 9C, 1C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed dot product of two double vector regs
INST3(dpps, "vdpps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x40), 13C, 2C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed dot product of two float vector regs
INST3(extractps, "vextractps", IUM_WR, SSE3A(0x17), BAD_CODE, BAD_CODE, 4C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_WIG | Encoding_VEX | Encoding_EVEX) // Extract Packed Floating-Point Values
INST3(haddpd, "vhaddpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x7C), 6C, 2C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Horizontal add packed doubles
INST3(haddps, "vhaddps", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x7C), 6C, 2C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Horizontal add packed floats
INST3(hsubpd, "vhsubpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x7D), 6C, 2C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Horizontal subtract packed doubles
INST3(hsubps, "vhsubps", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x7D), 6C, 2C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Horizontal subtract packed floats
INST3(insertps, "vinsertps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x21), 1C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert packed single precision float value
INST3(lddqu, "vlddqu", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0xF0), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX) // Load Unaligned integer
INST3(movddup, "vmovddup", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x12), ILLEGAL, ILLEGAL, INS_TT_MOVDDUP, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX) // Replicate Double FP Values
INST3(movntdqa, "vmovntdqa", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x2A), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // Load Double Quadword Non-Temporal Aligned Hint
INST3(movshdup, "vmovshdup", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x16), 1C, 1C, INS_TT_FULL_MEM, KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // Replicate odd-indexed Single FP Values
INST3(movsldup, "vmovsldup", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x12), 1C, 1C, INS_TT_FULL_MEM, KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // Replicate even-indexed Single FP Values
INST3(mpsadbw, "vmpsadbw", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x42), 4C, 2C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Compute Multiple Packed Sums of Absolute Difference
INST3(pabsb, "vpabsb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1C), 1C, 2X, INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed absolute value of bytes
INST3(pabsd, "vpabsd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1E), 1C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // Packed absolute value of 32-bit integers
INST3(pabsw, "vpabsw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1D), 1C, 2X, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed absolute value of 16-bit integers
INST3(packusdw, "vpackusdw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x2B), 1C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base8 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Pack (narrow) int to unsigned short with saturation
INST3(palignr, "vpalignr", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0F), 1C, 1C, INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed Align Right
INST3(pblendvb, "pblendvb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x10), 1C, 2X, INS_TT_FULL_MEM, REX_W0) // Variable Blend Packed Bytes
INST3(pblendw, "vpblendw", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0E), 1C, 1C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Blend Packed Words
INST3(pcmpeqq, "vpcmpeqq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x29), 1C, 2X, INS_TT_FULL, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 64-bit integers for equality
INST3(pcmpgtq, "vpcmpgtq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x37), 3C, 1C, INS_TT_FULL, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 64-bit signed integers for greater than
INST3(pextrb, "vpextrb", IUM_WR, SSE3A(0x14), BAD_CODE, BAD_CODE, 4C, 1C, INS_TT_TUPLE1_SCALAR, Input_8Bit | REX_W0 | Encoding_VEX | Encoding_EVEX) // Extract Byte
INST3(pextrd, "vpextrd", IUM_WR, SSE3A(0x16), BAD_CODE, BAD_CODE, 4C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX) // Extract Dword
INST3(pextrq, "vpextrq", IUM_WR, SSE3A(0x16), BAD_CODE, BAD_CODE, 4C, 1C, INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX) // Extract Qword
INST3(pextrw_sse42, "vpextrw", IUM_WR, SSE3A(0x15), BAD_CODE, BAD_CODE, 4C, 1C, INS_TT_TUPLE1_SCALAR, Input_16Bit | REX_W0 | Encoding_VEX | Encoding_EVEX) // Extract Word
INST3(phaddd, "vphaddd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x02), 3C, 2C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed horizontal add
INST3(phaddsw, "vphaddsw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x03), 3C, 2C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed horizontal add of 16-bit integers with saturation
INST3(phaddw, "vphaddw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x01), 3C, 2C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed horizontal add of 16-bit integers
INST3(phminposuw, "vphminposuw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x41), 4C, 1C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX) // Packed Horizontal Word Minimum
INST3(phsubd, "vphsubd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x06), 3C, 2C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed horizontal subtract of 32-bit integers
INST3(phsubsw, "vphsubsw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x07), 3C, 2C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed horizontal subtract of 16-bit integers with saturation
INST3(phsubw, "vphsubw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x05), 3C, 2C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed horizontal subtract of 16-bit integers
INST3(pinsrb, "vpinsrb", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x20), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_8Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert Byte
INST3(pinsrd, "vpinsrd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x22), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert Dword
INST3(pinsrq, "vpinsrq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x22), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert Qword
INST3(pmaddubsw, "vpmaddubsw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x04), 5C, 2X, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply and Add Packed Signed and Unsigned Bytes
INST3(pmaxsb, "vpmaxsb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3C), 1C, 2X, INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed maximum signed bytes
INST3(pmaxsd, "vpmaxsd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3D), 1C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed maximum 32-bit signed integers
INST3(pmaxud, "vpmaxud", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3F), 1C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed maximum 32-bit unsigned integers
INST3(pmaxuw, "vpmaxuw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3E), 1C, 2X, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed maximum 16-bit unsigned integers
INST3(pminsb, "vpminsb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x38), 1C, 2X, INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed minimum signed bytes
INST3(pminsd, "vpminsd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x39), 1C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed minimum 32-bit signed integers
INST3(pminud, "vpminud", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3B), 1C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed minimum 32-bit unsigned integers
INST3(pminuw, "vpminuw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3A), 1C, 2X, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed minimum 16-bit unsigned integers
INST3(pmovsxbd, "vpmovsxbd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x21), ILLEGAL, ILLEGAL, INS_TT_QUARTER_MEM, Input_8Bit | KMask_Base4 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed sign extend byte to int
INST3(pmovsxbq, "vpmovsxbq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x22), ILLEGAL, ILLEGAL, INS_TT_EIGHTH_MEM, Input_8Bit | KMask_Base2 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed sign extend byte to long
INST3(pmovsxbw, "vpmovsxbw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x20), ILLEGAL, ILLEGAL, INS_TT_HALF_MEM, Input_8Bit | KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed sign extend byte to short
INST3(pmovsxdq, "vpmovsxdq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x25), ILLEGAL, ILLEGAL, INS_TT_HALF_MEM, Input_32Bit | KMask_Base2 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // Packed sign extend int to long
INST3(pmovsxwd, "vpmovsxwd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x23), ILLEGAL, ILLEGAL, INS_TT_HALF_MEM, Input_16Bit | KMask_Base4 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed sign extend short to int
INST3(pmovsxwq, "vpmovsxwq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x24), ILLEGAL, ILLEGAL, INS_TT_QUARTER_MEM, Input_16Bit | KMask_Base2 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed sign extend short to long
INST3(pmovzxbd, "vpmovzxbd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x31), ILLEGAL, ILLEGAL, INS_TT_QUARTER_MEM, Input_8Bit | KMask_Base4 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed zero extend byte to int
INST3(pmovzxbq, "vpmovzxbq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x32), ILLEGAL, ILLEGAL, INS_TT_EIGHTH_MEM, Input_8Bit | KMask_Base2 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed zero extend byte to long
INST3(pmovzxbw, "vpmovzxbw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x30), ILLEGAL, ILLEGAL, INS_TT_HALF_MEM, Input_8Bit | KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed zero extend byte to short
INST3(pmovzxdq, "vpmovzxdq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x35), ILLEGAL, ILLEGAL, INS_TT_HALF_MEM, Input_32Bit | KMask_Base2 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // Packed zero extend int to long
INST3(pmovzxwd, "vpmovzxwd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x33), ILLEGAL, ILLEGAL, INS_TT_HALF_MEM, Input_16Bit | KMask_Base4 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed zero extend short to int
INST3(pmovzxwq, "vpmovzxwq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x34), ILLEGAL, ILLEGAL, INS_TT_QUARTER_MEM, Input_16Bit | KMask_Base2 | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed zero extend short to long
INST3(pmuldq, "vpmuldq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x28), 5C, 2X, INS_TT_FULL, Input_32Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // packed multiply 32-bit signed integers and store 64-bit result
INST3(pmulhrsw, "vpmulhrsw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x0B), 5C, 2X, INS_TT_FULL_MEM, KMask_Base8 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed Multiply High with Round and Scale
INST3(pmulld, "vpmulld", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x40), 10C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed multiply 32 bit unsigned integers and store lower 32 bits of each result
INST3(pshufb, "vpshufb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x00), 1C, 1C, INS_TT_FULL_MEM, KMask_Base16 | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed Shuffle Bytes
INST3(psignb, "vpsignb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x08), 1C, 2X, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed SIGN
INST3(psignd, "vpsignd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x0A), 1C, 2X, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed SIGN
INST3(psignw, "vpsignw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x09), 1C, 2X, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed SIGN
INST3(ptest, "vptest", IUM_RD, BAD_CODE, BAD_CODE, SSE38(0x17), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF) // Packed logical compare
INST3(roundpd, "vroundpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x09), 8C, 1C, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_FLAGS_HasPseudoName) // Round packed double precision floating-point values
INST3(roundps, "vroundps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x08), 8C, 1C, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_FLAGS_HasPseudoName) // Round packed single precision floating-point values
INST3(roundsd, "vroundsd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0B), 8C, 1C, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base2 | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction | INS_FLAGS_HasPseudoName) // Round scalar double precision floating-point values
INST3(roundss, "vroundss", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0A), 8C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base4 | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction | INS_FLAGS_HasPseudoName) // Round scalar single precision floating-point values

// Instructions for AESNI, PCLMULQDQ
INST3(aesdec, "vaesdec", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDE), 4C, 1C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Perform one round of an AES decryption flow
INST3(aesdeclast, "vaesdeclast", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDF), 4C, 1C, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Perform last round of an AES decryption flow
@@ -394,7 +394,6 @@ RELEASE_CONFIG_INTEGER(EnableHWIntrinsic, "EnableHWIntrinsic",
#endif // defined(TARGET_LOONGARCH64)

#if defined(TARGET_AMD64) || defined(TARGET_X86)
RELEASE_CONFIG_INTEGER(EnableSSE42, "EnableSSE42", 1) // Allows SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, and dependent hardware intrinsics to be disabled
RELEASE_CONFIG_INTEGER(EnableAVX, "EnableAVX", 1) // Allows AVX and dependent hardware intrinsics to be disabled
RELEASE_CONFIG_INTEGER(EnableAVX2, "EnableAVX2", 1) // Allows AVX2, BMI1, BMI2, F16C, FMA, LZCNT, MOVBE and dependent hardware intrinsics to be disabled
RELEASE_CONFIG_INTEGER(EnableAVX512, "EnableAVX512", 1) // Allows AVX512 F+BW+CD+DQ+VL and dependent hardware intrinsics to be disabled

File diff suppressed because it is too large

@@ -4164,22 +4164,6 @@ int LinearScan::BuildStoreLoc(GenTreeLclVarCommon* storeLoc)
{
BuildUse(op1, RBM_NONE, i);
}
#if defined(FEATURE_SIMD) && defined(TARGET_X86)
if (TargetOS::IsWindows && !compiler->compOpportunisticallyDependsOn(InstructionSet_SSE42))
{
if (varTypeIsSIMD(storeLoc) && op1->IsCall())
{
// Need an additional register to create a SIMD8 from EAX/EDX without SSE4.1.
buildInternalFloatRegisterDefForNode(storeLoc, allSIMDRegs());

if (isCandidateVar(varDsc))
{
// This internal register must be different from the target register.
setInternalRegsDelayFree = true;
}
}
}
#endif // FEATURE_SIMD && TARGET_X86
}
else if (op1->isContained() && op1->OperIs(GT_BITCAST))
{

@@ -1710,13 +1710,6 @@ int LinearScan::BuildPutArgStk(GenTreePutArgStk* putArgStk)
{
simdTemp = buildInternalFloatRegisterDefForNode(putArgStk);
}

if (!compiler->compOpportunisticallyDependsOn(InstructionSet_SSE42))
{
// To store SIMD12 without extractps we will need
// a temp xmm reg to do the shuffle.
buildInternalFloatRegisterDefForNode(use.GetNode());
}
}
#endif // defined(FEATURE_SIMD)

@@ -2270,16 +2263,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou

RefPosition* op1Use = BuildUse(op1);
srcCount += 1;

if ((baseType == TYP_FLOAT) && HWIntrinsicInfo::IsVectorCreateScalar(intrinsicId) &&
!compiler->compOpportunisticallyDependsOn(InstructionSet_SSE42))
{
setDelayFree(op1Use);
}
else
{
tgtPrefUse = op1Use;
}
tgtPrefUse = op1Use;
}

buildUses = false;

@@ -2289,12 +2273,6 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
{
dstCandidates = allByteRegs();
}
else if (varTypeIsLong(baseType) && !compiler->compOpportunisticallyDependsOn(InstructionSet_SSE42))
{
// For SSE2 fallbacks, we will need a temp register to insert the upper half of a long
buildInternalFloatRegisterDefForNode(intrinsicTree);
setInternalRegsDelayFree = true;
}
#endif // TARGET_X86
break;
}

@@ -2396,7 +2374,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
break;
}

case NI_SSE42_BlendVariable:
case NI_X86Base_BlendVariable:
{
assert(numArgs == 3);

@@ -2424,7 +2402,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
break;
}

case NI_SSE42_Extract:
case NI_X86Base_Extract:
{
assert(!varTypeIsFloating(baseType));

@@ -2438,8 +2416,8 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
}

#ifdef TARGET_X86
case NI_SSE42_Crc32:
case NI_SSE42_X64_Crc32:
case NI_X86Base_Crc32:
case NI_X86Base_X64_Crc32:
{
// TODO-XArch-Cleanup: Currently we use the BaseType to bring the type of the second argument
// to the code generator. We may want to encode the overload info in another way.

@@ -3107,15 +3085,6 @@ int LinearScan::BuildIndir(GenTreeIndir* indirTree)
assert(!indirTree->TypeIs(TYP_STRUCT));
SingleTypeRegSet useCandidates = RBM_NONE;

#ifdef FEATURE_SIMD
if (indirTree->TypeIs(TYP_SIMD12) && indirTree->OperIs(GT_STOREIND) &&
!compiler->compOpportunisticallyDependsOn(InstructionSet_SSE42) && !indirTree->Data()->IsVectorZero())
{
// GT_STOREIND needs an internal register so the upper 4 bytes can be extracted
buildInternalFloatRegisterDefForNode(indirTree);
}
#endif // FEATURE_SIMD

#ifdef TARGET_AMD64
if (varTypeUsesIntReg(indirTree->Addr()))
{

@@ -421,14 +421,9 @@ void Rationalizer::RewriteHWIntrinsicAsUserCall(GenTree** use, ArrayStack<GenTre
#if defined(TARGET_XARCH)
case NI_Vector128_ExtractMostSignificantBits:
{
// We want to keep this as is, because we'll rewrite it in post-order
assert(varTypeIsShort(simdBaseType));

if (comp->compOpportunisticallyDependsOn(InstructionSet_SSE42))
{
// We want to keep this as is, because we'll rewrite it in post-order
return;
}
break;
return;
}
#endif // TARGET_XARCH

@@ -698,7 +693,7 @@ void Rationalizer::RewriteHWIntrinsicBlendv(GenTree** use, Compiler::GenTreeStac
}
else
{
intrinsic = NI_SSE42_BlendVariable;
intrinsic = NI_X86Base_BlendVariable;
}

if (HWIntrinsicInfo::NeedsNormalizeSmallTypeToInt(intrinsic) && varTypeIsSmall(simdBaseType))

@@ -917,10 +912,6 @@ void Rationalizer::RewriteHWIntrinsicToNonMask(GenTree** use, Compiler::GenTreeS
intrinsic = NI_AVX_CompareEqual;
}
}
else if (varTypeIsLong(simdBaseType))
{
intrinsic = NI_SSE42_CompareEqual;
}
else
{
intrinsic = NI_X86Base_CompareEqual;

@@ -941,10 +932,6 @@ void Rationalizer::RewriteHWIntrinsicToNonMask(GenTree** use, Compiler::GenTreeS
intrinsic = NI_AVX_CompareGreaterThan;
}
}
else if (varTypeIsLong(simdBaseType))
{
intrinsic = NI_SSE42_CompareGreaterThan;
}
else
{
intrinsic = NI_X86Base_CompareGreaterThan;

@@ -978,10 +965,6 @@ void Rationalizer::RewriteHWIntrinsicToNonMask(GenTree** use, Compiler::GenTreeS
intrinsic = NI_AVX_CompareLessThan;
}
}
else if (varTypeIsLong(simdBaseType))
{
intrinsic = NI_SSE42_CompareLessThan;
}
else
{
intrinsic = NI_X86Base_CompareLessThan;

@@ -1539,9 +1522,6 @@ void Rationalizer::RewriteHWIntrinsicExtractMsb(GenTree** use, Compiler::GenTree
parents.Push(castNode);
}
#elif defined(TARGET_XARCH)
NamedIntrinsic moveMaskIntrinsic = NI_Illegal;
NamedIntrinsic shuffleIntrinsic = NI_Illegal;

simdBaseJitType = varTypeIsUnsigned(simdBaseType) ? CORINFO_TYPE_UBYTE : CORINFO_TYPE_BYTE;

// We want to tightly pack the most significant byte of each short/ushort

@@ -1554,6 +1534,8 @@ void Rationalizer::RewriteHWIntrinsicExtractMsb(GenTree** use, Compiler::GenTree
simdVal.u64[0] = 0x0F0D0B0907050301;
simdVal.u64[1] = 0x8080808080808080;

NamedIntrinsic shuffleIntrinsic = NI_Illegal;

if (simdSize == 32)
{
// Vector256 works on 2x128-bit lanes, so repeat the same indices for the upper lane

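For reference, pshufb treats each control byte as a source index and forces the destination byte to zero whenever the control byte's high bit is set, which is why the 0x80 bytes above blank out the upper half of the result. A minimal scalar model of that rule, using the constants from the hunk above (the helper name is invented for illustration):

#include <cstdint>
#include <cstdio>

// Scalar model of a 16-byte pshufb: a control byte with the high bit set
// produces 0; otherwise it selects src[control & 0x0F].
static void pshufb_model(const uint8_t src[16], const uint8_t ctl[16], uint8_t dst[16])
{
    for (int i = 0; i < 16; i++)
        dst[i] = (ctl[i] & 0x80) ? 0 : src[ctl[i] & 0x0F];
}

int main()
{
    // 0x0F0D0B0907050301 | 0x8080808080808080 as little-endian bytes:
    // indices 01,03,...,0F pick the high byte of each 16-bit lane; the 0x80
    // bytes zero the upper eight destination bytes.
    uint8_t ctl[16] = { 0x01, 0x03, 0x05, 0x07, 0x09, 0x0B, 0x0D, 0x0F,
                        0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 };
    uint8_t src[16], dst[16];
    for (int i = 0; i < 16; i++) src[i] = (uint8_t)i;
    pshufb_model(src, ctl, dst);
    for (int i = 0; i < 16; i++) printf("%02X ", dst[i]); // 01 03 ... 0F 00 ... 00
    printf("\n");
    return 0;
}
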
@@ -1561,15 +1543,11 @@ void Rationalizer::RewriteHWIntrinsicExtractMsb(GenTree** use, Compiler::GenTree
simdVal.u64[2] = 0x0F0D0B0907050301;
simdVal.u64[3] = 0x8080808080808080;

shuffleIntrinsic = NI_AVX2_Shuffle;
moveMaskIntrinsic = NI_X86Base_MoveMask;
shuffleIntrinsic = NI_AVX2_Shuffle;
}
else
{
assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE42));

shuffleIntrinsic = NI_SSE42_Shuffle;
moveMaskIntrinsic = NI_X86Base_MoveMask;
shuffleIntrinsic = NI_X86Base_Shuffle;
}

GenTree* op2 = comp->gtNewVconNode(simdType);

@@ -1606,7 +1584,7 @@ void Rationalizer::RewriteHWIntrinsicExtractMsb(GenTree** use, Compiler::GenTree
simdSize = 16;
}

node->ChangeHWIntrinsicId(moveMaskIntrinsic);
node->ChangeHWIntrinsicId(NI_X86Base_MoveMask);
node->SetSimdSize(simdSize);
node->SetSimdBaseJitType(simdBaseJitType);
node->Op(1) = op1;

@@ -88,23 +88,12 @@ void CodeGen::genStoreIndTypeSimd12(GenTreeStoreInd* treeNode)
// Store upper 4 bytes
emit->emitInsStoreInd(INS_movss, EA_4BYTE, treeNode);
}
else if (compiler->compOpportunisticallyDependsOn(InstructionSet_SSE42))
else
{
// Extract and store upper 4 bytes
GenTreeStoreInd storeInd = storeIndirForm(TYP_SIMD16, addr, data);
emit->emitIns_A_R_I(INS_extractps, EA_16BYTE, &storeInd, dataReg, 2);
}
else
{
regNumber tmpReg = internalRegisters.GetSingle(treeNode);

// Extract upper 4 bytes from data
emit->emitIns_R_R(INS_movhlps, EA_16BYTE, tmpReg, dataReg);
data->SetRegNum(tmpReg);

// Store upper 4 bytes
emit->emitInsStoreInd(INS_movss, EA_4BYTE, treeNode);
}
}

//-----------------------------------------------------------------------------

@@ -133,15 +122,11 @@ void CodeGen::genLoadIndTypeSimd12(GenTreeIndir* treeNode)
return;
}

emitter* emit = GetEmitter();
regNumber tgtReg = treeNode->GetRegNum();
bool useSse42 = compiler->compOpportunisticallyDependsOn(InstructionSet_SSE42);
emitter* emit = GetEmitter();
regNumber tgtReg = treeNode->GetRegNum();

if (useSse42)
{
// Load lower 8 bytes
emit->emitInsLoadInd(INS_movsd_simd, EA_8BYTE, tgtReg, treeNode);
}
// Load lower 8 bytes
emit->emitInsLoadInd(INS_movsd_simd, EA_8BYTE, tgtReg, treeNode);

// Update the addr node to offset by 8

@@ -164,41 +149,9 @@ void CodeGen::genLoadIndTypeSimd12(GenTreeIndir* treeNode)

treeNode->Addr() = addr;

if (useSse42)
{
// Load and insert upper 4 bytes, 0x20 inserts to index 2 and 0x8 zeros index 3
GenTreeIndir indir = indirForm(TYP_SIMD16, addr);
emit->emitIns_SIMD_R_R_A_I(INS_insertps, EA_16BYTE, tgtReg, tgtReg, &indir, 0x28, INS_OPTS_NONE);
}
else
{
// Load upper 4 bytes to lower half of tgtReg
emit->emitInsLoadInd(INS_movss, EA_4BYTE, tgtReg, treeNode);

// Move upper 4 bytes to upper half of tgtReg
emit->emitIns_R_R(INS_movlhps, EA_16BYTE, tgtReg, tgtReg);

// Revert the addr node to the original offset
// Doing it this way saves us a register and produces smaller code

if (treeNode->isIndirAddrMode())
{
GenTreeAddrMode* addrMode = addr->AsAddrMode();
addrMode->SetOffset(addrMode->Offset() - 8);
}
else if (addr->IsCnsIntOrI() && addr->isContained())
{
GenTreeIntConCommon* icon = addr->AsIntConCommon();
icon->SetIconValue(icon->IconValue() - 8);
}
else
{
unreached();
}

// Load lower 8 bytes into tgtReg, preserving upper 4 bytes
emit->emitInsLoadInd(INS_movlps, EA_16BYTE, tgtReg, treeNode);
}
// Load and insert upper 4 bytes, 0x20 inserts to index 2 and 0x8 zeros index 3
GenTreeIndir indir = indirForm(TYP_SIMD16, addr);
emit->emitIns_SIMD_R_R_A_I(INS_insertps, EA_16BYTE, tgtReg, tgtReg, &indir, 0x28, INS_OPTS_NONE);

genProduceReg(treeNode);
}

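For reference, the insertps immediate packs three fields: bits 6-7 select the source element (COUNT_S), bits 4-5 the destination element (COUNT_D), and bits 0-3 a zero mask (ZMASK). A small compile-time sketch of that encoding covering the two immediates used in this file (the helper name is invented for illustration):

#include <cstdint>

// Compose an insertps immediate from its three fields.
constexpr uint8_t InsertpsImm(unsigned countS, unsigned countD, unsigned zmask)
{
    return (uint8_t)((countS << 6) | (countD << 4) | (zmask & 0xF));
}

// 0x28: insert the (memory) source into element 2 and zero element 3,
// matching the "0x20 inserts to index 2 and 0x8 zeros index 3" comment.
static_assert(InsertpsImm(0, 2, 0b1000) == 0x28, "load-upper-4-bytes form");

// 0xF8: copy element 3 onto itself while zeroing it, which is the
// immediate genSimd12UpperClear uses further below.
static_assert(InsertpsImm(3, 3, 0b1000) == 0xF8, "upper-clear form");

int main() { return 0; }
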
@@ -288,21 +241,11 @@ void CodeGen::genEmitStoreLclTypeSimd12(GenTree* store, unsigned lclNum, unsigne
// Store upper 4 bytes
emit->emitIns_S_R(INS_movss, EA_4BYTE, dataReg, lclNum, offset + 8);
}
else if (compiler->compOpportunisticallyDependsOn(InstructionSet_SSE42))
else
{
// Extract and store upper 4 bytes
emit->emitIns_S_R_I(INS_extractps, EA_16BYTE, lclNum, offset + 8, dataReg, 2);
}
else
{
regNumber tmpReg = internalRegisters.GetSingle(store);

// Extract upper 4 bytes from data
emit->emitIns_R_R(INS_movhlps, EA_16BYTE, tmpReg, dataReg);

// Store upper 4 bytes
emit->emitIns_S_R(INS_movss, EA_4BYTE, tmpReg, lclNum, offset + 8);
}
}

//------------------------------------------------------------------------

@@ -317,25 +260,11 @@ void CodeGen::genEmitLoadLclTypeSimd12(regNumber tgtReg, unsigned lclNum, unsign
{
emitter* emit = GetEmitter();

if (compiler->compOpportunisticallyDependsOn(InstructionSet_SSE42))
{
// Load lower 8 bytes into tgtReg, preserving upper 4 bytes
emit->emitIns_R_S(INS_movsd_simd, EA_8BYTE, tgtReg, lclNum, offset);
// Load lower 8 bytes into tgtReg, preserving upper 4 bytes
emit->emitIns_R_S(INS_movsd_simd, EA_8BYTE, tgtReg, lclNum, offset);

// Load and insert upper 4 bytes, 0x20 inserts to index 2 and 0x8 zeros index 3
emit->emitIns_SIMD_R_R_S_I(INS_insertps, EA_16BYTE, tgtReg, tgtReg, lclNum, offset + 8, 0x28, INS_OPTS_NONE);
}
else
{
// Load upper 4 bytes to lower half of tgtReg
emit->emitIns_R_S(INS_movss, EA_4BYTE, tgtReg, lclNum, offset + 8);

// Move upper 4 bytes to upper half of tgtReg
emit->emitIns_R_R(INS_movlhps, EA_16BYTE, tgtReg, tgtReg);

// Load lower 8 bytes into tgtReg, preserving upper 4 bytes
emit->emitIns_R_S(INS_movlps, EA_16BYTE, tgtReg, lclNum, offset);
}
// Load and insert upper 4 bytes, 0x20 inserts to index 2 and 0x8 zeros index 3
emit->emitIns_SIMD_R_R_S_I(INS_insertps, EA_16BYTE, tgtReg, tgtReg, lclNum, offset + 8, 0x28, INS_OPTS_NONE);
}

#ifdef TARGET_X86

@@ -524,26 +453,12 @@ void CodeGen::genSimd12UpperClear(regNumber tgtReg)
{
assert(genIsValidFloatReg(tgtReg));

if (compiler->compOpportunisticallyDependsOn(InstructionSet_SSE42))
{
// ZMASK: 0b1000 - Preserve element 0, 1, and 2; Zero element 3
// COUNT_D: 0b11 - Insert into element 3
// COUNT_S: 0b11 - Insert from element 3
// ZMASK: 0b1000 - Preserve element 0, 1, and 2; Zero element 3
// COUNT_D: 0b11 - Insert into element 3
// COUNT_S: 0b11 - Insert from element 3

GetEmitter()->emitIns_SIMD_R_R_R_I(INS_insertps, EA_16BYTE, tgtReg, tgtReg, tgtReg, static_cast<int8_t>(0xF8),
INS_OPTS_NONE);
}
else
{
// Preserve element 0, 1, and 2; Zero element 3
simd16_t constValue;
constValue.u32[0] = 0xFFFFFFFF;
constValue.u32[1] = 0xFFFFFFFF;
constValue.u32[2] = 0xFFFFFFFF;
constValue.u32[3] = 0x00000000;
CORINFO_FIELD_HANDLE zroSimd12Elm3 = GetEmitter()->emitSimd16Const(constValue);
GetEmitter()->emitIns_SIMD_R_R_C(INS_andps, EA_16BYTE, tgtReg, tgtReg, zroSimd12Elm3, 0, INS_OPTS_NONE);
}
GetEmitter()->emitIns_SIMD_R_R_R_I(INS_insertps, EA_16BYTE, tgtReg, tgtReg, tgtReg, static_cast<int8_t>(0xF8),
INS_OPTS_NONE);
}

#endif // FEATURE_SIMD

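Both branches of the old genSimd12UpperClear zero only element 3: insertps with ZMASK 0b1000, or an AND against a (-1, -1, -1, 0) constant. A scalar model showing the two strategies agree (names are illustrative only):

#include <cassert>
#include <cstdint>

int main()
{
    uint32_t v[4] = { 0x11111111, 0x22222222, 0x33333333, 0xDEADBEEF };

    // Strategy 1: insertps imm 0xF8 -- ZMASK 0b1000 zeroes element 3.
    uint32_t byInsertps[4] = { v[0], v[1], v[2], 0 };

    // Strategy 2: andps against the constant { -1, -1, -1, 0 }.
    uint32_t mask[4] = { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000 };
    uint32_t byAndps[4];
    for (int i = 0; i < 4; i++)
        byAndps[i] = v[i] & mask[i];

    // Identical SIMD12 result either way.
    for (int i = 0; i < 4; i++)
        assert(byInsertps[i] == byAndps[i]);
    return 0;
}
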
@@ -6702,8 +6702,8 @@ bool ValueNumStore::IsVNNeverNegative(ValueNum vn)
case VNF_MDArrLowerBound:
#ifdef FEATURE_HW_INTRINSICS
#ifdef TARGET_XARCH
case VNF_HWI_SSE42_PopCount:
case VNF_HWI_SSE42_X64_PopCount:
case VNF_HWI_X86Base_PopCount:
case VNF_HWI_X86Base_X64_PopCount:
case VNF_HWI_AVX2_LeadingZeroCount:
case VNF_HWI_AVX2_TrailingZeroCount:
case VNF_HWI_AVX2_X64_LeadingZeroCount:

@@ -8084,7 +8084,7 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunUnary(GenTreeHWIntrinsic* tree,
return VNForLongCon(static_cast<int64_t>(result));
}

case NI_SSE42_PopCount:
case NI_X86Base_PopCount:
{
assert(!varTypeIsSmall(type) && !varTypeIsLong(type));

@@ -8094,7 +8094,7 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunUnary(GenTreeHWIntrinsic* tree,
return VNForIntCon(static_cast<int32_t>(result));
}

case NI_SSE42_X64_PopCount:
case NI_X86Base_X64_PopCount:
{
assert(varTypeIsLong(type));

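The PopCount cases above fold a constant operand straight into a constant value number: a 32-bit fold for the base intrinsic and a 64-bit fold for the X64 variant. A standalone model of just the folding arithmetic, using C++20 std::popcount (a sketch only, not the JIT's value-number plumbing):

#include <bit>
#include <cassert>
#include <cstdint>

// 32-bit fold, as for NI_X86Base_PopCount.
static int32_t FoldPopCount32(uint32_t value) { return std::popcount(value); }

// 64-bit fold, as for NI_X86Base_X64_PopCount.
static int64_t FoldPopCount64(uint64_t value) { return std::popcount(value); }

int main()
{
    assert(FoldPopCount32(0x0000F00Fu) == 8);
    assert(FoldPopCount64(0xFFFFFFFFFFFFFFFFull) == 64);
    return 0;
}
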
@@ -176,13 +176,32 @@ static bool InitDLL(HANDLE hPalInstance)
bool DetectCPUFeatures()
{
#if defined(HOST_X86) || defined(HOST_AMD64) || defined(HOST_ARM64)
g_cpuFeatures = minipal_getcpufeatures();
int cpuFeatures = minipal_getcpufeatures();

if ((g_cpuFeatures & g_requiredCpuFeatures) != g_requiredCpuFeatures)
if ((cpuFeatures & IntrinsicConstants_Invalid) != 0)
{
PalPrintFatalError("\nThe required instruction sets are not supported by the current CPU.\n");
#if defined(HOST_X86) || defined(HOST_AMD64)
PalPrintFatalError("\nThe current CPU is missing one or more of the following instruction sets: SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT\n");
#elif defined(HOST_ARM64) && (defined(HOST_WINDOWS) || defined(HOST_APPLE))
PalPrintFatalError("\nThe current CPU is missing one or more of the following instruction sets: AdvSimd, LSE\n");
#elif defined(HOST_ARM64)
PalPrintFatalError("\nThe current CPU is missing one or more of the following instruction sets: AdvSimd\n");
#else
PalPrintFatalError("\nThe current CPU is missing one or more of the baseline instruction sets.\n");
#endif

RhFailFast();
}

int missingCpuFeatures = g_requiredCpuFeatures & ~cpuFeatures;

if (missingCpuFeatures != 0)
{
PalPrintFatalError("\nThe current CPU is missing one or more of the required instruction sets.\n");
RhFailFast();
}

g_cpuFeatures = cpuFeatures;
#endif // HOST_X86 || HOST_AMD64 || HOST_ARM64

return true;

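The missing-feature test above is a simple mask subtraction: AND the required mask with the complement of what the CPU reported, and any surviving bit is a missing ISA. A tiny illustration with made-up bit values:

#include <cassert>

int main()
{
    int required = (1 << 0) | (1 << 3); // e.g. the compiled-in baseline mask
    int actual   = (1 << 0) | (1 << 1); // what feature detection reported
    int missing  = required & ~actual;
    assert(missing == (1 << 3));        // bit 3 is required but absent
    return 0;
}
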
@@ -60,33 +60,28 @@ namespace ILCompiler
// Keep these enumerations in sync with cpufeatures.h in the minipal.
private static class XArchIntrinsicConstants
{
// SSE and SSE2 are baseline ISAs - they're always available
public const int Sse42 = (1 << 0);
public const int Avx = (1 << 1);
public const int Avx2 = (1 << 2);
public const int Avx512 = (1 << 3);

public const int Avx512v2 = (1 << 4);
public const int Avx512v3 = (1 << 5);
public const int Avx10v1 = (1 << 6);
public const int Avx10v2 = (1 << 7);
public const int Apx = (1 << 8);

public const int Aes = (1 << 9);
public const int Avx512Vp2intersect = (1 << 10);
public const int AvxIfma = (1 << 11);
public const int AvxVnni = (1 << 12);
// SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, and POPCNT are baseline ISAs - they're always available
public const int Avx = (1 << 0);
public const int Avx2 = (1 << 1);
public const int Avx512 = (1 << 2);
public const int Avx512v2 = (1 << 3);
public const int Avx512v3 = (1 << 4);
public const int Avx10v1 = (1 << 5);
public const int Avx10v2 = (1 << 6);
public const int Apx = (1 << 7);
public const int Aes = (1 << 8);
public const int Avx512Vp2intersect = (1 << 9);
public const int AvxIfma = (1 << 10);
public const int AvxVnni = (1 << 11);
public const int AvxVnniInt = (1 << 12);
public const int Gfni = (1 << 13);
public const int Sha = (1 << 14);
public const int Vaes = (1 << 15);
public const int WaitPkg = (1 << 16);
public const int X86Serialize = (1 << 17);
public const int AvxVnniInt = (1 << 18);

public static void AddToBuilder(InstructionSetSupportBuilder builder, int flags)
{
if ((flags & Sse42) != 0)
builder.AddSupportedInstructionSet("sse42");
if ((flags & Avx) != 0)
builder.AddSupportedInstructionSet("avx");
if ((flags & Avx2) != 0)

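With Sse42 folded into the baseline, every remaining flag shifts down one bit and AddToBuilder no longer emits "sse42". A small C++ sketch of the same bit-to-name mapping (subset only; the bit values are the new ones from the hunk above, the structure is illustrative):

#include <cstdio>

struct FeatureName { int bit; const char* name; };

int main()
{
    const FeatureName map[] = {
        { 1 << 0, "avx"    }, // Avx moved from bit 1 to bit 0
        { 1 << 1, "avx2"   }, // Avx2 moved from bit 2 to bit 1
        { 1 << 2, "avx512" },
    };
    int flags = (1 << 0) | (1 << 1); // e.g. AVX and AVX2 reported by the host
    for (const FeatureName& f : map)
        if (flags & f.bit)
            printf("AddSupportedInstructionSet(\"%s\")\n", f.name);
    return 0;
}
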
@@ -144,15 +139,11 @@ namespace ILCompiler
public static int FromInstructionSet(InstructionSet instructionSet)
{
Debug.Assert(InstructionSet.X64_AES == InstructionSet.X86_AES);
Debug.Assert(InstructionSet.X64_SSE42 == InstructionSet.X86_SSE42);
Debug.Assert(InstructionSet.X64_AVX2 == InstructionSet.X86_AVX2);

return instructionSet switch
{
// Optional ISAs - only available via opt-in or opportunistic light-up
InstructionSet.X64_SSE42 => Sse42,
InstructionSet.X64_SSE42_X64 => Sse42,

InstructionSet.X64_AVX => Avx,
InstructionSet.X64_AVX_X64 => Avx,

@@ -17,25 +17,63 @@ namespace System.CommandLine
internal static partial class Helpers
{
public static InstructionSetSupport ConfigureInstructionSetSupport(string instructionSet, int maxVectorTBitWidth, bool isVectorTOptimistic, TargetArchitecture targetArchitecture, TargetOS targetOS,
string mustNotBeMessage, string invalidImplicationMessage, Logger logger, bool optimizingForSize = false)
string mustNotBeMessage, string invalidImplicationMessage, Logger logger, bool optimizingForSize, bool isReadyToRun)
{
InstructionSetSupportBuilder instructionSetSupportBuilder = new(targetArchitecture);

// Ready to run images are built with certain instruction set baselines
// Images are built with certain instruction set baselines
//
// For NativeAOT, this represents the minimum hardware required to run.
// Older hardware will not work
//
// For ReadyToRun, this represents the presumed majority hardware.
// Older hardware (down to the NAOT baseline) will still work, but may have more jitting on startup

if ((targetArchitecture == TargetArchitecture.X86) || (targetArchitecture == TargetArchitecture.X64))
{
instructionSetSupportBuilder.AddSupportedInstructionSet("base");
if (isReadyToRun)
{
// ReadyToRun can presume AVX2, BMI1, BMI2, F16C, FMA, LZCNT, and MOVBE
instructionSetSupportBuilder.AddSupportedInstructionSet("x86-64-v3");
}
else
{
// Otherwise, we require SSE4.2 and POPCNT
instructionSetSupportBuilder.AddSupportedInstructionSet("x86-64-v2");
}
}
else if (targetArchitecture == TargetArchitecture.ARM64)
{
if (targetOS == TargetOS.OSX)
{
// For osx-arm64 we know that apple-m1 is a baseline
// For osx-arm64 we know that apple-m1 is the baseline
instructionSetSupportBuilder.AddSupportedInstructionSet("apple-m1");
}
else if (isReadyToRun)
{
if (targetOS == TargetOS.Windows)
{
// ReadyToRun on Windows can presume armv8.2-a and RCPC
instructionSetSupportBuilder.AddSupportedInstructionSet("armv8.2-a");
instructionSetSupportBuilder.AddSupportedInstructionSet("rcpc");
}
else
{
// While Unix needs a lower baseline due to things like Raspberry PI
instructionSetSupportBuilder.AddSupportedInstructionSet("armv8-a");
instructionSetSupportBuilder.AddSupportedInstructionSet("lse");
}
}
else
{
instructionSetSupportBuilder.AddSupportedInstructionSet("neon");
// We require armv8-a everywhere
instructionSetSupportBuilder.AddSupportedInstructionSet("armv8-a");

if (targetOS == TargetOS.Windows)
{
// However, Windows also requires LSE
instructionSetSupportBuilder.AddSupportedInstructionSet("lse");
}
}
}

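The baseline selection above boils down to a small decision table: ReadyToRun presumes newer majority hardware, while NativeAOT keeps the smaller hard minimum. A C++ rendering of the same logic for reference (enum names are illustrative; the returned strings match the hunk):

#include <cstdio>

enum class Arch { X64, X86, Arm64 };
enum class Os { Windows, Linux, OSX };

static const char* Baseline(Arch arch, Os os, bool isReadyToRun)
{
    if (arch == Arch::X64 || arch == Arch::X86)
        return isReadyToRun ? "x86-64-v3" : "x86-64-v2";
    if (os == Os::OSX)
        return "apple-m1";
    if (isReadyToRun)
        return os == Os::Windows ? "armv8.2-a rcpc" : "armv8-a lse";
    return os == Os::Windows ? "armv8-a lse" : "armv8-a";
}

int main()
{
    printf("%s\n", Baseline(Arch::X64, Os::Linux, /*isReadyToRun*/ true)); // x86-64-v3
    printf("%s\n", Baseline(Arch::Arm64, Os::Windows, false));             // armv8-a lse
    return 0;
}
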
@@ -187,7 +225,6 @@ namespace System.CommandLine
// Note that we do not indicate support for AVX, or any other instruction set which uses the VEX encodings as
// the presence of those makes otherwise acceptable code be unusable on hardware which does not support VEX encodings.
//
optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("sse42");
optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("aes");
optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("gfni");
optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("sha");

@@ -234,11 +271,13 @@ namespace System.CommandLine
{
optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("aes");
optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("crc");
optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("dotprod");
optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("lse");
optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("rcpc");
optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("rcpc2");
optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("rdma");
optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("sha1");
optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("sha2");
optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("lse");
optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("dotprod");
optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("rdma");
}

// Vector<T> can always be part of the optimistic set, we only want to optionally exclude it from the supported set

@@ -73,8 +73,6 @@ namespace Internal.ReadyToRunConstants
{
case InstructionSet.X64_X86Base: return ReadyToRunInstructionSet.X86Base;
case InstructionSet.X64_X86Base_X64: return ReadyToRunInstructionSet.X86Base;
case InstructionSet.X64_SSE42: return ReadyToRunInstructionSet.Sse42;
case InstructionSet.X64_SSE42_X64: return ReadyToRunInstructionSet.Sse42;
case InstructionSet.X64_AVX: return ReadyToRunInstructionSet.Avx;
case InstructionSet.X64_AVX_X64: return ReadyToRunInstructionSet.Avx;
case InstructionSet.X64_AVX2: return ReadyToRunInstructionSet.Avx2;

@@ -129,8 +127,6 @@ namespace Internal.ReadyToRunConstants
{
case InstructionSet.X86_X86Base: return ReadyToRunInstructionSet.X86Base;
case InstructionSet.X86_X86Base_X64: return null;
case InstructionSet.X86_SSE42: return ReadyToRunInstructionSet.Sse42;
case InstructionSet.X86_SSE42_X64: return null;
case InstructionSet.X86_AVX: return ReadyToRunInstructionSet.Avx;
case InstructionSet.X86_AVX_X64: return null;
case InstructionSet.X86_AVX2: return ReadyToRunInstructionSet.Avx2;

@@ -47,7 +47,6 @@ namespace Internal.JitInterface
RiscV64_Zba = InstructionSet_RiscV64.Zba,
RiscV64_Zbb = InstructionSet_RiscV64.Zbb,
X64_X86Base = InstructionSet_X64.X86Base,
X64_SSE42 = InstructionSet_X64.SSE42,
X64_AVX = InstructionSet_X64.AVX,
X64_AVX2 = InstructionSet_X64.AVX2,
X64_AVX512 = InstructionSet_X64.AVX512,

@@ -77,7 +76,6 @@ namespace Internal.JitInterface
X64_AVXVNNIINT = InstructionSet_X64.AVXVNNIINT,
X64_AVXVNNIINT_V512 = InstructionSet_X64.AVXVNNIINT_V512,
X64_X86Base_X64 = InstructionSet_X64.X86Base_X64,
X64_SSE42_X64 = InstructionSet_X64.SSE42_X64,
X64_AVX_X64 = InstructionSet_X64.AVX_X64,
X64_AVX2_X64 = InstructionSet_X64.AVX2_X64,
X64_AVX512_X64 = InstructionSet_X64.AVX512_X64,

@@ -94,7 +92,6 @@ namespace Internal.JitInterface
X64_WAITPKG_X64 = InstructionSet_X64.WAITPKG_X64,
X64_X86Serialize_X64 = InstructionSet_X64.X86Serialize_X64,
X86_X86Base = InstructionSet_X86.X86Base,
X86_SSE42 = InstructionSet_X86.SSE42,
X86_AVX = InstructionSet_X86.AVX,
X86_AVX2 = InstructionSet_X86.AVX2,
X86_AVX512 = InstructionSet_X86.AVX512,

@@ -124,7 +121,6 @@ namespace Internal.JitInterface
X86_AVXVNNIINT = InstructionSet_X86.AVXVNNIINT,
X86_AVXVNNIINT_V512 = InstructionSet_X86.AVXVNNIINT_V512,
X86_X86Base_X64 = InstructionSet_X86.X86Base_X64,
X86_SSE42_X64 = InstructionSet_X86.SSE42_X64,
X86_AVX_X64 = InstructionSet_X86.AVX_X64,
X86_AVX2_X64 = InstructionSet_X86.AVX2_X64,
X86_AVX512_X64 = InstructionSet_X86.AVX512_X64,

@@ -188,52 +184,50 @@ namespace Internal.JitInterface
ILLEGAL = InstructionSet.ILLEGAL,
NONE = InstructionSet.NONE,
X86Base = 1,
SSE42 = 2,
AVX = 3,
AVX2 = 4,
AVX512 = 5,
AVX512v2 = 6,
AVX512v3 = 7,
AVX10v1 = 8,
AVX10v2 = 9,
APX = 10,
AES = 11,
AES_V256 = 12,
AES_V512 = 13,
AVX512VP2INTERSECT = 14,
AVXIFMA = 15,
AVXVNNI = 16,
GFNI = 17,
GFNI_V256 = 18,
GFNI_V512 = 19,
SHA = 20,
WAITPKG = 21,
X86Serialize = 22,
Vector128 = 23,
Vector256 = 24,
Vector512 = 25,
VectorT128 = 26,
VectorT256 = 27,
VectorT512 = 28,
AVXVNNIINT = 29,
AVXVNNIINT_V512 = 30,
X86Base_X64 = 31,
SSE42_X64 = 32,
AVX_X64 = 33,
AVX2_X64 = 34,
AVX512_X64 = 35,
AVX512v2_X64 = 36,
AVX512v3_X64 = 37,
AVX10v1_X64 = 38,
AVX10v2_X64 = 39,
AES_X64 = 40,
AVX512VP2INTERSECT_X64 = 41,
AVXIFMA_X64 = 42,
AVXVNNI_X64 = 43,
GFNI_X64 = 44,
SHA_X64 = 45,
WAITPKG_X64 = 46,
X86Serialize_X64 = 47,
AVX = 2,
AVX2 = 3,
AVX512 = 4,
AVX512v2 = 5,
AVX512v3 = 6,
AVX10v1 = 7,
AVX10v2 = 8,
APX = 9,
AES = 10,
AES_V256 = 11,
AES_V512 = 12,
AVX512VP2INTERSECT = 13,
AVXIFMA = 14,
AVXVNNI = 15,
GFNI = 16,
GFNI_V256 = 17,
GFNI_V512 = 18,
SHA = 19,
WAITPKG = 20,
X86Serialize = 21,
Vector128 = 22,
Vector256 = 23,
Vector512 = 24,
VectorT128 = 25,
VectorT256 = 26,
VectorT512 = 27,
AVXVNNIINT = 28,
AVXVNNIINT_V512 = 29,
X86Base_X64 = 30,
AVX_X64 = 31,
AVX2_X64 = 32,
AVX512_X64 = 33,
AVX512v2_X64 = 34,
AVX512v3_X64 = 35,
AVX10v1_X64 = 36,
AVX10v2_X64 = 37,
AES_X64 = 38,
AVX512VP2INTERSECT_X64 = 39,
AVXIFMA_X64 = 40,
AVXVNNI_X64 = 41,
GFNI_X64 = 42,
SHA_X64 = 43,
WAITPKG_X64 = 44,
X86Serialize_X64 = 45,
}

public enum InstructionSet_X86

@@ -241,52 +235,50 @@ namespace Internal.JitInterface
ILLEGAL = InstructionSet.ILLEGAL,
NONE = InstructionSet.NONE,
X86Base = 1,
SSE42 = 2,
AVX = 3,
AVX2 = 4,
AVX512 = 5,
AVX512v2 = 6,
AVX512v3 = 7,
AVX10v1 = 8,
AVX10v2 = 9,
APX = 10,
AES = 11,
AES_V256 = 12,
AES_V512 = 13,
AVX512VP2INTERSECT = 14,
AVXIFMA = 15,
AVXVNNI = 16,
GFNI = 17,
GFNI_V256 = 18,
GFNI_V512 = 19,
SHA = 20,
WAITPKG = 21,
X86Serialize = 22,
Vector128 = 23,
Vector256 = 24,
Vector512 = 25,
VectorT128 = 26,
VectorT256 = 27,
VectorT512 = 28,
AVXVNNIINT = 29,
AVXVNNIINT_V512 = 30,
X86Base_X64 = 31,
SSE42_X64 = 32,
AVX_X64 = 33,
AVX2_X64 = 34,
AVX512_X64 = 35,
AVX512v2_X64 = 36,
AVX512v3_X64 = 37,
AVX10v1_X64 = 38,
AVX10v2_X64 = 39,
AES_X64 = 40,
AVX512VP2INTERSECT_X64 = 41,
AVXIFMA_X64 = 42,
AVXVNNI_X64 = 43,
GFNI_X64 = 44,
SHA_X64 = 45,
WAITPKG_X64 = 46,
X86Serialize_X64 = 47,
AVX = 2,
AVX2 = 3,
AVX512 = 4,
AVX512v2 = 5,
AVX512v3 = 6,
AVX10v1 = 7,
AVX10v2 = 8,
APX = 9,
AES = 10,
AES_V256 = 11,
AES_V512 = 12,
AVX512VP2INTERSECT = 13,
AVXIFMA = 14,
AVXVNNI = 15,
GFNI = 16,
GFNI_V256 = 17,
GFNI_V512 = 18,
SHA = 19,
WAITPKG = 20,
X86Serialize = 21,
Vector128 = 22,
Vector256 = 23,
Vector512 = 24,
VectorT128 = 25,
VectorT256 = 26,
VectorT512 = 27,
AVXVNNIINT = 28,
AVXVNNIINT_V512 = 29,
X86Base_X64 = 30,
AVX_X64 = 31,
AVX2_X64 = 32,
AVX512_X64 = 33,
AVX512v2_X64 = 34,
AVX512v3_X64 = 35,
AVX10v1_X64 = 36,
AVX10v2_X64 = 37,
AES_X64 = 38,
AVX512VP2INTERSECT_X64 = 39,
AVXIFMA_X64 = 40,
AVXVNNI_X64 = 41,
GFNI_X64 = 42,
SHA_X64 = 43,
WAITPKG_X64 = 44,
X86Serialize_X64 = 45,
}

public unsafe struct InstructionSetFlags : IEnumerable<InstructionSet>

@@ -525,10 +517,6 @@ namespace Internal.JitInterface
resultflags.AddInstructionSet(InstructionSet.X64_X86Base_X64);
if (resultflags.HasInstructionSet(InstructionSet.X64_X86Base_X64))
resultflags.AddInstructionSet(InstructionSet.X64_X86Base);
if (resultflags.HasInstructionSet(InstructionSet.X64_SSE42))
resultflags.AddInstructionSet(InstructionSet.X64_SSE42_X64);
if (resultflags.HasInstructionSet(InstructionSet.X64_SSE42_X64))
resultflags.AddInstructionSet(InstructionSet.X64_SSE42);
if (resultflags.HasInstructionSet(InstructionSet.X64_AVX))
resultflags.AddInstructionSet(InstructionSet.X64_AVX_X64);
if (resultflags.HasInstructionSet(InstructionSet.X64_AVX_X64))

@@ -589,10 +577,8 @@ namespace Internal.JitInterface
resultflags.AddInstructionSet(InstructionSet.X64_X86Serialize_X64);
if (resultflags.HasInstructionSet(InstructionSet.X64_X86Serialize_X64))
resultflags.AddInstructionSet(InstructionSet.X64_X86Serialize);
if (resultflags.HasInstructionSet(InstructionSet.X64_SSE42))
resultflags.AddInstructionSet(InstructionSet.X64_X86Base);
if (resultflags.HasInstructionSet(InstructionSet.X64_AVX))
resultflags.AddInstructionSet(InstructionSet.X64_SSE42);
resultflags.AddInstructionSet(InstructionSet.X64_X86Base);
if (resultflags.HasInstructionSet(InstructionSet.X64_AVX2))
resultflags.AddInstructionSet(InstructionSet.X64_AVX);
if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512))

@@ -622,7 +608,7 @@ namespace Internal.JitInterface
if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNI))
resultflags.AddInstructionSet(InstructionSet.X64_AVX2);
if (resultflags.HasInstructionSet(InstructionSet.X64_GFNI))
resultflags.AddInstructionSet(InstructionSet.X64_SSE42);
resultflags.AddInstructionSet(InstructionSet.X64_X86Base);
if (resultflags.HasInstructionSet(InstructionSet.X64_GFNI_V256))
resultflags.AddInstructionSet(InstructionSet.X64_GFNI);
if (resultflags.HasInstructionSet(InstructionSet.X64_GFNI_V256))

@@ -656,10 +642,8 @@ namespace Internal.JitInterface
break;

case TargetArchitecture.X86:
if (resultflags.HasInstructionSet(InstructionSet.X86_SSE42))
resultflags.AddInstructionSet(InstructionSet.X86_X86Base);
if (resultflags.HasInstructionSet(InstructionSet.X86_AVX))
resultflags.AddInstructionSet(InstructionSet.X86_SSE42);
resultflags.AddInstructionSet(InstructionSet.X86_X86Base);
if (resultflags.HasInstructionSet(InstructionSet.X86_AVX2))
resultflags.AddInstructionSet(InstructionSet.X86_AVX);
if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512))

@@ -689,7 +673,7 @@ namespace Internal.JitInterface
if (resultflags.HasInstructionSet(InstructionSet.X86_AVXVNNI))
resultflags.AddInstructionSet(InstructionSet.X86_AVX2);
if (resultflags.HasInstructionSet(InstructionSet.X86_GFNI))
resultflags.AddInstructionSet(InstructionSet.X86_SSE42);
resultflags.AddInstructionSet(InstructionSet.X86_X86Base);
if (resultflags.HasInstructionSet(InstructionSet.X86_GFNI_V256))
resultflags.AddInstructionSet(InstructionSet.X86_GFNI);
if (resultflags.HasInstructionSet(InstructionSet.X86_GFNI_V256))

@@ -799,8 +783,6 @@ namespace Internal.JitInterface
case TargetArchitecture.X64:
if (resultflags.HasInstructionSet(InstructionSet.X64_X86Base_X64))
resultflags.AddInstructionSet(InstructionSet.X64_X86Base);
if (resultflags.HasInstructionSet(InstructionSet.X64_SSE42_X64))
resultflags.AddInstructionSet(InstructionSet.X64_SSE42);
if (resultflags.HasInstructionSet(InstructionSet.X64_AVX_X64))
resultflags.AddInstructionSet(InstructionSet.X64_AVX);
if (resultflags.HasInstructionSet(InstructionSet.X64_AVX2_X64))

@@ -832,8 +814,6 @@ namespace Internal.JitInterface
if (resultflags.HasInstructionSet(InstructionSet.X64_X86Serialize_X64))
resultflags.AddInstructionSet(InstructionSet.X64_X86Serialize);
if (resultflags.HasInstructionSet(InstructionSet.X64_X86Base))
resultflags.AddInstructionSet(InstructionSet.X64_SSE42);
if (resultflags.HasInstructionSet(InstructionSet.X64_SSE42))
resultflags.AddInstructionSet(InstructionSet.X64_AVX);
if (resultflags.HasInstructionSet(InstructionSet.X64_AVX))
resultflags.AddInstructionSet(InstructionSet.X64_AVX2);

@@ -863,7 +843,7 @@ namespace Internal.JitInterface
resultflags.AddInstructionSet(InstructionSet.X64_AVXIFMA);
if (resultflags.HasInstructionSet(InstructionSet.X64_AVX2))
resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNI);
if (resultflags.HasInstructionSet(InstructionSet.X64_SSE42))
if (resultflags.HasInstructionSet(InstructionSet.X64_X86Base))
resultflags.AddInstructionSet(InstructionSet.X64_GFNI);
if (resultflags.HasInstructionSet(InstructionSet.X64_GFNI))
resultflags.AddInstructionSet(InstructionSet.X64_GFNI_V256);

@@ -899,8 +879,6 @@ namespace Internal.JitInterface

case TargetArchitecture.X86:
if (resultflags.HasInstructionSet(InstructionSet.X86_X86Base))
resultflags.AddInstructionSet(InstructionSet.X86_SSE42);
if (resultflags.HasInstructionSet(InstructionSet.X86_SSE42))
resultflags.AddInstructionSet(InstructionSet.X86_AVX);
if (resultflags.HasInstructionSet(InstructionSet.X86_AVX))
resultflags.AddInstructionSet(InstructionSet.X86_AVX2);

@@ -930,7 +908,7 @@ namespace Internal.JitInterface
resultflags.AddInstructionSet(InstructionSet.X86_AVXIFMA);
if (resultflags.HasInstructionSet(InstructionSet.X86_AVX2))
resultflags.AddInstructionSet(InstructionSet.X86_AVXVNNI);
if (resultflags.HasInstructionSet(InstructionSet.X86_SSE42))
if (resultflags.HasInstructionSet(InstructionSet.X86_X86Base))
resultflags.AddInstructionSet(InstructionSet.X86_GFNI);
if (resultflags.HasInstructionSet(InstructionSet.X86_GFNI))
resultflags.AddInstructionSet(InstructionSet.X86_GFNI_V256);

@@ -971,10 +949,8 @@ namespace Internal.JitInterface

private static Dictionary<(string, TargetArchitecture), string> AllInstructionSetGroups { get; } = new()
{
{ ("x86-64", TargetArchitecture.X64), "base" },
{ ("x86-64", TargetArchitecture.X86), "base" },
{ ("x86-64-v2", TargetArchitecture.X64), "x86-64 sse4.2" },
{ ("x86-64-v2", TargetArchitecture.X86), "x86-64 sse4.2" },
{ ("x86-64-v2", TargetArchitecture.X64), "base" },
{ ("x86-64-v2", TargetArchitecture.X86), "base" },
{ ("x86-64-v3", TargetArchitecture.X64), "x86-64-v2 avx2" },
{ ("x86-64-v3", TargetArchitecture.X86), "x86-64-v2 avx2" },
{ ("x86-64-v4", TargetArchitecture.X64), "x86-64-v3 avx512" },

@@ -1046,11 +1022,11 @@ namespace Internal.JitInterface
yield return new InstructionSetInfo("base", "X86Base", InstructionSet.X64_X86Base, true);
yield return new InstructionSetInfo("base", "Sse", InstructionSet.X64_X86Base, true);
yield return new InstructionSetInfo("base", "Sse2", InstructionSet.X64_X86Base, true);
yield return new InstructionSetInfo("sse4.2", "Sse42", InstructionSet.X64_SSE42, true);
yield return new InstructionSetInfo("sse4.2", "Sse3", InstructionSet.X64_SSE42, true);
yield return new InstructionSetInfo("sse4.2", "Ssse3", InstructionSet.X64_SSE42, true);
yield return new InstructionSetInfo("sse4.2", "Sse41", InstructionSet.X64_SSE42, true);
yield return new InstructionSetInfo("sse4.2", "Popcnt", InstructionSet.X64_SSE42, true);
yield return new InstructionSetInfo("base", "Sse42", InstructionSet.X64_X86Base, true);
yield return new InstructionSetInfo("base", "Sse3", InstructionSet.X64_X86Base, true);
yield return new InstructionSetInfo("base", "Ssse3", InstructionSet.X64_X86Base, true);
yield return new InstructionSetInfo("base", "Sse41", InstructionSet.X64_X86Base, true);
yield return new InstructionSetInfo("base", "Popcnt", InstructionSet.X64_X86Base, true);
yield return new InstructionSetInfo("avx", "Avx", InstructionSet.X64_AVX, true);
yield return new InstructionSetInfo("avx2", "Avx2", InstructionSet.X64_AVX2, true);
yield return new InstructionSetInfo("avx2", "Bmi1", InstructionSet.X64_AVX2, true);

@@ -1119,11 +1095,11 @@ namespace Internal.JitInterface
yield return new InstructionSetInfo("base", "X86Base", InstructionSet.X86_X86Base, true);
yield return new InstructionSetInfo("base", "Sse", InstructionSet.X86_X86Base, true);
yield return new InstructionSetInfo("base", "Sse2", InstructionSet.X86_X86Base, true);
yield return new InstructionSetInfo("sse4.2", "Sse42", InstructionSet.X86_SSE42, true);
yield return new InstructionSetInfo("sse4.2", "Sse3", InstructionSet.X86_SSE42, true);
yield return new InstructionSetInfo("sse4.2", "Ssse3", InstructionSet.X86_SSE42, true);
yield return new InstructionSetInfo("sse4.2", "Sse41", InstructionSet.X86_SSE42, true);
yield return new InstructionSetInfo("sse4.2", "Popcnt", InstructionSet.X86_SSE42, true);
yield return new InstructionSetInfo("base", "Sse42", InstructionSet.X86_X86Base, true);
yield return new InstructionSetInfo("base", "Sse3", InstructionSet.X86_X86Base, true);
yield return new InstructionSetInfo("base", "Ssse3", InstructionSet.X86_X86Base, true);
yield return new InstructionSetInfo("base", "Sse41", InstructionSet.X86_X86Base, true);
yield return new InstructionSetInfo("base", "Popcnt", InstructionSet.X86_X86Base, true);
yield return new InstructionSetInfo("avx", "Avx", InstructionSet.X86_AVX, true);
yield return new InstructionSetInfo("avx2", "Avx2", InstructionSet.X86_AVX2, true);
yield return new InstructionSetInfo("avx2", "Bmi1", InstructionSet.X86_AVX2, true);

@@ -1224,8 +1200,6 @@ namespace Internal.JitInterface
case TargetArchitecture.X64:
if (HasInstructionSet(InstructionSet.X64_X86Base))
AddInstructionSet(InstructionSet.X64_X86Base_X64);
if (HasInstructionSet(InstructionSet.X64_SSE42))
AddInstructionSet(InstructionSet.X64_SSE42_X64);
if (HasInstructionSet(InstructionSet.X64_AVX))
AddInstructionSet(InstructionSet.X64_AVX_X64);
if (HasInstructionSet(InstructionSet.X64_AVX2))

@@ -1286,7 +1260,6 @@ namespace Internal.JitInterface

case TargetArchitecture.X64:
AddInstructionSet(InstructionSet.X64_X86Base_X64);
AddInstructionSet(InstructionSet.X64_SSE42_X64);
AddInstructionSet(InstructionSet.X64_AVX_X64);
AddInstructionSet(InstructionSet.X64_AVX2_X64);
AddInstructionSet(InstructionSet.X64_AVX512_X64);

@@ -1306,7 +1279,6 @@ namespace Internal.JitInterface

case TargetArchitecture.X86:
AddInstructionSet(InstructionSet.X86_X86Base_X64);
AddInstructionSet(InstructionSet.X86_SSE42_X64);
AddInstructionSet(InstructionSet.X86_AVX_X64);
AddInstructionSet(InstructionSet.X86_AVX2_X64);
AddInstructionSet(InstructionSet.X86_AVX512_X64);

@@ -1479,33 +1451,33 @@ namespace Internal.JitInterface

case "Sse42":
if (nestedTypeName == "X64")
{ return InstructionSet.X64_SSE42_X64; }
{ return InstructionSet.X64_X86Base_X64; }
else
{ return InstructionSet.X64_SSE42; }
{ return InstructionSet.X64_X86Base; }

case "Sse3":
if (nestedTypeName == "X64")
{ return InstructionSet.X64_SSE42_X64; }
{ return InstructionSet.X64_X86Base_X64; }
else
{ return InstructionSet.X64_SSE42; }
{ return InstructionSet.X64_X86Base; }

case "Ssse3":
if (nestedTypeName == "X64")
{ return InstructionSet.X64_SSE42_X64; }
{ return InstructionSet.X64_X86Base_X64; }
else
{ return InstructionSet.X64_SSE42; }
{ return InstructionSet.X64_X86Base; }

case "Sse41":
if (nestedTypeName == "X64")
{ return InstructionSet.X64_SSE42_X64; }
{ return InstructionSet.X64_X86Base_X64; }
else
{ return InstructionSet.X64_SSE42; }
{ return InstructionSet.X64_X86Base; }

case "Popcnt":
if (nestedTypeName == "X64")
{ return InstructionSet.X64_SSE42_X64; }
{ return InstructionSet.X64_X86Base_X64; }
else
{ return InstructionSet.X64_SSE42; }
{ return InstructionSet.X64_X86Base; }

case "Avx":
if (nestedTypeName == "X64")

@@ -1800,19 +1772,19 @@ namespace Internal.JitInterface
{ return InstructionSet.X86_X86Base; }

case "Sse42":
{ return InstructionSet.X86_SSE42; }
{ return InstructionSet.X86_X86Base; }

case "Sse3":
{ return InstructionSet.X86_SSE42; }
{ return InstructionSet.X86_X86Base; }

case "Ssse3":
{ return InstructionSet.X86_SSE42; }
{ return InstructionSet.X86_X86Base; }

case "Sse41":
{ return InstructionSet.X86_SSE42; }
{ return InstructionSet.X86_X86Base; }

case "Popcnt":
{ return InstructionSet.X86_SSE42; }
{ return InstructionSet.X86_X86Base; }

case "Avx":
{ return InstructionSet.X86_AVX; }

@@ -32,11 +32,11 @@ instructionset ,X86 ,X86Base , ,22 ,X86Base
instructionset ,X86 ,Sse , ,1 ,X86Base ,base
instructionset ,X86 ,Sse2 , ,2 ,X86Base ,base

instructionset ,X86 ,Sse42 , ,6 ,SSE42 ,sse4.2
instructionset ,X86 ,Sse3 , ,3 ,SSE42 ,sse4.2
instructionset ,X86 ,Ssse3 , ,4 ,SSE42 ,sse4.2
instructionset ,X86 ,Sse41 , ,5 ,SSE42 ,sse4.2
instructionset ,X86 ,Popcnt , ,15 ,SSE42 ,sse4.2
instructionset ,X86 ,Sse42 , ,6 ,X86Base ,base
instructionset ,X86 ,Sse3 , ,3 ,X86Base ,base
instructionset ,X86 ,Ssse3 , ,4 ,X86Base ,base
instructionset ,X86 ,Sse41 , ,5 ,X86Base ,base
instructionset ,X86 ,Popcnt , ,15 ,X86Base ,base

instructionset ,X86 ,Avx , ,7 ,AVX ,avx

@@ -122,7 +122,6 @@ instructionset ,X86 ,AvxVnniInt16_V512 , ,63 ,AVXVNNI
; 64-bit Instruction Sets

instructionset64bit,X86 ,X86Base
instructionset64bit,X86 ,SSE42

instructionset64bit,X86 ,AVX
instructionset64bit,X86 ,AVX2

@@ -153,9 +152,7 @@ vectorinstructionset,X86 ,Vector512

; Implications

implication ,X86 ,SSE42 ,X86Base

implication ,X86 ,AVX ,SSE42
implication ,X86 ,AVX ,X86Base
implication ,X86 ,AVX2 ,AVX

implication ,X86 ,AVX512 ,AVX2

@@ -175,7 +172,7 @@ implication ,X86 ,AVX512VP2INTERSECT ,AVX512
implication ,X86 ,AVXIFMA ,AVX2
implication ,X86 ,AVXVNNI ,AVX2

implication ,X86 ,GFNI ,SSE42
implication ,X86 ,GFNI ,X86Base
implication ,X86 ,GFNI_V256 ,GFNI
implication ,X86 ,GFNI_V256 ,AVX
implication ,X86 ,GFNI_V512 ,GFNI_V256

@@ -264,8 +261,7 @@ implication ,RiscV64 ,Zbb ,RiscV64Base
implication ,RiscV64 ,Zba ,RiscV64Base

; ,name and aliases ,archs ,lower baselines included by implication
instructionsetgroup ,x86-64 ,X64 X86 ,base
instructionsetgroup ,x86-64-v2 ,X64 X86 ,x86-64 sse4.2
instructionsetgroup ,x86-64-v2 ,X64 X86 ,base
instructionsetgroup ,x86-64-v3 ,X64 X86 ,x86-64-v2 avx2
instructionsetgroup ,x86-64-v4 ,X64 X86 ,x86-64-v3 avx512

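The implication table feeds a generator that closes the supported set transitively (if AVX is supported, X86Base must be too), which is what the expansion loops earlier in this diff implement. A minimal fixed-point sketch over a tiny subset of the table (illustrative only):

#include <cstdio>
#include <map>
#include <set>
#include <string>
#include <vector>

int main()
{
    // Subset of the implication table above: each ISA implies its baselines.
    std::map<std::string, std::vector<std::string>> implies = {
        { "AVX",    { "X86Base" } },
        { "AVX2",   { "AVX" } },
        { "AVX512", { "AVX2" } },
        { "GFNI",   { "X86Base" } },
    };

    std::set<std::string> supported = { "AVX512", "GFNI" };

    // Expand to a fixed point: keep adding implied sets until nothing changes.
    bool changed = true;
    while (changed)
    {
        changed = false;
        for (const auto& isa : std::set<std::string>(supported))
            for (const auto& dep : implies[isa])
                changed |= supported.insert(dep).second;
    }

    for (const auto& isa : supported)
        printf("%s\n", isa.c_str()); // AVX AVX2 AVX512 GFNI X86Base
    return 0;
}
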
@@ -108,7 +108,8 @@ namespace ILCompiler
TargetOS targetOS = Get(_command.TargetOS);
InstructionSetSupport instructionSetSupport = Helpers.ConfigureInstructionSetSupport(Get(_command.InstructionSet), Get(_command.MaxVectorTBitWidth), isVectorTOptimistic, targetArchitecture, targetOS,
"Unrecognized instruction set {0}", "Unsupported combination of instruction sets: {0}/{1}", logger,
optimizingForSize: _command.OptimizationMode == OptimizationMode.PreferSize);
optimizingForSize: _command.OptimizationMode == OptimizationMode.PreferSize,
isReadyToRun: false);

string systemModuleName = Get(_command.SystemModuleName);
string reflectionData = Get(_command.ReflectionData);

@@ -86,7 +86,9 @@ namespace ILCompiler
TargetArchitecture targetArchitecture = Get(_command.TargetArchitecture);
TargetOS targetOS = Get(_command.TargetOS);
InstructionSetSupport instructionSetSupport = Helpers.ConfigureInstructionSetSupport(Get(_command.InstructionSet), Get(_command.MaxVectorTBitWidth), isVectorTOptimistic, targetArchitecture, targetOS,
SR.InstructionSetMustNotBe, SR.InstructionSetInvalidImplication, logger);
SR.InstructionSetMustNotBe, SR.InstructionSetInvalidImplication, logger,
optimizingForSize: _command.OptimizationMode == OptimizationMode.PreferSize,
isReadyToRun: true);
SharedGenericsMode genericsMode = SharedGenericsMode.CanonicalReferenceTypes;
var targetDetails = new TargetDetails(targetArchitecture, targetOS, Crossgen2RootCommand.IsArmel ? TargetAbi.NativeAotArmel : TargetAbi.NativeAot, instructionSetSupport.GetVectorTSimdVector());

@@ -1180,6 +1180,19 @@ void EEJitManager::SetCpuInfo()

int cpuFeatures = minipal_getcpufeatures();

if ((cpuFeatures & IntrinsicConstants_Invalid) != 0)
{
#if defined(TARGET_X86) || defined(TARGET_AMD64)
EEPOLICY_HANDLE_FATAL_ERROR_WITH_MESSAGE(COR_E_EXECUTIONENGINE, W("\nThe current CPU is missing one or more of the following instruction sets: SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT\n"));
#elif defined(TARGET_ARM64) && (defined(TARGET_WINDOWS) || defined(TARGET_APPLE))
EEPOLICY_HANDLE_FATAL_ERROR_WITH_MESSAGE(COR_E_EXECUTIONENGINE, W("\nThe current CPU is missing one or more of the following instruction sets: AdvSimd, LSE\n"));
#elif defined(TARGET_ARM64)
EEPOLICY_HANDLE_FATAL_ERROR_WITH_MESSAGE(COR_E_EXECUTIONENGINE, W("\nThe current CPU is missing one or more of the following instruction sets: AdvSimd\n"));
#else
EEPOLICY_HANDLE_FATAL_ERROR_WITH_MESSAGE(COR_E_EXECUTIONENGINE, W("\nThe current CPU is missing one or more of the baseline instruction sets.\n"));
#endif
}

// Get the maximum bitwidth of Vector<T>, rounding down to the nearest multiple of 128-bits
uint32_t maxVectorTBitWidth = (CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_MaxVectorTBitWidth) / 128) * 128;

@@ -1198,20 +1211,13 @@ void EEJitManager::SetCpuInfo()
CPUCompileFlags.Set(InstructionSet_VectorT512);
}

// x86-64-v1
// x86-64-v2

if (CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableHWIntrinsic))
{
CPUCompileFlags.Set(InstructionSet_X86Base);
}

// x86-64-v2

if (((cpuFeatures & XArchIntrinsicConstants_Sse42) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableSSE42))
{
CPUCompileFlags.Set(InstructionSet_SSE42);
}

// x86-64-v3

if (((cpuFeatures & XArchIntrinsicConstants_Avx) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX))

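The MaxVectorTBitWidth read above rounds a user-specified value down to a multiple of 128 bits via integer division; a one-line illustration:

#include <cassert>
#include <cstdint>

int main()
{
    uint32_t requested = 300;                              // e.g. from config
    uint32_t maxVectorTBitWidth = (requested / 128) * 128; // truncating divide
    assert(maxVectorTBitWidth == 256);
    return 0;
}
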
@@ -6317,7 +6323,7 @@ size_t ReadyToRunJitManager::WalkILOffsets(
BoundsType boundsType,
void* pContext,
size_t (* pfnWalkILOffsets)(ICorDebugInfo::OffsetMapping *pOffsetMapping, void *pContext))
{
{
CONTRACTL {
THROWS; // on OOM.
GC_NOTRIGGER; // getting vars shouldn't trigger

@@ -238,8 +238,17 @@ int minipal_getcpufeatures(void)
bool hasAvx2Dependencies = false;
bool hasAvx10v1Dependencies = false;

assert((cpuidInfo[CPUID_EDX] & (1 << 25)) != 0); // SSE
assert((cpuidInfo[CPUID_EDX] & (1 << 26)) != 0); // SSE2
if (((cpuidInfo[CPUID_EDX] & (1 << 25)) == 0) || // SSE
((cpuidInfo[CPUID_EDX] & (1 << 26)) == 0) || // SSE2
((cpuidInfo[CPUID_ECX] & (1 << 0)) == 0) || // SSE3
((cpuidInfo[CPUID_ECX] & (1 << 9)) == 0) || // SSSE3
((cpuidInfo[CPUID_ECX] & (1 << 19)) == 0) || // SSE4.1
((cpuidInfo[CPUID_ECX] & (1 << 20)) == 0) || // SSE4.2
((cpuidInfo[CPUID_ECX] & (1 << 23)) == 0)) // POPCNT
{
// One of the baseline ISAs is not supported
result |= IntrinsicConstants_Invalid;
}

if (((cpuidInfo[CPUID_ECX] & (1 << 25)) != 0) && // AESNI
((cpuidInfo[CPUID_ECX] & (1 << 1)) != 0)) // PCLMULQDQ

@ -247,27 +256,18 @@ int minipal_getcpufeatures(void)
|
|||
result |= XArchIntrinsicConstants_Aes;
|
||||
}

    if (((cpuidInfo[CPUID_ECX] & (1 << 0)) != 0) &&  // SSE3
        ((cpuidInfo[CPUID_ECX] & (1 << 9)) != 0) &&  // SSSE3
        ((cpuidInfo[CPUID_ECX] & (1 << 19)) != 0) && // SSE4.1
        ((cpuidInfo[CPUID_ECX] & (1 << 20)) != 0) && // SSE4.2
        ((cpuidInfo[CPUID_ECX] & (1 << 23)) != 0))   // POPCNT
    if (((cpuidInfo[CPUID_ECX] & (1 << 27)) != 0) && // OSXSAVE
        ((cpuidInfo[CPUID_ECX] & (1 << 28)) != 0))   // AVX
    {
        result |= XArchIntrinsicConstants_Sse42;

        if (((cpuidInfo[CPUID_ECX] & (1 << 27)) != 0) && // OSXSAVE
            ((cpuidInfo[CPUID_ECX] & (1 << 28)) != 0))   // AVX
        if (IsAvxEnabled() && (xmmYmmStateSupport() == 1)) // XGETBV == 11
        {
            if (IsAvxEnabled() && (xmmYmmStateSupport() == 1)) // XGETBV == 11
            {
                result |= XArchIntrinsicConstants_Avx;
            result |= XArchIntrinsicConstants_Avx;

                if (((cpuidInfo[CPUID_ECX] & (1 << 29)) != 0) && // F16C
                    ((cpuidInfo[CPUID_ECX] & (1 << 12)) != 0) && // FMA
                    ((cpuidInfo[CPUID_ECX] & (1 << 22)) != 0))   // MOVBE
                {
                    hasAvx2Dependencies = true;
                }
            if (((cpuidInfo[CPUID_ECX] & (1 << 29)) != 0) && // F16C
                ((cpuidInfo[CPUID_ECX] & (1 << 12)) != 0) && // FMA
                ((cpuidInfo[CPUID_ECX] & (1 << 22)) != 0))   // MOVBE
            {
                hasAvx2Dependencies = true;
            }
        }
    }
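    // Note: "XGETBV == 11" refers to XCR0 bits 1 and 2 (SSE and AVX state) both being set,
    // i.e. the OS saves and restores the full YMM state; xmmYmmStateSupport() performs that check.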

@@ -455,14 +455,18 @@ int minipal_getcpufeatures(void)
#if HAVE_AUXV_HWCAP_H
    unsigned long hwCap = getauxval(AT_HWCAP);

    assert(hwCap & HWCAP_ASIMD);
    if ((hwCap & HWCAP_ASIMD) == 0)
    {
        // One of the baseline ISAs is not supported
        result |= IntrinsicConstants_Invalid;
    }

    if ((hwCap & HWCAP_ATOMICS) != 0)
        result |= ARM64IntrinsicConstants_Atomics;

    if (hwCap & HWCAP_AES)
        result |= ARM64IntrinsicConstants_Aes;

    if (hwCap & HWCAP_ATOMICS)
        result |= ARM64IntrinsicConstants_Atomics;

    if (hwCap & HWCAP_CRC32)
        result |= ARM64IntrinsicConstants_Crc32;
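    // AT_HWCAP bits are populated by the Linux kernel; HWCAP_ASIMD corresponds to AdvSimd,
    // the ARM64 baseline checked above, so its absence marks the whole query invalid.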

@@ -498,6 +502,17 @@ int minipal_getcpufeatures(void)
    int64_t valueFromSysctl = 0;
    size_t sz = sizeof(valueFromSysctl);

    if ((sysctlbyname("hw.optional.AdvSIMD", &valueFromSysctl, &sz, NULL, 0) != 0) || (valueFromSysctl == 0) ||
        (sysctlbyname("hw.optional.arm.FEAT_LSE", &valueFromSysctl, &sz, NULL, 0) != 0) || (valueFromSysctl == 0))
    {
        // One of the baseline ISAs is not supported
        result |= IntrinsicConstants_Invalid;
    }
    else
    {
        result |= ARM64IntrinsicConstants_Atomics;
    }

    if ((sysctlbyname("hw.optional.arm.FEAT_AES", &valueFromSysctl, &sz, NULL, 0) == 0) && (valueFromSysctl != 0))
        result |= ARM64IntrinsicConstants_Aes;
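    // sysctlbyname returns 0 on success; FEAT_LSE is part of the Apple baseline here, so the
    // Atomics flag is implied whenever the baseline check passes.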

@@ -516,9 +531,6 @@ int minipal_getcpufeatures(void)
    if ((sysctlbyname("hw.optional.arm.FEAT_SHA256", &valueFromSysctl, &sz, NULL, 0) == 0) && (valueFromSysctl != 0))
        result |= ARM64IntrinsicConstants_Sha256;

    if ((sysctlbyname("hw.optional.armv8_1_atomics", &valueFromSysctl, &sz, NULL, 0) == 0) && (valueFromSysctl != 0))
        result |= ARM64IntrinsicConstants_Atomics;

    if ((sysctlbyname("hw.optional.arm.FEAT_LRCPC", &valueFromSysctl, &sz, NULL, 0) == 0) && (valueFromSysctl != 0))
        result |= ARM64IntrinsicConstants_Rcpc;

@@ -529,6 +541,17 @@ int minipal_getcpufeatures(void)
#endif // HOST_UNIX

#if defined(HOST_WINDOWS)
    if (!IsProcessorFeaturePresent(PF_ARM_V8_INSTRUCTIONS_AVAILABLE) ||
        !IsProcessorFeaturePresent(PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE))
    {
        // One of the baseline ISAs is not supported
        result |= IntrinsicConstants_Invalid;
    }
    else
    {
        result |= ARM64IntrinsicConstants_Atomics;
    }

    if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE))
    {
        result |= ARM64IntrinsicConstants_Aes;

@@ -541,11 +564,6 @@ int minipal_getcpufeatures(void)
        result |= ARM64IntrinsicConstants_Crc32;
    }

    if (IsProcessorFeaturePresent(PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE))
    {
        result |= ARM64IntrinsicConstants_Atomics;
    }

    if (IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE))
    {
        result |= ARM64IntrinsicConstants_Dp;

@@ -578,7 +596,6 @@ int minipal_getcpufeatures(void)
    {
        result |= ARM64IntrinsicConstants_Sve2;
    }

#endif // HOST_WINDOWS

#endif // HOST_ARM64

@@ -8,28 +8,28 @@
// Should match the constants defined in the compiler in HardwareIntrinsicHelpers.cs
//

// Reserve the last bit to indicate an invalid query, such as if a baseline ISA isn't supported
#define IntrinsicConstants_Invalid (1 << 31)

#if defined(HOST_X86) || defined(HOST_AMD64)
#define XArchIntrinsicConstants_Sse42 (1 << 0)
#define XArchIntrinsicConstants_Avx (1 << 1)
#define XArchIntrinsicConstants_Avx2 (1 << 2)
#define XArchIntrinsicConstants_Avx512 (1 << 3)

#define XArchIntrinsicConstants_Avx512v2 (1 << 4)
#define XArchIntrinsicConstants_Avx512v3 (1 << 5)
#define XArchIntrinsicConstants_Avx10v1 (1 << 6)
#define XArchIntrinsicConstants_Avx10v2 (1 << 7)
#define XArchIntrinsicConstants_Apx (1 << 8)

#define XArchIntrinsicConstants_Aes (1 << 9)
#define XArchIntrinsicConstants_Avx512Vp2intersect (1 << 10)
#define XArchIntrinsicConstants_AvxIfma (1 << 11)
#define XArchIntrinsicConstants_AvxVnni (1 << 12)
#define XArchIntrinsicConstants_Avx (1 << 0)
#define XArchIntrinsicConstants_Avx2 (1 << 1)
#define XArchIntrinsicConstants_Avx512 (1 << 2)
#define XArchIntrinsicConstants_Avx512v2 (1 << 3)
#define XArchIntrinsicConstants_Avx512v3 (1 << 4)
#define XArchIntrinsicConstants_Avx10v1 (1 << 5)
#define XArchIntrinsicConstants_Avx10v2 (1 << 6)
#define XArchIntrinsicConstants_Apx (1 << 7)
#define XArchIntrinsicConstants_Aes (1 << 8)
#define XArchIntrinsicConstants_Avx512Vp2intersect (1 << 9)
#define XArchIntrinsicConstants_AvxIfma (1 << 10)
#define XArchIntrinsicConstants_AvxVnni (1 << 11)
#define XArchIntrinsicConstants_AvxVnniInt (1 << 12)
#define XArchIntrinsicConstants_Gfni (1 << 13)
#define XArchIntrinsicConstants_Sha (1 << 14)
#define XArchIntrinsicConstants_Vaes (1 << 15)
#define XArchIntrinsicConstants_WaitPkg (1 << 16)
#define XArchIntrinsicConstants_X86Serialize (1 << 17)
#define XArchIntrinsicConstants_AvxVnniInt (1 << 18)
#endif // HOST_X86 || HOST_AMD64
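
// Illustrative usage sketch (annotation, not part of the change): a consumer of these flags
// checks the Invalid bit before testing individual features. All names below are the ones
// defined above; nothing else is assumed.
//
//   int features = minipal_getcpufeatures();
//   if (features & IntrinsicConstants_Invalid)
//   {
//       // The CPU lacks the x86-64-v2 baseline; fail fast rather than run unsupported code.
//   }
//   else if (features & XArchIntrinsicConstants_Avx2)
//   {
//       // AVX2 (and everything below it in the hierarchy) is safe to use.
//   }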

#if defined(HOST_ARM64)

@@ -50,7 +50,6 @@
// Bit position for the ARM64IntrinsicConstants_Atomics flags, to be used with tbz / tbnz instructions
#define ARM64_ATOMICS_FEATURE_FLAG_BIT 6
static_assert((1 << ARM64_ATOMICS_FEATURE_FLAG_BIT) == ARM64IntrinsicConstants_Atomics, "ARM64_ATOMICS_FEATURE_FLAG_BIT must match with ARM64IntrinsicConstants_Atomics");
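// Illustrative only (hypothetical register and label names): asm helpers can branch directly
// on this bit of the cached feature word, e.g.
//
//   tbz w16, #ARM64_ATOMICS_FEATURE_FLAG_BIT, NoLseAtomics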

#endif // HOST_ARM64

#if defined(HOST_RISCV64)

@@ -22,7 +22,6 @@
DOTNET_EnableAVX2;
DOTNET_EnableAVX512;
DOTNET_EnableHWIntrinsic;
DOTNET_EnableSSE42;
DOTNET_EnableAPX;
DOTNET_JitStressEvexEncoding;
DOTNET_PreferredVectorBitWidth;

@@ -103,26 +102,23 @@
<TestEnvironment Include="jitstress2_tiered" JitStress="2" TieredCompilation="1" />
<TestEnvironment Include="jitstress_isas_nohwintrinsic" EnableHWIntrinsic="0" />
<TestEnvironment Include="jitstress_isas_x86_evex" JitStressEvexEncoding="1" PreferredVectorBitWidth="512" />
<TestEnvironment Include="jitstress_isas_x86_noavx" EnableAVX="0" /> <!-- Depends on SSE42 -->
<TestEnvironment Include="jitstress_isas_x86_noavx" EnableAVX="0" /> <!-- Depends on Baseline -->
<TestEnvironment Include="jitstress_isas_x86_noavx2" EnableAVX2="0" /> <!-- Depends on AVX -->
<TestEnvironment Include="jitstress_isas_x86_noavx512" EnableAVX512="0" /> <!-- Depends on AVX2 -->
<TestEnvironment Include="jitstress_isas_x86_nosse3" EnableSSE42="0" /> <!-- Depends on Baseline -->
<TestEnvironment Include="jitstress_isas_x86_vectort128" JitStressEvexEncoding="1" MaxVectorTBitWidth="128" />
<TestEnvironment Include="jitstress_isas_x86_vectort512" JitStressEvexEncoding="1" PreferredVectorBitWidth="512" MaxVectorTBitWidth="512" />
<TestEnvironment Include="jitstress_isas_x86_noavx512_vectort128" EnableAVX512="0" MaxVectorTBitWidth="128" />
<TestEnvironment Include="jitstress_isas_1_x86_evex" JitStress="1" JitStressEvexEncoding="1" PreferredVectorBitWidth="512" />
<TestEnvironment Include="jitstress_isas_1_x86_noavx" JitStress="1" EnableAVX="0" /> <!-- Depends on SSE42 -->
<TestEnvironment Include="jitstress_isas_1_x86_noavx" JitStress="1" EnableAVX="0" /> <!-- Depends on Baseline -->
<TestEnvironment Include="jitstress_isas_1_x86_noavx2" JitStress="1" EnableAVX2="0" /> <!-- Depends on AVX -->
<TestEnvironment Include="jitstress_isas_1_x86_noavx512" JitStress="1" EnableAVX512="0" /> <!-- Depends on AVX2 -->
<TestEnvironment Include="jitstress_isas_1_x86_nosse3" JitStress="1" EnableSSE42="0" /> <!-- Depends on Baseline -->
<TestEnvironment Include="jitstress_isas_1_x86_vectort128" JitStress="1" JitStressEvexEncoding="1" MaxVectorTBitWidth="128" />
<TestEnvironment Include="jitstress_isas_1_x86_vectort512" JitStress="1" JitStressEvexEncoding="1" PreferredVectorBitWidth="512" MaxVectorTBitWidth="512" />
<TestEnvironment Include="jitstress_isas_1_x86_noavx512_vectort128" JitStress="1" EnableAVX512="0" MaxVectorTBitWidth="128" />
<TestEnvironment Include="jitstress_isas_2_x86_evex" JitStress="2" JitStressEvexEncoding="1" PreferredVectorBitWidth="512" />
<TestEnvironment Include="jitstress_isas_2_x86_noavx" JitStress="2" EnableAVX="0" /> <!-- Depends on SSE42 -->
<TestEnvironment Include="jitstress_isas_2_x86_noavx" JitStress="2" EnableAVX="0" /> <!-- Depends on Baseline -->
<TestEnvironment Include="jitstress_isas_2_x86_noavx2" JitStress="2" EnableAVX2="0" /> <!-- Depends on AVX -->
<TestEnvironment Include="jitstress_isas_2_x86_noavx512" JitStress="2" EnableAVX512="0" /> <!-- Depends on AVX2 -->
<TestEnvironment Include="jitstress_isas_2_x86_nosse3" JitStress="2" EnableSSE42="0" /> <!-- Depends on Baseline -->
<TestEnvironment Include="jitstress_isas_2_x86_vectort128" JitStress="2" JitStressEvexEncoding="1" MaxVectorTBitWidth="128" />
<TestEnvironment Include="jitstress_isas_2_x86_vectort512" JitStress="2" JitStressEvexEncoding="1" PreferredVectorBitWidth="512" MaxVectorTBitWidth="512" />
<TestEnvironment Include="jitstress_isas_2_x86_noavx512_vectort128" JitStress="2" EnableAVX512="0" MaxVectorTBitWidth="128" />
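<!-- Assumed mapping, noted for readers: each attribute above surfaces to the test process as the
     matching DOTNET_* switch from the list earlier in this change; e.g. the nosse3 scenarios run
     with DOTNET_EnableSSE42=0. -->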

@@ -70,7 +70,7 @@ namespace XarchHardwareIntrinsicTest._CpuId
for (int i = 0; i < 2; i++)
{
    // SSE, SSE2 are paired
    // SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT are paired

    if (IsBitIncorrect(edx, 25, typeof(Sse), Sse.IsSupported, "HWIntrinsic", ref isHierarchyDisabled))
    {

@@ -81,6 +81,31 @@ namespace XarchHardwareIntrinsicTest._CpuId
    {
        testResult = Fail;
    }

    if (IsBitIncorrect(ecx, 0, typeof(Sse3), Sse3.IsSupported, "HWIntrinsic", ref isHierarchyDisabled))
    {
        testResult = Fail;
    }

    if (IsBitIncorrect(ecx, 9, typeof(Ssse3), Ssse3.IsSupported, "HWIntrinsic", ref isHierarchyDisabled))
    {
        testResult = Fail;
    }

    if (IsBitIncorrect(ecx, 19, typeof(Sse41), Sse41.IsSupported, "HWIntrinsic", ref isHierarchyDisabled))
    {
        testResult = Fail;
    }

    if (IsBitIncorrect(ecx, 20, typeof(Sse42), Sse42.IsSupported, "HWIntrinsic", ref isHierarchyDisabled))
    {
        testResult = Fail;
    }

    if (IsBitIncorrect(ecx, 23, typeof(Popcnt), Popcnt.IsSupported, "HWIntrinsic", ref isHierarchyDisabled))
    {
        testResult = Fail;
    }
}

bool isBaselineHierarchyDisabled = isHierarchyDisabled;

@@ -100,40 +125,6 @@ namespace XarchHardwareIntrinsicTest._CpuId
    }
}

isHierarchyDisabled = isBaselineHierarchyDisabled;

for (int i = 0; i < 2; i++)
{
    // SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT are paired

    if (IsBitIncorrect(ecx, 0, typeof(Sse3), Sse3.IsSupported, "SSE42", ref isHierarchyDisabled))
    {
        testResult = Fail;
    }

    if (IsBitIncorrect(ecx, 9, typeof(Ssse3), Ssse3.IsSupported, "SSE42", ref isHierarchyDisabled))
    {
        testResult = Fail;
    }

    if (IsBitIncorrect(ecx, 19, typeof(Sse41), Sse41.IsSupported, "SSE42", ref isHierarchyDisabled))
    {
        testResult = Fail;
    }

    if (IsBitIncorrect(ecx, 20, typeof(Sse42), Sse42.IsSupported, "SSE42", ref isHierarchyDisabled))
    {
        testResult = Fail;
    }

    if (IsBitIncorrect(ecx, 23, typeof(Popcnt), Popcnt.IsSupported, "SSE42", ref isHierarchyDisabled))
    {
        testResult = Fail;
    }
}

bool isSse42HierarchyDisabled = isHierarchyDisabled;

if (IsBitIncorrect(ecx, 28, typeof(Avx), Avx.IsSupported, "AVX", ref isHierarchyDisabled))
{
    testResult = Fail;

@@ -280,7 +271,7 @@ namespace XarchHardwareIntrinsicTest._CpuId
    testResult = Fail;
}

isHierarchyDisabled = isSse42HierarchyDisabled;
isHierarchyDisabled = isBaselineHierarchyDisabled;

if (IsBitIncorrect(ecx, 8, typeof(Gfni), Gfni.IsSupported, "GFNI", ref isHierarchyDisabled))
{
|
||||
|
|
|
@ -48,50 +48,6 @@ unsafe class Program
|
|||
bool? ExpectedSse2 = true;
|
||||
|
||||
#if BASELINE_INTRINSICS
|
||||
bool? ExpectedSse3 = null;
|
||||
bool? ExpectedSsse3 = null;
|
||||
bool? ExpectedSse41 = null;
|
||||
bool? ExpectedSse42 = null;
|
||||
bool? ExpectedPopcnt = null;
|
||||
bool? ExpectedAes = null;
|
||||
bool? ExpectedPclmulqdq = null;
|
||||
bool? ExpectedGfni = null;
|
||||
bool? ExpectedSha = null;
|
||||
bool? ExpectedWaitPkg = null;
|
||||
bool? ExpectedX86Serialize = null;
|
||||
|
||||
bool? ExpectedAvx = false;
|
||||
bool? ExpectedAvx2 = false;
|
||||
bool? ExpectedBmi1 = false;
|
||||
bool? ExpectedBmi2 = false;
|
||||
bool? ExpectedF16c = false;
|
||||
bool? ExpectedFma = false;
|
||||
bool? ExpectedLzcnt = false;
|
||||
bool? ExpectedAvx512F = false;
|
||||
bool? ExpectedAvx512BW = false;
|
||||
bool? ExpectedAvx512CD = false;
|
||||
bool? ExpectedAvx512DQ = false;
|
||||
bool? ExpectedAvx512Vbmi = false;
|
||||
bool? ExpectedAvx512Bitalg = false;
|
||||
bool? ExpectedAvx512Vbmi2 = false;
|
||||
bool? ExpectedAvx512Vpopcntdq = false;
|
||||
bool? ExpectedAvx512Bf16 = false;
|
||||
bool? ExpectedAvx512Fp16 = false;
|
||||
bool? ExpectedAvx10v1 = false;
|
||||
bool? ExpectedAvx10v1V512 = false;
|
||||
bool? ExpectedAvx10v2 = false;
|
||||
bool? ExpectedAvx512Vp2intersect = false;
|
||||
bool? ExpectedAvxIfma = false;
|
||||
bool? ExpectedAvxVnni = false;
|
||||
bool? ExpectedAvxVnniInt = false;
|
||||
bool? ExpectedAvxVnniIntV512 = false;
|
||||
bool? ExpectedGfniV256 = false;
|
||||
bool? ExpectedGfniV512 = false;
|
||||
bool? ExpectedAesV256 = false;
|
||||
bool? ExpectedAesV512 = false;
|
||||
bool? ExpectedPclmulqdqV256 = false;
|
||||
bool? ExpectedPclmulqdqV512 = false;
|
||||
#elif SSE42_INTRINSICS
|
||||
bool? ExpectedSse3 = true;
|
||||
bool? ExpectedSsse3 = true;
|
||||
bool? ExpectedSse41 = true;
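// (Inference, not stated in the diff: true/false entries are hard expectations under this
// compilation mode, while null covers the optimistic ISAs (AES, PCLMULQDQ, GFNI, SHA, WAITPKG,
// X86SERIALIZE) whose runtime availability cannot be asserted either way.)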

@@ -1,23 +0,0 @@
<Project Sdk="Microsoft.NET.Sdk">
  <PropertyGroup>
    <OutputType>Exe</OutputType>
    <CLRTestPriority>0</CLRTestPriority>
    <CLRTestTargetUnsupported Condition="'$(TargetArchitecture)' != 'x64'">true</CLRTestTargetUnsupported>
    <!-- Sanitizers increase the binary size, so it ends up outside of our expected range. -->
    <CLRTestTargetUnsupported Condition="'$(EnableNativeSanitizers)' != ''">true</CLRTestTargetUnsupported>
    <!-- Test infra issue on apple devices: https://github.com/dotnet/runtime/issues/89917 -->
    <CLRTestTargetUnsupported Condition="'$(TargetsAppleMobile)' == 'true'">true</CLRTestTargetUnsupported>
    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
    <DefineConstants>$(DefineConstants);SSE42_INTRINSICS;VECTORT128_INTRINSICS</DefineConstants>
    <RequiresProcessIsolation>true</RequiresProcessIsolation>
    <ReferenceXUnitWrapperGenerator>false</ReferenceXUnitWrapperGenerator>
  </PropertyGroup>

  <ItemGroup>
    <IlcArg Include="--instruction-set:sse4.2" />
  </ItemGroup>

  <ItemGroup>
    <Compile Include="Program.cs" />
  </ItemGroup>
</Project>

@@ -89,11 +89,16 @@ class TestHardwareIntrinsics
    public static bool IsAvxVnniSupported = AvxVnni.IsSupported;
}

class Complex
class Simple3
{
    public static bool IsPopcntSupported = Popcnt.IsSupported;
}

class Complex
{
    public static bool IsX86SerializeSupported = X86Serialize.IsSupported;
}

public static void Run()
{
    Assert.IsPreinitialized(typeof(Simple1));

@@ -102,11 +107,14 @@ class TestHardwareIntrinsics
    Assert.IsPreinitialized(typeof(Simple2));
    Assert.AreEqual(AvxVnni.IsSupported, Simple2.IsAvxVnniSupported);

    Assert.IsPreinitialized(typeof(Simple3));
    Assert.AreEqual(Popcnt.IsSupported, Simple3.IsPopcntSupported);
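    // Popcnt is part of the x86-64-v2 baseline, so Simple3's field folds to a constant at
    // compile time; X86Serialize sits above the baseline, which is why Complex below must
    // stay lazily initialized on x86/x64.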

    if (RuntimeInformation.ProcessArchitecture is Architecture.X86 or Architecture.X64)
        Assert.IsLazyInitialized(typeof(Complex));
    else
        Assert.IsPreinitialized(typeof(Complex));
    Assert.AreEqual(Popcnt.IsSupported, Complex.IsPopcntSupported);
    Assert.AreEqual(X86Serialize.IsSupported, Complex.IsX86SerializeSupported);
}
}

@@ -1,22 +0,0 @@
<Project Sdk="Microsoft.NET.Sdk">
  <PropertyGroup>
    <!-- Needed for CLRTestTargetUnsupported, IlasmRoundTripIncompatible, NativeAotIncompatible -->
    <RequiresProcessIsolation>true</RequiresProcessIsolation>
    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
    <CLRTestTargetUnsupported Condition="('$(TargetArchitecture)' != 'x64' AND '$(TargetArchitecture)' != 'x86') OR ('$(RuntimeFlavor)' != 'coreclr')">true</CLRTestTargetUnsupported>
  </PropertyGroup>

  <PropertyGroup>
    <AlwaysUseCrossGen2>true</AlwaysUseCrossGen2>
    <IlasmRoundTripIncompatible>true</IlasmRoundTripIncompatible>
    <NativeAotIncompatible>true</NativeAotIncompatible>
  </PropertyGroup>

  <PropertyGroup>
    <CrossGen2TestExtraArguments>$(CrossGen2TestExtraArguments) --instruction-set:sse4.2</CrossGen2TestExtraArguments>
  </PropertyGroup>

  <ItemGroup>
    <Compile Include="../../../JIT/HardwareIntrinsics/X86/X86Base/CpuId.cs" />
  </ItemGroup>
</Project>