forked from OSchip/llvm-project
				
			
		
			
				
	
	
		
			616 lines
		
	
	
		
			18 KiB
		
	
	
	
		
			C
		
	
	
	
			
		
		
	
	
			616 lines
		
	
	
		
			18 KiB
		
	
	
	
		
			C
		
	
	
	
| //===-- cpu_model.c - Support for __cpu_model builtin  ------------*- C -*-===//
 | |
| //
 | |
| //                     The LLVM Compiler Infrastructure
 | |
| //
 | |
| // This file is distributed under the University of Illinois Open Source
 | |
| // License. See LICENSE.TXT for details.
 | |
| //
 | |
| //===----------------------------------------------------------------------===//
 | |
| //
 | |
| //  This file is based on LLVM's lib/Support/Host.cpp.
 | |
| //  It implements the operating system Host concept and builtin
 | |
| //  __cpu_model for the compiler_rt library, for x86 only.
 | |
| //
 | |
| //===----------------------------------------------------------------------===//
 | |
| 
 | |
| #if (defined(__i386__) || defined(_M_IX86) || \
 | |
|      defined(__x86_64__) || defined(_M_X64)) && \
 | |
|     (defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER))
 | |
| 
 | |
| #include <assert.h>
 | |
| 
 | |
| #define bool int
 | |
| #define true 1
 | |
| #define false 0
 | |
| 
 | |
| #ifdef _MSC_VER
 | |
| #include <intrin.h>
 | |
| #endif
 | |
| 
 | |
| #ifndef __has_attribute
 | |
| #define __has_attribute(attr) 0
 | |
| #endif
 | |
| 
 | |
// First four ASCII characters of the CPUID leaf 0 vendor string, packed
// little-endian exactly as the instruction returns them in EBX.
enum VendorSignatures {
  SIG_INTEL = 0x756E6547, // "Genu"
  SIG_AMD = 0x68747541    // "Auth"
};
 | |
| 
 | |
// Values stored in __cpu_model.__cpu_vendor.  Zero is deliberately unused:
// __cpu_indicator_init treats a zero vendor as "not initialised yet".
enum ProcessorVendors {
  VENDOR_INTEL = 1,
  VENDOR_AMD = 2,
  VENDOR_OTHER = 3,
  VENDOR_MAX = 4 // One past the last valid vendor.
};
 | |
| 
 | |
// Values stored in __cpu_model.__cpu_type.  Zero means "unknown"; the
// numbering is spelled out because the order of the enumerators matters.
enum ProcessorTypes {
  INTEL_BONNELL = 1,
  INTEL_CORE2 = 2,
  INTEL_COREI7 = 3,
  AMDFAM10H = 4,
  AMDFAM15H = 5,
  INTEL_SILVERMONT = 6,
  INTEL_KNL = 7,
  AMD_BTVER1 = 8,
  AMD_BTVER2 = 9,
  AMDFAM17H = 10,
  CPU_TYPE_MAX = 11 // One past the last valid type.
};
 | |
| 
 | |
// Values stored in __cpu_model.__cpu_subtype.  Zero means "unknown"; the
// numbering is spelled out because the order of the enumerators matters.
enum ProcessorSubtypes {
  INTEL_COREI7_NEHALEM = 1,
  INTEL_COREI7_WESTMERE = 2,
  INTEL_COREI7_SANDYBRIDGE = 3,
  AMDFAM10H_BARCELONA = 4,
  AMDFAM10H_SHANGHAI = 5,
  AMDFAM10H_ISTANBUL = 6,
  AMDFAM15H_BDVER1 = 7,
  AMDFAM15H_BDVER2 = 8,
  AMDFAM15H_BDVER3 = 9,
  AMDFAM15H_BDVER4 = 10,
  AMDFAM17H_ZNVER1 = 11,
  INTEL_COREI7_IVYBRIDGE = 12,
  INTEL_COREI7_HASWELL = 13,
  INTEL_COREI7_BROADWELL = 14,
  INTEL_COREI7_SKYLAKE = 15,
  INTEL_COREI7_SKYLAKE_AVX512 = 16,
  CPU_SUBTYPE_MAX = 17 // One past the last valid subtype.
};
 | |
| 
 | |
// Bit positions inside __cpu_model.__cpu_features[0]: feature F is reported
// by setting bit (1 << F).  The numbering is spelled out because the order of
// the enumerators matters.
enum ProcessorFeatures {
  FEATURE_CMOV = 0,
  FEATURE_MMX = 1,
  FEATURE_POPCNT = 2,
  FEATURE_SSE = 3,
  FEATURE_SSE2 = 4,
  FEATURE_SSE3 = 5,
  FEATURE_SSSE3 = 6,
  FEATURE_SSE4_1 = 7,
  FEATURE_SSE4_2 = 8,
  FEATURE_AVX = 9,
  FEATURE_AVX2 = 10,
  FEATURE_SSE4_A = 11,
  FEATURE_FMA4 = 12,
  FEATURE_XOP = 13,
  FEATURE_FMA = 14,
  FEATURE_AVX512F = 15,
  FEATURE_BMI = 16,
  FEATURE_BMI2 = 17,
  FEATURE_AES = 18,
  FEATURE_PCLMUL = 19,
  FEATURE_AVX512VL = 20,
  FEATURE_AVX512BW = 21,
  FEATURE_AVX512DQ = 22,
  FEATURE_AVX512CD = 23,
  FEATURE_AVX512ER = 24,
  FEATURE_AVX512PF = 25,
  FEATURE_AVX512VBMI = 26,
  FEATURE_AVX512IFMA = 27,
  FEATURE_AVX5124VNNIW = 28,
  FEATURE_AVX5124FMAPS = 29,
  FEATURE_AVX512VPOPCNTDQ = 30
};
 | |
| 
 | |
// isCpuIdSupported - Report whether the CPUID instruction may be executed.
//
// Only 32-bit x86 builds with GCC or Clang perform a real probe; every other
// configuration simply returns true.  The i386 probe was copied from clang's
// cpuid.h (__get_cpuid_max) and was motivated by bug reports of OpenSSL
// crashing on CPUs without CPUID support: it tries to flip bit 0x00200000 of
// EFLAGS and reports support only if the flipped value sticks.
static bool isCpuIdSupported(void) {
#if defined(__i386__) && (defined(__GNUC__) || defined(__clang__))
  int CpuIdSupported;
  __asm__("  pushfl\n"
          "  popl   %%eax\n"
          "  movl   %%eax,%%ecx\n"
          "  xorl   $0x00200000,%%eax\n"
          "  pushl  %%eax\n"
          "  popfl\n"
          "  pushfl\n"
          "  popl   %%eax\n"
          "  movl   $0,%0\n"
          "  cmpl   %%eax,%%ecx\n"
          "  je     1f\n"
          "  movl   $1,%0\n"
          "1:"
          : "=r"(CpuIdSupported)
          :
          : "eax", "ecx");
  if (!CpuIdSupported)
    return false;
#endif
  return true;
}
 | |
| 
 | |
| // This code is copied from lib/Support/Host.cpp.
 | |
| // Changes to either file should be mirrored in the other.
 | |
| 
 | |
/// getX86CpuIDAndInfo - Run CPUID for leaf \p value and store the resulting
/// EAX/EBX/ECX/EDX through the four out-pointers.
///
/// Returns false when the instruction was executed and true when this build
/// configuration has no way of running it (the outputs are then untouched).
static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
                               unsigned *rECX, unsigned *rEDX) {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__x86_64__)
  // gcc is not told that cpuid clobbers rbx, so rbx is parked in rsi around
  // the instruction and the EBX result is read back out of rsi.
  // FIXME: should we save this for Clang?
  unsigned A, B, C, D;
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(A), "=S"(B), "=c"(C), "=d"(D)
          : "a"(value));
  *rEAX = A;
  *rEBX = B;
  *rECX = C;
  *rEDX = D;
  return false;
#elif (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  // Same ebx-preserving dance as above, using the 32-bit registers.
  unsigned A, B, C, D;
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(A), "=S"(B), "=c"(C), "=d"(D)
          : "a"(value));
  *rEAX = A;
  *rEBX = B;
  *rECX = C;
  *rEDX = D;
  return false;
#elif defined(__GNUC__) || defined(__clang__)
  // GCC/Clang targeting something that is neither i386 nor x86-64.
  return true;
#elif defined(_MSC_VER)
  // The MSVC intrinsic is portable across x86 and x64.
  int Regs[4];
  __cpuid(Regs, value);
  *rEAX = Regs[0];
  *rEBX = Regs[1];
  *rECX = Regs[2];
  *rEDX = Regs[3];
  return false;
#else
  return true;
#endif
}
 | |
| 
 | |
/// getX86CpuIDAndInfoEx - Run CPUID for leaf \p value / sub-leaf \p subleaf
/// and store the resulting EAX/EBX/ECX/EDX through the four out-pointers.
///
/// Returns false when the instruction was executed and true when this build
/// configuration has no way of running it (the outputs are then untouched).
static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
                                 unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
                                 unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__x86_64__) || defined(_M_X64)
  // gcc is not told that cpuid clobbers rbx, so rbx is parked in rsi around
  // the instruction and the EBX result is read back out of rsi.
  // FIXME: should we save this for Clang?
  unsigned A, B, C, D;
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(A), "=S"(B), "=c"(C), "=d"(D)
          : "a"(value), "c"(subleaf));
  *rEAX = A;
  *rEBX = B;
  *rECX = C;
  *rEDX = D;
  return false;
#elif defined(__i386__) || defined(_M_IX86)
  // Same ebx-preserving dance as above, using the 32-bit registers.
  unsigned A, B, C, D;
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(A), "=S"(B), "=c"(C), "=d"(D)
          : "a"(value), "c"(subleaf));
  *rEAX = A;
  *rEBX = B;
  *rECX = C;
  *rEDX = D;
  return false;
#else
  return true;
#endif
#elif defined(_MSC_VER)
#if defined(__x86_64__) || defined(_M_X64)
  int Regs[4];
  __cpuidex(Regs, value, subleaf);
  *rEAX = Regs[0];
  *rEBX = Regs[1];
  *rECX = Regs[2];
  *rEDX = Regs[3];
  return false;
#elif defined(__i386__) || defined(_M_IX86)
  // 32-bit MSVC: issue cpuid by hand and store each register through the
  // matching out-pointer.
  __asm {
      mov   eax,value
      mov   ecx,subleaf
      cpuid
      mov   esi,rEAX
      mov   dword ptr [esi],eax
      mov   esi,rEBX
      mov   dword ptr [esi],ebx
      mov   esi,rECX
      mov   dword ptr [esi],ecx
      mov   esi,rEDX
      mov   dword ptr [esi],edx
  }
  return false;
#else
  return true;
#endif
#else
  return true;
#endif
}
 | |
| 
 | |
// getX86XCR0 - Read extended control register 0 (XCR0) with xgetbv; the
// result is used to detect features such as AVX.
//
// The low half is stored through rEAX and the high half through rEDX.
// Returns false on success and true when this build configuration cannot
// issue the instruction.
static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
  // xgetbv is emitted as raw bytes rather than by mnemonic: older assemblers
  // do not know the instruction, and there is no easy way to conditionally
  // compile based on the assembler in use.
  unsigned Lo, Hi;
  __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(Lo), "=d"(Hi) : "c"(0));
  *rEAX = Lo;
  *rEDX = Hi;
  return false;
#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
  unsigned long long Xcr0 = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
  *rEAX = (unsigned)Xcr0;
  *rEDX = (unsigned)(Xcr0 >> 32);
  return false;
#else
  return true;
#endif
}
 | |
| 
 | |
/// detectX86FamilyModel - Decode the family and model numbers from the EAX
/// value returned by CPUID leaf 1.
///
/// The base family is bits 8-11 and the base model bits 4-7.  When the base
/// family is 0xF the extended family (bits 20-27) is added to it; when it is
/// 6 or 0xF the extended model (bits 16-19) becomes the model's high nibble.
static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
                                 unsigned *Model) {
  const unsigned BaseFamily = (EAX >> 8) & 0xf;
  const unsigned ExtFamily = (EAX >> 20) & 0xff;
  const unsigned ExtModel = (EAX >> 16) & 0xf;

  unsigned Fam = BaseFamily;
  unsigned Mod = (EAX >> 4) & 0xf;

  switch (BaseFamily) {
  case 0xf:
    Fam += ExtFamily;
    /* fallthrough */
  case 6:
    Mod += ExtModel << 4;
    break;
  default:
    break;
  }

  *Family = Fam;
  *Model = Mod;
}
 | |
| 
 | |
| static void
 | |
| getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
 | |
|                                 unsigned Brand_id, unsigned Features,
 | |
|                                 unsigned *Type, unsigned *Subtype) {
 | |
|   if (Brand_id != 0)
 | |
|     return;
 | |
|   switch (Family) {
 | |
|   case 6:
 | |
|     switch (Model) {
 | |
|     case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
 | |
|                // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
 | |
|                // mobile processor, Intel Core 2 Extreme processor, Intel
 | |
|                // Pentium Dual-Core processor, Intel Xeon processor, model
 | |
|                // 0Fh. All processors are manufactured using the 65 nm process.
 | |
|     case 0x16: // Intel Celeron processor model 16h. All processors are
 | |
|                // manufactured using the 65 nm process
 | |
|     case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
 | |
|                // 17h. All processors are manufactured using the 45 nm process.
 | |
|                //
 | |
|                // 45nm: Penryn , Wolfdale, Yorkfield (XE)
 | |
|     case 0x1d: // Intel Xeon processor MP. All processors are manufactured using
 | |
|                // the 45 nm process.
 | |
|       *Type = INTEL_CORE2; // "penryn"
 | |
|       break;
 | |
|     case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
 | |
|                // processors are manufactured using the 45 nm process.
 | |
|     case 0x1e: // Intel(R) Core(TM) i7 CPU         870  @ 2.93GHz.
 | |
|                // As found in a Summer 2010 model iMac.
 | |
|     case 0x1f:
 | |
|     case 0x2e:             // Nehalem EX
 | |
|       *Type = INTEL_COREI7; // "nehalem"
 | |
|       *Subtype = INTEL_COREI7_NEHALEM;
 | |
|       break;
 | |
|     case 0x25: // Intel Core i7, laptop version.
 | |
|     case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All
 | |
|                // processors are manufactured using the 32 nm process.
 | |
|     case 0x2f: // Westmere EX
 | |
|       *Type = INTEL_COREI7; // "westmere"
 | |
|       *Subtype = INTEL_COREI7_WESTMERE;
 | |
|       break;
 | |
|     case 0x2a: // Intel Core i7 processor. All processors are manufactured
 | |
|                // using the 32 nm process.
 | |
|     case 0x2d:
 | |
|       *Type = INTEL_COREI7; //"sandybridge"
 | |
|       *Subtype = INTEL_COREI7_SANDYBRIDGE;
 | |
|       break;
 | |
|     case 0x3a:
 | |
|     case 0x3e:             // Ivy Bridge EP
 | |
|       *Type = INTEL_COREI7; // "ivybridge"
 | |
|       *Subtype = INTEL_COREI7_IVYBRIDGE;
 | |
|       break;
 | |
| 
 | |
|     // Haswell:
 | |
|     case 0x3c:
 | |
|     case 0x3f:
 | |
|     case 0x45:
 | |
|     case 0x46:
 | |
|       *Type = INTEL_COREI7; // "haswell"
 | |
|       *Subtype = INTEL_COREI7_HASWELL;
 | |
|       break;
 | |
| 
 | |
|     // Broadwell:
 | |
|     case 0x3d:
 | |
|     case 0x47:
 | |
|     case 0x4f:
 | |
|     case 0x56:
 | |
|       *Type = INTEL_COREI7; // "broadwell"
 | |
|       *Subtype = INTEL_COREI7_BROADWELL;
 | |
|       break;
 | |
| 
 | |
|     // Skylake:
 | |
|     case 0x4e: // Skylake mobile
 | |
|     case 0x5e: // Skylake desktop
 | |
|     case 0x8e: // Kaby Lake mobile
 | |
|     case 0x9e: // Kaby Lake desktop
 | |
|       *Type = INTEL_COREI7; // "skylake"
 | |
|       *Subtype = INTEL_COREI7_SKYLAKE;
 | |
|       break;
 | |
| 
 | |
|     // Skylake Xeon:
 | |
|     case 0x55:
 | |
|       *Type = INTEL_COREI7;
 | |
|       *Subtype = INTEL_COREI7_SKYLAKE_AVX512; // "skylake-avx512"
 | |
|       break;
 | |
| 
 | |
|     case 0x1c: // Most 45 nm Intel Atom processors
 | |
|     case 0x26: // 45 nm Atom Lincroft
 | |
|     case 0x27: // 32 nm Atom Medfield
 | |
|     case 0x35: // 32 nm Atom Midview
 | |
|     case 0x36: // 32 nm Atom Midview
 | |
|       *Type = INTEL_BONNELL;
 | |
|       break; // "bonnell"
 | |
| 
 | |
|     // Atom Silvermont codes from the Intel software optimization guide.
 | |
|     case 0x37:
 | |
|     case 0x4a:
 | |
|     case 0x4d:
 | |
|     case 0x5a:
 | |
|     case 0x5d:
 | |
|     case 0x4c: // really airmont
 | |
|       *Type = INTEL_SILVERMONT;
 | |
|       break; // "silvermont"
 | |
| 
 | |
|     case 0x57:
 | |
|       *Type = INTEL_KNL; // knl
 | |
|       break;
 | |
| 
 | |
|     default: // Unknown family 6 CPU.
 | |
|       break;
 | |
|     break;
 | |
|     }
 | |
|   default:
 | |
|     break; // Unknown.
 | |
|   }
 | |
| }
 | |
| 
 | |
| static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
 | |
|                                           unsigned Features, unsigned *Type,
 | |
|                                           unsigned *Subtype) {
 | |
|   // FIXME: this poorly matches the generated SubtargetFeatureKV table.  There
 | |
|   // appears to be no way to generate the wide variety of AMD-specific targets
 | |
|   // from the information returned from CPUID.
 | |
|   switch (Family) {
 | |
|   case 16:
 | |
|     *Type = AMDFAM10H; // "amdfam10"
 | |
|     switch (Model) {
 | |
|     case 2:
 | |
|       *Subtype = AMDFAM10H_BARCELONA;
 | |
|       break;
 | |
|     case 4:
 | |
|       *Subtype = AMDFAM10H_SHANGHAI;
 | |
|       break;
 | |
|     case 8:
 | |
|       *Subtype = AMDFAM10H_ISTANBUL;
 | |
|       break;
 | |
|     }
 | |
|     break;
 | |
|   case 20:
 | |
|     *Type = AMD_BTVER1;
 | |
|     break; // "btver1";
 | |
|   case 21:
 | |
|     *Type = AMDFAM15H;
 | |
|     if (Model >= 0x60 && Model <= 0x7f) {
 | |
|       *Subtype = AMDFAM15H_BDVER4;
 | |
|       break; // "bdver4"; 60h-7Fh: Excavator
 | |
|     }
 | |
|     if (Model >= 0x30 && Model <= 0x3f) {
 | |
|       *Subtype = AMDFAM15H_BDVER3;
 | |
|       break; // "bdver3"; 30h-3Fh: Steamroller
 | |
|     }
 | |
|     if (Model >= 0x10 && Model <= 0x1f) {
 | |
|       *Subtype = AMDFAM15H_BDVER2;
 | |
|       break; // "bdver2"; 10h-1Fh: Piledriver
 | |
|     }
 | |
|     if (Model <= 0x0f) {
 | |
|       *Subtype = AMDFAM15H_BDVER1;
 | |
|       break; // "bdver1"; 00h-0Fh: Bulldozer
 | |
|     }
 | |
|     break;
 | |
|   case 22:
 | |
|     *Type = AMD_BTVER2;
 | |
|     break; // "btver2"
 | |
|   case 23:
 | |
|     *Type = AMDFAM17H;
 | |
|     *Subtype = AMDFAM17H_ZNVER1;
 | |
|     break;
 | |
|   default:
 | |
|     break; // "generic"
 | |
|   }
 | |
| }
 | |
| 
 | |
| static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
 | |
|                                  unsigned *FeaturesOut) {
 | |
|   unsigned Features = 0;
 | |
|   unsigned EAX, EBX;
 | |
| 
 | |
|   if ((EDX >> 15) & 1)
 | |
|     Features |= 1 << FEATURE_CMOV;
 | |
|   if ((EDX >> 23) & 1)
 | |
|     Features |= 1 << FEATURE_MMX;
 | |
|   if ((EDX >> 25) & 1)
 | |
|     Features |= 1 << FEATURE_SSE;
 | |
|   if ((EDX >> 26) & 1)
 | |
|     Features |= 1 << FEATURE_SSE2;
 | |
| 
 | |
|   if ((ECX >> 0) & 1)
 | |
|     Features |= 1 << FEATURE_SSE3;
 | |
|   if ((ECX >> 1) & 1)
 | |
|     Features |= 1 << FEATURE_PCLMUL;
 | |
|   if ((ECX >> 9) & 1)
 | |
|     Features |= 1 << FEATURE_SSSE3;
 | |
|   if ((ECX >> 12) & 1)
 | |
|     Features |= 1 << FEATURE_FMA;
 | |
|   if ((ECX >> 19) & 1)
 | |
|     Features |= 1 << FEATURE_SSE4_1;
 | |
|   if ((ECX >> 20) & 1)
 | |
|     Features |= 1 << FEATURE_SSE4_2;
 | |
|   if ((ECX >> 23) & 1)
 | |
|     Features |= 1 << FEATURE_POPCNT;
 | |
|   if ((ECX >> 25) & 1)
 | |
|     Features |= 1 << FEATURE_AES;
 | |
| 
 | |
|   // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
 | |
|   // indicates that the AVX registers will be saved and restored on context
 | |
|   // switch, then we have full AVX support.
 | |
|   const unsigned AVXBits = (1 << 27) | (1 << 28);
 | |
|   bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
 | |
|                 ((EAX & 0x6) == 0x6);
 | |
|   bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
 | |
| 
 | |
|   if (HasAVX)
 | |
|     Features |= 1 << FEATURE_AVX;
 | |
| 
 | |
|   bool HasLeaf7 =
 | |
|       MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
 | |
| 
 | |
|   if (HasLeaf7 && ((EBX >> 3) & 1))
 | |
|     Features |= 1 << FEATURE_BMI;
 | |
|   if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX)
 | |
|     Features |= 1 << FEATURE_AVX2;
 | |
|   if (HasLeaf7 && ((EBX >> 9) & 1))
 | |
|     Features |= 1 << FEATURE_BMI2;
 | |
|   if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save)
 | |
|     Features |= 1 << FEATURE_AVX512F;
 | |
|   if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save)
 | |
|     Features |= 1 << FEATURE_AVX512DQ;
 | |
|   if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save)
 | |
|     Features |= 1 << FEATURE_AVX512IFMA;
 | |
|   if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save)
 | |
|     Features |= 1 << FEATURE_AVX512PF;
 | |
|   if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save)
 | |
|     Features |= 1 << FEATURE_AVX512ER;
 | |
|   if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
 | |
|     Features |= 1 << FEATURE_AVX512CD;
 | |
|   if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save)
 | |
|     Features |= 1 << FEATURE_AVX512BW;
 | |
|   if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save)
 | |
|     Features |= 1 << FEATURE_AVX512VL;
 | |
| 
 | |
|   if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save)
 | |
|     Features |= 1 << FEATURE_AVX512VBMI;
 | |
|   if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save)
 | |
|     Features |= 1 << FEATURE_AVX512VPOPCNTDQ;
 | |
| 
 | |
|   if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save)
 | |
|     Features |= 1 << FEATURE_AVX5124VNNIW;
 | |
|   if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save)
 | |
|     Features |= 1 << FEATURE_AVX5124FMAPS;
 | |
| 
 | |
|   unsigned MaxExtLevel;
 | |
|   getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
 | |
| 
 | |
|   bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
 | |
|                      !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
 | |
|   if (HasExtLeaf1 && ((ECX >> 6) & 1))
 | |
|     Features |= 1 << FEATURE_SSE4_A;
 | |
|   if (HasExtLeaf1 && ((ECX >> 11) & 1))
 | |
|     Features |= 1 << FEATURE_XOP;
 | |
|   if (HasExtLeaf1 && ((ECX >> 16) & 1))
 | |
|     Features |= 1 << FEATURE_FMA4;
 | |
| 
 | |
|   *FeaturesOut = Features;
 | |
| }
 | |
| 
 | |
// CONSTRUCTOR_ATTRIBUTE decorates __cpu_indicator_init so that it runs during
// program start-up where the toolchain supports that.
#if defined(HAVE_INIT_PRIORITY)
// The priority is an argument of the attribute and therefore has to be
// parenthesised.  GCC reserves priorities 0-100 for the implementation, so
// 101 is the earliest one available here.
#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__(101)))
#elif __has_attribute(__constructor__)
#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__))
#else
// FIXME: For MSVC, we should make a function pointer global in .CRT$X?? so that
// this runs during initialization.
#define CONSTRUCTOR_ATTRIBUTE
#endif

int __cpu_indicator_init(void) CONSTRUCTOR_ATTRIBUTE;
 | |
| 
 | |
/* Description of the host CPU, filled in by __cpu_indicator_init.  All
   fields start out as zero. */
struct __processor_model {
  unsigned int __cpu_vendor;      /* ProcessorVendors value; 0 = not set yet */
  unsigned int __cpu_type;        /* ProcessorTypes value; 0 = unknown */
  unsigned int __cpu_subtype;     /* ProcessorSubtypes value; 0 = unknown */
  unsigned int __cpu_features[1]; /* bitmask indexed by ProcessorFeatures */
};

struct __processor_model __cpu_model = {0};
 | |
| 
 | |
| /* A constructor function that is sets __cpu_model and __cpu_features with
 | |
|    the right values.  This needs to run only once.  This constructor is
 | |
|    given the highest priority and it should run before constructors without
 | |
|    the priority set.  However, it still runs after ifunc initializers and
 | |
|    needs to be called explicitly there.  */
 | |
| 
 | |
| int CONSTRUCTOR_ATTRIBUTE
 | |
| __cpu_indicator_init(void) {
 | |
|   unsigned EAX, EBX, ECX, EDX;
 | |
|   unsigned MaxLeaf = 5;
 | |
|   unsigned Vendor;
 | |
|   unsigned Model, Family, Brand_id;
 | |
|   unsigned Features = 0;
 | |
| 
 | |
|   /* This function needs to run just once.  */
 | |
|   if (__cpu_model.__cpu_vendor)
 | |
|     return 0;
 | |
| 
 | |
|   if (!isCpuIdSupported())
 | |
|     return -1;
 | |
| 
 | |
|   /* Assume cpuid insn present. Run in level 0 to get vendor id. */
 | |
|   if (getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) {
 | |
|     __cpu_model.__cpu_vendor = VENDOR_OTHER;
 | |
|     return -1;
 | |
|   }
 | |
|   getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX);
 | |
|   detectX86FamilyModel(EAX, &Family, &Model);
 | |
|   Brand_id = EBX & 0xff;
 | |
| 
 | |
|   /* Find available features. */
 | |
|   getAvailableFeatures(ECX, EDX, MaxLeaf, &Features);
 | |
|   __cpu_model.__cpu_features[0] = Features;
 | |
| 
 | |
|   if (Vendor == SIG_INTEL) {
 | |
|     /* Get CPU type.  */
 | |
|     getIntelProcessorTypeAndSubtype(Family, Model, Brand_id, Features,
 | |
|                                     &(__cpu_model.__cpu_type),
 | |
|                                     &(__cpu_model.__cpu_subtype));
 | |
|     __cpu_model.__cpu_vendor = VENDOR_INTEL;
 | |
|   } else if (Vendor == SIG_AMD) {
 | |
|     /* Get CPU type.  */
 | |
|     getAMDProcessorTypeAndSubtype(Family, Model, Features,
 | |
|                                   &(__cpu_model.__cpu_type),
 | |
|                                   &(__cpu_model.__cpu_subtype));
 | |
|     __cpu_model.__cpu_vendor = VENDOR_AMD;
 | |
|   } else
 | |
|     __cpu_model.__cpu_vendor = VENDOR_OTHER;
 | |
| 
 | |
|   assert(__cpu_model.__cpu_vendor < VENDOR_MAX);
 | |
|   assert(__cpu_model.__cpu_type < CPU_TYPE_MAX);
 | |
|   assert(__cpu_model.__cpu_subtype < CPU_SUBTYPE_MAX);
 | |
| 
 | |
|   return 0;
 | |
| }
 | |
| 
 | |
| #endif
 |