1
// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
3
// Package cpuid provides information about the CPU running the current program.
5
// CPU features are detected on startup, and kept for fast access through the life of the application.
6
// Currently x86 / x64 (AMD64) as well as arm64 is supported.
8
// You can access the CPU information by accessing the shared CPU variable of the cpuid library.
10
// Package home: https://github.com/klauspost/cpuid
23
// AMD reference: https://www.amd.com/system/files/TechDocs/25481.pdf
24
// and Processor Programming Reference (PPR)
26
// Vendor is a representation of a CPU vendor.
30
VendorUnknown Vendor = iota
36
KVM // Kernel-based Virtual Machine
37
MSVM // Microsoft Hyper-V or Windows Virtual PC
61
//go:generate stringer -type=FeatureID,Vendor
63
// FeatureID is the ID of a specific cpu feature.
67
// Keep index -1 as unknown
71
ADX FeatureID = iota // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
72
AESNI // Advanced Encryption Standard New Instructions
74
AMD3DNOWEXT // AMD 3DNowExt
75
AMXBF16 // Tile computational operations on BFLOAT16 numbers
76
AMXINT8 // Tile computational operations on 8-bit integers
77
AMXTILE // Tile architecture
79
AVX2 // AVX2 functions
80
AVX512BF16 // AVX-512 BFLOAT16 Instructions
81
AVX512BITALG // AVX-512 Bit Algorithms
82
AVX512BW // AVX-512 Byte and Word Instructions
83
AVX512CD // AVX-512 Conflict Detection Instructions
84
AVX512DQ // AVX-512 Doubleword and Quadword Instructions
85
AVX512ER // AVX-512 Exponential and Reciprocal Instructions
86
AVX512F // AVX-512 Foundation
87
AVX512FP16 // AVX-512 FP16 Instructions
88
AVX512IFMA // AVX-512 Integer Fused Multiply-Add Instructions
89
AVX512PF // AVX-512 Prefetch Instructions
90
AVX512VBMI // AVX-512 Vector Bit Manipulation Instructions
91
AVX512VBMI2 // AVX-512 Vector Bit Manipulation Instructions, Version 2
92
AVX512VL // AVX-512 Vector Length Extensions
93
AVX512VNNI // AVX-512 Vector Neural Network Instructions
94
AVX512VP2INTERSECT // AVX-512 Intersect for D/Q
95
AVX512VPOPCNTDQ // AVX-512 Vector Population Count Doubleword and Quadword
96
AVXSLOW // Indicates the CPU performs 2 128 bit operations instead of one
97
AVXVNNI // AVX (VEX encoded) VNNI neural network instructions
98
BMI1 // Bit Manipulation Instruction Set 1
99
BMI2 // Bit Manipulation Instruction Set 2
100
CETIBT // Intel CET Indirect Branch Tracking
101
CETSS // Intel CET Shadow Stack
102
CLDEMOTE // Cache Line Demote
103
CLMUL // Carry-less Multiplication
104
CLZERO // CLZERO instruction supported
106
CMPSB_SCADBS_SHORT // Fast short CMPSB and SCASB
107
CMPXCHG8 // CMPXCHG8 instruction
108
CPBOOST // Core Performance Boost
109
CX16 // CMPXCHG16B Instruction
110
ENQCMD // Enqueue Command
111
ERMS // Enhanced REP MOVSB/STOSB
112
F16C // Half-precision floating-point conversion
113
FMA3 // Intel FMA 3. Does not imply AVX.
114
FMA4 // Bulldozer FMA4 functions
115
FXSR // FXSAVE, FXRESTOR instructions, CR4 bit 9
116
FXSROPT // FXSAVE/FXRSTOR optimizations
117
GFNI // Galois Field New Instructions. May require other features (AVX, AVX512VL,AVX512F) based on usage.
118
HLE // Hardware Lock Elision
119
HRESET // If set CPU supports history reset and the IA32_HRESET_ENABLE MSR
120
HTT // Hyperthreading (enabled)
121
HWA // Hardware assert supported. Indicates support for MSRC001_10
122
HYPERVISOR // This bit has been reserved by Intel & AMD for use by hypervisors
123
IBPB // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB)
124
IBS // Instruction Based Sampling (AMD)
125
IBSBRNTRGT // Instruction Based Sampling Feature (AMD)
126
IBSFETCHSAM // Instruction Based Sampling Feature (AMD)
127
IBSFFV // Instruction Based Sampling Feature (AMD)
128
IBSOPCNT // Instruction Based Sampling Feature (AMD)
129
IBSOPCNTEXT // Instruction Based Sampling Feature (AMD)
130
IBSOPSAM // Instruction Based Sampling Feature (AMD)
131
IBSRDWROPCNT // Instruction Based Sampling Feature (AMD)
132
IBSRIPINVALIDCHK // Instruction Based Sampling Feature (AMD)
133
IBS_PREVENTHOST // Disallowing IBS use by the host supported
134
INT_WBINVD // WBINVD/WBNOINVD are interruptible.
135
INVLPGB // INVLPGB and TLBSYNC instructions supported
136
LAHF // LAHF/SAHF in long mode
137
LAM // If set, CPU supports Linear Address Masking
138
LBRVIRT // LBR virtualization
139
LZCNT // LZCNT instruction
140
MCAOVERFLOW // MCA overflow recovery support.
141
MCOMMIT // MCOMMIT instruction supported
143
MMXEXT // SSE integer functions or AMD MMX ext
144
MOVBE // MOVBE instruction (big-endian)
145
MOVDIR64B // Move 64 Bytes as Direct Store
146
MOVDIRI // Move Doubleword as Direct Store
147
MOVSB_ZL // Fast Zero-Length MOVSB
148
MPX // Intel MPX (Memory Protection Extensions)
149
MSRIRC // Instruction Retired Counter MSR available
150
MSR_PAGEFLUSH // Page Flush MSR available
151
NRIPS // Indicates support for NRIP save on VMEXIT
152
NX // NX (No-Execute) bit
153
OSXSAVE // XSAVE enabled by OS
154
PCONFIG // PCONFIG for Intel Multi-Key Total Memory Encryption
155
POPCNT // POPCNT instruction
156
RDPRU // RDPRU instruction supported
157
RDRAND // RDRAND instruction is available
158
RDSEED // RDSEED instruction is available
159
RDTSCP // RDTSCP Instruction
160
RTM // Restricted Transactional Memory
161
RTM_ALWAYS_ABORT // Indicates that the loaded microcode is forcing RTM abort.
162
SERIALIZE // Serialize Instruction Execution
163
SEV // AMD Secure Encrypted Virtualization supported
164
SEV_64BIT // AMD SEV guest execution only allowed from a 64-bit host
165
SEV_ALTERNATIVE // AMD SEV Alternate Injection supported
166
SEV_DEBUGSWAP // Full debug state swap supported for SEV-ES guests
167
SEV_ES // AMD SEV Encrypted State supported
168
SEV_RESTRICTED // AMD SEV Restricted Injection supported
169
SEV_SNP // AMD SEV Secure Nested Paging supported
170
SGX // Software Guard Extensions
171
SGXLC // Software Guard Extensions Launch Control
172
SHA // Intel SHA Extensions
173
SME // AMD Secure Memory Encryption supported
174
SME_COHERENT // AMD Hardware cache coherency across encryption domains enforced
176
SSE2 // P4 SSE functions
177
SSE3 // Prescott SSE3 functions
178
SSE4 // Penryn SSE4.1 functions
179
SSE42 // Nehalem SSE4.2 functions
180
SSE4A // AMD Barcelona microarchitecture SSE4a instructions
181
SSSE3 // Conroe SSSE3 functions
182
STIBP // Single Thread Indirect Branch Predictors
183
STOSB_SHORT // Fast short STOSB
184
SUCCOR // Software uncorrectable error containment and recovery capability.
185
SVM // AMD Secure Virtual Machine
186
SVMDA // Indicates support for the SVM decode assists.
187
SVMFBASID // SVM, Indicates that TLB flush events, including CR3 writes and CR4.PGE toggles, flush only the current ASID's TLB entries. Also indicates support for the extended VMCBTLB_Control
188
SVML // AMD SVM lock. Indicates support for SVM-Lock.
189
SVMNP // AMD SVM nested paging
190
SVMPF // SVM pause intercept filter. Indicates support for the pause intercept filter
191
SVMPFT // SVM PAUSE filter threshold. Indicates support for the PAUSE filter cycle count threshold
192
SYSCALL // System-Call Extension (SCE): SYSCALL and SYSRET instructions.
193
SYSEE // SYSENTER and SYSEXIT instructions
194
TBM // AMD Trailing Bit Manipulation
195
TOPEXT // TopologyExtensions: topology extensions support. Indicates support for CPUID Fn8000_001D_EAX_x[N:0]-CPUID Fn8000_001E_EDX.
196
TME // Intel Total Memory Encryption. The following MSRs are supported: IA32_TME_CAPABILITY, IA32_TME_ACTIVATE, IA32_TME_EXCLUDE_MASK, and IA32_TME_EXCLUDE_BASE.
197
TSCRATEMSR // MSR based TSC rate control. Indicates support for MSR TSC ratio MSRC000_0104
198
TSXLDTRK // Intel TSX Suspend Load Address Tracking
199
VAES // Vector AES. AVX(512) versions requires additional checks.
200
VMCBCLEAN // VMCB clean bits. Indicates support for VMCB clean bits.
201
VMPL // AMD VM Permission Levels supported
202
VMSA_REGPROT // AMD VMSA Register Protection supported
203
VMX // Virtual Machine Extensions
204
VPCLMULQDQ // Carry-Less Multiplication Quadword. Requires AVX for 3 register versions.
205
VTE // AMD Virtual Transparent Encryption supported
206
WAITPKG // TPAUSE, UMONITOR, UMWAIT
207
WBNOINVD // Write Back and Do Not Invalidate Cache
209
XGETBV1 // Supports XGETBV with ECX = 1
210
XOP // Bulldozer XOP functions
211
XSAVE // XSAVE, XRESTOR, XSETBV, XGETBV
212
XSAVEC // Supports XSAVEC and the compacted form of XRSTOR.
213
XSAVEOPT // XSAVEOPT available
214
XSAVES // Supports XSAVES/XRSTORS and IA32_XSS
217
AESARM // AES instructions
218
ARMCPUID // Some CPU ID registers readable at user-level
219
ASIMD // Advanced SIMD
220
ASIMDDP // SIMD Dot Product
221
ASIMDHP // Advanced SIMD half-precision floating point
222
ASIMDRDM // Rounding Double Multiply Accumulate/Subtract (SQRDMLAH/SQRDMLSH)
223
ATOMICS // Large System Extensions (LSE)
224
CRC32 // CRC32/CRC32C instructions
225
DCPOP // Data cache clean to Point of Persistence (DC CVAP)
226
EVTSTRM // Generic timer
227
FCMA // Floating point complex number addition and multiplication
228
FP // Single-precision and double-precision floating point
229
FPHP // Half-precision floating point
230
GPA // Generic Pointer Authentication
231
JSCVT // Javascript-style double->int convert (FJCVTZS)
232
LRCPC // Weaker release consistency (LDAPR, etc)
233
PMULL // Polynomial Multiply instructions (PMULL/PMULL2)
234
SHA1 // SHA-1 instructions (SHA1C, etc)
235
SHA2 // SHA-2 instructions (SHA256H, etc)
236
SHA3 // SHA-3 instructions (EOR3, RAXI, XAR, BCAX)
237
SHA512 // SHA512 instructions
238
SM3 // SM3 instructions
239
SM4 // SM4 instructions
240
SVE // Scalable Vector Extension
241
// Keep it last. It automatically defines the size of []flagSet
244
firstID FeatureID = UNKNOWN + 1
247
// CPUInfo contains information about the detected system CPU.
249
BrandName string // Brand name reported by the CPU
250
VendorID Vendor // Comparable CPU vendor ID
251
VendorString string // Raw vendor string.
252
featureSet flagSet // Features of the CPU
253
PhysicalCores int // Number of physical processor cores in your CPU. Will be 0 if undetectable.
254
ThreadsPerCore int // Number of threads per physical core. Will be 1 if undetectable.
255
LogicalCores int // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable.
256
Family int // CPU family number
257
Model int // CPU model number
258
Stepping int // CPU stepping info
259
CacheLine int // Cache line size in bytes. Will be 0 if undetectable.
260
Hz int64 // Clock speed, if known, 0 otherwise. Will attempt to contain base clock speed.
261
BoostFreq int64 // Max clock speed, if known, 0 otherwise
263
L1I int // L1 Instruction Cache (per core or shared). Will be -1 if undetected
264
L1D int // L1 Data Cache (per core or shared). Will be -1 if undetected
265
L2 int // L2 Cache (per core or shared). Will be -1 if undetected
266
L3 int // L3 Cache (per core, per ccx or shared). Will be -1 if undetected
273
// cpuid is the package-level hook used to query a CPUID leaf. Callers pass the
// leaf number in op (e.g. cpuid(1), cpuid(0x80000001)) and receive four 32-bit
// results named after the eax, ebx, ecx and edx registers.
// NOTE(review): no assignment is visible in this part of the file; presumably it
// is installed by platform-specific code - confirm.
var cpuid func(op uint32) (eax, ebx, ecx, edx uint32)
274
// cpuidex is like cpuid but takes a second selector, op2, used as the
// sub-leaf by callers (e.g. cpuidex(7, 0), cpuidex(0xb, 1)).
var cpuidex func(op, op2 uint32) (eax, ebx, ecx, edx uint32)
275
// xgetbv is the hook returning two 32-bit values (eax, edx) for the given index.
// NOTE(review): presumably reads the extended control register selected by
// index (XCR0 for index 0) - confirm against the implementation.
var xgetbv func(index uint32) (eax, edx uint32)
276
// rdtscpAsm is the hook behind RTCounter and Ia32TscAux: RTCounter combines the
// eax (low) and edx (high) results into a 64-bit value, Ia32TscAux reads ecx.
var rdtscpAsm func() (eax, ebx, ecx, edx uint32)
277
// darwinHasAVX512 is consulted during AVX-512 detection when runtime.GOOS is
// "darwin". The default implementation always reports false.
// NOTE(review): presumably replaced on darwin builds - confirm.
var darwinHasAVX512 = func() bool { return false }
279
// CPU contains information about the CPU as detected on startup,
280
// or when Detect last was called.
282
// Use this as the primary entry point to your data.
290
// Detect will re-detect current CPU info.
291
// This will replace the content of the exported CPU variable.
293
// Unless you expect the CPU to change while you are running your program
294
// you should not need to call this function.
295
// If you call this, you must ensure that no other goroutine is accessing the
296
// exported CPU variable.
299
CPU.ThreadsPerCore = 1
305
if detectArmFlag != nil {
306
safe = !*detectArmFlag
309
if displayFeats != nil && *displayFeats {
310
fmt.Println("cpu features:", strings.Join(CPU.FeatureSet(), ","))
311
// Exit with non-zero so tests will print value.
314
if disableFlag != nil {
315
s := strings.Split(*disableFlag, ",")
316
for _, feat := range s {
317
feat := ParseFeature(strings.TrimSpace(feat))
319
CPU.featureSet.unset(feat)
325
// DetectARM will detect ARM64 features.
326
// This is NOT done automatically since it can potentially crash
327
// if the OS does not handle the command.
328
// If in the future this can be done safely this function may not
334
// detectArmFlag points at the value of the "cpu.arm" command line flag.
// It stays nil until Flags registers the flag; Detect only reads it when non-nil.
var detectArmFlag *bool
335
// displayFeats points at the value of the "cpu.features" command line flag.
// It stays nil until Flags registers the flag; Detect only reads it when non-nil.
var displayFeats *bool
336
// disableFlag points at the value of the "cpu.disable" command line flag, a
// comma separated list of feature names that Detect removes from CPU's feature set.
// It stays nil until Flags registers the flag.
var disableFlag *string
338
// Flags will enable flags.
339
// This must be called *before* flag.Parse AND
340
// Detect must be called after the flags have been parsed.
341
// Note that this means that any detection used in init() functions
342
// will not contain these flags.
344
disableFlag = flag.String("cpu.disable", "", "disable cpu features; comma separated list")
345
displayFeats = flag.Bool("cpu.features", false, "lists cpu features and exits")
346
detectArmFlag = flag.Bool("cpu.arm", false, "allow ARM features to be detected; can potentially crash")
349
// Supports returns whether the CPU supports all of the requested features.
350
func (c CPUInfo) Supports(ids ...FeatureID) bool {
351
for _, id := range ids {
352
if !c.featureSet.inSet(id) {
359
// Has allows for checking a single feature.
360
// Should be inlined by the compiler.
361
func (c CPUInfo) Has(id FeatureID) bool {
362
return c.featureSet.inSet(id)
365
// AnyOf returns whether the CPU supports one or more of the requested features.
366
func (c CPUInfo) AnyOf(ids ...FeatureID) bool {
367
for _, id := range ids {
368
if c.featureSet.inSet(id) {
375
// https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels
376
// level1Features is the feature set X64Level requires (via hasSet) for
// microarchitecture level 1.
var level1Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2)
377
// level2Features is level1Features plus CX16, LAHF, POPCNT, SSE3, SSE4, SSE42
// and SSSE3.
var level2Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3)
378
// level3Features is level2Features plus AVX, AVX2, BMI1, BMI2, F16C, FMA3,
// LZCNT, MOVBE and OSXSAVE.
var level3Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE)
379
// level4Features is level3Features plus AVX512F, AVX512BW, AVX512CD, AVX512DQ
// and AVX512VL.
var level4Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE, AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL)
381
// X64Level returns the microarchitecture level detected on the CPU.
382
// If features are lacking or non x64 mode, 0 is returned.
383
// See https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels
384
func (c CPUInfo) X64Level() int {
385
if c.featureSet.hasSet(level4Features) {
388
if c.featureSet.hasSet(level3Features) {
391
if c.featureSet.hasSet(level2Features) {
394
if c.featureSet.hasSet(level1Features) {
400
// Disable will disable one or several features.
401
func (c *CPUInfo) Disable(ids ...FeatureID) bool {
402
for _, id := range ids {
403
c.featureSet.unset(id)
408
// Enable will enable one or several features even if they were undetected.
409
// This is of course not recommended for obvious reasons.
410
func (c *CPUInfo) Enable(ids ...FeatureID) bool {
411
for _, id := range ids {
417
// IsVendor returns true if the CPU's vendor ID equals v.
418
func (c CPUInfo) IsVendor(v Vendor) bool {
419
return c.VendorID == v
422
// FeatureSet returns all available features as strings.
423
func (c CPUInfo) FeatureSet() []string {
424
s := make([]string, 0, c.featureSet.nEnabled())
425
s = append(s, c.featureSet.Strings()...)
429
// RTCounter returns the 64-bit time-stamp counter
430
// Uses the RDTSCP instruction. The value 0 is returned
431
// if the CPU does not support the instruction.
432
func (c CPUInfo) RTCounter() uint64 {
433
if !c.Supports(RDTSCP) {
436
a, _, _, d := rdtscpAsm()
437
return uint64(a) | (uint64(d) << 32)
440
// Ia32TscAux returns the IA32_TSC_AUX part of the RDTSCP.
441
// This variable is OS dependent, but on Linux contains information
442
// about the current cpu/core the code is running on.
443
// If the RDTSCP instruction isn't supported on the CPU, the value 0 is returned.
444
func (c CPUInfo) Ia32TscAux() uint32 {
445
if !c.Supports(RDTSCP) {
448
_, _, ecx, _ := rdtscpAsm()
452
// LogicalCPU will return the Logical CPU the code is currently executing on.
453
// This is likely to change when the OS re-schedules the running thread
455
// If the current core cannot be detected, -1 will be returned.
456
func (c CPUInfo) LogicalCPU() int {
460
_, ebx, _, _ := cpuid(1)
461
return int(ebx >> 24)
464
// frequencies tries to compute the clock speed of the CPU. If leaf 15 is
465
// supported, use it, otherwise parse the brand string. Yes, really.
466
func (c *CPUInfo) frequencies() {
467
c.Hz, c.BoostFreq = 0, 0
468
mfi := maxFunctionID()
470
eax, ebx, ecx, _ := cpuid(0x15)
471
if eax != 0 && ebx != 0 && ecx != 0 {
472
c.Hz = (int64(ecx) * int64(ebx)) / int64(eax)
476
a, b, _, _ := cpuid(0x16)
479
c.Hz = int64(a&0xffff) * 1_000_000
483
c.BoostFreq = int64(b&0xffff) * 1_000_000
490
// computeHz determines the official rated speed of a CPU from its brand
491
// string. This insanity is *actually the official documented way to do
492
// this according to Intel*, prior to leaf 0x15 existing. The official
493
// documentation only shows this working for exactly `x.xx` or `xxxx`
494
// cases, e.g., `2.50GHz` or `1300MHz`; this parser will accept other
497
hz := strings.LastIndex(model, "Hz")
504
multiplier = 1000 * 1000
506
multiplier = 1000 * 1000 * 1000
508
multiplier = 1000 * 1000 * 1000 * 1000
515
decimalShift := int64(1)
517
for i = hz - 2; i >= 0 && model[i] != ' '; i-- {
518
if model[i] >= '0' && model[i] <= '9' {
519
freq += int64(model[i]-'0') * decimalShift
521
} else if model[i] == '.' {
525
divisor = decimalShift
530
// we didn't find a space
535
c.Hz = (freq * multiplier) / divisor
538
c.Hz = freq * multiplier
541
// VM Will return true if the cpu id indicates we are in
543
func (c CPUInfo) VM() bool {
544
// Consult the receiver rather than the package-level CPU variable so the
// answer reflects the CPUInfo value the method was called on, matching
// Has, Supports and AnyOf.
return c.featureSet.inSet(HYPERVISOR)
547
// flags contains detected cpu features and characteristics
550
// log2(bits_in_uint64)
551
// flagBitsLog2 is the shift that turns a FeatureID into the index of the
// flagSet word holding its bit (feat >> flagBitsLog2).
const flagBitsLog2 = 6
552
// flagBits is the number of feature bits stored per flagSet word (1<<6 = 64).
const flagBits = 1 << flagBitsLog2
553
// flagMask extracts the bit position of a FeatureID inside its word
// (feat & flagMask), i.e. the low flagBitsLog2 bits.
const flagMask = flagBits - 1
555
// flagSet contains detected cpu features and characteristics in an array of flags
556
// The array length is lastID divided by flagBits, rounded up, so that every
// FeatureID below lastID has a bit available.
type flagSet [(lastID + flagMask) / flagBits]flags
558
func (s flagSet) inSet(feat FeatureID) bool {
559
return s[feat>>flagBitsLog2]&(1<<(feat&flagMask)) != 0
562
func (s *flagSet) set(feat FeatureID) {
563
s[feat>>flagBitsLog2] |= 1 << (feat & flagMask)
566
// setIf will set a feature if boolean is true.
567
func (s *flagSet) setIf(cond bool, features ...FeatureID) {
569
for _, offset := range features {
570
s[offset>>flagBitsLog2] |= 1 << (offset & flagMask)
575
func (s *flagSet) unset(offset FeatureID) {
576
bit := flags(1 << (offset & flagMask))
577
s[offset>>flagBitsLog2] = s[offset>>flagBitsLog2] & ^bit
580
// or with another flagset.
581
func (s *flagSet) or(other flagSet) {
582
for i, v := range other[:] {
587
// hasSet returns whether all features are present.
588
func (s flagSet) hasSet(other flagSet) bool {
589
for i, v := range other[:] {
597
// nEnabled will return the number of enabled flags.
598
func (s flagSet) nEnabled() (n int) {
599
for _, v := range s[:] {
600
n += bits.OnesCount64(uint64(v))
605
func flagSetWith(feat ...FeatureID) flagSet {
607
for _, f := range feat {
613
// ParseFeature will parse the string and return the ID of the matching feature.
614
// Will return UNKNOWN if not found.
615
func ParseFeature(s string) FeatureID {
616
s = strings.ToUpper(s)
617
for i := firstID; i < lastID; i++ {
625
// Strings returns a slice with the names of the features enabled in the flagSet.
626
func (s flagSet) Strings() []string {
630
r := make([]string, 0)
631
for i := firstID; i < lastID; i++ {
633
r = append(r, i.String())
639
func maxExtendedFunction() uint32 {
640
eax, _, _, _ := cpuid(0x80000000)
644
func maxFunctionID() uint32 {
645
a, _, _, _ := cpuid(0)
649
func brandName() string {
650
if maxExtendedFunction() >= 0x80000004 {
651
v := make([]uint32, 0, 48)
652
for i := uint32(0); i < 3; i++ {
653
a, b, c, d := cpuid(0x80000002 + i)
654
v = append(v, a, b, c, d)
656
return strings.Trim(string(valAsString(v...)), " ")
661
func threadsPerCore() int {
662
mfi := maxFunctionID()
663
vend, _ := vendorID()
665
if mfi < 0x4 || (vend != Intel && vend != AMD) {
673
_, b, _, d := cpuid(1)
674
if (d & (1 << 28)) != 0 {
675
// v will contain logical core count
678
a4, _, _, _ := cpuid(4)
682
return int(v) / int(v2)
688
_, b, _, _ := cpuidex(0xb, 0)
691
// Workaround for AMD returning 0, assume 2 if >= Zen 2
692
// It will be more correct than not.
693
fam, _, _ := familyModel()
694
_, _, _, d := cpuid(1)
695
if (d&(1<<28)) != 0 && fam >= 23 {
701
return int(b & 0xffff)
704
func logicalCores() int {
705
mfi := maxFunctionID()
709
// Use this on old Intel processors
714
// CPUID.1:EBX[23:16] represents the maximum number of addressable IDs (initial APIC ID)
715
// that can be assigned to logical processors in a physical package.
716
// The value may not be the same as the number of logical processors that are present in the hardware of a physical package.
717
_, ebx, _, _ := cpuid(1)
718
logical := (ebx >> 16) & 0xff
721
_, b, _, _ := cpuidex(0xb, 1)
722
return int(b & 0xffff)
724
_, b, _, _ := cpuid(1)
725
return int((b >> 16) & 0xff)
731
func familyModel() (family, model, stepping int) {
732
if maxFunctionID() < 0x1 {
735
eax, _, _, _ := cpuid(1)
736
// If BaseFamily[3:0] is less than Fh then ExtendedFamily[7:0] is reserved and Family is equal to BaseFamily[3:0].
737
family = int((eax >> 8) & 0xf)
738
extFam := family == 0x6 // Intel is 0x6, needs extended model.
741
family += int((eax >> 20) & 0xff)
744
// If BaseFamily[3:0] is less than 0Fh then ExtendedModel[3:0] is reserved and Model is equal to BaseModel[3:0].
745
model = int((eax >> 4) & 0xf)
748
model += int((eax >> 12) & 0xf0)
750
stepping = int(eax & 0xf)
751
return family, model, stepping
754
func physicalCores() int {
758
return logicalCores() / threadsPerCore()
761
tpc := threadsPerCore()
762
if lc > 0 && tpc > 0 {
766
// The following is inaccurate on AMD EPYC 7742 64-Core Processor
767
if maxExtendedFunction() >= 0x80000008 {
768
_, _, c, _ := cpuid(0x80000008)
770
return int(c&0xff) + 1
777
// Excerpt from http://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID
778
var vendorMapping = map[string]Vendor{
782
"GenuineIntel": Intel,
783
"TransmetaCPU": Transmeta,
784
"GenuineTMx86": Transmeta,
788
"Microsoft Hv": MSVM,
789
"VMwareVMware": VMware,
790
"XenVMMXenVMM": XenHVM,
791
"bhyve bhyve ": Bhyve,
792
"HygonGenuine": Hygon,
799
func vendorID() (Vendor, string) {
800
_, b, c, d := cpuid(0)
801
v := string(valAsString(b, d, c))
802
vend, ok := vendorMapping[v]
804
return VendorUnknown, v
809
func cacheLine() int {
810
if maxFunctionID() < 0x1 {
814
_, ebx, _, _ := cpuid(1)
815
cache := (ebx & 0xff00) >> 5 // cflush size
816
if cache == 0 && maxExtendedFunction() >= 0x80000006 {
817
_, _, ecx, _ := cpuid(0x80000006)
818
cache = ecx & 0xff // cacheline size
820
// TODO: Read from Cache and TLB Information
824
func (c *CPUInfo) cacheSize() {
829
vendor, _ := vendorID()
832
if maxFunctionID() < 4 {
835
c.Cache.L1I, c.Cache.L1D, c.Cache.L2, c.Cache.L3 = 0, 0, 0, 0
836
for i := uint32(0); ; i++ {
837
eax, ebx, ecx, _ := cpuidex(4, i)
838
cacheType := eax & 15
842
cacheLevel := (eax >> 5) & 7
843
coherency := int(ebx&0xfff) + 1
844
partitions := int((ebx>>12)&0x3ff) + 1
845
associativity := int((ebx>>22)&0x3ff) + 1
847
size := associativity * partitions * coherency * sets
853
} else if cacheType == 2 {
854
// 2 = Instruction Cache
872
if maxExtendedFunction() < 0x80000005 {
875
_, _, ecx, edx := cpuid(0x80000005)
876
c.Cache.L1D = int(((ecx >> 24) & 0xFF) * 1024)
877
c.Cache.L1I = int(((edx >> 24) & 0xFF) * 1024)
879
if maxExtendedFunction() < 0x80000006 {
882
_, _, ecx, _ = cpuid(0x80000006)
883
c.Cache.L2 = int(((ecx >> 16) & 0xFFFF) * 1024)
885
// CPUID Fn8000_001D_EAX_x[N:0] Cache Properties
886
if maxExtendedFunction() < 0x8000001D || !c.Has(TOPEXT) {
890
// Xen Hypervisor is buggy and returns the same entry no matter ECX value.
891
// Hack: When we encounter the same entry 100 times we break.
894
for i := uint32(0); i < math.MaxUint32; i++ {
895
eax, ebx, ecx, _ := cpuidex(0x8000001D, i)
897
level := (eax >> 5) & 7
898
cacheNumSets := ecx + 1
899
cacheLineSize := 1 + (ebx & 2047)
900
cachePhysPartitions := 1 + ((ebx >> 12) & 511)
901
cacheNumWays := 1 + ((ebx >> 22) & 511)
904
size := int(cacheNumSets * cacheLineSize * cachePhysPartitions * cacheNumWays)
909
// Check for the same value repeated.
910
comb := eax ^ ebx ^ ecx
945
type SGXEPCSection struct {
950
type SGXSupport struct {
955
MaxEnclaveSizeNot64 int64
956
MaxEnclaveSize64 int64
957
EPCSections []SGXEPCSection
960
func hasSGX(available, lc bool) (rval SGXSupport) {
961
rval.Available = available
967
rval.LaunchControl = lc
969
a, _, _, d := cpuidex(0x12, 0)
970
rval.SGX1Supported = a&0x01 != 0
971
rval.SGX2Supported = a&0x02 != 0
972
rval.MaxEnclaveSizeNot64 = 1 << (d & 0xFF) // pow 2
973
rval.MaxEnclaveSize64 = 1 << ((d >> 8) & 0xFF) // pow 2
974
rval.EPCSections = make([]SGXEPCSection, 0)
976
for subleaf := uint32(2); subleaf < 2+8; subleaf++ {
977
eax, ebx, ecx, edx := cpuidex(0x12, subleaf)
978
leafType := eax & 0xf
981
// Invalid subleaf, stop iterating
983
} else if leafType == 1 {
984
// EPC Section subleaf
985
baseAddress := uint64(eax&0xfffff000) + (uint64(ebx&0x000fffff) << 32)
986
size := uint64(ecx&0xfffff000) + (uint64(edx&0x000fffff) << 32)
988
section := SGXEPCSection{BaseAddress: baseAddress, EPCSize: size}
989
rval.EPCSections = append(rval.EPCSections, section)
996
func support() flagSet {
998
mfi := maxFunctionID()
999
vend, _ := vendorID()
1003
family, model, _ := familyModel()
1005
_, _, c, d := cpuid(1)
1006
fs.setIf((d&(1<<0)) != 0, X87)
1007
fs.setIf((d&(1<<8)) != 0, CMPXCHG8)
1008
fs.setIf((d&(1<<11)) != 0, SYSEE)
1009
fs.setIf((d&(1<<15)) != 0, CMOV)
1010
fs.setIf((d&(1<<23)) != 0, MMX)
1011
fs.setIf((d&(1<<24)) != 0, FXSR)
1012
// NOTE(review): this tests the same cpuid(1) EDX bit 25 as the SSE check that
// follows, and FXSROPT is set again from cpuid(0x80000001) EDX bit 25 further
// down in this function - confirm that setting it here is intended.
fs.setIf((d&(1<<25)) != 0, FXSROPT)
1013
fs.setIf((d&(1<<25)) != 0, SSE)
1014
fs.setIf((d&(1<<26)) != 0, SSE2)
1015
fs.setIf((c&1) != 0, SSE3)
1016
fs.setIf((c&(1<<5)) != 0, VMX)
1017
fs.setIf((c&(1<<9)) != 0, SSSE3)
1018
fs.setIf((c&(1<<19)) != 0, SSE4)
1019
fs.setIf((c&(1<<20)) != 0, SSE42)
1020
fs.setIf((c&(1<<25)) != 0, AESNI)
1021
fs.setIf((c&(1<<1)) != 0, CLMUL)
1022
fs.setIf(c&(1<<22) != 0, MOVBE)
1023
fs.setIf(c&(1<<23) != 0, POPCNT)
1024
fs.setIf(c&(1<<30) != 0, RDRAND)
1026
// This bit has been reserved by Intel & AMD for use by hypervisors,
1027
// and indicates the presence of a hypervisor.
1028
fs.setIf(c&(1<<31) != 0, HYPERVISOR)
1029
fs.setIf(c&(1<<29) != 0, F16C)
1030
fs.setIf(c&(1<<13) != 0, CX16)
1032
if vend == Intel && (d&(1<<28)) != 0 && mfi >= 4 {
1033
fs.setIf(threadsPerCore() > 1, HTT)
1035
if vend == AMD && (d&(1<<28)) != 0 && mfi >= 4 {
1036
fs.setIf(threadsPerCore() > 1, HTT)
1038
// XSAVE is bit 26 of the ECX value returned by cpuid(1). The mask must be
// parenthesized: in Go, & and << share one precedence level and associate
// left to right, so c&1<<26 evaluates as (c&1)<<26 and would test bit 0.
fs.setIf(c&(1<<26) != 0, XSAVE)
1039
// OSXSAVE is bit 27 of the ECX value returned by cpuid(1). The mask must be
// parenthesized: in Go, & and << share one precedence level and associate
// left to right, so c&1<<27 evaluates as (c&1)<<27 and would test bit 0.
fs.setIf(c&(1<<27) != 0, OSXSAVE)
1040
// Check XGETBV/XSAVE (26), OXSAVE (27) and AVX (28) bits
1041
const avxCheck = 1<<26 | 1<<27 | 1<<28
1042
if c&avxCheck == avxCheck {
1043
// Check for OS support
1045
if (eax & 0x6) == 0x6 {
1049
// Older than Haswell.
1050
fs.setIf(family == 6 && model < 60, AVXSLOW)
1053
fs.setIf(family < 23 || (family == 23 && model < 49), AVXSLOW)
1057
// FMA3 can be used with SSE registers, so no OS support is strictly needed.
1058
// fma3 and OSXSAVE needed.
1059
const fma3Check = 1<<12 | 1<<27
1060
fs.setIf(c&fma3Check == fma3Check, FMA3)
1062
// Check AVX2, AVX2 requires OS support, but BMI1/2 don't.
1064
_, ebx, ecx, edx := cpuidex(7, 0)
1065
if fs.inSet(AVX) && (ebx&0x00000020) != 0 {
1068
// CPUID.(EAX=7, ECX=0).EBX
1069
if (ebx & 0x00000008) != 0 {
1071
fs.setIf((ebx&0x00000100) != 0, BMI2)
1073
fs.setIf(ebx&(1<<2) != 0, SGX)
1074
fs.setIf(ebx&(1<<4) != 0, HLE)
1075
fs.setIf(ebx&(1<<9) != 0, ERMS)
1076
fs.setIf(ebx&(1<<11) != 0, RTM)
1077
fs.setIf(ebx&(1<<14) != 0, MPX)
1078
fs.setIf(ebx&(1<<18) != 0, RDSEED)
1079
fs.setIf(ebx&(1<<19) != 0, ADX)
1080
fs.setIf(ebx&(1<<29) != 0, SHA)
1082
// CPUID.(EAX=7, ECX=0).ECX
1083
fs.setIf(ecx&(1<<5) != 0, WAITPKG)
1084
fs.setIf(ecx&(1<<7) != 0, CETSS)
1085
fs.setIf(ecx&(1<<8) != 0, GFNI)
1086
fs.setIf(ecx&(1<<9) != 0, VAES)
1087
fs.setIf(ecx&(1<<10) != 0, VPCLMULQDQ)
1088
fs.setIf(ecx&(1<<13) != 0, TME)
1089
fs.setIf(ecx&(1<<25) != 0, CLDEMOTE)
1090
fs.setIf(ecx&(1<<27) != 0, MOVDIRI)
1091
fs.setIf(ecx&(1<<28) != 0, MOVDIR64B)
1092
fs.setIf(ecx&(1<<29) != 0, ENQCMD)
1093
fs.setIf(ecx&(1<<30) != 0, SGXLC)
1095
// CPUID.(EAX=7, ECX=0).EDX
1096
fs.setIf(edx&(1<<11) != 0, RTM_ALWAYS_ABORT)
1097
fs.setIf(edx&(1<<14) != 0, SERIALIZE)
1098
fs.setIf(edx&(1<<16) != 0, TSXLDTRK)
1099
fs.setIf(edx&(1<<18) != 0, PCONFIG)
1100
fs.setIf(edx&(1<<20) != 0, CETIBT)
1101
fs.setIf(edx&(1<<26) != 0, IBPB)
1102
fs.setIf(edx&(1<<27) != 0, STIBP)
1104
// CPUID.(EAX=7, ECX=1)
1105
eax1, _, _, _ := cpuidex(7, 1)
1106
fs.setIf(fs.inSet(AVX) && eax1&(1<<4) != 0, AVXVNNI)
1107
fs.setIf(eax1&(1<<10) != 0, MOVSB_ZL)
1108
fs.setIf(eax1&(1<<11) != 0, STOSB_SHORT)
1109
fs.setIf(eax1&(1<<12) != 0, CMPSB_SCADBS_SHORT)
1110
fs.setIf(eax1&(1<<22) != 0, HRESET)
1111
fs.setIf(eax1&(1<<26) != 0, LAM)
1113
// Only detect AVX-512 features if XGETBV is supported
1114
if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) {
1115
// Check for OS support
1118
// Verify that XCR0[7:5] = ‘111b’ (OPMASK state, upper 256-bit of ZMM0-ZMM15 and
1119
// ZMM16-ZMM31 state are enabled by OS)
1120
/// and that XCR0[2:1] = ‘11b’ (XMM state and YMM state are enabled by OS).
1121
hasAVX512 := (eax>>5)&7 == 7 && (eax>>1)&3 == 3
1122
if runtime.GOOS == "darwin" {
1123
hasAVX512 = fs.inSet(AVX) && darwinHasAVX512()
1126
fs.setIf(ebx&(1<<16) != 0, AVX512F)
1127
fs.setIf(ebx&(1<<17) != 0, AVX512DQ)
1128
fs.setIf(ebx&(1<<21) != 0, AVX512IFMA)
1129
fs.setIf(ebx&(1<<26) != 0, AVX512PF)
1130
fs.setIf(ebx&(1<<27) != 0, AVX512ER)
1131
fs.setIf(ebx&(1<<28) != 0, AVX512CD)
1132
fs.setIf(ebx&(1<<30) != 0, AVX512BW)
1133
fs.setIf(ebx&(1<<31) != 0, AVX512VL)
1135
fs.setIf(ecx&(1<<1) != 0, AVX512VBMI)
1136
fs.setIf(ecx&(1<<6) != 0, AVX512VBMI2)
1137
fs.setIf(ecx&(1<<11) != 0, AVX512VNNI)
1138
fs.setIf(ecx&(1<<12) != 0, AVX512BITALG)
1139
fs.setIf(ecx&(1<<14) != 0, AVX512VPOPCNTDQ)
1141
fs.setIf(edx&(1<<8) != 0, AVX512VP2INTERSECT)
1142
fs.setIf(edx&(1<<22) != 0, AMXBF16)
1143
fs.setIf(edx&(1<<23) != 0, AVX512FP16)
1144
fs.setIf(edx&(1<<24) != 0, AMXTILE)
1145
fs.setIf(edx&(1<<25) != 0, AMXINT8)
1146
// eax1 = CPUID.(EAX=7, ECX=1).EAX
1147
fs.setIf(eax1&(1<<5) != 0, AVX512BF16)
1151
// Processor Extended State Enumeration Sub-leaf (EAX = 0DH, ECX = 1)
1153
// Bit 00: XSAVEOPT is available.
1154
// Bit 01: Supports XSAVEC and the compacted form of XRSTOR if set.
1155
// Bit 02: Supports XGETBV with ECX = 1 if set.
1156
// Bit 03: Supports XSAVES/XRSTORS and IA32_XSS if set.
1157
// Bits 31 - 04: Reserved.
1159
// Bits 31 - 00: The size in bytes of the XSAVE area containing all states enabled by XCR0 | IA32_XSS.
1161
// Bits 31 - 00: Reports the supported bits of the lower 32 bits of the IA32_XSS MSR. IA32_XSS[n] can be set to 1 only if ECX[n] is 1.
1163
// Bits 07 - 00: Used for XCR0. Bit 08: PT state. Bit 09: Used for XCR0. Bits 12 - 10: Reserved. Bit 13: HWP state. Bits 31 - 14: Reserved.
1165
if fs.inSet(XSAVE) {
1166
eax, _, _, _ := cpuidex(0xd, 1)
1167
fs.setIf(eax&(1<<0) != 0, XSAVEOPT)
1168
fs.setIf(eax&(1<<1) != 0, XSAVEC)
1169
fs.setIf(eax&(1<<2) != 0, XGETBV1)
1170
fs.setIf(eax&(1<<3) != 0, XSAVES)
1173
if maxExtendedFunction() >= 0x80000001 {
1174
_, _, c, d := cpuid(0x80000001)
1175
if (c & (1 << 5)) != 0 {
1180
fs.setIf((c&(1<<0)) != 0, LAHF)
1181
fs.setIf((c&(1<<2)) != 0, SVM)
1182
fs.setIf((c&(1<<6)) != 0, SSE4A)
1183
fs.setIf((c&(1<<10)) != 0, IBS)
1184
fs.setIf((c&(1<<22)) != 0, TOPEXT)
1187
fs.setIf(d&(1<<11) != 0, SYSCALL)
1188
fs.setIf(d&(1<<20) != 0, NX)
1189
fs.setIf(d&(1<<22) != 0, MMXEXT)
1190
fs.setIf(d&(1<<23) != 0, MMX)
1191
fs.setIf(d&(1<<24) != 0, FXSR)
1192
fs.setIf(d&(1<<25) != 0, FXSROPT)
1193
fs.setIf(d&(1<<27) != 0, RDTSCP)
1194
fs.setIf(d&(1<<30) != 0, AMD3DNOWEXT)
1195
fs.setIf(d&(1<<31) != 0, AMD3DNOW)
1197
/* XOP and FMA4 use the AVX instruction coding scheme, so they can't be
1198
* used unless the OS has AVX support. */
1200
fs.setIf((c&(1<<11)) != 0, XOP)
1201
fs.setIf((c&(1<<16)) != 0, FMA4)
1205
if maxExtendedFunction() >= 0x80000007 {
1206
_, b, _, d := cpuid(0x80000007)
1207
fs.setIf((b&(1<<0)) != 0, MCAOVERFLOW)
1208
fs.setIf((b&(1<<1)) != 0, SUCCOR)
1209
fs.setIf((b&(1<<2)) != 0, HWA)
1210
fs.setIf((d&(1<<9)) != 0, CPBOOST)
1213
if maxExtendedFunction() >= 0x80000008 {
1214
_, b, _, _ := cpuid(0x80000008)
1215
fs.setIf((b&(1<<9)) != 0, WBNOINVD)
1216
fs.setIf((b&(1<<8)) != 0, MCOMMIT)
1217
fs.setIf((b&(1<<13)) != 0, INT_WBINVD)
1218
fs.setIf((b&(1<<4)) != 0, RDPRU)
1219
fs.setIf((b&(1<<3)) != 0, INVLPGB)
1220
fs.setIf((b&(1<<1)) != 0, MSRIRC)
1221
fs.setIf((b&(1<<0)) != 0, CLZERO)
1224
if fs.inSet(SVM) && maxExtendedFunction() >= 0x8000000A {
1225
_, _, _, edx := cpuid(0x8000000A)
1226
fs.setIf((edx>>0)&1 == 1, SVMNP)
1227
fs.setIf((edx>>1)&1 == 1, LBRVIRT)
1228
fs.setIf((edx>>2)&1 == 1, SVML)
1229
fs.setIf((edx>>3)&1 == 1, NRIPS)
1230
fs.setIf((edx>>4)&1 == 1, TSCRATEMSR)
1231
fs.setIf((edx>>5)&1 == 1, VMCBCLEAN)
1232
fs.setIf((edx>>6)&1 == 1, SVMFBASID)
1233
fs.setIf((edx>>7)&1 == 1, SVMDA)
1234
fs.setIf((edx>>10)&1 == 1, SVMPF)
1235
fs.setIf((edx>>12)&1 == 1, SVMPFT)
1238
if maxExtendedFunction() >= 0x8000001b && fs.inSet(IBS) {
1239
eax, _, _, _ := cpuid(0x8000001b)
1240
fs.setIf((eax>>0)&1 == 1, IBSFFV)
1241
fs.setIf((eax>>1)&1 == 1, IBSFETCHSAM)
1242
fs.setIf((eax>>2)&1 == 1, IBSOPSAM)
1243
fs.setIf((eax>>3)&1 == 1, IBSRDWROPCNT)
1244
fs.setIf((eax>>4)&1 == 1, IBSOPCNT)
1245
fs.setIf((eax>>5)&1 == 1, IBSBRNTRGT)
1246
fs.setIf((eax>>6)&1 == 1, IBSOPCNTEXT)
1247
fs.setIf((eax>>7)&1 == 1, IBSRIPINVALIDCHK)
1250
if maxExtendedFunction() >= 0x8000001f && vend == AMD {
1251
a, _, _, _ := cpuid(0x8000001f)
1252
fs.setIf((a>>0)&1 == 1, SME)
1253
fs.setIf((a>>1)&1 == 1, SEV)
1254
fs.setIf((a>>2)&1 == 1, MSR_PAGEFLUSH)
1255
fs.setIf((a>>3)&1 == 1, SEV_ES)
1256
fs.setIf((a>>4)&1 == 1, SEV_SNP)
1257
fs.setIf((a>>5)&1 == 1, VMPL)
1258
fs.setIf((a>>10)&1 == 1, SME_COHERENT)
1259
fs.setIf((a>>11)&1 == 1, SEV_64BIT)
1260
fs.setIf((a>>12)&1 == 1, SEV_RESTRICTED)
1261
fs.setIf((a>>13)&1 == 1, SEV_ALTERNATIVE)
1262
fs.setIf((a>>14)&1 == 1, SEV_DEBUGSWAP)
1263
fs.setIf((a>>15)&1 == 1, IBS_PREVENTHOST)
1264
fs.setIf((a>>16)&1 == 1, VTE)
1265
fs.setIf((a>>24)&1 == 1, VMSA_REGPROT)
1271
func valAsString(values ...uint32) []byte {
1272
r := make([]byte, 4*len(values))
1273
for i, v := range values {
1275
dst[0] = byte(v & 0xff)
1276
dst[1] = byte((v >> 8) & 0xff)
1277
dst[2] = byte((v >> 16) & 0xff)
1278
dst[3] = byte((v >> 24) & 0xff)