podman

Форк
0
1514 строк · 51.1 Кб
1
// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
2

3
// Package cpuid provides information about the CPU running the current program.
4
//
5
// CPU features are detected on startup, and kept for fast access through the life of the application.
6
// Currently x86 / x64 (AMD64) as well as arm64 is supported.
7
//
8
// You can access the CPU information by accessing the shared CPU variable of the cpuid library.
9
//
10
// Package home: https://github.com/klauspost/cpuid
11
package cpuid
12

13
import (
14
	"flag"
15
	"fmt"
16
	"math"
17
	"math/bits"
18
	"os"
19
	"runtime"
20
	"strings"
21
)
22

23
// AMD reference: https://www.amd.com/system/files/TechDocs/25481.pdf
24
// and Processor Programming Reference (PPR)
25

26
// Vendor is a representation of a CPU vendor.
type Vendor int

// Vendor identifiers. Values are assigned sequentially by iota, starting
// with VendorUnknown = 0 and ending with the unexported lastVendor sentinel.
const (
	VendorUnknown Vendor = iota
	Intel
	AMD
	VIA
	Transmeta
	NSC
	KVM  // Kernel-based Virtual Machine
	MSVM // Microsoft Hyper-V or Windows Virtual PC
	VMware
	XenHVM
	Bhyve
	Hygon
	SiS
	RDC

	Ampere
	ARM
	Broadcom
	Cavium
	DEC
	Fujitsu
	Infineon
	Motorola
	NVIDIA
	AMCC
	Qualcomm
	Marvell

	// lastVendor is one past the highest defined vendor value.
	lastVendor
)
60

61
//go:generate stringer -type=FeatureID,Vendor
62

63
// FeatureID is the ID of a specific cpu feature.
type FeatureID int

// Feature identifiers. UNKNOWN is the untyped constant -1; because it is the
// first entry of the block, iota is already 1 at ADX, so the first real
// feature has value 1 and firstID (UNKNOWN + 1) is 0.
const (
	// Keep index -1 as unknown
	UNKNOWN = -1

	// x86 features
	ADX                 FeatureID = iota // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
	AESNI                                // Advanced Encryption Standard New Instructions
	AMD3DNOW                             // AMD 3DNOW
	AMD3DNOWEXT                          // AMD 3DNowExt
	AMXBF16                              // Tile computational operations on BFLOAT16 numbers
	AMXFP16                              // Tile computational operations on FP16 numbers
	AMXINT8                              // Tile computational operations on 8-bit integers
	AMXTILE                              // Tile architecture
	APX_F                                // Intel APX
	AVX                                  // AVX functions
	AVX10                                // If set the Intel AVX10 Converged Vector ISA is supported
	AVX10_128                            // If set indicates that AVX10 128-bit vector support is present
	AVX10_256                            // If set indicates that AVX10 256-bit vector support is present
	AVX10_512                            // If set indicates that AVX10 512-bit vector support is present
	AVX2                                 // AVX2 functions
	AVX512BF16                           // AVX-512 BFLOAT16 Instructions
	AVX512BITALG                         // AVX-512 Bit Algorithms
	AVX512BW                             // AVX-512 Byte and Word Instructions
	AVX512CD                             // AVX-512 Conflict Detection Instructions
	AVX512DQ                             // AVX-512 Doubleword and Quadword Instructions
	AVX512ER                             // AVX-512 Exponential and Reciprocal Instructions
	AVX512F                              // AVX-512 Foundation
	AVX512FP16                           // AVX-512 FP16 Instructions
	AVX512IFMA                           // AVX-512 Integer Fused Multiply-Add Instructions
	AVX512PF                             // AVX-512 Prefetch Instructions
	AVX512VBMI                           // AVX-512 Vector Bit Manipulation Instructions
	AVX512VBMI2                          // AVX-512 Vector Bit Manipulation Instructions, Version 2
	AVX512VL                             // AVX-512 Vector Length Extensions
	AVX512VNNI                           // AVX-512 Vector Neural Network Instructions
	AVX512VP2INTERSECT                   // AVX-512 Intersect for D/Q
	AVX512VPOPCNTDQ                      // AVX-512 Vector Population Count Doubleword and Quadword
	AVXIFMA                              // AVX-IFMA instructions
	AVXNECONVERT                         // AVX-NE-CONVERT instructions
	AVXSLOW                              // Indicates the CPU performs 2 128 bit operations instead of one
	AVXVNNI                              // AVX (VEX encoded) VNNI neural network instructions
	AVXVNNIINT8                          // AVX-VNNI-INT8 instructions
	BHI_CTRL                             // Branch History Injection and Intra-mode Branch Target Injection / CVE-2022-0001, CVE-2022-0002 / INTEL-SA-00598
	BMI1                                 // Bit Manipulation Instruction Set 1
	BMI2                                 // Bit Manipulation Instruction Set 2
	CETIBT                               // Intel CET Indirect Branch Tracking
	CETSS                                // Intel CET Shadow Stack
	CLDEMOTE                             // Cache Line Demote
	CLMUL                                // Carry-less Multiplication
	CLZERO                               // CLZERO instruction supported
	CMOV                                 // i686 CMOV
	CMPCCXADD                            // CMPCCXADD instructions
	CMPSB_SCADBS_SHORT                   // Fast short CMPSB and SCASB
	CMPXCHG8                             // CMPXCHG8 instruction
	CPBOOST                              // Core Performance Boost
	CPPC                                 // AMD: Collaborative Processor Performance Control
	CX16                                 // CMPXCHG16B Instruction
	EFER_LMSLE_UNS                       // AMD: =Core::X86::Msr::EFER[LMSLE] is not supported, and MBZ
	ENQCMD                               // Enqueue Command
	ERMS                                 // Enhanced REP MOVSB/STOSB
	F16C                                 // Half-precision floating-point conversion
	FLUSH_L1D                            // Flush L1D cache
	FMA3                                 // Intel FMA 3. Does not imply AVX.
	FMA4                                 // Bulldozer FMA4 functions
	FP128                                // AMD: When set, the internal FP/SIMD execution datapath is no more than 128-bits wide
	FP256                                // AMD: When set, the internal FP/SIMD execution datapath is no more than 256-bits wide
	FSRM                                 // Fast Short Rep Mov
	FXSR                                 // FXSAVE, FXRSTOR instructions, CR4 bit 9
	FXSROPT                              // FXSAVE/FXRSTOR optimizations
	GFNI                                 // Galois Field New Instructions. May require other features (AVX, AVX512VL,AVX512F) based on usage.
	HLE                                  // Hardware Lock Elision
	HRESET                               // If set CPU supports history reset and the IA32_HRESET_ENABLE MSR
	HTT                                  // Hyperthreading (enabled)
	HWA                                  // Hardware assert supported. Indicates support for MSRC001_10
	HYBRID_CPU                           // This part has CPUs of more than one type.
	HYPERVISOR                           // This bit has been reserved by Intel & AMD for use by hypervisors
	IA32_ARCH_CAP                        // IA32_ARCH_CAPABILITIES MSR (Intel)
	IA32_CORE_CAP                        // IA32_CORE_CAPABILITIES MSR
	IBPB                                 // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB)
	IBPB_BRTYPE                          // Indicates that MSR 49h (PRED_CMD) bit 0 (IBPB) flushes all branch type predictions from the CPU branch predictor
	IBRS                                 // AMD: Indirect Branch Restricted Speculation
	IBRS_PREFERRED                       // AMD: IBRS is preferred over software solution
	IBRS_PROVIDES_SMP                    // AMD: IBRS provides Same Mode Protection
	IBS                                  // Instruction Based Sampling (AMD)
	IBSBRNTRGT                           // Instruction Based Sampling Feature (AMD)
	IBSFETCHSAM                          // Instruction Based Sampling Feature (AMD)
	IBSFFV                               // Instruction Based Sampling Feature (AMD)
	IBSOPCNT                             // Instruction Based Sampling Feature (AMD)
	IBSOPCNTEXT                          // Instruction Based Sampling Feature (AMD)
	IBSOPSAM                             // Instruction Based Sampling Feature (AMD)
	IBSRDWROPCNT                         // Instruction Based Sampling Feature (AMD)
	IBSRIPINVALIDCHK                     // Instruction Based Sampling Feature (AMD)
	IBS_FETCH_CTLX                       // AMD: IBS fetch control extended MSR supported
	IBS_OPDATA4                          // AMD: IBS op data 4 MSR supported
	IBS_OPFUSE                           // AMD: Indicates support for IbsOpFuse
	IBS_PREVENTHOST                      // Disallowing IBS use by the host supported
	IBS_ZEN4                             // AMD: Fetch and Op IBS support IBS extensions added with Zen4
	IDPRED_CTRL                          // IPRED_DIS
	INT_WBINVD                           // WBINVD/WBNOINVD are interruptible.
	INVLPGB                              // INVLPGB and TLBSYNC instruction supported
	KEYLOCKER                            // Key locker
	KEYLOCKERW                           // Key locker wide
	LAHF                                 // LAHF/SAHF in long mode
	LAM                                  // If set, CPU supports Linear Address Masking
	LBRVIRT                              // LBR virtualization
	LZCNT                                // LZCNT instruction
	MCAOVERFLOW                          // MCA overflow recovery support.
	MCDT_NO                              // Processor does not exhibit MXCSR Configuration Dependent Timing behavior and does not need to mitigate it.
	MCOMMIT                              // MCOMMIT instruction supported
	MD_CLEAR                             // VERW clears CPU buffers
	MMX                                  // standard MMX
	MMXEXT                               // SSE integer functions or AMD MMX ext
	MOVBE                                // MOVBE instruction (big-endian)
	MOVDIR64B                            // Move 64 Bytes as Direct Store
	MOVDIRI                              // Move Doubleword as Direct Store
	MOVSB_ZL                             // Fast Zero-Length MOVSB
	MOVU                                 // AMD: MOVU SSE instructions are more efficient and should be preferred to SSE MOVL/MOVH. MOVUPS is more efficient than MOVLPS/MOVHPS. MOVUPD is more efficient than MOVLPD/MOVHPD
	MPX                                  // Intel MPX (Memory Protection Extensions)
	MSRIRC                               // Instruction Retired Counter MSR available
	MSRLIST                              // Read/Write List of Model Specific Registers
	MSR_PAGEFLUSH                        // Page Flush MSR available
	NRIPS                                // Indicates support for NRIP save on VMEXIT
	NX                                   // NX (No-Execute) bit
	OSXSAVE                              // XSAVE enabled by OS
	PCONFIG                              // PCONFIG for Intel Multi-Key Total Memory Encryption
	POPCNT                               // POPCNT instruction
	PPIN                                 // AMD: Protected Processor Inventory Number support. Indicates that Protected Processor Inventory Number (PPIN) capability can be enabled
	PREFETCHI                            // PREFETCHIT0/1 instructions
	PSFD                                 // Predictive Store Forward Disable
	RDPRU                                // RDPRU instruction supported
	RDRAND                               // RDRAND instruction is available
	RDSEED                               // RDSEED instruction is available
	RDTSCP                               // RDTSCP Instruction
	RRSBA_CTRL                           // Restricted RSB Alternate
	RTM                                  // Restricted Transactional Memory
	RTM_ALWAYS_ABORT                     // Indicates that the loaded microcode is forcing RTM abort.
	SBPB                                 // Indicates support for the Selective Branch Predictor Barrier
	SERIALIZE                            // Serialize Instruction Execution
	SEV                                  // AMD Secure Encrypted Virtualization supported
	SEV_64BIT                            // AMD SEV guest execution only allowed from a 64-bit host
	SEV_ALTERNATIVE                      // AMD SEV Alternate Injection supported
	SEV_DEBUGSWAP                        // Full debug state swap supported for SEV-ES guests
	SEV_ES                               // AMD SEV Encrypted State supported
	SEV_RESTRICTED                       // AMD SEV Restricted Injection supported
	SEV_SNP                              // AMD SEV Secure Nested Paging supported
	SGX                                  // Software Guard Extensions
	SGXLC                                // Software Guard Extensions Launch Control
	SHA                                  // Intel SHA Extensions
	SME                                  // AMD Secure Memory Encryption supported
	SME_COHERENT                         // AMD Hardware cache coherency across encryption domains enforced
	SPEC_CTRL_SSBD                       // Speculative Store Bypass Disable
	SRBDS_CTRL                           // SRBDS mitigation MSR available
	SRSO_MSR_FIX                         // Indicates that software may use MSR BP_CFG[BpSpecReduce] to mitigate SRSO.
	SRSO_NO                              // Indicates the CPU is not subject to the SRSO vulnerability
	SRSO_USER_KERNEL_NO                  // Indicates the CPU is not subject to the SRSO vulnerability across user/kernel boundaries
	SSE                                  // SSE functions
	SSE2                                 // P4 SSE functions
	SSE3                                 // Prescott SSE3 functions
	SSE4                                 // Penryn SSE4.1 functions
	SSE42                                // Nehalem SSE4.2 functions
	SSE4A                                // AMD Barcelona microarchitecture SSE4a instructions
	SSSE3                                // Conroe SSSE3 functions
	STIBP                                // Single Thread Indirect Branch Predictors
	STIBP_ALWAYSON                       // AMD: Single Thread Indirect Branch Prediction Mode has Enhanced Performance and may be left Always On
	STOSB_SHORT                          // Fast short STOSB
	SUCCOR                               // Software uncorrectable error containment and recovery capability.
	SVM                                  // AMD Secure Virtual Machine
	SVMDA                                // Indicates support for the SVM decode assists.
	SVMFBASID                            // SVM, Indicates that TLB flush events, including CR3 writes and CR4.PGE toggles, flush only the current ASID's TLB entries. Also indicates support for the extended VMCBTLB_Control
	SVML                                 // AMD SVM lock. Indicates support for SVM-Lock.
	SVMNP                                // AMD SVM nested paging
	SVMPF                                // SVM pause intercept filter. Indicates support for the pause intercept filter
	SVMPFT                               // SVM PAUSE filter threshold. Indicates support for the PAUSE filter cycle count threshold
	SYSCALL                              // System-Call Extension (SCE): SYSCALL and SYSRET instructions.
	SYSEE                                // SYSENTER and SYSEXIT instructions
	TBM                                  // AMD Trailing Bit Manipulation
	TDX_GUEST                            // Intel Trust Domain Extensions Guest
	TLB_FLUSH_NESTED                     // AMD: Flushing includes all the nested translations for guest translations
	TME                                  // Intel Total Memory Encryption. The following MSRs are supported: IA32_TME_CAPABILITY, IA32_TME_ACTIVATE, IA32_TME_EXCLUDE_MASK, and IA32_TME_EXCLUDE_BASE.
	TOPEXT                               // TopologyExtensions: topology extensions support. Indicates support for CPUID Fn8000_001D_EAX_x[N:0]-CPUID Fn8000_001E_EDX.
	TSCRATEMSR                           // MSR based TSC rate control. Indicates support for MSR TSC ratio MSRC000_0104
	TSXLDTRK                             // Intel TSX Suspend Load Address Tracking
	VAES                                 // Vector AES. AVX(512) versions requires additional checks.
	VMCBCLEAN                            // VMCB clean bits. Indicates support for VMCB clean bits.
	VMPL                                 // AMD VM Permission Levels supported
	VMSA_REGPROT                         // AMD VMSA Register Protection supported
	VMX                                  // Virtual Machine Extensions
	VPCLMULQDQ                           // Carry-Less Multiplication Quadword. Requires AVX for 3 register versions.
	VTE                                  // AMD Virtual Transparent Encryption supported
	WAITPKG                              // TPAUSE, UMONITOR, UMWAIT
	WBNOINVD                             // Write Back and Do Not Invalidate Cache
	WRMSRNS                              // Non-Serializing Write to Model Specific Register
	X87                                  // FPU
	XGETBV1                              // Supports XGETBV with ECX = 1
	XOP                                  // Bulldozer XOP functions
	XSAVE                                // XSAVE, XRSTOR, XSETBV, XGETBV
	XSAVEC                               // Supports XSAVEC and the compacted form of XRSTOR.
	XSAVEOPT                             // XSAVEOPT available
	XSAVES                               // Supports XSAVES/XRSTORS and IA32_XSS

	// ARM features:
	AESARM   // AES instructions
	ARMCPUID // Some CPU ID registers readable at user-level
	ASIMD    // Advanced SIMD
	ASIMDDP  // SIMD Dot Product
	ASIMDHP  // Advanced SIMD half-precision floating point
	ASIMDRDM // Rounding Double Multiply Accumulate/Subtract (SQRDMLAH/SQRDMLSH)
	ATOMICS  // Large System Extensions (LSE)
	CRC32    // CRC32/CRC32C instructions
	DCPOP    // Data cache clean to Point of Persistence (DC CVAP)
	EVTSTRM  // Generic timer
	FCMA     // Floating point complex number addition and multiplication
	FP       // Single-precision and double-precision floating point
	FPHP     // Half-precision floating point
	GPA      // Generic Pointer Authentication
	JSCVT    // Javascript-style double->int convert (FJCVTZS)
	LRCPC    // Weaker release consistency (LDAPR, etc)
	PMULL    // Polynomial Multiply instructions (PMULL/PMULL2)
	SHA1     // SHA-1 instructions (SHA1C, etc)
	SHA2     // SHA-2 instructions (SHA256H, etc)
	SHA3     // SHA-3 instructions (EOR3, RAXI, XAR, BCAX)
	SHA512   // SHA512 instructions
	SM3      // SM3 instructions
	SM4      // SM4 instructions
	SVE      // Scalable Vector Extension
	// Keep it last. It automatically defines the size of []flagSet
	lastID

	// firstID is the lower bound (inclusive) used when iterating over all
	// feature IDs; lastID is the exclusive upper bound.
	firstID FeatureID = UNKNOWN + 1
)
295

296
// CPUInfo contains information about the detected system CPU.
297
type CPUInfo struct {
298
	BrandName      string  // Brand name reported by the CPU
299
	VendorID       Vendor  // Comparable CPU vendor ID
300
	VendorString   string  // Raw vendor string.
301
	featureSet     flagSet // Features of the CPU
302
	PhysicalCores  int     // Number of physical processor cores in your CPU. Will be 0 if undetectable.
303
	ThreadsPerCore int     // Number of threads per physical core. Will be 1 if undetectable.
304
	LogicalCores   int     // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable.
305
	Family         int     // CPU family number
306
	Model          int     // CPU model number
307
	Stepping       int     // CPU stepping info
308
	CacheLine      int     // Cache line size in bytes. Will be 0 if undetectable.
309
	Hz             int64   // Clock speed, if known, 0 otherwise. Will attempt to contain base clock speed.
310
	BoostFreq      int64   // Max clock speed, if known, 0 otherwise
311
	Cache          struct {
312
		L1I int // L1 Instruction Cache (per core or shared). Will be -1 if undetected
313
		L1D int // L1 Data Cache (per core or shared). Will be -1 if undetected
314
		L2  int // L2 Cache (per core or shared). Will be -1 if undetected
315
		L3  int // L3 Cache (per core, per ccx or shared). Will be -1 if undetected
316
	}
317
	SGX              SGXSupport
318
	AMDMemEncryption AMDMemEncryptionSupport
319
	AVX10Level       uint8
320
	maxFunc          uint32
321
	maxExFunc        uint32
322
}
323

324
// Low-level query hooks. The first four are declared without a value here
// and are therefore nil until something else assigns them (presumably the
// platform-specific initCPU - confirm). darwinHasAVX512 defaults to a
// function that reports false.
var (
	cpuid func(op uint32) (eax, ebx, ecx, edx uint32)

	cpuidex func(op, op2 uint32) (eax, ebx, ecx, edx uint32)

	xgetbv func(index uint32) (eax, edx uint32)

	rdtscpAsm func() (eax, ebx, ecx, edx uint32)

	darwinHasAVX512 = func() bool { return false }
)
329

330
// CPU contains information about the CPU as detected on startup,
331
// or when Detect last was called.
332
//
333
// Use this as the primary entry point to you data.
334
var CPU CPUInfo
335

336
func init() {
337
	initCPU()
338
	Detect()
339
}
340

341
// Detect will re-detect current CPU info.
342
// This will replace the content of the exported CPU variable.
343
//
344
// Unless you expect the CPU to change while you are running your program
345
// you should not need to call this function.
346
// If you call this, you must ensure that no other goroutine is accessing the
347
// exported CPU variable.
348
func Detect() {
349
	// Set defaults
350
	CPU.ThreadsPerCore = 1
351
	CPU.Cache.L1I = -1
352
	CPU.Cache.L1D = -1
353
	CPU.Cache.L2 = -1
354
	CPU.Cache.L3 = -1
355
	safe := true
356
	if detectArmFlag != nil {
357
		safe = !*detectArmFlag
358
	}
359
	addInfo(&CPU, safe)
360
	if displayFeats != nil && *displayFeats {
361
		fmt.Println("cpu features:", strings.Join(CPU.FeatureSet(), ","))
362
		// Exit with non-zero so tests will print value.
363
		os.Exit(1)
364
	}
365
	if disableFlag != nil {
366
		s := strings.Split(*disableFlag, ",")
367
		for _, feat := range s {
368
			feat := ParseFeature(strings.TrimSpace(feat))
369
			if feat != UNKNOWN {
370
				CPU.featureSet.unset(feat)
371
			}
372
		}
373
	}
374
}
375

376
// DetectARM will detect ARM64 features.
377
// This is NOT done automatically since it can potentially crash
378
// if the OS does not handle the command.
379
// If in the future this can be done safely this function may not
380
// do anything.
381
func DetectARM() {
382
	addInfo(&CPU, false)
383
}
384

385
// Command-line flag values. Flags assigns them; Detect treats a nil pointer
// as "flag not registered".
var (
	detectArmFlag *bool
	displayFeats  *bool
	disableFlag   *string
)
388

389
// Flags will enable flags.
390
// This must be called *before* flag.Parse AND
391
// Detect must be called after the flags have been parsed.
392
// Note that this means that any detection used in init() functions
393
// will not contain these flags.
394
func Flags() {
395
	disableFlag = flag.String("cpu.disable", "", "disable cpu features; comma separated list")
396
	displayFeats = flag.Bool("cpu.features", false, "lists cpu features and exits")
397
	detectArmFlag = flag.Bool("cpu.arm", false, "allow ARM features to be detected; can potentially crash")
398
}
399

400
// Supports returns whether the CPU supports all of the requested features.
401
func (c CPUInfo) Supports(ids ...FeatureID) bool {
402
	for _, id := range ids {
403
		if !c.featureSet.inSet(id) {
404
			return false
405
		}
406
	}
407
	return true
408
}
409

410
// Has allows for checking a single feature.
411
// Should be inlined by the compiler.
412
func (c *CPUInfo) Has(id FeatureID) bool {
413
	return c.featureSet.inSet(id)
414
}
415

416
// AnyOf returns whether the CPU supports one or more of the requested features.
417
func (c CPUInfo) AnyOf(ids ...FeatureID) bool {
418
	for _, id := range ids {
419
		if c.featureSet.inSet(id) {
420
			return true
421
		}
422
	}
423
	return false
424
}
425

426
// Features contains several features combined for a fast check using
427
// CpuInfo.HasAll
428
type Features *flagSet
429

430
// CombineFeatures allows to combine several features for a close to constant time lookup.
431
func CombineFeatures(ids ...FeatureID) Features {
432
	var v flagSet
433
	for _, id := range ids {
434
		v.set(id)
435
	}
436
	return &v
437
}
438

439
// HasAll returns whether the CPU supports every feature contained in f,
// where f was built with CombineFeatures.
func (c *CPUInfo) HasAll(f Features) bool {
	return c.featureSet.hasSetP(f)
}
442

443
// https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels
444
var oneOfLevel = CombineFeatures(SYSEE, SYSCALL)
445
var level1Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2)
446
var level2Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3)
447
var level3Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE)
448
var level4Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE, AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL)
449

450
// X64Level returns the microarchitecture level detected on the CPU.
451
// If features are lacking or non x64 mode, 0 is returned.
452
// See https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels
453
func (c CPUInfo) X64Level() int {
454
	if !c.featureSet.hasOneOf(oneOfLevel) {
455
		return 0
456
	}
457
	if c.featureSet.hasSetP(level4Features) {
458
		return 4
459
	}
460
	if c.featureSet.hasSetP(level3Features) {
461
		return 3
462
	}
463
	if c.featureSet.hasSetP(level2Features) {
464
		return 2
465
	}
466
	if c.featureSet.hasSetP(level1Features) {
467
		return 1
468
	}
469
	return 0
470
}
471

472
// Disable will disable one or several features.
473
func (c *CPUInfo) Disable(ids ...FeatureID) bool {
474
	for _, id := range ids {
475
		c.featureSet.unset(id)
476
	}
477
	return true
478
}
479

480
// Enable will disable one or several features even if they were undetected.
481
// This is of course not recommended for obvious reasons.
482
func (c *CPUInfo) Enable(ids ...FeatureID) bool {
483
	for _, id := range ids {
484
		c.featureSet.set(id)
485
	}
486
	return true
487
}
488

489
// IsVendor returns true if vendor is recognized as Intel
490
func (c CPUInfo) IsVendor(v Vendor) bool {
491
	return c.VendorID == v
492
}
493

494
// FeatureSet returns all available features as strings.
495
func (c CPUInfo) FeatureSet() []string {
496
	s := make([]string, 0, c.featureSet.nEnabled())
497
	s = append(s, c.featureSet.Strings()...)
498
	return s
499
}
500

501
// RTCounter returns the 64-bit time-stamp counter
502
// Uses the RDTSCP instruction. The value 0 is returned
503
// if the CPU does not support the instruction.
504
func (c CPUInfo) RTCounter() uint64 {
505
	if !c.Supports(RDTSCP) {
506
		return 0
507
	}
508
	a, _, _, d := rdtscpAsm()
509
	return uint64(a) | (uint64(d) << 32)
510
}
511

512
// Ia32TscAux returns the IA32_TSC_AUX part of the RDTSCP.
513
// This variable is OS dependent, but on Linux contains information
514
// about the current cpu/core the code is running on.
515
// If the RDTSCP instruction isn't supported on the CPU, the value 0 is returned.
516
func (c CPUInfo) Ia32TscAux() uint32 {
517
	if !c.Supports(RDTSCP) {
518
		return 0
519
	}
520
	_, _, ecx, _ := rdtscpAsm()
521
	return ecx
522
}
523

524
// LogicalCPU will return the Logical CPU the code is currently executing on.
525
// This is likely to change when the OS re-schedules the running thread
526
// to another CPU.
527
// If the current core cannot be detected, -1 will be returned.
528
func (c CPUInfo) LogicalCPU() int {
529
	if c.maxFunc < 1 {
530
		return -1
531
	}
532
	_, ebx, _, _ := cpuid(1)
533
	return int(ebx >> 24)
534
}
535

536
// frequencies tries to compute the clock speed of the CPU. If leaf 15 is
537
// supported, use it, otherwise parse the brand string. Yes, really.
538
func (c *CPUInfo) frequencies() {
539
	c.Hz, c.BoostFreq = 0, 0
540
	mfi := maxFunctionID()
541
	if mfi >= 0x15 {
542
		eax, ebx, ecx, _ := cpuid(0x15)
543
		if eax != 0 && ebx != 0 && ecx != 0 {
544
			c.Hz = (int64(ecx) * int64(ebx)) / int64(eax)
545
		}
546
	}
547
	if mfi >= 0x16 {
548
		a, b, _, _ := cpuid(0x16)
549
		// Base...
550
		if a&0xffff > 0 {
551
			c.Hz = int64(a&0xffff) * 1_000_000
552
		}
553
		// Boost...
554
		if b&0xffff > 0 {
555
			c.BoostFreq = int64(b&0xffff) * 1_000_000
556
		}
557
	}
558
	if c.Hz > 0 {
559
		return
560
	}
561

562
	// computeHz determines the official rated speed of a CPU from its brand
563
	// string. This insanity is *actually the official documented way to do
564
	// this according to Intel*, prior to leaf 0x15 existing. The official
565
	// documentation only shows this working for exactly `x.xx` or `xxxx`
566
	// cases, e.g., `2.50GHz` or `1300MHz`; this parser will accept other
567
	// sizes.
568
	model := c.BrandName
569
	hz := strings.LastIndex(model, "Hz")
570
	if hz < 3 {
571
		return
572
	}
573
	var multiplier int64
574
	switch model[hz-1] {
575
	case 'M':
576
		multiplier = 1000 * 1000
577
	case 'G':
578
		multiplier = 1000 * 1000 * 1000
579
	case 'T':
580
		multiplier = 1000 * 1000 * 1000 * 1000
581
	}
582
	if multiplier == 0 {
583
		return
584
	}
585
	freq := int64(0)
586
	divisor := int64(0)
587
	decimalShift := int64(1)
588
	var i int
589
	for i = hz - 2; i >= 0 && model[i] != ' '; i-- {
590
		if model[i] >= '0' && model[i] <= '9' {
591
			freq += int64(model[i]-'0') * decimalShift
592
			decimalShift *= 10
593
		} else if model[i] == '.' {
594
			if divisor != 0 {
595
				return
596
			}
597
			divisor = decimalShift
598
		} else {
599
			return
600
		}
601
	}
602
	// we didn't find a space
603
	if i < 0 {
604
		return
605
	}
606
	if divisor != 0 {
607
		c.Hz = (freq * multiplier) / divisor
608
		return
609
	}
610
	c.Hz = freq * multiplier
611
}
612

613
// VM Will return true if the cpu id indicates we are in
614
// a virtual machine.
615
func (c CPUInfo) VM() bool {
616
	return CPU.featureSet.inSet(HYPERVISOR)
617
}
618

619
// flags contains detected cpu features and characteristics
620
type flags uint64
621

622
// log2(bits_in_uint64)
623
const flagBitsLog2 = 6
624
const flagBits = 1 << flagBitsLog2
625
const flagMask = flagBits - 1
626

627
// flagSet contains detected cpu features and characteristics in an array of flags
628
type flagSet [(lastID + flagMask) / flagBits]flags
629

630
// inSet reports whether the bit for feat is set.
func (s *flagSet) inSet(feat FeatureID) bool {
	word := s[feat>>flagBitsLog2]
	mask := flags(1) << (feat & flagMask)
	return word&mask != 0
}
633

634
// set turns on the bit for feat.
func (s *flagSet) set(feat FeatureID) {
	idx := feat >> flagBitsLog2
	s[idx] = s[idx] | flags(1)<<(feat&flagMask)
}
637

638
// setIf will set a feature if boolean is true.
639
func (s *flagSet) setIf(cond bool, features ...FeatureID) {
640
	if cond {
641
		for _, offset := range features {
642
			s[offset>>flagBitsLog2] |= 1 << (offset & flagMask)
643
		}
644
	}
645
}
646

647
// unset clears the bit for offset, leaving all other bits untouched.
func (s *flagSet) unset(offset FeatureID) {
	idx := offset >> flagBitsLog2
	s[idx] &^= flags(1) << (offset & flagMask)
}
651

652
// or with another flagset.
653
func (s *flagSet) or(other flagSet) {
654
	for i, v := range other[:] {
655
		s[i] |= v
656
	}
657
}
658

659
// hasSet returns whether all features are present.
660
func (s *flagSet) hasSet(other flagSet) bool {
661
	for i, v := range other[:] {
662
		if s[i]&v != v {
663
			return false
664
		}
665
	}
666
	return true
667
}
668

669
// hasSet returns whether all features are present.
670
func (s *flagSet) hasSetP(other *flagSet) bool {
671
	for i, v := range other[:] {
672
		if s[i]&v != v {
673
			return false
674
		}
675
	}
676
	return true
677
}
678

679
// hasOneOf returns whether one or more features are present.
680
func (s *flagSet) hasOneOf(other *flagSet) bool {
681
	for i, v := range other[:] {
682
		if s[i]&v != 0 {
683
			return true
684
		}
685
	}
686
	return false
687
}
688

689
// nEnabled will return the number of enabled flags.
690
func (s *flagSet) nEnabled() (n int) {
691
	for _, v := range s[:] {
692
		n += bits.OnesCount64(uint64(v))
693
	}
694
	return n
695
}
696

697
// flagSetWith returns a flagSet in which exactly the given features are set.
func flagSetWith(feat ...FeatureID) flagSet {
	var fs flagSet
	fs.setIf(true, feat...)
	return fs
}
704

705
// ParseFeature will parse the string and return the ID of the matching feature.
706
// Will return UNKNOWN if not found.
707
func ParseFeature(s string) FeatureID {
708
	s = strings.ToUpper(s)
709
	for i := firstID; i < lastID; i++ {
710
		if i.String() == s {
711
			return i
712
		}
713
	}
714
	return UNKNOWN
715
}
716

717
// Strings returns an array of the detected features for FlagsSet.
718
func (s flagSet) Strings() []string {
719
	if len(s) == 0 {
720
		return []string{""}
721
	}
722
	r := make([]string, 0)
723
	for i := firstID; i < lastID; i++ {
724
		if s.inSet(i) {
725
			r = append(r, i.String())
726
		}
727
	}
728
	return r
729
}
730

731
func maxExtendedFunction() uint32 {
732
	eax, _, _, _ := cpuid(0x80000000)
733
	return eax
734
}
735

736
func maxFunctionID() uint32 {
737
	a, _, _, _ := cpuid(0)
738
	return a
739
}
740

741
func brandName() string {
742
	if maxExtendedFunction() >= 0x80000004 {
743
		v := make([]uint32, 0, 48)
744
		for i := uint32(0); i < 3; i++ {
745
			a, b, c, d := cpuid(0x80000002 + i)
746
			v = append(v, a, b, c, d)
747
		}
748
		return strings.Trim(string(valAsString(v...)), " ")
749
	}
750
	return "unknown"
751
}
752

753
func threadsPerCore() int {
754
	mfi := maxFunctionID()
755
	vend, _ := vendorID()
756

757
	if mfi < 0x4 || (vend != Intel && vend != AMD) {
758
		return 1
759
	}
760

761
	if mfi < 0xb {
762
		if vend != Intel {
763
			return 1
764
		}
765
		_, b, _, d := cpuid(1)
766
		if (d & (1 << 28)) != 0 {
767
			// v will contain logical core count
768
			v := (b >> 16) & 255
769
			if v > 1 {
770
				a4, _, _, _ := cpuid(4)
771
				// physical cores
772
				v2 := (a4 >> 26) + 1
773
				if v2 > 0 {
774
					return int(v) / int(v2)
775
				}
776
			}
777
		}
778
		return 1
779
	}
780
	_, b, _, _ := cpuidex(0xb, 0)
781
	if b&0xffff == 0 {
782
		if vend == AMD {
783
			// Workaround for AMD returning 0, assume 2 if >= Zen 2
784
			// It will be more correct than not.
785
			fam, _, _ := familyModel()
786
			_, _, _, d := cpuid(1)
787
			if (d&(1<<28)) != 0 && fam >= 23 {
788
				return 2
789
			}
790
		}
791
		return 1
792
	}
793
	return int(b & 0xffff)
794
}
795

796
func logicalCores() int {
797
	mfi := maxFunctionID()
798
	v, _ := vendorID()
799
	switch v {
800
	case Intel:
801
		// Use this on old Intel processors
802
		if mfi < 0xb {
803
			if mfi < 1 {
804
				return 0
805
			}
806
			// CPUID.1:EBX[23:16] represents the maximum number of addressable IDs (initial APIC ID)
807
			// that can be assigned to logical processors in a physical package.
808
			// The value may not be the same as the number of logical processors that are present in the hardware of a physical package.
809
			_, ebx, _, _ := cpuid(1)
810
			logical := (ebx >> 16) & 0xff
811
			return int(logical)
812
		}
813
		_, b, _, _ := cpuidex(0xb, 1)
814
		return int(b & 0xffff)
815
	case AMD, Hygon:
816
		_, b, _, _ := cpuid(1)
817
		return int((b >> 16) & 0xff)
818
	default:
819
		return 0
820
	}
821
}
822

823
func familyModel() (family, model, stepping int) {
824
	if maxFunctionID() < 0x1 {
825
		return 0, 0, 0
826
	}
827
	eax, _, _, _ := cpuid(1)
828
	// If BaseFamily[3:0] is less than Fh then ExtendedFamily[7:0] is reserved and Family is equal to BaseFamily[3:0].
829
	family = int((eax >> 8) & 0xf)
830
	extFam := family == 0x6 // Intel is 0x6, needs extended model.
831
	if family == 0xf {
832
		// Add ExtFamily
833
		family += int((eax >> 20) & 0xff)
834
		extFam = true
835
	}
836
	// If BaseFamily[3:0] is less than 0Fh then ExtendedModel[3:0] is reserved and Model is equal to BaseModel[3:0].
837
	model = int((eax >> 4) & 0xf)
838
	if extFam {
839
		// Add ExtModel
840
		model += int((eax >> 12) & 0xf0)
841
	}
842
	stepping = int(eax & 0xf)
843
	return family, model, stepping
844
}
845

846
func physicalCores() int {
847
	v, _ := vendorID()
848
	switch v {
849
	case Intel:
850
		return logicalCores() / threadsPerCore()
851
	case AMD, Hygon:
852
		lc := logicalCores()
853
		tpc := threadsPerCore()
854
		if lc > 0 && tpc > 0 {
855
			return lc / tpc
856
		}
857

858
		// The following is inaccurate on AMD EPYC 7742 64-Core Processor
859
		if maxExtendedFunction() >= 0x80000008 {
860
			_, _, c, _ := cpuid(0x80000008)
861
			if c&0xff > 0 {
862
				return int(c&0xff) + 1
863
			}
864
		}
865
	}
866
	return 0
867
}
868

869
// Except from http://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID
870
var vendorMapping = map[string]Vendor{
871
	"AMDisbetter!": AMD,
872
	"AuthenticAMD": AMD,
873
	"CentaurHauls": VIA,
874
	"GenuineIntel": Intel,
875
	"TransmetaCPU": Transmeta,
876
	"GenuineTMx86": Transmeta,
877
	"Geode by NSC": NSC,
878
	"VIA VIA VIA ": VIA,
879
	"KVMKVMKVMKVM": KVM,
880
	"Microsoft Hv": MSVM,
881
	"VMwareVMware": VMware,
882
	"XenVMMXenVMM": XenHVM,
883
	"bhyve bhyve ": Bhyve,
884
	"HygonGenuine": Hygon,
885
	"Vortex86 SoC": SiS,
886
	"SiS SiS SiS ": SiS,
887
	"RiseRiseRise": SiS,
888
	"Genuine  RDC": RDC,
889
}
890

891
// vendorID reads the vendor string from CPUID leaf 0 (registers EBX, EDX,
// ECX in that order) and returns the matching Vendor together with the raw
// string. Strings missing from vendorMapping yield VendorUnknown.
func vendorID() (Vendor, string) {
	_, ebx, ecx, edx := cpuid(0)
	name := string(valAsString(ebx, edx, ecx))
	if vendor, known := vendorMapping[name]; known {
		return vendor, name
	}
	return VendorUnknown, name
}
900

901
func cacheLine() int {
902
	if maxFunctionID() < 0x1 {
903
		return 0
904
	}
905

906
	_, ebx, _, _ := cpuid(1)
907
	cache := (ebx & 0xff00) >> 5 // cflush size
908
	if cache == 0 && maxExtendedFunction() >= 0x80000006 {
909
		_, _, ecx, _ := cpuid(0x80000006)
910
		cache = ecx & 0xff // cacheline size
911
	}
912
	// TODO: Read from Cache and TLB Information
913
	return int(cache)
914
}
915

916
func (c *CPUInfo) cacheSize() {
917
	c.Cache.L1D = -1
918
	c.Cache.L1I = -1
919
	c.Cache.L2 = -1
920
	c.Cache.L3 = -1
921
	vendor, _ := vendorID()
922
	switch vendor {
923
	case Intel:
924
		if maxFunctionID() < 4 {
925
			return
926
		}
927
		c.Cache.L1I, c.Cache.L1D, c.Cache.L2, c.Cache.L3 = 0, 0, 0, 0
928
		for i := uint32(0); ; i++ {
929
			eax, ebx, ecx, _ := cpuidex(4, i)
930
			cacheType := eax & 15
931
			if cacheType == 0 {
932
				break
933
			}
934
			cacheLevel := (eax >> 5) & 7
935
			coherency := int(ebx&0xfff) + 1
936
			partitions := int((ebx>>12)&0x3ff) + 1
937
			associativity := int((ebx>>22)&0x3ff) + 1
938
			sets := int(ecx) + 1
939
			size := associativity * partitions * coherency * sets
940
			switch cacheLevel {
941
			case 1:
942
				if cacheType == 1 {
943
					// 1 = Data Cache
944
					c.Cache.L1D = size
945
				} else if cacheType == 2 {
946
					// 2 = Instruction Cache
947
					c.Cache.L1I = size
948
				} else {
949
					if c.Cache.L1D < 0 {
950
						c.Cache.L1I = size
951
					}
952
					if c.Cache.L1I < 0 {
953
						c.Cache.L1I = size
954
					}
955
				}
956
			case 2:
957
				c.Cache.L2 = size
958
			case 3:
959
				c.Cache.L3 = size
960
			}
961
		}
962
	case AMD, Hygon:
963
		// Untested.
964
		if maxExtendedFunction() < 0x80000005 {
965
			return
966
		}
967
		_, _, ecx, edx := cpuid(0x80000005)
968
		c.Cache.L1D = int(((ecx >> 24) & 0xFF) * 1024)
969
		c.Cache.L1I = int(((edx >> 24) & 0xFF) * 1024)
970

971
		if maxExtendedFunction() < 0x80000006 {
972
			return
973
		}
974
		_, _, ecx, _ = cpuid(0x80000006)
975
		c.Cache.L2 = int(((ecx >> 16) & 0xFFFF) * 1024)
976

977
		// CPUID Fn8000_001D_EAX_x[N:0] Cache Properties
978
		if maxExtendedFunction() < 0x8000001D || !c.Has(TOPEXT) {
979
			return
980
		}
981

982
		// Xen Hypervisor is buggy and returns the same entry no matter ECX value.
983
		// Hack: When we encounter the same entry 100 times we break.
984
		nSame := 0
985
		var last uint32
986
		for i := uint32(0); i < math.MaxUint32; i++ {
987
			eax, ebx, ecx, _ := cpuidex(0x8000001D, i)
988

989
			level := (eax >> 5) & 7
990
			cacheNumSets := ecx + 1
991
			cacheLineSize := 1 + (ebx & 2047)
992
			cachePhysPartitions := 1 + ((ebx >> 12) & 511)
993
			cacheNumWays := 1 + ((ebx >> 22) & 511)
994

995
			typ := eax & 15
996
			size := int(cacheNumSets * cacheLineSize * cachePhysPartitions * cacheNumWays)
997
			if typ == 0 {
998
				return
999
			}
1000

1001
			// Check for the same value repeated.
1002
			comb := eax ^ ebx ^ ecx
1003
			if comb == last {
1004
				nSame++
1005
				if nSame == 100 {
1006
					return
1007
				}
1008
			}
1009
			last = comb
1010

1011
			switch level {
1012
			case 1:
1013
				switch typ {
1014
				case 1:
1015
					// Data cache
1016
					c.Cache.L1D = size
1017
				case 2:
1018
					// Inst cache
1019
					c.Cache.L1I = size
1020
				default:
1021
					if c.Cache.L1D < 0 {
1022
						c.Cache.L1I = size
1023
					}
1024
					if c.Cache.L1I < 0 {
1025
						c.Cache.L1I = size
1026
					}
1027
				}
1028
			case 2:
1029
				c.Cache.L2 = size
1030
			case 3:
1031
				c.Cache.L3 = size
1032
			}
1033
		}
1034
	}
1035
}
1036

1037
// SGXEPCSection describes one EPC section as enumerated by CPUID leaf 0x12.
type SGXEPCSection struct {
	// BaseAddress is the base address of the section.
	BaseAddress uint64
	// EPCSize is the size of the section.
	EPCSize uint64
}
1041

1042
type SGXSupport struct {
1043
	Available           bool
1044
	LaunchControl       bool
1045
	SGX1Supported       bool
1046
	SGX2Supported       bool
1047
	MaxEnclaveSizeNot64 int64
1048
	MaxEnclaveSize64    int64
1049
	EPCSections         []SGXEPCSection
1050
}
1051

1052
// hasSGX builds the SGXSupport description. available and lc are the already
// detected SGX and launch-control flags; when available is false nothing is
// queried and only Available (false) is filled in. Otherwise CPUID leaf 0x12
// is read for the capability bits, maximum enclave sizes and up to eight EPC
// sections.
func hasSGX(available, lc bool) (rval SGXSupport) {
	rval.Available = available
	if !available {
		return rval
	}
	rval.LaunchControl = lc

	caps, _, _, sizes := cpuidex(0x12, 0)
	rval.SGX1Supported = caps&0x01 != 0
	rval.SGX2Supported = caps&0x02 != 0
	rval.MaxEnclaveSizeNot64 = 1 << (sizes & 0xFF)     // pow 2
	rval.MaxEnclaveSize64 = 1 << ((sizes >> 8) & 0xFF) // pow 2
	rval.EPCSections = []SGXEPCSection{}

	const firstSection, maxSections = 2, 8
	for subleaf := uint32(firstSection); subleaf < firstSection+maxSections; subleaf++ {
		eax, ebx, ecx, edx := cpuidex(0x12, subleaf)
		switch eax & 0xf {
		case 0:
			// Invalid subleaf, stop iterating.
			return rval
		case 1:
			// EPC section subleaf: the low 32 bits come from EAX/ECX (upper
			// 20 bits used), the high bits from the low 20 bits of EBX/EDX.
			rval.EPCSections = append(rval.EPCSections, SGXEPCSection{
				BaseAddress: uint64(ebx&0x000fffff)<<32 + uint64(eax&0xfffff000),
				EPCSize:     uint64(edx&0x000fffff)<<32 + uint64(ecx&0xfffff000),
			})
		}
	}
	return rval
}
1087

1088
// AMDMemEncryptionSupport holds the memory-encryption parameters read from
// CPUID leaf 0x8000001f. When Available is false, all other fields are zero.
type AMDMemEncryptionSupport struct {
	// Available reports whether the feature was detected.
	Available bool
	// CBitPossition is EBX[5:0] of the leaf.
	CBitPossition uint32
	// NumVMPL is EBX[15:12] of the leaf.
	NumVMPL uint32
	// PhysAddrReduction is EBX[11:6] of the leaf.
	PhysAddrReduction uint32
	// NumEntryptedGuests is ECX of the leaf.
	NumEntryptedGuests uint32
	// MinSevNoEsAsid is EDX of the leaf.
	MinSevNoEsAsid uint32
}
1096

1097
// hasAMDMemEncryption builds the AMDMemEncryptionSupport description.
// available is the already detected support flag; when it is false nothing is
// queried and the zero value is returned. Otherwise the fields are decoded
// from EBX, ECX and EDX of CPUID leaf 0x8000001f.
func hasAMDMemEncryption(available bool) (rval AMDMemEncryptionSupport) {
	if !available {
		return AMDMemEncryptionSupport{}
	}

	_, ebx, ecx, edx := cpuidex(0x8000001f, 0)
	return AMDMemEncryptionSupport{
		Available:          true,
		CBitPossition:      ebx & 0x3f,
		PhysAddrReduction:  (ebx >> 6) & 0x3F,
		NumVMPL:            (ebx >> 12) & 0xf,
		NumEntryptedGuests: ecx,
		MinSevNoEsAsid:     edx,
	}
}
1113

1114
func support() flagSet {
1115
	var fs flagSet
1116
	mfi := maxFunctionID()
1117
	vend, _ := vendorID()
1118
	if mfi < 0x1 {
1119
		return fs
1120
	}
1121
	family, model, _ := familyModel()
1122

1123
	_, _, c, d := cpuid(1)
1124
	fs.setIf((d&(1<<0)) != 0, X87)
1125
	fs.setIf((d&(1<<8)) != 0, CMPXCHG8)
1126
	fs.setIf((d&(1<<11)) != 0, SYSEE)
1127
	fs.setIf((d&(1<<15)) != 0, CMOV)
1128
	fs.setIf((d&(1<<23)) != 0, MMX)
1129
	fs.setIf((d&(1<<24)) != 0, FXSR)
1130
	fs.setIf((d&(1<<25)) != 0, FXSROPT)
1131
	fs.setIf((d&(1<<25)) != 0, SSE)
1132
	fs.setIf((d&(1<<26)) != 0, SSE2)
1133
	fs.setIf((c&1) != 0, SSE3)
1134
	fs.setIf((c&(1<<5)) != 0, VMX)
1135
	fs.setIf((c&(1<<9)) != 0, SSSE3)
1136
	fs.setIf((c&(1<<19)) != 0, SSE4)
1137
	fs.setIf((c&(1<<20)) != 0, SSE42)
1138
	fs.setIf((c&(1<<25)) != 0, AESNI)
1139
	fs.setIf((c&(1<<1)) != 0, CLMUL)
1140
	fs.setIf(c&(1<<22) != 0, MOVBE)
1141
	fs.setIf(c&(1<<23) != 0, POPCNT)
1142
	fs.setIf(c&(1<<30) != 0, RDRAND)
1143

1144
	// This bit has been reserved by Intel & AMD for use by hypervisors,
1145
	// and indicates the presence of a hypervisor.
1146
	fs.setIf(c&(1<<31) != 0, HYPERVISOR)
1147
	fs.setIf(c&(1<<29) != 0, F16C)
1148
	fs.setIf(c&(1<<13) != 0, CX16)
1149

1150
	if vend == Intel && (d&(1<<28)) != 0 && mfi >= 4 {
1151
		fs.setIf(threadsPerCore() > 1, HTT)
1152
	}
1153
	if vend == AMD && (d&(1<<28)) != 0 && mfi >= 4 {
1154
		fs.setIf(threadsPerCore() > 1, HTT)
1155
	}
1156
	fs.setIf(c&1<<26 != 0, XSAVE)
1157
	fs.setIf(c&1<<27 != 0, OSXSAVE)
1158
	// Check XGETBV/XSAVE (26), OXSAVE (27) and AVX (28) bits
1159
	const avxCheck = 1<<26 | 1<<27 | 1<<28
1160
	if c&avxCheck == avxCheck {
1161
		// Check for OS support
1162
		eax, _ := xgetbv(0)
1163
		if (eax & 0x6) == 0x6 {
1164
			fs.set(AVX)
1165
			switch vend {
1166
			case Intel:
1167
				// Older than Haswell.
1168
				fs.setIf(family == 6 && model < 60, AVXSLOW)
1169
			case AMD:
1170
				// Older than Zen 2
1171
				fs.setIf(family < 23 || (family == 23 && model < 49), AVXSLOW)
1172
			}
1173
		}
1174
	}
1175
	// FMA3 can be used with SSE registers, so no OS support is strictly needed.
1176
	// fma3 and OSXSAVE needed.
1177
	const fma3Check = 1<<12 | 1<<27
1178
	fs.setIf(c&fma3Check == fma3Check, FMA3)
1179

1180
	// Check AVX2, AVX2 requires OS support, but BMI1/2 don't.
1181
	if mfi >= 7 {
1182
		_, ebx, ecx, edx := cpuidex(7, 0)
1183
		if fs.inSet(AVX) && (ebx&0x00000020) != 0 {
1184
			fs.set(AVX2)
1185
		}
1186
		// CPUID.(EAX=7, ECX=0).EBX
1187
		if (ebx & 0x00000008) != 0 {
1188
			fs.set(BMI1)
1189
			fs.setIf((ebx&0x00000100) != 0, BMI2)
1190
		}
1191
		fs.setIf(ebx&(1<<2) != 0, SGX)
1192
		fs.setIf(ebx&(1<<4) != 0, HLE)
1193
		fs.setIf(ebx&(1<<9) != 0, ERMS)
1194
		fs.setIf(ebx&(1<<11) != 0, RTM)
1195
		fs.setIf(ebx&(1<<14) != 0, MPX)
1196
		fs.setIf(ebx&(1<<18) != 0, RDSEED)
1197
		fs.setIf(ebx&(1<<19) != 0, ADX)
1198
		fs.setIf(ebx&(1<<29) != 0, SHA)
1199

1200
		// CPUID.(EAX=7, ECX=0).ECX
1201
		fs.setIf(ecx&(1<<5) != 0, WAITPKG)
1202
		fs.setIf(ecx&(1<<7) != 0, CETSS)
1203
		fs.setIf(ecx&(1<<8) != 0, GFNI)
1204
		fs.setIf(ecx&(1<<9) != 0, VAES)
1205
		fs.setIf(ecx&(1<<10) != 0, VPCLMULQDQ)
1206
		fs.setIf(ecx&(1<<13) != 0, TME)
1207
		fs.setIf(ecx&(1<<25) != 0, CLDEMOTE)
1208
		fs.setIf(ecx&(1<<23) != 0, KEYLOCKER)
1209
		fs.setIf(ecx&(1<<27) != 0, MOVDIRI)
1210
		fs.setIf(ecx&(1<<28) != 0, MOVDIR64B)
1211
		fs.setIf(ecx&(1<<29) != 0, ENQCMD)
1212
		fs.setIf(ecx&(1<<30) != 0, SGXLC)
1213

1214
		// CPUID.(EAX=7, ECX=0).EDX
1215
		fs.setIf(edx&(1<<4) != 0, FSRM)
1216
		fs.setIf(edx&(1<<9) != 0, SRBDS_CTRL)
1217
		fs.setIf(edx&(1<<10) != 0, MD_CLEAR)
1218
		fs.setIf(edx&(1<<11) != 0, RTM_ALWAYS_ABORT)
1219
		fs.setIf(edx&(1<<14) != 0, SERIALIZE)
1220
		fs.setIf(edx&(1<<15) != 0, HYBRID_CPU)
1221
		fs.setIf(edx&(1<<16) != 0, TSXLDTRK)
1222
		fs.setIf(edx&(1<<18) != 0, PCONFIG)
1223
		fs.setIf(edx&(1<<20) != 0, CETIBT)
1224
		fs.setIf(edx&(1<<26) != 0, IBPB)
1225
		fs.setIf(edx&(1<<27) != 0, STIBP)
1226
		fs.setIf(edx&(1<<28) != 0, FLUSH_L1D)
1227
		fs.setIf(edx&(1<<29) != 0, IA32_ARCH_CAP)
1228
		fs.setIf(edx&(1<<30) != 0, IA32_CORE_CAP)
1229
		fs.setIf(edx&(1<<31) != 0, SPEC_CTRL_SSBD)
1230

1231
		// CPUID.(EAX=7, ECX=1).EAX
1232
		eax1, _, _, edx1 := cpuidex(7, 1)
1233
		fs.setIf(fs.inSet(AVX) && eax1&(1<<4) != 0, AVXVNNI)
1234
		fs.setIf(eax1&(1<<7) != 0, CMPCCXADD)
1235
		fs.setIf(eax1&(1<<10) != 0, MOVSB_ZL)
1236
		fs.setIf(eax1&(1<<11) != 0, STOSB_SHORT)
1237
		fs.setIf(eax1&(1<<12) != 0, CMPSB_SCADBS_SHORT)
1238
		fs.setIf(eax1&(1<<22) != 0, HRESET)
1239
		fs.setIf(eax1&(1<<23) != 0, AVXIFMA)
1240
		fs.setIf(eax1&(1<<26) != 0, LAM)
1241

1242
		// CPUID.(EAX=7, ECX=1).EDX
1243
		fs.setIf(edx1&(1<<4) != 0, AVXVNNIINT8)
1244
		fs.setIf(edx1&(1<<5) != 0, AVXNECONVERT)
1245
		fs.setIf(edx1&(1<<14) != 0, PREFETCHI)
1246
		fs.setIf(edx1&(1<<19) != 0, AVX10)
1247
		fs.setIf(edx1&(1<<21) != 0, APX_F)
1248

1249
		// Only detect AVX-512 features if XGETBV is supported
1250
		if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) {
1251
			// Check for OS support
1252
			eax, _ := xgetbv(0)
1253

1254
			// Verify that XCR0[7:5] = ‘111b’ (OPMASK state, upper 256-bit of ZMM0-ZMM15 and
1255
			// ZMM16-ZMM31 state are enabled by OS)
1256
			/// and that XCR0[2:1] = ‘11b’ (XMM state and YMM state are enabled by OS).
1257
			hasAVX512 := (eax>>5)&7 == 7 && (eax>>1)&3 == 3
1258
			if runtime.GOOS == "darwin" {
1259
				hasAVX512 = fs.inSet(AVX) && darwinHasAVX512()
1260
			}
1261
			if hasAVX512 {
1262
				fs.setIf(ebx&(1<<16) != 0, AVX512F)
1263
				fs.setIf(ebx&(1<<17) != 0, AVX512DQ)
1264
				fs.setIf(ebx&(1<<21) != 0, AVX512IFMA)
1265
				fs.setIf(ebx&(1<<26) != 0, AVX512PF)
1266
				fs.setIf(ebx&(1<<27) != 0, AVX512ER)
1267
				fs.setIf(ebx&(1<<28) != 0, AVX512CD)
1268
				fs.setIf(ebx&(1<<30) != 0, AVX512BW)
1269
				fs.setIf(ebx&(1<<31) != 0, AVX512VL)
1270
				// ecx
1271
				fs.setIf(ecx&(1<<1) != 0, AVX512VBMI)
1272
				fs.setIf(ecx&(1<<6) != 0, AVX512VBMI2)
1273
				fs.setIf(ecx&(1<<11) != 0, AVX512VNNI)
1274
				fs.setIf(ecx&(1<<12) != 0, AVX512BITALG)
1275
				fs.setIf(ecx&(1<<14) != 0, AVX512VPOPCNTDQ)
1276
				// edx
1277
				fs.setIf(edx&(1<<8) != 0, AVX512VP2INTERSECT)
1278
				fs.setIf(edx&(1<<22) != 0, AMXBF16)
1279
				fs.setIf(edx&(1<<23) != 0, AVX512FP16)
1280
				fs.setIf(edx&(1<<24) != 0, AMXTILE)
1281
				fs.setIf(edx&(1<<25) != 0, AMXINT8)
1282
				// eax1 = CPUID.(EAX=7, ECX=1).EAX
1283
				fs.setIf(eax1&(1<<5) != 0, AVX512BF16)
1284
				fs.setIf(eax1&(1<<19) != 0, WRMSRNS)
1285
				fs.setIf(eax1&(1<<21) != 0, AMXFP16)
1286
				fs.setIf(eax1&(1<<27) != 0, MSRLIST)
1287
			}
1288
		}
1289

1290
		// CPUID.(EAX=7, ECX=2)
1291
		_, _, _, edx = cpuidex(7, 2)
1292
		fs.setIf(edx&(1<<0) != 0, PSFD)
1293
		fs.setIf(edx&(1<<1) != 0, IDPRED_CTRL)
1294
		fs.setIf(edx&(1<<2) != 0, RRSBA_CTRL)
1295
		fs.setIf(edx&(1<<4) != 0, BHI_CTRL)
1296
		fs.setIf(edx&(1<<5) != 0, MCDT_NO)
1297

1298
		// Add keylocker features.
1299
		if fs.inSet(KEYLOCKER) && mfi >= 0x19 {
1300
			_, ebx, _, _ := cpuidex(0x19, 0)
1301
			fs.setIf(ebx&5 == 5, KEYLOCKERW) // Bit 0 and 2 (1+4)
1302
		}
1303

1304
		// Add AVX10 features.
1305
		if fs.inSet(AVX10) && mfi >= 0x24 {
1306
			_, ebx, _, _ := cpuidex(0x24, 0)
1307
			fs.setIf(ebx&(1<<16) != 0, AVX10_128)
1308
			fs.setIf(ebx&(1<<17) != 0, AVX10_256)
1309
			fs.setIf(ebx&(1<<18) != 0, AVX10_512)
1310
		}
1311
	}
1312

1313
	// Processor Extended State Enumeration Sub-leaf (EAX = 0DH, ECX = 1)
1314
	// EAX
1315
	// Bit 00: XSAVEOPT is available.
1316
	// Bit 01: Supports XSAVEC and the compacted form of XRSTOR if set.
1317
	// Bit 02: Supports XGETBV with ECX = 1 if set.
1318
	// Bit 03: Supports XSAVES/XRSTORS and IA32_XSS if set.
1319
	// Bits 31 - 04: Reserved.
1320
	// EBX
1321
	// Bits 31 - 00: The size in bytes of the XSAVE area containing all states enabled by XCRO | IA32_XSS.
1322
	// ECX
1323
	// Bits 31 - 00: Reports the supported bits of the lower 32 bits of the IA32_XSS MSR. IA32_XSS[n] can be set to 1 only if ECX[n] is 1.
1324
	// EDX?
1325
	// Bits 07 - 00: Used for XCR0. Bit 08: PT state. Bit 09: Used for XCR0. Bits 12 - 10: Reserved. Bit 13: HWP state. Bits 31 - 14: Reserved.
1326
	if mfi >= 0xd {
1327
		if fs.inSet(XSAVE) {
1328
			eax, _, _, _ := cpuidex(0xd, 1)
1329
			fs.setIf(eax&(1<<0) != 0, XSAVEOPT)
1330
			fs.setIf(eax&(1<<1) != 0, XSAVEC)
1331
			fs.setIf(eax&(1<<2) != 0, XGETBV1)
1332
			fs.setIf(eax&(1<<3) != 0, XSAVES)
1333
		}
1334
	}
1335
	if maxExtendedFunction() >= 0x80000001 {
1336
		_, _, c, d := cpuid(0x80000001)
1337
		if (c & (1 << 5)) != 0 {
1338
			fs.set(LZCNT)
1339
			fs.set(POPCNT)
1340
		}
1341
		// ECX
1342
		fs.setIf((c&(1<<0)) != 0, LAHF)
1343
		fs.setIf((c&(1<<2)) != 0, SVM)
1344
		fs.setIf((c&(1<<6)) != 0, SSE4A)
1345
		fs.setIf((c&(1<<10)) != 0, IBS)
1346
		fs.setIf((c&(1<<22)) != 0, TOPEXT)
1347

1348
		// EDX
1349
		fs.setIf(d&(1<<11) != 0, SYSCALL)
1350
		fs.setIf(d&(1<<20) != 0, NX)
1351
		fs.setIf(d&(1<<22) != 0, MMXEXT)
1352
		fs.setIf(d&(1<<23) != 0, MMX)
1353
		fs.setIf(d&(1<<24) != 0, FXSR)
1354
		fs.setIf(d&(1<<25) != 0, FXSROPT)
1355
		fs.setIf(d&(1<<27) != 0, RDTSCP)
1356
		fs.setIf(d&(1<<30) != 0, AMD3DNOWEXT)
1357
		fs.setIf(d&(1<<31) != 0, AMD3DNOW)
1358

1359
		/* XOP and FMA4 use the AVX instruction coding scheme, so they can't be
1360
		 * used unless the OS has AVX support. */
1361
		if fs.inSet(AVX) {
1362
			fs.setIf((c&(1<<11)) != 0, XOP)
1363
			fs.setIf((c&(1<<16)) != 0, FMA4)
1364
		}
1365

1366
	}
1367
	if maxExtendedFunction() >= 0x80000007 {
1368
		_, b, _, d := cpuid(0x80000007)
1369
		fs.setIf((b&(1<<0)) != 0, MCAOVERFLOW)
1370
		fs.setIf((b&(1<<1)) != 0, SUCCOR)
1371
		fs.setIf((b&(1<<2)) != 0, HWA)
1372
		fs.setIf((d&(1<<9)) != 0, CPBOOST)
1373
	}
1374

1375
	if maxExtendedFunction() >= 0x80000008 {
1376
		_, b, _, _ := cpuid(0x80000008)
1377
		fs.setIf(b&(1<<28) != 0, PSFD)
1378
		fs.setIf(b&(1<<27) != 0, CPPC)
1379
		fs.setIf(b&(1<<24) != 0, SPEC_CTRL_SSBD)
1380
		fs.setIf(b&(1<<23) != 0, PPIN)
1381
		fs.setIf(b&(1<<21) != 0, TLB_FLUSH_NESTED)
1382
		fs.setIf(b&(1<<20) != 0, EFER_LMSLE_UNS)
1383
		fs.setIf(b&(1<<19) != 0, IBRS_PROVIDES_SMP)
1384
		fs.setIf(b&(1<<18) != 0, IBRS_PREFERRED)
1385
		fs.setIf(b&(1<<17) != 0, STIBP_ALWAYSON)
1386
		fs.setIf(b&(1<<15) != 0, STIBP)
1387
		fs.setIf(b&(1<<14) != 0, IBRS)
1388
		fs.setIf((b&(1<<13)) != 0, INT_WBINVD)
1389
		fs.setIf(b&(1<<12) != 0, IBPB)
1390
		fs.setIf((b&(1<<9)) != 0, WBNOINVD)
1391
		fs.setIf((b&(1<<8)) != 0, MCOMMIT)
1392
		fs.setIf((b&(1<<4)) != 0, RDPRU)
1393
		fs.setIf((b&(1<<3)) != 0, INVLPGB)
1394
		fs.setIf((b&(1<<1)) != 0, MSRIRC)
1395
		fs.setIf((b&(1<<0)) != 0, CLZERO)
1396
	}
1397

1398
	if fs.inSet(SVM) && maxExtendedFunction() >= 0x8000000A {
1399
		_, _, _, edx := cpuid(0x8000000A)
1400
		fs.setIf((edx>>0)&1 == 1, SVMNP)
1401
		fs.setIf((edx>>1)&1 == 1, LBRVIRT)
1402
		fs.setIf((edx>>2)&1 == 1, SVML)
1403
		fs.setIf((edx>>3)&1 == 1, NRIPS)
1404
		fs.setIf((edx>>4)&1 == 1, TSCRATEMSR)
1405
		fs.setIf((edx>>5)&1 == 1, VMCBCLEAN)
1406
		fs.setIf((edx>>6)&1 == 1, SVMFBASID)
1407
		fs.setIf((edx>>7)&1 == 1, SVMDA)
1408
		fs.setIf((edx>>10)&1 == 1, SVMPF)
1409
		fs.setIf((edx>>12)&1 == 1, SVMPFT)
1410
	}
1411

1412
	if maxExtendedFunction() >= 0x8000001a {
1413
		eax, _, _, _ := cpuid(0x8000001a)
1414
		fs.setIf((eax>>0)&1 == 1, FP128)
1415
		fs.setIf((eax>>1)&1 == 1, MOVU)
1416
		fs.setIf((eax>>2)&1 == 1, FP256)
1417
	}
1418

1419
	if maxExtendedFunction() >= 0x8000001b && fs.inSet(IBS) {
1420
		eax, _, _, _ := cpuid(0x8000001b)
1421
		fs.setIf((eax>>0)&1 == 1, IBSFFV)
1422
		fs.setIf((eax>>1)&1 == 1, IBSFETCHSAM)
1423
		fs.setIf((eax>>2)&1 == 1, IBSOPSAM)
1424
		fs.setIf((eax>>3)&1 == 1, IBSRDWROPCNT)
1425
		fs.setIf((eax>>4)&1 == 1, IBSOPCNT)
1426
		fs.setIf((eax>>5)&1 == 1, IBSBRNTRGT)
1427
		fs.setIf((eax>>6)&1 == 1, IBSOPCNTEXT)
1428
		fs.setIf((eax>>7)&1 == 1, IBSRIPINVALIDCHK)
1429
		fs.setIf((eax>>8)&1 == 1, IBS_OPFUSE)
1430
		fs.setIf((eax>>9)&1 == 1, IBS_FETCH_CTLX)
1431
		fs.setIf((eax>>10)&1 == 1, IBS_OPDATA4) // Doc says "Fixed,0. IBS op data 4 MSR supported", but assuming they mean 1.
1432
		fs.setIf((eax>>11)&1 == 1, IBS_ZEN4)
1433
	}
1434

1435
	if maxExtendedFunction() >= 0x8000001f && vend == AMD {
1436
		a, _, _, _ := cpuid(0x8000001f)
1437
		fs.setIf((a>>0)&1 == 1, SME)
1438
		fs.setIf((a>>1)&1 == 1, SEV)
1439
		fs.setIf((a>>2)&1 == 1, MSR_PAGEFLUSH)
1440
		fs.setIf((a>>3)&1 == 1, SEV_ES)
1441
		fs.setIf((a>>4)&1 == 1, SEV_SNP)
1442
		fs.setIf((a>>5)&1 == 1, VMPL)
1443
		fs.setIf((a>>10)&1 == 1, SME_COHERENT)
1444
		fs.setIf((a>>11)&1 == 1, SEV_64BIT)
1445
		fs.setIf((a>>12)&1 == 1, SEV_RESTRICTED)
1446
		fs.setIf((a>>13)&1 == 1, SEV_ALTERNATIVE)
1447
		fs.setIf((a>>14)&1 == 1, SEV_DEBUGSWAP)
1448
		fs.setIf((a>>15)&1 == 1, IBS_PREVENTHOST)
1449
		fs.setIf((a>>16)&1 == 1, VTE)
1450
		fs.setIf((a>>24)&1 == 1, VMSA_REGPROT)
1451
	}
1452

1453
	if maxExtendedFunction() >= 0x80000021 && vend == AMD {
1454
		a, _, _, _ := cpuid(0x80000021)
1455
		fs.setIf((a>>31)&1 == 1, SRSO_MSR_FIX)
1456
		fs.setIf((a>>30)&1 == 1, SRSO_USER_KERNEL_NO)
1457
		fs.setIf((a>>29)&1 == 1, SRSO_NO)
1458
		fs.setIf((a>>28)&1 == 1, IBPB_BRTYPE)
1459
		fs.setIf((a>>27)&1 == 1, SBPB)
1460
	}
1461

1462
	if mfi >= 0x20 {
1463
		// Microsoft has decided to purposefully hide the information
1464
		// of the guest TEE when VMs are being created using Hyper-V.
1465
		//
1466
		// This leads us to check for the Hyper-V cpuid features
1467
		// (0x4000000C), and then for the `ebx` value set.
1468
		//
1469
		// For Intel TDX, `ebx` is set as `0xbe3`, being 3 the part
1470
		// we're mostly interested about,according to:
1471
		// https://github.com/torvalds/linux/blob/d2f51b3516dade79269ff45eae2a7668ae711b25/arch/x86/include/asm/hyperv-tlfs.h#L169-L174
1472
		_, ebx, _, _ := cpuid(0x4000000C)
1473
		fs.setIf(ebx == 0xbe3, TDX_GUEST)
1474
	}
1475

1476
	if mfi >= 0x21 {
1477
		// Intel Trusted Domain Extensions Guests have their own cpuid leaf (0x21).
1478
		_, ebx, ecx, edx := cpuid(0x21)
1479
		identity := string(valAsString(ebx, edx, ecx))
1480
		fs.setIf(identity == "IntelTDX    ", TDX_GUEST)
1481
	}
1482

1483
	return fs
1484
}
1485

1486
// supportAVX10 returns the low 8 bits of EBX from CPUID leaf 0x24 when that
// leaf is available and the AVX10 feature is set on c; otherwise it returns 0.
func (c *CPUInfo) supportAVX10() uint8 {
	if c.maxFunc < 0x24 || !c.featureSet.inSet(AVX10) {
		return 0
	}
	_, ebx, _, _ := cpuidex(0x24, 0)
	return uint8(ebx)
}
1493

1494
// valAsString converts register values into their byte representation, taking
// the four bytes of each value least-significant first. The result stops just
// before the first zero byte, so a NUL-terminated register string comes back
// without its terminator or anything after it.
func valAsString(values ...uint32) []byte {
	out := make([]byte, 0, 4*len(values))
	for _, word := range values {
		for shift := uint(0); shift < 32; shift += 8 {
			ch := byte(word >> shift)
			if ch == 0 {
				return out
			}
			out = append(out, ch)
		}
	}
	return out
}
1515

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.