cubefs

cpuid.go
1291 строка · 41.5 Кб
Перенос по словам
1
// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
2

3
// Package cpuid provides information about the CPU running the current program.
4
//
5
// CPU features are detected on startup, and kept for fast access through the life of the application.
6
// Currently x86 / x64 (AMD64) as well as arm64 are supported.
7
//
8
// You can access the CPU information by accessing the shared CPU variable of the cpuid library.
9
//
10
// Package home: https://github.com/klauspost/cpuid
11
package cpuid
12

13
import (
14
	"flag"
15
	"fmt"
16
	"math"
17
	"math/bits"
18
	"os"
19
	"runtime"
20
	"strings"
21
)
22

23
// AMD reference: https://www.amd.com/system/files/TechDocs/25481.pdf
24
// and Processor Programming Reference (PPR)
25

26
// Vendor is a representation of a CPU vendor.
type Vendor int

// Known vendors. Values are assigned sequentially by iota in declaration
// order, starting with VendorUnknown at zero. lastVendor is the sentinel
// that follows the final real vendor.
const (
	VendorUnknown Vendor = iota
	Intel
	AMD
	VIA
	Transmeta
	NSC
	KVM  // Kernel-based Virtual Machine
	MSVM // Microsoft Hyper-V or Windows Virtual PC
	VMware
	XenHVM
	Bhyve
	Hygon
	SiS
	RDC

	Ampere
	ARM
	Broadcom
	Cavium
	DEC
	Fujitsu
	Infineon
	Motorola
	NVIDIA
	AMCC
	Qualcomm
	Marvell

	lastVendor
)
60

61
//go:generate stringer -type=FeatureID,Vendor
62

63
// FeatureID is the ID of a specific cpu feature.
type FeatureID int

// Feature identifiers. UNKNOWN occupies the first slot of the block with the
// explicit value -1, so the first real feature (ADX) receives iota value 1.
// lastID is one past the final feature and firstID is UNKNOWN + 1; together
// they bound the loops that walk every feature.
const (
	// Keep index -1 as unknown
	UNKNOWN = -1

	// Add features
	ADX                FeatureID = iota // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
	AESNI                               // Advanced Encryption Standard New Instructions
	AMD3DNOW                            // AMD 3DNOW
	AMD3DNOWEXT                         // AMD 3DNowExt
	AMXBF16                             // Tile computational operations on BFLOAT16 numbers
	AMXINT8                             // Tile computational operations on 8-bit integers
	AMXTILE                             // Tile architecture
	AVX                                 // AVX functions
	AVX2                                // AVX2 functions
	AVX512BF16                          // AVX-512 BFLOAT16 Instructions
	AVX512BITALG                        // AVX-512 Bit Algorithms
	AVX512BW                            // AVX-512 Byte and Word Instructions
	AVX512CD                            // AVX-512 Conflict Detection Instructions
	AVX512DQ                            // AVX-512 Doubleword and Quadword Instructions
	AVX512ER                            // AVX-512 Exponential and Reciprocal Instructions
	AVX512F                             // AVX-512 Foundation
	AVX512FP16                          // AVX-512 FP16 Instructions
	AVX512IFMA                          // AVX-512 Integer Fused Multiply-Add Instructions
	AVX512PF                            // AVX-512 Prefetch Instructions
	AVX512VBMI                          // AVX-512 Vector Bit Manipulation Instructions
	AVX512VBMI2                         // AVX-512 Vector Bit Manipulation Instructions, Version 2
	AVX512VL                            // AVX-512 Vector Length Extensions
	AVX512VNNI                          // AVX-512 Vector Neural Network Instructions
	AVX512VP2INTERSECT                  // AVX-512 Intersect for D/Q
	AVX512VPOPCNTDQ                     // AVX-512 Vector Population Count Doubleword and Quadword
	AVXSLOW                             // Indicates the CPU performs 2 128 bit operations instead of one
	AVXVNNI                             // AVX (VEX encoded) VNNI neural network instructions
	BMI1                                // Bit Manipulation Instruction Set 1
	BMI2                                // Bit Manipulation Instruction Set 2
	CETIBT                              // Intel CET Indirect Branch Tracking
	CETSS                               // Intel CET Shadow Stack
	CLDEMOTE                            // Cache Line Demote
	CLMUL                               // Carry-less Multiplication
	CLZERO                              // CLZERO instruction supported
	CMOV                                // i686 CMOV
	CMPSB_SCADBS_SHORT                  // Fast short CMPSB and SCASB
	CMPXCHG8                            // CMPXCHG8 instruction
	CPBOOST                             // Core Performance Boost
	CX16                                // CMPXCHG16B Instruction
	ENQCMD                              // Enqueue Command
	ERMS                                // Enhanced REP MOVSB/STOSB
	F16C                                // Half-precision floating-point conversion
	FMA3                                // Intel FMA 3. Does not imply AVX.
	FMA4                                // Bulldozer FMA4 functions
	FXSR                                // FXSAVE, FXRSTOR instructions, CR4 bit 9
	FXSROPT                             // FXSAVE/FXRSTOR optimizations
	GFNI                                // Galois Field New Instructions. May require other features (AVX, AVX512VL,AVX512F) based on usage.
	HLE                                 // Hardware Lock Elision
	HRESET                              // If set CPU supports history reset and the IA32_HRESET_ENABLE MSR
	HTT                                 // Hyperthreading (enabled)
	HWA                                 // Hardware assert supported. Indicates support for MSRC001_10
	HYPERVISOR                          // This bit has been reserved by Intel & AMD for use by hypervisors
	IBPB                                // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB)
	IBS                                 // Instruction Based Sampling (AMD)
	IBSBRNTRGT                          // Instruction Based Sampling Feature (AMD)
	IBSFETCHSAM                         // Instruction Based Sampling Feature (AMD)
	IBSFFV                              // Instruction Based Sampling Feature (AMD)
	IBSOPCNT                            // Instruction Based Sampling Feature (AMD)
	IBSOPCNTEXT                         // Instruction Based Sampling Feature (AMD)
	IBSOPSAM                            // Instruction Based Sampling Feature (AMD)
	IBSRDWROPCNT                        // Instruction Based Sampling Feature (AMD)
	IBSRIPINVALIDCHK                    // Instruction Based Sampling Feature (AMD)
	IBS_PREVENTHOST                     // Disallowing IBS use by the host supported
	INT_WBINVD                          // WBINVD/WBNOINVD are interruptible.
	INVLPGB                             // INVLPGB and TLBSYNC instructions supported
	LAHF                                // LAHF/SAHF in long mode
	LAM                                 // If set, CPU supports Linear Address Masking
	LBRVIRT                             // LBR virtualization
	LZCNT                               // LZCNT instruction
	MCAOVERFLOW                         // MCA overflow recovery support.
	MCOMMIT                             // MCOMMIT instruction supported
	MMX                                 // standard MMX
	MMXEXT                              // SSE integer functions or AMD MMX ext
	MOVBE                               // MOVBE instruction (big-endian)
	MOVDIR64B                           // Move 64 Bytes as Direct Store
	MOVDIRI                             // Move Doubleword as Direct Store
	MOVSB_ZL                            // Fast Zero-Length MOVSB
	MPX                                 // Intel MPX (Memory Protection Extensions)
	MSRIRC                              // Instruction Retired Counter MSR available
	MSR_PAGEFLUSH                       // Page Flush MSR available
	NRIPS                               // Indicates support for NRIP save on VMEXIT
	NX                                  // NX (No-Execute) bit
	OSXSAVE                             // XSAVE enabled by OS
	PCONFIG                             // PCONFIG for Intel Multi-Key Total Memory Encryption
	POPCNT                              // POPCNT instruction
	RDPRU                               // RDPRU instruction supported
	RDRAND                              // RDRAND instruction is available
	RDSEED                              // RDSEED instruction is available
	RDTSCP                              // RDTSCP Instruction
	RTM                                 // Restricted Transactional Memory
	RTM_ALWAYS_ABORT                    // Indicates that the loaded microcode is forcing RTM abort.
	SERIALIZE                           // Serialize Instruction Execution
	SEV                                 // AMD Secure Encrypted Virtualization supported
	SEV_64BIT                           // AMD SEV guest execution only allowed from a 64-bit host
	SEV_ALTERNATIVE                     // AMD SEV Alternate Injection supported
	SEV_DEBUGSWAP                       // Full debug state swap supported for SEV-ES guests
	SEV_ES                              // AMD SEV Encrypted State supported
	SEV_RESTRICTED                      // AMD SEV Restricted Injection supported
	SEV_SNP                             // AMD SEV Secure Nested Paging supported
	SGX                                 // Software Guard Extensions
	SGXLC                               // Software Guard Extensions Launch Control
	SHA                                 // Intel SHA Extensions
	SME                                 // AMD Secure Memory Encryption supported
	SME_COHERENT                        // AMD Hardware cache coherency across encryption domains enforced
	SSE                                 // SSE functions
	SSE2                                // P4 SSE functions
	SSE3                                // Prescott SSE3 functions
	SSE4                                // Penryn SSE4.1 functions
	SSE42                               // Nehalem SSE4.2 functions
	SSE4A                               // AMD Barcelona microarchitecture SSE4a instructions
	SSSE3                               // Conroe SSSE3 functions
	STIBP                               // Single Thread Indirect Branch Predictors
	STOSB_SHORT                         // Fast short STOSB
	SUCCOR                              // Software uncorrectable error containment and recovery capability.
	SVM                                 // AMD Secure Virtual Machine
	SVMDA                               // Indicates support for the SVM decode assists.
	SVMFBASID                           // SVM, Indicates that TLB flush events, including CR3 writes and CR4.PGE toggles, flush only the current ASID's TLB entries. Also indicates support for the extended VMCBTLB_Control
	SVML                                // AMD SVM lock. Indicates support for SVM-Lock.
	SVMNP                               // AMD SVM nested paging
	SVMPF                               // SVM pause intercept filter. Indicates support for the pause intercept filter
	SVMPFT                              // SVM PAUSE filter threshold. Indicates support for the PAUSE filter cycle count threshold
	SYSCALL                             // System-Call Extension (SCE): SYSCALL and SYSRET instructions.
	SYSEE                               // SYSENTER and SYSEXIT instructions
	TBM                                 // AMD Trailing Bit Manipulation
	TOPEXT                              // TopologyExtensions: topology extensions support. Indicates support for CPUID Fn8000_001D_EAX_x[N:0]-CPUID Fn8000_001E_EDX.
	TME                                 // Intel Total Memory Encryption. The following MSRs are supported: IA32_TME_CAPABILITY, IA32_TME_ACTIVATE, IA32_TME_EXCLUDE_MASK, and IA32_TME_EXCLUDE_BASE.
	TSCRATEMSR                          // MSR based TSC rate control. Indicates support for MSR TSC ratio MSRC000_0104
	TSXLDTRK                            // Intel TSX Suspend Load Address Tracking
	VAES                                // Vector AES. AVX(512) versions requires additional checks.
	VMCBCLEAN                           // VMCB clean bits. Indicates support for VMCB clean bits.
	VMPL                                // AMD VM Permission Levels supported
	VMSA_REGPROT                        // AMD VMSA Register Protection supported
	VMX                                 // Virtual Machine Extensions
	VPCLMULQDQ                          // Carry-Less Multiplication Quadword. Requires AVX for 3 register versions.
	VTE                                 // AMD Virtual Transparent Encryption supported
	WAITPKG                             // TPAUSE, UMONITOR, UMWAIT
	WBNOINVD                            // Write Back and Do Not Invalidate Cache
	X87                                 // FPU
	XGETBV1                             // Supports XGETBV with ECX = 1
	XOP                                 // Bulldozer XOP functions
	XSAVE                               // XSAVE, XRSTOR, XSETBV, XGETBV
	XSAVEC                              // Supports XSAVEC and the compacted form of XRSTOR.
	XSAVEOPT                            // XSAVEOPT available
	XSAVES                              // Supports XSAVES/XRSTORS and IA32_XSS

	// ARM features:
	AESARM   // AES instructions
	ARMCPUID // Some CPU ID registers readable at user-level
	ASIMD    // Advanced SIMD
	ASIMDDP  // SIMD Dot Product
	ASIMDHP  // Advanced SIMD half-precision floating point
	ASIMDRDM // Rounding Double Multiply Accumulate/Subtract (SQRDMLAH/SQRDMLSH)
	ATOMICS  // Large System Extensions (LSE)
	CRC32    // CRC32/CRC32C instructions
	DCPOP    // Data cache clean to Point of Persistence (DC CVAP)
	EVTSTRM  // Generic timer
	FCMA     // Floating point complex number addition and multiplication
	FP       // Single-precision and double-precision floating point
	FPHP     // Half-precision floating point
	GPA      // Generic Pointer Authentication
	JSCVT    // Javascript-style double->int convert (FJCVTZS)
	LRCPC    // Weaker release consistency (LDAPR, etc)
	PMULL    // Polynomial Multiply instructions (PMULL/PMULL2)
	SHA1     // SHA-1 instructions (SHA1C, etc)
	SHA2     // SHA-2 instructions (SHA256H, etc)
	SHA3     // SHA-3 instructions (EOR3, RAXI, XAR, BCAX)
	SHA512   // SHA512 instructions
	SM3      // SM3 instructions
	SM4      // SM4 instructions
	SVE      // Scalable Vector Extension
	// Keep it last. It automatically defines the size of []flagSet
	lastID

	firstID FeatureID = UNKNOWN + 1
)
246

247
// CPUInfo contains information about the detected system CPU.
248
type CPUInfo struct {
249
	BrandName      string  // Brand name reported by the CPU
250
	VendorID       Vendor  // Comparable CPU vendor ID
251
	VendorString   string  // Raw vendor string.
252
	featureSet     flagSet // Features of the CPU
253
	PhysicalCores  int     // Number of physical processor cores in your CPU. Will be 0 if undetectable.
254
	ThreadsPerCore int     // Number of threads per physical core. Will be 1 if undetectable.
255
	LogicalCores   int     // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable.
256
	Family         int     // CPU family number
257
	Model          int     // CPU model number
258
	Stepping       int     // CPU stepping info
259
	CacheLine      int     // Cache line size in bytes. Will be 0 if undetectable.
260
	Hz             int64   // Clock speed, if known, 0 otherwise. Will attempt to contain base clock speed.
261
	BoostFreq      int64   // Max clock speed, if known, 0 otherwise
262
	Cache          struct {
263
		L1I int // L1 Instruction Cache (per core or shared). Will be -1 if undetected
264
		L1D int // L1 Data Cache (per core or shared). Will be -1 if undetected
265
		L2  int // L2 Cache (per core or shared). Will be -1 if undetected
266
		L3  int // L3 Cache (per core, per ccx or shared). Will be -1 if undetected
267
	}
268
	SGX       SGXSupport
269
	maxFunc   uint32
270
	maxExFunc uint32
271
}
272

273
// Low-level hooks used by the detection code. The four instruction wrappers
// are declared without a value here and must be assigned elsewhere before
// use; darwinHasAVX512 defaults to a stub that reports false.
var (
	cpuid           func(op uint32) (eax, ebx, ecx, edx uint32)
	cpuidex         func(op, op2 uint32) (eax, ebx, ecx, edx uint32)
	xgetbv          func(index uint32) (eax, edx uint32)
	rdtscpAsm       func() (eax, ebx, ecx, edx uint32)
	darwinHasAVX512 = func() bool { return false }
)
278

279
// CPU contains information about the CPU as detected on startup,
280
// or when Detect last was called.
281
//
282
// Use this as the primary entry point to you data.
283
var CPU CPUInfo
284

285
func init() {
286
	initCPU()
287
	Detect()
288
}
289

290
// Detect will re-detect current CPU info.
291
// This will replace the content of the exported CPU variable.
292
//
293
// Unless you expect the CPU to change while you are running your program
294
// you should not need to call this function.
295
// If you call this, you must ensure that no other goroutine is accessing the
296
// exported CPU variable.
297
func Detect() {
298
	// Set defaults
299
	CPU.ThreadsPerCore = 1
300
	CPU.Cache.L1I = -1
301
	CPU.Cache.L1D = -1
302
	CPU.Cache.L2 = -1
303
	CPU.Cache.L3 = -1
304
	safe := true
305
	if detectArmFlag != nil {
306
		safe = !*detectArmFlag
307
	}
308
	addInfo(&CPU, safe)
309
	if displayFeats != nil && *displayFeats {
310
		fmt.Println("cpu features:", strings.Join(CPU.FeatureSet(), ","))
311
		// Exit with non-zero so tests will print value.
312
		os.Exit(1)
313
	}
314
	if disableFlag != nil {
315
		s := strings.Split(*disableFlag, ",")
316
		for _, feat := range s {
317
			feat := ParseFeature(strings.TrimSpace(feat))
318
			if feat != UNKNOWN {
319
				CPU.featureSet.unset(feat)
320
			}
321
		}
322
	}
323
}
324

325
// DetectARM will detect ARM64 features.
326
// This is NOT done automatically since it can potentially crash
327
// if the OS does not handle the command.
328
// If in the future this can be done safely this function may not
329
// do anything.
330
func DetectARM() {
331
	addInfo(&CPU, false)
332
}
333

334
// Command-line flag values. They stay nil until Flags registers them, which
// is why Detect nil-checks each pointer before dereferencing it.
var (
	detectArmFlag *bool
	displayFeats  *bool
	disableFlag   *string
)
337

338
// Flags will enable flags.
339
// This must be called *before* flag.Parse AND
340
// Detect must be called after the flags have been parsed.
341
// Note that this means that any detection used in init() functions
342
// will not contain these flags.
343
func Flags() {
344
	disableFlag = flag.String("cpu.disable", "", "disable cpu features; comma separated list")
345
	displayFeats = flag.Bool("cpu.features", false, "lists cpu features and exits")
346
	detectArmFlag = flag.Bool("cpu.arm", false, "allow ARM features to be detected; can potentially crash")
347
}
348

349
// Supports returns whether the CPU supports all of the requested features.
350
func (c CPUInfo) Supports(ids ...FeatureID) bool {
351
	for _, id := range ids {
352
		if !c.featureSet.inSet(id) {
353
			return false
354
		}
355
	}
356
	return true
357
}
358

359
// Has allows for checking a single feature.
360
// Should be inlined by the compiler.
361
func (c CPUInfo) Has(id FeatureID) bool {
362
	return c.featureSet.inSet(id)
363
}
364

365
// AnyOf returns whether the CPU supports one or more of the requested features.
366
func (c CPUInfo) AnyOf(ids ...FeatureID) bool {
367
	for _, id := range ids {
368
		if c.featureSet.inSet(id) {
369
			return true
370
		}
371
	}
372
	return false
373
}
374

375
// https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels
376
var level1Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2)
377
var level2Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3)
378
var level3Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE)
379
var level4Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE, AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL)
380

381
// X64Level returns the microarchitecture level detected on the CPU.
382
// If features are lacking or non x64 mode, 0 is returned.
383
// See https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels
384
func (c CPUInfo) X64Level() int {
385
	if c.featureSet.hasSet(level4Features) {
386
		return 4
387
	}
388
	if c.featureSet.hasSet(level3Features) {
389
		return 3
390
	}
391
	if c.featureSet.hasSet(level2Features) {
392
		return 2
393
	}
394
	if c.featureSet.hasSet(level1Features) {
395
		return 1
396
	}
397
	return 0
398
}
399

400
// Disable will disable one or several features.
401
func (c *CPUInfo) Disable(ids ...FeatureID) bool {
402
	for _, id := range ids {
403
		c.featureSet.unset(id)
404
	}
405
	return true
406
}
407

408
// Enable will disable one or several features even if they were undetected.
409
// This is of course not recommended for obvious reasons.
410
func (c *CPUInfo) Enable(ids ...FeatureID) bool {
411
	for _, id := range ids {
412
		c.featureSet.set(id)
413
	}
414
	return true
415
}
416

417
// IsVendor returns true if vendor is recognized as Intel
418
func (c CPUInfo) IsVendor(v Vendor) bool {
419
	return c.VendorID == v
420
}
421

422
// FeatureSet returns all available features as strings.
423
func (c CPUInfo) FeatureSet() []string {
424
	s := make([]string, 0, c.featureSet.nEnabled())
425
	s = append(s, c.featureSet.Strings()...)
426
	return s
427
}
428

429
// RTCounter returns the 64-bit time-stamp counter
430
// Uses the RDTSCP instruction. The value 0 is returned
431
// if the CPU does not support the instruction.
432
func (c CPUInfo) RTCounter() uint64 {
433
	if !c.Supports(RDTSCP) {
434
		return 0
435
	}
436
	a, _, _, d := rdtscpAsm()
437
	return uint64(a) | (uint64(d) << 32)
438
}
439

440
// Ia32TscAux returns the IA32_TSC_AUX part of the RDTSCP.
441
// This variable is OS dependent, but on Linux contains information
442
// about the current cpu/core the code is running on.
443
// If the RDTSCP instruction isn't supported on the CPU, the value 0 is returned.
444
func (c CPUInfo) Ia32TscAux() uint32 {
445
	if !c.Supports(RDTSCP) {
446
		return 0
447
	}
448
	_, _, ecx, _ := rdtscpAsm()
449
	return ecx
450
}
451

452
// LogicalCPU will return the Logical CPU the code is currently executing on.
453
// This is likely to change when the OS re-schedules the running thread
454
// to another CPU.
455
// If the current core cannot be detected, -1 will be returned.
456
func (c CPUInfo) LogicalCPU() int {
457
	if c.maxFunc < 1 {
458
		return -1
459
	}
460
	_, ebx, _, _ := cpuid(1)
461
	return int(ebx >> 24)
462
}
463

464
// frequencies tries to compute the clock speed of the CPU. If leaf 15 is
465
// supported, use it, otherwise parse the brand string. Yes, really.
466
func (c *CPUInfo) frequencies() {
467
	c.Hz, c.BoostFreq = 0, 0
468
	mfi := maxFunctionID()
469
	if mfi >= 0x15 {
470
		eax, ebx, ecx, _ := cpuid(0x15)
471
		if eax != 0 && ebx != 0 && ecx != 0 {
472
			c.Hz = (int64(ecx) * int64(ebx)) / int64(eax)
473
		}
474
	}
475
	if mfi >= 0x16 {
476
		a, b, _, _ := cpuid(0x16)
477
		// Base...
478
		if a&0xffff > 0 {
479
			c.Hz = int64(a&0xffff) * 1_000_000
480
		}
481
		// Boost...
482
		if b&0xffff > 0 {
483
			c.BoostFreq = int64(b&0xffff) * 1_000_000
484
		}
485
	}
486
	if c.Hz > 0 {
487
		return
488
	}
489

490
	// computeHz determines the official rated speed of a CPU from its brand
491
	// string. This insanity is *actually the official documented way to do
492
	// this according to Intel*, prior to leaf 0x15 existing. The official
493
	// documentation only shows this working for exactly `x.xx` or `xxxx`
494
	// cases, e.g., `2.50GHz` or `1300MHz`; this parser will accept other
495
	// sizes.
496
	model := c.BrandName
497
	hz := strings.LastIndex(model, "Hz")
498
	if hz < 3 {
499
		return
500
	}
501
	var multiplier int64
502
	switch model[hz-1] {
503
	case 'M':
504
		multiplier = 1000 * 1000
505
	case 'G':
506
		multiplier = 1000 * 1000 * 1000
507
	case 'T':
508
		multiplier = 1000 * 1000 * 1000 * 1000
509
	}
510
	if multiplier == 0 {
511
		return
512
	}
513
	freq := int64(0)
514
	divisor := int64(0)
515
	decimalShift := int64(1)
516
	var i int
517
	for i = hz - 2; i >= 0 && model[i] != ' '; i-- {
518
		if model[i] >= '0' && model[i] <= '9' {
519
			freq += int64(model[i]-'0') * decimalShift
520
			decimalShift *= 10
521
		} else if model[i] == '.' {
522
			if divisor != 0 {
523
				return
524
			}
525
			divisor = decimalShift
526
		} else {
527
			return
528
		}
529
	}
530
	// we didn't find a space
531
	if i < 0 {
532
		return
533
	}
534
	if divisor != 0 {
535
		c.Hz = (freq * multiplier) / divisor
536
		return
537
	}
538
	c.Hz = freq * multiplier
539
}
540

541
// VM Will return true if the cpu id indicates we are in
542
// a virtual machine.
543
func (c CPUInfo) VM() bool {
544
	return CPU.featureSet.inSet(HYPERVISOR)
545
}
546

547
// flags contains detected cpu features and characteristics
548
type flags uint64
549

550
// log2(bits_in_uint64)
551
const flagBitsLog2 = 6
552
const flagBits = 1 << flagBitsLog2
553
const flagMask = flagBits - 1
554

555
// flagSet contains detected cpu features and characteristics in an array of flags
556
type flagSet [(lastID + flagMask) / flagBits]flags
557

558
// inSet reports whether the bit for feat is set.
func (s flagSet) inSet(feat FeatureID) bool {
	word, bit := feat>>flagBitsLog2, feat&flagMask
	return s[word]&(1<<bit) != 0
}
561

562
// set turns on the bit for feat.
func (s *flagSet) set(feat FeatureID) {
	word, bit := feat>>flagBitsLog2, feat&flagMask
	s[word] |= 1 << bit
}
565

566
// setIf will set a feature if boolean is true.
567
func (s *flagSet) setIf(cond bool, features ...FeatureID) {
568
	if cond {
569
		for _, offset := range features {
570
			s[offset>>flagBitsLog2] |= 1 << (offset & flagMask)
571
		}
572
	}
573
}
574

575
// unset clears the bit for offset, leaving all other bits untouched.
func (s *flagSet) unset(offset FeatureID) {
	s[offset>>flagBitsLog2] &^= 1 << (offset & flagMask)
}
579

580
// or with another flagset.
581
func (s *flagSet) or(other flagSet) {
582
	for i, v := range other[:] {
583
		s[i] |= v
584
	}
585
}
586

587
// hasSet returns whether all features are present.
588
func (s flagSet) hasSet(other flagSet) bool {
589
	for i, v := range other[:] {
590
		if s[i]&v != v {
591
			return false
592
		}
593
	}
594
	return true
595
}
596

597
// nEnabled will return the number of enabled flags.
598
func (s flagSet) nEnabled() (n int) {
599
	for _, v := range s[:] {
600
		n += bits.OnesCount64(uint64(v))
601
	}
602
	return n
603
}
604

605
// flagSetWith builds a flagSet that contains exactly the given features.
func flagSetWith(feat ...FeatureID) flagSet {
	var fs flagSet
	fs.setIf(true, feat...)
	return fs
}
612

613
// ParseFeature will parse the string and return the ID of the matching feature.
614
// Will return UNKNOWN if not found.
615
func ParseFeature(s string) FeatureID {
616
	s = strings.ToUpper(s)
617
	for i := firstID; i < lastID; i++ {
618
		if i.String() == s {
619
			return i
620
		}
621
	}
622
	return UNKNOWN
623
}
624

625
// Strings returns an array of the detected features for FlagsSet.
626
func (s flagSet) Strings() []string {
627
	if len(s) == 0 {
628
		return []string{""}
629
	}
630
	r := make([]string, 0)
631
	for i := firstID; i < lastID; i++ {
632
		if s.inSet(i) {
633
			r = append(r, i.String())
634
		}
635
	}
636
	return r
637
}
638

639
func maxExtendedFunction() uint32 {
640
	eax, _, _, _ := cpuid(0x80000000)
641
	return eax
642
}
643

644
func maxFunctionID() uint32 {
645
	a, _, _, _ := cpuid(0)
646
	return a
647
}
648

649
func brandName() string {
650
	if maxExtendedFunction() >= 0x80000004 {
651
		v := make([]uint32, 0, 48)
652
		for i := uint32(0); i < 3; i++ {
653
			a, b, c, d := cpuid(0x80000002 + i)
654
			v = append(v, a, b, c, d)
655
		}
656
		return strings.Trim(string(valAsString(v...)), " ")
657
	}
658
	return "unknown"
659
}
660

661
func threadsPerCore() int {
662
	mfi := maxFunctionID()
663
	vend, _ := vendorID()
664

665
	if mfi < 0x4 || (vend != Intel && vend != AMD) {
666
		return 1
667
	}
668

669
	if mfi < 0xb {
670
		if vend != Intel {
671
			return 1
672
		}
673
		_, b, _, d := cpuid(1)
674
		if (d & (1 << 28)) != 0 {
675
			// v will contain logical core count
676
			v := (b >> 16) & 255
677
			if v > 1 {
678
				a4, _, _, _ := cpuid(4)
679
				// physical cores
680
				v2 := (a4 >> 26) + 1
681
				if v2 > 0 {
682
					return int(v) / int(v2)
683
				}
684
			}
685
		}
686
		return 1
687
	}
688
	_, b, _, _ := cpuidex(0xb, 0)
689
	if b&0xffff == 0 {
690
		if vend == AMD {
691
			// Workaround for AMD returning 0, assume 2 if >= Zen 2
692
			// It will be more correct than not.
693
			fam, _, _ := familyModel()
694
			_, _, _, d := cpuid(1)
695
			if (d&(1<<28)) != 0 && fam >= 23 {
696
				return 2
697
			}
698
		}
699
		return 1
700
	}
701
	return int(b & 0xffff)
702
}
703

704
func logicalCores() int {
705
	mfi := maxFunctionID()
706
	v, _ := vendorID()
707
	switch v {
708
	case Intel:
709
		// Use this on old Intel processors
710
		if mfi < 0xb {
711
			if mfi < 1 {
712
				return 0
713
			}
714
			// CPUID.1:EBX[23:16] represents the maximum number of addressable IDs (initial APIC ID)
715
			// that can be assigned to logical processors in a physical package.
716
			// The value may not be the same as the number of logical processors that are present in the hardware of a physical package.
717
			_, ebx, _, _ := cpuid(1)
718
			logical := (ebx >> 16) & 0xff
719
			return int(logical)
720
		}
721
		_, b, _, _ := cpuidex(0xb, 1)
722
		return int(b & 0xffff)
723
	case AMD, Hygon:
724
		_, b, _, _ := cpuid(1)
725
		return int((b >> 16) & 0xff)
726
	default:
727
		return 0
728
	}
729
}
730

731
func familyModel() (family, model, stepping int) {
732
	if maxFunctionID() < 0x1 {
733
		return 0, 0, 0
734
	}
735
	eax, _, _, _ := cpuid(1)
736
	// If BaseFamily[3:0] is less than Fh then ExtendedFamily[7:0] is reserved and Family is equal to BaseFamily[3:0].
737
	family = int((eax >> 8) & 0xf)
738
	extFam := family == 0x6 // Intel is 0x6, needs extended model.
739
	if family == 0xf {
740
		// Add ExtFamily
741
		family += int((eax >> 20) & 0xff)
742
		extFam = true
743
	}
744
	// If BaseFamily[3:0] is less than 0Fh then ExtendedModel[3:0] is reserved and Model is equal to BaseModel[3:0].
745
	model = int((eax >> 4) & 0xf)
746
	if extFam {
747
		// Add ExtModel
748
		model += int((eax >> 12) & 0xf0)
749
	}
750
	stepping = int(eax & 0xf)
751
	return family, model, stepping
752
}
753

754
func physicalCores() int {
755
	v, _ := vendorID()
756
	switch v {
757
	case Intel:
758
		return logicalCores() / threadsPerCore()
759
	case AMD, Hygon:
760
		lc := logicalCores()
761
		tpc := threadsPerCore()
762
		if lc > 0 && tpc > 0 {
763
			return lc / tpc
764
		}
765

766
		// The following is inaccurate on AMD EPYC 7742 64-Core Processor
767
		if maxExtendedFunction() >= 0x80000008 {
768
			_, _, c, _ := cpuid(0x80000008)
769
			if c&0xff > 0 {
770
				return int(c&0xff) + 1
771
			}
772
		}
773
	}
774
	return 0
775
}
776

777
// Except from http://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID
778
var vendorMapping = map[string]Vendor{
779
	"AMDisbetter!": AMD,
780
	"AuthenticAMD": AMD,
781
	"CentaurHauls": VIA,
782
	"GenuineIntel": Intel,
783
	"TransmetaCPU": Transmeta,
784
	"GenuineTMx86": Transmeta,
785
	"Geode by NSC": NSC,
786
	"VIA VIA VIA ": VIA,
787
	"KVMKVMKVMKVM": KVM,
788
	"Microsoft Hv": MSVM,
789
	"VMwareVMware": VMware,
790
	"XenVMMXenVMM": XenHVM,
791
	"bhyve bhyve ": Bhyve,
792
	"HygonGenuine": Hygon,
793
	"Vortex86 SoC": SiS,
794
	"SiS SiS SiS ": SiS,
795
	"RiseRiseRise": SiS,
796
	"Genuine  RDC": RDC,
797
}
798

799
// vendorID reads the vendor string from CPUID leaf 0 and returns the matching
// Vendor together with the raw string.
//
// Strings that have no entry in vendorMapping yield VendorUnknown.
func vendorID() (Vendor, string) {
	_, ebx, ecx, edx := cpuid(0)
	// The string is assembled from the registers in EBX, EDX, ECX order.
	name := string(valAsString(ebx, edx, ecx))
	if vend, known := vendorMapping[name]; known {
		return vend, name
	}
	return VendorUnknown, name
}
808

809
func cacheLine() int {
810
	if maxFunctionID() < 0x1 {
811
		return 0
812
	}
813

814
	_, ebx, _, _ := cpuid(1)
815
	cache := (ebx & 0xff00) >> 5 // cflush size
816
	if cache == 0 && maxExtendedFunction() >= 0x80000006 {
817
		_, _, ecx, _ := cpuid(0x80000006)
818
		cache = ecx & 0xff // cacheline size
819
	}
820
	// TODO: Read from Cache and TLB Information
821
	return int(cache)
822
}
823

824
func (c *CPUInfo) cacheSize() {
825
	c.Cache.L1D = -1
826
	c.Cache.L1I = -1
827
	c.Cache.L2 = -1
828
	c.Cache.L3 = -1
829
	vendor, _ := vendorID()
830
	switch vendor {
831
	case Intel:
832
		if maxFunctionID() < 4 {
833
			return
834
		}
835
		c.Cache.L1I, c.Cache.L1D, c.Cache.L2, c.Cache.L3 = 0, 0, 0, 0
836
		for i := uint32(0); ; i++ {
837
			eax, ebx, ecx, _ := cpuidex(4, i)
838
			cacheType := eax & 15
839
			if cacheType == 0 {
840
				break
841
			}
842
			cacheLevel := (eax >> 5) & 7
843
			coherency := int(ebx&0xfff) + 1
844
			partitions := int((ebx>>12)&0x3ff) + 1
845
			associativity := int((ebx>>22)&0x3ff) + 1
846
			sets := int(ecx) + 1
847
			size := associativity * partitions * coherency * sets
848
			switch cacheLevel {
849
			case 1:
850
				if cacheType == 1 {
851
					// 1 = Data Cache
852
					c.Cache.L1D = size
853
				} else if cacheType == 2 {
854
					// 2 = Instruction Cache
855
					c.Cache.L1I = size
856
				} else {
857
					if c.Cache.L1D < 0 {
858
						c.Cache.L1I = size
859
					}
860
					if c.Cache.L1I < 0 {
861
						c.Cache.L1I = size
862
					}
863
				}
864
			case 2:
865
				c.Cache.L2 = size
866
			case 3:
867
				c.Cache.L3 = size
868
			}
869
		}
870
	case AMD, Hygon:
871
		// Untested.
872
		if maxExtendedFunction() < 0x80000005 {
873
			return
874
		}
875
		_, _, ecx, edx := cpuid(0x80000005)
876
		c.Cache.L1D = int(((ecx >> 24) & 0xFF) * 1024)
877
		c.Cache.L1I = int(((edx >> 24) & 0xFF) * 1024)
878

879
		if maxExtendedFunction() < 0x80000006 {
880
			return
881
		}
882
		_, _, ecx, _ = cpuid(0x80000006)
883
		c.Cache.L2 = int(((ecx >> 16) & 0xFFFF) * 1024)
884

885
		// CPUID Fn8000_001D_EAX_x[N:0] Cache Properties
886
		if maxExtendedFunction() < 0x8000001D || !c.Has(TOPEXT) {
887
			return
888
		}
889

890
		// Xen Hypervisor is buggy and returns the same entry no matter ECX value.
891
		// Hack: When we encounter the same entry 100 times we break.
892
		nSame := 0
893
		var last uint32
894
		for i := uint32(0); i < math.MaxUint32; i++ {
895
			eax, ebx, ecx, _ := cpuidex(0x8000001D, i)
896

897
			level := (eax >> 5) & 7
898
			cacheNumSets := ecx + 1
899
			cacheLineSize := 1 + (ebx & 2047)
900
			cachePhysPartitions := 1 + ((ebx >> 12) & 511)
901
			cacheNumWays := 1 + ((ebx >> 22) & 511)
902

903
			typ := eax & 15
904
			size := int(cacheNumSets * cacheLineSize * cachePhysPartitions * cacheNumWays)
905
			if typ == 0 {
906
				return
907
			}
908

909
			// Check for the same value repeated.
910
			comb := eax ^ ebx ^ ecx
911
			if comb == last {
912
				nSame++
913
				if nSame == 100 {
914
					return
915
				}
916
			}
917
			last = comb
918

919
			switch level {
920
			case 1:
921
				switch typ {
922
				case 1:
923
					// Data cache
924
					c.Cache.L1D = size
925
				case 2:
926
					// Inst cache
927
					c.Cache.L1I = size
928
				default:
929
					if c.Cache.L1D < 0 {
930
						c.Cache.L1I = size
931
					}
932
					if c.Cache.L1I < 0 {
933
						c.Cache.L1I = size
934
					}
935
				}
936
			case 2:
937
				c.Cache.L2 = size
938
			case 3:
939
				c.Cache.L3 = size
940
			}
941
		}
942
	}
943
}
944

945
// SGXEPCSection describes a single SGX EPC section as enumerated by hasSGX
// from the sub-leaves of CPUID leaf 0x12.
type SGXEPCSection struct {
	// BaseAddress is the base address of the section and EPCSize its size.
	// Both are assembled from a low register (bits 31:12) and the low 20 bits
	// of a high register.
	BaseAddress, EPCSize uint64
}
949

950
type SGXSupport struct {
951
	Available           bool
952
	LaunchControl       bool
953
	SGX1Supported       bool
954
	SGX2Supported       bool
955
	MaxEnclaveSizeNot64 int64
956
	MaxEnclaveSize64    int64
957
	EPCSections         []SGXEPCSection
958
}
959

960
// hasSGX builds the SGXSupport description from CPUID leaf 0x12.
//
// available reports whether SGX is present at all; when it is false only
// rval.Available is set and no CPUID query is made. lc is stored verbatim as
// rval.LaunchControl.
func hasSGX(available, lc bool) (rval SGXSupport) {
	rval.Available = available
	if !available {
		return rval
	}
	rval.LaunchControl = lc

	// Sub-leaf 0: capability bits in EAX, enclave size exponents in EDX.
	caps, _, _, sizes := cpuidex(0x12, 0)
	rval.SGX1Supported = caps&0x01 != 0
	rval.SGX2Supported = caps&0x02 != 0
	rval.MaxEnclaveSizeNot64 = int64(1) << (sizes & 0xFF)     // pow 2
	rval.MaxEnclaveSize64 = int64(1) << ((sizes >> 8) & 0xFF) // pow 2
	rval.EPCSections = make([]SGXEPCSection, 0)

	// Sub-leaves 2..9 may each describe one EPC section.
	const firstSubleaf, maxSubleaves = 2, 8
	for n := uint32(0); n < maxSubleaves; n++ {
		eax, ebx, ecx, edx := cpuidex(0x12, firstSubleaf+n)
		switch eax & 0xf {
		case 0:
			// Invalid subleaf, stop iterating
			return rval
		case 1:
			// EPC Section subleaf
			rval.EPCSections = append(rval.EPCSections, SGXEPCSection{
				BaseAddress: uint64(eax&0xfffff000) + (uint64(ebx&0x000fffff) << 32),
				EPCSize:     uint64(ecx&0xfffff000) + (uint64(edx&0x000fffff) << 32),
			})
		}
	}
	return rval
}
995

996
func support() flagSet {
997
	var fs flagSet
998
	mfi := maxFunctionID()
999
	vend, _ := vendorID()
1000
	if mfi < 0x1 {
1001
		return fs
1002
	}
1003
	family, model, _ := familyModel()
1004

1005
	_, _, c, d := cpuid(1)
1006
	fs.setIf((d&(1<<0)) != 0, X87)
1007
	fs.setIf((d&(1<<8)) != 0, CMPXCHG8)
1008
	fs.setIf((d&(1<<11)) != 0, SYSEE)
1009
	fs.setIf((d&(1<<15)) != 0, CMOV)
1010
	fs.setIf((d&(1<<23)) != 0, MMX)
1011
	fs.setIf((d&(1<<24)) != 0, FXSR)
1012
	fs.setIf((d&(1<<25)) != 0, FXSROPT)
1013
	fs.setIf((d&(1<<25)) != 0, SSE)
1014
	fs.setIf((d&(1<<26)) != 0, SSE2)
1015
	fs.setIf((c&1) != 0, SSE3)
1016
	fs.setIf((c&(1<<5)) != 0, VMX)
1017
	fs.setIf((c&(1<<9)) != 0, SSSE3)
1018
	fs.setIf((c&(1<<19)) != 0, SSE4)
1019
	fs.setIf((c&(1<<20)) != 0, SSE42)
1020
	fs.setIf((c&(1<<25)) != 0, AESNI)
1021
	fs.setIf((c&(1<<1)) != 0, CLMUL)
1022
	fs.setIf(c&(1<<22) != 0, MOVBE)
1023
	fs.setIf(c&(1<<23) != 0, POPCNT)
1024
	fs.setIf(c&(1<<30) != 0, RDRAND)
1025

1026
	// This bit has been reserved by Intel & AMD for use by hypervisors,
1027
	// and indicates the presence of a hypervisor.
1028
	fs.setIf(c&(1<<31) != 0, HYPERVISOR)
1029
	fs.setIf(c&(1<<29) != 0, F16C)
1030
	fs.setIf(c&(1<<13) != 0, CX16)
1031

1032
	if vend == Intel && (d&(1<<28)) != 0 && mfi >= 4 {
1033
		fs.setIf(threadsPerCore() > 1, HTT)
1034
	}
1035
	if vend == AMD && (d&(1<<28)) != 0 && mfi >= 4 {
1036
		fs.setIf(threadsPerCore() > 1, HTT)
1037
	}
1038
	fs.setIf(c&1<<26 != 0, XSAVE)
1039
	fs.setIf(c&1<<27 != 0, OSXSAVE)
1040
	// Check XGETBV/XSAVE (26), OXSAVE (27) and AVX (28) bits
1041
	const avxCheck = 1<<26 | 1<<27 | 1<<28
1042
	if c&avxCheck == avxCheck {
1043
		// Check for OS support
1044
		eax, _ := xgetbv(0)
1045
		if (eax & 0x6) == 0x6 {
1046
			fs.set(AVX)
1047
			switch vend {
1048
			case Intel:
1049
				// Older than Haswell.
1050
				fs.setIf(family == 6 && model < 60, AVXSLOW)
1051
			case AMD:
1052
				// Older than Zen 2
1053
				fs.setIf(family < 23 || (family == 23 && model < 49), AVXSLOW)
1054
			}
1055
		}
1056
	}
1057
	// FMA3 can be used with SSE registers, so no OS support is strictly needed.
1058
	// fma3 and OSXSAVE needed.
1059
	const fma3Check = 1<<12 | 1<<27
1060
	fs.setIf(c&fma3Check == fma3Check, FMA3)
1061

1062
	// Check AVX2, AVX2 requires OS support, but BMI1/2 don't.
1063
	if mfi >= 7 {
1064
		_, ebx, ecx, edx := cpuidex(7, 0)
1065
		if fs.inSet(AVX) && (ebx&0x00000020) != 0 {
1066
			fs.set(AVX2)
1067
		}
1068
		// CPUID.(EAX=7, ECX=0).EBX
1069
		if (ebx & 0x00000008) != 0 {
1070
			fs.set(BMI1)
1071
			fs.setIf((ebx&0x00000100) != 0, BMI2)
1072
		}
1073
		fs.setIf(ebx&(1<<2) != 0, SGX)
1074
		fs.setIf(ebx&(1<<4) != 0, HLE)
1075
		fs.setIf(ebx&(1<<9) != 0, ERMS)
1076
		fs.setIf(ebx&(1<<11) != 0, RTM)
1077
		fs.setIf(ebx&(1<<14) != 0, MPX)
1078
		fs.setIf(ebx&(1<<18) != 0, RDSEED)
1079
		fs.setIf(ebx&(1<<19) != 0, ADX)
1080
		fs.setIf(ebx&(1<<29) != 0, SHA)
1081

1082
		// CPUID.(EAX=7, ECX=0).ECX
1083
		fs.setIf(ecx&(1<<5) != 0, WAITPKG)
1084
		fs.setIf(ecx&(1<<7) != 0, CETSS)
1085
		fs.setIf(ecx&(1<<8) != 0, GFNI)
1086
		fs.setIf(ecx&(1<<9) != 0, VAES)
1087
		fs.setIf(ecx&(1<<10) != 0, VPCLMULQDQ)
1088
		fs.setIf(ecx&(1<<13) != 0, TME)
1089
		fs.setIf(ecx&(1<<25) != 0, CLDEMOTE)
1090
		fs.setIf(ecx&(1<<27) != 0, MOVDIRI)
1091
		fs.setIf(ecx&(1<<28) != 0, MOVDIR64B)
1092
		fs.setIf(ecx&(1<<29) != 0, ENQCMD)
1093
		fs.setIf(ecx&(1<<30) != 0, SGXLC)
1094

1095
		// CPUID.(EAX=7, ECX=0).EDX
1096
		fs.setIf(edx&(1<<11) != 0, RTM_ALWAYS_ABORT)
1097
		fs.setIf(edx&(1<<14) != 0, SERIALIZE)
1098
		fs.setIf(edx&(1<<16) != 0, TSXLDTRK)
1099
		fs.setIf(edx&(1<<18) != 0, PCONFIG)
1100
		fs.setIf(edx&(1<<20) != 0, CETIBT)
1101
		fs.setIf(edx&(1<<26) != 0, IBPB)
1102
		fs.setIf(edx&(1<<27) != 0, STIBP)
1103

1104
		// CPUID.(EAX=7, ECX=1)
1105
		eax1, _, _, _ := cpuidex(7, 1)
1106
		fs.setIf(fs.inSet(AVX) && eax1&(1<<4) != 0, AVXVNNI)
1107
		fs.setIf(eax1&(1<<10) != 0, MOVSB_ZL)
1108
		fs.setIf(eax1&(1<<11) != 0, STOSB_SHORT)
1109
		fs.setIf(eax1&(1<<12) != 0, CMPSB_SCADBS_SHORT)
1110
		fs.setIf(eax1&(1<<22) != 0, HRESET)
1111
		fs.setIf(eax1&(1<<26) != 0, LAM)
1112

1113
		// Only detect AVX-512 features if XGETBV is supported
1114
		if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) {
1115
			// Check for OS support
1116
			eax, _ := xgetbv(0)
1117

1118
			// Verify that XCR0[7:5] = ‘111b’ (OPMASK state, upper 256-bit of ZMM0-ZMM15 and
1119
			// ZMM16-ZMM31 state are enabled by OS)
1120
			/// and that XCR0[2:1] = ‘11b’ (XMM state and YMM state are enabled by OS).
1121
			hasAVX512 := (eax>>5)&7 == 7 && (eax>>1)&3 == 3
1122
			if runtime.GOOS == "darwin" {
1123
				hasAVX512 = fs.inSet(AVX) && darwinHasAVX512()
1124
			}
1125
			if hasAVX512 {
1126
				fs.setIf(ebx&(1<<16) != 0, AVX512F)
1127
				fs.setIf(ebx&(1<<17) != 0, AVX512DQ)
1128
				fs.setIf(ebx&(1<<21) != 0, AVX512IFMA)
1129
				fs.setIf(ebx&(1<<26) != 0, AVX512PF)
1130
				fs.setIf(ebx&(1<<27) != 0, AVX512ER)
1131
				fs.setIf(ebx&(1<<28) != 0, AVX512CD)
1132
				fs.setIf(ebx&(1<<30) != 0, AVX512BW)
1133
				fs.setIf(ebx&(1<<31) != 0, AVX512VL)
1134
				// ecx
1135
				fs.setIf(ecx&(1<<1) != 0, AVX512VBMI)
1136
				fs.setIf(ecx&(1<<6) != 0, AVX512VBMI2)
1137
				fs.setIf(ecx&(1<<11) != 0, AVX512VNNI)
1138
				fs.setIf(ecx&(1<<12) != 0, AVX512BITALG)
1139
				fs.setIf(ecx&(1<<14) != 0, AVX512VPOPCNTDQ)
1140
				// edx
1141
				fs.setIf(edx&(1<<8) != 0, AVX512VP2INTERSECT)
1142
				fs.setIf(edx&(1<<22) != 0, AMXBF16)
1143
				fs.setIf(edx&(1<<23) != 0, AVX512FP16)
1144
				fs.setIf(edx&(1<<24) != 0, AMXTILE)
1145
				fs.setIf(edx&(1<<25) != 0, AMXINT8)
1146
				// eax1 = CPUID.(EAX=7, ECX=1).EAX
1147
				fs.setIf(eax1&(1<<5) != 0, AVX512BF16)
1148
			}
1149
		}
1150
	}
1151
	// Processor Extended State Enumeration Sub-leaf (EAX = 0DH, ECX = 1)
1152
	// EAX
1153
	// Bit 00: XSAVEOPT is available.
1154
	// Bit 01: Supports XSAVEC and the compacted form of XRSTOR if set.
1155
	// Bit 02: Supports XGETBV with ECX = 1 if set.
1156
	// Bit 03: Supports XSAVES/XRSTORS and IA32_XSS if set.
1157
	// Bits 31 - 04: Reserved.
1158
	// EBX
1159
	// Bits 31 - 00: The size in bytes of the XSAVE area containing all states enabled by XCRO | IA32_XSS.
1160
	// ECX
1161
	// Bits 31 - 00: Reports the supported bits of the lower 32 bits of the IA32_XSS MSR. IA32_XSS[n] can be set to 1 only if ECX[n] is 1.
1162
	// EDX?
1163
	// Bits 07 - 00: Used for XCR0. Bit 08: PT state. Bit 09: Used for XCR0. Bits 12 - 10: Reserved. Bit 13: HWP state. Bits 31 - 14: Reserved.
1164
	if mfi >= 0xd {
1165
		if fs.inSet(XSAVE) {
1166
			eax, _, _, _ := cpuidex(0xd, 1)
1167
			fs.setIf(eax&(1<<0) != 0, XSAVEOPT)
1168
			fs.setIf(eax&(1<<1) != 0, XSAVEC)
1169
			fs.setIf(eax&(1<<2) != 0, XGETBV1)
1170
			fs.setIf(eax&(1<<3) != 0, XSAVES)
1171
		}
1172
	}
1173
	if maxExtendedFunction() >= 0x80000001 {
1174
		_, _, c, d := cpuid(0x80000001)
1175
		if (c & (1 << 5)) != 0 {
1176
			fs.set(LZCNT)
1177
			fs.set(POPCNT)
1178
		}
1179
		// ECX
1180
		fs.setIf((c&(1<<0)) != 0, LAHF)
1181
		fs.setIf((c&(1<<2)) != 0, SVM)
1182
		fs.setIf((c&(1<<6)) != 0, SSE4A)
1183
		fs.setIf((c&(1<<10)) != 0, IBS)
1184
		fs.setIf((c&(1<<22)) != 0, TOPEXT)
1185

1186
		// EDX
1187
		fs.setIf(d&(1<<11) != 0, SYSCALL)
1188
		fs.setIf(d&(1<<20) != 0, NX)
1189
		fs.setIf(d&(1<<22) != 0, MMXEXT)
1190
		fs.setIf(d&(1<<23) != 0, MMX)
1191
		fs.setIf(d&(1<<24) != 0, FXSR)
1192
		fs.setIf(d&(1<<25) != 0, FXSROPT)
1193
		fs.setIf(d&(1<<27) != 0, RDTSCP)
1194
		fs.setIf(d&(1<<30) != 0, AMD3DNOWEXT)
1195
		fs.setIf(d&(1<<31) != 0, AMD3DNOW)
1196

1197
		/* XOP and FMA4 use the AVX instruction coding scheme, so they can't be
1198
		 * used unless the OS has AVX support. */
1199
		if fs.inSet(AVX) {
1200
			fs.setIf((c&(1<<11)) != 0, XOP)
1201
			fs.setIf((c&(1<<16)) != 0, FMA4)
1202
		}
1203

1204
	}
1205
	if maxExtendedFunction() >= 0x80000007 {
1206
		_, b, _, d := cpuid(0x80000007)
1207
		fs.setIf((b&(1<<0)) != 0, MCAOVERFLOW)
1208
		fs.setIf((b&(1<<1)) != 0, SUCCOR)
1209
		fs.setIf((b&(1<<2)) != 0, HWA)
1210
		fs.setIf((d&(1<<9)) != 0, CPBOOST)
1211
	}
1212

1213
	if maxExtendedFunction() >= 0x80000008 {
1214
		_, b, _, _ := cpuid(0x80000008)
1215
		fs.setIf((b&(1<<9)) != 0, WBNOINVD)
1216
		fs.setIf((b&(1<<8)) != 0, MCOMMIT)
1217
		fs.setIf((b&(1<<13)) != 0, INT_WBINVD)
1218
		fs.setIf((b&(1<<4)) != 0, RDPRU)
1219
		fs.setIf((b&(1<<3)) != 0, INVLPGB)
1220
		fs.setIf((b&(1<<1)) != 0, MSRIRC)
1221
		fs.setIf((b&(1<<0)) != 0, CLZERO)
1222
	}
1223

1224
	if fs.inSet(SVM) && maxExtendedFunction() >= 0x8000000A {
1225
		_, _, _, edx := cpuid(0x8000000A)
1226
		fs.setIf((edx>>0)&1 == 1, SVMNP)
1227
		fs.setIf((edx>>1)&1 == 1, LBRVIRT)
1228
		fs.setIf((edx>>2)&1 == 1, SVML)
1229
		fs.setIf((edx>>3)&1 == 1, NRIPS)
1230
		fs.setIf((edx>>4)&1 == 1, TSCRATEMSR)
1231
		fs.setIf((edx>>5)&1 == 1, VMCBCLEAN)
1232
		fs.setIf((edx>>6)&1 == 1, SVMFBASID)
1233
		fs.setIf((edx>>7)&1 == 1, SVMDA)
1234
		fs.setIf((edx>>10)&1 == 1, SVMPF)
1235
		fs.setIf((edx>>12)&1 == 1, SVMPFT)
1236
	}
1237

1238
	if maxExtendedFunction() >= 0x8000001b && fs.inSet(IBS) {
1239
		eax, _, _, _ := cpuid(0x8000001b)
1240
		fs.setIf((eax>>0)&1 == 1, IBSFFV)
1241
		fs.setIf((eax>>1)&1 == 1, IBSFETCHSAM)
1242
		fs.setIf((eax>>2)&1 == 1, IBSOPSAM)
1243
		fs.setIf((eax>>3)&1 == 1, IBSRDWROPCNT)
1244
		fs.setIf((eax>>4)&1 == 1, IBSOPCNT)
1245
		fs.setIf((eax>>5)&1 == 1, IBSBRNTRGT)
1246
		fs.setIf((eax>>6)&1 == 1, IBSOPCNTEXT)
1247
		fs.setIf((eax>>7)&1 == 1, IBSRIPINVALIDCHK)
1248
	}
1249

1250
	if maxExtendedFunction() >= 0x8000001f && vend == AMD {
1251
		a, _, _, _ := cpuid(0x8000001f)
1252
		fs.setIf((a>>0)&1 == 1, SME)
1253
		fs.setIf((a>>1)&1 == 1, SEV)
1254
		fs.setIf((a>>2)&1 == 1, MSR_PAGEFLUSH)
1255
		fs.setIf((a>>3)&1 == 1, SEV_ES)
1256
		fs.setIf((a>>4)&1 == 1, SEV_SNP)
1257
		fs.setIf((a>>5)&1 == 1, VMPL)
1258
		fs.setIf((a>>10)&1 == 1, SME_COHERENT)
1259
		fs.setIf((a>>11)&1 == 1, SEV_64BIT)
1260
		fs.setIf((a>>12)&1 == 1, SEV_RESTRICTED)
1261
		fs.setIf((a>>13)&1 == 1, SEV_ALTERNATIVE)
1262
		fs.setIf((a>>14)&1 == 1, SEV_DEBUGSWAP)
1263
		fs.setIf((a>>15)&1 == 1, IBS_PREVENTHOST)
1264
		fs.setIf((a>>16)&1 == 1, VTE)
1265
		fs.setIf((a>>24)&1 == 1, VMSA_REGPROT)
1266
	}
1267

1268
	return fs
1269
}
1270

1271
// valAsString unpacks the given 32-bit register values into bytes, least
// significant byte first, and returns them concatenated in argument order.
//
// The result is cut off just before the first zero byte, if there is one.
func valAsString(values ...uint32) []byte {
	buf := make([]byte, 4*len(values))
	n := 0
	for _, word := range values {
		for shift := uint(0); shift < 32; shift += 8 {
			ch := byte(word >> shift)
			if ch == 0 {
				// A zero byte terminates the string.
				return buf[:n]
			}
			buf[n] = ch
			n++
		}
	}
	return buf
}
1292
cubefs

Использование cookies