// vm_version_x86.cpp
/*
 * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/macroAssembler.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "classfile/vmIntrinsics.hpp"
#include "code/codeBlob.hpp"
#include "compiler/compilerDefinitions.inline.hpp"
#include "jvm.h"
#include "logging/log.hpp"
#include "logging/logStream.hpp"
#include "memory/resourceArea.hpp"
#include "memory/universe.hpp"
#include "runtime/globals_extension.hpp"
#include "runtime/java.hpp"
#include "runtime/os.inline.hpp"
#include "runtime/stubCodeGenerator.hpp"
#include "runtime/vm_version.hpp"
#include "utilities/checkedCast.hpp"
#include "utilities/powerOfTwo.hpp"
#include "utilities/virtualizationSupport.hpp"

int VM_Version::_cpu;
int VM_Version::_model;
int VM_Version::_stepping;
bool VM_Version::_has_intel_jcc_erratum;
VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };

#define DECLARE_CPU_FEATURE_NAME(id, name, bit) name,
const char* VM_Version::_features_names[] = { CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_NAME)};
#undef DECLARE_CPU_FEATURE_NAME

// Address of instruction which causes SEGV
address VM_Version::_cpuinfo_segv_addr = 0;
// Address of instruction after the one which causes SEGV
address VM_Version::_cpuinfo_cont_addr = 0;
// Address of instruction which causes APX specific SEGV
address VM_Version::_cpuinfo_segv_addr_apx = 0;
// Address of instruction after the one which causes APX specific SEGV
address VM_Version::_cpuinfo_cont_addr_apx = 0;

static BufferBlob* stub_blob;
static const int stub_size = 2000;

extern "C" {
  typedef void (*get_cpu_info_stub_t)(void*);
  typedef void (*detect_virt_stub_t)(uint32_t, uint32_t*);
  typedef void (*clear_apx_test_state_t)(void);
}
static get_cpu_info_stub_t get_cpu_info_stub = nullptr;
static detect_virt_stub_t detect_virt_stub = nullptr;
static clear_apx_test_state_t clear_apx_test_state_stub = nullptr;

#ifdef _LP64

bool VM_Version::supports_clflush() {
  // clflush should always be available on x86_64
  // if not we are in real trouble because we rely on it
  // to flush the code cache.
  // Unfortunately, Assembler::clflush is currently called as part
  // of generation of the code cache flush routine. This happens
  // under Universe::init before the processor features are set
  // up. Assembler::flush calls this routine to check that clflush
  // is allowed. So, we give the caller a free pass if Universe init
  // is still in progress.
  assert ((!Universe::is_fully_initialized() || (_features & CPU_FLUSH) != 0), "clflush should be available");
  return true;
}
#endif

#define CPUID_STANDARD_FN   0x0
#define CPUID_STANDARD_FN_1 0x1
#define CPUID_STANDARD_FN_4 0x4
#define CPUID_STANDARD_FN_B 0xb

#define CPUID_EXTENDED_FN   0x80000000
#define CPUID_EXTENDED_FN_1 0x80000001
#define CPUID_EXTENDED_FN_2 0x80000002
#define CPUID_EXTENDED_FN_3 0x80000003
#define CPUID_EXTENDED_FN_4 0x80000004
#define CPUID_EXTENDED_FN_7 0x80000007
#define CPUID_EXTENDED_FN_8 0x80000008
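
// For orientation: each stub below runs CPUID from generated code so the
// results can be stored straight into the CpuidInfo blob. The same query,
// expressed as plain C++, would look roughly like the sketch below
// (illustrative only; cpuid_query is a hypothetical helper, GCC/Clang
// inline-asm syntax, not used by HotSpot):
//
//   #include <cstdint>
//   static void cpuid_query(uint32_t leaf, uint32_t subleaf, uint32_t out[4]) {
//     uint32_t a, b, c, d;
//     __asm__ volatile("cpuid"
//                      : "=a"(a), "=b"(b), "=c"(c), "=d"(d)
//                      : "a"(leaf), "c"(subleaf));
//     out[0] = a; out[1] = b; out[2] = c; out[3] = d;
//   }
//
// e.g. cpuid_query(CPUID_STANDARD_FN_1, 0, regs) returns the family/model/
// stepping word in regs[0] and the feature masks in regs[2] and regs[3].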

class VM_Version_StubGenerator: public StubCodeGenerator {
 public:

  VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}

#if defined(_LP64)
  address clear_apx_test_state() {
#   define __ _masm->
    address start = __ pc();
    // EGPRs are caller-clobbered registers. Explicitly clearing r16 and r31 during
    // signal handling guarantees that any preserved register values observed after
    // signal handling were re-instantiated by the operating system, not merely
    // left unmodified.

    bool save_apx = UseAPX;
    VM_Version::set_apx_cpuFeatures();
    UseAPX = true;
    // EGPR state save/restoration.
    __ mov64(r16, 0L);
    __ mov64(r31, 0L);
    UseAPX = save_apx;
    VM_Version::clean_cpuFeatures();
    __ ret(0);
    return start;
  }
#endif

  address generate_get_cpu_info() {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC = 0x40000;
    const uint32_t HS_EFL_ID = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int      CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
    bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);

    Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
    Label sef_cpuid, sefsl1_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7;
    Label ext_cpuid8, done, wrapup, vector_save_restore, apx_save_restore_warning;
    Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;

    StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
#   define __ _masm->

    address start = __ pc();

    //
    // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
    //
    // LP64: rcx and rdx are first and second argument registers on windows

    __ push(rbp);
#ifdef _LP64
    __ mov(rbp, c_rarg0); // cpuid_info address
#else
    __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
#endif
    __ push(rbx);
    __ push(rsi);
    __ pushf();          // preserve rbx, and flags
    __ pop(rax);
    __ push(rax);
    __ mov(rcx, rax);
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);

    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
    //
    __ bind(detect_486);
    __ mov(rax, rcx);
    __ xorl(rax, HS_EFL_ID);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rcx, rax);
    __ jccb(Assembler::notEqual, detect_586);

    __ bind(cpu486);
    __ movl(rax, CPU_FAMILY_486);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);

    //
    // At this point, we have a chip which supports the "cpuid" instruction
    //
    __ bind(detect_586);
    __ xorl(rax, rax);
    __ cpuid();
    __ orl(rax, rax);
    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
                                        // value of at least 1, we give up and
                                        // assume a 486
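
    // Background on the trick above (bit values match the HS_EFL_* constants):
    // EFLAGS bit 18 (AC, 0x40000) is writable only on a 486 or later, and
    // bit 21 (ID, 0x200000) can be toggled only when the CPUID instruction is
    // implemented. Toggling a bit through push/popf and reading it back via
    // pushf/pop therefore classifies the chip, roughly:
    //
    //   if (!can_toggle(AC))       ->  386
    //   else if (!can_toggle(ID))  ->  486 without CPUID
    //   else cpuid(0)              ->  eax = highest supported standard leaf
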
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ cmpl(rax, 0xa);                  // Is cpuid(0xB) supported?
    __ jccb(Assembler::belowEqual, std_cpuid4);

    //
    // cpuid(0xB) Processor Topology
    //
    __ movl(rax, 0xb);
    __ xorl(rcx, rcx);   // Threads level
    __ cpuid();

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 1);     // Cores level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid topology level
    __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 2);     // Packages level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid topology level
    __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // cpuid(0x4) Deterministic cache params
    //
    __ bind(std_cpuid4);
    __ movl(rax, 4);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported?
    __ jccb(Assembler::greater, std_cpuid1);

    __ xorl(rcx, rcx);   // L1 cache
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid cache parameters used
    __ orl(rax, rax);    // eax[4:0] == 0 indicates invalid cache
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid1);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Standard cpuid(0x1)
    //
    __ bind(std_cpuid1);
    __ movl(rax, 1);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported

    //
    // XCR0, XFEATURE_ENABLED_MASK register
    //
    __ xorl(rcx, rcx);   // zero for XCR0 register
    __ xgetbv();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rdx);
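
    // XCR0 bit assignments relied on below (masks as used in this stub; the
    // bit positions are architectural, the names here are illustrative only):
    //
    //   enum XCR0Bits : uint64_t {
    //     xcr0_sse    = 1 << 1,   // 0x2      XMM state
    //     xcr0_ymm    = 1 << 2,   // 0x4      upper YMM state (0x6 with SSE)
    //     xcr0_opmask = 1 << 5,   // 0x20  \
    //     xcr0_zmm512 = 1 << 6,   // 0x40   } 0xE0 together, AVX-512 state
    //     xcr0_zmm32  = 1 << 7,   // 0x80  /
    //     xcr0_apx_f  = 1 << 19   // 0x80000  APX extended GPR state
    //   };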

    //
    // cpuid(0x7) Structured Extended Features Enumeration Leaf.
    //
    __ bind(sef_cpuid);
    __ movl(rax, 7);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
    __ jccb(Assembler::greater, ext_cpuid);
    // ECX = 0
    __ xorl(rcx, rcx);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi, 12), rdx);

    //
    // cpuid(0x7) Structured Extended Features Enumeration Sub-Leaf 1.
    //
    __ bind(sefsl1_cpuid);
    __ movl(rax, 7);
    __ movl(rcx, 1);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rdx);

    //
    // Extended cpuid(0x80000000)
    //
    __ bind(ext_cpuid);
    __ movl(rax, 0x80000000);
    __ cpuid();
    __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
    __ jcc(Assembler::belowEqual, done);
    __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
    __ jcc(Assembler::belowEqual, ext_cpuid1);
    __ cmpl(rax, 0x80000006);     // Is cpuid(0x80000007) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid5);
    __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid7);
    __ cmpl(rax, 0x80000008);     // Is cpuid(0x80000009 and above) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid8);
    __ cmpl(rax, 0x8000001E);     // Is cpuid(0x8000001E) supported?
    __ jccb(Assembler::below, ext_cpuid8);
    //
    // Extended cpuid(0x8000001E)
    //
    __ movl(rax, 0x8000001E);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000008)
    //
    __ bind(ext_cpuid8);
    __ movl(rax, 0x80000008);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000007)
    //
    __ bind(ext_cpuid7);
    __ movl(rax, 0x80000007);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000005)
    //
    __ bind(ext_cpuid5);
    __ movl(rax, 0x80000005);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000001)
    //
    __ bind(ext_cpuid1);
    __ movl(rax, 0x80000001);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

#if defined(_LP64)
    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports APX
    //
    // To enable APX, check CPUID.EAX=7.ECX=1.EDX[21] bit for HW support
    // and XCR0[19] bit for OS support to save/restore extended GPR state.
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
    __ movl(rax, 0x200000);
    __ andl(rax, Address(rsi, 4));
    __ cmpl(rax, 0x200000);
    __ jcc(Assembler::notEqual, vector_save_restore);
    // check _cpuid_info.xem_xcr0_eax.bits.apx_f
    __ movl(rax, 0x80000);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits apx_f
    __ cmpl(rax, 0x80000);
    __ jcc(Assembler::notEqual, vector_save_restore);

    bool save_apx = UseAPX;
    VM_Version::set_apx_cpuFeatures();
    UseAPX = true;
    __ mov64(r16, VM_Version::egpr_test_value());
    __ mov64(r31, VM_Version::egpr_test_value());
    __ xorl(rsi, rsi);
    VM_Version::set_cpuinfo_segv_addr_apx(__ pc());
    // Generate SEGV
    __ movl(rax, Address(rsi, 0));

    VM_Version::set_cpuinfo_cont_addr_apx(__ pc());
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::apx_save_offset())));
    __ movq(Address(rsi, 0), r16);
    __ movq(Address(rsi, 8), r31);

    UseAPX = save_apx;
#endif
    __ bind(vector_save_restore);
    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ andl(rcx, Address(rsi, 8)); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, done); // jump if AVX is not supported

    __ movl(rax, 0x6);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
    __ cmpl(rax, 0x6);
    __ jccb(Assembler::equal, start_simd_check); // proceed to the SIMD check if the OS has enabled SSE and YMM state

    // we need to bridge farther than imm8, so we use this island as a thunk
    __ bind(done);
    __ jmp(wrapup);

    __ bind(start_simd_check);
    //
    // Some OSes have a bug where the upper 128/256 bits of the YMM/ZMM
    // registers are not restored after signal processing.
    // Generate a SEGV here (reference through null)
    // and check the upper YMM/ZMM bits after it.
    //
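    // The SEGV/continue handshake with the platform signal handler, in
    // outline (illustrative sketch; the real handler lives in OS-specific
    // code and the exact mcontext access differs per platform):
    //
    //   if (sig == SIGSEGV && pc == VM_Version::cpuinfo_segv_addr()) {
    //     // Skip the faulting load; the stub then stores the YMM/ZMM
    //     // registers so their upper halves can be checked afterwards.
    //     set_pc(uc, VM_Version::cpuinfo_cont_addr());   // set_pc: hypothetical
    //     return true;   // handled
    //   }
    //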
    int saved_useavx = UseAVX;
    int saved_usesse = UseSSE;

    // If UseAVX is uninitialized or is set by the user to include EVEX
    if (use_evex) {
      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
      __ movl(rax, 0x10000);
      __ andl(rax, Address(rsi, 4)); // sef_cpuid7 ebx: avx512f bit
      __ cmpl(rax, 0x10000);
      __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
      // check _cpuid_info.xem_xcr0_eax.bits.opmask
      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
      __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits opmask | zmm512 | zmm32
      __ cmpl(rax, 0xE0);
      __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported

      if (FLAG_IS_DEFAULT(UseAVX)) {
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
        __ movl(rax, Address(rsi, 0));
        __ cmpl(rax, 0x50654);              // If it is Skylake
        __ jcc(Assembler::equal, legacy_setup);
      }
      // EVEX setup: run in lowest evex mode
      VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
      UseAVX = 3;
      UseSSE = 2;
#ifdef _WINDOWS
      // xmm5-xmm15 are not preserved by caller on windows
      // https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm7, Assembler::AVX_512bit);
#ifdef _LP64
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit);
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit);
#endif // _LP64
#endif // _WINDOWS

      // load value into all 64 bytes of zmm7 register
      __ movl(rcx, VM_Version::ymm_test_value());
      __ movdl(xmm0, rcx);
      __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit);
#ifdef _LP64
      __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit);
#endif
      VM_Version::clean_cpuFeatures();
      __ jmp(save_restore_except);
    }

    __ bind(legacy_setup);
    // AVX setup
    VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
    UseAVX = 1;
    UseSSE = 2;
#ifdef _WINDOWS
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm7);
#ifdef _LP64
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm8);
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm15);
#endif // _LP64
#endif // _WINDOWS

    // load value into all 32 bytes of ymm7 register
    __ movl(rcx, VM_Version::ymm_test_value());

    __ movdl(xmm0, rcx);
    __ pshufd(xmm0, xmm0, 0x00);
    __ vinsertf128_high(xmm0, xmm0);
    __ vmovdqu(xmm7, xmm0);
#ifdef _LP64
    __ vmovdqu(xmm8, xmm0);
    __ vmovdqu(xmm15, xmm0);
#endif
    VM_Version::clean_cpuFeatures();

    __ bind(save_restore_except);
    __ xorl(rsi, rsi);
    VM_Version::set_cpuinfo_segv_addr(__ pc());
    // Generate SEGV
    __ movl(rax, Address(rsi, 0));

    VM_Version::set_cpuinfo_cont_addr(__ pc());
    // Returns here after signal. Save xmm0 to check it later.

    // If UseAVX is uninitialized or is set by the user to include EVEX
    if (use_evex) {
      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
      __ movl(rax, 0x10000);
      __ andl(rax, Address(rsi, 4));
      __ cmpl(rax, 0x10000);
      __ jcc(Assembler::notEqual, legacy_save_restore);
      // check _cpuid_info.xem_xcr0_eax.bits.opmask
      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
      __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits opmask | zmm512 | zmm32
590
      __ cmpl(rax, 0xE0);
591
      __ jcc(Assembler::notEqual, legacy_save_restore);
592

593
      if (FLAG_IS_DEFAULT(UseAVX)) {
594
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
595
        __ movl(rax, Address(rsi, 0));
596
        __ cmpl(rax, 0x50654);              // If it is Skylake
597
        __ jcc(Assembler::equal, legacy_save_restore);
598
      }
599
      // EVEX check: run in lowest evex mode
600
      VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
601
      UseAVX = 3;
602
      UseSSE = 2;
603
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset())));
604
      __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit);
605
      __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit);
606
#ifdef _LP64
607
      __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit);
608
      __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit);
609
#endif
610

611
#ifdef _WINDOWS
612
#ifdef _LP64
613
      __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit);
614
      __ addptr(rsp, 64);
615
      __ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit);
616
      __ addptr(rsp, 64);
617
#endif // _LP64
618
      __ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit);
619
      __ addptr(rsp, 64);
620
#endif // _WINDOWS
621
      generate_vzeroupper(wrapup);
622
      VM_Version::clean_cpuFeatures();
623
      UseAVX = saved_useavx;
624
      UseSSE = saved_usesse;
625
      __ jmp(wrapup);
626
   }

    __ bind(legacy_save_restore);
    // AVX check
    VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
    UseAVX = 1;
    UseSSE = 2;
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
    __ vmovdqu(Address(rsi, 0), xmm0);
    __ vmovdqu(Address(rsi, 32), xmm7);
#ifdef _LP64
    __ vmovdqu(Address(rsi, 64), xmm8);
    __ vmovdqu(Address(rsi, 96), xmm15);
#endif

#ifdef _WINDOWS
#ifdef _LP64
    __ vmovdqu(xmm15, Address(rsp, 0));
    __ addptr(rsp, 32);
    __ vmovdqu(xmm8, Address(rsp, 0));
    __ addptr(rsp, 32);
#endif // _LP64
    __ vmovdqu(xmm7, Address(rsp, 0));
    __ addptr(rsp, 32);
#endif // _WINDOWS

    generate_vzeroupper(wrapup);
    VM_Version::clean_cpuFeatures();
    UseAVX = saved_useavx;
    UseSSE = saved_usesse;

    __ bind(wrapup);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };
  void generate_vzeroupper(Label& L_wrapup) {
#   define __ _masm->
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ cmpl(Address(rsi, 4), 0x756e6547);  // 'uneG'
    __ jcc(Assembler::notEqual, L_wrapup);
    __ movl(rcx, 0x0FFF0FF0);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ andl(rcx, Address(rsi, 0));
    __ cmpl(rcx, 0x00050670);              // If it is Xeon Phi 3200/5200/7200
    __ jcc(Assembler::equal, L_wrapup);
    __ cmpl(rcx, 0x00080650);              // If it is Future Xeon Phi
    __ jcc(Assembler::equal, L_wrapup);
    // vzeroupper() will use a pre-computed instruction sequence that we
    // can't compute until after we've determined CPU capabilities. Use
    // uncached variant here directly to be able to bootstrap correctly
    __ vzeroupper_uncached();
#   undef __
  }
  address generate_detect_virt() {
    StubCodeMark mark(this, "VM_Version", "detect_virt_stub");
#   define __ _masm->

    address start = __ pc();

    // Evacuate callee-saved registers
    __ push(rbp);
    __ push(rbx);
    __ push(rsi); // for Windows

#ifdef _LP64
    __ mov(rax, c_rarg0); // CPUID leaf
    __ mov(rsi, c_rarg1); // register array address (eax, ebx, ecx, edx)
#else
    __ movptr(rax, Address(rsp, 16)); // CPUID leaf
    __ movptr(rsi, Address(rsp, 20)); // register array address
#endif

    __ cpuid();

    // Store result to register array
    __ movl(Address(rsi,  0), rax);
    __ movl(Address(rsi,  4), rbx);
    __ movl(Address(rsi,  8), rcx);
    __ movl(Address(rsi, 12), rdx);

    // Epilogue
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };
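
  // Illustrative use of the stub above (assumption: hypervisor CPUID leaves
  // start at 0x40000000 by convention, with the vendor signature returned in
  // ebx/ecx/edx):
  //
  //   uint32_t regs[4] = {0};
  //   detect_virt_stub(0x40000000, regs);
  //   // regs[1..3] then spell e.g. "KVMKVMKVM\0\0\0" or "VMwareVMware".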


  address generate_getCPUIDBrandString(void) {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC           = 0x40000;
    const uint32_t HS_EFL_ID           = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int      CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386   = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486   = (4 << CPU_FAMILY_SHIFT);

    Label detect_486, cpu486, detect_586, done, ext_cpuid;

    StubCodeMark mark(this, "VM_Version", "getCPUIDNameInfo_stub");
#   define __ _masm->

    address start = __ pc();

    //
    // void getCPUIDBrandString(VM_Version::CpuidInfo* cpuid_info);
    //
    // LP64: rcx and rdx are first and second argument registers on windows

    __ push(rbp);
#ifdef _LP64
    __ mov(rbp, c_rarg0); // cpuid_info address
#else
    __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
#endif
    __ push(rbx);
    __ push(rsi);
    __ pushf();          // preserve rbx, and flags
    __ pop(rax);
    __ push(rax);
    __ mov(rcx, rax);
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ jmp(done);

    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
    //
    __ bind(detect_486);
    __ mov(rax, rcx);
    __ xorl(rax, HS_EFL_ID);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rcx, rax);
    __ jccb(Assembler::notEqual, detect_586);

    __ bind(cpu486);
    __ movl(rax, CPU_FAMILY_486);
    __ jmp(done);

    //
    // At this point, we have a chip which supports the "cpuid" instruction
    //
    __ bind(detect_586);
    __ xorl(rax, rax);
    __ cpuid();
    __ orl(rax, rax);
    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
                                        // value of at least 1, we give up and
                                        // assume a 486

    //
    // Extended cpuid(0x80000000) for processor brand string detection
    //
    __ bind(ext_cpuid);
    __ movl(rax, CPUID_EXTENDED_FN);
    __ cpuid();
    __ cmpl(rax, CPUID_EXTENDED_FN_4);
    __ jcc(Assembler::below, done);

    //
    // Extended cpuid(0x80000002)  // first 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_2);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_1_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_2_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_3_offset())));
    __ movl(Address(rsi,0), rdx);

    //
    // Extended cpuid(0x80000003) // next 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_3);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_4_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_5_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_6_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_7_offset())));
    __ movl(Address(rsi,0), rdx);

    //
    // Extended cpuid(0x80000004) // last 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_4);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_8_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_9_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_10_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_11_offset())));
    __ movl(Address(rsi,0), rdx);

    //
    // return
    //
    __ bind(done);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };
};

void VM_Version::get_processor_features() {

  _cpu = 4; // 486 by default
  _model = 0;
  _stepping = 0;
  _features = 0;
  _logical_processors_per_package = 1;
  // i486 internal cache is both I&D and has a 16-byte line size
  _L1_data_cache_line_size = 16;

  // Get raw processor info

  get_cpu_info_stub(&_cpuid_info);

  assert_is_initialized();
  _cpu = extended_cpu_family();
  _model = extended_cpu_model();
  _stepping = cpu_stepping();

  if (cpu_family() > 4) { // it supports CPUID
    _features = _cpuid_info.feature_flags(); // These can be changed by VM settings
    _cpu_features = _features;   // Preserve features
    // Logical processors are only available on P4s and above,
    // and only if hyperthreading is available.
    _logical_processors_per_package = logical_processor_count();
    _L1_data_cache_line_size = L1_line_size();
  }

  // xchg and xadd instructions
  _supports_atomic_getset4 = true;
  _supports_atomic_getadd4 = true;
  LP64_ONLY(_supports_atomic_getset8 = true);
  LP64_ONLY(_supports_atomic_getadd8 = true);

#ifdef _LP64
  // OS should support SSE for x64 and hardware should support at least SSE2.
  if (!VM_Version::supports_sse2()) {
    vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
  }
  // in 64 bit the use of SSE2 is the minimum
  if (UseSSE < 2) UseSSE = 2;
#endif

#ifdef AMD64
  // flush_icache_stub has to be generated first.
  // That is why Icache line size is hard coded in ICache class,
  // see icache_x86.hpp. It is also the reason why we can't use
  // clflush instruction in 32-bit VM since it could be running
  // on CPU which does not support it.
  //
  // The only thing we can do is to verify that the hard-coded
  // ICache::line_size has the correct value.
  guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported");
  // clflush_size is size in quadwords (8 bytes).
  guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported");
#endif

#ifdef _LP64
  // Assigning this field effectively enables Unsafe.writebackMemory()
  // by initializing UnsafeConstants.DATA_CACHE_LINE_FLUSH_SIZE to non-zero;
  // writeback is only implemented on x86_64 and only if the OS plays ball.
  if (os::supports_map_sync()) {
    // Publish the data cache line flush size to the generic field; otherwise
    // let it default to zero, thereby disabling writeback.
    _data_cache_line_flush_size = _cpuid_info.std_cpuid1_ebx.bits.clflush_size * 8;
  }
#endif

  // Check if the processor has Intel E-cores
  if (FLAG_IS_DEFAULT(EnableX86ECoreOpts) && is_intel() && cpu_family() == 6 &&
    (_model == 0x97 || _model == 0xAA || _model == 0xAC || _model == 0xAF)) {
    FLAG_SET_DEFAULT(EnableX86ECoreOpts, true);
  }

  if (UseSSE < 4) {
    _features &= ~CPU_SSE4_1;
    _features &= ~CPU_SSE4_2;
  }

  if (UseSSE < 3) {
    _features &= ~CPU_SSE3;
    _features &= ~CPU_SSSE3;
    _features &= ~CPU_SSE4A;
  }

  if (UseSSE < 2)
    _features &= ~CPU_SSE2;

  if (UseSSE < 1)
    _features &= ~CPU_SSE;

  // Since AVX instructions are slower than SSE on some ZX CPUs, force UseAVX=0.
  if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7))) {
    UseAVX = 0;
  }

  // UseSSE is set to the smaller of what hardware supports and what
  // the command line requires.  I.e., you cannot set UseSSE to 2 on
  // older Pentiums which do not support it.
  int use_sse_limit = 0;
  if (UseSSE > 0) {
    if (UseSSE > 3 && supports_sse4_1()) {
      use_sse_limit = 4;
    } else if (UseSSE > 2 && supports_sse3()) {
      use_sse_limit = 3;
    } else if (UseSSE > 1 && supports_sse2()) {
      use_sse_limit = 2;
    } else if (UseSSE > 0 && supports_sse()) {
      use_sse_limit = 1;
    } else {
      use_sse_limit = 0;
    }
  }
  if (FLAG_IS_DEFAULT(UseSSE)) {
    FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
  } else if (UseSSE > use_sse_limit) {
    warning("UseSSE=%d is not supported on this CPU, setting it to UseSSE=%d", UseSSE, use_sse_limit);
    FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
  }
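
  // Worked example of the clamping above, assuming a hypothetical CPU that
  // reports SSE3 but not SSE4.1 (use_sse_limit == 3):
  //   default        -> UseSSE set to 3
  //   -XX:UseSSE=4   -> warning, then clamped to 3
  //   -XX:UseSSE=2   -> kept at 2 (user setting within the limit)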

  // first try initial setting and detect what we can support
  int use_avx_limit = 0;
  if (UseAVX > 0) {
    if (UseSSE < 4) {
      // Don't use AVX if SSE is unavailable or has been disabled.
      use_avx_limit = 0;
    } else if (UseAVX > 2 && supports_evex()) {
      use_avx_limit = 3;
    } else if (UseAVX > 1 && supports_avx2()) {
      use_avx_limit = 2;
    } else if (UseAVX > 0 && supports_avx()) {
      use_avx_limit = 1;
    } else {
      use_avx_limit = 0;
    }
  }
  if (FLAG_IS_DEFAULT(UseAVX)) {
    // Don't use AVX-512 on older Skylakes unless explicitly requested.
    if (use_avx_limit > 2 && is_intel_skylake() && _stepping < 5) {
      FLAG_SET_DEFAULT(UseAVX, 2);
    } else {
      FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
    }
  }

  if (UseAVX > use_avx_limit) {
    if (UseSSE < 4) {
      warning("UseAVX=%d requires UseSSE=4, setting it to UseAVX=0", UseAVX);
    } else {
      warning("UseAVX=%d is not supported on this CPU, setting it to UseAVX=%d", UseAVX, use_avx_limit);
    }
    FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
  }

  if (UseAVX < 3) {
    _features &= ~CPU_AVX512F;
    _features &= ~CPU_AVX512DQ;
    _features &= ~CPU_AVX512CD;
    _features &= ~CPU_AVX512BW;
    _features &= ~CPU_AVX512VL;
    _features &= ~CPU_AVX512_VPOPCNTDQ;
    _features &= ~CPU_AVX512_VPCLMULQDQ;
    _features &= ~CPU_AVX512_VAES;
    _features &= ~CPU_AVX512_VNNI;
    _features &= ~CPU_AVX512_VBMI;
    _features &= ~CPU_AVX512_VBMI2;
    _features &= ~CPU_AVX512_BITALG;
    _features &= ~CPU_AVX512_IFMA;
    _features &= ~CPU_APX_F;
  }

  // Currently APX support is only enabled for targets supporting AVX512VL feature.
  bool apx_supported = os_supports_apx_egprs() && supports_apx_f() && supports_avx512vl();
  if (UseAPX && !apx_supported) {
    warning("UseAPX is not supported on this CPU, setting it to false");
    FLAG_SET_DEFAULT(UseAPX, false);
  } else if (FLAG_IS_DEFAULT(UseAPX)) {
    FLAG_SET_DEFAULT(UseAPX, apx_supported ? true : false);
  }

  if (UseAVX < 2) {
    _features &= ~CPU_AVX2;
    _features &= ~CPU_AVX_IFMA;
  }

  if (UseAVX < 1) {
    _features &= ~CPU_AVX;
    _features &= ~CPU_VZEROUPPER;
    _features &= ~CPU_F16C;
  }

  if (logical_processors_per_package() == 1) {
    // HT processor could be installed on a system which doesn't support HT.
    _features &= ~CPU_HT;
  }

  if (is_intel()) { // Intel cpus specific settings
    if (is_knights_family()) {
      _features &= ~CPU_VZEROUPPER;
      _features &= ~CPU_AVX512BW;
      _features &= ~CPU_AVX512VL;
      _features &= ~CPU_AVX512DQ;
      _features &= ~CPU_AVX512_VNNI;
      _features &= ~CPU_AVX512_VAES;
      _features &= ~CPU_AVX512_VPOPCNTDQ;
      _features &= ~CPU_AVX512_VPCLMULQDQ;
      _features &= ~CPU_AVX512_VBMI;
      _features &= ~CPU_AVX512_VBMI2;
      _features &= ~CPU_CLWB;
      _features &= ~CPU_FLUSHOPT;
      _features &= ~CPU_GFNI;
      _features &= ~CPU_AVX512_BITALG;
      _features &= ~CPU_AVX512_IFMA;
      _features &= ~CPU_AVX_IFMA;
    }
  }

  if (FLAG_IS_DEFAULT(IntelJccErratumMitigation)) {
    _has_intel_jcc_erratum = compute_has_intel_jcc_erratum();
  } else {
    _has_intel_jcc_erratum = IntelJccErratumMitigation;
  }

  char buf[1024];
  int res = jio_snprintf(
              buf, sizeof(buf),
              "(%u cores per cpu, %u threads per core) family %d model %d stepping %d microcode 0x%x",
              cores_per_cpu(), threads_per_core(),
              cpu_family(), _model, _stepping, os::cpu_microcode_revision());
  assert(res > 0, "not enough temporary space allocated");
  insert_features_names(buf + res, sizeof(buf) - res, _features_names);

  _features_string = os::strdup(buf);
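
  // The resulting string looks like, e.g. (hypothetical values):
  //   "(8 cores per cpu, 2 threads per core) family 6 model 158 stepping 10
  //    microcode 0xf0, cmov, cx8, fxsr, mmx, sse, sse2, ..."
  // with everything after the microcode revision produced by
  // insert_features_names() from _features_names.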
1101

1102
  // Use AES instructions if available.
1103
  if (supports_aes()) {
1104
    if (FLAG_IS_DEFAULT(UseAES)) {
1105
      FLAG_SET_DEFAULT(UseAES, true);
1106
    }
1107
    if (!UseAES) {
1108
      if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1109
        warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
1110
      }
1111
      FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1112
    } else {
1113
      if (UseSSE > 2) {
1114
        if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1115
          FLAG_SET_DEFAULT(UseAESIntrinsics, true);
1116
        }
1117
      } else {
1118
        // The AES intrinsic stubs require AES instruction support (of course)
        // but also require SSE3 or higher for the instructions they use.
        if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
          warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled.");
        }
        FLAG_SET_DEFAULT(UseAESIntrinsics, false);
      }

      // --AES-CTR begins--
      if (!UseAESIntrinsics) {
        if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
          warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled.");
          FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
        }
      } else {
        if (supports_sse4_1()) {
          if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
            FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
          }
        } else {
          // The AES-CTR intrinsic stubs require AES instruction support (of course)
          // but also require SSE4.1 or higher for the instructions they use.
          if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
            warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled.");
          }
          FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
        }
      }
      // --AES-CTR ends--
    }
  } else if (UseAES || UseAESIntrinsics || UseAESCTRIntrinsics) {
    if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
      warning("AES instructions are not available on this CPU");
      FLAG_SET_DEFAULT(UseAES, false);
    }
    if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
      warning("AES intrinsics are not available on this CPU");
      FLAG_SET_DEFAULT(UseAESIntrinsics, false);
    }
    if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
      warning("AES-CTR intrinsics are not available on this CPU");
      FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
    }
  }

  // Use CLMUL instructions if available.
  if (supports_clmul()) {
    if (FLAG_IS_DEFAULT(UseCLMUL)) {
      UseCLMUL = true;
    }
  } else if (UseCLMUL) {
    if (!FLAG_IS_DEFAULT(UseCLMUL))
      warning("CLMUL instructions not available on this CPU (AVX may also be required)");
    FLAG_SET_DEFAULT(UseCLMUL, false);
  }

  if (UseCLMUL && (UseSSE > 2)) {
    if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
      UseCRC32Intrinsics = true;
    }
  } else if (UseCRC32Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics))
      warning("CRC32 Intrinsics requires CLMUL instructions (not available on this CPU)");
    FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
  }

#ifdef _LP64
  if (supports_avx2()) {
    if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
      UseAdler32Intrinsics = true;
    }
  } else if (UseAdler32Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
      warning("Adler32 Intrinsics requires avx2 instructions (not available on this CPU)");
    }
    FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
  }
#else
  if (UseAdler32Intrinsics) {
    warning("Adler32Intrinsics not available on this CPU.");
    FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
  }
#endif

  if (supports_sse4_2() && supports_clmul()) {
    if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
      UseCRC32CIntrinsics = true;
    }
  } else if (UseCRC32CIntrinsics) {
    if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
      warning("CRC32C intrinsics are not available on this CPU");
    }
    FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
  }

  // GHASH/GCM intrinsics
  if (UseCLMUL && (UseSSE > 2)) {
    if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
      UseGHASHIntrinsics = true;
    }
  } else if (UseGHASHIntrinsics) {
    if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics))
      warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU");
    FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
  }

#ifdef _LP64
  // ChaCha20 Intrinsics
  // As long as the system supports AVX as a baseline we can do a
  // SIMD-enabled block function.  StubGenerator makes the determination
  // based on the VM capabilities whether to use an AVX2 or AVX512-enabled
  // version.
  if (UseAVX >= 1) {
    if (FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
      UseChaCha20Intrinsics = true;
    }
  } else if (UseChaCha20Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
      warning("ChaCha20 intrinsic requires AVX instructions");
    }
    FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
  }
#else
  // No support currently for ChaCha20 intrinsics on 32-bit platforms
  if (UseChaCha20Intrinsics) {
    warning("ChaCha20 intrinsics are not available on this CPU.");
    FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
  }
#endif // _LP64

  // Base64 Intrinsics (Check the condition for which the intrinsic will be active)
  if (UseAVX >= 2) {
    if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)) {
      UseBASE64Intrinsics = true;
    }
  } else if (UseBASE64Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseBASE64Intrinsics))
      warning("Base64 intrinsic requires AVX2 instructions on this CPU");
    FLAG_SET_DEFAULT(UseBASE64Intrinsics, false);
  }

  if (supports_fma() && UseSSE >= 2) { // Check UseSSE since FMA code uses SSE instructions
    if (FLAG_IS_DEFAULT(UseFMA)) {
      UseFMA = true;
    }
  } else if (UseFMA) {
    warning("FMA instructions are not available on this CPU");
    FLAG_SET_DEFAULT(UseFMA, false);
  }

  if (FLAG_IS_DEFAULT(UseMD5Intrinsics)) {
    UseMD5Intrinsics = true;
  }

  if (supports_sha() LP64_ONLY(|| (supports_avx2() && supports_bmi2()))) {
    if (FLAG_IS_DEFAULT(UseSHA)) {
      UseSHA = true;
    }
  } else if (UseSHA) {
    warning("SHA instructions are not available on this CPU");
    FLAG_SET_DEFAULT(UseSHA, false);
  }

  if (supports_sha() && supports_sse4_1() && UseSHA) {
    if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
      FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
    }
  } else if (UseSHA1Intrinsics) {
    warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
  }

  if (supports_sse4_1() && UseSHA) {
    if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
      FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
    }
  } else if (UseSHA256Intrinsics) {
    warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
  }

#ifdef _LP64
  // These are only supported on 64-bit
  if (UseSHA && supports_avx2() && supports_bmi2()) {
    if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
      FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
    }
  } else
#endif
  if (UseSHA512Intrinsics) {
    warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
  }

  if (UseSHA3Intrinsics) {
    warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
  }

  if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
    FLAG_SET_DEFAULT(UseSHA, false);
  }

#ifdef COMPILER2
  if (UseFPUForSpilling) {
    if (UseSSE < 2) {
      // Only supported with SSE2+
      FLAG_SET_DEFAULT(UseFPUForSpilling, false);
    }
  }
#endif

#if COMPILER2_OR_JVMCI
  int max_vector_size = 0;
  if (UseSSE < 2) {
    // Vectors (in XMM) are only supported with SSE2+
    // SSE is always 2 on x64.
    max_vector_size = 0;
  } else if (UseAVX == 0 || !os_supports_avx_vectors()) {
    // 16-byte vectors (in XMM) are supported with SSE2+
    max_vector_size = 16;
  } else if (UseAVX == 1 || UseAVX == 2) {
    // 32-byte vectors (in YMM) are only supported with AVX+
    max_vector_size = 32;
  } else if (UseAVX > 2) {
    // 64-byte vectors (in ZMM) are only supported with AVX 3
    max_vector_size = 64;
  }

#ifdef _LP64
  int min_vector_size = 4; // We require MaxVectorSize to be at least 4 on 64bit
#else
  int min_vector_size = 0;
#endif

  if (!FLAG_IS_DEFAULT(MaxVectorSize)) {
    if (MaxVectorSize < min_vector_size) {
      warning("MaxVectorSize must be at least %i on this platform", min_vector_size);
      FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size);
    }
    if (MaxVectorSize > max_vector_size) {
      warning("MaxVectorSize must be at most %i on this platform", max_vector_size);
      FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
    }
    if (!is_power_of_2(MaxVectorSize)) {
      warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size);
      FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
    }
  } else {
    // If default, use highest supported configuration
    FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
  }
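
  // Worked example, assuming a hypothetical AVX2 machine (max_vector_size == 32):
  //   default                -> MaxVectorSize set to 32
  //   -XX:MaxVectorSize=64   -> warning, clamped to 32
  //   -XX:MaxVectorSize=24   -> not a power of 2: warning, reset to 32
  //   -XX:MaxVectorSize=16   -> kept at 16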
1370

1371
#if defined(COMPILER2) && defined(ASSERT)
1372
  if (MaxVectorSize > 0) {
1373
    if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
1374
      tty->print_cr("State of YMM registers after signal handle:");
1375
      int nreg = 2 LP64_ONLY(+2);
1376
      const char* ymm_name[4] = {"0", "7", "8", "15"};
1377
      for (int i = 0; i < nreg; i++) {
1378
        tty->print("YMM%s:", ymm_name[i]);
1379
        for (int j = 7; j >=0; j--) {
1380
          tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
1381
        }
1382
        tty->cr();
1383
      }
1384
    }
1385
  }
1386
#endif // COMPILER2 && ASSERT
1387

1388
#ifdef _LP64
1389
  if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma())  {
1390
    if (FLAG_IS_DEFAULT(UsePoly1305Intrinsics)) {
1391
      FLAG_SET_DEFAULT(UsePoly1305Intrinsics, true);
1392
    }
1393
  } else
1394
#endif
1395
  if (UsePoly1305Intrinsics) {
1396
    warning("Intrinsics for Poly1305 crypto hash functions not available on this CPU.");
1397
    FLAG_SET_DEFAULT(UsePoly1305Intrinsics, false);
1398
  }
1399

1400
#ifdef _LP64
1401
  if (supports_avx512ifma() && supports_avx512vlbw()) {
1402
    if (FLAG_IS_DEFAULT(UseIntPolyIntrinsics)) {
1403
      FLAG_SET_DEFAULT(UseIntPolyIntrinsics, true);
1404
    }
1405
  } else
1406
#endif
1407
  if (UseIntPolyIntrinsics) {
1408
    warning("Intrinsics for Polynomial crypto functions not available on this CPU.");
1409
    FLAG_SET_DEFAULT(UseIntPolyIntrinsics, false);
1410
  }
1411

1412
#ifdef _LP64
1413
  if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
1414
    UseMultiplyToLenIntrinsic = true;
1415
  }
1416
  if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
1417
    UseSquareToLenIntrinsic = true;
1418
  }
1419
  if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
1420
    UseMulAddIntrinsic = true;
1421
  }
1422
  if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
1423
    UseMontgomeryMultiplyIntrinsic = true;
1424
  }
1425
  if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
1426
    UseMontgomerySquareIntrinsic = true;
1427
  }
1428
#else
1429
  if (UseMultiplyToLenIntrinsic) {
1430
    if (!FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
1431
      warning("multiplyToLen intrinsic is not available in 32-bit VM");
1432
    }
1433
    FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, false);
1434
  }
1435
  if (UseMontgomeryMultiplyIntrinsic) {
1436
    if (!FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
1437
      warning("montgomeryMultiply intrinsic is not available in 32-bit VM");
1438
    }
1439
    FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, false);
1440
  }
1441
  if (UseMontgomerySquareIntrinsic) {
1442
    if (!FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
1443
      warning("montgomerySquare intrinsic is not available in 32-bit VM");
1444
    }
1445
    FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, false);
1446
  }
1447
  if (UseSquareToLenIntrinsic) {
1448
    if (!FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
1449
      warning("squareToLen intrinsic is not available in 32-bit VM");
1450
    }
1451
    FLAG_SET_DEFAULT(UseSquareToLenIntrinsic, false);
1452
  }
1453
  if (UseMulAddIntrinsic) {
1454
    if (!FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
1455
      warning("mulAdd intrinsic is not available in 32-bit VM");
1456
    }
1457
    FLAG_SET_DEFAULT(UseMulAddIntrinsic, false);
1458
  }
1459
#endif // _LP64
1460
#endif // COMPILER2_OR_JVMCI
1461

1462
  // On new cpus instructions which update whole XMM register should be used
1463
  // to prevent partial register stall due to dependencies on high half.
1464
  //
1465
  // UseXmmLoadAndClearUpper == true  --> movsd(xmm, mem)
1466
  // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
1467
  // UseXmmRegToRegMoveAll == true  --> movaps(xmm, xmm), movapd(xmm, xmm).
1468
  // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm),  movsd(xmm, xmm).
1469

1470

1471
  if (is_zx()) { // ZX cpus specific settings
1472
    if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1473
      UseStoreImmI16 = false; // don't use it on ZX cpus
1474
    }
1475
    if ((cpu_family() == 6) || (cpu_family() == 7)) {
1476
      if (FLAG_IS_DEFAULT(UseAddressNop)) {
1477
        // Use it on all ZX cpus
1478
        UseAddressNop = true;
1479
      }
1480
    }
1481
    if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1482
      UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus
1483
    }
1484
    if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1485
      if (supports_sse3()) {
1486
        UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus
1487
      } else {
1488
        UseXmmRegToRegMoveAll = false;
1489
      }
1490
    }
1491
    if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus
1492
#ifdef COMPILER2
      if (FLAG_IS_DEFAULT(MaxLoopPad)) {
        // For new ZX cpus apply the following optimization:
        // don't align the beginning of a loop if there are enough instructions
        // left (NumberOfLoopInstrToAlign, defined in c2_globals.hpp)
        // in the current fetch line (OptoLoopAlignment) or if the padding
        // is big (> MaxLoopPad).
        // Set MaxLoopPad to 11 for new ZX cpus to reduce the number of
        // generated NOP instructions. 11 is the largest size of one
        // address NOP instruction '0F 1F' (see Assembler::nop(i)).
        MaxLoopPad = 11;
      }
#endif // COMPILER2
      if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
        UseXMMForArrayCopy = true; // use SSE2 movq on new ZX cpus
      }
      if (supports_sse4_2()) { // new ZX cpus
        if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
          UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus
        }
      }
      if (supports_sse4_2()) {
        if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
          FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
        }
      } else {
        if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
          warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
        }
        FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
      }
    }

    if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
      FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
    }
  }

  if (is_amd_family()) { // AMD cpus specific settings
    if (supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop)) {
      // Use it on new AMD cpus starting from Opteron.
      UseAddressNop = true;
    }
    if (supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift)) {
      // Use it on new AMD cpus starting from Opteron.
      UseNewLongLShift = true;
    }
    if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
      if (supports_sse4a()) {
        UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
      } else {
        UseXmmLoadAndClearUpper = false;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
      if (supports_sse4a()) {
        UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
      } else {
        UseXmmRegToRegMoveAll = false;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmI2F)) {
      if (supports_sse4a()) {
        UseXmmI2F = true;
      } else {
        UseXmmI2F = false;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmI2D)) {
      if (supports_sse4a()) {
        UseXmmI2D = true;
      } else {
        UseXmmI2D = false;
      }
    }
    if (supports_sse4_2()) {
      if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
        FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
      }
    } else {
      if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
        warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
      }
      FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
    }

    // Some defaults for AMD family 15h
    if (cpu_family() == 0x15) {
      // On family 15h processors the default is no software prefetch
      if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
        FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
      }
      // Also, if some other prefetch style is specified, the default instruction type is PREFETCHW
      if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
        FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
      }
      // On family 15h processors use XMM and UnalignedLoadStores for Array Copy
      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
      }
      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
      }
    }

#ifdef COMPILER2
    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
      // Limit vector size to 16 bytes on AMD cpus < 17h.
      FLAG_SET_DEFAULT(MaxVectorSize, 16);
    }
#endif // COMPILER2

    // Some defaults for AMD family >= 17h && Hygon family 18h
    if (cpu_family() >= 0x17) {
      // On family >=17h processors use XMM and UnalignedLoadStores
      // for Array Copy
      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
      }
      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
      }
#ifdef COMPILER2
      if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
        FLAG_SET_DEFAULT(UseFPUForSpilling, true);
      }
#endif
    }
  }

  if (is_intel()) { // Intel cpus specific settings
    if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
      UseStoreImmI16 = false; // don't use it on Intel cpus
    }
    if (cpu_family() == 6 || cpu_family() == 15) {
      if (FLAG_IS_DEFAULT(UseAddressNop)) {
        // Use it on all Intel cpus starting from PentiumPro
        UseAddressNop = true;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
      UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
    }
    if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
      if (supports_sse3()) {
        UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
      } else {
        UseXmmRegToRegMoveAll = false;
      }
    }
    if (cpu_family() == 6 && supports_sse3()) { // New Intel cpus
#ifdef COMPILER2
      if (FLAG_IS_DEFAULT(MaxLoopPad)) {
        // For new Intel cpus apply the following optimization:
        // don't align the beginning of a loop if there are enough instructions
        // left (NumberOfLoopInstrToAlign, defined in c2_globals.hpp)
        // in the current fetch line (OptoLoopAlignment) or if the padding
        // is big (> MaxLoopPad).
        // Set MaxLoopPad to 11 for new Intel cpus to reduce the number of
        // generated NOP instructions. 11 is the largest size of one
        // address NOP instruction '0F 1F' (see Assembler::nop(i)).
        MaxLoopPad = 11;
      }
#endif // COMPILER2

      if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
        UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
      }
      if ((supports_sse4_2() && supports_ht()) || supports_avx()) { // Newest Intel cpus
        if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
          UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
        }
      }
      if (supports_sse4_2()) {
        if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
          FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
        }
      } else {
        if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
          warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
        }
        FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
      }
    }
    if (is_atom_family() || is_knights_family()) {
#ifdef COMPILER2
      if (FLAG_IS_DEFAULT(OptoScheduling)) {
        OptoScheduling = true;
      }
#endif
      if (supports_sse4_2()) { // Silvermont
        if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
          UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
        }
      }
      if (FLAG_IS_DEFAULT(UseIncDec)) {
        FLAG_SET_DEFAULT(UseIncDec, false);
      }
    }
    if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
      FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
    }
#ifdef COMPILER2
    if (UseAVX > 2) {
      if (FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) ||
          (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) &&
           ArrayOperationPartialInlineSize != 0 &&
           ArrayOperationPartialInlineSize != 16 &&
           ArrayOperationPartialInlineSize != 32 &&
           ArrayOperationPartialInlineSize != 64)) {
        int inline_size = 0;
        if (MaxVectorSize >= 64 && AVX3Threshold == 0) {
          inline_size = 64;
        } else if (MaxVectorSize >= 32) {
          inline_size = 32;
        } else if (MaxVectorSize >= 16) {
          inline_size = 16;
        }
        if (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)) {
          warning("Setting ArrayOperationPartialInlineSize as %d", inline_size);
        }
        ArrayOperationPartialInlineSize = inline_size;
      }

      if (ArrayOperationPartialInlineSize > MaxVectorSize) {
        ArrayOperationPartialInlineSize = MaxVectorSize >= 16 ? MaxVectorSize : 0;
        if (ArrayOperationPartialInlineSize) {
          warning("Setting ArrayOperationPartialInlineSize as MaxVectorSize (" INTX_FORMAT ")", MaxVectorSize);
        } else {
          warning("Setting ArrayOperationPartialInlineSize as " INTX_FORMAT, ArrayOperationPartialInlineSize);
        }
      }
    }
#endif
  }

#ifdef COMPILER2
  if (FLAG_IS_DEFAULT(OptimizeFill)) {
    if (MaxVectorSize < 32 || !VM_Version::supports_avx512vlbw()) {
      OptimizeFill = false;
    }
  }
#endif

#ifdef _LP64
  if (UseSSE42Intrinsics) {
    if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
      UseVectorizedMismatchIntrinsic = true;
    }
  } else if (UseVectorizedMismatchIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic))
      warning("vectorizedMismatch intrinsics are not available on this CPU");
    FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
  }
  if (UseAVX >= 2) {
    FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, true);
  } else if (UseVectorizedHashCodeIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic))
      warning("vectorizedHashCode intrinsics are not available on this CPU");
    FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false);
  }
#else
  if (UseVectorizedMismatchIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
      warning("vectorizedMismatch intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
  }
  if (UseVectorizedHashCodeIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic)) {
      warning("vectorizedHashCode intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false);
  }
#endif // _LP64

  // Use count leading zeros instruction if available.
  if (supports_lzcnt()) {
    if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {
      UseCountLeadingZerosInstruction = true;
    }
  } else if (UseCountLeadingZerosInstruction) {
    warning("lzcnt instruction is not available on this CPU");
    FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false);
  }

  // Use count trailing zeros instruction if available
  if (supports_bmi1()) {
    // tzcnt does not require VEX prefix
    if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) {
      if (!UseBMI1Instructions && !FLAG_IS_DEFAULT(UseBMI1Instructions)) {
        // Don't use tzcnt if BMI1 is switched off on command line.
        UseCountTrailingZerosInstruction = false;
      } else {
        UseCountTrailingZerosInstruction = true;
      }
    }
  } else if (UseCountTrailingZerosInstruction) {
    warning("tzcnt instruction is not available on this CPU");
    FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false);
  }

  // BMI instructions (except tzcnt) use an encoding with VEX prefix.
  // VEX prefix is generated only when AVX > 0.
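  // For example, BMI1's ANDN, BLSI, BLSR and BLSMSK are VEX-encoded and so
  // are only usable when AVX is enabled, whereas TZCNT reuses the legacy
  // BSF encoding with an F3 prefix and needs no VEX prefix (see above).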
  if (supports_bmi1() && supports_avx()) {
    if (FLAG_IS_DEFAULT(UseBMI1Instructions)) {
      UseBMI1Instructions = true;
    }
  } else if (UseBMI1Instructions) {
    warning("BMI1 instructions are not available on this CPU (AVX is also required)");
    FLAG_SET_DEFAULT(UseBMI1Instructions, false);
  }

  if (supports_bmi2() && supports_avx()) {
    if (FLAG_IS_DEFAULT(UseBMI2Instructions)) {
      UseBMI2Instructions = true;
    }
  } else if (UseBMI2Instructions) {
    warning("BMI2 instructions are not available on this CPU (AVX is also required)");
    FLAG_SET_DEFAULT(UseBMI2Instructions, false);
  }

  // Use population count instruction if available.
  if (supports_popcnt()) {
    if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
      UsePopCountInstruction = true;
    }
  } else if (UsePopCountInstruction) {
    warning("POPCNT instruction is not available on this CPU");
    FLAG_SET_DEFAULT(UsePopCountInstruction, false);
  }

  // Use fast-string operations if available.
  if (supports_erms()) {
    if (FLAG_IS_DEFAULT(UseFastStosb)) {
      UseFastStosb = true;
    }
  } else if (UseFastStosb) {
    warning("fast-string operations are not available on this CPU");
    FLAG_SET_DEFAULT(UseFastStosb, false);
  }

  // For AMD Processors use XMM/YMM MOVDQU instructions
  // for Object Initialization as default
  if (is_amd() && cpu_family() >= 0x19) {
    if (FLAG_IS_DEFAULT(UseFastStosb)) {
      UseFastStosb = false;
    }
  }

#ifdef COMPILER2
  if (is_intel() && MaxVectorSize > 16) {
    if (FLAG_IS_DEFAULT(UseFastStosb)) {
      UseFastStosb = false;
    }
  }
#endif

  // Use XMM/YMM MOVDQU instruction for Object Initialization
  if (!UseFastStosb && UseSSE >= 2 && UseUnalignedLoadStores) {
    if (FLAG_IS_DEFAULT(UseXMMForObjInit)) {
      UseXMMForObjInit = true;
    }
  } else if (UseXMMForObjInit) {
    warning("UseXMMForObjInit requires SSE2 and unaligned load/stores. Feature is switched off.");
    FLAG_SET_DEFAULT(UseXMMForObjInit, false);
  }

#ifdef COMPILER2
  if (FLAG_IS_DEFAULT(AlignVector)) {
    // Modern processors allow misaligned memory operations for vectors.
    AlignVector = !UseUnalignedLoadStores;
  }
#endif // COMPILER2

  if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
    if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) {
      FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
    } else if (!supports_sse() && supports_3dnow_prefetch()) {
      FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
    }
  }

  // Allocation prefetch settings
  int cache_line_size = checked_cast<int>(prefetch_data_size());
  if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) &&
      (cache_line_size > AllocatePrefetchStepSize)) {
    FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
  }

  if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) {
    assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0");
    if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
      warning("AllocatePrefetchDistance is set to 0, which disables prefetching. Ignoring the AllocatePrefetchStyle flag.");
    }
    FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
  }

  if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
    bool use_watermark_prefetch = (AllocatePrefetchStyle == 2);
    FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch));
  }

  if (is_intel() && cpu_family() == 6 && supports_sse3()) {
    if (FLAG_IS_DEFAULT(AllocatePrefetchLines) &&
        supports_sse4_2() && supports_ht()) { // Nehalem based cpus
      FLAG_SET_DEFAULT(AllocatePrefetchLines, 4);
    }
#ifdef COMPILER2
    if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) {
      FLAG_SET_DEFAULT(UseFPUForSpilling, true);
    }
#endif
  }

  if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) {
#ifdef COMPILER2
    if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
      FLAG_SET_DEFAULT(UseFPUForSpilling, true);
    }
#endif
  }

#ifdef _LP64
  // Prefetch settings

  // Prefetch interval for gc copy/scan == 9 dcache lines.  Derived from
  // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap.
  // Tested intervals from 128 to 2048 in increments of 64 == one cache line.
  // 256 bytes (4 dcache lines) was the nearest runner-up to 576.

  // gc copy/scan is disabled if prefetchw isn't supported, because
  // Prefetch::write emits an inlined prefetchw on Linux.
  // Do not use the 3dnow prefetchw instruction.  It isn't supported on em64t.
  // The used prefetcht0 instruction works for both amd64 and em64t.

  if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) {
    FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576);
  }
  if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) {
    FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576);
  }
#endif

  if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
     (cache_line_size > ContendedPaddingWidth))
     ContendedPaddingWidth = cache_line_size;

  // This machine allows unaligned memory accesses
  if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
    FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
  }

#ifndef PRODUCT
  if (log_is_enabled(Info, os, cpu)) {
    LogStream ls(Log(os, cpu)::info());
    outputStream* log = &ls;
    log->print_cr("Logical CPUs per package: %u",
                  logical_processors_per_package());
    log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size());
    log->print("UseSSE=%d", UseSSE);
    if (UseAVX > 0) {
      log->print("  UseAVX=%d", UseAVX);
    }
    if (UseAES) {
      log->print("  UseAES=1");
    }
#ifdef COMPILER2
    if (MaxVectorSize > 0) {
      log->print("  MaxVectorSize=%d", (int) MaxVectorSize);
    }
#endif
    log->cr();
    log->print("Allocation");
    if (AllocatePrefetchStyle <= 0 || (UseSSE == 0 && !supports_3dnow_prefetch())) {
      log->print_cr(": no prefetching");
    } else {
      log->print(" prefetching: ");
      if (UseSSE == 0 && supports_3dnow_prefetch()) {
        log->print("PREFETCHW");
      } else if (UseSSE >= 1) {
        if (AllocatePrefetchInstr == 0) {
          log->print("PREFETCHNTA");
        } else if (AllocatePrefetchInstr == 1) {
          log->print("PREFETCHT0");
        } else if (AllocatePrefetchInstr == 2) {
          log->print("PREFETCHT2");
        } else if (AllocatePrefetchInstr == 3) {
          log->print("PREFETCHW");
        }
      }
      if (AllocatePrefetchLines > 1) {
        log->print_cr(" at distance %d, %d lines of %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize);
      } else {
        log->print_cr(" at distance %d, one line of %d bytes", AllocatePrefetchDistance, AllocatePrefetchStepSize);
      }
    }

    if (PrefetchCopyIntervalInBytes > 0) {
      log->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes);
    }
    if (PrefetchScanIntervalInBytes > 0) {
      log->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes);
    }
    if (ContendedPaddingWidth > 0) {
      log->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth);
    }
  }
#endif // !PRODUCT
  if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) {
      FLAG_SET_DEFAULT(UseSignumIntrinsic, true);
  }
  if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) {
      FLAG_SET_DEFAULT(UseCopySignIntrinsic, true);
  }
}

void VM_Version::print_platform_virtualization_info(outputStream* st) {
  VirtualizationType vrt = VM_Version::get_detected_virtualization();
  if (vrt == XenHVM) {
    st->print_cr("Xen hardware-assisted virtualization detected");
  } else if (vrt == KVM) {
    st->print_cr("KVM virtualization detected");
  } else if (vrt == VMWare) {
    st->print_cr("VMWare virtualization detected");
    VirtualizationSupport::print_virtualization_info(st);
  } else if (vrt == HyperV) {
    st->print_cr("Hyper-V virtualization detected");
  } else if (vrt == HyperVRole) {
    st->print_cr("Hyper-V role detected");
  }
}

bool VM_Version::compute_has_intel_jcc_erratum() {
  if (!is_intel_family_core()) {
    // Only Intel CPUs are affected.
    return false;
  }
  // The following table of affected CPUs is based on the following document released by Intel:
  // https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
  switch (_model) {
  case 0x8E:
    // 06_8EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
    // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U
    // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 23e
    // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Y
    // 06_8EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake U43e
    // 06_8EH | B | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
    // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
    // 06_8EH | C | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U42
    // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
    return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xC;
  case 0x4E:
    // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake U
    // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake U23e
    // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake Y
    return _stepping == 0x3;
  case 0x55:
    // 06_55H | 4 | Intel(R) Xeon(R) Processor D Family based on microarchitecture code name Skylake D, Bakerville
    // 06_55H | 4 | Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Skylake Server
    // 06_55H | 4 | Intel(R) Xeon(R) Processor W Family based on microarchitecture code name Skylake W
    // 06_55H | 4 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Skylake X
    // 06_55H | 4 | Intel(R) Xeon(R) Processor E3 v5 Family based on microarchitecture code name Skylake Xeon E3
    // 06_55  | 7 | 2nd Generation Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Cascade Lake (server)
    return _stepping == 0x4 || _stepping == 0x7;
  case 0x5E:
    // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake H
    // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake S
    return _stepping == 0x3;
  case 0x9E:
    // 06_9EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake G
    // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake H
    // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake S
    // 06_9EH | 9 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Kaby Lake X
    // 06_9EH | 9 | Intel(R) Xeon(R) Processor E3 v6 Family Kaby Lake Xeon E3
    // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H
    // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S
    // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (6+2) x/KBP
    // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (6+2)
    // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (4+2)
    // 06_9EH | B | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (4+2)
    // 06_9EH | B | Intel(R) Celeron(R) Processor G Series based on microarchitecture code name Coffee Lake S (4+2)
    // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H (8+2)
    // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (8+2)
    return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xD;
  case 0xA5:
    // Not in Intel documentation.
    // 06_A5H |    | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake S/H
    return true;
  case 0xA6:
    // 06_A6H | 0  | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U62
    return _stepping == 0x0;
  case 0xAE:
    // 06_AEH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Refresh U (4+2)
    return _stepping == 0xA;
  default:
    // If we are running on another intel machine not recognized in the table, we are okay.
    return false;
  }
}
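
// For example, a CPU reporting family 6, model 0x4E, stepping 3 (Skylake U in
// the table above) returns true here; the JIT then pads affected jump
// instructions so they neither cross nor end on a 32-byte boundary, which is
// the condition that triggers the erratum.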

// On Xen, the cpuid instruction returns
//  eax / registers[0]: Version of Xen
//  ebx / registers[1]: chars 'XenV'
//  ecx / registers[2]: chars 'MMXe'
//  edx / registers[3]: chars 'nVMM'
//
// On KVM / VMWare / MS Hyper-V, the cpuid instruction returns
//  ebx / registers[1]: chars 'KVMK' / 'VMwa' / 'Micr'
//  ecx / registers[2]: chars 'VMKV' / 'reVM' / 'osof'
//  edx / registers[3]: chars 'M'    / 'ware' / 't Hv'
//
// more information :
// https://kb.vmware.com/s/article/1009458
//
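// For example, on KVM the three registers concatenate into the 12-byte
// signature "KVMKVMKVM" (padded with NULs):
//   registers[1] = 'K','V','M','K'
//   registers[2] = 'V','M','K','V'
//   registers[3] = 'M', 0 , 0 , 0
// check_virtualizations() below copies these 12 bytes into a buffer and
// matches them with strncmp.
//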
void VM_Version::check_virtualizations() {
  uint32_t registers[4] = {0};
  char signature[13] = {0};

  // Xen cpuid leaves can be found at 0x100-aligned boundaries starting
  // from 0x40000000 up to 0x40010000.
  //   https://lists.linuxfoundation.org/pipermail/virtualization/2012-May/019974.html
  for (int leaf = 0x40000000; leaf < 0x40010000; leaf += 0x100) {
    detect_virt_stub(leaf, registers);
    memcpy(signature, &registers[1], 12);

    if (strncmp("VMwareVMware", signature, 12) == 0) {
      Abstract_VM_Version::_detected_virtualization = VMWare;
      // check for extended metrics from guestlib
      VirtualizationSupport::initialize();
    } else if (strncmp("Microsoft Hv", signature, 12) == 0) {
      Abstract_VM_Version::_detected_virtualization = HyperV;
#ifdef _WINDOWS
      // CPUID leaf 0x40000007 is available to the root partition only.
      // See Hypervisor Top Level Functional Specification section 2.4.8 for more details.
      //   https://github.com/MicrosoftDocs/Virtualization-Documentation/raw/master/tlfs/Hypervisor%20Top%20Level%20Functional%20Specification%20v6.0b.pdf
      detect_virt_stub(0x40000007, registers);
      if ((registers[0] != 0x0) ||
          (registers[1] != 0x0) ||
          (registers[2] != 0x0) ||
          (registers[3] != 0x0)) {
        Abstract_VM_Version::_detected_virtualization = HyperVRole;
      }
#endif
    } else if (strncmp("KVMKVMKVM", signature, 9) == 0) {
      Abstract_VM_Version::_detected_virtualization = KVM;
    } else if (strncmp("XenVMMXenVMM", signature, 12) == 0) {
      Abstract_VM_Version::_detected_virtualization = XenHVM;
    }
  }
}

#ifdef COMPILER2
// Determine if it's running on Cascade Lake using default options.
bool VM_Version::is_default_intel_cascade_lake() {
  return FLAG_IS_DEFAULT(UseAVX) &&
         FLAG_IS_DEFAULT(MaxVectorSize) &&
         UseAVX > 2 &&
         is_intel_cascade_lake();
}
#endif

bool VM_Version::is_intel_cascade_lake() {
  return is_intel_skylake() && _stepping >= 5;
}

// avx3_threshold() sets the threshold at which 64-byte instructions are used
// for implementing the array copy and clear operations.
// The Intel platforms that support the serialize instruction have an
// improved implementation of 64-byte load/stores, so the default
// threshold is set to 0 for these platforms.
int VM_Version::avx3_threshold() {
  return (is_intel_family_core() &&
          supports_serialize() &&
          FLAG_IS_DEFAULT(AVX3Threshold)) ? 0 : AVX3Threshold;
}
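
// For example, on an Intel Core-family CPU that supports SERIALIZE, with
// AVX3Threshold left at its default, this returns 0, so 64-byte instructions
// are used for array copy/clear right from the first byte; otherwise the
// user-supplied AVX3Threshold value is honored.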

#if defined(_LP64)
void VM_Version::clear_apx_test_state() {
  clear_apx_test_state_stub();
}
#endif

static bool _vm_version_initialized = false;

void VM_Version::initialize() {
  ResourceMark rm;
  // Making this stub must be FIRST use of assembler
  stub_blob = BufferBlob::create("VM_Version stub", stub_size);
  if (stub_blob == nullptr) {
    vm_exit_during_initialization("Unable to allocate stub for VM_Version");
  }
  CodeBuffer c(stub_blob);
  VM_Version_StubGenerator g(&c);

  get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t,
                                     g.generate_get_cpu_info());
  detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t,
                                     g.generate_detect_virt());

#if defined(_LP64)
  clear_apx_test_state_stub = CAST_TO_FN_PTR(clear_apx_test_state_t,
                                     g.clear_apx_test_state());
#endif
  get_processor_features();

  LP64_ONLY(Assembler::precompute_instructions();)

  if (VM_Version::supports_hv()) { // Supports hypervisor
    check_virtualizations();
  }
  _vm_version_initialized = true;
}

typedef enum {
   CPU_FAMILY_8086_8088  = 0,
   CPU_FAMILY_INTEL_286  = 2,
   CPU_FAMILY_INTEL_386  = 3,
   CPU_FAMILY_INTEL_486  = 4,
   CPU_FAMILY_PENTIUM    = 5,
   CPU_FAMILY_PENTIUMPRO = 6,    // Same family several models
   CPU_FAMILY_PENTIUM_4  = 0xF
} FamilyFlag;

typedef enum {
  RDTSCP_FLAG  = 0x08000000, // bit 27
  INTEL64_FLAG = 0x20000000  // bit 29
} _featureExtendedEdxFlag;

typedef enum {
   FPU_FLAG     = 0x00000001,
   VME_FLAG     = 0x00000002,
   DE_FLAG      = 0x00000004,
   PSE_FLAG     = 0x00000008,
   TSC_FLAG     = 0x00000010,
   MSR_FLAG     = 0x00000020,
   PAE_FLAG     = 0x00000040,
   MCE_FLAG     = 0x00000080,
   CX8_FLAG     = 0x00000100,
   APIC_FLAG    = 0x00000200,
   SEP_FLAG     = 0x00000800,
   MTRR_FLAG    = 0x00001000,
   PGE_FLAG     = 0x00002000,
   MCA_FLAG     = 0x00004000,
   CMOV_FLAG    = 0x00008000,
   PAT_FLAG     = 0x00010000,
   PSE36_FLAG   = 0x00020000,
   PSNUM_FLAG   = 0x00040000,
   CLFLUSH_FLAG = 0x00080000,
   DTS_FLAG     = 0x00200000,
   ACPI_FLAG    = 0x00400000,
   MMX_FLAG     = 0x00800000,
   FXSR_FLAG    = 0x01000000,
   SSE_FLAG     = 0x02000000,
   SSE2_FLAG    = 0x04000000,
   SS_FLAG      = 0x08000000,
   HTT_FLAG     = 0x10000000,
   TM_FLAG      = 0x20000000
} FeatureEdxFlag;
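
// Each constant above is a single bit from CPUID leaf 1 EDX; for example
// SSE2_FLAG == 0x04000000 is bit 26, so SSE2 support can be tested with:
//   (_cpuid_info.std_cpuid1_edx.value & SSE2_FLAG) != 0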

static BufferBlob* cpuid_brand_string_stub_blob;
static const int   cpuid_brand_string_stub_size = 550;

extern "C" {
  typedef void (*getCPUIDBrandString_stub_t)(void*);
}

static getCPUIDBrandString_stub_t getCPUIDBrandString_stub = nullptr;

// VM_Version statics
enum {
  ExtendedFamilyIdLength_INTEL = 16,
  ExtendedFamilyIdLength_AMD   = 24
};

const size_t VENDOR_LENGTH = 13;
const size_t CPU_EBS_MAX_LENGTH = (3 * 4 * 4 + 1);
static char* _cpu_brand_string = nullptr;
static int64_t _max_qualified_cpu_frequency = 0;

static int _no_of_threads = 0;
static int _no_of_cores = 0;

const char* const _family_id_intel[ExtendedFamilyIdLength_INTEL] = {
  "8086/8088",
  "",
  "286",
  "386",
  "486",
  "Pentium",
  "Pentium Pro",   //or Pentium-M/Woodcrest depending on model
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Pentium 4"
};

const char* const _family_id_amd[ExtendedFamilyIdLength_AMD] = {
  "",
  "",
  "",
  "",
  "5x86",
  "K5/K6",
  "Athlon/AthlonXP",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Opteron/Athlon64",
  "Opteron QC/Phenom",  // Barcelona et.al.
  "",
  "",
  "",
  "",
  "",
  "",
  "Zen"
};
// Partially from Intel 64 and IA-32 Architecture Software Developer's Manual,
// September 2013, Vol 3C Table 35-1
const char* const _model_id_pentium_pro[] = {
  "",
  "Pentium Pro",
  "",
  "Pentium II model 3",
  "",
  "Pentium II model 5/Xeon/Celeron",
  "Celeron",
  "Pentium III/Pentium III Xeon",
  "Pentium III/Pentium III Xeon",
  "Pentium M model 9",    // Yonah
  "Pentium III, model A",
  "Pentium III, model B",
  "",
  "Pentium M model D",    // Dothan
  "",
  "Core 2",               // 0xf Woodcrest/Conroe/Merom/Kentsfield/Clovertown
  "",
  "",
  "",
  "",
  "",
  "",
  "Celeron",              // 0x16 Celeron 65nm
  "Core 2",               // 0x17 Penryn / Harpertown
  "",
  "",
  "Core i7",              // 0x1A CPU_MODEL_NEHALEM_EP
  "Atom",                 // 0x1B Z5xx series Silverthorn
  "",
  "Core 2",               // 0x1D Dunnington (6-core)
  "Nehalem",              // 0x1E CPU_MODEL_NEHALEM
  "",
  "",
  "",
  "",
  "",
  "",
  "Westmere",             // 0x25 CPU_MODEL_WESTMERE
  "",
  "",
  "",                     // 0x28
  "",
  "Sandy Bridge",         // 0x2a "2nd Generation Intel Core i7, i5, i3"
  "",
  "Westmere-EP",          // 0x2c CPU_MODEL_WESTMERE_EP
  "Sandy Bridge-EP",      // 0x2d CPU_MODEL_SANDYBRIDGE_EP
  "Nehalem-EX",           // 0x2e CPU_MODEL_NEHALEM_EX
  "Westmere-EX",          // 0x2f CPU_MODEL_WESTMERE_EX
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Ivy Bridge",           // 0x3a
  "",
  "Haswell",              // 0x3c "4th Generation Intel Core Processor"
  "",                     // 0x3d "Next Generation Intel Core Processor"
  "Ivy Bridge-EP",        // 0x3e "Next Generation Intel Xeon Processor E7 Family"
  "",                     // 0x3f "Future Generation Intel Xeon Processor"
  "",
  "",
  "",
  "",
  "",
  "Haswell",              // 0x45 "4th Generation Intel Core Processor"
  "Haswell",              // 0x46 "4th Generation Intel Core Processor"
  nullptr
};

/* Brand ID is for backward compatibility;
 * newer CPUs use the extended brand string. */
const char* const _brand_id[] = {
  "",
  "Celeron processor",
  "Pentium III processor",
  "Intel Pentium III Xeon processor",
  "",
  "",
  "",
  "",
  "Intel Pentium 4 processor",
  nullptr
};


const char* const _feature_edx_id[] = {
  "On-Chip FPU",
  "Virtual Mode Extensions",
  "Debugging Extensions",
  "Page Size Extensions",
  "Time Stamp Counter",
  "Model Specific Registers",
  "Physical Address Extension",
  "Machine Check Exceptions",
  "CMPXCHG8B Instruction",
  "On-Chip APIC",
  "",
  "Fast System Call",
  "Memory Type Range Registers",
  "Page Global Enable",
  "Machine Check Architecture",
  "Conditional Mov Instruction",
  "Page Attribute Table",
  "36-bit Page Size Extension",
  "Processor Serial Number",
  "CLFLUSH Instruction",
  "",
  "Debug Trace Store feature",
  "ACPI registers in MSR space",
  "Intel Architecture MMX Technology",
  "Fast Floating Point Save and Restore",
  "Streaming SIMD extensions",
  "Streaming SIMD extensions 2",
  "Self-Snoop",
  "Hyper Threading",
  "Thermal Monitor",
  "",
  "Pending Break Enable"
};

const char* const _feature_extended_edx_id[] = {
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "SYSCALL/SYSRET",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Execute Disable Bit",
  "",
  "",
  "",
  "",
  "",
  "",
  "RDTSCP",
  "",
  "Intel 64 Architecture",
  "",
  ""
};

const char* const _feature_ecx_id[] = {
  "Streaming SIMD Extensions 3",
  "PCLMULQDQ",
  "64-bit DS Area",
  "MONITOR/MWAIT instructions",
  "CPL Qualified Debug Store",
  "Virtual Machine Extensions",
  "Safer Mode Extensions",
  "Enhanced Intel SpeedStep technology",
  "Thermal Monitor 2",
  "Supplemental Streaming SIMD Extensions 3",
  "L1 Context ID",
  "",
  "Fused Multiply-Add",
  "CMPXCHG16B",
  "xTPR Update Control",
  "Perfmon and Debug Capability",
  "",
  "Process-context identifiers",
  "Direct Cache Access",
  "Streaming SIMD extensions 4.1",
  "Streaming SIMD extensions 4.2",
  "x2APIC",
  "MOVBE",
  "Popcount instruction",
  "TSC-Deadline",
  "AESNI",
  "XSAVE",
  "OSXSAVE",
  "AVX",
  "F16C",
  "RDRAND",
  ""
};

const char* const _feature_extended_ecx_id[] = {
  "LAHF/SAHF instruction support",
  "Core multi-processor legacy mode",
  "",
  "",
  "",
  "Advanced Bit Manipulations: LZCNT",
  "SSE4A: MOVNTSS, MOVNTSD, EXTRQ, INSERTQ",
  "Misaligned SSE mode",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  ""
};

void VM_Version::initialize_tsc(void) {
  ResourceMark rm;

  cpuid_brand_string_stub_blob = BufferBlob::create("getCPUIDBrandString_stub", cpuid_brand_string_stub_size);
  if (cpuid_brand_string_stub_blob == nullptr) {
    vm_exit_during_initialization("Unable to allocate getCPUIDBrandString_stub");
  }
  CodeBuffer c(cpuid_brand_string_stub_blob);
  VM_Version_StubGenerator g(&c);
  getCPUIDBrandString_stub = CAST_TO_FN_PTR(getCPUIDBrandString_stub_t,
                                   g.generate_getCPUIDBrandString());
}

const char* VM_Version::cpu_model_description(void) {
  uint32_t cpu_family = extended_cpu_family();
  uint32_t cpu_model = extended_cpu_model();
  const char* model = nullptr;

  if (cpu_family == CPU_FAMILY_PENTIUMPRO) {
    for (uint32_t i = 0; i <= cpu_model; i++) {
      model = _model_id_pentium_pro[i];
      if (model == nullptr) {
        break;
      }
    }
  }
  return model;
}

const char* VM_Version::cpu_brand_string(void) {
  if (_cpu_brand_string == nullptr) {
    _cpu_brand_string = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_EBS_MAX_LENGTH, mtInternal);
    if (nullptr == _cpu_brand_string) {
      return nullptr;
    }
    int ret_val = cpu_extended_brand_string(_cpu_brand_string, CPU_EBS_MAX_LENGTH);
    if (ret_val != OS_OK) {
      FREE_C_HEAP_ARRAY(char, _cpu_brand_string);
      _cpu_brand_string = nullptr;
    }
  }
  return _cpu_brand_string;
}

const char* VM_Version::cpu_brand(void) {
  const char*  brand  = nullptr;

  if ((_cpuid_info.std_cpuid1_ebx.value & 0xFF) > 0) {
    int brand_num = _cpuid_info.std_cpuid1_ebx.value & 0xFF;
    brand = _brand_id[0];
    for (int i = 0; brand != nullptr && i <= brand_num; i += 1) {
      brand = _brand_id[i];
    }
  }
  return brand;
}

bool VM_Version::cpu_is_em64t(void) {
  return ((_cpuid_info.ext_cpuid1_edx.value & INTEL64_FLAG) == INTEL64_FLAG);
}

bool VM_Version::is_netburst(void) {
  return (is_intel() && (extended_cpu_family() == CPU_FAMILY_PENTIUM_4));
}

bool VM_Version::supports_tscinv_ext(void) {
  if (!supports_tscinv_bit()) {
    return false;
  }

  if (is_intel()) {
    return true;
  }

  if (is_amd()) {
    return !is_amd_Barcelona();
  }

  if (is_hygon()) {
    return true;
  }

  return false;
}

void VM_Version::resolve_cpu_information_details(void) {

  // In the future we want to base this information on proper CPU and cache
  // topology enumeration, such as Intel 64 Architecture Processor Topology
  // Enumeration, which supports system CPU and cache topology enumeration
  // using either x2APIC IDs or initial APIC IDs.

  // For now these are only rough estimates that will not necessarily
  // reflect the exact configuration of the system.

  // this is the number of logical hardware threads
  // visible to the operating system
  _no_of_threads = os::processor_count();

  // find out the number of threads per cpu package
  int threads_per_package = threads_per_core() * cores_per_cpu();

  // use the number of threads visible to the process to guess the number of sockets
  _no_of_sockets = _no_of_threads / threads_per_package;

  // The process might only see a subset of the total number of threads from
  // a single processor package, e.g. under virtualization or resource
  // management. If so, report a single package.
  if (0 == _no_of_sockets) {
    _no_of_sockets = 1;
  }

  // estimate the number of cores
  _no_of_cores = cores_per_cpu() * _no_of_sockets;
}
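
// Worked example (hypothetical numbers): with 16 OS-visible threads,
// 2 threads per core and 4 cores per package, threads_per_package is
// 2 * 4 = 8, so _no_of_sockets = 16 / 8 = 2 and
// _no_of_cores = 4 * 2 = 8.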


const char* VM_Version::cpu_family_description(void) {
  int cpu_family_id = extended_cpu_family();
  if (is_amd()) {
    if (cpu_family_id < ExtendedFamilyIdLength_AMD) {
      return _family_id_amd[cpu_family_id];
    }
  }
  if (is_intel()) {
    if (cpu_family_id == CPU_FAMILY_PENTIUMPRO) {
      return cpu_model_description();
    }
    if (cpu_family_id < ExtendedFamilyIdLength_INTEL) {
      return _family_id_intel[cpu_family_id];
    }
  }
  if (is_hygon()) {
    return "Dhyana";
  }
  return "Unknown x86";
}

int VM_Version::cpu_type_description(char* const buf, size_t buf_len) {
  assert(buf != nullptr, "buffer is null!");
  assert(buf_len >= CPU_TYPE_DESC_BUF_SIZE, "buffer len should be at least CPU_TYPE_DESC_BUF_SIZE!");

  const char* cpu_type = nullptr;
  const char* x64 = nullptr;

  if (is_intel()) {
    cpu_type = "Intel";
    x64 = cpu_is_em64t() ? " Intel64" : "";
  } else if (is_amd()) {
    cpu_type = "AMD";
    x64 = cpu_is_em64t() ? " AMD64" : "";
  } else if (is_hygon()) {
    cpu_type = "Hygon";
    x64 = cpu_is_em64t() ? " AMD64" : "";
  } else {
    cpu_type = "Unknown x86";
    x64 = cpu_is_em64t() ? " x86_64" : "";
  }

  jio_snprintf(buf, buf_len, "%s %s%s SSE SSE2%s%s%s%s%s%s%s%s",
    cpu_type,
    cpu_family_description(),
    supports_ht() ? " (HT)" : "",
    supports_sse3() ? " SSE3" : "",
    supports_ssse3() ? " SSSE3" : "",
    supports_sse4_1() ? " SSE4.1" : "",
    supports_sse4_2() ? " SSE4.2" : "",
    supports_sse4a() ? " SSE4A" : "",
    is_netburst() ? " Netburst" : "",
    is_intel_family_core() ? " Core" : "",
    x64);

  return OS_OK;
}

int VM_Version::cpu_extended_brand_string(char* const buf, size_t buf_len) {
  assert(buf != nullptr, "buffer is null!");
  assert(buf_len >= CPU_EBS_MAX_LENGTH, "buffer len should be at least CPU_EBS_MAX_LENGTH!");
  assert(getCPUIDBrandString_stub != nullptr, "not initialized");

  // invoke newly generated asm code to fetch CPU Brand String
  getCPUIDBrandString_stub(&_cpuid_info);

  // fetch results into buffer
  *((uint32_t*) &buf[0])  = _cpuid_info.proc_name_0;
  *((uint32_t*) &buf[4])  = _cpuid_info.proc_name_1;
  *((uint32_t*) &buf[8])  = _cpuid_info.proc_name_2;
  *((uint32_t*) &buf[12]) = _cpuid_info.proc_name_3;
  *((uint32_t*) &buf[16]) = _cpuid_info.proc_name_4;
  *((uint32_t*) &buf[20]) = _cpuid_info.proc_name_5;
  *((uint32_t*) &buf[24]) = _cpuid_info.proc_name_6;
  *((uint32_t*) &buf[28]) = _cpuid_info.proc_name_7;
  *((uint32_t*) &buf[32]) = _cpuid_info.proc_name_8;
  *((uint32_t*) &buf[36]) = _cpuid_info.proc_name_9;
  *((uint32_t*) &buf[40]) = _cpuid_info.proc_name_10;
  *((uint32_t*) &buf[44]) = _cpuid_info.proc_name_11;

  return OS_OK;
}

size_t VM_Version::cpu_write_support_string(char* const buf, size_t buf_len) {
  guarantee(buf != nullptr, "buffer is null!");
  guarantee(buf_len > 0, "buffer too small!");

  unsigned int flag = 0;
  unsigned int fi = 0;
  size_t       written = 0;
  const char*  prefix = "";

#define WRITE_TO_BUF(string)                                                          \
  {                                                                                   \
    int res = jio_snprintf(&buf[written], buf_len - written, "%s%s", prefix, string); \
    if (res < 0) {                                                                    \
      return buf_len - 1;                                                             \
    }                                                                                 \
    written += res;                                                                   \
    if (prefix[0] == '\0') {                                                          \
      prefix = ", ";                                                                  \
    }                                                                                 \
  }
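
  // The loops below append one name per supported feature, producing a
  // comma-separated list drawn from the tables above, for example:
  //   "On-Chip FPU, Virtual Mode Extensions, ..., Invariant TSC"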

  for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
    if (flag == HTT_FLAG && (((_cpuid_info.std_cpuid1_ebx.value >> 16) & 0xff) <= 1)) {
      continue; /* no hyperthreading */
    } else if (flag == SEP_FLAG && (cpu_family() == CPU_FAMILY_PENTIUMPRO && ((_cpuid_info.std_cpuid1_eax.value & 0xff) < 0x33))) {
      continue; /* no fast system call */
    }
    if ((_cpuid_info.std_cpuid1_edx.value & flag) && strlen(_feature_edx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_edx_id[fi]);
    }
  }

  for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
    if ((_cpuid_info.std_cpuid1_ecx.value & flag) && strlen(_feature_ecx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_ecx_id[fi]);
    }
  }

  for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
    if ((_cpuid_info.ext_cpuid1_ecx.value & flag) && strlen(_feature_extended_ecx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_extended_ecx_id[fi]);
    }
  }

  for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
    if ((_cpuid_info.ext_cpuid1_edx.value & flag) && strlen(_feature_extended_edx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_extended_edx_id[fi]);
    }
  }

  if (supports_tscinv_bit()) {
      WRITE_TO_BUF("Invariant TSC");
  }

  return written;
}

/**
 * Write a detailed description of the cpu to a given buffer, including
 * feature set.
 */
int VM_Version::cpu_detailed_description(char* const buf, size_t buf_len) {
  assert(buf != nullptr, "buffer is null!");
  assert(buf_len >= CPU_DETAILED_DESC_BUF_SIZE, "buffer len should be at least CPU_DETAILED_DESC_BUF_SIZE!");

  static const char* unknown = "<unknown>";
  char               vendor_id[VENDOR_LENGTH];
  const char*        family = nullptr;
  const char*        model = nullptr;
  const char*        brand = nullptr;
  int                outputLen = 0;

  family = cpu_family_description();
  if (family == nullptr) {
    family = unknown;
  }

  model = cpu_model_description();
  if (model == nullptr) {
    model = unknown;
  }

  brand = cpu_brand_string();

  if (brand == nullptr) {
    brand = cpu_brand();
    if (brand == nullptr) {
      brand = unknown;
    }
  }

  *((uint32_t*) &vendor_id[0]) = _cpuid_info.std_vendor_name_0;
  *((uint32_t*) &vendor_id[4]) = _cpuid_info.std_vendor_name_2;
  *((uint32_t*) &vendor_id[8]) = _cpuid_info.std_vendor_name_1;
  vendor_id[VENDOR_LENGTH-1] = '\0';

  outputLen = jio_snprintf(buf, buf_len, "Brand: %s, Vendor: %s\n"
    "Family: %s (0x%x), Model: %s (0x%x), Stepping: 0x%x\n"
    "Ext. family: 0x%x, Ext. model: 0x%x, Type: 0x%x, Signature: 0x%8.8x\n"
    "Features: ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
    "Ext. features: eax: 0x%8.8x, ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
    "Supports: ",
    brand,
    vendor_id,
    family,
    extended_cpu_family(),
    model,
    extended_cpu_model(),
    cpu_stepping(),
    _cpuid_info.std_cpuid1_eax.bits.ext_family,
    _cpuid_info.std_cpuid1_eax.bits.ext_model,
    _cpuid_info.std_cpuid1_eax.bits.proc_type,
    _cpuid_info.std_cpuid1_eax.value,
    _cpuid_info.std_cpuid1_ebx.value,
    _cpuid_info.std_cpuid1_ecx.value,
    _cpuid_info.std_cpuid1_edx.value,
    _cpuid_info.ext_cpuid1_eax,
    _cpuid_info.ext_cpuid1_ebx,
    _cpuid_info.ext_cpuid1_ecx,
    _cpuid_info.ext_cpuid1_edx);

  if (outputLen < 0 || (size_t) outputLen >= buf_len - 1) {
    if (buf_len > 0) { buf[buf_len-1] = '\0'; }
    return OS_ERR;
  }

  cpu_write_support_string(&buf[outputLen], buf_len - outputLen);

  return OS_OK;
}


// Fill in Abstract_VM_Version statics
void VM_Version::initialize_cpu_information() {
  assert(_vm_version_initialized, "should have initialized VM_Version long ago");
  assert(!_initialized, "shouldn't be initialized yet");
  resolve_cpu_information_details();

  // initialize cpu_name and cpu_desc
  cpu_type_description(_cpu_name, CPU_TYPE_DESC_BUF_SIZE);
  cpu_detailed_description(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE);
  _initialized = true;
}

/**
 *  For information about extracting the frequency from the cpu brand string, please see:
 *
 *    Intel Processor Identification and the CPUID Instruction
 *    Application Note 485
 *    May 2012
 *
 * The return value is the frequency in Hz.
 */
int64_t VM_Version::max_qualified_cpu_freq_from_brand_string(void) {
  const char* const brand_string = cpu_brand_string();
  if (brand_string == nullptr) {
    return 0;
  }
  const int64_t MEGA = 1000000;
  int64_t multiplier = 0;
  int64_t frequency = 0;
  uint8_t idx = 0;
  // The brand string buffer is at most 48 bytes.
  // -2 is to prevent buffer overrun when looking for y in yHz, as z is +2 from y.
  for (; idx < 48-2; ++idx) {
    // Format is either "x.xxyHz" or "xxxxyHz", where y=M, G, T and x are digits.
    // Search brand string for "yHz" where y is M, G, or T.
    if (brand_string[idx+1] == 'H' && brand_string[idx+2] == 'z') {
      if (brand_string[idx] == 'M') {
        multiplier = MEGA;
      } else if (brand_string[idx] == 'G') {
        multiplier = MEGA * 1000;
      } else if (brand_string[idx] == 'T') {
        multiplier = MEGA * MEGA;
      }
      break;
    }
  }
  if (multiplier > 0) {
    // Compute frequency (in Hz) from brand string.
    if (brand_string[idx-3] == '.') { // if format is "x.xx"
      frequency =  (brand_string[idx-4] - '0') * multiplier;
      frequency += (brand_string[idx-2] - '0') * multiplier / 10;
      frequency += (brand_string[idx-1] - '0') * multiplier / 100;
    } else { // format is "xxxx"
      frequency =  (brand_string[idx-4] - '0') * 1000;
      frequency += (brand_string[idx-3] - '0') * 100;
      frequency += (brand_string[idx-2] - '0') * 10;
      frequency += (brand_string[idx-1] - '0');
      frequency *= multiplier;
    }
  }
  return frequency;
}
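
// Worked example: for a brand string ending in "3.60GHz" (a typical Intel
// brand string suffix), idx stops at 'G', multiplier is 10^9, and since
// brand_string[idx-3] is '.' the frequency computes to
// 3*10^9 + 6*10^9/10 + 0*10^9/100 = 3,600,000,000 Hz.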
2948

2949

2950
int64_t VM_Version::maximum_qualified_cpu_frequency(void) {
2951
  if (_max_qualified_cpu_frequency == 0) {
2952
    _max_qualified_cpu_frequency = max_qualified_cpu_freq_from_brand_string();
2953
  }
2954
  return _max_qualified_cpu_frequency;
2955
}

uint64_t VM_Version::CpuidInfo::feature_flags() const {
  uint64_t result = 0;
  if (std_cpuid1_edx.bits.cmpxchg8 != 0)
    result |= CPU_CX8;
  if (std_cpuid1_edx.bits.cmov != 0)
    result |= CPU_CMOV;
  if (std_cpuid1_edx.bits.clflush != 0)
    result |= CPU_FLUSH;
#ifdef _LP64
  // clflush should always be available on x86_64
  // if not we are in real trouble because we rely on it
  // to flush the code cache.
  assert ((result & CPU_FLUSH) != 0, "clflush should be available");
#endif
  if (std_cpuid1_edx.bits.fxsr != 0 || (is_amd_family() &&
      ext_cpuid1_edx.bits.fxsr != 0))
    result |= CPU_FXSR;
  // HT flag is set for multi-core processors also.
  if (threads_per_core() > 1)
    result |= CPU_HT;
  if (std_cpuid1_edx.bits.mmx != 0 || (is_amd_family() &&
      ext_cpuid1_edx.bits.mmx != 0))
    result |= CPU_MMX;
  if (std_cpuid1_edx.bits.sse != 0)
    result |= CPU_SSE;
  if (std_cpuid1_edx.bits.sse2 != 0)
    result |= CPU_SSE2;
  if (std_cpuid1_ecx.bits.sse3 != 0)
    result |= CPU_SSE3;
  if (std_cpuid1_ecx.bits.ssse3 != 0)
    result |= CPU_SSSE3;
  if (std_cpuid1_ecx.bits.sse4_1 != 0)
    result |= CPU_SSE4_1;
  if (std_cpuid1_ecx.bits.sse4_2 != 0)
    result |= CPU_SSE4_2;
  if (std_cpuid1_ecx.bits.popcnt != 0)
    result |= CPU_POPCNT;
  if (sefsl1_cpuid7_edx.bits.apx_f != 0 &&
      xem_xcr0_eax.bits.apx_f != 0) {
    result |= CPU_APX_F;
  }
  if (std_cpuid1_ecx.bits.avx != 0 &&
      std_cpuid1_ecx.bits.osxsave != 0 &&
      xem_xcr0_eax.bits.sse != 0 &&
      xem_xcr0_eax.bits.ymm != 0) {
    result |= CPU_AVX;
    result |= CPU_VZEROUPPER;
    if (std_cpuid1_ecx.bits.f16c != 0)
      result |= CPU_F16C;
    if (sef_cpuid7_ebx.bits.avx2 != 0) {
      result |= CPU_AVX2;
      if (sefsl1_cpuid7_eax.bits.avx_ifma != 0)
        result |= CPU_AVX_IFMA;
    }
    if (sef_cpuid7_ecx.bits.gfni != 0)
      result |= CPU_GFNI;
    if (sef_cpuid7_ebx.bits.avx512f != 0 &&
        xem_xcr0_eax.bits.opmask != 0 &&
        xem_xcr0_eax.bits.zmm512 != 0 &&
        xem_xcr0_eax.bits.zmm32 != 0) {
      result |= CPU_AVX512F;
      if (sef_cpuid7_ebx.bits.avx512cd != 0)
        result |= CPU_AVX512CD;
      if (sef_cpuid7_ebx.bits.avx512dq != 0)
        result |= CPU_AVX512DQ;
      if (sef_cpuid7_ebx.bits.avx512ifma != 0)
        result |= CPU_AVX512_IFMA;
      if (sef_cpuid7_ebx.bits.avx512pf != 0)
        result |= CPU_AVX512PF;
      if (sef_cpuid7_ebx.bits.avx512er != 0)
        result |= CPU_AVX512ER;
      if (sef_cpuid7_ebx.bits.avx512bw != 0)
        result |= CPU_AVX512BW;
      if (sef_cpuid7_ebx.bits.avx512vl != 0)
        result |= CPU_AVX512VL;
      if (sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0)
        result |= CPU_AVX512_VPOPCNTDQ;
      if (sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0)
        result |= CPU_AVX512_VPCLMULQDQ;
      if (sef_cpuid7_ecx.bits.vaes != 0)
        result |= CPU_AVX512_VAES;
      if (sef_cpuid7_ecx.bits.avx512_vnni != 0)
        result |= CPU_AVX512_VNNI;
      if (sef_cpuid7_ecx.bits.avx512_bitalg != 0)
        result |= CPU_AVX512_BITALG;
      if (sef_cpuid7_ecx.bits.avx512_vbmi != 0)
        result |= CPU_AVX512_VBMI;
      if (sef_cpuid7_ecx.bits.avx512_vbmi2 != 0)
        result |= CPU_AVX512_VBMI2;
    }
  }
  if (std_cpuid1_ecx.bits.hv != 0)
    result |= CPU_HV;
  if (sef_cpuid7_ebx.bits.bmi1 != 0)
    result |= CPU_BMI1;
  if (std_cpuid1_edx.bits.tsc != 0)
    result |= CPU_TSC;
  if (ext_cpuid7_edx.bits.tsc_invariance != 0)
    result |= CPU_TSCINV_BIT;
  if (std_cpuid1_ecx.bits.aes != 0)
    result |= CPU_AES;
  if (sef_cpuid7_ebx.bits.erms != 0)
    result |= CPU_ERMS;
  if (sef_cpuid7_edx.bits.fast_short_rep_mov != 0)
    result |= CPU_FSRM;
  if (std_cpuid1_ecx.bits.clmul != 0)
    result |= CPU_CLMUL;
  if (sef_cpuid7_ebx.bits.rtm != 0)
    result |= CPU_RTM;
  if (sef_cpuid7_ebx.bits.adx != 0)
    result |= CPU_ADX;
  if (sef_cpuid7_ebx.bits.bmi2 != 0)
    result |= CPU_BMI2;
  if (sef_cpuid7_ebx.bits.sha != 0)
    result |= CPU_SHA;
  if (std_cpuid1_ecx.bits.fma != 0)
    result |= CPU_FMA;
  if (sef_cpuid7_ebx.bits.clflushopt != 0)
    result |= CPU_FLUSHOPT;
  if (ext_cpuid1_edx.bits.rdtscp != 0)
    result |= CPU_RDTSCP;
  if (sef_cpuid7_ecx.bits.rdpid != 0)
    result |= CPU_RDPID;

  // AMD|Hygon features.
  if (is_amd_family()) {
    if ((ext_cpuid1_edx.bits.tdnow != 0) ||
        (ext_cpuid1_ecx.bits.prefetchw != 0))
      result |= CPU_3DNOW_PREFETCH;
    if (ext_cpuid1_ecx.bits.lzcnt != 0)
      result |= CPU_LZCNT;
    if (ext_cpuid1_ecx.bits.sse4a != 0)
      result |= CPU_SSE4A;
  }

  // Intel features.
  if (is_intel()) {
    if (ext_cpuid1_ecx.bits.lzcnt != 0) {
      result |= CPU_LZCNT;
    }
    if (ext_cpuid1_ecx.bits.prefetchw != 0) {
      result |= CPU_3DNOW_PREFETCH;
    }
    if (sef_cpuid7_ebx.bits.clwb != 0) {
      result |= CPU_CLWB;
    }
    if (sef_cpuid7_edx.bits.serialize != 0)
      result |= CPU_SERIALIZE;
  }

  // ZX features.
  if (is_zx()) {
    if (ext_cpuid1_ecx.bits.lzcnt != 0) {
      result |= CPU_LZCNT;
    }
    if (ext_cpuid1_ecx.bits.prefetchw != 0) {
      result |= CPU_3DNOW_PREFETCH;
    }
  }

  // Protection key features.
  if (sef_cpuid7_ecx.bits.pku != 0) {
    result |= CPU_PKU;
  }
  if (sef_cpuid7_ecx.bits.ospke != 0) {
    result |= CPU_OSPKE;
  }

  // Control flow enforcement (CET) features.
  if (sef_cpuid7_ecx.bits.cet_ss != 0) {
    result |= CPU_CET_SS;
  }
  if (sef_cpuid7_edx.bits.cet_ibt != 0) {
    result |= CPU_CET_IBT;
  }

  // Composite features.
  if (supports_tscinv_bit() &&
      ((is_amd_family() && !is_amd_Barcelona()) ||
       is_intel_tsc_synched_at_init())) {
    result |= CPU_TSCINV;
  }

  return result;
}
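
// Illustrative sketch (not part of the original sources): the bit fields
// tested above are filled in by the generated get_cpu_info_stub, but the
// same leaves can be queried directly with the GCC/Clang <cpuid.h>
// helpers. For example, AVX2 is reported in CPUID.(EAX=07H,ECX=0):EBX[5];
// the helper name has_avx2_raw below is hypothetical:
//
//   #include <cpuid.h>
//   static bool has_avx2_raw() {
//     unsigned int eax, ebx, ecx, edx;
//     if (__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx) == 0) {
//       return false; // leaf 7 not supported
//     }
//     return (ebx & (1u << 5)) != 0;
//   }
//
// Note that, as the CPU_AVX case above shows, a raw CPUID bit alone is not
// sufficient; OSXSAVE and the XCR0 state bits must also be set before the
// feature is actually usable.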

bool VM_Version::os_supports_avx_vectors() {
  bool retVal = false;
  int nreg = 2 LP64_ONLY(+2);
  if (supports_evex()) {
    // Verify that the OS saves/restores all bits of EVEX registers
    // during signal processing.
    retVal = true;
    for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
      if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
        retVal = false;
        break;
      }
    }
  } else if (supports_avx()) {
    // Verify that the OS saves/restores all bits of AVX registers
    // during signal processing.
    retVal = true;
    for (int i = 0; i < 8 * nreg; i++) { // 32 bytes per ymm register
      if (_cpuid_info.ymm_save[i] != ymm_test_value()) {
        retVal = false;
        break;
      }
    }
    // zmm_save will be set on an EVEX-enabled machine even if we choose AVX code gen
    if (retVal == false) {
      // Verify that the OS saves/restores all bits of EVEX registers
      // during signal processing.
      retVal = true;
      for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
        if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
          retVal = false;
          break;
        }
      }
    }
  }
  return retVal;
}
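
// Worked example (illustrative): on LP64, nreg = 2 + 2 = 4, so the AVX
// check above compares 8 * 4 = 32 saved 32-bit words, i.e. the full
// 32 bytes of four ymm registers, against ymm_test_value(); the EVEX
// check compares 16 * 4 = 64 words, covering 64 bytes of four zmm
// registers.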

bool VM_Version::os_supports_apx_egprs() {
  if (!supports_apx_f()) {
    return false;
  }
  // Enable APX support for product builds after
  // completion of planned features listed in JDK-8329030.
#if !defined(PRODUCT)
  if (_cpuid_info.apx_save[0] != egpr_test_value() ||
      _cpuid_info.apx_save[1] != egpr_test_value()) {
    return false;
  }
  return true;
#else
  return false;
#endif
}

uint VM_Version::cores_per_cpu() {
  uint result = 1;
  if (is_intel()) {
    bool supports_topology = supports_processor_topology();
    if (supports_topology) {
      result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
               _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
    }
    if (!supports_topology || result == 0) {
      result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
    }
  } else if (is_amd_family()) {
    result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1);
  } else if (is_zx()) {
    bool supports_topology = supports_processor_topology();
    if (supports_topology) {
      result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
               _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
    }
    if (!supports_topology || result == 0) {
      result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
    }
  }
  return result;
}
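
// Worked example (illustrative, hypothetical CPUID values): if topology
// leaf 0xB reports 16 logical processors at the core level (tpl_cpuidB1)
// and 2 at the SMT level (tpl_cpuidB0), cores_per_cpu() returns
// 16 / 2 = 8 cores.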

uint VM_Version::threads_per_core() {
  uint result = 1;
  if (is_intel() && supports_processor_topology()) {
    result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
  } else if (is_zx() && supports_processor_topology()) {
    result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
  } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
    if (cpu_family() >= 0x17) {
      result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
    } else {
      result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
               cores_per_cpu();
    }
  }
  return (result == 0 ? 1 : result);
}
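
// Note (illustrative): families >= 0x17 (AMD Zen and newer) report the SMT
// width via CPUID leaf 0x8000001E, whose field holds threads-per-core
// minus one (hence the + 1); older parts derive it from the leaf 1
// logical-processor count divided by cores_per_cpu().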

uint VM_Version::L1_line_size() {
  uint result = 0;
  if (is_intel()) {
    result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
  } else if (is_amd_family()) {
    result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size;
  } else if (is_zx()) {
    result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
  }
  if (result < 32) // not defined ?
    result = 32;   // 32 bytes by default on x86 and other x64
  return result;
}
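
// Worked example (illustrative): Intel reports the coherency line size
// minus one in CPUID leaf 4 EBX[11:0], so a raw field value of 63 yields
// 63 + 1 = 64 bytes, the common line size on modern x86.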

bool VM_Version::is_intel_tsc_synched_at_init() {
  if (is_intel_family_core()) {
    uint32_t ext_model = extended_cpu_model();
    if (ext_model == CPU_MODEL_NEHALEM_EP     ||
        ext_model == CPU_MODEL_WESTMERE_EP    ||
        ext_model == CPU_MODEL_SANDYBRIDGE_EP ||
        ext_model == CPU_MODEL_IVYBRIDGE_EP) {
      // <= 2-socket invariant tsc support. EX versions are usually used
      // in > 2-socket systems and likely don't synchronize tscs at
      // initialization.
      // Code that uses tsc values must be prepared for them to arbitrarily
      // jump forward or backward.
      return true;
    }
  }
  return false;
}

int VM_Version::allocate_prefetch_distance(bool use_watermark_prefetch) {
  // Hardware prefetching (distance/size in bytes):
  // Pentium 3 -  64 /  32
  // Pentium 4 - 256 / 128
  // Athlon    -  64 /  32 ????
  // Opteron   - 128 /  64 only when 2 sequential cache lines accessed
  // Core      - 128 /  64
  //
  // Software prefetching (distance in bytes / instruction with best score):
  // Pentium 3 - 128 / prefetchnta
  // Pentium 4 - 512 / prefetchnta
  // Athlon    - 128 / prefetchnta
  // Opteron   - 256 / prefetchnta
  // Core      - 256 / prefetchnta
  // It will be used only when AllocatePrefetchStyle > 0

  if (is_amd_family()) { // AMD | Hygon
    if (supports_sse2()) {
      return 256; // Opteron
    } else {
      return 128; // Athlon
    }
  } else { // Intel
    if (supports_sse3() && cpu_family() == 6) {
      if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus
        return 192;
      } else if (use_watermark_prefetch) { // watermark prefetching on Core
#ifdef _LP64
        return 384;
#else
        return 320;
#endif
      }
    }
    if (supports_sse2()) {
      if (cpu_family() == 6) {
        return 256; // Pentium M, Core, Core2
      } else {
        return 512; // Pentium 4
      }
    } else {
      return 128; // Pentium 3 (and all other old CPUs)
    }
  }
}
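
// Worked example (illustrative): a Nehalem-class Intel CPU (family 6 with
// SSE4.2 and HT) gets a 192-byte prefetch distance, i.e. three 64-byte
// cache lines ahead of the current allocation point.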

bool VM_Version::is_intrinsic_supported(vmIntrinsicID id) {
  assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
  switch (id) {
  case vmIntrinsics::_floatToFloat16:
  case vmIntrinsics::_float16ToFloat:
    if (!supports_float16()) {
      return false;
    }
    break;
  default:
    break;
  }
  return true;
}
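
// Usage note (illustrative): vmIntrinsics::_floatToFloat16 and
// vmIntrinsics::_float16ToFloat back the Java methods
// java.lang.Float.floatToFloat16(float) and
// java.lang.Float.float16ToFloat(short); on x86 they are intrinsified
// only when supports_float16() is true, otherwise the JIT falls back to
// the shared Java/runtime implementation.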