2
// Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved.
3
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5
// This code is free software; you can redistribute it and/or modify it
6
// under the terms of the GNU General Public License version 2 only, as
7
// published by the Free Software Foundation.
9
// This code is distributed in the hope that it will be useful, but WITHOUT
10
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12
// version 2 for more details (a copy is included in the LICENSE file that
13
// accompanied this code).
15
// You should have received a copy of the GNU General Public License version
16
// 2 along with this work; if not, write to the Free Software Foundation,
17
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20
// or visit www.oracle.com if you need additional information or have any
25
// X86 Architecture Description File
27
//----------REGISTER DEFINITION BLOCK------------------------------------------
28
// This information is used by the matcher and the register allocator to
29
// describe individual registers and classes of registers within the target
33
//----------Architecture Description Register Definitions----------------------
35
// "reg_def" name ( register save type, C convention save type,
36
// ideal register type, encoding );
37
// Register Save Types:
39
// NS = No-Save: The register allocator assumes that these registers
40
// can be used without saving upon entry to the method, &
41
// that they do not need to be saved at call sites.
43
// SOC = Save-On-Call: The register allocator assumes that these registers
44
// can be used without saving upon entry to the method,
45
// but that they must be saved at call sites.
47
// SOE = Save-On-Entry: The register allocator assumes that these registers
48
// must be saved before using them upon entry to the
49
// method, but they do not need to be saved at call
52
// AS = Always-Save: The register allocator assumes that these registers
53
// must be saved before using them upon entry to the
54
// method, & that they must be saved at call sites.
56
// Ideal Register Type is used to determine how to save & restore a
57
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
58
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
60
// The encoding number is the actual bit-pattern placed into the opcodes.
63
// Previously set EBX, ESI, and EDI as save-on-entry for java code
64
// Turn off SOE in java-code due to frequent use of uncommon-traps.
65
// Now that allocator is better, turn on ESI and EDI as SOE registers.
67
// General-purpose integer registers.
// Per the header above, the arguments are (register save type, C convention
// save type, ideal register type, encoding); the encoding is the actual bit
// pattern placed into opcodes.  SOC = save-on-call, SOE = save-on-entry,
// NS = no-save.  ESP is NS/NS (never allocated for spilling), and EBP is NS
// for java code because the prolog/epilog saves and restores it (see the
// comment directly above its reg_def).
reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
68
reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
69
reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
70
reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
71
// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
72
reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
73
reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
74
reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
75
reg_def ESP( NS, NS, Op_RegI, 4, rsp->as_VMReg());
77
// Float registers. We treat TOS/FPR0 special. It is invisible to the
78
// allocator, and only shows up in the encodings.
79
// FPR0 (the x87 top-of-stack) gets no VMReg mapping (VMRegImpl::Bad()):
// it is invisible to the allocator and shows up only in encodings, as
// described above.
reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
80
reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
81
// Ok so here's the trick FPR1 is really st(0) except in the midst
82
// of emission of assembly for a machnode. During the emission the fpu stack
83
// is pushed making FPR1 == st(1) temporarily. However at any safepoint
84
// the stack will not have this element so FPR1 == st(0) from the
85
// oopMap viewpoint. This same weirdness with numbering causes
86
// instruction encoding to have to play games with the register
87
// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
88
// where it does flt->flt moves to see an example
90
// Each FPRnL/FPRnH pair names the low/high 32-bit halves of one FP stack
// slot; the H half maps to the VMReg slot immediately following the L half
// (as_VMReg()->next()), so a double spans the adjacent pair.
reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
91
reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
92
reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
93
reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
94
reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
95
reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
96
reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
97
reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
98
reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
99
reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
100
reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
101
reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
102
reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
103
reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
105
// Empty fill registers, which are never used, but supply alignment to xmm regs
107
// FILL0..FILL7 have no VMReg mapping (VMRegImpl::Bad()) and are never
// allocated; they only pad the register file out to encoding 15.
reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
108
reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
109
reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
110
reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
111
reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
112
reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
113
reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
114
reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());
116
// Specify priority of register selection within phases of register
117
// allocation. Highest priority is first. A useful heuristic is to
118
// give registers a low priority when they are required by machine
119
// instructions, like EAX and EDX. Registers which are used as
120
// pairs must fall on an even boundary (witness the FPR#L's in this list).
121
// For the Intel integer registers, the equivalent Long pairs are
122
// EDX:EAX, EBX:ECX, and EDI:EBP.
123
// Allocation order: highest selection priority first (see the heuristic
// notes above — EAX/EDX sit late because machine instructions require
// them).  The long-pair halves listed here keep their L/H members adjacent
// and on even boundaries as pairs require.
alloc_class chunk0( ECX, EBX, EBP, EDI, EAX, EDX, ESI, ESP,
124
FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
125
FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
126
FPR6L, FPR6H, FPR7L, FPR7H,
127
FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
130
//----------Architecture Description Register Classes--------------------------
131
// Several register classes are automatically defined based upon information in
132
// this architecture description.
133
// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
134
// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
136
// Class for no registers (empty set).
139
// Class for all registers
140
reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
141
// Class for all registers (excluding EBP)
142
reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
143
// Dynamic register class that selects at runtime between register classes
144
// any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
145
// Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
146
reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
148
// Class for general registers
149
reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
150
// Class for general registers (excluding EBP).
151
// It is also safe for use by tailjumps (we don't want to allocate in ebp).
152
// Used also if the PreserveFramePointer flag is true.
153
reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
154
// Dynamic register class that selects between int_reg and int_reg_no_ebp.
155
reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
157
// Class of "X" registers
158
reg_class int_x_reg(EBX, ECX, EDX, EAX);
160
// Class of registers that can appear in an address with no offset.
161
// EBP and ESP require an extra instruction byte for zero offset.
162
// Used in fast-unlock
163
reg_class p_reg(EDX, EDI, ESI, EBX);
165
// Class for general registers excluding ECX
166
reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
167
// Class for general registers excluding ECX (and EBP)
168
reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
169
// Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
170
reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
172
// Class for general registers excluding EAX
173
reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
175
// Class for general registers excluding EAX and EBX.
176
reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
177
// Class for general registers excluding EAX and EBX (and EBP)
178
reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
179
// Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
180
reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
182
// Class of EAX (for multiply and divide operations)
183
reg_class eax_reg(EAX);
185
// Class of EBX (for atomic add)
186
reg_class ebx_reg(EBX);
188
// Class of ECX (for shift and JCXZ operations and cmpLTMask)
189
reg_class ecx_reg(ECX);
191
// Class of EDX (for multiply and divide operations)
192
reg_class edx_reg(EDX);
194
// Class of EDI (for synchronization)
195
reg_class edi_reg(EDI);
197
// Class of ESI (for synchronization)
198
reg_class esi_reg(ESI);
200
// Singleton class for stack pointer
201
reg_class sp_reg(ESP);
203
// Singleton class for instruction pointer
204
// reg_class ip_reg(EIP);
206
// Class of integer register pairs
207
reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
208
// Class of integer register pairs (excluding EBP and EDI);
209
reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
210
// Dynamic register class that selects between long_reg and long_reg_no_ebp.
211
reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
213
// Class of integer register pairs that aligns with calling convention
214
reg_class eadx_reg( EAX,EDX );
215
reg_class ebcx_reg( ECX,EBX );
216
reg_class ebpd_reg( EBP,EDI );
218
// Not AX or DX, used in divides
219
reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
220
// Not AX or DX (and neither EBP), used in divides
221
reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
222
// Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
223
reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
225
// Floating point registers. Notice FPR0 is not a choice.
226
// FPR0 is not ever allocated; we use clever encodings to fake
227
// a 2-address instructions out of Intels FP stack.
228
reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
230
reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
231
FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
234
reg_class fp_flt_reg0( FPR1L );
235
reg_class fp_dbl_reg0( FPR1L,FPR1H );
236
reg_class fp_dbl_reg1( FPR2L,FPR2H );
237
reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
238
FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
243
//----------SOURCE BLOCK-------------------------------------------------------
244
// This is a block of C++ code which provides values, functions, and
245
// definitions necessary in the rest of the architecture description
247
// Must be visible to the DFA in dfa_x86_32.cpp
248
// Declared here so the generated DFA (dfa_x86_32.cpp) can reference it;
// presumably defined alongside the DFA — see the note above.
extern bool is_operand_hi32_zero(Node* n);
252
// Relocation formats attached to emitted 32-bit immediates and
// displacements, respectively.
#define RELOC_IMM32 Assembler::imm_operand
253
#define RELOC_DISP32 Assembler::disp32_operand
257
// How to find the high register of a Long pair, given the low register
258
// For the long pairs used here (EDX:EAX, EBX:ECX, EDI:EBP — see the
// alloc_class comment above), the high register's x86 encoding is the low
// register's encoding plus 2 (EAX=0/EDX=2, ECX=1/EBX=3, EBP=5/EDI=7).
#define HIGH_FROM_LOW(x) (as_Register((x)->encoding()+2))
259
// Same mapping applied directly to a raw encoding value.
#define HIGH_FROM_LOW_ENC(x) ((x)+2)
261
// These masks are used to provide 128-bit aligned bitmasks to the XMM
262
// instructions, to allow sign-masking or sign-bit flipping. They allow
263
// fast versions of NegF/NegD and AbsF/AbsD.
265
// Nothing to do on x86_32: no register masks need runtime initialization
// here (the register classes above are fully static).
void reg_mask_init() {}
267
// Note: 'double' and 'long long' have 32-bits alignment on x86.
268
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
269
// Use the expression (adr)&(~0xF) to provide 128-bits aligned address
270
// of 128-bits operands for SSE instructions.
271
jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
272
// Store the value to a 128-bits operand.
278
// Buffer for 128-bits masks used by SSE instructions.
279
// Over-allocated by one 128-bit slot so double_quadword can round each
// entry down/up to a 16-byte-aligned address inside the buffer.
static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
281
// Static initialization during VM startup.
282
// Masks with the sign bit CLEAR in every 32-bit (float) / 64-bit (double)
// lane: AND with these implements AbsF/AbsD (see the comment above).
static jlong *float_signmask_pool = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
283
static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
284
// Masks with ONLY the sign bit set in each lane: used to flip the sign bit
// for NegF/NegD (see the comment above).
static jlong *float_signflip_pool = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
285
static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
287
// Offset hacking within calls.
288
static int pre_call_resets_size() {
290
Compile* C = Compile::current();
291
if (C->in_24_bit_fp_mode()) {
294
if (VM_Version::supports_vzeroupper()) {
295
size += 3; // vzeroupper
300
// !!!!! Special hack to get all type of calls to specify the byte offset
301
// from the start of the call to the point where the return address
303
int MachCallStaticJavaNode::ret_addr_offset() {
304
return 5 + pre_call_resets_size(); // 5 bytes from start of call to where return address points
307
int MachCallDynamicJavaNode::ret_addr_offset() {
308
return 10 + pre_call_resets_size(); // 10 bytes from start of call to where return address points
311
static int sizeof_FFree_Float_Stack_All = -1;
313
int MachCallRuntimeNode::ret_addr_offset() {
314
assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
315
return 5 + pre_call_resets_size() + (_leaf_no_fp ? 0 : sizeof_FFree_Float_Stack_All);
319
// Compute padding required for nodes which need alignment
322
// The address of the call instruction needs to be 4-byte aligned to
323
// ensure that it does not span a cache line so that it can be patched.
324
int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
325
current_offset += pre_call_resets_size(); // skip fldcw, if any
326
current_offset += 1; // skip call opcode byte
327
return align_up(current_offset, alignment_required()) - current_offset;
330
// The address of the call instruction needs to be 4-byte aligned to
331
// ensure that it does not span a cache line so that it can be patched.
332
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
333
current_offset += pre_call_resets_size(); // skip fldcw, if any
334
current_offset += 5; // skip MOV instruction
335
current_offset += 1; // skip call opcode byte
336
return align_up(current_offset, alignment_required()) - current_offset;
340
void emit_rm(C2_MacroAssembler *masm, int f1, int f2, int f3) {
341
unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
346
void emit_cc(C2_MacroAssembler *masm, int f1, int f2) {
347
unsigned char c = (unsigned char)( f1 | f2 );
352
void emit_opcode(C2_MacroAssembler *masm, int code) {
353
__ emit_int8((unsigned char) code);
356
// EMIT_OPCODE() w/ relocation information
357
void emit_opcode(C2_MacroAssembler *masm, int code, relocInfo::relocType reloc, int offset = 0) {
358
__ relocate(__ inst_mark() + offset, reloc);
359
emit_opcode(masm, code);
363
void emit_d8(C2_MacroAssembler *masm, int d8) {
364
__ emit_int8((unsigned char) d8);
368
void emit_d16(C2_MacroAssembler *masm, int d16) {
373
void emit_d32(C2_MacroAssembler *masm, int d32) {
377
// emit 32 bit value and construct relocation entry from relocInfo::relocType
378
void emit_d32_reloc(C2_MacroAssembler *masm, int d32, relocInfo::relocType reloc,
380
__ relocate(__ inst_mark(), reloc, format);
384
// emit 32 bit value and construct relocation entry from RelocationHolder
385
void emit_d32_reloc(C2_MacroAssembler *masm, int d32, RelocationHolder const& rspec,
388
if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
389
assert(oopDesc::is_oop(cast_to_oop(d32)), "cannot embed broken oops in code");
392
__ relocate(__ inst_mark(), rspec, format);
396
// Access stack slot for load or store
397
void store_to_stackslot(C2_MacroAssembler *masm, int opcode, int rm_field, int disp) {
398
emit_opcode( masm, opcode ); // (e.g., FILD [ESP+src])
399
if( -128 <= disp && disp <= 127 ) {
400
emit_rm( masm, 0x01, rm_field, ESP_enc ); // R/M byte
401
emit_rm( masm, 0x00, ESP_enc, ESP_enc); // SIB byte
402
emit_d8 (masm, disp); // Displacement // R/M byte
404
emit_rm( masm, 0x02, rm_field, ESP_enc ); // R/M byte
405
emit_rm( masm, 0x00, ESP_enc, ESP_enc); // SIB byte
406
emit_d32(masm, disp); // Displacement // R/M byte
410
// rRegI ereg, memory mem) %{ // emit_reg_mem
411
void encode_RegMem( C2_MacroAssembler *masm, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
412
// There is no index & no scale, use form without SIB byte
413
if ((index == 0x4) &&
414
(scale == 0) && (base != ESP_enc)) {
415
// If no displacement, mode is 0x0; unless base is [EBP]
416
if ( (displace == 0) && (base != EBP_enc) ) {
417
emit_rm(masm, 0x0, reg_encoding, base);
419
else { // If 8-bit displacement, mode 0x1
420
if ((displace >= -128) && (displace <= 127)
421
&& (disp_reloc == relocInfo::none) ) {
422
emit_rm(masm, 0x1, reg_encoding, base);
423
emit_d8(masm, displace);
425
else { // If 32-bit displacement
426
if (base == -1) { // Special flag for absolute address
427
emit_rm(masm, 0x0, reg_encoding, 0x5);
428
// (manual lies; no SIB needed here)
429
if ( disp_reloc != relocInfo::none ) {
430
emit_d32_reloc(masm, displace, disp_reloc, 1);
432
emit_d32 (masm, displace);
435
else { // Normal base + offset
436
emit_rm(masm, 0x2, reg_encoding, base);
437
if ( disp_reloc != relocInfo::none ) {
438
emit_d32_reloc(masm, displace, disp_reloc, 1);
440
emit_d32 (masm, displace);
446
else { // Else, encode with the SIB byte
447
// If no displacement, mode is 0x0; unless base is [EBP]
448
if (displace == 0 && (base != EBP_enc)) { // If no displacement
449
emit_rm(masm, 0x0, reg_encoding, 0x4);
450
emit_rm(masm, scale, index, base);
452
else { // If 8-bit displacement, mode 0x1
453
if ((displace >= -128) && (displace <= 127)
454
&& (disp_reloc == relocInfo::none) ) {
455
emit_rm(masm, 0x1, reg_encoding, 0x4);
456
emit_rm(masm, scale, index, base);
457
emit_d8(masm, displace);
459
else { // If 32-bit displacement
461
emit_rm(masm, 0x2, reg_encoding, 0x4);
462
emit_rm(masm, scale, index, 0x04);
464
emit_rm(masm, 0x2, reg_encoding, 0x4);
465
emit_rm(masm, scale, index, base);
467
if ( disp_reloc != relocInfo::none ) {
468
emit_d32_reloc(masm, displace, disp_reloc, 1);
470
emit_d32 (masm, displace);
478
void encode_Copy( C2_MacroAssembler *masm, int dst_encoding, int src_encoding ) {
479
if( dst_encoding == src_encoding ) {
480
// reg-reg copy, use an empty encoding
482
emit_opcode( masm, 0x8B );
483
emit_rm(masm, 0x3, dst_encoding, src_encoding );
487
void emit_cmpfp_fixup(MacroAssembler* masm) {
489
__ jccb(Assembler::noParity, exit);
492
// comiss/ucomiss instructions set ZF,PF,CF flags and
493
// zero OF,AF,SF for NaN values.
494
// Fixup flags by zeroing ZF,PF so that compare of NaN
495
// values returns 'less than' result (CF is set).
496
// Leave the rest of flags unchanged.
499
// |S|Z|r|A|r|P|r|C| (r - reserved bit)
500
// 0 0 1 0 1 0 1 1 (0x2B)
502
__ andl(Address(rsp, 0), 0xffffff2b);
507
static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
510
__ jcc(Assembler::parity, done);
511
__ jcc(Assembler::below, done);
512
__ setb(Assembler::notEqual, dst);
518
//=============================================================================
519
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
521
int ConstantTable::calculate_table_base_offset() const {
522
return 0; // absolute addressing, no offset
525
bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
526
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
527
ShouldNotReachHere();
530
void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
534
uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
539
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
540
st->print("# MachConstantBaseNode (empty encoding)");
545
//=============================================================================
547
void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
550
int framesize = C->output()->frame_size_in_bytes();
551
int bangsize = C->output()->bang_size_in_bytes();
552
assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
553
// Remove wordSize for return addr which is already pushed.
554
framesize -= wordSize;
556
if (C->output()->need_stack_bang(bangsize)) {
557
framesize -= wordSize;
558
st->print("# stack bang (%d bytes)", bangsize);
560
st->print("PUSH EBP\t# Save EBP");
561
if (PreserveFramePointer) {
563
st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
567
st->print("SUB ESP, #%d\t# Create frame",framesize);
570
st->print("SUB ESP, #%d\t# Create frame",framesize);
572
framesize -= wordSize;
573
st->print("MOV [ESP + #%d], EBP\t# Save EBP",framesize);
574
if (PreserveFramePointer) {
576
st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
579
st->print("ADD EBP, #%d", framesize);
584
if (VerifyStackAtCalls) {
586
framesize -= wordSize;
587
st->print("MOV [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
590
if( C->in_24_bit_fp_mode() ) {
592
st->print("FLDCW \t# load 24 bit fpu control word");
594
if (UseSSE >= 2 && VerifyFPU) {
596
st->print("# verify FPU stack (must be clean on entry)");
600
if (VerifyStackAtCalls) {
602
st->print("# stack alignment check");
610
void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
613
int framesize = C->output()->frame_size_in_bytes();
614
int bangsize = C->output()->bang_size_in_bytes();
616
__ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode(), C->stub_function() != nullptr);
618
C->output()->set_frame_complete(__ offset());
620
if (C->has_mach_constant_base_node()) {
621
// NOTE: We set the table base offset here because users might be
622
// emitted before MachConstantBaseNode.
623
ConstantTable& constant_table = C->output()->constant_table();
624
constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
628
uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
629
return MachNode::size(ra_); // too many variables; just compute it the hard way
632
int MachPrologNode::reloc() const {
633
return 0; // a large enough number
636
//=============================================================================
638
void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
640
int framesize = C->output()->frame_size_in_bytes();
641
assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
642
// Remove two words for return addr and rbp,
643
framesize -= 2*wordSize;
645
if (C->max_vector_size() > 16) {
646
st->print("VZEROUPPER");
647
st->cr(); st->print("\t");
649
if (C->in_24_bit_fp_mode()) {
650
st->print("FLDCW standard control word");
651
st->cr(); st->print("\t");
654
st->print("ADD ESP,%d\t# Destroy frame",framesize);
655
st->cr(); st->print("\t");
657
st->print_cr("POPL EBP"); st->print("\t");
658
if (do_polling() && C->is_method_compilation()) {
659
st->print("CMPL rsp, poll_offset[thread] \n\t"
660
"JA #safepoint_stub\t"
661
"# Safepoint: poll for GC");
666
void MachEpilogNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
669
if (C->max_vector_size() > 16) {
670
// Clear upper bits of YMM registers when current compiled code uses
671
// wide vectors to avoid AVX <-> SSE transition penalty during call.
674
// If method set FPU control word, restore to standard control word
675
if (C->in_24_bit_fp_mode()) {
676
__ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
679
int framesize = C->output()->frame_size_in_bytes();
680
assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
681
// Remove two words for return addr and rbp,
682
framesize -= 2*wordSize;
684
// Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
686
if (framesize >= 128) {
687
emit_opcode(masm, 0x81); // add SP, #framesize
688
emit_rm(masm, 0x3, 0x00, ESP_enc);
689
emit_d32(masm, framesize);
690
} else if (framesize) {
691
emit_opcode(masm, 0x83); // add SP, #framesize
692
emit_rm(masm, 0x3, 0x00, ESP_enc);
693
emit_d8(masm, framesize);
696
emit_opcode(masm, 0x58 | EBP_enc);
698
if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
699
__ reserved_stack_check();
702
if (do_polling() && C->is_method_compilation()) {
703
Register thread = as_Register(EBX_enc);
704
__ get_thread(thread);
706
Label* code_stub = &dummy_label;
707
if (!C->output()->in_scratch_emit_size()) {
708
C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
709
C->output()->add_stub(stub);
710
code_stub = &stub->entry();
713
__ relocate(relocInfo::poll_return_type);
714
__ clear_inst_mark();
715
__ safepoint_poll(*code_stub, thread, true /* at_return */, true /* in_nmethod */);
719
uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
720
return MachNode::size(ra_); // too many variables; just compute it
724
int MachEpilogNode::reloc() const {
725
return 0; // a large enough number
728
const Pipeline * MachEpilogNode::pipeline() const {
729
return MachNode::pipeline_class();
732
//=============================================================================
734
enum RC { rc_bad, rc_int, rc_kreg, rc_float, rc_xmm, rc_stack };
735
static enum RC rc_class( OptoReg::Name reg ) {
737
if( !OptoReg::is_valid(reg) ) return rc_bad;
738
if (OptoReg::is_stack(reg)) return rc_stack;
740
VMReg r = OptoReg::as_VMReg(reg);
741
if (r->is_Register()) return rc_int;
742
if (r->is_FloatRegister()) {
743
assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
746
if (r->is_KRegister()) return rc_kreg;
747
assert(r->is_XMMRegister(), "must be");
751
static int impl_helper( C2_MacroAssembler *masm, bool do_size, bool is_load, int offset, int reg,
752
int opcode, const char *op_str, int size, outputStream* st ) {
754
masm->set_inst_mark();
755
emit_opcode (masm, opcode );
756
encode_RegMem(masm, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
757
masm->clear_inst_mark();
759
} else if( !do_size ) {
760
if( size != 0 ) st->print("\n\t");
761
if( opcode == 0x8B || opcode == 0x89 ) { // MOV
762
if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
763
else st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
764
} else { // FLD, FST, PUSH, POP
765
st->print("%s [ESP + #%d]",op_str,offset);
769
int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
770
return size+3+offset_size;
773
// Helper for XMM registers. Extra opcode bits, limited syntax.
774
static int impl_x_helper( C2_MacroAssembler *masm, bool do_size, bool is_load,
775
int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
776
int in_size_in_bits = Assembler::EVEX_32bit;
777
int evex_encoding = 0;
778
if (reg_lo+1 == reg_hi) {
779
in_size_in_bits = Assembler::EVEX_64bit;
780
evex_encoding = Assembler::VEX_W;
783
// EVEX spills remain EVEX: Compressed displacemement is better than AVX on spill mem operations,
784
// it maps more cases to single byte displacement
786
if (reg_lo+1 == reg_hi) { // double move?
788
__ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
790
__ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
794
__ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
796
__ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
800
} else if (!do_size) {
801
if (size != 0) st->print("\n\t");
802
if (reg_lo+1 == reg_hi) { // double move?
803
if (is_load) st->print("%s %s,[ESP + #%d]",
804
UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
805
Matcher::regName[reg_lo], offset);
806
else st->print("MOVSD [ESP + #%d],%s",
807
offset, Matcher::regName[reg_lo]);
809
if (is_load) st->print("MOVSS %s,[ESP + #%d]",
810
Matcher::regName[reg_lo], offset);
811
else st->print("MOVSS [ESP + #%d],%s",
812
offset, Matcher::regName[reg_lo]);
816
bool is_single_byte = false;
817
if ((UseAVX > 2) && (offset != 0)) {
818
is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
822
offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
824
offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
826
size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
827
// VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
828
return size+5+offset_size;
832
static int impl_movx_helper( C2_MacroAssembler *masm, bool do_size, int src_lo, int dst_lo,
833
int src_hi, int dst_hi, int size, outputStream* st ) {
835
// EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
837
if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
838
__ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
839
as_XMMRegister(Matcher::_regEncode[src_lo]));
841
__ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
842
as_XMMRegister(Matcher::_regEncode[src_lo]));
845
} else if (!do_size) {
846
if (size != 0) st->print("\n\t");
847
if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
848
if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
849
st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
851
st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
854
if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
855
st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
857
st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
862
// VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
863
// Only MOVAPS SSE prefix uses 1 byte. EVEX uses an additional 2 bytes.
864
int sz = (UseAVX > 2) ? 6 : 4;
865
if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
866
UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
870
static int impl_movgpr2x_helper( C2_MacroAssembler *masm, bool do_size, int src_lo, int dst_lo,
871
int src_hi, int dst_hi, int size, outputStream* st ) {
874
// EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
876
__ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
877
as_Register(Matcher::_regEncode[src_lo]));
879
} else if (!do_size) {
880
st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
883
return (UseAVX> 2) ? 6 : 4;
887
static int impl_movx2gpr_helper( C2_MacroAssembler *masm, bool do_size, int src_lo, int dst_lo,
888
int src_hi, int dst_hi, int size, outputStream* st ) {
891
// EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
893
__ movdl(as_Register(Matcher::_regEncode[dst_lo]),
894
as_XMMRegister(Matcher::_regEncode[src_lo]));
896
} else if (!do_size) {
897
st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
900
return (UseAVX> 2) ? 6 : 4;
903
static int impl_mov_helper( C2_MacroAssembler *masm, bool do_size, int src, int dst, int size, outputStream* st ) {
905
emit_opcode(masm, 0x8B );
906
emit_rm (masm, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
908
} else if( !do_size ) {
909
if( size != 0 ) st->print("\n\t");
910
st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
916
static int impl_fp_store_helper( C2_MacroAssembler *masm, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
917
int offset, int size, outputStream* st ) {
918
if( src_lo != FPR1L_num ) { // Move value to top of FP stack, if not already there
920
emit_opcode( masm, 0xD9 ); // FLD (i.e., push it)
921
emit_d8( masm, 0xC0-1+Matcher::_regEncode[src_lo] );
923
} else if( !do_size ) {
924
if( size != 0 ) st->print("\n\t");
925
st->print("FLD %s",Matcher::regName[src_lo]);
931
int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
934
if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
935
op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
937
} else { // 32-bit store
938
op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
940
assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
943
return impl_helper(masm,do_size,false,offset,st_op,op,op_str,size, st);
946
// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
947
static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
948
int src_hi, int dst_hi, uint ireg, outputStream* st);
950
void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
951
int stack_offset, int reg, uint ireg, outputStream* st);
953
static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
954
int dst_offset, uint ireg, outputStream* st) {
958
__ pushl(Address(rsp, src_offset));
959
__ popl (Address(rsp, dst_offset));
962
__ pushl(Address(rsp, src_offset));
963
__ popl (Address(rsp, dst_offset));
964
__ pushl(Address(rsp, src_offset+4));
965
__ popl (Address(rsp, dst_offset+4));
968
__ movdqu(Address(rsp, -16), xmm0);
969
__ movdqu(xmm0, Address(rsp, src_offset));
970
__ movdqu(Address(rsp, dst_offset), xmm0);
971
__ movdqu(xmm0, Address(rsp, -16));
974
__ vmovdqu(Address(rsp, -32), xmm0);
975
__ vmovdqu(xmm0, Address(rsp, src_offset));
976
__ vmovdqu(Address(rsp, dst_offset), xmm0);
977
__ vmovdqu(xmm0, Address(rsp, -32));
980
__ evmovdquq(Address(rsp, -64), xmm0, 2);
981
__ evmovdquq(xmm0, Address(rsp, src_offset), 2);
982
__ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
983
__ evmovdquq(xmm0, Address(rsp, -64), 2);
986
ShouldNotReachHere();
992
st->print("pushl [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
994
src_offset, dst_offset);
997
st->print("pushl [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
998
"popq [rsp + #%d]\n\t"
999
"pushl [rsp + #%d]\n\t"
1001
src_offset, dst_offset, src_offset+4, dst_offset+4);
1004
st->print("movdqu [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
1005
"movdqu xmm0, [rsp + #%d]\n\t"
1006
"movdqu [rsp + #%d], xmm0\n\t"
1007
"movdqu xmm0, [rsp - #16]",
1008
src_offset, dst_offset);
1011
st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
1012
"vmovdqu xmm0, [rsp + #%d]\n\t"
1013
"vmovdqu [rsp + #%d], xmm0\n\t"
1014
"vmovdqu xmm0, [rsp - #32]",
1015
src_offset, dst_offset);
1018
st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
1019
"vmovdqu xmm0, [rsp + #%d]\n\t"
1020
"vmovdqu [rsp + #%d], xmm0\n\t"
1021
"vmovdqu xmm0, [rsp - #64]",
1022
src_offset, dst_offset);
1025
ShouldNotReachHere();
1031
uint MachSpillCopyNode::implementation( C2_MacroAssembler *masm, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
1032
// Get registers to move
1033
OptoReg::Name src_second = ra_->get_reg_second(in(1));
1034
OptoReg::Name src_first = ra_->get_reg_first(in(1));
1035
OptoReg::Name dst_second = ra_->get_reg_second(this );
1036
OptoReg::Name dst_first = ra_->get_reg_first(this );
1038
enum RC src_second_rc = rc_class(src_second);
1039
enum RC src_first_rc = rc_class(src_first);
1040
enum RC dst_second_rc = rc_class(dst_second);
1041
enum RC dst_first_rc = rc_class(dst_first);
1043
assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
1045
// Generate spill code!
1048
if( src_first == dst_first && src_second == dst_second )
1049
return size; // Self copy, no move
1051
if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
1052
uint ireg = ideal_reg();
1053
assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
1054
assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
1055
assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
1056
if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1058
int src_offset = ra_->reg2offset(src_first);
1059
int dst_offset = ra_->reg2offset(dst_first);
1060
vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
1061
} else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1062
vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
1063
} else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1064
int stack_offset = ra_->reg2offset(dst_first);
1065
vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
1066
} else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
1067
int stack_offset = ra_->reg2offset(src_first);
1068
vec_spill_helper(masm, true, stack_offset, dst_first, ireg, st);
1070
ShouldNotReachHere();
1075
// --------------------------------------
1076
// Check for mem-mem move. push/pop to move.
1077
if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1078
if( src_second == dst_first ) { // overlapping stack copy ranges
1079
assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
1080
size = impl_helper(masm,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
1081
size = impl_helper(masm,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
1082
src_second_rc = dst_second_rc = rc_bad; // flag as already moved the second bits
1085
size = impl_helper(masm,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st);
1086
size = impl_helper(masm,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st);
1087
if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
1088
size = impl_helper(masm,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
1089
size = impl_helper(masm,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
1094
// --------------------------------------
1095
// Check for integer reg-reg copy
1096
if( src_first_rc == rc_int && dst_first_rc == rc_int )
1097
size = impl_mov_helper(masm,do_size,src_first,dst_first,size, st);
1099
// Check for integer store
1100
if( src_first_rc == rc_int && dst_first_rc == rc_stack )
1101
size = impl_helper(masm,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
1103
// Check for integer load
1104
if( src_first_rc == rc_stack && dst_first_rc == rc_int )
1105
size = impl_helper(masm,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
1107
// Check for integer reg-xmm reg copy
1108
if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
1109
assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1110
"no 64 bit integer-float reg moves" );
1111
return impl_movgpr2x_helper(masm,do_size,src_first,dst_first,src_second, dst_second, size, st);
1113
// --------------------------------------
1114
// Check for float reg-reg copy
1115
if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
1116
assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1117
(src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
1120
// Note the mucking with the register encode to compensate for the 0/1
1121
// indexing issue mentioned in a comment in the reg_def sections
1122
// for FPR registers many lines above here.
1124
if( src_first != FPR1L_num ) {
1125
emit_opcode (masm, 0xD9 ); // FLD ST(i)
1126
emit_d8 (masm, 0xC0+Matcher::_regEncode[src_first]-1 );
1127
emit_opcode (masm, 0xDD ); // FSTP ST(i)
1128
emit_d8 (masm, 0xD8+Matcher::_regEncode[dst_first] );
1130
emit_opcode (masm, 0xDD ); // FST ST(i)
1131
emit_d8 (masm, 0xD0+Matcher::_regEncode[dst_first]-1 );
1134
} else if( !do_size ) {
1135
if( size != 0 ) st->print("\n\t");
1136
if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
1137
else st->print( "FST %s", Matcher::regName[dst_first]);
1140
return size + ((src_first != FPR1L_num) ? 2+2 : 2);
1143
// Check for float store
1144
if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
1145
return impl_fp_store_helper(masm,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
1148
// Check for float load
1149
if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
1150
int offset = ra_->reg2offset(src_first);
1153
if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
1156
} else { // 32-bit load
1159
assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
1162
masm->set_inst_mark();
1163
emit_opcode (masm, op );
1164
encode_RegMem(masm, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
1165
emit_opcode (masm, 0xDD ); // FSTP ST(i)
1166
emit_d8 (masm, 0xD8+Matcher::_regEncode[dst_first] );
1167
masm->clear_inst_mark();
1169
} else if( !do_size ) {
1170
if( size != 0 ) st->print("\n\t");
1171
st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]);
1174
int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
1175
return size + 3+offset_size+2;
1178
// Check for xmm reg-reg copy
1179
if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1180
assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1181
(src_first+1 == src_second && dst_first+1 == dst_second),
1182
"no non-adjacent float-moves" );
1183
return impl_movx_helper(masm,do_size,src_first,dst_first,src_second, dst_second, size, st);
1186
// Check for xmm reg-integer reg copy
1187
if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
1188
assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1189
"no 64 bit float-integer reg moves" );
1190
return impl_movx2gpr_helper(masm,do_size,src_first,dst_first,src_second, dst_second, size, st);
1193
// Check for xmm store
1194
if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1195
return impl_x_helper(masm,do_size,false,ra_->reg2offset(dst_first), src_first, src_second, size, st);
1198
// Check for float xmm load
1199
if( src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
1200
return impl_x_helper(masm,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
1203
// Copy from float reg to xmm reg
1204
if( src_first_rc == rc_float && dst_first_rc == rc_xmm ) {
1205
// copy to the top of stack from floating point reg
1206
// and use LEA to preserve flags
1208
emit_opcode(masm,0x8D); // LEA ESP,[ESP-8]
1209
emit_rm(masm, 0x1, ESP_enc, 0x04);
1210
emit_rm(masm, 0x0, 0x04, ESP_enc);
1213
} else if( !do_size ) {
1214
if( size != 0 ) st->print("\n\t");
1215
st->print("LEA ESP,[ESP-8]");
1220
size = impl_fp_store_helper(masm,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
1222
// Copy from the temp memory to the xmm reg.
1223
size = impl_x_helper(masm,do_size,true ,0,dst_first, dst_second, size, st);
1226
emit_opcode(masm,0x8D); // LEA ESP,[ESP+8]
1227
emit_rm(masm, 0x1, ESP_enc, 0x04);
1228
emit_rm(masm, 0x0, 0x04, ESP_enc);
1231
} else if( !do_size ) {
1232
if( size != 0 ) st->print("\n\t");
1233
st->print("LEA ESP,[ESP+8]");
1240
// AVX-512 opmask specific spilling.
1241
if (src_first_rc == rc_stack && dst_first_rc == rc_kreg) {
1242
assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
1243
assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
1244
int offset = ra_->reg2offset(src_first);
1245
if (masm != nullptr) {
1246
__ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
1249
st->print("KMOV %s, [ESP + %d]", Matcher::regName[dst_first], offset);
1255
if (src_first_rc == rc_kreg && dst_first_rc == rc_stack) {
1256
assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
1257
assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
1258
int offset = ra_->reg2offset(dst_first);
1259
if (masm != nullptr) {
1260
__ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
1263
st->print("KMOV [ESP + %d], %s", offset, Matcher::regName[src_first]);
1269
if (src_first_rc == rc_kreg && dst_first_rc == rc_int) {
1274
if (src_first_rc == rc_int && dst_first_rc == rc_kreg) {
1279
if (src_first_rc == rc_kreg && dst_first_rc == rc_kreg) {
1280
assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
1281
assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
1282
if (masm != nullptr) {
1283
__ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
1286
st->print("KMOV %s, %s", Matcher::regName[dst_first], Matcher::regName[src_first]);
1292
assert( size > 0, "missed a case" );
1294
// --------------------------------------------------------------------
1295
// Check for second bits still needing moving.
1296
if( src_second == dst_second )
1297
return size; // Self copy; no move
1298
assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
1300
// Check for second word int-int move
1301
if( src_second_rc == rc_int && dst_second_rc == rc_int )
1302
return impl_mov_helper(masm,do_size,src_second,dst_second,size, st);
1304
// Check for second word integer store
1305
if( src_second_rc == rc_int && dst_second_rc == rc_stack )
1306
return impl_helper(masm,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
1308
// Check for second word integer load
1309
if( dst_second_rc == rc_int && src_second_rc == rc_stack )
1310
return impl_helper(masm,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
1313
return 0; // Mute compiler
1317
void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
1318
implementation( nullptr, ra_, false, st );
1322
void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1323
implementation( masm, ra_, false, nullptr );
1326
uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
1327
return MachNode::size(ra_);
1331
//=============================================================================
1333
void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1334
int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1335
int reg = ra_->get_reg_first(this);
1336
st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset);
1340
void BoxLockNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1341
int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1342
int reg = ra_->get_encode(this);
1343
if( offset >= 128 ) {
1344
emit_opcode(masm, 0x8D); // LEA reg,[SP+offset]
1345
emit_rm(masm, 0x2, reg, 0x04);
1346
emit_rm(masm, 0x0, 0x04, ESP_enc);
1347
emit_d32(masm, offset);
1350
emit_opcode(masm, 0x8D); // LEA reg,[SP+offset]
1351
emit_rm(masm, 0x1, reg, 0x04);
1352
emit_rm(masm, 0x0, 0x04, ESP_enc);
1353
emit_d8(masm, offset);
1357
uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
1358
int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1359
if( offset >= 128 ) {
1367
//=============================================================================
1369
void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1370
st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check");
1371
st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub");
1372
st->print_cr("\tNOP");
1373
st->print_cr("\tNOP");
1374
if( !OptoBreakpoint )
1375
st->print_cr("\tNOP");
1379
void MachUEPNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1380
__ ic_check(CodeEntryAlignment);
1383
uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
1384
return MachNode::size(ra_); // too many variables; just compute it
1389
//=============================================================================
1391
// Vector calling convention not supported.
1392
bool Matcher::supports_vector_calling_convention() {
1396
OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
1398
return OptoRegPair(0, 0);
1401
// Is this branch offset short enough that a short branch can be used?
1403
// NOTE: If the platform does not provide any short branch variants, then
1404
// this method should return false for offset 0.
1405
bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
1406
// The passed offset is relative to address of the branch.
1407
// On 86 a branch displacement is calculated relative to address
1408
// of a next instruction.
1411
// the short version of jmpConUCF2 contains multiple branches,
1412
// making the reach slightly less
1413
if (rule == jmpConUCF2_rule)
1414
return (-126 <= offset && offset <= 125);
1415
return (-128 <= offset && offset <= 127);
1418
// Return whether or not this register is ever used as an argument. This
1419
// function is used on startup to build the trampoline stubs in generateOptoStub.
1420
// Registers not mentioned will be killed by the VM call in the trampoline, and
1421
// arguments in those registers not be available to the callee.
1422
bool Matcher::can_be_java_arg( int reg ) {
1423
if( reg == ECX_num || reg == EDX_num ) return true;
1424
if( (reg == XMM0_num || reg == XMM1_num ) && UseSSE>=1 ) return true;
1425
if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
1429
bool Matcher::is_spillable_arg( int reg ) {
1430
return can_be_java_arg(reg);
1433
uint Matcher::int_pressure_limit()
1435
return (INTPRESSURE == -1) ? 6 : INTPRESSURE;
1438
uint Matcher::float_pressure_limit()
1440
return (FLOATPRESSURE == -1) ? 6 : FLOATPRESSURE;
1443
bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
1444
// Use hardware integer DIV instruction when
1445
// it is faster than a code which use multiply.
1446
// Only when constant divisor fits into 32 bit
1447
// (min_jint is excluded to get only correct
1448
// positive 32 bit values from negative).
1449
return VM_Version::has_fast_idiv() &&
1450
(divisor == (int)divisor && divisor != min_jint);
1453
// Register for DIVI projection of divmodI
1454
RegMask Matcher::divI_proj_mask() {
1455
return EAX_REG_mask();
1458
// Register for MODI projection of divmodI
1459
RegMask Matcher::modI_proj_mask() {
1460
return EDX_REG_mask();
1463
// Register for DIVL projection of divmodL
1464
RegMask Matcher::divL_proj_mask() {
1465
ShouldNotReachHere();
1469
// Register for MODL projection of divmodL
1470
RegMask Matcher::modL_proj_mask() {
1471
ShouldNotReachHere();
1475
const RegMask Matcher::method_handle_invoke_SP_save_mask() {
1476
return NO_REG_mask();
1479
// Returns true if the high 32 bits of the value is known to be zero.
1480
bool is_operand_hi32_zero(Node* n) {
1481
int opc = n->Opcode();
1482
if (opc == Op_AndL) {
1483
Node* o2 = n->in(2);
1484
if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1488
if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1496
//----------ENCODING BLOCK-----------------------------------------------------
1497
// This block specifies the encoding classes used by the compiler to output
1498
// byte streams. Encoding classes generate functions which are called by
1499
// Machine Instruction Nodes in order to generate the bit encoding of the
1500
// instruction. Operands specify their base encoding interface with the
1501
// interface keyword. There are currently supported four interfaces,
1502
// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an
1503
// operand to generate a function which returns its register number when
1504
// queried. CONST_INTER causes an operand to generate a function which
1505
// returns the value of the constant when queried. MEMORY_INTER causes an
1506
// operand to generate four functions which return the Base Register, the
1507
// Index Register, the Scale Value, and the Offset Value of the operand when
1508
// queried. COND_INTER causes an operand to generate six functions which
1509
// return the encoding code (ie - encoding bits for the instruction)
1510
// associated with each basic boolean condition for a conditional instruction.
1511
// Instructions specify two basic values for encoding. They use the
1512
// ins_encode keyword to specify their encoding class (which must be one of
1513
// the class names specified in the encoding block), and they use the
1514
// opcode keyword to specify, in order, their primary, secondary, and
1515
// tertiary opcode. Only the opcode sections which a particular instruction
1516
// needs for encoding need to be specified.
1518
// Build emit functions for each basic byte or larger field in the intel
1519
// encoding scheme (opcode, rm, sib, immediate), and call them from C++
1520
// code in the enc_class source block. Emit functions will live in the
1521
// main source block for now. In future, we can generalize this by
1522
// adding a syntax that specifies the sizes of fields in an order,
1523
// so that the adlc can build the emit functions automagically
1525
// Set instruction mark in MacroAssembler. This is used only in
1526
// instructions that emit bytes directly to the CodeBuffer wraped
1527
// in the MacroAssembler. Should go away once all "instruct" are
1528
// patched to emit bytes only using methods in MacroAssembler.
1529
enc_class SetInstMark %{
1533
enc_class ClearInstMark %{
1534
__ clear_inst_mark();
1537
// Emit primary opcode
1539
emit_opcode(masm, $primary);
1542
// Emit secondary opcode
1544
emit_opcode(masm, $secondary);
1547
// Emit opcode directly
1548
enc_class Opcode(immI d8) %{
1549
emit_opcode(masm, $d8$$constant);
1552
enc_class SizePrefix %{
1553
emit_opcode(masm,0x66);
1556
enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many)
1557
emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
1560
enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{ // OpcRegReg(Many)
1561
emit_opcode(masm,$opcode$$constant);
1562
emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
1565
enc_class mov_r32_imm0( rRegI dst ) %{
1566
emit_opcode( masm, 0xB8 + $dst$$reg ); // 0xB8+ rd -- MOV r32 ,imm32
1567
emit_d32 ( masm, 0x0 ); // imm32==0x0
1570
enc_class cdq_enc %{
1571
// Full implementation of Java idiv and irem; checks for
1572
// special case as described in JVM spec., p.243 & p.271.
1574
// normal case special case
1576
// input : rax,: dividend min_int
1579
// output: rax,: quotient (= rax, idiv reg) min_int
1580
// rdx: remainder (= rax, irem reg) 0
1584
// 81 F8 00 00 00 80 cmp rax,80000000h
1585
// 0F 85 0B 00 00 00 jne normal_case
1586
// 33 D2 xor rdx,edx
1587
// 83 F9 FF cmp rcx,0FFh
1588
// 0F 84 03 00 00 00 je done
1591
// F7 F9 idiv rax,ecx
1594
emit_opcode(masm,0x81); emit_d8(masm,0xF8);
1595
emit_opcode(masm,0x00); emit_d8(masm,0x00);
1596
emit_opcode(masm,0x00); emit_d8(masm,0x80); // cmp rax,80000000h
1597
emit_opcode(masm,0x0F); emit_d8(masm,0x85);
1598
emit_opcode(masm,0x0B); emit_d8(masm,0x00);
1599
emit_opcode(masm,0x00); emit_d8(masm,0x00); // jne normal_case
1600
emit_opcode(masm,0x33); emit_d8(masm,0xD2); // xor rdx,edx
1601
emit_opcode(masm,0x83); emit_d8(masm,0xF9); emit_d8(masm,0xFF); // cmp rcx,0FFh
1602
emit_opcode(masm,0x0F); emit_d8(masm,0x84);
1603
emit_opcode(masm,0x03); emit_d8(masm,0x00);
1604
emit_opcode(masm,0x00); emit_d8(masm,0x00); // je done
1606
emit_opcode(masm,0x99); // cdq
1607
// idiv (note: must be emitted by the user of this rule)
1611
// Dense encoding for older common ops
1612
enc_class Opc_plus(immI opcode, rRegI reg) %{
1613
emit_opcode(masm, $opcode$$constant + $reg$$reg);
1617
// Opcde enc_class for 8/32 bit immediate instructions with sign-extension
1618
enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
1619
// Check for 8-bit immediate, and set sign extend bit in opcode
1620
if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1621
emit_opcode(masm, $primary | 0x02);
1623
else { // If 32-bit immediate
1624
emit_opcode(masm, $primary);
1628
enc_class OpcSErm (rRegI dst, immI imm) %{ // OpcSEr/m
1629
// Emit primary opcode and set sign-extend bit
1630
// Check for 8-bit immediate, and set sign extend bit in opcode
1631
if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1632
emit_opcode(masm, $primary | 0x02); }
1633
else { // If 32-bit immediate
1634
emit_opcode(masm, $primary);
1636
// Emit r/m byte with secondary opcode, after primary opcode.
1637
emit_rm(masm, 0x3, $secondary, $dst$$reg);
1640
enc_class Con8or32 (immI imm) %{ // Con8or32(storeImmI), 8 or 32 bits
1641
// Check for 8-bit immediate, and set sign extend bit in opcode
1642
if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1643
$$$emit8$imm$$constant;
1645
else { // If 32-bit immediate
1647
$$$emit32$imm$$constant;
1651
enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
1652
// Emit primary opcode and set sign-extend bit
1653
// Check for 8-bit immediate, and set sign extend bit in opcode
1654
int con = (int)$imm$$constant; // Throw away top bits
1655
emit_opcode(masm, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1656
// Emit r/m byte with secondary opcode, after primary opcode.
1657
emit_rm(masm, 0x3, $secondary, $dst$$reg);
1658
if ((con >= -128) && (con <= 127)) emit_d8 (masm,con);
1659
else emit_d32(masm,con);
1662
enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
1663
// Emit primary opcode and set sign-extend bit
1664
// Check for 8-bit immediate, and set sign extend bit in opcode
1665
int con = (int)($imm$$constant >> 32); // Throw away bottom bits
1666
emit_opcode(masm, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1667
// Emit r/m byte with tertiary opcode, after primary opcode.
1668
emit_rm(masm, 0x3, $tertiary, HIGH_FROM_LOW_ENC($dst$$reg));
1669
if ((con >= -128) && (con <= 127)) emit_d8 (masm,con);
1670
else emit_d32(masm,con);
1673
enc_class OpcSReg (rRegI dst) %{ // BSWAP
1674
emit_cc(masm, $secondary, $dst$$reg );
1677
enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
1678
int destlo = $dst$$reg;
1679
int desthi = HIGH_FROM_LOW_ENC(destlo);
1681
emit_opcode(masm, 0x0F);
1682
emit_cc(masm, 0xC8, destlo);
1684
emit_opcode(masm, 0x0F);
1685
emit_cc(masm, 0xC8, desthi);
1687
emit_opcode(masm, 0x87);
1688
emit_rm(masm, 0x3, destlo, desthi);
1691
enc_class RegOpc (rRegI div) %{ // IDIV, IMOD, JMP indirect, ...
1692
emit_rm(masm, 0x3, $secondary, $div$$reg );
1695
enc_class enc_cmov(cmpOp cop ) %{ // CMOV
1697
emit_cc(masm, $secondary, $cop$$cmpcode);
1700
enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
1701
int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
1702
emit_d8(masm, op >> 8 );
1703
emit_d8(masm, op & 255);
1706
// emulate a CMOV with a conditional branch around a MOV
1707
enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
1708
// Invert sense of branch from sense of CMOV
1709
emit_cc( masm, 0x70, ($cop$$cmpcode^1) );
1710
emit_d8( masm, $brOffs$$constant );
1713
enc_class enc_PartialSubtypeCheck( ) %{
1714
Register Redi = as_Register(EDI_enc); // result register
1715
Register Reax = as_Register(EAX_enc); // super class
1716
Register Recx = as_Register(ECX_enc); // killed
1717
Register Resi = as_Register(ESI_enc); // sub class
1720
__ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
1722
/*set_cond_codes:*/ true);
1724
__ xorptr(Redi, Redi);
1729
enc_class FFree_Float_Stack_All %{ // Free_Float_Stack_All
1730
int start = __ offset();
1733
__ verify_FPU(0, "must be empty in SSE2+ mode");
1736
// External c_calling_convention expects the FPU stack to be 'clean'.
1737
// Compiled code leaves it dirty. Do cleanup now.
1738
__ empty_FPU_stack();
1740
if (sizeof_FFree_Float_Stack_All == -1) {
1741
sizeof_FFree_Float_Stack_All = __ offset() - start;
1743
assert(__ offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
1747
enc_class Verify_FPU_For_Leaf %{
1749
__ verify_FPU( -3, "Returning from Runtime Leaf call");
1753
enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf
1754
// This is the instruction starting address for relocation info.
1757
// CALL directly to the runtime
1758
emit_d32_reloc(masm, ($meth$$method - (int)(__ pc()) - 4),
1759
runtime_call_Relocation::spec(), RELOC_IMM32 );
1760
__ clear_inst_mark();
1764
BasicType rt = tf()->return_type();
1766
if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
1767
// A C runtime call where the return value is unused. In SSE2+
1768
// mode the result needs to be removed from the FPU stack. It's
1769
// likely that this function call could be removed by the
1770
// optimizer if the C function is a pure function.
1772
} else if (rt == T_FLOAT) {
1773
__ lea(rsp, Address(rsp, -4));
1774
__ fstp_s(Address(rsp, 0));
1775
__ movflt(xmm0, Address(rsp, 0));
1776
__ lea(rsp, Address(rsp, 4));
1777
} else if (rt == T_DOUBLE) {
1778
__ lea(rsp, Address(rsp, -8));
1779
__ fstp_d(Address(rsp, 0));
1780
__ movdbl(xmm0, Address(rsp, 0));
1781
__ lea(rsp, Address(rsp, 8));
1786
enc_class pre_call_resets %{
1787
// If method sets FPU control word restore it here
1788
debug_only(int off0 = __ offset());
1789
if (ra_->C->in_24_bit_fp_mode()) {
1790
__ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
1792
// Clear upper bits of YMM registers when current compiled code uses
1793
// wide vectors to avoid AVX <-> SSE transition penalty during call.
1795
debug_only(int off1 = __ offset());
1796
assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
1799
enc_class post_call_FPU %{
1800
// If method sets FPU control word do it here also
1801
if (Compile::current()->in_24_bit_fp_mode()) {
1802
__ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
1806
enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL
1807
// CALL to fixup routine. Fixup routine uses ScopeDesc info to determine
1808
// who we intended to call.
1813
emit_d32_reloc(masm, ($meth$$method - (int)(__ pc()) - 4),
1814
runtime_call_Relocation::spec(),
1816
__ clear_inst_mark();
1819
int method_index = resolved_method_index(masm);
1820
RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
1821
: static_call_Relocation::spec(method_index);
1822
emit_d32_reloc(masm, ($meth$$method - (int)(__ pc()) - 4),
1823
rspec, RELOC_DISP32);
1825
address mark = __ inst_mark();
1826
if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
1827
// Calls of the same statically bound method can share
1828
// a stub to the interpreter.
1829
__ code()->shared_stub_to_interp_for(_method, __ code()->insts()->mark_off());
1830
__ clear_inst_mark();
1832
// Emit stubs for static call.
1833
address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
1834
__ clear_inst_mark();
1835
if (stub == nullptr) {
1836
ciEnv::current()->record_failure("CodeCache is full");
1843
enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL
1844
__ ic_call((address)$meth$$method, resolved_method_index(masm));
1848
enc_class Java_Compiled_Call (method meth) %{ // JAVA COMPILED CALL
1849
int disp = in_bytes(Method::from_compiled_offset());
1850
assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
1852
// CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())]
1855
emit_rm(masm, 0x01, $secondary, EAX_enc ); // R/M byte
1856
emit_d8(masm, disp); // Displacement
1857
__ clear_inst_mark();
1861
enc_class RegOpcImm (rRegI dst, immI8 shift) %{ // SHL, SAR, SHR
1863
emit_rm(masm, 0x3, $secondary, $dst$$reg);
1864
$$$emit8$shift$$constant;
1867
enc_class LdImmI (rRegI dst, immI src) %{ // Load Immediate
1868
// Load immediate does not have a zero or sign extended version
1869
// for 8-bit immediates
1870
emit_opcode(masm, 0xB8 + $dst$$reg);
1871
$$$emit32$src$$constant;
1874
enc_class LdImmP (rRegI dst, immI src) %{ // Load Immediate
1875
// Load immediate does not have a zero or sign extended version
1876
// for 8-bit immediates
1877
emit_opcode(masm, $primary + $dst$$reg);
1878
$$$emit32$src$$constant;
1881
enc_class LdImmL_Lo( eRegL dst, immL src) %{ // Load Immediate
1882
// Load immediate does not have a zero or sign extended version
1883
// for 8-bit immediates
1884
int dst_enc = $dst$$reg;
1885
int src_con = $src$$constant & 0x0FFFFFFFFL;
1888
emit_opcode(masm, 0x33);
1889
emit_rm(masm, 0x3, dst_enc, dst_enc);
1891
emit_opcode(masm, $primary + dst_enc);
1892
emit_d32(masm, src_con);
1896
enc_class LdImmL_Hi( eRegL dst, immL src) %{ // Load Immediate
1897
// Load immediate does not have a zero or sign extended version
1898
// for 8-bit immediates
1899
int dst_enc = $dst$$reg + 2;
1900
int src_con = ((julong)($src$$constant)) >> 32;
1903
emit_opcode(masm, 0x33);
1904
emit_rm(masm, 0x3, dst_enc, dst_enc);
1906
emit_opcode(masm, $primary + dst_enc);
1907
emit_d32(masm, src_con);
1912
// Encode a reg-reg copy. If it is useless, then empty encoding.
1913
enc_class enc_Copy( rRegI dst, rRegI src ) %{
1914
encode_Copy( masm, $dst$$reg, $src$$reg );
1917
enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
1918
encode_Copy( masm, $dst$$reg, $src$$reg );
1921
enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many)
1922
emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
1925
enc_class RegReg_Lo(eRegL dst, eRegL src) %{ // RegReg(Many)
1927
emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
1930
enc_class RegReg_Hi(eRegL dst, eRegL src) %{ // RegReg(Many)
1932
emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg));
1935
enc_class RegReg_Lo2(eRegL dst, eRegL src) %{ // RegReg(Many)
1936
emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
1939
enc_class RegReg_Hi2(eRegL dst, eRegL src) %{ // RegReg(Many)
1940
emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg));
1943
enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
1944
emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($src$$reg));
1947
enc_class Con32 (immI src) %{ // Con32(storeImmI)
1949
$$$emit32$src$$constant;
1952
enc_class Con32FPR_as_bits(immFPR src) %{ // storeF_imm
1953
// Output Float immediate bits
1954
jfloat jf = $src$$constant;
1955
int jf_as_bits = jint_cast( jf );
1956
emit_d32(masm, jf_as_bits);
1959
enc_class Con32F_as_bits(immF src) %{ // storeX_imm
1960
// Output Float immediate bits
1961
jfloat jf = $src$$constant;
1962
int jf_as_bits = jint_cast( jf );
1963
emit_d32(masm, jf_as_bits);
1966
enc_class Con16 (immI src) %{ // Con16(storeImmI)
1968
$$$emit16$src$$constant;
1971
enc_class Con_d32(immI src) %{
1972
emit_d32(masm,$src$$constant);
1975
enc_class conmemref (eRegP t1) %{ // Con32(storeImmI)
1976
// Output immediate memory reference
1977
emit_rm(masm, 0x00, $t1$$reg, 0x05 );
1978
emit_d32(masm, 0x00);
1981
enc_class lock_prefix( ) %{
1982
emit_opcode(masm,0xF0); // [Lock]
1985
// Cmp-xchg long value.
1986
// Note: we need to swap rbx, and rcx before and after the
1987
// cmpxchg8 instruction because the instruction uses
1988
// rcx as the high order word of the new value to store but
1989
// our register encoding uses rbx.
1990
enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
1993
emit_opcode(masm,0x87);
1994
emit_opcode(masm,0xD9);
1996
emit_opcode(masm,0xF0);
1998
emit_opcode(masm,0x0F);
1999
emit_opcode(masm,0xC7);
2000
emit_rm( masm, 0x0, 1, $mem_ptr$$reg );
2002
emit_opcode(masm,0x87);
2003
emit_opcode(masm,0xD9);
2006
enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
2008
emit_opcode(masm,0xF0);
2011
emit_opcode(masm,0x0F);
2012
emit_opcode(masm,0xB1);
2013
emit_rm( masm, 0x0, 1, $mem_ptr$$reg );
2016
enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
2018
emit_opcode(masm,0xF0);
2021
emit_opcode(masm,0x0F);
2022
emit_opcode(masm,0xB0);
2023
emit_rm( masm, 0x0, 1, $mem_ptr$$reg );
2026
enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
2028
emit_opcode(masm,0xF0);
2031
emit_opcode(masm, 0x66);
2034
emit_opcode(masm,0x0F);
2035
emit_opcode(masm,0xB1);
2036
emit_rm( masm, 0x0, 1, $mem_ptr$$reg );
2039
enc_class enc_flags_ne_to_boolean( iRegI res ) %{
2040
int res_encoding = $res$$reg;
2043
emit_opcode( masm, 0xB8 + res_encoding);
2044
emit_d32( masm, 0 );
2046
emit_opcode(masm,0x75);
2049
emit_opcode( masm, 0xB8 + res_encoding);
2050
emit_d32( masm, 1 );
2054
enc_class RegMem (rRegI ereg, memory mem) %{ // emit_reg_mem
2055
int reg_encoding = $ereg$$reg;
2056
int base = $mem$$base;
2057
int index = $mem$$index;
2058
int scale = $mem$$scale;
2059
int displace = $mem$$disp;
2060
relocInfo::relocType disp_reloc = $mem->disp_reloc();
2061
encode_RegMem(masm, reg_encoding, base, index, scale, displace, disp_reloc);
2064
enc_class RegMem_Hi(eRegL ereg, memory mem) %{ // emit_reg_mem
2065
int reg_encoding = HIGH_FROM_LOW_ENC($ereg$$reg); // Hi register of pair, computed from lo
2066
int base = $mem$$base;
2067
int index = $mem$$index;
2068
int scale = $mem$$scale;
2069
int displace = $mem$$disp + 4; // Offset is 4 further in memory
2070
assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
2071
encode_RegMem(masm, reg_encoding, base, index, scale, displace, relocInfo::none);
2074
enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
2076
if( $tertiary == 0xA4 ) { r1 = $dst$$reg; r2 = HIGH_FROM_LOW_ENC($dst$$reg); }
2077
else { r2 = $dst$$reg; r1 = HIGH_FROM_LOW_ENC($dst$$reg); }
2078
emit_opcode(masm,0x0F);
2079
emit_opcode(masm,$tertiary);
2080
emit_rm(masm, 0x3, r1, r2);
2081
emit_d8(masm,$cnt$$constant);
2082
emit_d8(masm,$primary);
2083
emit_rm(masm, 0x3, $secondary, r1);
2084
emit_d8(masm,$cnt$$constant);
2087
enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
2088
emit_opcode( masm, 0x8B ); // Move
2089
emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
2090
if( $cnt$$constant > 32 ) { // Shift, if not by zero
2091
emit_d8(masm,$primary);
2092
emit_rm(masm, 0x3, $secondary, $dst$$reg);
2093
emit_d8(masm,$cnt$$constant-32);
2095
emit_d8(masm,$primary);
2096
emit_rm(masm, 0x3, $secondary, HIGH_FROM_LOW_ENC($dst$$reg));
2100
enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
2102
if( $secondary == 0x5 ) { r1 = $dst$$reg; r2 = HIGH_FROM_LOW_ENC($dst$$reg); }
2103
else { r2 = $dst$$reg; r1 = HIGH_FROM_LOW_ENC($dst$$reg); }
2105
emit_opcode( masm, 0x8B ); // Move r1,r2
2106
emit_rm(masm, 0x3, r1, r2);
2107
if( $cnt$$constant > 32 ) { // Shift, if not by zero
2108
emit_opcode(masm,$primary);
2109
emit_rm(masm, 0x3, $secondary, r1);
2110
emit_d8(masm,$cnt$$constant-32);
2112
emit_opcode(masm,0x33); // XOR r2,r2
2113
emit_rm(masm, 0x3, r2, r2);
2116
// Clone of RegMem but accepts an extra parameter to access each
2117
// half of a double in memory; it never needs relocation info.
2118
enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
2119
emit_opcode(masm,$opcode$$constant);
2120
int reg_encoding = $rm_reg$$reg;
2121
int base = $mem$$base;
2122
int index = $mem$$index;
2123
int scale = $mem$$scale;
2124
int displace = $mem$$disp + $disp_for_half$$constant;
2125
relocInfo::relocType disp_reloc = relocInfo::none;
2126
encode_RegMem(masm, reg_encoding, base, index, scale, displace, disp_reloc);
2129
// !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
2131
// Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
2132
// and it never needs relocation information.
2133
// Frequently used to move data between FPU's Stack Top and memory.
2134
enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
2135
int rm_byte_opcode = $rm_opcode$$constant;
2136
int base = $mem$$base;
2137
int index = $mem$$index;
2138
int scale = $mem$$scale;
2139
int displace = $mem$$disp;
2140
assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
2141
encode_RegMem(masm, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
2144
enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
2145
int rm_byte_opcode = $rm_opcode$$constant;
2146
int base = $mem$$base;
2147
int index = $mem$$index;
2148
int scale = $mem$$scale;
2149
int displace = $mem$$disp;
2150
relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2151
encode_RegMem(masm, rm_byte_opcode, base, index, scale, displace, disp_reloc);
2154
enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{ // emit_reg_lea
2155
int reg_encoding = $dst$$reg;
2156
int base = $src0$$reg; // 0xFFFFFFFF indicates no base
2157
int index = 0x04; // 0x04 indicates no index
2158
int scale = 0x00; // 0x00 indicates no scale
2159
int displace = $src1$$constant; // 0x00 indicates no displacement
2160
relocInfo::relocType disp_reloc = relocInfo::none;
2161
encode_RegMem(masm, reg_encoding, base, index, scale, displace, disp_reloc);
2164
enc_class min_enc (rRegI dst, rRegI src) %{ // MIN
2166
emit_opcode(masm,0x3B);
2167
emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
2168
// jmp dst < src around move
2169
emit_opcode(masm,0x7C);
2172
emit_opcode(masm,0x8B);
2173
emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
2176
enc_class max_enc (rRegI dst, rRegI src) %{ // MAX
2178
emit_opcode(masm,0x3B);
2179
emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
2180
// jmp dst > src around move
2181
emit_opcode(masm,0x7F);
2184
emit_opcode(masm,0x8B);
2185
emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
2188
enc_class enc_FPR_store(memory mem, regDPR src) %{
2189
// If src is FPR1, we can just FST to store it.
2190
// Else we need to FLD it to FPR1, then FSTP to store/pop it.
2191
int reg_encoding = 0x2; // Just store
2192
int base = $mem$$base;
2193
int index = $mem$$index;
2194
int scale = $mem$$scale;
2195
int displace = $mem$$disp;
2196
relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2197
if( $src$$reg != FPR1L_enc ) {
2198
reg_encoding = 0x3; // Store & pop
2199
emit_opcode( masm, 0xD9 ); // FLD (i.e., push it)
2200
emit_d8( masm, 0xC0-1+$src$$reg );
2202
__ set_inst_mark(); // Mark start of opcode for reloc info in mem operand
2203
emit_opcode(masm,$primary);
2204
encode_RegMem(masm, reg_encoding, base, index, scale, displace, disp_reloc);
2205
__ clear_inst_mark();
2208
enc_class neg_reg(rRegI dst) %{
2210
emit_opcode(masm,0xF7);
2211
emit_rm(masm, 0x3, 0x03, $dst$$reg );
2214
enc_class setLT_reg(eCXRegI dst) %{
2216
emit_opcode(masm,0x0F);
2217
emit_opcode(masm,0x9C);
2218
emit_rm( masm, 0x3, 0x4, $dst$$reg );
2221
enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{ // cadd_cmpLT
2222
int tmpReg = $tmp$$reg;
2225
emit_opcode(masm,0x2B);
2226
emit_rm(masm, 0x3, $p$$reg, $q$$reg);
2228
emit_opcode(masm,0x1B);
2229
emit_rm(masm, 0x3, tmpReg, tmpReg);
2231
emit_opcode(masm,0x23);
2232
emit_rm(masm, 0x3, tmpReg, $y$$reg);
2234
emit_opcode(masm,0x03);
2235
emit_rm(masm, 0x3, $p$$reg, tmpReg);
2238
enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
2240
emit_opcode(masm,0xF7);
2241
emit_rm(masm, 0x3, 0, ECX_enc);
2242
emit_d32(masm,0x20);
2244
emit_opcode(masm, 0x74);
2245
emit_d8(masm, 0x04);
2246
// MOV $dst.hi,$dst.lo
2247
emit_opcode( masm, 0x8B );
2248
emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );
2250
emit_opcode(masm, 0x33);
2251
emit_rm(masm, 0x3, $dst$$reg, $dst$$reg);
2253
// SHLD $dst.hi,$dst.lo,$shift
2254
emit_opcode(masm,0x0F);
2255
emit_opcode(masm,0xA5);
2256
emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
2257
// SHL $dst.lo,$shift"
2258
emit_opcode(masm,0xD3);
2259
emit_rm(masm, 0x3, 0x4, $dst$$reg );
2262
enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
2264
emit_opcode(masm,0xF7);
2265
emit_rm(masm, 0x3, 0, ECX_enc);
2266
emit_d32(masm,0x20);
2268
emit_opcode(masm, 0x74);
2269
emit_d8(masm, 0x04);
2270
// MOV $dst.lo,$dst.hi
2271
emit_opcode( masm, 0x8B );
2272
emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
2274
emit_opcode(masm, 0x33);
2275
emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($dst$$reg));
2277
// SHRD $dst.lo,$dst.hi,$shift
2278
emit_opcode(masm,0x0F);
2279
emit_opcode(masm,0xAD);
2280
emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
2281
// SHR $dst.hi,$shift"
2282
emit_opcode(masm,0xD3);
2283
emit_rm(masm, 0x3, 0x5, HIGH_FROM_LOW_ENC($dst$$reg) );
2286
enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
2288
emit_opcode(masm,0xF7);
2289
emit_rm(masm, 0x3, 0, ECX_enc);
2290
emit_d32(masm,0x20);
2292
emit_opcode(masm, 0x74);
2293
emit_d8(masm, 0x05);
2294
// MOV $dst.lo,$dst.hi
2295
emit_opcode( masm, 0x8B );
2296
emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
2298
emit_opcode(masm, 0xC1);
2299
emit_rm(masm, 0x3, 7, HIGH_FROM_LOW_ENC($dst$$reg) );
2300
emit_d8(masm, 0x1F );
2302
// SHRD $dst.lo,$dst.hi,$shift
2303
emit_opcode(masm,0x0F);
2304
emit_opcode(masm,0xAD);
2305
emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
2306
// SAR $dst.hi,$shift"
2307
emit_opcode(masm,0xD3);
2308
emit_rm(masm, 0x3, 0x7, HIGH_FROM_LOW_ENC($dst$$reg) );
2312
// ----------------- Encodings for floating point unit -----------------
2313
// May leave result in FPU-TOS or FPU reg depending on opcodes
2314
enc_class OpcReg_FPR(regFPR src) %{ // FMUL, FDIV
2316
emit_rm(masm, 0x3, $secondary, $src$$reg );
2319
// Pop argument in FPR0 with FSTP ST(0)
2320
enc_class PopFPU() %{
2321
emit_opcode( masm, 0xDD );
2322
emit_d8( masm, 0xD8 );
2325
// !!!!! equivalent to Pop_Reg_F
2326
enc_class Pop_Reg_DPR( regDPR dst ) %{
2327
emit_opcode( masm, 0xDD ); // FSTP ST(i)
2328
emit_d8( masm, 0xD8+$dst$$reg );
2331
enc_class Push_Reg_DPR( regDPR dst ) %{
2332
emit_opcode( masm, 0xD9 );
2333
emit_d8( masm, 0xC0-1+$dst$$reg ); // FLD ST(i-1)
2336
enc_class strictfp_bias1( regDPR dst ) %{
2337
emit_opcode( masm, 0xDB ); // FLD m80real
2338
emit_opcode( masm, 0x2D );
2339
emit_d32( masm, (int)StubRoutines::x86::addr_fpu_subnormal_bias1() );
2340
emit_opcode( masm, 0xDE ); // FMULP ST(dst), ST0
2341
emit_opcode( masm, 0xC8+$dst$$reg );
2344
enc_class strictfp_bias2( regDPR dst ) %{
2345
emit_opcode( masm, 0xDB ); // FLD m80real
2346
emit_opcode( masm, 0x2D );
2347
emit_d32( masm, (int)StubRoutines::x86::addr_fpu_subnormal_bias2() );
2348
emit_opcode( masm, 0xDE ); // FMULP ST(dst), ST0
2349
emit_opcode( masm, 0xC8+$dst$$reg );
2352
// Special case for moving an integer register to a stack slot.
2353
enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2354
store_to_stackslot( masm, $primary, $src$$reg, $dst$$disp );
2357
// Special case for moving a register to a stack slot.
2358
enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2359
// Opcode already emitted
2360
emit_rm( masm, 0x02, $src$$reg, ESP_enc ); // R/M byte
2361
emit_rm( masm, 0x00, ESP_enc, ESP_enc); // SIB byte
2362
emit_d32(masm, $dst$$disp); // Displacement
2365
// Push the integer in stackSlot 'src' onto FP-stack
2366
enc_class Push_Mem_I( memory src ) %{ // FILD [ESP+src]
2367
store_to_stackslot( masm, $primary, $secondary, $src$$disp );
2370
// Push FPU's TOS float to a stack-slot, and pop FPU-stack
2371
enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
2372
store_to_stackslot( masm, 0xD9, 0x03, $dst$$disp );
2375
// Same as Pop_Mem_F except for opcode
2376
// Push FPU's TOS double to a stack-slot, and pop FPU-stack
2377
enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
2378
store_to_stackslot( masm, 0xDD, 0x03, $dst$$disp );
2381
enc_class Pop_Reg_FPR( regFPR dst ) %{
2382
emit_opcode( masm, 0xDD ); // FSTP ST(i)
2383
emit_d8( masm, 0xD8+$dst$$reg );
2386
enc_class Push_Reg_FPR( regFPR dst ) %{
2387
emit_opcode( masm, 0xD9 ); // FLD ST(i-1)
2388
emit_d8( masm, 0xC0-1+$dst$$reg );
2391
// Push FPU's float to a stack-slot, and pop FPU-stack
2392
enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
2394
if ($src$$reg != FPR1L_enc) {
2395
emit_opcode( masm, 0xD9 ); // FLD ST(i-1)
2396
emit_d8( masm, 0xC0-1+$src$$reg );
2399
store_to_stackslot( masm, 0xD9, pop, $dst$$disp ); // FST<P>_S [ESP+dst]
2402
// Push FPU's double to a stack-slot, and pop FPU-stack
2403
enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
2405
if ($src$$reg != FPR1L_enc) {
2406
emit_opcode( masm, 0xD9 ); // FLD ST(i-1)
2407
emit_d8( masm, 0xC0-1+$src$$reg );
2410
store_to_stackslot( masm, 0xDD, pop, $dst$$disp ); // FST<P>_D [ESP+dst]
2413
// Push FPU's double to a FPU-stack-slot, and pop FPU-stack
2414
enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
2415
int pop = 0xD0 - 1; // -1 since we skip FLD
2416
if ($src$$reg != FPR1L_enc) {
2417
emit_opcode( masm, 0xD9 ); // FLD ST(src-1)
2418
emit_d8( masm, 0xC0-1+$src$$reg );
2421
emit_opcode( masm, 0xDD );
2422
emit_d8( masm, pop+$dst$$reg ); // FST<P> ST(i)
2426
enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
2428
emit_opcode( masm, 0xD9 );
2429
emit_d8( masm, 0xC0-1+$dst$$reg );
2430
if ($src$$reg != FPR1L_enc) {
2432
emit_opcode (masm, 0xD9);
2433
emit_opcode (masm, 0xF7);
2434
// swap src with FPR1:
2435
// FXCH FPR1 with src
2436
emit_opcode(masm, 0xD9);
2437
emit_d8(masm, 0xC8-1+$src$$reg );
2439
emit_opcode (masm, 0xD9);
2440
emit_opcode (masm, 0xF6);
2444
enc_class Push_ModD_encoding(regD src0, regD src1) %{
2446
__ movdbl(Address(rsp, 0), $src1$$XMMRegister);
2447
__ fld_d(Address(rsp, 0));
2448
__ movdbl(Address(rsp, 0), $src0$$XMMRegister);
2449
__ fld_d(Address(rsp, 0));
2452
enc_class Push_ModF_encoding(regF src0, regF src1) %{
2454
__ movflt(Address(rsp, 0), $src1$$XMMRegister);
2455
__ fld_s(Address(rsp, 0));
2456
__ movflt(Address(rsp, 0), $src0$$XMMRegister);
2457
__ fld_s(Address(rsp, 0));
2460
enc_class Push_ResultD(regD dst) %{
2461
__ fstp_d(Address(rsp, 0));
2462
__ movdbl($dst$$XMMRegister, Address(rsp, 0));
2466
enc_class Push_ResultF(regF dst, immI d8) %{
2467
__ fstp_s(Address(rsp, 0));
2468
__ movflt($dst$$XMMRegister, Address(rsp, 0));
2469
__ addptr(rsp, $d8$$constant);
2472
enc_class Push_SrcD(regD src) %{
2474
__ movdbl(Address(rsp, 0), $src$$XMMRegister);
2475
__ fld_d(Address(rsp, 0));
2478
enc_class push_stack_temp_qword() %{
2482
enc_class pop_stack_temp_qword() %{
2486
enc_class push_xmm_to_fpr1(regD src) %{
2487
__ movdbl(Address(rsp, 0), $src$$XMMRegister);
2488
__ fld_d(Address(rsp, 0));
2491
enc_class Push_Result_Mod_DPR( regDPR src) %{
2492
if ($src$$reg != FPR1L_enc) {
2494
emit_opcode (masm, 0xD9);
2495
emit_opcode (masm, 0xF7);
2496
// FXCH FPR1 with src
2497
emit_opcode(masm, 0xD9);
2498
emit_d8(masm, 0xC8-1+$src$$reg );
2500
emit_opcode (masm, 0xD9);
2501
emit_opcode (masm, 0xF6);
2505
enc_class fnstsw_sahf_skip_parity() %{
2507
emit_opcode( masm, 0xDF );
2508
emit_opcode( masm, 0xE0 );
2510
emit_opcode( masm, 0x9E );
2512
emit_opcode( masm, 0x7B );
2513
emit_opcode( masm, 0x05 );
2516
enc_class emitModDPR() %{
2517
// fprem must be iterative
2520
emit_opcode( masm, 0xD9 );
2521
emit_opcode( masm, 0xF8 );
2523
emit_opcode( masm, 0x9b );
2525
emit_opcode( masm, 0xDF );
2526
emit_opcode( masm, 0xE0 );
2528
emit_opcode( masm, 0x9E );
2530
emit_opcode( masm, 0x0F );
2531
emit_opcode( masm, 0x8A );
2532
emit_opcode( masm, 0xF4 );
2533
emit_opcode( masm, 0xFF );
2534
emit_opcode( masm, 0xFF );
2535
emit_opcode( masm, 0xFF );
2538
enc_class fpu_flags() %{
2540
emit_opcode( masm, 0xDF);
2541
emit_opcode( masm, 0xE0);
2543
emit_opcode( masm, 0x66 ); // operand-size prefix for 16-bit immediate
2544
emit_opcode( masm, 0xA9 );
2545
emit_d16 ( masm, 0x0400 );
2546
// // // This sequence works, but stalls for 12-16 cycles on PPro
2547
// // test rax,0x0400
2548
// emit_opcode( masm, 0xA9 );
2549
// emit_d32 ( masm, 0x00000400 );
2551
// jz exit (no unordered comparison)
2552
emit_opcode( masm, 0x74 );
2553
emit_d8 ( masm, 0x02 );
2554
// mov ah,1 - treat as LT case (set carry flag)
2555
emit_opcode( masm, 0xB4 );
2556
emit_d8 ( masm, 0x01 );
2558
emit_opcode( masm, 0x9E);
2561
enc_class cmpF_P6_fixup() %{
2562
// Fixup the integer flags in case comparison involved a NaN
2564
// JNP exit (no unordered comparison, P-flag is set by NaN)
2565
emit_opcode( masm, 0x7B );
2566
emit_d8 ( masm, 0x03 );
2567
// MOV AH,1 - treat as LT case (set carry flag)
2568
emit_opcode( masm, 0xB4 );
2569
emit_d8 ( masm, 0x01 );
2571
emit_opcode( masm, 0x9E);
2572
// NOP // target for branch to avoid branch to branch
2573
emit_opcode( masm, 0x90);
2578
// movl(dst, nan_result);
2579
// jcc(Assembler::parity, exit);
2580
// movl(dst, less_result);
2581
// jcc(Assembler::below, exit);
2582
// movl(dst, equal_result);
2583
// jcc(Assembler::equal, exit);
2584
// movl(dst, greater_result);
2587
// greater_result = -1;
2591
enc_class CmpF_Result(rRegI dst) %{
2593
emit_opcode( masm, 0xDF);
2594
emit_opcode( masm, 0xE0);
2596
emit_opcode( masm, 0x9E);
2597
// movl(dst, nan_result);
2598
emit_opcode( masm, 0xB8 + $dst$$reg);
2599
emit_d32( masm, -1 );
2600
// jcc(Assembler::parity, exit);
2601
emit_opcode( masm, 0x7A );
2602
emit_d8 ( masm, 0x13 );
2603
// movl(dst, less_result);
2604
emit_opcode( masm, 0xB8 + $dst$$reg);
2605
emit_d32( masm, -1 );
2606
// jcc(Assembler::below, exit);
2607
emit_opcode( masm, 0x72 );
2608
emit_d8 ( masm, 0x0C );
2609
// movl(dst, equal_result);
2610
emit_opcode( masm, 0xB8 + $dst$$reg);
2611
emit_d32( masm, 0 );
2612
// jcc(Assembler::equal, exit);
2613
emit_opcode( masm, 0x74 );
2614
emit_d8 ( masm, 0x05 );
2615
// movl(dst, greater_result);
2616
emit_opcode( masm, 0xB8 + $dst$$reg);
2617
emit_d32( masm, 1 );
2621
// Compare the longs and set flags
2622
// BROKEN! Do Not use as-is
2623
enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
2624
// CMP $src1.hi,$src2.hi
2625
emit_opcode( masm, 0x3B );
2626
emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
2628
emit_opcode(masm,0x75);
2630
// CMP $src1.lo,$src2.lo
2631
emit_opcode( masm, 0x3B );
2632
emit_rm(masm, 0x3, $src1$$reg, $src2$$reg );
2636
enc_class convert_int_long( regL dst, rRegI src ) %{
2638
int dst_encoding = $dst$$reg;
2639
int src_encoding = $src$$reg;
2640
encode_Copy( masm, dst_encoding , src_encoding );
2642
encode_Copy( masm, HIGH_FROM_LOW_ENC(dst_encoding), src_encoding );
2644
emit_opcode( masm, 0xC1 );
2645
emit_rm(masm, 0x3, 7, HIGH_FROM_LOW_ENC(dst_encoding) );
2646
emit_d8(masm, 0x1F );
2649
enc_class convert_long_double( eRegL src ) %{
2651
emit_opcode(masm, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
2653
emit_opcode(masm, 0x50+$src$$reg );
2654
// fild 64-bits at [SP]
2655
emit_opcode(masm,0xdf);
2656
emit_d8(masm, 0x6C);
2657
emit_d8(masm, 0x24);
2658
emit_d8(masm, 0x00);
2660
emit_opcode(masm, 0x83); // add SP, #8
2661
emit_rm(masm, 0x3, 0x00, ESP_enc);
2665
enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
2666
// IMUL EDX:EAX,$src1
2667
emit_opcode( masm, 0xF7 );
2668
emit_rm( masm, 0x3, 0x5, $src1$$reg );
2670
int shift_count = ((int)$cnt$$constant) - 32;
2671
if (shift_count > 0) {
2672
emit_opcode(masm, 0xC1);
2673
emit_rm(masm, 0x3, 7, $dst$$reg );
2674
emit_d8(masm, shift_count);
2678
// this version doesn't have add sp, 8
2679
enc_class convert_long_double2( eRegL src ) %{
2681
emit_opcode(masm, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
2683
emit_opcode(masm, 0x50+$src$$reg );
2684
// fild 64-bits at [SP]
2685
emit_opcode(masm,0xdf);
2686
emit_d8(masm, 0x6C);
2687
emit_d8(masm, 0x24);
2688
emit_d8(masm, 0x00);
2691
enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
2692
// Basic idea: long = (long)int * (long)int
2693
// IMUL EDX:EAX, src
2694
emit_opcode( masm, 0xF7 );
2695
emit_rm( masm, 0x3, 0x5, $src$$reg);
2698
enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
2699
// Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL)
2701
emit_opcode( masm, 0xF7 );
2702
emit_rm( masm, 0x3, 0x4, $src$$reg);
2705
enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
2706
// Basic idea: lo(result) = lo(x_lo * y_lo)
2707
// hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
2709
encode_Copy( masm, $tmp$$reg, $src$$reg );
2711
emit_opcode( masm, 0x0F );
2712
emit_opcode( masm, 0xAF );
2713
emit_rm( masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
2715
encode_Copy( masm, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg) );
2717
emit_opcode( masm, 0x0F );
2718
emit_opcode( masm, 0xAF );
2719
emit_rm( masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );
2721
emit_opcode( masm, 0x03 );
2722
emit_rm( masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
2723
// MUL EDX:EAX,$src.lo
2724
emit_opcode( masm, 0xF7 );
2725
emit_rm( masm, 0x3, 0x4, $src$$reg );
2727
emit_opcode( masm, 0x03 );
2728
emit_rm( masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $tmp$$reg );
2731
enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
2732
// Basic idea: lo(result) = lo(src * y_lo)
2733
// hi(result) = hi(src * y_lo) + lo(src * y_hi)
2734
// IMUL $tmp,EDX,$src
2735
emit_opcode( masm, 0x6B );
2736
emit_rm( masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
2737
emit_d8( masm, (int)$src$$constant );
2739
emit_opcode(masm, 0xB8 + EDX_enc);
2740
emit_d32( masm, (int)$src$$constant );
2742
emit_opcode( masm, 0xF7 );
2743
emit_rm( masm, 0x3, 0x4, EDX_enc );
2745
emit_opcode( masm, 0x03 );
2746
emit_rm( masm, 0x3, EDX_enc, $tmp$$reg );
2749
enc_class long_div( eRegL src1, eRegL src2 ) %{
2751
emit_opcode(masm, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );
2753
emit_opcode(masm, 0x50+$src1$$reg );
2755
emit_opcode(masm, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
2757
emit_opcode(masm, 0x50+$src2$$reg );
2758
// CALL directly to the runtime
2760
emit_opcode(masm,0xE8); // Call into runtime
2761
emit_d32_reloc(masm, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - __ pc()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2762
__ clear_inst_mark();
2765
emit_opcode(masm, 0x83); // add SP, #framesize
2766
emit_rm(masm, 0x3, 0x00, ESP_enc);
2770
enc_class long_mod( eRegL src1, eRegL src2 ) %{
2772
emit_opcode(masm, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );
2774
emit_opcode(masm, 0x50+$src1$$reg );
2776
emit_opcode(masm, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
2778
emit_opcode(masm, 0x50+$src2$$reg );
2779
// CALL directly to the runtime
2781
emit_opcode(masm,0xE8); // Call into runtime
2782
emit_d32_reloc(masm, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - __ pc()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2783
__ clear_inst_mark();
2786
emit_opcode(masm, 0x83); // add SP, #framesize
2787
emit_rm(masm, 0x3, 0x00, ESP_enc);
2791
enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
2793
emit_opcode(masm, 0x8B);
2794
emit_rm(masm, 0x3, $tmp$$reg, $src$$reg);
2796
emit_opcode(masm, 0x0B);
2797
emit_rm(masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg));
2800
enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
2801
// CMP $src1.lo,$src2.lo
2802
emit_opcode( masm, 0x3B );
2803
emit_rm(masm, 0x3, $src1$$reg, $src2$$reg );
2805
emit_cc(masm, 0x70, 0x5);
2807
// CMP $src1.hi,$src2.hi
2808
emit_opcode( masm, 0x3B );
2809
emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
2812
enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
2813
// CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits
2814
emit_opcode( masm, 0x3B );
2815
emit_rm(masm, 0x3, $src1$$reg, $src2$$reg );
2816
// MOV $tmp,$src1.hi
2817
emit_opcode( masm, 0x8B );
2818
emit_rm(masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src1$$reg) );
2819
// SBB $tmp,$src2.hi\t! Compute flags for long compare
2820
emit_opcode( masm, 0x1B );
2821
emit_rm(masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src2$$reg) );
2824
enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
2826
emit_opcode(masm,0x33); // XOR
2827
emit_rm(masm,0x3, $tmp$$reg, $tmp$$reg);
2829
emit_opcode( masm, 0x3B );
2830
emit_rm(masm, 0x3, $tmp$$reg, $src$$reg );
2832
emit_opcode( masm, 0x1B );
2833
emit_rm(masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg) );
2836
// Sniff, sniff... smells like Gnu Superoptimizer
2837
enc_class neg_long( eRegL dst ) %{
2838
emit_opcode(masm,0xF7); // NEG hi
2839
emit_rm (masm,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg));
2840
emit_opcode(masm,0xF7); // NEG lo
2841
emit_rm (masm,0x3, 0x3, $dst$$reg );
2842
emit_opcode(masm,0x83); // SBB hi,0
2843
emit_rm (masm,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg));
2847
enc_class enc_pop_rdx() %{
2848
emit_opcode(masm,0x5A);
2851
enc_class enc_rethrow() %{
2853
emit_opcode(masm, 0xE9); // jmp entry
2854
emit_d32_reloc(masm, (int)OptoRuntime::rethrow_stub() - ((int)__ pc())-4,
2855
runtime_call_Relocation::spec(), RELOC_IMM32 );
2856
__ clear_inst_mark();
2861
// Convert a double to an int. Java semantics require we do complex
2862
// manipulations in the corner cases. So we set the rounding mode to
2863
// 'zero', store the darned double down as an int, and reset the
2864
// rounding mode to 'nearest'. The hardware throws an exception which
2865
// patches up the correct value directly to the stack.
2866
enc_class DPR2I_encoding( regDPR src ) %{
2867
// Flip to round-to-zero mode. We attempted to allow invalid-op
2868
// exceptions here, so that a NAN or other corner-case value will
2869
// thrown an exception (but normal values get converted at full speed).
2870
// However, I2C adapters and other float-stack manglers leave pending
2871
// invalid-op exceptions hanging. We would have to clear them before
2872
// enabling them and that is more expensive than just testing for the
2873
// invalid value Intel stores down in the corner cases.
2874
emit_opcode(masm,0xD9); // FLDCW trunc
2875
emit_opcode(masm,0x2D);
2876
emit_d32(masm,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
2878
emit_opcode(masm,0x83); // SUB ESP,4
2879
emit_opcode(masm,0xEC);
2881
// Encoding assumes a double has been pushed into FPR0.
2882
// Store down the double as an int, popping the FPU stack
2883
emit_opcode(masm,0xDB); // FISTP [ESP]
2884
emit_opcode(masm,0x1C);
2886
// Restore the rounding mode; mask the exception
2887
emit_opcode(masm,0xD9); // FLDCW std/24-bit mode
2888
emit_opcode(masm,0x2D);
2889
emit_d32( masm, Compile::current()->in_24_bit_fp_mode()
2890
? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
2891
: (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());
2893
// Load the converted int; adjust CPU stack
2894
emit_opcode(masm,0x58); // POP EAX
2895
emit_opcode(masm,0x3D); // CMP EAX,imm
2896
emit_d32 (masm,0x80000000); // 0x80000000
2897
emit_opcode(masm,0x75); // JNE around_slow_call
2898
emit_d8 (masm,0x07); // Size of slow_call
2899
// Push src onto stack slow-path
2900
emit_opcode(masm,0xD9 ); // FLD ST(i)
2901
emit_d8 (masm,0xC0-1+$src$$reg );
2902
// CALL directly to the runtime
2904
emit_opcode(masm,0xE8); // Call into runtime
2905
emit_d32_reloc(masm, (StubRoutines::x86::d2i_wrapper() - __ pc()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2906
__ clear_inst_mark();
2911
enc_class DPR2L_encoding( regDPR src ) %{
2912
emit_opcode(masm,0xD9); // FLDCW trunc
2913
emit_opcode(masm,0x2D);
2914
emit_d32(masm,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
2916
emit_opcode(masm,0x83); // SUB ESP,8
2917
emit_opcode(masm,0xEC);
2919
// Encoding assumes a double has been pushed into FPR0.
2920
// Store down the double as a long, popping the FPU stack
2921
emit_opcode(masm,0xDF); // FISTP [ESP]
2922
emit_opcode(masm,0x3C);
2924
// Restore the rounding mode; mask the exception
2925
emit_opcode(masm,0xD9); // FLDCW std/24-bit mode
2926
emit_opcode(masm,0x2D);
2927
emit_d32( masm, Compile::current()->in_24_bit_fp_mode()
2928
? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
2929
: (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());
2931
// Load the converted int; adjust CPU stack
2932
emit_opcode(masm,0x58); // POP EAX
2933
emit_opcode(masm,0x5A); // POP EDX
2934
emit_opcode(masm,0x81); // CMP EDX,imm
2935
emit_d8 (masm,0xFA); // rdx
2936
emit_d32 (masm,0x80000000); // 0x80000000
2937
emit_opcode(masm,0x75); // JNE around_slow_call
2938
emit_d8 (masm,0x07+4); // Size of slow_call
2939
emit_opcode(masm,0x85); // TEST EAX,EAX
2940
emit_opcode(masm,0xC0); // 2/rax,/rax,
2941
emit_opcode(masm,0x75); // JNE around_slow_call
2942
emit_d8 (masm,0x07); // Size of slow_call
2943
// Push src onto stack slow-path
2944
emit_opcode(masm,0xD9 ); // FLD ST(i)
2945
emit_d8 (masm,0xC0-1+$src$$reg );
2946
// CALL directly to the runtime
2948
emit_opcode(masm,0xE8); // Call into runtime
2949
emit_d32_reloc(masm, (StubRoutines::x86::d2l_wrapper() - __ pc()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2950
__ clear_inst_mark();
2955
enc_class FMul_ST_reg( eRegFPR src1 ) %{
2956
// Operand was loaded from memory into fp ST (stack top)
2957
// FMUL ST,$src /* D8 C8+i */
2958
emit_opcode(masm, 0xD8);
2959
emit_opcode(masm, 0xC8 + $src1$$reg);
2962
enc_class FAdd_ST_reg( eRegFPR src2 ) %{
2963
// FADDP ST,src2 /* D8 C0+i */
2964
emit_opcode(masm, 0xD8);
2965
emit_opcode(masm, 0xC0 + $src2$$reg);
2966
//could use FADDP src2,fpST /* DE C0+i */
2969
enc_class FAddP_reg_ST( eRegFPR src2 ) %{
2970
// FADDP src2,ST /* DE C0+i */
2971
emit_opcode(masm, 0xDE);
2972
emit_opcode(masm, 0xC0 + $src2$$reg);
2975
enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
2976
// Operand has been loaded into fp ST (stack top)
2978
emit_opcode(masm, 0xD8);
2979
emit_opcode(masm, 0xE0 + $src1$$reg);
2982
emit_opcode(masm, 0xD8);
2983
emit_opcode(masm, 0xF0 + $src2$$reg);
2986
enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
2987
// Operand was loaded from memory into fp ST (stack top)
2988
// FADD ST,$src /* D8 C0+i */
2989
emit_opcode(masm, 0xD8);
2990
emit_opcode(masm, 0xC0 + $src1$$reg);
2992
// FMUL ST,src2 /* D8 C*+i */
2993
emit_opcode(masm, 0xD8);
2994
emit_opcode(masm, 0xC8 + $src2$$reg);
2998
enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
2999
// Operand was loaded from memory into fp ST (stack top)
3000
// FADD ST,$src /* D8 C0+i */
3001
emit_opcode(masm, 0xD8);
3002
emit_opcode(masm, 0xC0 + $src1$$reg);
3004
// FMULP src2,ST /* DE C8+i */
3005
emit_opcode(masm, 0xDE);
3006
emit_opcode(masm, 0xC8 + $src2$$reg);
3009
// Atomically load the volatile long
3010
enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
3011
emit_opcode(masm,0xDF);
3012
int rm_byte_opcode = 0x05;
3013
int base = $mem$$base;
3014
int index = $mem$$index;
3015
int scale = $mem$$scale;
3016
int displace = $mem$$disp;
3017
relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3018
encode_RegMem(masm, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3019
store_to_stackslot( masm, 0x0DF, 0x07, $dst$$disp );
3022
// Volatile Store Long. Must be atomic, so move it into
3023
// the FP TOS and then do a 64-bit FIST. Has to probe the
3024
// target address before the store (for null-ptr checks)
3025
// so the memory operand is used twice in the encoding.
3026
enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
3027
store_to_stackslot( masm, 0x0DF, 0x05, $src$$disp );
3028
__ set_inst_mark(); // Mark start of FIST in case $mem has an oop
3029
emit_opcode(masm,0xDF);
3030
int rm_byte_opcode = 0x07;
3031
int base = $mem$$base;
3032
int index = $mem$$index;
3033
int scale = $mem$$scale;
3034
int displace = $mem$$disp;
3035
relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3036
encode_RegMem(masm, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3037
__ clear_inst_mark();
3043
//----------FRAME--------------------------------------------------------------
// Definition of frame structure and management information.
// S T A C K L A Y O U T Allocators stack-slot number
// | (to get allocators register number
// G Owned by | | v add OptoReg::stack0())
// o | +--------+ pad to even-align allocators stack-slot
// w V | pad0 | numbers; owned by CALLER
// t -----------+--------+----> Matcher::_in_arg_limit, unaligned
// | | args | 4 Holes in incoming args owned by SELF
// V | | old out| Empty on Intel, window on Sparc
// | old |preserve| Must be even aligned.
// | SP-+--------+----> Matcher::_old_SP, even aligned
// | | in | 3 area for Intel ret address
// Owned by |preserve| Empty on Sparc.
// | | pad2 | 2 pad to align old SP
// | +--------+----> OptoReg::stack0(), even aligned
// | | pad1 | 11 pad to align new SP
// | | spills | 9 spills
// V | | 8 (pad0 slot for callee)
// -----------+--------+----> Matcher::_out_arg_limit, unaligned
// | | args | 6 Holes in outgoing args owned by CALLEE
// Owned by +--------+
// CALLEE | new out| 6 Empty on Intel, window on Sparc
// | new |preserve| Must be even-aligned.
// | SP-+--------+----> Matcher::_new_SP, even aligned
// Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is
// known from SELF's arguments and the Java calling convention.
// Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
// area, those holes are owned by SELF. Holes in the outgoing area
// are owned by the CALLEE. Holes should not be necessary in the
// incoming area, as the Java calling convention is completely under
// the control of the AD file. Doubles can be sorted and packed to
// avoid holes. Holes in the outgoing arguments may be necessary for
// varargs C calling conventions.
// Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is
// even aligned with pad0 as needed.
// Region 6 is even aligned. Region 6-7 is NOT even aligned;
// region 6-11 is even aligned; it may be padded out more so that
// the region from SP to FP meets the minimum stack alignment.
3098
// These three registers define part of the calling convention
3099
// between compiled code and the interpreter.
3100
inline_cache_reg(EAX); // Inline Cache Register
3102
// Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
3103
cisc_spilling_operand_name(indOffset32);
3105
// Number of stack slots consumed by locking an object
3106
sync_stack_slots(1);
3108
// Compiled code's Frame Pointer
3110
// Interpreter stores its frame pointer in a register which is
3111
// stored to the stack by I2CAdaptors.
3112
// I2CAdaptors convert from interpreted java to compiled java.
3113
interpreter_frame_pointer(EBP);
3115
// Stack alignment requirement
3116
// Alignment size in bytes (128-bit -> 16 bytes)
3117
stack_alignment(StackAlignmentInBytes);
3119
// Number of outgoing stack slots killed above the out_preserve_stack_slots
3120
// for calls to C. Supports the var-args backing area for register parms.
3121
varargs_C_out_slots_killed(0);
3123
// The after-PROLOG location of the return address. Location of
3124
// return address specifies a type (REG or STACK) and a number
3125
// representing the register number (i.e. - use a register name) or
3127
// Ret Addr is on stack in slot 0 if no locks or verification or alignment.
3128
// Otherwise, it is above the locks and verification slot and alignment word
3129
return_addr(STACK - 1 +
3130
align_up((Compile::current()->in_preserve_stack_slots() +
3131
Compile::current()->fixed_slots()),
3132
stack_alignment_in_slots()));
3134
// Location of C & interpreter return values
3136
assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3137
static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num };
3138
static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3140
// in SSE2+ mode we want to keep the FPU stack clean so pretend
3141
// that C functions return float and double results in XMM0.
3142
if( ideal_reg == Op_RegD && UseSSE>=2 )
3143
return OptoRegPair(XMM0b_num,XMM0_num);
3144
if( ideal_reg == Op_RegF && UseSSE>=2 )
3145
return OptoRegPair(OptoReg::Bad,XMM0_num);
3147
return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3150
// Location of return values
3152
assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3153
static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num };
3154
static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3155
if( ideal_reg == Op_RegD && UseSSE>=2 )
3156
return OptoRegPair(XMM0b_num,XMM0_num);
3157
if( ideal_reg == Op_RegF && UseSSE>=1 )
3158
return OptoRegPair(OptoReg::Bad,XMM0_num);
3159
return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3164
//----------ATTRIBUTES---------------------------------------------------------
3165
//----------Operand Attributes-------------------------------------------------
3166
op_attrib op_cost(0); // Required cost attribute
3168
//----------Instruction Attributes---------------------------------------------
3169
ins_attrib ins_cost(100); // Required cost attribute
3170
ins_attrib ins_size(8); // Required size attribute (in bits)
3171
ins_attrib ins_short_branch(0); // Required flag: is this instruction a
3172
// non-matching short branch variant of some
3174
ins_attrib ins_alignment(1); // Required alignment attribute (must be a power of 2)
3175
// specifies the alignment that some part of the instruction (not
3176
// necessarily the start) requires. If > 1, a compute_padding()
3177
// function must be provided for the instruction
3179
//----------OPERANDS-----------------------------------------------------------
3180
// Operand definitions must precede instruction definitions for correct parsing
3181
// in the ADLC because operands constitute user defined types which are used in
3182
// instruction definitions.
3184
//----------Simple Operands----------------------------------------------------
3185
// Immediate Operands
3192
interface(CONST_INTER);
3195
// Constant for test vs zero
3197
predicate(n->get_int() == 0);
3202
interface(CONST_INTER);
3205
// Constant for increment
3207
predicate(n->get_int() == 1);
3212
interface(CONST_INTER);
3215
// Constant for decrement
3217
predicate(n->get_int() == -1);
3222
interface(CONST_INTER);
3225
// Valid scale values for addressing modes
3227
predicate(0 <= n->get_int() && (n->get_int() <= 3));
3231
interface(CONST_INTER);
3235
predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
3240
interface(CONST_INTER);
3244
predicate((0 <= n->get_int()) && (n->get_int() <= 255));
3249
interface(CONST_INTER);
3253
predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
3258
interface(CONST_INTER);
3261
// Int Immediate non-negative
3264
predicate(n->get_int() >= 0);
3269
interface(CONST_INTER);
3272
// Constant for long shifts
3274
predicate( n->get_int() == 32 );
3279
interface(CONST_INTER);
3282
operand immI_1_31() %{
3283
predicate( n->get_int() >= 1 && n->get_int() <= 31 );
3288
interface(CONST_INTER);
3291
operand immI_32_63() %{
3292
predicate( n->get_int() >= 32 && n->get_int() <= 63 );
3297
interface(CONST_INTER);
3301
predicate( n->get_int() == 2 );
3306
interface(CONST_INTER);
3310
predicate( n->get_int() == 3 );
3315
interface(CONST_INTER);
3320
predicate(n->get_int() == 4);
3325
interface(CONST_INTER);
3330
predicate(n->get_int() == 8);
3335
interface(CONST_INTER);
3344
interface(CONST_INTER);
3347
// Null Pointer Immediate
3349
predicate( n->get_ptr() == 0 );
3354
interface(CONST_INTER);
3363
interface(CONST_INTER);
3366
// Long Immediate zero
3368
predicate( n->get_long() == 0L );
3373
interface(CONST_INTER);
3376
// Long Immediate zero
3378
predicate( n->get_long() == -1L );
3383
interface(CONST_INTER);
3386
// Long immediate from 0 to 127.
3387
// Used for a shorter form of long mul by 10.
3388
operand immL_127() %{
3389
predicate((0 <= n->get_long()) && (n->get_long() <= 127));
3394
interface(CONST_INTER);
3397
// Long Immediate: low 32-bit mask
3398
operand immL_32bits() %{
3399
predicate(n->get_long() == 0xFFFFFFFFL);
3404
interface(CONST_INTER);
3407
// Long Immediate: low 32-bit mask
3409
predicate(n->get_long() == (int)(n->get_long()));
3414
interface(CONST_INTER);
3417
//Double Immediate zero
3419
// Do additional (and counter-intuitive) test against NaN to work around VC++
3420
// bug that generates code such that NaNs compare equal to 0.0
3421
predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
3426
interface(CONST_INTER);
3429
// Double Immediate one
3431
predicate( UseSSE<=1 && n->getd() == 1.0 );
3436
interface(CONST_INTER);
3441
predicate(UseSSE<=1);
3446
interface(CONST_INTER);
3450
predicate(UseSSE>=2);
3455
interface(CONST_INTER);
3458
// Double Immediate zero
3460
// Do additional (and counter-intuitive) test against NaN to work around VC++
3461
// bug that generates code such that NaNs compare equal to 0.0 AND do not
3462
// compare equal to -0.0.
3463
predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
3467
interface(CONST_INTER);
3470
// Float Immediate zero
3472
predicate(UseSSE == 0 && n->getf() == 0.0F);
3477
interface(CONST_INTER);
3480
// Float Immediate one
3482
predicate(UseSSE == 0 && n->getf() == 1.0F);
3487
interface(CONST_INTER);
3492
predicate( UseSSE == 0 );
3497
interface(CONST_INTER);
3502
predicate(UseSSE >= 1);
3507
interface(CONST_INTER);
3510
// Float Immediate zero. Zero and not -0.0
3512
predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
3517
interface(CONST_INTER);
3520
// Immediates for special shifts (sign extend)
3522
// Constants for increment
3524
predicate( n->get_int() == 16 );
3528
interface(CONST_INTER);
3532
predicate( n->get_int() == 24 );
3536
interface(CONST_INTER);
3539
// Constant for byte-wide masking
3540
operand immI_255() %{
3541
predicate( n->get_int() == 255 );
3545
interface(CONST_INTER);
3548
// Constant for short-wide masking
3549
operand immI_65535() %{
3550
predicate(n->get_int() == 65535);
3554
interface(CONST_INTER);
3559
constraint(ALLOC_IN_RC(vectmask_reg));
3562
interface(REG_INTER);
3568
constraint(ALLOC_IN_RC(int_reg));
3579
interface(REG_INTER);
3582
// Subset of Integer Register
3583
operand xRegI(rRegI reg) %{
3584
constraint(ALLOC_IN_RC(int_x_reg));
3592
interface(REG_INTER);
3596
operand eAXRegI(xRegI reg) %{
3597
constraint(ALLOC_IN_RC(eax_reg));
3602
interface(REG_INTER);
3606
operand eBXRegI(xRegI reg) %{
3607
constraint(ALLOC_IN_RC(ebx_reg));
3612
interface(REG_INTER);
3615
operand eCXRegI(xRegI reg) %{
3616
constraint(ALLOC_IN_RC(ecx_reg));
3621
interface(REG_INTER);
3624
operand eDXRegI(xRegI reg) %{
3625
constraint(ALLOC_IN_RC(edx_reg));
3630
interface(REG_INTER);
3633
operand eDIRegI(xRegI reg) %{
3634
constraint(ALLOC_IN_RC(edi_reg));
3639
interface(REG_INTER);
3642
operand nadxRegI() %{
3643
constraint(ALLOC_IN_RC(nadx_reg));
3651
interface(REG_INTER);
3655
constraint(ALLOC_IN_RC(ncx_reg));
3663
interface(REG_INTER);
3666
// // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
3668
operand eSIRegI(xRegI reg) %{
3669
constraint(ALLOC_IN_RC(esi_reg));
3674
interface(REG_INTER);
3679
constraint(ALLOC_IN_RC(any_reg));
3688
interface(REG_INTER);
3692
constraint(ALLOC_IN_RC(int_reg));
3700
interface(REG_INTER);
3704
constraint(ALLOC_IN_RC(int_reg));
3712
interface(REG_INTER);
3715
// On windows95, EBP is not safe to use for implicit null tests.
3716
operand eRegP_no_EBP() %{
3717
constraint(ALLOC_IN_RC(int_reg_no_ebp));
3726
interface(REG_INTER);
3730
constraint(ALLOC_IN_RC(p_reg));
3738
interface(REG_INTER);
3742
// Return a pointer value
3743
operand eAXRegP(eRegP reg) %{
3744
constraint(ALLOC_IN_RC(eax_reg));
3747
interface(REG_INTER);
3751
operand eBXRegP(eRegP reg) %{
3752
constraint(ALLOC_IN_RC(ebx_reg));
3755
interface(REG_INTER);
3758
// Tail-call (interprocedural jump) to interpreter
3759
operand eCXRegP(eRegP reg) %{
3760
constraint(ALLOC_IN_RC(ecx_reg));
3763
interface(REG_INTER);
3766
operand eDXRegP(eRegP reg) %{
3767
constraint(ALLOC_IN_RC(edx_reg));
3770
interface(REG_INTER);
3773
operand eSIRegP(eRegP reg) %{
3774
constraint(ALLOC_IN_RC(esi_reg));
3777
interface(REG_INTER);
3781
operand eDIRegP(eRegP reg) %{
3782
constraint(ALLOC_IN_RC(edi_reg));
3785
interface(REG_INTER);
3789
constraint(ALLOC_IN_RC(long_reg));
3794
interface(REG_INTER);
3797
operand eADXRegL( eRegL reg ) %{
3798
constraint(ALLOC_IN_RC(eadx_reg));
3801
format %{ "EDX:EAX" %}
3802
interface(REG_INTER);
3805
operand eBCXRegL( eRegL reg ) %{
3806
constraint(ALLOC_IN_RC(ebcx_reg));
3809
format %{ "EBX:ECX" %}
3810
interface(REG_INTER);
3813
operand eBDPRegL( eRegL reg ) %{
3814
constraint(ALLOC_IN_RC(ebpd_reg));
3817
format %{ "EBP:EDI" %}
3818
interface(REG_INTER);
3820
// Special case for integer high multiply
3821
operand eADXRegL_low_only() %{
3822
constraint(ALLOC_IN_RC(eadx_reg));
3826
interface(REG_INTER);
3829
// Flags register, used as output of compare instructions
3830
operand rFlagsReg() %{
3831
constraint(ALLOC_IN_RC(int_flags));
3834
format %{ "EFLAGS" %}
3835
interface(REG_INTER);
3838
// Flags register, used as output of compare instructions
3839
operand eFlagsReg() %{
3840
constraint(ALLOC_IN_RC(int_flags));
3843
format %{ "EFLAGS" %}
3844
interface(REG_INTER);
3847
// Flags register, used as output of FLOATING POINT compare instructions
3848
operand eFlagsRegU() %{
3849
constraint(ALLOC_IN_RC(int_flags));
3852
format %{ "EFLAGS_U" %}
3853
interface(REG_INTER);
3856
operand eFlagsRegUCF() %{
3857
constraint(ALLOC_IN_RC(int_flags));
3861
format %{ "EFLAGS_U_CF" %}
3862
interface(REG_INTER);
3865
// Condition Code Register used by long compare
3866
operand flagsReg_long_LTGE() %{
3867
constraint(ALLOC_IN_RC(int_flags));
3869
format %{ "FLAGS_LTGE" %}
3870
interface(REG_INTER);
3872
operand flagsReg_long_EQNE() %{
3873
constraint(ALLOC_IN_RC(int_flags));
3875
format %{ "FLAGS_EQNE" %}
3876
interface(REG_INTER);
3878
operand flagsReg_long_LEGT() %{
3879
constraint(ALLOC_IN_RC(int_flags));
3881
format %{ "FLAGS_LEGT" %}
3882
interface(REG_INTER);
3885
// Condition Code Register used by unsigned long compare
3886
operand flagsReg_ulong_LTGE() %{
3887
constraint(ALLOC_IN_RC(int_flags));
3889
format %{ "FLAGS_U_LTGE" %}
3890
interface(REG_INTER);
3892
operand flagsReg_ulong_EQNE() %{
3893
constraint(ALLOC_IN_RC(int_flags));
3895
format %{ "FLAGS_U_EQNE" %}
3896
interface(REG_INTER);
3898
operand flagsReg_ulong_LEGT() %{
3899
constraint(ALLOC_IN_RC(int_flags));
3901
format %{ "FLAGS_U_LEGT" %}
3902
interface(REG_INTER);
3905
// Float register operands
3907
predicate( UseSSE < 2 );
3908
constraint(ALLOC_IN_RC(fp_dbl_reg));
3913
interface(REG_INTER);
3916
operand regDPR1(regDPR reg) %{
3917
predicate( UseSSE < 2 );
3918
constraint(ALLOC_IN_RC(fp_dbl_reg0));
3921
interface(REG_INTER);
3924
operand regDPR2(regDPR reg) %{
3925
predicate( UseSSE < 2 );
3926
constraint(ALLOC_IN_RC(fp_dbl_reg1));
3929
interface(REG_INTER);
3932
operand regnotDPR1(regDPR reg) %{
3933
predicate( UseSSE < 2 );
3934
constraint(ALLOC_IN_RC(fp_dbl_notreg0));
3937
interface(REG_INTER);
3940
// Float register operands
3942
predicate( UseSSE < 2 );
3943
constraint(ALLOC_IN_RC(fp_flt_reg));
3947
interface(REG_INTER);
3950
// Float register operands
3951
operand regFPR1(regFPR reg) %{
3952
predicate( UseSSE < 2 );
3953
constraint(ALLOC_IN_RC(fp_flt_reg0));
3956
interface(REG_INTER);
3959
// XMM Float register operands
3961
predicate( UseSSE>=1 );
3962
constraint(ALLOC_IN_RC(float_reg_legacy));
3965
interface(REG_INTER);
3969
predicate( UseSSE>=1 );
3970
constraint(ALLOC_IN_RC(float_reg_legacy));
3973
interface(REG_INTER);
3976
// Float register operands
3978
constraint(ALLOC_IN_RC(float_reg_vl));
3982
interface(REG_INTER);
3985
// XMM Double register operands
3987
predicate( UseSSE>=2 );
3988
constraint(ALLOC_IN_RC(double_reg_legacy));
3991
interface(REG_INTER);
3994
// Double register operands
3996
predicate( UseSSE>=2 );
3997
constraint(ALLOC_IN_RC(double_reg_legacy));
4000
interface(REG_INTER);
4004
constraint(ALLOC_IN_RC(double_reg_vl));
4008
interface(REG_INTER);
4011
//----------Memory Operands----------------------------------------------------
4012
// Direct Memory Operand
4013
operand direct(immP addr) %{
4016
format %{ "[$addr]" %}
4017
interface(MEMORY_INTER) %{
4025
// Indirect Memory Operand
4026
operand indirect(eRegP reg) %{
4027
constraint(ALLOC_IN_RC(int_reg));
4030
format %{ "[$reg]" %}
4031
interface(MEMORY_INTER) %{
4039
// Indirect Memory Plus Short Offset Operand
4040
operand indOffset8(eRegP reg, immI8 off) %{
4041
match(AddP reg off);
4043
format %{ "[$reg + $off]" %}
4044
interface(MEMORY_INTER) %{
4052
// Indirect Memory Plus Long Offset Operand
4053
operand indOffset32(eRegP reg, immI off) %{
4054
match(AddP reg off);
4056
format %{ "[$reg + $off]" %}
4057
interface(MEMORY_INTER) %{
4065
// Indirect Memory Plus Long Offset Operand
4066
operand indOffset32X(rRegI reg, immP off) %{
4067
match(AddP off reg);
4069
format %{ "[$reg + $off]" %}
4070
interface(MEMORY_INTER) %{
4078
// Indirect Memory Plus Index Register Plus Offset Operand
4079
operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
4080
match(AddP (AddP reg ireg) off);
4083
format %{"[$reg + $off + $ireg]" %}
4084
interface(MEMORY_INTER) %{
4092
// Indirect Memory Plus Index Register Plus Offset Operand
4093
operand indIndex(eRegP reg, rRegI ireg) %{
4094
match(AddP reg ireg);
4097
format %{"[$reg + $ireg]" %}
4098
interface(MEMORY_INTER) %{
4106
// // -------------------------------------------------------------------------
4107
// // 486 architecture doesn't support "scale * index + offset" with out a base
4108
// // -------------------------------------------------------------------------
4109
// // Scaled Memory Operands
4110
// // Indirect Memory Times Scale Plus Offset Operand
4111
// operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
4112
// match(AddP off (LShiftI ireg scale));
4115
// format %{"[$off + $ireg << $scale]" %}
4116
// interface(MEMORY_INTER) %{
4124
// Indirect Memory Times Scale Plus Index Register
4125
operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
4126
match(AddP reg (LShiftI ireg scale));
4129
format %{"[$reg + $ireg << $scale]" %}
4130
interface(MEMORY_INTER) %{
4138
// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4139
operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
4140
match(AddP (AddP reg (LShiftI ireg scale)) off);
4143
format %{"[$reg + $off + $ireg << $scale]" %}
4144
interface(MEMORY_INTER) %{
4152
//----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
// the first word of the long. If the load-long destination overlaps with
// registers used in the addressing expression, the 2nd half will be loaded
// from a clobbered address. Fix this by requiring that load-long use
// address registers that do not overlap with the load-long target.
4160
operand load_long_RegP() %{
4161
constraint(ALLOC_IN_RC(esi_reg));
4166
interface(REG_INTER);
4169
// Indirect Memory Operand Long
4170
operand load_long_indirect(load_long_RegP reg) %{
4171
constraint(ALLOC_IN_RC(esi_reg));
4174
format %{ "[$reg]" %}
4175
interface(MEMORY_INTER) %{
4183
// Indirect Memory Plus Long Offset Operand
4184
operand load_long_indOffset32(load_long_RegP reg, immI off) %{
4185
match(AddP reg off);
4187
format %{ "[$reg + $off]" %}
4188
interface(MEMORY_INTER) %{
4196
opclass load_long_memory(load_long_indirect, load_long_indOffset32);
4199
//----------Special Memory Operands--------------------------------------------
4200
// Stack Slot Operand - This operand is used for loading and storing temporary
4201
// values on the stack where a match requires a value to
4202
// flow through memory.
4203
operand stackSlotP(sRegP reg) %{
4204
constraint(ALLOC_IN_RC(stack_slots));
4205
// No match rule because this operand is only generated in matching
4206
format %{ "[$reg]" %}
4207
interface(MEMORY_INTER) %{
4209
index(0x4); // No Index
4210
scale(0x0); // No Scale
4211
disp($reg); // Stack Offset
4215
operand stackSlotI(sRegI reg) %{
4216
constraint(ALLOC_IN_RC(stack_slots));
4217
// No match rule because this operand is only generated in matching
4218
format %{ "[$reg]" %}
4219
interface(MEMORY_INTER) %{
4221
index(0x4); // No Index
4222
scale(0x0); // No Scale
4223
disp($reg); // Stack Offset
4227
operand stackSlotF(sRegF reg) %{
4228
constraint(ALLOC_IN_RC(stack_slots));
4229
// No match rule because this operand is only generated in matching
4230
format %{ "[$reg]" %}
4231
interface(MEMORY_INTER) %{
4233
index(0x4); // No Index
4234
scale(0x0); // No Scale
4235
disp($reg); // Stack Offset
4239
operand stackSlotD(sRegD reg) %{
4240
constraint(ALLOC_IN_RC(stack_slots));
4241
// No match rule because this operand is only generated in matching
4242
format %{ "[$reg]" %}
4243
interface(MEMORY_INTER) %{
4245
index(0x4); // No Index
4246
scale(0x0); // No Scale
4247
disp($reg); // Stack Offset
4251
operand stackSlotL(sRegL reg) %{
4252
constraint(ALLOC_IN_RC(stack_slots));
4253
// No match rule because this operand is only generated in matching
4254
format %{ "[$reg]" %}
4255
interface(MEMORY_INTER) %{
4257
index(0x4); // No Index
4258
scale(0x0); // No Scale
4259
disp($reg); // Stack Offset
4263
//----------Conditional Branch Operands----------------------------------------
// Comparison Op - This is the operation of the comparison, and is limited to
// the following set of codes:
// L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.
4282
interface(COND_INTER) %{
4284
not_equal(0x5, "ne");
4286
greater_equal(0xD, "ge");
4287
less_equal(0xE, "le");
4290
no_overflow(0x1, "no");
4294
// Comparison Code, unsigned compare. Used by FP also, with
4295
// C2 (unordered) turned into GT or LT already. The other bits
4296
// C0 and C3 are turned into Carry & Zero flags.
4301
interface(COND_INTER) %{
4303
not_equal(0x5, "ne");
4305
greater_equal(0x3, "nb");
4306
less_equal(0x6, "be");
4307
greater(0x7, "nbe");
4309
no_overflow(0x1, "no");
4313
// Floating comparisons that don't require any fixup for the unordered case
4314
operand cmpOpUCF() %{
4316
predicate(n->as_Bool()->_test._test == BoolTest::lt ||
4317
n->as_Bool()->_test._test == BoolTest::ge ||
4318
n->as_Bool()->_test._test == BoolTest::le ||
4319
n->as_Bool()->_test._test == BoolTest::gt);
4321
interface(COND_INTER) %{
4323
not_equal(0x5, "ne");
4325
greater_equal(0x3, "nb");
4326
less_equal(0x6, "be");
4327
greater(0x7, "nbe");
4329
no_overflow(0x1, "no");
4334
// Floating comparisons that can be fixed up with extra conditional jumps
4335
operand cmpOpUCF2() %{
4337
predicate(n->as_Bool()->_test._test == BoolTest::ne ||
4338
n->as_Bool()->_test._test == BoolTest::eq);
4340
interface(COND_INTER) %{
4342
not_equal(0x5, "ne");
4344
greater_equal(0x3, "nb");
4345
less_equal(0x6, "be");
4346
greater(0x7, "nbe");
4348
no_overflow(0x1, "no");
4352
// Comparison Code for FP conditional move
4353
operand cmpOp_fcmov() %{
4356
predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
4357
n->as_Bool()->_test._test != BoolTest::no_overflow);
4359
interface(COND_INTER) %{
4363
greater_equal(0x1C0);
4366
overflow(0x0, "o"); // not really supported by the instruction
4367
no_overflow(0x1, "no"); // not really supported by the instruction
4371
// Comparison Code used in long compares
4372
operand cmpOp_commute() %{
4376
interface(COND_INTER) %{
4378
not_equal(0x5, "ne");
4380
greater_equal(0xE, "le");
4381
less_equal(0xD, "ge");
4384
no_overflow(0x1, "no");
4388
// Comparison Code used in unsigned long compares
4389
operand cmpOpU_commute() %{
4393
interface(COND_INTER) %{
4395
not_equal(0x5, "ne");
4397
greater_equal(0x6, "be");
4398
less_equal(0x3, "nb");
4401
no_overflow(0x1, "no");
4405
//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
// instruction definitions by not requiring the AD writer to specify separate
// instructions for every form of operand when the instruction accepts
// multiple operand types with the same basic encoding and format. The classic
// case of this is memory operands.
4412
opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
4413
indIndex, indIndexScale, indIndexScaleOffset);
4415
// Long memory operations are encoded in 2 instructions and a +4 offset.
4416
// This means some kind of offset is always required and you cannot use
4417
// an oop as the offset (done when working on static globals).
4418
opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
4419
indIndex, indIndexScale, indIndexScaleOffset);
4422
//----------PIPELINE-----------------------------------------------------------
4423
// Rules which define the behavior of the target architectures pipeline.
4426
//----------ATTRIBUTES---------------------------------------------------------
4428
variable_size_instructions; // Fixed size instructions
4429
max_instructions_per_bundle = 3; // Up to 3 instructions per bundle
4430
instruction_unit_size = 1; // An instruction is 1 bytes long
4431
instruction_fetch_unit_size = 16; // The processor fetches one line
4432
instruction_fetch_units = 1; // of 16 bytes
4434
// List of nop instructions
4438
//----------RESOURCES----------------------------------------------------------
4439
// Resources are the functional units available to the machine
4441
// Generic P2/P3 pipeline
4442
// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
4443
// 3 instructions decoded per cycle.
4444
// 2 load/store ops per cycle, 1 branch, 1 FPU,
4445
// 2 ALU op, only ALU0 handles mul/div instructions.
4446
resources( D0, D1, D2, DECODE = D0 | D1 | D2,
4447
MS0, MS1, MEM = MS0 | MS1,
4449
ALU0, ALU1, ALU = ALU0 | ALU1 );
4451
//----------PIPELINE DESCRIPTION-----------------------------------------------
4452
// Pipeline Description specifies the stages in the machine's pipeline
4454
// Generic P2/P3 pipeline
4455
pipe_desc(S0, S1, S2, S3, S4, S5);
4457
//----------PIPELINE CLASSES---------------------------------------------------
4458
// Pipeline Classes describe the stages in which input and output are
4459
// referenced by the hardware pipeline.
4461
// Naming convention: ialu or fpu
4463
// Then: _reg if there is a 2nd register
4464
// Then: _long if it's a pair of instructions implementing a long
4465
// Then: _fat if it requires the big decoder
4466
// Or: _mem if it requires the big decoder and a memory unit.
4468
// Integer ALU reg operation
4469
pipe_class ialu_reg(rRegI dst) %{
4473
DECODE : S0; // any decoder
4474
ALU : S3; // any alu
4477
// Long ALU reg operation
4478
pipe_class ialu_reg_long(eRegL dst) %{
4479
instruction_count(2);
4482
DECODE : S0(2); // any 2 decoders
4483
ALU : S3(2); // both alus
4486
// Integer ALU reg operation using big decoder
4487
pipe_class ialu_reg_fat(rRegI dst) %{
4491
D0 : S0; // big decoder only
4492
ALU : S3; // any alu
4495
// Long ALU reg operation using big decoder
4496
pipe_class ialu_reg_long_fat(eRegL dst) %{
4497
instruction_count(2);
4500
D0 : S0(2); // big decoder only; twice
4501
ALU : S3(2); // any 2 alus
4504
// Integer ALU reg-reg operation
4505
pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
4509
DECODE : S0; // any decoder
4510
ALU : S3; // any alu
4513
// Long ALU reg-reg operation
4514
pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
4515
instruction_count(2);
4518
DECODE : S0(2); // any 2 decoders
4519
ALU : S3(2); // both alus
4522
// Integer ALU reg-reg operation
4523
pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
4527
D0 : S0; // big decoder only
4528
ALU : S3; // any alu
4531
// Long ALU reg-reg operation
4532
pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
4533
instruction_count(2);
4536
D0 : S0(2); // big decoder only; twice
4537
ALU : S3(2); // both alus
4540
// Integer ALU reg-mem operation
4541
pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
4545
D0 : S0; // big decoder only
4546
ALU : S4; // any alu
4547
MEM : S3; // any mem
4550
// Long ALU reg-mem operation
4551
pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
4552
instruction_count(2);
4555
D0 : S0(2); // big decoder only; twice
4556
ALU : S4(2); // any 2 alus
4557
MEM : S3(2); // both mems
4560
// Integer mem operation (prefetch)
4561
pipe_class ialu_mem(memory mem)
4565
D0 : S0; // big decoder only
4566
MEM : S3; // any mem
4569
// Integer Store to Memory
4570
pipe_class ialu_mem_reg(memory mem, rRegI src) %{
4574
D0 : S0; // big decoder only
4575
ALU : S4; // any alu
4579
// Long Store to Memory
4580
pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
4581
instruction_count(2);
4584
D0 : S0(2); // big decoder only; twice
4585
ALU : S4(2); // any 2 alus
4586
MEM : S3(2); // Both mems
4589
// Integer Store to Memory
4590
pipe_class ialu_mem_imm(memory mem) %{
4593
D0 : S0; // big decoder only
4594
ALU : S4; // any alu
4598
// Integer ALU0 reg-reg operation
4599
pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
4603
D0 : S0; // Big decoder only
4604
ALU0 : S3; // only alu0
4607
// Integer ALU0 reg-mem operation
4608
pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
4612
D0 : S0; // big decoder only
4613
ALU0 : S4; // ALU0 only
4614
MEM : S3; // any mem
4617
// Integer ALU reg-reg operation
4618
pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
4623
DECODE : S0; // any decoder
4624
ALU : S3; // any alu
4627
// Integer ALU reg-imm operation
4628
pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
4632
DECODE : S0; // any decoder
4633
ALU : S3; // any alu
4636
// Integer ALU reg-mem operation
4637
pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
4642
D0 : S0; // big decoder only
4643
ALU : S4; // any alu
4647
// Conditional move reg-reg
4648
pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
4649
instruction_count(4);
4653
DECODE : S0(4); // any decoder
4656
// Conditional move reg-reg
4657
pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
4662
DECODE : S0; // any decoder
4665
// Conditional move reg-mem
4666
pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
4671
DECODE : S0; // any decoder
4675
// Conditional move reg-reg long
4676
pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
4681
DECODE : S0(2); // any 2 decoders
4684
// Conditional move double reg-reg
4685
pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
4690
DECODE : S0; // any decoder
4693
// Float reg-reg operation
4694
pipe_class fpu_reg(regDPR dst) %{
4695
instruction_count(2);
4697
DECODE : S0(2); // any 2 decoders
4701
// Float reg-reg operation
4702
pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
4703
instruction_count(2);
4706
DECODE : S0(2); // any 2 decoders
4710
// Float reg-reg operation
4711
pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
4712
instruction_count(3);
4716
DECODE : S0(3); // any 3 decoders
4720
// Float reg-reg operation
4721
pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
4722
instruction_count(4);
4727
DECODE : S0(4); // any 3 decoders
4731
// Float reg-reg operation
4732
pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
4733
instruction_count(4);
4738
DECODE : S1(3); // any 3 decoders
4739
D0 : S0; // Big decoder only
4744
// Float reg-mem operation
4745
pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
4746
instruction_count(2);
4749
D0 : S0; // big decoder only
4750
DECODE : S1; // any decoder for FPU POP
4752
MEM : S3; // any mem
4755
// Float reg-mem operation
4756
pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
4757
instruction_count(3);
4761
D0 : S0; // big decoder only
4762
DECODE : S1(2); // any decoder for FPU POP
4764
MEM : S3; // any mem
4767
// Float mem-reg operation
4768
pipe_class fpu_mem_reg(memory mem, regDPR src) %{
4769
instruction_count(2);
4772
DECODE : S0; // any decoder for FPU PUSH
4773
D0 : S1; // big decoder only
4775
MEM : S3; // any mem
4778
pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
4779
instruction_count(3);
4783
DECODE : S0(2); // any decoder for FPU PUSH
4784
D0 : S1; // big decoder only
4786
MEM : S3; // any mem
4789
pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
4790
instruction_count(3);
4794
DECODE : S0; // any decoder for FPU PUSH
4795
D0 : S0(2); // big decoder only
4797
MEM : S3(2); // any mem
4800
pipe_class fpu_mem_mem(memory dst, memory src1) %{
4801
instruction_count(2);
4804
D0 : S0(2); // big decoder only
4805
MEM : S3(2); // any mem
4808
pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
4809
instruction_count(3);
4813
D0 : S0(3); // big decoder only
4815
MEM : S3(3); // any mem
4818
pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
4819
instruction_count(3);
4822
DECODE : S0; // any decoder for FPU PUSH
4823
D0 : S0(2); // big decoder only
4825
MEM : S3(2); // any mem
4828
// Float load constant
4829
pipe_class fpu_reg_con(regDPR dst) %{
4830
instruction_count(2);
4832
D0 : S0; // big decoder only for the load
4833
DECODE : S1; // any decoder for FPU POP
4835
MEM : S3; // any mem
4838
// Float load constant
4839
pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
4840
instruction_count(3);
4843
D0 : S0; // big decoder only for the load
4844
DECODE : S1(2); // any decoder for FPU POP
4846
MEM : S3; // any mem
4849
// UnConditional branch
4850
pipe_class pipe_jmp( label labl ) %{
4855
// Conditional branch
4856
pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
4863
pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
4864
instruction_count(1); force_serialization;
4866
heap_ptr : S3(read);
4875
// Generic big/slow expanded idiom
4876
pipe_class pipe_slow( ) %{
4877
instruction_count(10); multiple_bundles; force_serialization;
4883
// The real do-nothing guy
4884
pipe_class empty( ) %{
4885
instruction_count(0);
4888
// Define the class for the Nop node
4895
//----------INSTRUCTIONS-------------------------------------------------------
4897
// match -- States which machine-independent subtree may be replaced
4898
// by this instruction.
4899
// ins_cost -- The estimated cost of this instruction is used by instruction
4900
// selection to identify a minimum cost tree of machine
4901
// instructions that matches a tree of machine-independent
4903
// format -- A string providing the disassembly for this instruction.
4904
// The value of an instruction's operand may be inserted
4905
// by referring to it with a '$' prefix.
4906
// opcode -- Three instruction opcodes may be provided. These are referred
4907
// to within an encode class as $primary, $secondary, and $tertiary
4908
// respectively. The primary opcode is commonly used to
4909
// indicate the type of machine instruction, while secondary
4910
// and tertiary are often used for prefix options or addressing
4912
// ins_encode -- A list of encode classes with parameters. The encode class
4913
// name must have been defined in an 'enc_class' specification
4914
// in the encode section of the architecture description.
4916
// Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
4918
instruct MoveF2LEG(legRegF dst, regF src) %{
4920
format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
4922
ShouldNotReachHere();
4924
ins_pipe( fpu_reg_reg );
4928
instruct MoveLEG2F(regF dst, legRegF src) %{
4930
format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
4932
ShouldNotReachHere();
4934
ins_pipe( fpu_reg_reg );
4938
instruct MoveF2VL(vlRegF dst, regF src) %{
4940
format %{ "movss $dst,$src\t! load float (4 bytes)" %}
4942
ShouldNotReachHere();
4944
ins_pipe( fpu_reg_reg );
4948
instruct MoveVL2F(regF dst, vlRegF src) %{
4950
format %{ "movss $dst,$src\t! load float (4 bytes)" %}
4952
ShouldNotReachHere();
4954
ins_pipe( fpu_reg_reg );
4960
instruct MoveD2LEG(legRegD dst, regD src) %{
4962
format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
4964
ShouldNotReachHere();
4966
ins_pipe( fpu_reg_reg );
4970
instruct MoveLEG2D(regD dst, legRegD src) %{
4972
format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
4974
ShouldNotReachHere();
4976
ins_pipe( fpu_reg_reg );
4980
instruct MoveD2VL(vlRegD dst, regD src) %{
4982
format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
4984
ShouldNotReachHere();
4986
ins_pipe( fpu_reg_reg );
4990
instruct MoveVL2D(regD dst, vlRegD src) %{
4992
format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
4994
ShouldNotReachHere();
4996
ins_pipe( fpu_reg_reg );
4999
//----------BSWAP-Instruction--------------------------------------------------
5000
instruct bytes_reverse_int(rRegI dst) %{
5001
match(Set dst (ReverseBytesI dst));
5003
format %{ "BSWAP $dst" %}
5005
ins_encode( OpcP, OpcSReg(dst) );
5006
ins_pipe( ialu_reg );
5009
instruct bytes_reverse_long(eRegL dst) %{
5010
match(Set dst (ReverseBytesL dst));
5012
format %{ "BSWAP $dst.lo\n\t"
5014
"XCHG $dst.lo $dst.hi" %}
5017
ins_encode( bswap_long_bytes(dst) );
5018
ins_pipe( ialu_reg_reg);
5021
instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
5022
match(Set dst (ReverseBytesUS dst));
5025
format %{ "BSWAP $dst\n\t"
5026
"SHR $dst,16\n\t" %}
5028
__ bswapl($dst$$Register);
5029
__ shrl($dst$$Register, 16);
5031
ins_pipe( ialu_reg );
5034
instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
5035
match(Set dst (ReverseBytesS dst));
5038
format %{ "BSWAP $dst\n\t"
5039
"SAR $dst,16\n\t" %}
5041
__ bswapl($dst$$Register);
5042
__ sarl($dst$$Register, 16);
5044
ins_pipe( ialu_reg );
5048
//---------- Zeros Count Instructions ------------------------------------------
5050
instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5051
predicate(UseCountLeadingZerosInstruction);
5052
match(Set dst (CountLeadingZerosI src));
5055
format %{ "LZCNT $dst, $src\t# count leading zeros (int)" %}
5057
__ lzcntl($dst$$Register, $src$$Register);
5062
instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
5063
predicate(!UseCountLeadingZerosInstruction);
5064
match(Set dst (CountLeadingZerosI src));
5067
format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t"
5074
Register Rdst = $dst$$Register;
5075
Register Rsrc = $src$$Register;
5077
__ bsrl(Rdst, Rsrc);
5078
__ jccb(Assembler::notZero, skip);
5082
__ addl(Rdst, BitsPerInt - 1);
5087
instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5088
predicate(UseCountLeadingZerosInstruction);
5089
match(Set dst (CountLeadingZerosL src));
5090
effect(TEMP dst, KILL cr);
5092
format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t"
5094
"LZCNT $dst, $src.lo\n\t"
5098
Register Rdst = $dst$$Register;
5099
Register Rsrc = $src$$Register;
5101
__ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5102
__ jccb(Assembler::carryClear, done);
5103
__ lzcntl(Rdst, Rsrc);
5104
__ addl(Rdst, BitsPerInt);
5110
instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
5111
predicate(!UseCountLeadingZerosInstruction);
5112
match(Set dst (CountLeadingZerosL src));
5113
effect(TEMP dst, KILL cr);
5115
format %{ "BSR $dst, $src.hi\t# count leading zeros (long)\n\t"
5116
"JZ msw_is_zero\n\t"
5120
"BSR $dst, $src.lo\n\t"
5127
Register Rdst = $dst$$Register;
5128
Register Rsrc = $src$$Register;
5131
__ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
5132
__ jccb(Assembler::zero, msw_is_zero);
5133
__ addl(Rdst, BitsPerInt);
5135
__ bind(msw_is_zero);
5136
__ bsrl(Rdst, Rsrc);
5137
__ jccb(Assembler::notZero, not_zero);
5141
__ addl(Rdst, BitsPerLong - 1);
5146
instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5147
predicate(UseCountTrailingZerosInstruction);
5148
match(Set dst (CountTrailingZerosI src));
5151
format %{ "TZCNT $dst, $src\t# count trailing zeros (int)" %}
5153
__ tzcntl($dst$$Register, $src$$Register);
5158
instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
5159
predicate(!UseCountTrailingZerosInstruction);
5160
match(Set dst (CountTrailingZerosI src));
5163
format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t"
5168
Register Rdst = $dst$$Register;
5170
__ bsfl(Rdst, $src$$Register);
5171
__ jccb(Assembler::notZero, done);
5172
__ movl(Rdst, BitsPerInt);
5178
instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5179
predicate(UseCountTrailingZerosInstruction);
5180
match(Set dst (CountTrailingZerosL src));
5181
effect(TEMP dst, KILL cr);
5183
format %{ "TZCNT $dst, $src.lo\t# count trailing zeros (long) \n\t"
5185
"TZCNT $dst, $src.hi\n\t"
5189
Register Rdst = $dst$$Register;
5190
Register Rsrc = $src$$Register;
5192
__ tzcntl(Rdst, Rsrc);
5193
__ jccb(Assembler::carryClear, done);
5194
__ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5195
__ addl(Rdst, BitsPerInt);
5201
instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
5202
predicate(!UseCountTrailingZerosInstruction);
5203
match(Set dst (CountTrailingZerosL src));
5204
effect(TEMP dst, KILL cr);
5206
format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t"
5208
"BSF $dst, $src.hi\n\t"
5209
"JNZ msw_not_zero\n\t"
5215
Register Rdst = $dst$$Register;
5216
Register Rsrc = $src$$Register;
5219
__ bsfl(Rdst, Rsrc);
5220
__ jccb(Assembler::notZero, done);
5221
__ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
5222
__ jccb(Assembler::notZero, msw_not_zero);
5223
__ movl(Rdst, BitsPerInt);
5224
__ bind(msw_not_zero);
5225
__ addl(Rdst, BitsPerInt);
5232
//---------- Population Count Instructions -------------------------------------
5234
instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
5235
predicate(UsePopCountInstruction);
5236
match(Set dst (PopCountI src));
5239
format %{ "POPCNT $dst, $src" %}
5241
__ popcntl($dst$$Register, $src$$Register);
5246
instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
5247
predicate(UsePopCountInstruction);
5248
match(Set dst (PopCountI (LoadI mem)));
5251
format %{ "POPCNT $dst, $mem" %}
5253
__ popcntl($dst$$Register, $mem$$Address);
5258
// Note: Long.bitCount(long) returns an int.
5259
instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
5260
predicate(UsePopCountInstruction);
5261
match(Set dst (PopCountL src));
5262
effect(KILL cr, TEMP tmp, TEMP dst);
5264
format %{ "POPCNT $dst, $src.lo\n\t"
5265
"POPCNT $tmp, $src.hi\n\t"
5268
__ popcntl($dst$$Register, $src$$Register);
5269
__ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
5270
__ addl($dst$$Register, $tmp$$Register);
5275
// Note: Long.bitCount(long) returns an int.
5276
instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
5277
predicate(UsePopCountInstruction);
5278
match(Set dst (PopCountL (LoadL mem)));
5279
effect(KILL cr, TEMP tmp, TEMP dst);
5281
format %{ "POPCNT $dst, $mem\n\t"
5282
"POPCNT $tmp, $mem+4\n\t"
5285
//__ popcntl($dst$$Register, $mem$$Address$$first);
5286
//__ popcntl($tmp$$Register, $mem$$Address$$second);
5287
__ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
5288
__ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
5289
__ addl($dst$$Register, $tmp$$Register);
5295
//----------Load/Store/Move Instructions---------------------------------------
5296
//----------Load Instructions--------------------------------------------------
5297
// Load Byte (8bit signed)
5298
instruct loadB(xRegI dst, memory mem) %{
5299
match(Set dst (LoadB mem));
5302
format %{ "MOVSX8 $dst,$mem\t# byte" %}
5305
__ movsbl($dst$$Register, $mem$$Address);
5308
ins_pipe(ialu_reg_mem);
5311
// Load Byte (8bit signed) into Long Register
5312
instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5313
match(Set dst (ConvI2L (LoadB mem)));
5317
format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
5318
"MOV $dst.hi,$dst.lo\n\t"
5322
__ movsbl($dst$$Register, $mem$$Address);
5323
__ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5324
__ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended.
5327
ins_pipe(ialu_reg_mem);
5330
// Load Unsigned Byte (8bit UNsigned)
5331
instruct loadUB(xRegI dst, memory mem) %{
5332
match(Set dst (LoadUB mem));
5335
format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
5338
__ movzbl($dst$$Register, $mem$$Address);
5341
ins_pipe(ialu_reg_mem);
5344
// Load Unsigned Byte (8 bit UNsigned) into Long Register
5345
instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5346
match(Set dst (ConvI2L (LoadUB mem)));
5350
format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
5351
"XOR $dst.hi,$dst.hi" %}
5354
Register Rdst = $dst$$Register;
5355
__ movzbl(Rdst, $mem$$Address);
5356
__ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5359
ins_pipe(ialu_reg_mem);
5362
// Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
5363
instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5364
match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
5367
format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
5368
"XOR $dst.hi,$dst.hi\n\t"
5369
"AND $dst.lo,right_n_bits($mask, 8)" %}
5371
Register Rdst = $dst$$Register;
5372
__ movzbl(Rdst, $mem$$Address);
5373
__ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5374
__ andl(Rdst, $mask$$constant & right_n_bits(8));
5376
ins_pipe(ialu_reg_mem);
5379
// Load Short (16bit signed)
5380
instruct loadS(rRegI dst, memory mem) %{
5381
match(Set dst (LoadS mem));
5384
format %{ "MOVSX $dst,$mem\t# short" %}
5387
__ movswl($dst$$Register, $mem$$Address);
5390
ins_pipe(ialu_reg_mem);
5393
// Load Short (16 bit signed) to Byte (8 bit signed)
5394
instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5395
match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
5398
format %{ "MOVSX $dst, $mem\t# short -> byte" %}
5400
__ movsbl($dst$$Register, $mem$$Address);
5402
ins_pipe(ialu_reg_mem);
5405
// Load Short (16bit signed) into Long Register
5406
instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5407
match(Set dst (ConvI2L (LoadS mem)));
5411
format %{ "MOVSX $dst.lo,$mem\t# short -> long\n\t"
5412
"MOV $dst.hi,$dst.lo\n\t"
5416
__ movswl($dst$$Register, $mem$$Address);
5417
__ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5418
__ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended.
5421
ins_pipe(ialu_reg_mem);
5424
// Load Unsigned Short/Char (16bit unsigned)
5425
instruct loadUS(rRegI dst, memory mem) %{
5426
match(Set dst (LoadUS mem));
5429
format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %}
5432
__ movzwl($dst$$Register, $mem$$Address);
5435
ins_pipe(ialu_reg_mem);
5438
// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
5439
instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5440
match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
5443
format %{ "MOVSX $dst, $mem\t# ushort -> byte" %}
5445
__ movsbl($dst$$Register, $mem$$Address);
5447
ins_pipe(ialu_reg_mem);
5450
// Load Unsigned Short/Char (16 bit UNsigned) into Long Register
5451
instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5452
match(Set dst (ConvI2L (LoadUS mem)));
5456
format %{ "MOVZX $dst.lo,$mem\t# ushort/char -> long\n\t"
5457
"XOR $dst.hi,$dst.hi" %}
5460
__ movzwl($dst$$Register, $mem$$Address);
5461
__ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5464
ins_pipe(ialu_reg_mem);
5467
// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
5468
instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5469
match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5472
format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
5473
"XOR $dst.hi,$dst.hi" %}
5475
Register Rdst = $dst$$Register;
5476
__ movzbl(Rdst, $mem$$Address);
5477
__ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5479
ins_pipe(ialu_reg_mem);
5482
// Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
5483
instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5484
match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5487
format %{ "MOVZX $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
5488
"XOR $dst.hi,$dst.hi\n\t"
5489
"AND $dst.lo,right_n_bits($mask, 16)" %}
5491
Register Rdst = $dst$$Register;
5492
__ movzwl(Rdst, $mem$$Address);
5493
__ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5494
__ andl(Rdst, $mask$$constant & right_n_bits(16));
5496
ins_pipe(ialu_reg_mem);
5500
instruct loadI(rRegI dst, memory mem) %{
5501
match(Set dst (LoadI mem));
5504
format %{ "MOV $dst,$mem\t# int" %}
5507
__ movl($dst$$Register, $mem$$Address);
5510
ins_pipe(ialu_reg_mem);
5513
// Load Integer (32 bit signed) to Byte (8 bit signed)
5514
instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5515
match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
5518
format %{ "MOVSX $dst, $mem\t# int -> byte" %}
5520
__ movsbl($dst$$Register, $mem$$Address);
5522
ins_pipe(ialu_reg_mem);
5525
// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
5526
instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
5527
match(Set dst (AndI (LoadI mem) mask));
5530
format %{ "MOVZX $dst, $mem\t# int -> ubyte" %}
5532
__ movzbl($dst$$Register, $mem$$Address);
5534
ins_pipe(ialu_reg_mem);
5537
// Load Integer (32 bit signed) to Short (16 bit signed)
5538
instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
5539
match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
5542
format %{ "MOVSX $dst, $mem\t# int -> short" %}
5544
__ movswl($dst$$Register, $mem$$Address);
5546
ins_pipe(ialu_reg_mem);
5549
// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
5550
instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
5551
match(Set dst (AndI (LoadI mem) mask));
5554
format %{ "MOVZX $dst, $mem\t# int -> ushort/char" %}
5556
__ movzwl($dst$$Register, $mem$$Address);
5558
ins_pipe(ialu_reg_mem);
5561
// Load Integer into Long Register
5562
instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
5563
match(Set dst (ConvI2L (LoadI mem)));
5567
format %{ "MOV $dst.lo,$mem\t# int -> long\n\t"
5568
"MOV $dst.hi,$dst.lo\n\t"
5572
__ movl($dst$$Register, $mem$$Address);
5573
__ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5574
__ sarl(HIGH_FROM_LOW($dst$$Register), 31);
5577
ins_pipe(ialu_reg_mem);
5580
// Load Integer with mask 0xFF into Long Register
5581
instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5582
match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5585
format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
5586
"XOR $dst.hi,$dst.hi" %}
5588
Register Rdst = $dst$$Register;
5589
__ movzbl(Rdst, $mem$$Address);
5590
__ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5592
ins_pipe(ialu_reg_mem);
5595
// Load Integer with mask 0xFFFF into Long Register
5596
instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
5597
match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5600
format %{ "MOVZX $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
5601
"XOR $dst.hi,$dst.hi" %}
5603
Register Rdst = $dst$$Register;
5604
__ movzwl(Rdst, $mem$$Address);
5605
__ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5607
ins_pipe(ialu_reg_mem);
5610
// Load Integer with 31-bit mask into Long Register
5611
instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
5612
match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5615
format %{ "MOV $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
5616
"XOR $dst.hi,$dst.hi\n\t"
5617
"AND $dst.lo,$mask" %}
5619
Register Rdst = $dst$$Register;
5620
__ movl(Rdst, $mem$$Address);
5621
__ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5622
__ andl(Rdst, $mask$$constant);
5624
ins_pipe(ialu_reg_mem);
5627
// Load Unsigned Integer into Long Register
5628
instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
5629
match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
5633
format %{ "MOV $dst.lo,$mem\t# uint -> long\n\t"
5634
"XOR $dst.hi,$dst.hi" %}
5637
__ movl($dst$$Register, $mem$$Address);
5638
__ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5641
ins_pipe(ialu_reg_mem);
5644
// Load Long. Cannot clobber address while loading, so restrict address
5646
instruct loadL(eRegL dst, load_long_memory mem) %{
5647
predicate(!((LoadLNode*)n)->require_atomic_access());
5648
match(Set dst (LoadL mem));
5651
format %{ "MOV $dst.lo,$mem\t# long\n\t"
5652
"MOV $dst.hi,$mem+4" %}
5655
Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
5656
Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
5657
__ movl($dst$$Register, Amemlo);
5658
__ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
5661
ins_pipe(ialu_reg_long_mem);
5664
// Volatile Load Long. Must be atomic, so do 64-bit FILD
5665
// then store it down to the stack and reload on the int
5667
instruct loadL_volatile(stackSlotL dst, memory mem) %{
5668
predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
5669
match(Set dst (LoadL mem));
5672
format %{ "FILD $mem\t# Atomic volatile long load\n\t"
5674
ins_encode(enc_loadL_volatile(mem,dst));
5675
ins_pipe( fpu_reg_mem );
5678
instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
5679
predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5680
match(Set dst (LoadL mem));
5683
format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
5684
"MOVSD $dst,$tmp" %}
5686
__ movdbl($tmp$$XMMRegister, $mem$$Address);
5687
__ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
5689
ins_pipe( pipe_slow );
5692
instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
5693
predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5694
match(Set dst (LoadL mem));
5697
format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
5698
"MOVD $dst.lo,$tmp\n\t"
5700
"MOVD $dst.hi,$tmp" %}
5702
__ movdbl($tmp$$XMMRegister, $mem$$Address);
5703
__ movdl($dst$$Register, $tmp$$XMMRegister);
5704
__ psrlq($tmp$$XMMRegister, 32);
5705
__ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
5707
ins_pipe( pipe_slow );
5711
instruct loadRange(rRegI dst, memory mem) %{
5712
match(Set dst (LoadRange mem));
5715
format %{ "MOV $dst,$mem" %}
5717
ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
5718
ins_pipe( ialu_reg_mem );
5723
instruct loadP(eRegP dst, memory mem) %{
5724
match(Set dst (LoadP mem));
5727
format %{ "MOV $dst,$mem" %}
5729
ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
5730
ins_pipe( ialu_reg_mem );
5733
// Load Klass Pointer
5734
instruct loadKlass(eRegP dst, memory mem) %{
5735
match(Set dst (LoadKlass mem));
5738
format %{ "MOV $dst,$mem" %}
5740
ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
5741
ins_pipe( ialu_reg_mem );
5745
instruct loadDPR(regDPR dst, memory mem) %{
5746
predicate(UseSSE<=1);
5747
match(Set dst (LoadD mem));
5750
format %{ "FLD_D ST,$mem\n\t"
5752
opcode(0xDD); /* DD /0 */
5753
ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
5754
Pop_Reg_DPR(dst), ClearInstMark );
5755
ins_pipe( fpu_reg_mem );
5758
// Load Double to XMM
5759
instruct loadD(regD dst, memory mem) %{
5760
predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
5761
match(Set dst (LoadD mem));
5763
format %{ "MOVSD $dst,$mem" %}
5765
__ movdbl ($dst$$XMMRegister, $mem$$Address);
5767
ins_pipe( pipe_slow );
5770
instruct loadD_partial(regD dst, memory mem) %{
5771
predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
5772
match(Set dst (LoadD mem));
5774
format %{ "MOVLPD $dst,$mem" %}
5776
__ movdbl ($dst$$XMMRegister, $mem$$Address);
5778
ins_pipe( pipe_slow );
5781
// Load to XMM register (single-precision floating point)
5783
instruct loadF(regF dst, memory mem) %{
5784
predicate(UseSSE>=1);
5785
match(Set dst (LoadF mem));
5787
format %{ "MOVSS $dst,$mem" %}
5789
__ movflt ($dst$$XMMRegister, $mem$$Address);
5791
ins_pipe( pipe_slow );
5795
instruct loadFPR(regFPR dst, memory mem) %{
5796
predicate(UseSSE==0);
5797
match(Set dst (LoadF mem));
5800
format %{ "FLD_S ST,$mem\n\t"
5802
opcode(0xD9); /* D9 /0 */
5803
ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
5804
Pop_Reg_FPR(dst), ClearInstMark );
5805
ins_pipe( fpu_reg_mem );
5808
// Load Effective Address
5809
instruct leaP8(eRegP dst, indOffset8 mem) %{
5813
format %{ "LEA $dst,$mem" %}
5815
ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
5816
ins_pipe( ialu_reg_reg_fat );
5819
instruct leaP32(eRegP dst, indOffset32 mem) %{
5823
format %{ "LEA $dst,$mem" %}
5825
ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
5826
ins_pipe( ialu_reg_reg_fat );
5829
instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
5833
format %{ "LEA $dst,$mem" %}
5835
ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
5836
ins_pipe( ialu_reg_reg_fat );
5839
instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
5843
format %{ "LEA $dst,$mem" %}
5845
ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
5846
ins_pipe( ialu_reg_reg_fat );
5849
instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
5853
format %{ "LEA $dst,$mem" %}
5855
ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
5856
ins_pipe( ialu_reg_reg_fat );
5860
instruct loadConI(rRegI dst, immI src) %{
5863
format %{ "MOV $dst,$src" %}
5864
ins_encode( SetInstMark, LdImmI(dst, src), ClearInstMark );
5865
ins_pipe( ialu_reg_fat );
5868
// Load Constant zero
5869
instruct loadConI0(rRegI dst, immI_0 src, eFlagsReg cr) %{
5874
format %{ "XOR $dst,$dst" %}
5875
opcode(0x33); /* + rd */
5876
ins_encode( OpcP, RegReg( dst, dst ) );
5877
ins_pipe( ialu_reg );
5880
instruct loadConP(eRegP dst, immP src) %{
5883
format %{ "MOV $dst,$src" %}
5884
opcode(0xB8); /* + rd */
5885
ins_encode( SetInstMark, LdImmP(dst, src), ClearInstMark );
5886
ins_pipe( ialu_reg_fat );
5889
instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
5893
format %{ "MOV $dst.lo,$src.lo\n\t"
5894
"MOV $dst.hi,$src.hi" %}
5896
ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
5897
ins_pipe( ialu_reg_long_fat );
5900
instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
5904
format %{ "XOR $dst.lo,$dst.lo\n\t"
5905
"XOR $dst.hi,$dst.hi" %}
5907
ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
5908
ins_pipe( ialu_reg_long );
5911
// The instruction usage is guarded by predicate in operand immFPR().
5912
instruct loadConFPR(regFPR dst, immFPR con) %{
5915
format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
5918
__ fld_s($constantaddress($con));
5919
__ fstp_d($dst$$reg);
5921
ins_pipe(fpu_reg_con);
5924
// The instruction usage is guarded by predicate in operand immFPR0().
5925
instruct loadConFPR0(regFPR dst, immFPR0 con) %{
5928
format %{ "FLDZ ST\n\t"
5932
__ fstp_d($dst$$reg);
5934
ins_pipe(fpu_reg_con);
5937
// The instruction usage is guarded by predicate in operand immFPR1().
5938
instruct loadConFPR1(regFPR dst, immFPR1 con) %{
5941
format %{ "FLD1 ST\n\t"
5945
__ fstp_d($dst$$reg);
5947
ins_pipe(fpu_reg_con);
5950
// The instruction usage is guarded by predicate in operand immF().
5951
instruct loadConF(regF dst, immF con) %{
5954
format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
5956
__ movflt($dst$$XMMRegister, $constantaddress($con));
5958
ins_pipe(pipe_slow);
5961
// The instruction usage is guarded by predicate in operand immF0().
5962
instruct loadConF0(regF dst, immF0 src) %{
5965
format %{ "XORPS $dst,$dst\t# float 0.0" %}
5967
__ xorps($dst$$XMMRegister, $dst$$XMMRegister);
5969
ins_pipe(pipe_slow);
5972
// The instruction usage is guarded by predicate in operand immDPR().
5973
instruct loadConDPR(regDPR dst, immDPR con) %{
5977
format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
5980
__ fld_d($constantaddress($con));
5981
__ fstp_d($dst$$reg);
5983
ins_pipe(fpu_reg_con);
5986
// The instruction usage is guarded by predicate in operand immDPR0().
5987
instruct loadConDPR0(regDPR dst, immDPR0 con) %{
5991
format %{ "FLDZ ST\n\t"
5995
__ fstp_d($dst$$reg);
5997
ins_pipe(fpu_reg_con);
6000
// The instruction usage is guarded by predicate in operand immDPR1().
6001
instruct loadConDPR1(regDPR dst, immDPR1 con) %{
6005
format %{ "FLD1 ST\n\t"
6009
__ fstp_d($dst$$reg);
6011
ins_pipe(fpu_reg_con);
6014
// The instruction usage is guarded by predicate in operand immD().
6015
instruct loadConD(regD dst, immD con) %{
6018
format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
6020
__ movdbl($dst$$XMMRegister, $constantaddress($con));
6022
ins_pipe(pipe_slow);
6025
// The instruction usage is guarded by predicate in operand immD0().
6026
instruct loadConD0(regD dst, immD0 src) %{
6029
format %{ "XORPD $dst,$dst\t# double 0.0" %}
6031
__ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
6033
ins_pipe( pipe_slow );
6037
instruct loadSSI(rRegI dst, stackSlotI src) %{
6041
format %{ "MOV $dst,$src" %}
6043
ins_encode( SetInstMark, OpcP, RegMem(dst,src), ClearInstMark);
6044
ins_pipe( ialu_reg_mem );
6047
instruct loadSSL(eRegL dst, stackSlotL src) %{
6051
format %{ "MOV $dst,$src.lo\n\t"
6052
"MOV $dst+4,$src.hi" %}
6054
ins_encode( SetInstMark, OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ), ClearInstMark );
6055
ins_pipe( ialu_mem_long_reg );
6059
instruct loadSSP(eRegP dst, stackSlotP src) %{
6063
format %{ "MOV $dst,$src" %}
6065
ins_encode( SetInstMark, OpcP, RegMem(dst,src), ClearInstMark);
6066
ins_pipe( ialu_reg_mem );
6070
instruct loadSSF(regFPR dst, stackSlotF src) %{
6074
format %{ "FLD_S $src\n\t"
6076
opcode(0xD9); /* D9 /0, FLD m32real */
6077
ins_encode( SetInstMark, OpcP, RMopc_Mem_no_oop(0x00,src),
6078
Pop_Reg_FPR(dst), ClearInstMark );
6079
ins_pipe( fpu_reg_mem );
6083
instruct loadSSD(regDPR dst, stackSlotD src) %{
6087
format %{ "FLD_D $src\n\t"
6089
opcode(0xDD); /* DD /0, FLD m64real */
6090
ins_encode( SetInstMark, OpcP, RMopc_Mem_no_oop(0x00,src),
6091
Pop_Reg_DPR(dst), ClearInstMark );
6092
ins_pipe( fpu_reg_mem );
6095
// Prefetch instructions for allocation.
6096
// Must be safe to execute with invalid address (cannot fault).
6098
instruct prefetchAlloc0( memory mem ) %{
6099
predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
6100
match(PrefetchAllocation mem);
6103
format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
6108
instruct prefetchAlloc( memory mem ) %{
6109
predicate(AllocatePrefetchInstr==3);
6110
match( PrefetchAllocation mem );
6113
format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
6115
__ prefetchw($mem$$Address);
6120
instruct prefetchAllocNTA( memory mem ) %{
6121
predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
6122
match(PrefetchAllocation mem);
6125
format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
6127
__ prefetchnta($mem$$Address);
6132
instruct prefetchAllocT0( memory mem ) %{
6133
predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
6134
match(PrefetchAllocation mem);
6137
format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
6139
__ prefetcht0($mem$$Address);
6144
instruct prefetchAllocT2( memory mem ) %{
6145
predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
6146
match(PrefetchAllocation mem);
6149
format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
6151
__ prefetcht2($mem$$Address);
6156
//----------Store Instructions-------------------------------------------------
6159
instruct storeB(memory mem, xRegI src) %{
6160
match(Set mem (StoreB mem src));
6163
format %{ "MOV8 $mem,$src" %}
6165
ins_encode( SetInstMark, OpcP, RegMem( src, mem ), ClearInstMark );
6166
ins_pipe( ialu_mem_reg );
6170
instruct storeC(memory mem, rRegI src) %{
6171
match(Set mem (StoreC mem src));
6174
format %{ "MOV16 $mem,$src" %}
6176
ins_encode( SetInstMark, OpcS, OpcP, RegMem( src, mem ), ClearInstMark );
6177
ins_pipe( ialu_mem_reg );
6181
instruct storeI(memory mem, rRegI src) %{
6182
match(Set mem (StoreI mem src));
6185
format %{ "MOV $mem,$src" %}
6187
ins_encode( SetInstMark, OpcP, RegMem( src, mem ), ClearInstMark );
6188
ins_pipe( ialu_mem_reg );
6192
instruct storeL(long_memory mem, eRegL src) %{
6193
predicate(!((StoreLNode*)n)->require_atomic_access());
6194
match(Set mem (StoreL mem src));
6197
format %{ "MOV $mem,$src.lo\n\t"
6198
"MOV $mem+4,$src.hi" %}
6200
ins_encode( SetInstMark, OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ), ClearInstMark );
6201
ins_pipe( ialu_mem_long_reg );
6204
// Store Long to Integer
6205
instruct storeL2I(memory mem, eRegL src) %{
6206
match(Set mem (StoreI mem (ConvL2I src)));
6208
format %{ "MOV $mem,$src.lo\t# long -> int" %}
6210
__ movl($mem$$Address, $src$$Register);
6212
ins_pipe(ialu_mem_reg);
6215
// Volatile Store Long. Must be atomic, so move it into
6216
// the FP TOS and then do a 64-bit FIST. Has to probe the
6217
// target address before the store (for null-ptr checks)
6218
// so the memory operand is used twice in the encoding.
6219
instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
6220
predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
6221
match(Set mem (StoreL mem src));
6224
format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
6226
"FISTp $mem\t # 64-bit atomic volatile long store" %}
6228
ins_encode( SetInstMark, OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src), ClearInstMark);
6229
ins_pipe( fpu_reg_mem );
6232
instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
6233
predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6234
match(Set mem (StoreL mem src));
6235
effect( TEMP tmp, KILL cr );
6237
format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
6238
"MOVSD $tmp,$src\n\t"
6239
"MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
6241
__ cmpl(rax, $mem$$Address);
6242
__ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
6243
__ movdbl($mem$$Address, $tmp$$XMMRegister);
6245
ins_pipe( pipe_slow );
6248
instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
6249
predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6250
match(Set mem (StoreL mem src));
6251
effect( TEMP tmp2 , TEMP tmp, KILL cr );
6253
format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
6254
"MOVD $tmp,$src.lo\n\t"
6255
"MOVD $tmp2,$src.hi\n\t"
6256
"PUNPCKLDQ $tmp,$tmp2\n\t"
6257
"MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
6259
__ cmpl(rax, $mem$$Address);
6260
__ movdl($tmp$$XMMRegister, $src$$Register);
6261
__ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
6262
__ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
6263
__ movdbl($mem$$Address, $tmp$$XMMRegister);
6265
ins_pipe( pipe_slow );
6268
// Store Pointer; for storing unknown oops and raw pointers
6269
instruct storeP(memory mem, anyRegP src) %{
6270
match(Set mem (StoreP mem src));
6273
format %{ "MOV $mem,$src" %}
6275
ins_encode( SetInstMark, OpcP, RegMem( src, mem ), ClearInstMark );
6276
ins_pipe( ialu_mem_reg );
6279
// Store Integer Immediate
6280
instruct storeImmI(memory mem, immI src) %{
6281
match(Set mem (StoreI mem src));
6284
format %{ "MOV $mem,$src" %}
6285
opcode(0xC7); /* C7 /0 */
6286
ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con32(src), ClearInstMark);
6287
ins_pipe( ialu_mem_imm );
6290
// Store Short/Char Immediate
6291
instruct storeImmI16(memory mem, immI16 src) %{
6292
predicate(UseStoreImmI16);
6293
match(Set mem (StoreC mem src));
6296
format %{ "MOV16 $mem,$src" %}
6297
opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */
6298
ins_encode( SetInstMark, SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16(src), ClearInstMark);
6299
ins_pipe( ialu_mem_imm );
6302
// Store Pointer Immediate; null pointers or constant oops that do not
6303
// need card-mark barriers.
6304
instruct storeImmP(memory mem, immP src) %{
6305
match(Set mem (StoreP mem src));
6308
format %{ "MOV $mem,$src" %}
6309
opcode(0xC7); /* C7 /0 */
6310
ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con32( src ), ClearInstMark);
6311
ins_pipe( ialu_mem_imm );
6314
// Store Byte Immediate
6315
instruct storeImmB(memory mem, immI8 src) %{
6316
match(Set mem (StoreB mem src));
6319
format %{ "MOV8 $mem,$src" %}
6320
opcode(0xC6); /* C6 /0 */
6321
ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con8or32(src), ClearInstMark);
6322
ins_pipe( ialu_mem_imm );
6325
// Store CMS card-mark Immediate
6326
instruct storeImmCM(memory mem, immI8 src) %{
6327
match(Set mem (StoreCM mem src));
6330
format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %}
6331
opcode(0xC6); /* C6 /0 */
6332
ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con8or32(src), ClearInstMark);
6333
ins_pipe( ialu_mem_imm );
6337
instruct storeDPR( memory mem, regDPR1 src) %{
6338
predicate(UseSSE<=1);
6339
match(Set mem (StoreD mem src));
6342
format %{ "FST_D $mem,$src" %}
6343
opcode(0xDD); /* DD /2 */
6344
ins_encode( enc_FPR_store(mem,src) );
6345
ins_pipe( fpu_mem_reg );
6348
// Store double does rounding on x86
6349
instruct storeDPR_rounded( memory mem, regDPR1 src) %{
6350
predicate(UseSSE<=1);
6351
match(Set mem (StoreD mem (RoundDouble src)));
6354
format %{ "FST_D $mem,$src\t# round" %}
6355
opcode(0xDD); /* DD /2 */
6356
ins_encode( enc_FPR_store(mem,src) );
6357
ins_pipe( fpu_mem_reg );
6360
// Store XMM register to memory (double-precision floating points)
6362
instruct storeD(memory mem, regD src) %{
6363
predicate(UseSSE>=2);
6364
match(Set mem (StoreD mem src));
6366
format %{ "MOVSD $mem,$src" %}
6368
__ movdbl($mem$$Address, $src$$XMMRegister);
6370
ins_pipe( pipe_slow );
6373
// Store XMM register to memory (single-precision floating point)
6375
instruct storeF(memory mem, regF src) %{
6376
predicate(UseSSE>=1);
6377
match(Set mem (StoreF mem src));
6379
format %{ "MOVSS $mem,$src" %}
6381
__ movflt($mem$$Address, $src$$XMMRegister);
6383
ins_pipe( pipe_slow );
6388
instruct storeFPR( memory mem, regFPR1 src) %{
6389
predicate(UseSSE==0);
6390
match(Set mem (StoreF mem src));
6393
format %{ "FST_S $mem,$src" %}
6394
opcode(0xD9); /* D9 /2 */
6395
ins_encode( enc_FPR_store(mem,src) );
6396
ins_pipe( fpu_mem_reg );
6399
// Store Float does rounding on x86
6400
instruct storeFPR_rounded( memory mem, regFPR1 src) %{
6401
predicate(UseSSE==0);
6402
match(Set mem (StoreF mem (RoundFloat src)));
6405
format %{ "FST_S $mem,$src\t# round" %}
6406
opcode(0xD9); /* D9 /2 */
6407
ins_encode( enc_FPR_store(mem,src) );
6408
ins_pipe( fpu_mem_reg );
6411
// Store Float does rounding on x86
6412
instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
6413
predicate(UseSSE<=1);
6414
match(Set mem (StoreF mem (ConvD2F src)));
6417
format %{ "FST_S $mem,$src\t# D-round" %}
6418
opcode(0xD9); /* D9 /2 */
6419
ins_encode( enc_FPR_store(mem,src) );
6420
ins_pipe( fpu_mem_reg );
6423
// Store immediate Float value (it is faster than store from FPU register)
6424
// The instruction usage is guarded by predicate in operand immFPR().
6425
instruct storeFPR_imm( memory mem, immFPR src) %{
6426
match(Set mem (StoreF mem src));
6429
format %{ "MOV $mem,$src\t# store float" %}
6430
opcode(0xC7); /* C7 /0 */
6431
ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits(src), ClearInstMark);
6432
ins_pipe( ialu_mem_imm );
6435
// Store immediate Float value (it is faster than store from XMM register)
6436
// The instruction usage is guarded by predicate in operand immF().
6437
instruct storeF_imm( memory mem, immF src) %{
6438
match(Set mem (StoreF mem src));
6441
format %{ "MOV $mem,$src\t# store float" %}
6442
opcode(0xC7); /* C7 /0 */
6443
ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits(src), ClearInstMark);
6444
ins_pipe( ialu_mem_imm );
6447
// Store Integer to stack slot
6448
instruct storeSSI(stackSlotI dst, rRegI src) %{
6452
format %{ "MOV $dst,$src" %}
6454
ins_encode( OpcPRegSS( dst, src ) );
6455
ins_pipe( ialu_mem_reg );
6458
// Store Integer to stack slot
6459
instruct storeSSP(stackSlotP dst, eRegP src) %{
6463
format %{ "MOV $dst,$src" %}
6465
ins_encode( OpcPRegSS( dst, src ) );
6466
ins_pipe( ialu_mem_reg );
6469
// Store Long to stack slot
6470
instruct storeSSL(stackSlotL dst, eRegL src) %{
6474
format %{ "MOV $dst,$src.lo\n\t"
6475
"MOV $dst+4,$src.hi" %}
6477
ins_encode( SetInstMark, OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ), ClearInstMark );
6478
ins_pipe( ialu_mem_long_reg );
6481
//----------MemBar Instructions-----------------------------------------------
6482
// Memory barrier flavors
6484
instruct membar_acquire() %{
6485
match(MemBarAcquire);
6490
format %{ "MEMBAR-acquire ! (empty encoding)" %}
6495
instruct membar_acquire_lock() %{
6496
match(MemBarAcquireLock);
6500
format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
6505
instruct membar_release() %{
6506
match(MemBarRelease);
6511
format %{ "MEMBAR-release ! (empty encoding)" %}
6516
instruct membar_release_lock() %{
6517
match(MemBarReleaseLock);
6521
format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
6526
instruct membar_volatile(eFlagsReg cr) %{
6527
match(MemBarVolatile);
6533
$$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
6536
__ membar(Assembler::StoreLoad);
6538
ins_pipe(pipe_slow);
6541
instruct unnecessary_membar_volatile() %{
6542
match(MemBarVolatile);
6543
predicate(Matcher::post_store_load_barrier(n));
6547
format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
6552
instruct membar_storestore() %{
6553
match(MemBarStoreStore);
6554
match(StoreStoreFence);
6558
format %{ "MEMBAR-storestore (empty encoding)" %}
6563
//----------Move Instructions--------------------------------------------------
6564
instruct castX2P(eAXRegP dst, eAXRegI src) %{
6565
match(Set dst (CastX2P src));
6566
format %{ "# X2P $dst, $src" %}
6567
ins_encode( /*empty encoding*/ );
6572
instruct castP2X(rRegI dst, eRegP src ) %{
6573
match(Set dst (CastP2X src));
6575
format %{ "MOV $dst, $src\t# CastP2X" %}
6576
ins_encode( enc_Copy( dst, src) );
6577
ins_pipe( ialu_reg_reg );
6580
//----------Conditional Move---------------------------------------------------
6582
instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
6583
predicate(!VM_Version::supports_cmov() );
6584
match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6586
format %{ "J$cop,us skip\t# signed cmove\n\t"
6591
// Invert sense of branch from sense of CMOV
6592
__ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6593
__ movl($dst$$Register, $src$$Register);
6596
ins_pipe( pipe_cmov_reg );
6599
instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
6600
predicate(!VM_Version::supports_cmov() );
6601
match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6603
format %{ "J$cop,us skip\t# unsigned cmove\n\t"
6608
// Invert sense of branch from sense of CMOV
6609
__ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6610
__ movl($dst$$Register, $src$$Register);
6613
ins_pipe( pipe_cmov_reg );
6616
instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
6617
predicate(VM_Version::supports_cmov() );
6618
match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6620
format %{ "CMOV$cop $dst,$src" %}
6622
ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6623
ins_pipe( pipe_cmov_reg );
6626
instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
6627
predicate(VM_Version::supports_cmov() );
6628
match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6630
format %{ "CMOV$cop $dst,$src" %}
6632
ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6633
ins_pipe( pipe_cmov_reg );
6636
instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
6637
predicate(VM_Version::supports_cmov() );
6638
match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6641
cmovI_regU(cop, cr, dst, src);
6646
instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
6647
predicate(VM_Version::supports_cmov() );
6648
match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6650
format %{ "CMOV$cop $dst,$src" %}
6652
ins_encode( SetInstMark, enc_cmov(cop), RegMem( dst, src ), ClearInstMark );
6653
ins_pipe( pipe_cmov_mem );
6657
instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
6658
predicate(VM_Version::supports_cmov() );
6659
match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6661
format %{ "CMOV$cop $dst,$src" %}
6663
ins_encode( SetInstMark, enc_cmov(cop), RegMem( dst, src ), ClearInstMark );
6664
ins_pipe( pipe_cmov_mem );
6667
instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
6668
predicate(VM_Version::supports_cmov() );
6669
match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6672
cmovI_memU(cop, cr, dst, src);
6677
instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6678
predicate(VM_Version::supports_cmov() );
6679
match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6681
format %{ "CMOV$cop $dst,$src\t# ptr" %}
6683
ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6684
ins_pipe( pipe_cmov_reg );
6687
// Conditional move (non-P6 version)
6688
// Note: a CMoveP is generated for stubs and native wrappers
6689
// regardless of whether we are on a P6, so we
6690
// emulate a cmov here
6691
instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6692
match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6694
format %{ "Jn$cop skip\n\t"
6695
"MOV $dst,$src\t# pointer\n"
6698
ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
6699
ins_pipe( pipe_cmov_reg );
6703
instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
6704
predicate(VM_Version::supports_cmov() );
6705
match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6707
format %{ "CMOV$cop $dst,$src\t# ptr" %}
6709
ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6710
ins_pipe( pipe_cmov_reg );
6713
instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
6714
predicate(VM_Version::supports_cmov() );
6715
match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6718
cmovP_regU(cop, cr, dst, src);
6722
// DISABLED: Requires the ADLC to emit a bottom_type call that
6723
// correctly meets the two pointer arguments; one is an incoming
6724
// register but the other is a memory operand. ALSO appears to
6725
// be buggy with implicit null checks.
6727
//// Conditional move
6728
//instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
6729
// predicate(VM_Version::supports_cmov() );
6730
// match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6732
// format %{ "CMOV$cop $dst,$src\t# ptr" %}
6733
// opcode(0x0F,0x40);
6734
// ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6735
// ins_pipe( pipe_cmov_mem );
6738
//// Conditional move
6739
//instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
6740
// predicate(VM_Version::supports_cmov() );
6741
// match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6743
// format %{ "CMOV$cop $dst,$src\t# ptr" %}
6744
// opcode(0x0F,0x40);
6745
// ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6746
// ins_pipe( pipe_cmov_mem );
6750
instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
6751
predicate(UseSSE<=1);
6752
match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6754
format %{ "FCMOV$cop $dst,$src\t# double" %}
6756
ins_encode( enc_cmov_dpr(cop,src) );
6757
ins_pipe( pipe_cmovDPR_reg );
6761
instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
6762
predicate(UseSSE==0);
6763
match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6765
format %{ "FCMOV$cop $dst,$src\t# float" %}
6767
ins_encode( enc_cmov_dpr(cop,src) );
6768
ins_pipe( pipe_cmovDPR_reg );
6771
// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
6772
instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
6773
predicate(UseSSE<=1);
6774
match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6776
format %{ "Jn$cop skip\n\t"
6777
"MOV $dst,$src\t# double\n"
6779
opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
6780
ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
6781
ins_pipe( pipe_cmovDPR_reg );
6784
// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
6785
instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
6786
predicate(UseSSE==0);
6787
match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6789
format %{ "Jn$cop skip\n\t"
6790
"MOV $dst,$src\t# float\n"
6792
opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
6793
ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
6794
ins_pipe( pipe_cmovDPR_reg );
6797
// No CMOVE with SSE/SSE2
6798
instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
6799
predicate (UseSSE>=1);
6800
match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6802
format %{ "Jn$cop skip\n\t"
6803
"MOVSS $dst,$src\t# float\n"
6807
// Invert sense of branch from sense of CMOV
6808
__ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6809
__ movflt($dst$$XMMRegister, $src$$XMMRegister);
6812
ins_pipe( pipe_slow );
6815
// No CMOVE with SSE/SSE2
6816
instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
6817
predicate (UseSSE>=2);
6818
match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6820
format %{ "Jn$cop skip\n\t"
6821
"MOVSD $dst,$src\t# float\n"
6825
// Invert sense of branch from sense of CMOV
6826
__ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6827
__ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6830
ins_pipe( pipe_slow );
6834
instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
6835
predicate (UseSSE>=1);
6836
match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6838
format %{ "Jn$cop skip\n\t"
6839
"MOVSS $dst,$src\t# float\n"
6843
// Invert sense of branch from sense of CMOV
6844
__ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6845
__ movflt($dst$$XMMRegister, $src$$XMMRegister);
6848
ins_pipe( pipe_slow );
6851
instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
6852
predicate (UseSSE>=1);
6853
match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6856
fcmovF_regU(cop, cr, dst, src);
6861
instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
6862
predicate (UseSSE>=2);
6863
match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6865
format %{ "Jn$cop skip\n\t"
6866
"MOVSD $dst,$src\t# float\n"
6870
// Invert sense of branch from sense of CMOV
6871
__ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6872
__ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6875
ins_pipe( pipe_slow );
6878
instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
6879
predicate (UseSSE>=2);
6880
match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6883
fcmovD_regU(cop, cr, dst, src);
6887
instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
6888
predicate(VM_Version::supports_cmov() );
6889
match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6891
format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
6892
"CMOV$cop $dst.hi,$src.hi" %}
6894
ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
6895
ins_pipe( pipe_cmov_reg_long );
6898
instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
6899
predicate(VM_Version::supports_cmov() );
6900
match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6902
format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
6903
"CMOV$cop $dst.hi,$src.hi" %}
6905
ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
6906
ins_pipe( pipe_cmov_reg_long );
6909
instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
6910
predicate(VM_Version::supports_cmov() );
6911
match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6914
cmovL_regU(cop, cr, dst, src);
6918
//----------Arithmetic Instructions--------------------------------------------
6919
//----------Addition Instructions----------------------------------------------
6921
// Integer Addition Instructions
6922
instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
6923
match(Set dst (AddI dst src));
6927
format %{ "ADD $dst,$src" %}
6929
ins_encode( OpcP, RegReg( dst, src) );
6930
ins_pipe( ialu_reg_reg );
6933
instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
6934
match(Set dst (AddI dst src));
6937
format %{ "ADD $dst,$src" %}
6938
opcode(0x81, 0x00); /* /0 id */
6939
ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
6940
ins_pipe( ialu_reg );
6943
instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
6944
predicate(UseIncDec);
6945
match(Set dst (AddI dst src));
6949
format %{ "INC $dst" %}
6951
ins_encode( Opc_plus( primary, dst ) );
6952
ins_pipe( ialu_reg );
6955
instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
6956
match(Set dst (AddI src0 src1));
6959
format %{ "LEA $dst,[$src0 + $src1]" %}
6960
opcode(0x8D); /* 0x8D /r */
6961
ins_encode( SetInstMark, OpcP, RegLea( dst, src0, src1 ), ClearInstMark );
6962
ins_pipe( ialu_reg_reg );
6965
instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
6966
match(Set dst (AddP src0 src1));
6969
format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %}
6970
opcode(0x8D); /* 0x8D /r */
6971
ins_encode( SetInstMark, OpcP, RegLea( dst, src0, src1 ), ClearInstMark );
6972
ins_pipe( ialu_reg_reg );
6975
instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
6976
predicate(UseIncDec);
6977
match(Set dst (AddI dst src));
6981
format %{ "DEC $dst" %}
6983
ins_encode( Opc_plus( primary, dst ) );
6984
ins_pipe( ialu_reg );
6987
instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
6988
match(Set dst (AddP dst src));
6992
format %{ "ADD $dst,$src" %}
6994
ins_encode( OpcP, RegReg( dst, src) );
6995
ins_pipe( ialu_reg_reg );
6998
instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
6999
match(Set dst (AddP dst src));
7002
format %{ "ADD $dst,$src" %}
7003
opcode(0x81,0x00); /* Opcode 81 /0 id */
7004
// ins_encode( RegImm( dst, src) );
7005
ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7006
ins_pipe( ialu_reg );
7009
instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7010
match(Set dst (AddI dst (LoadI src)));
7014
format %{ "ADD $dst,$src" %}
7016
ins_encode( SetInstMark, OpcP, RegMem( dst, src), ClearInstMark );
7017
ins_pipe( ialu_reg_mem );
7020
instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7021
match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7025
format %{ "ADD $dst,$src" %}
7026
opcode(0x01); /* Opcode 01 /r */
7027
ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark );
7028
ins_pipe( ialu_mem_reg );
7031
// Add Memory with Immediate
7032
instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
7033
match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7037
format %{ "ADD $dst,$src" %}
7038
opcode(0x81); /* Opcode 81 /0 id */
7039
ins_encode( SetInstMark, OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32(src), ClearInstMark );
7040
ins_pipe( ialu_mem_imm );
7043
instruct incI_mem(memory dst, immI_1 src, eFlagsReg cr) %{
7044
match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7048
format %{ "INC $dst" %}
7049
opcode(0xFF); /* Opcode FF /0 */
7050
ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,dst), ClearInstMark);
7051
ins_pipe( ialu_mem_imm );
7054
instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
7055
match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7059
format %{ "DEC $dst" %}
7060
opcode(0xFF); /* Opcode FF /1 */
7061
ins_encode( SetInstMark, OpcP, RMopc_Mem(0x01,dst), ClearInstMark);
7062
ins_pipe( ialu_mem_imm );
7066
instruct checkCastPP( eRegP dst ) %{
7067
match(Set dst (CheckCastPP dst));
7070
format %{ "#checkcastPP of $dst" %}
7071
ins_encode( /*empty encoding*/ );
7075
instruct castPP( eRegP dst ) %{
7076
match(Set dst (CastPP dst));
7077
format %{ "#castPP of $dst" %}
7078
ins_encode( /*empty encoding*/ );
7082
instruct castII( rRegI dst ) %{
7083
match(Set dst (CastII dst));
7084
format %{ "#castII of $dst" %}
7085
ins_encode( /*empty encoding*/ );
7090
instruct castLL( eRegL dst ) %{
7091
match(Set dst (CastLL dst));
7092
format %{ "#castLL of $dst" %}
7093
ins_encode( /*empty encoding*/ );
7098
instruct castFF( regF dst ) %{
7099
predicate(UseSSE >= 1);
7100
match(Set dst (CastFF dst));
7101
format %{ "#castFF of $dst" %}
7102
ins_encode( /*empty encoding*/ );
7107
instruct castDD( regD dst ) %{
7108
predicate(UseSSE >= 2);
7109
match(Set dst (CastDD dst));
7110
format %{ "#castDD of $dst" %}
7111
ins_encode( /*empty encoding*/ );
7116
instruct castFF_PR( regFPR dst ) %{
7117
predicate(UseSSE < 1);
7118
match(Set dst (CastFF dst));
7119
format %{ "#castFF of $dst" %}
7120
ins_encode( /*empty encoding*/ );
7125
instruct castDD_PR( regDPR dst ) %{
7126
predicate(UseSSE < 2);
7127
match(Set dst (CastDD dst));
7128
format %{ "#castDD of $dst" %}
7129
ins_encode( /*empty encoding*/ );
7134
// No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
7136
instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7137
match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
7138
match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
7139
effect(KILL cr, KILL oldval);
7140
format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7145
ins_encode( enc_cmpxchg8(mem_ptr),
7146
enc_flags_ne_to_boolean(res) );
7147
ins_pipe( pipe_cmpxchg );
7150
instruct compareAndSwapP( rRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
7151
match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
7152
match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
7153
effect(KILL cr, KILL oldval);
7154
format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7159
ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7160
ins_pipe( pipe_cmpxchg );
7163
instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
7164
match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
7165
match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
7166
effect(KILL cr, KILL oldval);
7167
format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7172
ins_encode( enc_cmpxchgb(mem_ptr),
7173
enc_flags_ne_to_boolean(res) );
7174
ins_pipe( pipe_cmpxchg );
7177
instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
7178
match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
7179
match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
7180
effect(KILL cr, KILL oldval);
7181
format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7186
ins_encode( enc_cmpxchgw(mem_ptr),
7187
enc_flags_ne_to_boolean(res) );
7188
ins_pipe( pipe_cmpxchg );
7191
instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7192
match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
7193
match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
7194
effect(KILL cr, KILL oldval);
7195
format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7200
ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7201
ins_pipe( pipe_cmpxchg );
7204
instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7205
match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
7207
format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7208
ins_encode( enc_cmpxchg8(mem_ptr) );
7209
ins_pipe( pipe_cmpxchg );
7212
instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
7213
match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
7215
format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7216
ins_encode( enc_cmpxchg(mem_ptr) );
7217
ins_pipe( pipe_cmpxchg );
7220
instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7221
match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
7223
format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7224
ins_encode( enc_cmpxchgb(mem_ptr) );
7225
ins_pipe( pipe_cmpxchg );
7228
instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7229
match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
7231
format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7232
ins_encode( enc_cmpxchgw(mem_ptr) );
7233
ins_pipe( pipe_cmpxchg );
7236
instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7237
match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
7239
format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7240
ins_encode( enc_cmpxchg(mem_ptr) );
7241
ins_pipe( pipe_cmpxchg );
7244
instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7245
predicate(n->as_LoadStore()->result_not_used());
7246
match(Set dummy (GetAndAddB mem add));
7248
format %{ "ADDB [$mem],$add" %}
7251
__ addb($mem$$Address, $add$$constant);
7253
ins_pipe( pipe_cmpxchg );
7256
// Important to match to xRegI: only 8-bit regs.
7257
instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
7258
match(Set newval (GetAndAddB mem newval));
7260
format %{ "XADDB [$mem],$newval" %}
7263
__ xaddb($mem$$Address, $newval$$Register);
7265
ins_pipe( pipe_cmpxchg );
7268
instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7269
predicate(n->as_LoadStore()->result_not_used());
7270
match(Set dummy (GetAndAddS mem add));
7272
format %{ "ADDS [$mem],$add" %}
7275
__ addw($mem$$Address, $add$$constant);
7277
ins_pipe( pipe_cmpxchg );
7280
instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
7281
match(Set newval (GetAndAddS mem newval));
7283
format %{ "XADDS [$mem],$newval" %}
7286
__ xaddw($mem$$Address, $newval$$Register);
7288
ins_pipe( pipe_cmpxchg );
7291
instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7292
predicate(n->as_LoadStore()->result_not_used());
7293
match(Set dummy (GetAndAddI mem add));
7295
format %{ "ADDL [$mem],$add" %}
7298
__ addl($mem$$Address, $add$$constant);
7300
ins_pipe( pipe_cmpxchg );
7303
instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
7304
match(Set newval (GetAndAddI mem newval));
7306
format %{ "XADDL [$mem],$newval" %}
7309
__ xaddl($mem$$Address, $newval$$Register);
7311
ins_pipe( pipe_cmpxchg );
7314
// Important to match to xRegI: only 8-bit regs.
7315
instruct xchgB( memory mem, xRegI newval) %{
7316
match(Set newval (GetAndSetB mem newval));
7317
format %{ "XCHGB $newval,[$mem]" %}
7319
__ xchgb($newval$$Register, $mem$$Address);
7321
ins_pipe( pipe_cmpxchg );
7324
instruct xchgS( memory mem, rRegI newval) %{
7325
match(Set newval (GetAndSetS mem newval));
7326
format %{ "XCHGW $newval,[$mem]" %}
7328
__ xchgw($newval$$Register, $mem$$Address);
7330
ins_pipe( pipe_cmpxchg );
7333
instruct xchgI( memory mem, rRegI newval) %{
7334
match(Set newval (GetAndSetI mem newval));
7335
format %{ "XCHGL $newval,[$mem]" %}
7337
__ xchgl($newval$$Register, $mem$$Address);
7339
ins_pipe( pipe_cmpxchg );
7342
instruct xchgP( memory mem, pRegP newval) %{
7343
match(Set newval (GetAndSetP mem newval));
7344
format %{ "XCHGL $newval,[$mem]" %}
7346
__ xchgl($newval$$Register, $mem$$Address);
7348
ins_pipe( pipe_cmpxchg );
7351
//----------Subtraction Instructions-------------------------------------------
7353
// Integer Subtraction Instructions
7354
instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7355
match(Set dst (SubI dst src));
7359
format %{ "SUB $dst,$src" %}
7361
ins_encode( OpcP, RegReg( dst, src) );
7362
ins_pipe( ialu_reg_reg );
7365
instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7366
match(Set dst (SubI dst src));
7369
format %{ "SUB $dst,$src" %}
7370
opcode(0x81,0x05); /* Opcode 81 /5 */
7371
// ins_encode( RegImm( dst, src) );
7372
ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7373
ins_pipe( ialu_reg );
7376
instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7377
match(Set dst (SubI dst (LoadI src)));
7381
format %{ "SUB $dst,$src" %}
7383
ins_encode( SetInstMark, OpcP, RegMem( dst, src), ClearInstMark );
7384
ins_pipe( ialu_reg_mem );
7387
instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7388
match(Set dst (StoreI dst (SubI (LoadI dst) src)));
7392
format %{ "SUB $dst,$src" %}
7393
opcode(0x29); /* Opcode 29 /r */
7394
ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark );
7395
ins_pipe( ialu_mem_reg );
7398
// Subtract from a pointer
7399
instruct subP_eReg(eRegP dst, rRegI src, immI_0 zero, eFlagsReg cr) %{
7400
match(Set dst (AddP dst (SubI zero src)));
7404
format %{ "SUB $dst,$src" %}
7406
ins_encode( OpcP, RegReg( dst, src) );
7407
ins_pipe( ialu_reg_reg );
7410
instruct negI_eReg(rRegI dst, immI_0 zero, eFlagsReg cr) %{
7411
match(Set dst (SubI zero dst));
7415
format %{ "NEG $dst" %}
7416
opcode(0xF7,0x03); // Opcode F7 /3
7417
ins_encode( OpcP, RegOpc( dst ) );
7418
ins_pipe( ialu_reg );
7421
//----------Multiplication/Division Instructions-------------------------------
7422
// Integer Multiplication Instructions
7424
instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7425
match(Set dst (MulI dst src));
7430
format %{ "IMUL $dst,$src" %}
7432
ins_encode( OpcS, OpcP, RegReg( dst, src) );
7433
ins_pipe( ialu_reg_reg_alu0 );
7436
// Multiply 32-bit Immediate
7437
instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
7438
match(Set dst (MulI src imm));
7442
format %{ "IMUL $dst,$src,$imm" %}
7443
opcode(0x69); /* 69 /r id */
7444
ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
7445
ins_pipe( ialu_reg_reg_alu0 );
7448
instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
7452
// Note that this is artificially increased to make it more expensive than loadConL
7454
format %{ "MOV EAX,$src\t// low word only" %}
7456
ins_encode( LdImmL_Lo(dst, src) );
7457
ins_pipe( ialu_reg_fat );
7460
// Multiply by 32-bit Immediate, taking the shifted high order results
7461
// (special case for shift by 32)
7462
instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
7463
match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7464
predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7465
_kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7466
_kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7467
effect(USE src1, KILL cr);
7469
// Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7470
ins_cost(0*100 + 1*400 - 150);
7471
format %{ "IMUL EDX:EAX,$src1" %}
7472
ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7473
ins_pipe( pipe_slow );
7476
// Multiply by 32-bit Immediate, taking the shifted high order results
7477
instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
7478
match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7479
predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7480
_kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7481
_kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7482
effect(USE src1, KILL cr);
7484
// Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7485
ins_cost(1*100 + 1*400 - 150);
7486
format %{ "IMUL EDX:EAX,$src1\n\t"
7487
"SAR EDX,$cnt-32" %}
7488
ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7489
ins_pipe( pipe_slow );
7492
// Multiply Memory 32-bit Immediate
7493
instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
7494
match(Set dst (MulI (LoadI src) imm));
7498
format %{ "IMUL $dst,$src,$imm" %}
7499
opcode(0x69); /* 69 /r id */
7500
ins_encode( SetInstMark, OpcSE(imm), RegMem( dst, src ), Con8or32( imm ), ClearInstMark );
7501
ins_pipe( ialu_reg_mem_alu0 );
7505
instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
7506
match(Set dst (MulI dst (LoadI src)));
7510
format %{ "IMUL $dst,$src" %}
7512
ins_encode( SetInstMark, OpcS, OpcP, RegMem( dst, src), ClearInstMark );
7513
ins_pipe( ialu_reg_mem_alu0 );
7516
instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr)
7518
match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
7519
effect(KILL cr, KILL src2);
7521
expand %{ mulI_eReg(dst, src1, cr);
7522
mulI_eReg(src2, src3, cr);
7523
addI_eReg(dst, src2, cr); %}
7526
// Multiply Register Int to Long
7527
instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
7528
// Basic Idea: long = (long)int * (long)int
7529
match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
7530
effect(DEF dst, USE src, USE src1, KILL flags);
7533
format %{ "IMUL $dst,$src1" %}
7535
ins_encode( long_int_multiply( dst, src1 ) );
7536
ins_pipe( ialu_reg_reg_alu0 );
7539
instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
7540
// Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL)
7541
match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
7545
format %{ "MUL $dst,$src1" %}
7547
ins_encode( long_uint_multiply(dst, src1) );
7548
ins_pipe( ialu_reg_reg_alu0 );
7551
// Multiply Register Long
7552
instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7553
match(Set dst (MulL dst src));
7554
effect(KILL cr, TEMP tmp);
7555
ins_cost(4*100+3*400);
7556
// Basic idea: lo(result) = lo(x_lo * y_lo)
7557
// hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
7558
format %{ "MOV $tmp,$src.lo\n\t"
7560
"MOV EDX,$src.hi\n\t"
7563
"MUL EDX:EAX,$src.lo\n\t"
7565
ins_encode( long_multiply( dst, src, tmp ) );
7566
ins_pipe( pipe_slow );
7569
// Multiply Register Long where the left operand's high 32 bits are zero
7570
instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7571
predicate(is_operand_hi32_zero(n->in(1)));
7572
match(Set dst (MulL dst src));
7573
effect(KILL cr, TEMP tmp);
7574
ins_cost(2*100+2*400);
7575
// Basic idea: lo(result) = lo(x_lo * y_lo)
7576
// hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
7577
format %{ "MOV $tmp,$src.hi\n\t"
7579
"MUL EDX:EAX,$src.lo\n\t"
7582
__ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
7583
__ imull($tmp$$Register, rax);
7584
__ mull($src$$Register);
7585
__ addl(rdx, $tmp$$Register);
7587
ins_pipe( pipe_slow );
7590
// Multiply Register Long where the right operand's high 32 bits are zero
7591
instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7592
predicate(is_operand_hi32_zero(n->in(2)));
7593
match(Set dst (MulL dst src));
7594
effect(KILL cr, TEMP tmp);
7595
ins_cost(2*100+2*400);
7596
// Basic idea: lo(result) = lo(x_lo * y_lo)
7597
// hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
7598
format %{ "MOV $tmp,$src.lo\n\t"
7600
"MUL EDX:EAX,$src.lo\n\t"
7603
__ movl($tmp$$Register, $src$$Register);
7604
__ imull($tmp$$Register, rdx);
7605
__ mull($src$$Register);
7606
__ addl(rdx, $tmp$$Register);
7608
ins_pipe( pipe_slow );
7611
// Multiply Register Long where the left and the right operands' high 32 bits are zero
7612
instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
7613
predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
7614
match(Set dst (MulL dst src));
7617
// Basic idea: lo(result) = lo(x_lo * y_lo)
7618
// hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
7619
format %{ "MUL EDX:EAX,$src.lo\n\t" %}
7621
__ mull($src$$Register);
7623
ins_pipe( pipe_slow );
7626
// Multiply Register Long by small constant
7627
instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
7628
match(Set dst (MulL dst src));
7629
effect(KILL cr, TEMP tmp);
7630
ins_cost(2*100+2*400);
7632
// Basic idea: lo(result) = lo(src * EAX)
7633
// hi(result) = hi(src * EAX) + lo(src * EDX)
7634
format %{ "IMUL $tmp,EDX,$src\n\t"
7636
"MUL EDX\t# EDX*EAX -> EDX:EAX\n\t"
7638
ins_encode( long_multiply_con( dst, src, tmp ) );
7639
ins_pipe( pipe_slow );
7642
// Integer DIV with Register
7643
instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7644
match(Set rax (DivI rax div));
7645
effect(KILL rdx, KILL cr);
7647
ins_cost(30*100+10*100);
7648
format %{ "CMP EAX,0x80000000\n\t"
7656
opcode(0xF7, 0x7); /* Opcode F7 /7 */
7657
ins_encode( cdq_enc, OpcP, RegOpc(div) );
7658
ins_pipe( ialu_reg_reg_alu0 );
7661
// Divide Register Long
7662
instruct divL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
7663
match(Set dst (DivL src1 src2));
7666
format %{ "PUSH $src1.hi\n\t"
7670
"CALL SharedRuntime::ldiv\n\t"
7672
ins_encode( long_div(src1,src2) );
7673
ins_pipe( pipe_slow );
7676
// Integer DIVMOD with Register, both quotient and mod results
7677
instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7678
match(DivModI rax div);
7681
ins_cost(30*100+10*100);
7682
format %{ "CMP EAX,0x80000000\n\t"
7690
opcode(0xF7, 0x7); /* Opcode F7 /7 */
7691
ins_encode( cdq_enc, OpcP, RegOpc(div) );
7692
ins_pipe( pipe_slow );
7695
// Integer MOD with Register
7696
instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
7697
match(Set rdx (ModI rax div));
7698
effect(KILL rax, KILL cr);
7704
opcode(0xF7, 0x7); /* Opcode F7 /7 */
7705
ins_encode( cdq_enc, OpcP, RegOpc(div) );
7706
ins_pipe( ialu_reg_reg_alu0 );
7709
// Remainder Register Long
7710
instruct modL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
7711
match(Set dst (ModL src1 src2));
7714
format %{ "PUSH $src1.hi\n\t"
7718
"CALL SharedRuntime::lrem\n\t"
7720
ins_encode( long_mod(src1,src2) );
7721
ins_pipe( pipe_slow );
7724
// Divide Register Long (no special case since divisor != -1)
7725
instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
7726
match(Set dst (DivL dst imm));
7727
effect( TEMP tmp, TEMP tmp2, KILL cr );
7729
format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
7730
"XOR $tmp2,$tmp2\n\t"
7737
"LNEG EAX : $tmp2\n\t"
7738
"DIV $tmp # unsigned division\n\t"
7739
"XCHG EAX,$tmp2\n\t"
7741
"LNEG $tmp2 : EAX\n\t"
7750
"NEG EDX:EAX # if $imm < 0" %}
7752
int con = (int)$imm$$constant;
7753
assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
7754
int pcon = (con > 0) ? con : -con;
7755
Label Lfast, Lpos, Ldone;
7757
__ movl($tmp$$Register, pcon);
7758
__ xorl($tmp2$$Register,$tmp2$$Register);
7759
__ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
7760
__ jccb(Assembler::above, Lfast); // result fits into 32 bit
7762
__ movl($tmp2$$Register, $dst$$Register); // save
7763
__ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7764
__ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
7765
__ jccb(Assembler::lessEqual, Lpos); // result is positive
7767
// Negative dividend.
7768
// convert value to positive to use unsigned division
7769
__ lneg($dst$$Register, $tmp2$$Register);
7770
__ divl($tmp$$Register);
7771
__ xchgl($dst$$Register, $tmp2$$Register);
7772
__ divl($tmp$$Register);
7773
// revert result back to negative
7774
__ lneg($tmp2$$Register, $dst$$Register);
7778
__ divl($tmp$$Register); // Use unsigned division
7779
__ xchgl($dst$$Register, $tmp2$$Register);
7780
// Fallthrow for final divide, tmp2 has 32 bit hi result
7783
// fast path: src is positive
7784
__ divl($tmp$$Register); // Use unsigned division
7787
__ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
7789
__ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
7792
ins_pipe( pipe_slow );
7795
// Remainder Register Long (remainder fit into 32 bits)
7796
instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
7797
match(Set dst (ModL dst imm));
7798
effect( TEMP tmp, TEMP tmp2, KILL cr );
7800
format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
7807
"LNEG EAX : $tmp2\n\t"
7808
"DIV $tmp # unsigned division\n\t"
7822
int con = (int)$imm$$constant;
7823
assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
7824
int pcon = (con > 0) ? con : -con;
7825
Label Lfast, Lpos, Ldone;
7827
__ movl($tmp$$Register, pcon);
7828
__ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
7829
__ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit
7831
__ movl($tmp2$$Register, $dst$$Register); // save
7832
__ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7833
__ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
7834
__ jccb(Assembler::lessEqual, Lpos); // result is positive
7836
// Negative dividend.
7837
// convert value to positive to use unsigned division
7838
__ lneg($dst$$Register, $tmp2$$Register);
7839
__ divl($tmp$$Register);
7840
__ movl($dst$$Register, $tmp2$$Register);
7841
__ divl($tmp$$Register);
7842
// revert remainder back to negative
7843
__ negl(HIGH_FROM_LOW($dst$$Register));
7847
__ divl($tmp$$Register);
7848
__ movl($dst$$Register, $tmp2$$Register);
7851
// fast path: src is positive
7852
__ divl($tmp$$Register);
7855
__ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7856
__ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
7859
ins_pipe( pipe_slow );
7862
// Integer Shift Instructions
7864
instruct shlI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
7865
match(Set dst (LShiftI dst shift));
7869
format %{ "SHL $dst,$shift" %}
7870
opcode(0xD1, 0x4); /* D1 /4 */
7871
ins_encode( OpcP, RegOpc( dst ) );
7872
ins_pipe( ialu_reg );
7875
// Shift Left by 8-bit immediate
7876
instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
7877
match(Set dst (LShiftI dst shift));
7881
format %{ "SHL $dst,$shift" %}
7882
opcode(0xC1, 0x4); /* C1 /4 ib */
7883
ins_encode( RegOpcImm( dst, shift) );
7884
ins_pipe( ialu_reg );
7887
// Shift Left by variable
7888
instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
7889
match(Set dst (LShiftI dst shift));
7893
format %{ "SHL $dst,$shift" %}
7894
opcode(0xD3, 0x4); /* D3 /4 */
7895
ins_encode( OpcP, RegOpc( dst ) );
7896
ins_pipe( ialu_reg_reg );
7899
// Arithmetic shift right by one
7900
instruct sarI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
7901
match(Set dst (RShiftI dst shift));
7905
format %{ "SAR $dst,$shift" %}
7906
opcode(0xD1, 0x7); /* D1 /7 */
7907
ins_encode( OpcP, RegOpc( dst ) );
7908
ins_pipe( ialu_reg );
7911
// Arithmetic shift right by one
7912
instruct sarI_mem_1(memory dst, immI_1 shift, eFlagsReg cr) %{
7913
match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
7915
format %{ "SAR $dst,$shift" %}
7916
opcode(0xD1, 0x7); /* D1 /7 */
7917
ins_encode( SetInstMark, OpcP, RMopc_Mem(secondary,dst), ClearInstMark );
7918
ins_pipe( ialu_mem_imm );
7921
// Arithmetic Shift Right by 8-bit immediate
7922
instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
7923
match(Set dst (RShiftI dst shift));
7927
format %{ "SAR $dst,$shift" %}
7928
opcode(0xC1, 0x7); /* C1 /7 ib */
7929
ins_encode( RegOpcImm( dst, shift ) );
7930
ins_pipe( ialu_mem_imm );
7933
// Arithmetic Shift Right by 8-bit immediate
7934
instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
7935
match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
7938
format %{ "SAR $dst,$shift" %}
7939
opcode(0xC1, 0x7); /* C1 /7 ib */
7940
ins_encode( SetInstMark, OpcP, RMopc_Mem(secondary, dst ), Con8or32(shift), ClearInstMark );
7941
ins_pipe( ialu_mem_imm );
7944
// Arithmetic Shift Right by variable
7945
instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
7946
match(Set dst (RShiftI dst shift));
7950
format %{ "SAR $dst,$shift" %}
7951
opcode(0xD3, 0x7); /* D3 /7 */
7952
ins_encode( OpcP, RegOpc( dst ) );
7953
ins_pipe( ialu_reg_reg );
7956
// Logical shift right by one
7957
instruct shrI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
7958
match(Set dst (URShiftI dst shift));
7962
format %{ "SHR $dst,$shift" %}
7963
opcode(0xD1, 0x5); /* D1 /5 */
7964
ins_encode( OpcP, RegOpc( dst ) );
7965
ins_pipe( ialu_reg );
7968
// Logical Shift Right by 8-bit immediate
7969
instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
7970
match(Set dst (URShiftI dst shift));
7974
format %{ "SHR $dst,$shift" %}
7975
opcode(0xC1, 0x5); /* C1 /5 ib */
7976
ins_encode( RegOpcImm( dst, shift) );
7977
ins_pipe( ialu_reg );
7981
// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24.
7982
// This idiom is used by the compiler for the i2b bytecode.
7983
instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
7984
match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
7987
format %{ "MOVSX $dst,$src :8" %}
7989
__ movsbl($dst$$Register, $src$$Register);
7991
ins_pipe(ialu_reg_reg);
7994
// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16.
7995
// This idiom is used by the compiler the i2s bytecode.
7996
instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
7997
match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
8000
format %{ "MOVSX $dst,$src :16" %}
8002
__ movswl($dst$$Register, $src$$Register);
8004
ins_pipe(ialu_reg_reg);
8008
// Logical Shift Right by variable
8009
instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
8010
match(Set dst (URShiftI dst shift));
8014
format %{ "SHR $dst,$shift" %}
8015
opcode(0xD3, 0x5); /* D3 /5 */
8016
ins_encode( OpcP, RegOpc( dst ) );
8017
ins_pipe( ialu_reg_reg );
8021
//----------Logical Instructions-----------------------------------------------
8022
//----------Integer Logical Instructions---------------------------------------
8024
// And Register with Register
8025
instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8026
match(Set dst (AndI dst src));
8030
format %{ "AND $dst,$src" %}
8032
ins_encode( OpcP, RegReg( dst, src) );
8033
ins_pipe( ialu_reg_reg );
8036
// And Register with Immediate
8037
instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8038
match(Set dst (AndI dst src));
8041
format %{ "AND $dst,$src" %}
8042
opcode(0x81,0x04); /* Opcode 81 /4 */
8043
// ins_encode( RegImm( dst, src) );
8044
ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8045
ins_pipe( ialu_reg );
8048
// And Register with Memory
8049
instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8050
match(Set dst (AndI dst (LoadI src)));
8054
format %{ "AND $dst,$src" %}
8056
ins_encode( SetInstMark, OpcP, RegMem( dst, src), ClearInstMark );
8057
ins_pipe( ialu_reg_mem );
8060
// And Memory with Register
8061
instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8062
match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8066
format %{ "AND $dst,$src" %}
8067
opcode(0x21); /* Opcode 21 /r */
8068
ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark );
8069
ins_pipe( ialu_mem_reg );
8072
// And Memory with Immediate
8073
instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8074
match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8078
format %{ "AND $dst,$src" %}
8079
opcode(0x81, 0x4); /* Opcode 81 /4 id */
8080
// ins_encode( MemImm( dst, src) );
8081
ins_encode( SetInstMark, OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32(src), ClearInstMark );
8082
ins_pipe( ialu_mem_imm );
8086
instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
8087
match(Set dst (AndI (XorI src1 minus_1) src2));
8088
predicate(UseBMI1Instructions);
8091
format %{ "ANDNL $dst, $src1, $src2" %}
8094
__ andnl($dst$$Register, $src1$$Register, $src2$$Register);
8099
instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
8100
match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
8101
predicate(UseBMI1Instructions);
8105
format %{ "ANDNL $dst, $src1, $src2" %}
8108
__ andnl($dst$$Register, $src1$$Register, $src2$$Address);
8110
ins_pipe(ialu_reg_mem);
8113
instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, eFlagsReg cr) %{
8114
match(Set dst (AndI (SubI imm_zero src) src));
8115
predicate(UseBMI1Instructions);
8118
format %{ "BLSIL $dst, $src" %}
8121
__ blsil($dst$$Register, $src$$Register);
8126
instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, eFlagsReg cr) %{
8127
match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
8128
predicate(UseBMI1Instructions);
8132
format %{ "BLSIL $dst, $src" %}
8135
__ blsil($dst$$Register, $src$$Address);
8137
ins_pipe(ialu_reg_mem);
8140
instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8142
match(Set dst (XorI (AddI src minus_1) src));
8143
predicate(UseBMI1Instructions);
8146
format %{ "BLSMSKL $dst, $src" %}
8149
__ blsmskl($dst$$Register, $src$$Register);
8155
instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8157
match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
8158
predicate(UseBMI1Instructions);
8162
format %{ "BLSMSKL $dst, $src" %}
8165
__ blsmskl($dst$$Register, $src$$Address);
8168
ins_pipe(ialu_reg_mem);
8171
instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8173
match(Set dst (AndI (AddI src minus_1) src) );
8174
predicate(UseBMI1Instructions);
8177
format %{ "BLSRL $dst, $src" %}
8180
__ blsrl($dst$$Register, $src$$Register);
8186
instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8188
match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
8189
predicate(UseBMI1Instructions);
8193
format %{ "BLSRL $dst, $src" %}
8196
__ blsrl($dst$$Register, $src$$Address);
8199
ins_pipe(ialu_reg_mem);
8203
// Or Register with Register
8204
instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8205
match(Set dst (OrI dst src));
8209
format %{ "OR $dst,$src" %}
8211
ins_encode( OpcP, RegReg( dst, src) );
8212
ins_pipe( ialu_reg_reg );
8215
instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
8216
match(Set dst (OrI dst (CastP2X src)));
8220
format %{ "OR $dst,$src" %}
8222
ins_encode( OpcP, RegReg( dst, src) );
8223
ins_pipe( ialu_reg_reg );
8227
// Or Register with Immediate
8228
instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8229
match(Set dst (OrI dst src));
8232
format %{ "OR $dst,$src" %}
8233
opcode(0x81,0x01); /* Opcode 81 /1 id */
8234
// ins_encode( RegImm( dst, src) );
8235
ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8236
ins_pipe( ialu_reg );
8239
// Or Register with Memory
8240
instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8241
match(Set dst (OrI dst (LoadI src)));
8245
format %{ "OR $dst,$src" %}
8247
ins_encode( SetInstMark, OpcP, RegMem( dst, src), ClearInstMark );
8248
ins_pipe( ialu_reg_mem );
8251
// Or Memory with Register
8252
instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8253
match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8257
format %{ "OR $dst,$src" %}
8258
opcode(0x09); /* Opcode 09 /r */
8259
ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark );
8260
ins_pipe( ialu_mem_reg );
8263
// Or Memory with Immediate
8264
instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8265
match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8269
format %{ "OR $dst,$src" %}
8270
opcode(0x81,0x1); /* Opcode 81 /1 id */
8271
// ins_encode( MemImm( dst, src) );
8272
ins_encode( SetInstMark, OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32(src), ClearInstMark );
8273
ins_pipe( ialu_mem_imm );
8278
instruct rolI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
8279
effect(USE_DEF dst, USE shift, KILL cr);
8281
format %{ "ROL $dst, $shift" %}
8282
opcode(0xD1, 0x0); /* Opcode D1 /0 */
8283
ins_encode( OpcP, RegOpc( dst ));
8284
ins_pipe( ialu_reg );
8287
instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8288
effect(USE_DEF dst, USE shift, KILL cr);
8290
format %{ "ROL $dst, $shift" %}
8291
opcode(0xC1, 0x0); /*Opcode /C1 /0 */
8292
ins_encode( RegOpcImm(dst, shift) );
8296
instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
8297
effect(USE_DEF dst, USE shift, KILL cr);
8299
format %{ "ROL $dst, $shift" %}
8300
opcode(0xD3, 0x0); /* Opcode D3 /0 */
8301
ins_encode(OpcP, RegOpc(dst));
8302
ins_pipe( ialu_reg_reg );
8306
// ROL 32bit by one once
8307
instruct rolI_eReg_i1(rRegI dst, immI_1 lshift, immI_M1 rshift, eFlagsReg cr) %{
8308
match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8311
rolI_eReg_imm1(dst, lshift, cr);
8315
// ROL 32bit var by imm8 once
8316
instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
8317
predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8318
match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8321
rolI_eReg_imm8(dst, lshift, cr);
8325
// ROL 32bit var by var once
8326
instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
8327
match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
8330
rolI_eReg_CL(dst, shift, cr);
8334
// ROL 32bit var by var once
8335
instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8336
match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
8339
rolI_eReg_CL(dst, shift, cr);
8344
instruct rorI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
8345
effect(USE_DEF dst, USE shift, KILL cr);
8347
format %{ "ROR $dst, $shift" %}
8348
opcode(0xD1,0x1); /* Opcode D1 /1 */
8349
ins_encode( OpcP, RegOpc( dst ) );
8350
ins_pipe( ialu_reg );
8353
instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8354
effect (USE_DEF dst, USE shift, KILL cr);
8356
format %{ "ROR $dst, $shift" %}
8357
opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */
8358
ins_encode( RegOpcImm(dst, shift) );
8359
ins_pipe( ialu_reg );
8362
instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
8363
effect(USE_DEF dst, USE shift, KILL cr);
8365
format %{ "ROR $dst, $shift" %}
8366
opcode(0xD3, 0x1); /* Opcode D3 /1 */
8367
ins_encode(OpcP, RegOpc(dst));
8368
ins_pipe( ialu_reg_reg );
8373
instruct rorI_eReg_i1(rRegI dst, immI_1 rshift, immI_M1 lshift, eFlagsReg cr) %{
8374
match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8377
rorI_eReg_imm1(dst, rshift, cr);
8381
// ROR 32bit by immI8 once
8382
instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
8383
predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8384
match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8387
rorI_eReg_imm8(dst, rshift, cr);
8391
// ROR 32bit var by var once
8392
instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
8393
match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
8396
rorI_eReg_CL(dst, shift, cr);
8400
// ROR 32bit var by var once
8401
instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8402
match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
8405
rorI_eReg_CL(dst, shift, cr);
8410
// Xor Register with Register
8411
instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8412
match(Set dst (XorI dst src));
8416
format %{ "XOR $dst,$src" %}
8418
ins_encode( OpcP, RegReg( dst, src) );
8419
ins_pipe( ialu_reg_reg );
8422
// Xor Register with Immediate -1
8423
instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
8424
match(Set dst (XorI dst imm));
8427
format %{ "NOT $dst" %}
8429
__ notl($dst$$Register);
8431
ins_pipe( ialu_reg );
8434
// Xor Register with Immediate
8435
instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8436
match(Set dst (XorI dst src));
8439
format %{ "XOR $dst,$src" %}
8440
opcode(0x81,0x06); /* Opcode 81 /6 id */
8441
// ins_encode( RegImm( dst, src) );
8442
ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8443
ins_pipe( ialu_reg );
8446
// Xor Register with Memory
8447
instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8448
match(Set dst (XorI dst (LoadI src)));
8452
format %{ "XOR $dst,$src" %}
8454
ins_encode( SetInstMark, OpcP, RegMem(dst, src), ClearInstMark );
8455
ins_pipe( ialu_reg_mem );
8458
// Xor Memory with Register
8459
instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8460
match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8464
format %{ "XOR $dst,$src" %}
8465
opcode(0x31); /* Opcode 31 /r */
8466
ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark );
8467
ins_pipe( ialu_mem_reg );
8470
// Xor Memory with Immediate
8471
instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8472
match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8476
format %{ "XOR $dst,$src" %}
8477
opcode(0x81,0x6); /* Opcode 81 /6 id */
8478
ins_encode( SetInstMark, OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32(src), ClearInstMark );
8479
ins_pipe( ialu_mem_imm );
8482
//----------Convert Int to Boolean---------------------------------------------
8484
instruct movI_nocopy(rRegI dst, rRegI src) %{
8485
effect( DEF dst, USE src );
8486
format %{ "MOV $dst,$src" %}
8487
ins_encode( enc_Copy( dst, src) );
8488
ins_pipe( ialu_reg_reg );
8491
instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
8492
effect( USE_DEF dst, USE src, KILL cr );
8495
format %{ "NEG $dst\n\t"
8497
ins_encode( neg_reg(dst),
8498
OpcRegReg(0x13,dst,src) );
8499
ins_pipe( ialu_reg_reg_long );
8502
instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
8503
match(Set dst (Conv2B src));
8506
movI_nocopy(dst,src);
8511
instruct movP_nocopy(rRegI dst, eRegP src) %{
8512
effect( DEF dst, USE src );
8513
format %{ "MOV $dst,$src" %}
8514
ins_encode( enc_Copy( dst, src) );
8515
ins_pipe( ialu_reg_reg );
8518
instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
8519
effect( USE_DEF dst, USE src, KILL cr );
8520
format %{ "NEG $dst\n\t"
8522
ins_encode( neg_reg(dst),
8523
OpcRegReg(0x13,dst,src) );
8524
ins_pipe( ialu_reg_reg_long );
8527
instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
8528
match(Set dst (Conv2B src));
8531
movP_nocopy(dst,src);
8536
instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
8537
match(Set dst (CmpLTMask p q));
8541
// SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
8542
format %{ "XOR $dst,$dst\n\t"
8547
Register Rp = $p$$Register;
8548
Register Rq = $q$$Register;
8549
Register Rd = $dst$$Register;
8553
__ setb(Assembler::less, Rd);
8557
ins_pipe(pipe_slow);
8560
instruct cmpLTMask0(rRegI dst, immI_0 zero, eFlagsReg cr) %{
8561
match(Set dst (CmpLTMask dst zero));
8562
effect(DEF dst, KILL cr);
8565
format %{ "SAR $dst,31\t# cmpLTMask0" %}
8567
__ sarl($dst$$Register, 31);
8572
/* better to save a register than avoid a branch */
8573
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8574
match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
8577
format %{ "SUB $p,$q\t# cadd_cmpLTMask\n\t"
8582
Register Rp = $p$$Register;
8583
Register Rq = $q$$Register;
8584
Register Ry = $y$$Register;
8587
__ jccb(Assembler::greaterEqual, done);
8592
ins_pipe(pipe_cmplt);
8595
/* better to save a register than avoid a branch */
8596
instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8597
match(Set y (AndI (CmpLTMask p q) y));
8602
format %{ "CMPL $p, $q\t# and_cmpLTMask\n\t"
8607
Register Rp = $p$$Register;
8608
Register Rq = $q$$Register;
8609
Register Ry = $y$$Register;
8612
__ jccb(Assembler::less, done);
8617
ins_pipe(pipe_cmplt);
8620
/* If I enable this, I encourage spilling in the inner loop of compress.
8621
instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
8622
match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
8624
//----------Overflow Math Instructions-----------------------------------------
8626
instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8628
match(Set cr (OverflowAddI op1 op2));
8629
effect(DEF cr, USE_KILL op1, USE op2);
8631
format %{ "ADD $op1, $op2\t# overflow check int" %}
8634
__ addl($op1$$Register, $op2$$Register);
8636
ins_pipe(ialu_reg_reg);
8639
instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
8641
match(Set cr (OverflowAddI op1 op2));
8642
effect(DEF cr, USE_KILL op1, USE op2);
8644
format %{ "ADD $op1, $op2\t# overflow check int" %}
8647
__ addl($op1$$Register, $op2$$constant);
8649
ins_pipe(ialu_reg_reg);
8652
instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
8654
match(Set cr (OverflowSubI op1 op2));
8656
format %{ "CMP $op1, $op2\t# overflow check int" %}
8658
__ cmpl($op1$$Register, $op2$$Register);
8660
ins_pipe(ialu_reg_reg);
8663
instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
8665
match(Set cr (OverflowSubI op1 op2));
8667
format %{ "CMP $op1, $op2\t# overflow check int" %}
8669
__ cmpl($op1$$Register, $op2$$constant);
8671
ins_pipe(ialu_reg_reg);
8674
instruct overflowNegI_rReg(eFlagsReg cr, immI_0 zero, eAXRegI op2)
8676
match(Set cr (OverflowSubI zero op2));
8677
effect(DEF cr, USE_KILL op2);
8679
format %{ "NEG $op2\t# overflow check int" %}
8681
__ negl($op2$$Register);
8683
ins_pipe(ialu_reg_reg);
8686
instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8688
match(Set cr (OverflowMulI op1 op2));
8689
effect(DEF cr, USE_KILL op1, USE op2);
8691
format %{ "IMUL $op1, $op2\t# overflow check int" %}
8693
__ imull($op1$$Register, $op2$$Register);
8695
ins_pipe(ialu_reg_reg_alu0);
8698
instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
8700
match(Set cr (OverflowMulI op1 op2));
8701
effect(DEF cr, TEMP tmp, USE op1, USE op2);
8703
format %{ "IMUL $tmp, $op1, $op2\t# overflow check int" %}
8705
__ imull($tmp$$Register, $op1$$Register, $op2$$constant);
8707
ins_pipe(ialu_reg_reg_alu0);
8710
// Integer Absolute Instructions
8711
instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr)
8713
match(Set dst (AbsI src));
8714
effect(TEMP dst, TEMP tmp, KILL cr);
8715
format %{ "movl $tmp, $src\n\t"
8717
"movl $dst, $src\n\t"
8718
"xorl $dst, $tmp\n\t"
8722
__ movl($tmp$$Register, $src$$Register);
8723
__ sarl($tmp$$Register, 31);
8724
__ movl($dst$$Register, $src$$Register);
8725
__ xorl($dst$$Register, $tmp$$Register);
8726
__ subl($dst$$Register, $tmp$$Register);
8729
ins_pipe(ialu_reg_reg);
8732
//----------Long Instructions------------------------------------------------
8733
// Add Long Register with Register
8734
instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8735
match(Set dst (AddL dst src));
8738
format %{ "ADD $dst.lo,$src.lo\n\t"
8739
"ADC $dst.hi,$src.hi" %}
8741
ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8742
ins_pipe( ialu_reg_reg_long );
8745
// Add Long Register with Immediate
8746
instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8747
match(Set dst (AddL dst src));
8749
format %{ "ADD $dst.lo,$src.lo\n\t"
8750
"ADC $dst.hi,$src.hi" %}
8751
opcode(0x81,0x00,0x02); /* Opcode 81 /0, 81 /2 */
8752
ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8753
ins_pipe( ialu_reg_long );
8756
// Add Long Register with Memory
8757
instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8758
match(Set dst (AddL dst (LoadL mem)));
8761
format %{ "ADD $dst.lo,$mem\n\t"
8762
"ADC $dst.hi,$mem+4" %}
8764
ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark );
8765
ins_pipe( ialu_reg_long_mem );
8768
// Subtract Long Register with Register.
8769
instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8770
match(Set dst (SubL dst src));
8773
format %{ "SUB $dst.lo,$src.lo\n\t"
8774
"SBB $dst.hi,$src.hi" %}
8776
ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8777
ins_pipe( ialu_reg_reg_long );
8780
// Subtract Long Register with Immediate
8781
instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8782
match(Set dst (SubL dst src));
8784
format %{ "SUB $dst.lo,$src.lo\n\t"
8785
"SBB $dst.hi,$src.hi" %}
8786
opcode(0x81,0x05,0x03); /* Opcode 81 /5, 81 /3 */
8787
ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8788
ins_pipe( ialu_reg_long );
8791
// Subtract Long Register with Memory
8792
instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8793
match(Set dst (SubL dst (LoadL mem)));
8796
format %{ "SUB $dst.lo,$mem\n\t"
8797
"SBB $dst.hi,$mem+4" %}
8799
ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark );
8800
ins_pipe( ialu_reg_long_mem );
8803
instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
8804
match(Set dst (SubL zero dst));
8807
format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %}
8808
ins_encode( neg_long(dst) );
8809
ins_pipe( ialu_reg_reg_long );
8812
// And Long Register with Register
8813
instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8814
match(Set dst (AndL dst src));
8816
format %{ "AND $dst.lo,$src.lo\n\t"
8817
"AND $dst.hi,$src.hi" %}
8819
ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
8820
ins_pipe( ialu_reg_reg_long );
8823
// And Long Register with Immediate
8824
instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8825
match(Set dst (AndL dst src));
8827
format %{ "AND $dst.lo,$src.lo\n\t"
8828
"AND $dst.hi,$src.hi" %}
8829
opcode(0x81,0x04,0x04); /* Opcode 81 /4, 81 /4 */
8830
ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8831
ins_pipe( ialu_reg_long );
8834
// And Long Register with Memory
8835
instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8836
match(Set dst (AndL dst (LoadL mem)));
8839
format %{ "AND $dst.lo,$mem\n\t"
8840
"AND $dst.hi,$mem+4" %}
8842
ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark );
8843
ins_pipe( ialu_reg_long_mem );
8847
instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
8848
match(Set dst (AndL (XorL src1 minus_1) src2));
8849
predicate(UseBMI1Instructions);
8850
effect(KILL cr, TEMP dst);
8852
format %{ "ANDNL $dst.lo, $src1.lo, $src2.lo\n\t"
8853
"ANDNL $dst.hi, $src1.hi, $src2.hi"
8857
Register Rdst = $dst$$Register;
8858
Register Rsrc1 = $src1$$Register;
8859
Register Rsrc2 = $src2$$Register;
8860
__ andnl(Rdst, Rsrc1, Rsrc2);
8861
__ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
8863
ins_pipe(ialu_reg_reg_long);
8866
instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
8867
match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
8868
predicate(UseBMI1Instructions);
8869
effect(KILL cr, TEMP dst);
8872
format %{ "ANDNL $dst.lo, $src1.lo, $src2\n\t"
8873
"ANDNL $dst.hi, $src1.hi, $src2+4"
8877
Register Rdst = $dst$$Register;
8878
Register Rsrc1 = $src1$$Register;
8879
Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
8881
__ andnl(Rdst, Rsrc1, $src2$$Address);
8882
__ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
8884
ins_pipe(ialu_reg_mem);
8887
instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
8888
match(Set dst (AndL (SubL imm_zero src) src));
8889
predicate(UseBMI1Instructions);
8890
effect(KILL cr, TEMP dst);
8892
format %{ "MOVL $dst.hi, 0\n\t"
8893
"BLSIL $dst.lo, $src.lo\n\t"
8895
"BLSIL $dst.hi, $src.hi\n"
8901
Register Rdst = $dst$$Register;
8902
Register Rsrc = $src$$Register;
8903
__ movl(HIGH_FROM_LOW(Rdst), 0);
8904
__ blsil(Rdst, Rsrc);
8905
__ jccb(Assembler::notZero, done);
8906
__ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8912
instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
8913
match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
8914
predicate(UseBMI1Instructions);
8915
effect(KILL cr, TEMP dst);
8918
format %{ "MOVL $dst.hi, 0\n\t"
8919
"BLSIL $dst.lo, $src\n\t"
8921
"BLSIL $dst.hi, $src+4\n"
8927
Register Rdst = $dst$$Register;
8928
Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
8930
__ movl(HIGH_FROM_LOW(Rdst), 0);
8931
__ blsil(Rdst, $src$$Address);
8932
__ jccb(Assembler::notZero, done);
8933
__ blsil(HIGH_FROM_LOW(Rdst), src_hi);
8936
ins_pipe(ialu_reg_mem);
8939
instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
8941
match(Set dst (XorL (AddL src minus_1) src));
8942
predicate(UseBMI1Instructions);
8943
effect(KILL cr, TEMP dst);
8945
format %{ "MOVL $dst.hi, 0\n\t"
8946
"BLSMSKL $dst.lo, $src.lo\n\t"
8948
"BLSMSKL $dst.hi, $src.hi\n"
8954
Register Rdst = $dst$$Register;
8955
Register Rsrc = $src$$Register;
8956
__ movl(HIGH_FROM_LOW(Rdst), 0);
8957
__ blsmskl(Rdst, Rsrc);
8958
__ jccb(Assembler::carryClear, done);
8959
__ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8966
instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
8968
match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
8969
predicate(UseBMI1Instructions);
8970
effect(KILL cr, TEMP dst);
8973
format %{ "MOVL $dst.hi, 0\n\t"
8974
"BLSMSKL $dst.lo, $src\n\t"
8976
"BLSMSKL $dst.hi, $src+4\n"
8982
Register Rdst = $dst$$Register;
8983
Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
8985
__ movl(HIGH_FROM_LOW(Rdst), 0);
8986
__ blsmskl(Rdst, $src$$Address);
8987
__ jccb(Assembler::carryClear, done);
8988
__ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
8992
ins_pipe(ialu_reg_mem);
8995
instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
8997
match(Set dst (AndL (AddL src minus_1) src) );
8998
predicate(UseBMI1Instructions);
8999
effect(KILL cr, TEMP dst);
9001
format %{ "MOVL $dst.hi, $src.hi\n\t"
9002
"BLSRL $dst.lo, $src.lo\n\t"
9004
"BLSRL $dst.hi, $src.hi\n"
9010
Register Rdst = $dst$$Register;
9011
Register Rsrc = $src$$Register;
9012
__ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9013
__ blsrl(Rdst, Rsrc);
9014
__ jccb(Assembler::carryClear, done);
9015
__ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9022
instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
9024
match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
9025
predicate(UseBMI1Instructions);
9026
effect(KILL cr, TEMP dst);
9029
format %{ "MOVL $dst.hi, $src+4\n\t"
9030
"BLSRL $dst.lo, $src\n\t"
9032
"BLSRL $dst.hi, $src+4\n"
9038
Register Rdst = $dst$$Register;
9039
Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
9040
__ movl(HIGH_FROM_LOW(Rdst), src_hi);
9041
__ blsrl(Rdst, $src$$Address);
9042
__ jccb(Assembler::carryClear, done);
9043
__ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
9047
ins_pipe(ialu_reg_mem);
9050
// Or Long Register with Register
9051
instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9052
match(Set dst (OrL dst src));
9054
format %{ "OR $dst.lo,$src.lo\n\t"
9055
"OR $dst.hi,$src.hi" %}
9057
ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9058
ins_pipe( ialu_reg_reg_long );
9061
// Or Long Register with Immediate
9062
instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9063
match(Set dst (OrL dst src));
9065
format %{ "OR $dst.lo,$src.lo\n\t"
9066
"OR $dst.hi,$src.hi" %}
9067
opcode(0x81,0x01,0x01); /* Opcode 81 /1, 81 /1 */
9068
ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9069
ins_pipe( ialu_reg_long );
9072
// Or Long Register with Memory
9073
instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9074
match(Set dst (OrL dst (LoadL mem)));
9077
format %{ "OR $dst.lo,$mem\n\t"
9078
"OR $dst.hi,$mem+4" %}
9080
ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark );
9081
ins_pipe( ialu_reg_long_mem );
9084
// Xor Long Register with Register
9085
instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9086
match(Set dst (XorL dst src));
9088
format %{ "XOR $dst.lo,$src.lo\n\t"
9089
"XOR $dst.hi,$src.hi" %}
9091
ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9092
ins_pipe( ialu_reg_reg_long );
9095
// Xor Long Register with Immediate -1
9096
instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
9097
match(Set dst (XorL dst imm));
9098
format %{ "NOT $dst.lo\n\t"
9101
__ notl($dst$$Register);
9102
__ notl(HIGH_FROM_LOW($dst$$Register));
9104
ins_pipe( ialu_reg_long );
9107
// Xor Long Register with Immediate
9108
instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9109
match(Set dst (XorL dst src));
9111
format %{ "XOR $dst.lo,$src.lo\n\t"
9112
"XOR $dst.hi,$src.hi" %}
9113
opcode(0x81,0x06,0x06); /* Opcode 81 /6, 81 /6 */
9114
ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9115
ins_pipe( ialu_reg_long );
9118
// Xor Long Register with Memory
9119
instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9120
match(Set dst (XorL dst (LoadL mem)));
9123
format %{ "XOR $dst.lo,$mem\n\t"
9124
"XOR $dst.hi,$mem+4" %}
9126
ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark );
9127
ins_pipe( ialu_reg_long_mem );
9130
// Shift Left Long by 1
9131
instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
9132
predicate(UseNewLongLShift);
9133
match(Set dst (LShiftL dst cnt));
9136
format %{ "ADD $dst.lo,$dst.lo\n\t"
9137
"ADC $dst.hi,$dst.hi" %}
9139
__ addl($dst$$Register,$dst$$Register);
9140
__ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9142
ins_pipe( ialu_reg_long );
9145
// Shift Left Long by 2
9146
instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
9147
predicate(UseNewLongLShift);
9148
match(Set dst (LShiftL dst cnt));
9151
format %{ "ADD $dst.lo,$dst.lo\n\t"
9152
"ADC $dst.hi,$dst.hi\n\t"
9153
"ADD $dst.lo,$dst.lo\n\t"
9154
"ADC $dst.hi,$dst.hi" %}
9156
__ addl($dst$$Register,$dst$$Register);
9157
__ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9158
__ addl($dst$$Register,$dst$$Register);
9159
__ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9161
ins_pipe( ialu_reg_long );
9164
// Shift Left Long by 3
9165
instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
9166
predicate(UseNewLongLShift);
9167
match(Set dst (LShiftL dst cnt));
9170
format %{ "ADD $dst.lo,$dst.lo\n\t"
9171
"ADC $dst.hi,$dst.hi\n\t"
9172
"ADD $dst.lo,$dst.lo\n\t"
9173
"ADC $dst.hi,$dst.hi\n\t"
9174
"ADD $dst.lo,$dst.lo\n\t"
9175
"ADC $dst.hi,$dst.hi" %}
9177
__ addl($dst$$Register,$dst$$Register);
9178
__ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9179
__ addl($dst$$Register,$dst$$Register);
9180
__ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9181
__ addl($dst$$Register,$dst$$Register);
9182
__ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9184
ins_pipe( ialu_reg_long );
9187
// Shift Left Long by 1-31
9188
instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9189
match(Set dst (LShiftL dst cnt));
9192
format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t"
9193
"SHL $dst.lo,$cnt" %}
9194
opcode(0xC1, 0x4, 0xA4); /* 0F/A4, then C1 /4 ib */
9195
ins_encode( move_long_small_shift(dst,cnt) );
9196
ins_pipe( ialu_reg_long );
9199
// Shift Left Long by 32-63
9200
instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9201
match(Set dst (LShiftL dst cnt));
9204
format %{ "MOV $dst.hi,$dst.lo\n"
9205
"\tSHL $dst.hi,$cnt-32\n"
9206
"\tXOR $dst.lo,$dst.lo" %}
9207
opcode(0xC1, 0x4); /* C1 /4 ib */
9208
ins_encode( move_long_big_shift_clr(dst,cnt) );
9209
ins_pipe( ialu_reg_long );
9212
// Shift Left Long by variable
9213
instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9214
match(Set dst (LShiftL dst shift));
9218
format %{ "TEST $shift,32\n\t"
9220
"MOV $dst.hi,$dst.lo\n\t"
9221
"XOR $dst.lo,$dst.lo\n"
9222
"small:\tSHLD $dst.hi,$dst.lo,$shift\n\t"
9223
"SHL $dst.lo,$shift" %}
9224
ins_encode( shift_left_long( dst, shift ) );
9225
ins_pipe( pipe_slow );
9228
// Shift Right Long by 1-31
9229
instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9230
match(Set dst (URShiftL dst cnt));
9233
format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
9234
"SHR $dst.hi,$cnt" %}
9235
opcode(0xC1, 0x5, 0xAC); /* 0F/AC, then C1 /5 ib */
9236
ins_encode( move_long_small_shift(dst,cnt) );
9237
ins_pipe( ialu_reg_long );
9240
// Shift Right Long by 32-63
9241
instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9242
match(Set dst (URShiftL dst cnt));
9245
format %{ "MOV $dst.lo,$dst.hi\n"
9246
"\tSHR $dst.lo,$cnt-32\n"
9247
"\tXOR $dst.hi,$dst.hi" %}
9248
opcode(0xC1, 0x5); /* C1 /5 ib */
9249
ins_encode( move_long_big_shift_clr(dst,cnt) );
9250
ins_pipe( ialu_reg_long );
9253
// Shift Right Long by variable
9254
instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9255
match(Set dst (URShiftL dst shift));
9259
format %{ "TEST $shift,32\n\t"
9261
"MOV $dst.lo,$dst.hi\n\t"
9262
"XOR $dst.hi,$dst.hi\n"
9263
"small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
9264
"SHR $dst.hi,$shift" %}
9265
ins_encode( shift_right_long( dst, shift ) );
9266
ins_pipe( pipe_slow );
9269
// Shift Right Long by 1-31
9270
instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9271
match(Set dst (RShiftL dst cnt));
9274
format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
9275
"SAR $dst.hi,$cnt" %}
9276
opcode(0xC1, 0x7, 0xAC); /* 0F/AC, then C1 /7 ib */
9277
ins_encode( move_long_small_shift(dst,cnt) );
9278
ins_pipe( ialu_reg_long );
9281
// Shift Right Long by 32-63
9282
instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9283
match(Set dst (RShiftL dst cnt));
9286
format %{ "MOV $dst.lo,$dst.hi\n"
9287
"\tSAR $dst.lo,$cnt-32\n"
9288
"\tSAR $dst.hi,31" %}
9289
opcode(0xC1, 0x7); /* C1 /7 ib */
9290
ins_encode( move_long_big_shift_sign(dst,cnt) );
9291
ins_pipe( ialu_reg_long );
9294
// Shift Right arithmetic Long by variable
9295
instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9296
match(Set dst (RShiftL dst shift));
9300
format %{ "TEST $shift,32\n\t"
9302
"MOV $dst.lo,$dst.hi\n\t"
9304
"small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
9305
"SAR $dst.hi,$shift" %}
9306
ins_encode( shift_right_arith_long( dst, shift ) );
9307
ins_pipe( pipe_slow );
9311
//----------Double Instructions------------------------------------------------
9316
// P6 version of float compare, sets condition codes in EFLAGS
9317
instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9318
predicate(VM_Version::supports_cmov() && UseSSE <=1);
9319
match(Set cr (CmpD src1 src2));
9322
format %{ "FLD $src1\n\t"
9323
"FUCOMIP ST,$src2 // P6 instruction\n\t"
9325
"MOV ah,1 // saw a NaN, set CF\n\t"
9327
"exit:\tNOP // avoid branch to branch" %}
9328
opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9329
ins_encode( Push_Reg_DPR(src1),
9332
ins_pipe( pipe_slow );
9335
instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
9336
predicate(VM_Version::supports_cmov() && UseSSE <=1);
9337
match(Set cr (CmpD src1 src2));
9339
format %{ "FLD $src1\n\t"
9340
"FUCOMIP ST,$src2 // P6 instruction" %}
9341
opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9342
ins_encode( Push_Reg_DPR(src1),
9343
OpcP, RegOpc(src2));
9344
ins_pipe( pipe_slow );
9348
instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9349
predicate(UseSSE<=1);
9350
match(Set cr (CmpD src1 src2));
9353
format %{ "FLD $src1\n\t"
9358
"MOV AH,1\t# unordered treat as LT\n"
9360
opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9361
ins_encode( Push_Reg_DPR(src1),
9364
ins_pipe( pipe_slow );
9367
// Compare vs zero into -1,0,1
9368
instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
9369
predicate(UseSSE<=1);
9370
match(Set dst (CmpD3 src1 zero));
9371
effect(KILL cr, KILL rax);
9373
format %{ "FTSTD $dst,$src1" %}
9375
ins_encode( Push_Reg_DPR(src1),
9378
ins_pipe( pipe_slow );
9381
// Compare into -1,0,1
9382
instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
9383
predicate(UseSSE<=1);
9384
match(Set dst (CmpD3 src1 src2));
9385
effect(KILL cr, KILL rax);
9387
format %{ "FCMPD $dst,$src1,$src2" %}
9388
opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9389
ins_encode( Push_Reg_DPR(src1),
9392
ins_pipe( pipe_slow );
9395
// float compare and set condition codes in EFLAGS by XMM regs
9396
instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
9397
predicate(UseSSE>=2);
9398
match(Set cr (CmpD src1 src2));
9400
format %{ "UCOMISD $src1,$src2\n\t"
9402
"PUSHF\t# saw NaN, set CF\n\t"
9403
"AND [rsp], #0xffffff2b\n\t"
9407
__ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9408
emit_cmpfp_fixup(masm);
9410
ins_pipe( pipe_slow );
9413
instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
9414
predicate(UseSSE>=2);
9415
match(Set cr (CmpD src1 src2));
9417
format %{ "UCOMISD $src1,$src2" %}
9419
__ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9421
ins_pipe( pipe_slow );
9424
// float compare and set condition codes in EFLAGS by XMM regs
9425
instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
9426
predicate(UseSSE>=2);
9427
match(Set cr (CmpD src1 (LoadD src2)));
9429
format %{ "UCOMISD $src1,$src2\n\t"
9431
"PUSHF\t# saw NaN, set CF\n\t"
9432
"AND [rsp], #0xffffff2b\n\t"
9436
__ ucomisd($src1$$XMMRegister, $src2$$Address);
9437
emit_cmpfp_fixup(masm);
9439
ins_pipe( pipe_slow );
9442
instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
9443
predicate(UseSSE>=2);
9444
match(Set cr (CmpD src1 (LoadD src2)));
9446
format %{ "UCOMISD $src1,$src2" %}
9448
__ ucomisd($src1$$XMMRegister, $src2$$Address);
9450
ins_pipe( pipe_slow );
9453
// Compare into -1,0,1 in XMM
9454
instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
9455
predicate(UseSSE>=2);
9456
match(Set dst (CmpD3 src1 src2));
9459
format %{ "UCOMISD $src1, $src2\n\t"
9464
"MOVZB $dst, $dst\n"
9467
__ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9468
emit_cmpfp3(masm, $dst$$Register);
9470
ins_pipe( pipe_slow );
9473
// Compare into -1,0,1 in XMM and memory
9474
instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
9475
predicate(UseSSE>=2);
9476
match(Set dst (CmpD3 src1 (LoadD src2)));
9479
format %{ "UCOMISD $src1, $src2\n\t"
9484
"MOVZB $dst, $dst\n"
9487
__ ucomisd($src1$$XMMRegister, $src2$$Address);
9488
emit_cmpfp3(masm, $dst$$Register);
9490
ins_pipe( pipe_slow );
9494
instruct subDPR_reg(regDPR dst, regDPR src) %{
9495
predicate (UseSSE <=1);
9496
match(Set dst (SubD dst src));
9498
format %{ "FLD $src\n\t"
9500
opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
9502
ins_encode( Push_Reg_DPR(src),
9503
OpcP, RegOpc(dst) );
9504
ins_pipe( fpu_reg_reg );
9507
instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9508
predicate (UseSSE <=1);
9509
match(Set dst (RoundDouble (SubD src1 src2)));
9512
format %{ "FLD $src2\n\t"
9514
"FSTP_D $dst\t# D-round" %}
9516
ins_encode( Push_Reg_DPR(src2),
9517
OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9518
ins_pipe( fpu_mem_reg_reg );
9522
instruct subDPR_reg_mem(regDPR dst, memory src) %{
9523
predicate (UseSSE <=1);
9524
match(Set dst (SubD dst (LoadD src)));
9527
format %{ "FLD $src\n\t"
9529
opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD DD /0 */
9530
ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src),
9531
OpcP, RegOpc(dst), ClearInstMark );
9532
ins_pipe( fpu_reg_mem );
9535
instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
9536
predicate (UseSSE<=1);
9537
match(Set dst (AbsD src));
9541
ins_encode( OpcS, OpcP );
9542
ins_pipe( fpu_reg_reg );
9545
instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
9546
predicate(UseSSE<=1);
9547
match(Set dst (NegD src));
9551
ins_encode( OpcS, OpcP );
9552
ins_pipe( fpu_reg_reg );
9555
instruct addDPR_reg(regDPR dst, regDPR src) %{
9556
predicate(UseSSE<=1);
9557
match(Set dst (AddD dst src));
9558
format %{ "FLD $src\n\t"
9562
opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
9563
ins_encode( Push_Reg_DPR(src),
9564
OpcP, RegOpc(dst) );
9565
ins_pipe( fpu_reg_reg );
9569
instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9570
predicate(UseSSE<=1);
9571
match(Set dst (RoundDouble (AddD src1 src2)));
9574
format %{ "FLD $src2\n\t"
9576
"FSTP_D $dst\t# D-round" %}
9577
opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
9578
ins_encode( Push_Reg_DPR(src2),
9579
OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9580
ins_pipe( fpu_mem_reg_reg );
9584
instruct addDPR_reg_mem(regDPR dst, memory src) %{
9585
predicate(UseSSE<=1);
9586
match(Set dst (AddD dst (LoadD src)));
9589
format %{ "FLD $src\n\t"
9591
opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */
9592
ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src),
9593
OpcP, RegOpc(dst), ClearInstMark );
9594
ins_pipe( fpu_reg_mem );
9598
instruct addDPR_mem_reg(memory dst, regDPR src) %{
9599
predicate(UseSSE<=1);
9600
match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
9603
format %{ "FLD_D $dst\n\t"
9607
ins_encode( SetInstMark, Opcode(0xDD), RMopc_Mem(0x00,dst),
9608
Opcode(0xD8), RegOpc(src), ClearInstMark,
9610
Opcode(0xDD), RMopc_Mem(0x03,dst),
9612
ins_pipe( fpu_reg_mem );
9615
instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
9616
predicate(UseSSE<=1);
9617
match(Set dst (AddD dst con));
9619
format %{ "FLD1\n\t"
9623
__ faddp($dst$$reg);
9628
instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
9629
predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9630
match(Set dst (AddD dst con));
9632
format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
9635
__ fld_d($constantaddress($con));
9636
__ faddp($dst$$reg);
9638
ins_pipe(fpu_reg_mem);
9641
instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
9642
predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
9643
match(Set dst (RoundDouble (AddD src con)));
9645
format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
9647
"FSTP_D $dst\t# D-round" %}
9649
__ fld_d($constantaddress($con));
9651
__ fstp_d(Address(rsp, $dst$$disp));
9653
ins_pipe(fpu_mem_reg_con);
9656
instruct mulDPR_reg(regDPR dst, regDPR src) %{
9657
predicate(UseSSE<=1);
9658
match(Set dst (MulD dst src));
9659
format %{ "FLD $src\n\t"
9661
opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9663
ins_encode( Push_Reg_DPR(src),
9664
OpcP, RegOpc(dst) );
9665
ins_pipe( fpu_reg_reg );
9668
// Strict FP instruction biases argument before multiply then
9669
// biases result to avoid double rounding of subnormals.
9671
// scale arg1 by multiplying arg1 by 2^(-15360)
9673
// multiply scaled arg1 by arg2
9674
// rescale product by 2^(15360)
9676
instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
9677
predicate( UseSSE<=1 && Compile::current()->has_method() );
9678
match(Set dst (MulD dst src));
9679
ins_cost(1); // Select this instruction for all FP double multiplies
9681
format %{ "FLD StubRoutines::x86::_fpu_subnormal_bias1\n\t"
9685
"FLD StubRoutines::x86::_fpu_subnormal_bias2\n\t"
9686
"DMULp $dst,ST\n\t" %}
9687
opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9688
ins_encode( strictfp_bias1(dst),
9691
strictfp_bias2(dst) );
9692
ins_pipe( fpu_reg_reg );
9695
instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
9696
predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9697
match(Set dst (MulD dst con));
9699
format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
9702
__ fld_d($constantaddress($con));
9703
__ fmulp($dst$$reg);
9705
ins_pipe(fpu_reg_mem);
9709
instruct mulDPR_reg_mem(regDPR dst, memory src) %{
9710
predicate( UseSSE<=1 );
9711
match(Set dst (MulD dst (LoadD src)));
9713
format %{ "FLD_D $src\n\t"
9715
opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */
9716
ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src),
9717
OpcP, RegOpc(dst), ClearInstMark );
9718
ins_pipe( fpu_reg_mem );
9722
// Cisc-alternate to reg-reg multiply
9723
instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
9724
predicate( UseSSE<=1 );
9725
match(Set dst (MulD src (LoadD mem)));
9727
format %{ "FLD_D $mem\n\t"
9730
opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */
9731
ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,mem),
9733
Pop_Reg_DPR(dst), ClearInstMark );
9734
ins_pipe( fpu_reg_reg_mem );
9738
// MACRO3 -- addDPR a mulDPR
9739
// This instruction is a '2-address' instruction in that the result goes
9740
// back to src2. This eliminates a move from the macro; possibly the
9741
// register allocator will have to add it back (and maybe not).
9742
instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9743
predicate( UseSSE<=1 );
9744
match(Set src2 (AddD (MulD src0 src1) src2));
9745
format %{ "FLD $src0\t# ===MACRO3d===\n\t"
9749
opcode(0xDD); /* LoadD DD /0 */
9750
ins_encode( Push_Reg_FPR(src0),
9752
FAddP_reg_ST(src2) );
9753
ins_pipe( fpu_reg_reg_reg );
9757
// MACRO3 -- subDPR a mulDPR
9758
instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9759
predicate( UseSSE<=1 );
9760
match(Set src2 (SubD (MulD src0 src1) src2));
9761
format %{ "FLD $src0\t# ===MACRO3d===\n\t"
9763
"DSUBRp $src2,ST" %}
9765
ins_encode( Push_Reg_FPR(src0),
9767
Opcode(0xDE), Opc_plus(0xE0,src2));
9768
ins_pipe( fpu_reg_reg_reg );
9772
instruct divDPR_reg(regDPR dst, regDPR src) %{
9773
predicate( UseSSE<=1 );
9774
match(Set dst (DivD dst src));
9776
format %{ "FLD $src\n\t"
9778
opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
9780
ins_encode( Push_Reg_DPR(src),
9781
OpcP, RegOpc(dst) );
9782
ins_pipe( fpu_reg_reg );
9785
// Strict FP instruction biases argument before division then
9786
// biases result, to avoid double rounding of subnormals.
9788
// scale dividend by multiplying dividend by 2^(-15360)
9790
// divide scaled dividend by divisor
9791
// rescale quotient by 2^(15360)
9793
instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
9794
predicate (UseSSE<=1);
9795
match(Set dst (DivD dst src));
9796
predicate( UseSSE<=1 && Compile::current()->has_method() );
9799
format %{ "FLD StubRoutines::x86::_fpu_subnormal_bias1\n\t"
9803
"FLD StubRoutines::x86::_fpu_subnormal_bias2\n\t"
9804
"DMULp $dst,ST\n\t" %}
9805
opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
9806
ins_encode( strictfp_bias1(dst),
9809
strictfp_bias2(dst) );
9810
ins_pipe( fpu_reg_reg );
9813
instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
9814
predicate(UseSSE<=1);
9815
match(Set dst (ModD dst src));
9816
effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
9818
format %{ "DMOD $dst,$src" %}
9820
ins_encode(Push_Reg_Mod_DPR(dst, src),
9822
Push_Result_Mod_DPR(src),
9824
ins_pipe( pipe_slow );
9827
instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
9828
predicate(UseSSE>=2);
9829
match(Set dst (ModD src0 src1));
9830
effect(KILL rax, KILL cr);
9832
format %{ "SUB ESP,8\t # DMOD\n"
9833
"\tMOVSD [ESP+0],$src1\n"
9835
"\tMOVSD [ESP+0],$src0\n"
9842
"\tFSTP_D [ESP+0]\n"
9843
"\tMOVSD $dst,[ESP+0]\n"
9845
"\tFSTP ST0\t # Restore FPU Stack"
9848
ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
9849
ins_pipe( pipe_slow );
9852
instruct atanDPR_reg(regDPR dst, regDPR src) %{
9853
predicate (UseSSE<=1);
9854
match(Set dst(AtanD dst src));
9855
format %{ "DATA $dst,$src" %}
9857
ins_encode( Push_Reg_DPR(src),
9858
OpcP, OpcS, RegOpc(dst) );
9859
ins_pipe( pipe_slow );
9862
instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
9863
predicate (UseSSE>=2);
9864
match(Set dst(AtanD dst src));
9865
effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
9866
format %{ "DATA $dst,$src" %}
9868
ins_encode( Push_SrcD(src),
9869
OpcP, OpcS, Push_ResultD(dst) );
9870
ins_pipe( pipe_slow );
9873
instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
9874
predicate (UseSSE<=1);
9875
match(Set dst (SqrtD src));
9876
format %{ "DSQRT $dst,$src" %}
9878
ins_encode( Push_Reg_DPR(src),
9879
OpcS, OpcP, Pop_Reg_DPR(dst) );
9880
ins_pipe( pipe_slow );
9883
//-------------Float Instructions-------------------------------
9886
// Code for float compare:
9888
// fwait(); fnstsw_ax();
9890
// movl(dst, unordered_result);
9891
// jcc(Assembler::parity, exit);
9892
// movl(dst, less_result);
9893
// jcc(Assembler::below, exit);
9894
// movl(dst, equal_result);
9895
// jcc(Assembler::equal, exit);
9896
// movl(dst, greater_result);
9899
// P6 version of float compare, sets condition codes in EFLAGS
9900
instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
9901
predicate(VM_Version::supports_cmov() && UseSSE == 0);
9902
match(Set cr (CmpF src1 src2));
9905
format %{ "FLD $src1\n\t"
9906
"FUCOMIP ST,$src2 // P6 instruction\n\t"
9908
"MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t"
9910
"exit:\tNOP // avoid branch to branch" %}
9911
opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9912
ins_encode( Push_Reg_DPR(src1),
9915
ins_pipe( pipe_slow );
9918
instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
9919
predicate(VM_Version::supports_cmov() && UseSSE == 0);
9920
match(Set cr (CmpF src1 src2));
9922
format %{ "FLD $src1\n\t"
9923
"FUCOMIP ST,$src2 // P6 instruction" %}
9924
opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9925
ins_encode( Push_Reg_DPR(src1),
9926
OpcP, RegOpc(src2));
9927
ins_pipe( pipe_slow );
9932
instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
9933
predicate(UseSSE == 0);
9934
match(Set cr (CmpF src1 src2));
9937
format %{ "FLD $src1\n\t"
9942
"MOV AH,1\t# unordered treat as LT\n"
9944
opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9945
ins_encode( Push_Reg_DPR(src1),
9948
ins_pipe( pipe_slow );
9951
// Compare vs zero into -1,0,1
9952
instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
9953
predicate(UseSSE == 0);
9954
match(Set dst (CmpF3 src1 zero));
9955
effect(KILL cr, KILL rax);
9957
format %{ "FTSTF $dst,$src1" %}
9959
ins_encode( Push_Reg_DPR(src1),
9962
ins_pipe( pipe_slow );
9965
// Compare into -1,0,1
9966
instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
9967
predicate(UseSSE == 0);
9968
match(Set dst (CmpF3 src1 src2));
9969
effect(KILL cr, KILL rax);
9971
format %{ "FCMPF $dst,$src1,$src2" %}
9972
opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9973
ins_encode( Push_Reg_DPR(src1),
9976
ins_pipe( pipe_slow );
9979
// float compare and set condition codes in EFLAGS by XMM regs
9980
instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
9981
predicate(UseSSE>=1);
9982
match(Set cr (CmpF src1 src2));
9984
format %{ "UCOMISS $src1,$src2\n\t"
9986
"PUSHF\t# saw NaN, set CF\n\t"
9987
"AND [rsp], #0xffffff2b\n\t"
9991
__ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
9992
emit_cmpfp_fixup(masm);
9994
ins_pipe( pipe_slow );
9997
instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
9998
predicate(UseSSE>=1);
9999
match(Set cr (CmpF src1 src2));
10001
format %{ "UCOMISS $src1,$src2" %}
10003
__ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10005
ins_pipe( pipe_slow );
10008
// float compare and set condition codes in EFLAGS by XMM regs
10009
instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
10010
predicate(UseSSE>=1);
10011
match(Set cr (CmpF src1 (LoadF src2)));
10013
format %{ "UCOMISS $src1,$src2\n\t"
10015
"PUSHF\t# saw NaN, set CF\n\t"
10016
"AND [rsp], #0xffffff2b\n\t"
10020
__ ucomiss($src1$$XMMRegister, $src2$$Address);
10021
emit_cmpfp_fixup(masm);
10023
ins_pipe( pipe_slow );
10026
instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
10027
predicate(UseSSE>=1);
10028
match(Set cr (CmpF src1 (LoadF src2)));
10030
format %{ "UCOMISS $src1,$src2" %}
10032
__ ucomiss($src1$$XMMRegister, $src2$$Address);
10034
ins_pipe( pipe_slow );
10037
// Compare into -1,0,1 in XMM
10038
instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
10039
predicate(UseSSE>=1);
10040
match(Set dst (CmpF3 src1 src2));
10043
format %{ "UCOMISS $src1, $src2\n\t"
10044
"MOV $dst, #-1\n\t"
10048
"MOVZB $dst, $dst\n"
10051
__ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10052
emit_cmpfp3(masm, $dst$$Register);
10054
ins_pipe( pipe_slow );
10057
// Compare into -1,0,1 in XMM and memory
10058
instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
10059
predicate(UseSSE>=1);
10060
match(Set dst (CmpF3 src1 (LoadF src2)));
10063
format %{ "UCOMISS $src1, $src2\n\t"
10064
"MOV $dst, #-1\n\t"
10068
"MOVZB $dst, $dst\n"
10071
__ ucomiss($src1$$XMMRegister, $src2$$Address);
10072
emit_cmpfp3(masm, $dst$$Register);
10074
ins_pipe( pipe_slow );
10077
// Spill to obtain 24-bit precision
10078
instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10079
predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10080
match(Set dst (SubF src1 src2));
10082
format %{ "FSUB $dst,$src1 - $src2" %}
10083
opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10084
ins_encode( Push_Reg_FPR(src1),
10086
Pop_Mem_FPR(dst) );
10087
ins_pipe( fpu_mem_reg_reg );
10090
// This instruction does not round to 24-bits
10091
instruct subFPR_reg(regFPR dst, regFPR src) %{
10092
predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10093
match(Set dst (SubF dst src));
10095
format %{ "FSUB $dst,$src" %}
10096
opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
10097
ins_encode( Push_Reg_FPR(src),
10098
OpcP, RegOpc(dst) );
10099
ins_pipe( fpu_reg_reg );
10102
// Spill to obtain 24-bit precision
10103
instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10104
predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10105
match(Set dst (AddF src1 src2));
10107
format %{ "FADD $dst,$src1,$src2" %}
10108
opcode(0xD8, 0x0); /* D8 C0+i */
10109
ins_encode( Push_Reg_FPR(src2),
10111
Pop_Mem_FPR(dst) );
10112
ins_pipe( fpu_mem_reg_reg );
10115
// This instruction does not round to 24-bits
10116
instruct addFPR_reg(regFPR dst, regFPR src) %{
10117
predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10118
match(Set dst (AddF dst src));
10120
format %{ "FLD $src\n\t"
10122
opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10123
ins_encode( Push_Reg_FPR(src),
10124
OpcP, RegOpc(dst) );
10125
ins_pipe( fpu_reg_reg );
10128
instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
10129
predicate(UseSSE==0);
10130
match(Set dst (AbsF src));
10132
format %{ "FABS" %}
10133
opcode(0xE1, 0xD9);
10134
ins_encode( OpcS, OpcP );
10135
ins_pipe( fpu_reg_reg );
10138
instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
10139
predicate(UseSSE==0);
10140
match(Set dst (NegF src));
10142
format %{ "FCHS" %}
10143
opcode(0xE0, 0xD9);
10144
ins_encode( OpcS, OpcP );
10145
ins_pipe( fpu_reg_reg );
10148
// Cisc-alternate to addFPR_reg
10149
// Spill to obtain 24-bit precision
10150
instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10151
predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10152
match(Set dst (AddF src1 (LoadF src2)));
10154
format %{ "FLD $src2\n\t"
10155
"FADD ST,$src1\n\t"
10157
opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
10158
ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
10160
Pop_Mem_FPR(dst), ClearInstMark );
10161
ins_pipe( fpu_mem_reg_mem );
10164
// Cisc-alternate to addFPR_reg
10165
// This instruction does not round to 24-bits
10166
instruct addFPR_reg_mem(regFPR dst, memory src) %{
10167
predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10168
match(Set dst (AddF dst (LoadF src)));
10170
format %{ "FADD $dst,$src" %}
10171
opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */
10172
ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src),
10173
OpcP, RegOpc(dst), ClearInstMark );
10174
ins_pipe( fpu_reg_mem );
10177
// // Following two instructions for _222_mpegaudio
10178
// Spill to obtain 24-bit precision
10179
instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
10180
predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10181
match(Set dst (AddF src1 src2));
10183
format %{ "FADD $dst,$src1,$src2" %}
10184
opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
10185
ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src1),
10187
Pop_Mem_FPR(dst), ClearInstMark );
10188
ins_pipe( fpu_mem_reg_mem );
10191
// Cisc-spill variant
10192
// Spill to obtain 24-bit precision
10193
instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
10194
predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10195
match(Set dst (AddF src1 (LoadF src2)));
10197
format %{ "FADD $dst,$src1,$src2 cisc" %}
10198
opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
10199
ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
10200
OpcP, RMopc_Mem(secondary,src1),
10203
ins_pipe( fpu_mem_mem_mem );
10206
// Spill to obtain 24-bit precision
10207
instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10208
predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10209
match(Set dst (AddF src1 src2));
10211
format %{ "FADD $dst,$src1,$src2" %}
10212
opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */
10213
ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
10214
OpcP, RMopc_Mem(secondary,src1),
10217
ins_pipe( fpu_mem_mem_mem );
10221
// Spill to obtain 24-bit precision
10222
instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10223
predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10224
match(Set dst (AddF src con));
10225
format %{ "FLD $src\n\t"
10226
"FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10229
__ fld_s($src$$reg - 1); // FLD ST(i-1)
10230
__ fadd_s($constantaddress($con));
10231
__ fstp_s(Address(rsp, $dst$$disp));
10233
ins_pipe(fpu_mem_reg_con);
10236
// This instruction does not round to 24-bits
10237
instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10238
predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10239
match(Set dst (AddF src con));
10240
format %{ "FLD $src\n\t"
10241
"FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10244
__ fld_s($src$$reg - 1); // FLD ST(i-1)
10245
__ fadd_s($constantaddress($con));
10246
__ fstp_d($dst$$reg);
10248
ins_pipe(fpu_reg_reg_con);
10251
// Spill to obtain 24-bit precision
10252
instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10253
predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10254
match(Set dst (MulF src1 src2));
10256
format %{ "FLD $src1\n\t"
10259
opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
10260
ins_encode( Push_Reg_FPR(src1),
10262
Pop_Mem_FPR(dst) );
10263
ins_pipe( fpu_mem_reg_reg );
10266
// This instruction does not round to 24-bits
10267
instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
10268
predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10269
match(Set dst (MulF src1 src2));
10271
format %{ "FLD $src1\n\t"
10274
opcode(0xD8, 0x1); /* D8 C8+i */
10275
ins_encode( Push_Reg_FPR(src2),
10277
Pop_Reg_FPR(dst) );
10278
ins_pipe( fpu_reg_reg_reg );
10282
// Spill to obtain 24-bit precision
10283
// Cisc-alternate to reg-reg multiply
10284
instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10285
predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10286
match(Set dst (MulF src1 (LoadF src2)));
10288
format %{ "FLD_S $src2\n\t"
10291
opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */
10292
ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
10294
Pop_Mem_FPR(dst), ClearInstMark );
10295
ins_pipe( fpu_mem_reg_mem );
10298
// This instruction does not round to 24-bits
10299
// Cisc-alternate to reg-reg multiply
10300
instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
10301
predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10302
match(Set dst (MulF src1 (LoadF src2)));
10304
format %{ "FMUL $dst,$src1,$src2" %}
10305
opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */
10306
ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
10308
Pop_Reg_FPR(dst), ClearInstMark );
10309
ins_pipe( fpu_reg_reg_mem );
10312
// Spill to obtain 24-bit precision
10313
instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10314
predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10315
match(Set dst (MulF src1 src2));
10317
format %{ "FMUL $dst,$src1,$src2" %}
10318
opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */
10319
ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
10320
OpcP, RMopc_Mem(secondary,src1),
10323
ins_pipe( fpu_mem_mem_mem );
10326
// Spill to obtain 24-bit precision
10327
instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10328
predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10329
match(Set dst (MulF src con));
10331
format %{ "FLD $src\n\t"
10332
"FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10335
__ fld_s($src$$reg - 1); // FLD ST(i-1)
10336
__ fmul_s($constantaddress($con));
10337
__ fstp_s(Address(rsp, $dst$$disp));
10339
ins_pipe(fpu_mem_reg_con);
10342
// This instruction does not round to 24-bits
10343
instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10344
predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10345
match(Set dst (MulF src con));
10347
format %{ "FLD $src\n\t"
10348
"FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10351
__ fld_s($src$$reg - 1); // FLD ST(i-1)
10352
__ fmul_s($constantaddress($con));
10353
__ fstp_d($dst$$reg);
10355
ins_pipe(fpu_reg_reg_con);
10360
// MACRO1 -- subsume unshared load into mulFPR
10361
// This instruction does not round to 24-bits
10362
instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
10363
predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10364
match(Set dst (MulF (LoadF mem1) src));
10366
format %{ "FLD $mem1 ===MACRO1===\n\t"
10369
opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */
10370
ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,mem1),
10372
Pop_Reg_FPR(dst), ClearInstMark );
10373
ins_pipe( fpu_reg_reg_mem );
10376
// MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10377
// This instruction does not round to 24-bits
10378
instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
10379
predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10380
match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10383
format %{ "FLD $mem1 ===MACRO2===\n\t"
10384
"FMUL ST,$src1 subsume mulFPR left load\n\t"
10385
"FADD ST,$src2\n\t"
10387
opcode(0xD9); /* LoadF D9 /0 */
10388
ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem1),
10391
Pop_Reg_FPR(dst), ClearInstMark );
10392
ins_pipe( fpu_reg_mem_reg_reg );
10395
// MACRO3 -- addFPR a mulFPR
10396
// This instruction does not round to 24-bits. It is a '2-address'
10397
// instruction in that the result goes back to src2. This eliminates
10398
// a move from the macro; possibly the register allocator will have
10399
// to add it back (and maybe not).
10400
instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
10401
predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10402
match(Set src2 (AddF (MulF src0 src1) src2));
10404
format %{ "FLD $src0 ===MACRO3===\n\t"
10405
"FMUL ST,$src1\n\t"
10406
"FADDP $src2,ST" %}
10407
opcode(0xD9); /* LoadF D9 /0 */
10408
ins_encode( Push_Reg_FPR(src0),
10410
FAddP_reg_ST(src2) );
10411
ins_pipe( fpu_reg_reg_reg );
10414
// MACRO4 -- divFPR subFPR
10415
// This instruction does not round to 24-bits
10416
instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
10417
predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10418
match(Set dst (DivF (SubF src2 src1) src3));
10420
format %{ "FLD $src2 ===MACRO4===\n\t"
10421
"FSUB ST,$src1\n\t"
10422
"FDIV ST,$src3\n\t"
10424
opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10425
ins_encode( Push_Reg_FPR(src2),
10426
subFPR_divFPR_encode(src1,src3),
10427
Pop_Reg_FPR(dst) );
10428
ins_pipe( fpu_reg_reg_reg_reg );
10431
// Spill to obtain 24-bit precision
10432
instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10433
predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10434
match(Set dst (DivF src1 src2));
10436
format %{ "FDIV $dst,$src1,$src2" %}
10437
opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
10438
ins_encode( Push_Reg_FPR(src1),
10440
Pop_Mem_FPR(dst) );
10441
ins_pipe( fpu_mem_reg_reg );
10444
// This instruction does not round to 24-bits
10445
instruct divFPR_reg(regFPR dst, regFPR src) %{
10446
predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10447
match(Set dst (DivF dst src));
10449
format %{ "FDIV $dst,$src" %}
10450
opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10451
ins_encode( Push_Reg_FPR(src),
10452
OpcP, RegOpc(dst) );
10453
ins_pipe( fpu_reg_reg );
10457
// Spill to obtain 24-bit precision
10458
instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10459
predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10460
match(Set dst (ModF src1 src2));
10461
effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10463
format %{ "FMOD $dst,$src1,$src2" %}
10464
ins_encode( Push_Reg_Mod_DPR(src1, src2),
10466
Push_Result_Mod_DPR(src2),
10468
ins_pipe( pipe_slow );
10471
// This instruction does not round to 24-bits
10472
instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
10473
predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10474
match(Set dst (ModF dst src));
10475
effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10477
format %{ "FMOD $dst,$src" %}
10478
ins_encode(Push_Reg_Mod_DPR(dst, src),
10480
Push_Result_Mod_DPR(src),
10482
ins_pipe( pipe_slow );
10485
instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
10486
predicate(UseSSE>=1);
10487
match(Set dst (ModF src0 src1));
10488
effect(KILL rax, KILL cr);
10489
format %{ "SUB ESP,4\t # FMOD\n"
10490
"\tMOVSS [ESP+0],$src1\n"
10491
"\tFLD_S [ESP+0]\n"
10492
"\tMOVSS [ESP+0],$src0\n"
10493
"\tFLD_S [ESP+0]\n"
10499
"\tFSTP_S [ESP+0]\n"
10500
"\tMOVSS $dst,[ESP+0]\n"
10502
"\tFSTP ST0\t # Restore FPU Stack"
10505
ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
10506
ins_pipe( pipe_slow );
10510
//----------Arithmetic Conversion Instructions---------------------------------
10511
// The conversions operations are all Alpha sorted. Please keep it that way!
10513
instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
10514
predicate(UseSSE==0);
10515
match(Set dst (RoundFloat src));
10517
format %{ "FST_S $dst,$src\t# F-round" %}
10518
ins_encode( Pop_Mem_Reg_FPR(dst, src) );
10519
ins_pipe( fpu_mem_reg );
10522
instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
10523
predicate(UseSSE<=1);
10524
match(Set dst (RoundDouble src));
10526
format %{ "FST_D $dst,$src\t# D-round" %}
10527
ins_encode( Pop_Mem_Reg_DPR(dst, src) );
10528
ins_pipe( fpu_mem_reg );
10531
// Force rounding to 24-bit precision and 6-bit exponent
10532
instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
10533
predicate(UseSSE==0);
10534
match(Set dst (ConvD2F src));
10535
format %{ "FST_S $dst,$src\t# F-round" %}
10537
roundFloat_mem_reg(dst,src);
10541
// Force rounding to 24-bit precision and 6-bit exponent
10542
instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
10543
predicate(UseSSE==1);
10544
match(Set dst (ConvD2F src));
10546
format %{ "SUB ESP,4\n\t"
10547
"FST_S [ESP],$src\t# F-round\n\t"
10548
"MOVSS $dst,[ESP]\n\t"
10552
if ($src$$reg != FPR1L_enc) {
10553
__ fld_s($src$$reg-1);
10554
__ fstp_s(Address(rsp, 0));
10556
__ fst_s(Address(rsp, 0));
10558
__ movflt($dst$$XMMRegister, Address(rsp, 0));
10561
ins_pipe( pipe_slow );
10564
// Force rounding double precision to single precision
10565
instruct convD2F_reg(regF dst, regD src) %{
10566
predicate(UseSSE>=2);
10567
match(Set dst (ConvD2F src));
10568
format %{ "CVTSD2SS $dst,$src\t# F-round" %}
10570
__ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10572
ins_pipe( pipe_slow );
10575
instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
10576
predicate(UseSSE==0);
10577
match(Set dst (ConvF2D src));
10578
format %{ "FST_S $dst,$src\t# D-round" %}
10579
ins_encode( Pop_Reg_Reg_DPR(dst, src));
10580
ins_pipe( fpu_reg_reg );
10583
instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
10584
predicate(UseSSE==1);
10585
match(Set dst (ConvF2D src));
10586
format %{ "FST_D $dst,$src\t# D-round" %}
10588
roundDouble_mem_reg(dst,src);
10592
instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
10593
predicate(UseSSE==1);
10594
match(Set dst (ConvF2D src));
10596
format %{ "SUB ESP,4\n\t"
10597
"MOVSS [ESP] $src\n\t"
10600
"FSTP $dst\t# D-round" %}
10603
__ movflt(Address(rsp, 0), $src$$XMMRegister);
10604
__ fld_s(Address(rsp, 0));
10606
__ fstp_d($dst$$reg);
10608
ins_pipe( pipe_slow );
10611
instruct convF2D_reg(regD dst, regF src) %{
10612
predicate(UseSSE>=2);
10613
match(Set dst (ConvF2D src));
10614
format %{ "CVTSS2SD $dst,$src\t# D-round" %}
10616
__ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10618
ins_pipe( pipe_slow );
10621
// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
10622
instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
10623
predicate(UseSSE<=1);
10624
match(Set dst (ConvD2I src));
10625
effect( KILL tmp, KILL cr );
10626
format %{ "FLD $src\t# Convert double to int \n\t"
10627
"FLDCW trunc mode\n\t"
10629
"FISTp [ESP + #0]\n\t"
10630
"FLDCW std/24-bit mode\n\t"
10632
"CMP EAX,0x80000000\n\t"
10635
"CALL d2i_wrapper\n"
10637
ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
10638
ins_pipe( pipe_slow );
10641
// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
10642
instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
10643
predicate(UseSSE>=2);
10644
match(Set dst (ConvD2I src));
10645
effect( KILL tmp, KILL cr );
10646
format %{ "CVTTSD2SI $dst, $src\n\t"
10647
"CMP $dst,0x80000000\n\t"
10650
"MOVSD [ESP], $src\n\t"
10653
"CALL d2i_wrapper\n"
10657
__ cvttsd2sil($dst$$Register, $src$$XMMRegister);
10658
__ cmpl($dst$$Register, 0x80000000);
10659
__ jccb(Assembler::notEqual, fast);
10661
__ movdbl(Address(rsp, 0), $src$$XMMRegister);
10662
__ fld_d(Address(rsp, 0));
10664
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
10665
__ post_call_nop();
10668
ins_pipe( pipe_slow );
10671
instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
10672
predicate(UseSSE<=1);
10673
match(Set dst (ConvD2L src));
10675
format %{ "FLD $src\t# Convert double to long\n\t"
10676
"FLDCW trunc mode\n\t"
10678
"FISTp [ESP + #0]\n\t"
10679
"FLDCW std/24-bit mode\n\t"
10682
"CMP EDX,0x80000000\n\t"
10687
"CALL d2l_wrapper\n"
10689
ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) );
10690
ins_pipe( pipe_slow );
10693
// XMM lacks a float/double->long conversion, so use the old FPU stack.
10694
instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
10695
predicate (UseSSE>=2);
10696
match(Set dst (ConvD2L src));
10698
format %{ "SUB ESP,8\t# Convert double to long\n\t"
10699
"MOVSD [ESP],$src\n\t"
10701
"FLDCW trunc mode\n\t"
10702
"FISTp [ESP + #0]\n\t"
10703
"FLDCW std/24-bit mode\n\t"
10706
"CMP EDX,0x80000000\n\t"
10711
"MOVSD [ESP],$src\n\t"
10714
"CALL d2l_wrapper\n"
10719
__ movdbl(Address(rsp, 0), $src$$XMMRegister);
10720
__ fld_d(Address(rsp, 0));
10721
__ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
10722
__ fistp_d(Address(rsp, 0));
10723
// Restore the rounding mode, mask the exception
10724
if (Compile::current()->in_24_bit_fp_mode()) {
10725
__ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
10727
__ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
10729
// Load the converted long, adjust CPU stack
10732
__ cmpl(rdx, 0x80000000);
10733
__ jccb(Assembler::notEqual, fast);
10734
__ testl(rax, rax);
10735
__ jccb(Assembler::notEqual, fast);
10737
__ movdbl(Address(rsp, 0), $src$$XMMRegister);
10738
__ fld_d(Address(rsp, 0));
10740
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
10741
__ post_call_nop();
10744
ins_pipe( pipe_slow );
10747
// Convert a double to an int. Java semantics require we do complex
10748
// manglations in the corner cases. So we set the rounding mode to
10749
// 'zero', store the darned double down as an int, and reset the
10750
// rounding mode to 'nearest'. The hardware stores a flag value down
10751
// if we would overflow or converted a NAN; we check for this and
10752
// and go the slow path if needed.
10753
instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
10754
predicate(UseSSE==0);
10755
match(Set dst (ConvF2I src));
10756
effect( KILL tmp, KILL cr );
10757
format %{ "FLD $src\t# Convert float to int \n\t"
10758
"FLDCW trunc mode\n\t"
10760
"FISTp [ESP + #0]\n\t"
10761
"FLDCW std/24-bit mode\n\t"
10763
"CMP EAX,0x80000000\n\t"
10766
"CALL d2i_wrapper\n"
10768
// DPR2I_encoding works for FPR2I
10769
ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
10770
ins_pipe( pipe_slow );
10773
// Convert a float in xmm to an int reg.
10774
instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
10775
predicate(UseSSE>=1);
10776
match(Set dst (ConvF2I src));
10777
effect( KILL tmp, KILL cr );
10778
format %{ "CVTTSS2SI $dst, $src\n\t"
10779
"CMP $dst,0x80000000\n\t"
10782
"MOVSS [ESP], $src\n\t"
10785
"CALL d2i_wrapper\n"
10789
__ cvttss2sil($dst$$Register, $src$$XMMRegister);
10790
__ cmpl($dst$$Register, 0x80000000);
10791
__ jccb(Assembler::notEqual, fast);
10793
__ movflt(Address(rsp, 0), $src$$XMMRegister);
10794
__ fld_s(Address(rsp, 0));
10796
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
10797
__ post_call_nop();
10800
ins_pipe( pipe_slow );
10803
instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
10804
predicate(UseSSE==0);
10805
match(Set dst (ConvF2L src));
10807
format %{ "FLD $src\t# Convert float to long\n\t"
10808
"FLDCW trunc mode\n\t"
10810
"FISTp [ESP + #0]\n\t"
10811
"FLDCW std/24-bit mode\n\t"
10814
"CMP EDX,0x80000000\n\t"
10819
"CALL d2l_wrapper\n"
10821
// DPR2L_encoding works for FPR2L
10822
ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
10823
ins_pipe( pipe_slow );
10826
// XMM lacks a float/double->long conversion, so use the old FPU stack.
10827
instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
10828
predicate (UseSSE>=1);
10829
match(Set dst (ConvF2L src));
10831
format %{ "SUB ESP,8\t# Convert float to long\n\t"
10832
"MOVSS [ESP],$src\n\t"
10834
"FLDCW trunc mode\n\t"
10835
"FISTp [ESP + #0]\n\t"
10836
"FLDCW std/24-bit mode\n\t"
10839
"CMP EDX,0x80000000\n\t"
10843
"SUB ESP,4\t# Convert float to long\n\t"
10844
"MOVSS [ESP],$src\n\t"
10847
"CALL d2l_wrapper\n"
10852
__ movflt(Address(rsp, 0), $src$$XMMRegister);
10853
__ fld_s(Address(rsp, 0));
10854
__ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
10855
__ fistp_d(Address(rsp, 0));
10856
// Restore the rounding mode, mask the exception
10857
if (Compile::current()->in_24_bit_fp_mode()) {
10858
__ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
10860
__ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
10862
// Load the converted long, adjust CPU stack
10865
__ cmpl(rdx, 0x80000000);
10866
__ jccb(Assembler::notEqual, fast);
10867
__ testl(rax, rax);
10868
__ jccb(Assembler::notEqual, fast);
10870
__ movflt(Address(rsp, 0), $src$$XMMRegister);
10871
__ fld_s(Address(rsp, 0));
10873
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
10874
__ post_call_nop();
10877
ins_pipe( pipe_slow );
10880
instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
10881
predicate( UseSSE<=1 );
10882
match(Set dst (ConvI2D src));
10883
format %{ "FILD $src\n\t"
10885
opcode(0xDB, 0x0); /* DB /0 */
10886
ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
10887
ins_pipe( fpu_reg_mem );
10890
instruct convI2D_reg(regD dst, rRegI src) %{
10891
predicate( UseSSE>=2 && !UseXmmI2D );
10892
match(Set dst (ConvI2D src));
10893
format %{ "CVTSI2SD $dst,$src" %}
10895
__ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
10897
ins_pipe( pipe_slow );
10900
instruct convI2D_mem(regD dst, memory mem) %{
10901
predicate( UseSSE>=2 );
10902
match(Set dst (ConvI2D (LoadI mem)));
10903
format %{ "CVTSI2SD $dst,$mem" %}
10905
__ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
10907
ins_pipe( pipe_slow );
10910
instruct convXI2D_reg(regD dst, rRegI src)
10912
predicate( UseSSE>=2 && UseXmmI2D );
10913
match(Set dst (ConvI2D src));
10915
format %{ "MOVD $dst,$src\n\t"
10916
"CVTDQ2PD $dst,$dst\t# i2d" %}
10918
__ movdl($dst$$XMMRegister, $src$$Register);
10919
__ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
10921
ins_pipe(pipe_slow); // XXX
10924
instruct convI2DPR_mem(regDPR dst, memory mem) %{
10925
predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
10926
match(Set dst (ConvI2D (LoadI mem)));
10927
format %{ "FILD $mem\n\t"
10929
opcode(0xDB); /* DB /0 */
10930
ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
10931
Pop_Reg_DPR(dst), ClearInstMark);
10932
ins_pipe( fpu_reg_mem );
10935
// Convert a byte to a float; no rounding step needed.
10936
instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
10937
predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
10938
match(Set dst (ConvI2F src));
10939
format %{ "FILD $src\n\t"
10942
opcode(0xDB, 0x0); /* DB /0 */
10943
ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
10944
ins_pipe( fpu_reg_mem );
10947
// In 24-bit mode, force exponent rounding by storing back out
10948
instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
10949
predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10950
match(Set dst (ConvI2F src));
10952
format %{ "FILD $src\n\t"
10954
opcode(0xDB, 0x0); /* DB /0 */
10955
ins_encode( Push_Mem_I(src),
10957
ins_pipe( fpu_mem_mem );
10960
// In 24-bit mode, force exponent rounding by storing back out
10961
instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
10962
predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10963
match(Set dst (ConvI2F (LoadI mem)));
10965
format %{ "FILD $mem\n\t"
10967
opcode(0xDB); /* DB /0 */
10968
ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
10969
Pop_Mem_FPR(dst), ClearInstMark);
10970
ins_pipe( fpu_mem_mem );
10973
// This instruction does not round to 24-bits
10974
instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
10975
predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10976
match(Set dst (ConvI2F src));
10977
format %{ "FILD $src\n\t"
10979
opcode(0xDB, 0x0); /* DB /0 */
10980
ins_encode( Push_Mem_I(src),
10982
ins_pipe( fpu_reg_mem );
10985
// This instruction does not round to 24-bits
10986
instruct convI2FPR_mem(regFPR dst, memory mem) %{
10987
predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10988
match(Set dst (ConvI2F (LoadI mem)));
10989
format %{ "FILD $mem\n\t"
10991
opcode(0xDB); /* DB /0 */
10992
ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
10993
Pop_Reg_FPR(dst), ClearInstMark);
10994
ins_pipe( fpu_reg_mem );
10997
// Convert an int to a float in xmm; no rounding step needed.
10998
instruct convI2F_reg(regF dst, rRegI src) %{
10999
predicate( UseSSE==1 || ( UseSSE>=2 && !UseXmmI2F ));
11000
match(Set dst (ConvI2F src));
11001
format %{ "CVTSI2SS $dst, $src" %}
11003
__ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11005
ins_pipe( pipe_slow );
11008
instruct convXI2F_reg(regF dst, rRegI src)
11010
predicate( UseSSE>=2 && UseXmmI2F );
11011
match(Set dst (ConvI2F src));
11013
format %{ "MOVD $dst,$src\n\t"
11014
"CVTDQ2PS $dst,$dst\t# i2f" %}
11016
__ movdl($dst$$XMMRegister, $src$$Register);
11017
__ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11019
ins_pipe(pipe_slow); // XXX
11022
instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
11023
match(Set dst (ConvI2L src));
11026
format %{ "MOV $dst.lo,$src\n\t"
11027
"MOV $dst.hi,$src\n\t"
11028
"SAR $dst.hi,31" %}
11029
ins_encode(convert_int_long(dst,src));
11030
ins_pipe( ialu_reg_reg_long );
11033
// Zero-extend convert int to long
11034
instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
11035
match(Set dst (AndL (ConvI2L src) mask) );
11036
effect( KILL flags );
11038
format %{ "MOV $dst.lo,$src\n\t"
11039
"XOR $dst.hi,$dst.hi" %}
11040
opcode(0x33); // XOR
11041
ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11042
ins_pipe( ialu_reg_reg_long );
11046
instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
11047
match(Set dst (AndL src mask) );
11048
effect( KILL flags );
11050
format %{ "MOV $dst.lo,$src.lo\n\t"
11051
"XOR $dst.hi,$dst.hi\n\t" %}
11052
opcode(0x33); // XOR
11053
ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11054
ins_pipe( ialu_reg_reg_long );
11057
instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
11058
predicate (UseSSE<=1);
11059
match(Set dst (ConvL2D src));
11061
format %{ "PUSH $src.hi\t# Convert long to double\n\t"
11063
"FILD ST,[ESP + #0]\n\t"
11065
"FSTP_D $dst\t# D-round" %}
11066
opcode(0xDF, 0x5); /* DF /5 */
11067
ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
11068
ins_pipe( pipe_slow );
11071
instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
11072
predicate (UseSSE>=2);
11073
match(Set dst (ConvL2D src));
11075
format %{ "PUSH $src.hi\t# Convert long to double\n\t"
11079
"MOVSD $dst,[ESP]\n\t"
11081
opcode(0xDF, 0x5); /* DF /5 */
11082
ins_encode(convert_long_double2(src), Push_ResultD(dst));
11083
ins_pipe( pipe_slow );
11086
instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
11087
predicate (UseSSE>=1);
11088
match(Set dst (ConvL2F src));
11090
format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
11094
"MOVSS $dst,[ESP]\n\t"
11096
opcode(0xDF, 0x5); /* DF /5 */
11097
ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
11098
ins_pipe( pipe_slow );
11101
instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
11102
match(Set dst (ConvL2F src));
11104
format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
11106
"FILD ST,[ESP + #0]\n\t"
11108
"FSTP_S $dst\t# F-round" %}
11109
opcode(0xDF, 0x5); /* DF /5 */
11110
ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
11111
ins_pipe( pipe_slow );
11114
instruct convL2I_reg( rRegI dst, eRegL src ) %{
11115
match(Set dst (ConvL2I src));
11116
effect( DEF dst, USE src );
11117
format %{ "MOV $dst,$src.lo" %}
11118
ins_encode(enc_CopyL_Lo(dst,src));
11119
ins_pipe( ialu_reg_reg );
11122
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11123
match(Set dst (MoveF2I src));
11124
effect( DEF dst, USE src );
11126
format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
11128
__ movl($dst$$Register, Address(rsp, $src$$disp));
11130
ins_pipe( ialu_reg_mem );
11133
instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
11134
predicate(UseSSE==0);
11135
match(Set dst (MoveF2I src));
11136
effect( DEF dst, USE src );
11139
format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
11140
ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11141
ins_pipe( fpu_mem_reg );
11144
instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
11145
predicate(UseSSE>=1);
11146
match(Set dst (MoveF2I src));
11147
effect( DEF dst, USE src );
11150
format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
11152
__ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11154
ins_pipe( pipe_slow );
11157
instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
11158
predicate(UseSSE>=2);
11159
match(Set dst (MoveF2I src));
11160
effect( DEF dst, USE src );
11162
format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
11164
__ movdl($dst$$Register, $src$$XMMRegister);
11166
ins_pipe( pipe_slow );
11169
instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11170
match(Set dst (MoveI2F src));
11171
effect( DEF dst, USE src );
11174
format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %}
11176
__ movl(Address(rsp, $dst$$disp), $src$$Register);
11178
ins_pipe( ialu_mem_reg );
11182
instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
11183
predicate(UseSSE==0);
11184
match(Set dst (MoveI2F src));
11185
effect(DEF dst, USE src);
11188
format %{ "FLD_S $src\n\t"
11189
"FSTP $dst\t# MoveI2F_stack_reg" %}
11190
opcode(0xD9); /* D9 /0, FLD m32real */
11191
ins_encode( SetInstMark, OpcP, RMopc_Mem_no_oop(0x00,src),
11192
Pop_Reg_FPR(dst), ClearInstMark );
11193
ins_pipe( fpu_reg_mem );
11196
instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
11197
predicate(UseSSE>=1);
11198
match(Set dst (MoveI2F src));
11199
effect( DEF dst, USE src );
11202
format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %}
11204
__ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11206
ins_pipe( pipe_slow );
11209
instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
11210
predicate(UseSSE>=2);
11211
match(Set dst (MoveI2F src));
11212
effect( DEF dst, USE src );
11215
format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %}
11217
__ movdl($dst$$XMMRegister, $src$$Register);
11219
ins_pipe( pipe_slow );
11222
instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
11223
match(Set dst (MoveD2L src));
11224
effect(DEF dst, USE src);
11227
format %{ "MOV $dst.lo,$src\n\t"
11228
"MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
11229
opcode(0x8B, 0x8B);
11230
ins_encode( SetInstMark, OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src), ClearInstMark);
11231
ins_pipe( ialu_mem_long_reg );
11234
instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
11235
predicate(UseSSE<=1);
11236
match(Set dst (MoveD2L src));
11237
effect(DEF dst, USE src);
11240
format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %}
11241
ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11242
ins_pipe( fpu_mem_reg );
11245
instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
11246
predicate(UseSSE>=2);
11247
match(Set dst (MoveD2L src));
11248
effect(DEF dst, USE src);
11250
format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %}
11252
__ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11254
ins_pipe( pipe_slow );
11257
instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
11258
predicate(UseSSE>=2);
11259
match(Set dst (MoveD2L src));
11260
effect(DEF dst, USE src, TEMP tmp);
11262
format %{ "MOVD $dst.lo,$src\n\t"
11263
"PSHUFLW $tmp,$src,0x4E\n\t"
11264
"MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
11266
__ movdl($dst$$Register, $src$$XMMRegister);
11267
__ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
11268
__ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
11270
ins_pipe( pipe_slow );
11273
instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
11274
match(Set dst (MoveL2D src));
11275
effect(DEF dst, USE src);
11278
format %{ "MOV $dst,$src.lo\n\t"
11279
"MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
11280
opcode(0x89, 0x89);
11281
ins_encode( SetInstMark, OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ), ClearInstMark );
11282
ins_pipe( ialu_mem_long_reg );
11286
instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
11287
predicate(UseSSE<=1);
11288
match(Set dst (MoveL2D src));
11289
effect(DEF dst, USE src);
11292
format %{ "FLD_D $src\n\t"
11293
"FSTP $dst\t# MoveL2D_stack_reg" %}
11294
opcode(0xDD); /* DD /0, FLD m64real */
11295
ins_encode( SetInstMark, OpcP, RMopc_Mem_no_oop(0x00,src),
11296
Pop_Reg_DPR(dst), ClearInstMark );
11297
ins_pipe( fpu_reg_mem );
11301
instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
11302
predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
11303
match(Set dst (MoveL2D src));
11304
effect(DEF dst, USE src);
11307
format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %}
11309
__ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11311
ins_pipe( pipe_slow );
11314
instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
11315
predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
11316
match(Set dst (MoveL2D src));
11317
effect(DEF dst, USE src);
11320
format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
11322
__ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11324
ins_pipe( pipe_slow );
11327
instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
11328
predicate(UseSSE>=2);
11329
match(Set dst (MoveL2D src));
11330
effect(TEMP dst, USE src, TEMP tmp);
11332
format %{ "MOVD $dst,$src.lo\n\t"
11333
"MOVD $tmp,$src.hi\n\t"
11334
"PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
11336
__ movdl($dst$$XMMRegister, $src$$Register);
11337
__ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
11338
__ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
11340
ins_pipe( pipe_slow );
11343
//----------------------------- CompressBits/ExpandBits ------------------------
11345
instruct compressBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{
11346
predicate(n->bottom_type()->isa_long());
11347
match(Set dst (CompressBits src mask));
11348
effect(TEMP rtmp, TEMP xtmp, KILL cr);
11349
format %{ "compress_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %}
11351
Label exit, partail_result;
11352
// Parallely extract both upper and lower 32 bits of source into destination register pair.
11353
// Merge the results of upper and lower destination registers such that upper destination
11354
// results are contiguously laid out after the lower destination result.
11355
__ pextl($dst$$Register, $src$$Register, $mask$$Register);
11356
__ pextl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register));
11357
__ popcntl($rtmp$$Register, $mask$$Register);
11358
// Skip merging if bit count of lower mask register is equal to 32 (register size).
11359
__ cmpl($rtmp$$Register, 32);
11360
__ jccb(Assembler::equal, exit);
11361
// Due to constraint on number of GPRs on 32 bit target, using XMM register as potential spill slot.
11362
__ movdl($xtmp$$XMMRegister, $rtmp$$Register);
11363
// Shift left the contents of upper destination register by true bit count of lower mask register
11364
// and merge with lower destination register.
11365
__ shlxl($rtmp$$Register, HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
11366
__ orl($dst$$Register, $rtmp$$Register);
11367
__ movdl($rtmp$$Register, $xtmp$$XMMRegister);
11368
// Zero out upper destination register if true bit count of lower 32 bit mask is zero
11369
// since contents of upper destination have already been copied to lower destination
11371
__ cmpl($rtmp$$Register, 0);
11372
__ jccb(Assembler::greater, partail_result);
11373
__ movl(HIGH_FROM_LOW($dst$$Register), 0);
11375
__ bind(partail_result);
11376
// Perform right shift over upper destination register to move out bits already copied
11377
// to lower destination register.
11378
__ subl($rtmp$$Register, 32);
11379
__ negl($rtmp$$Register);
11380
__ shrxl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
11383
ins_pipe( pipe_slow );
11386
instruct expandBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{
11387
predicate(n->bottom_type()->isa_long());
11388
match(Set dst (ExpandBits src mask));
11389
effect(TEMP rtmp, TEMP xtmp, KILL cr);
11390
format %{ "expand_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %}
11392
// Extraction operation sequentially reads the bits from source register starting from LSB
11393
// and lays them out into destination register at bit locations corresponding to true bits
11394
// in mask register. Thus number of source bits read are equal to combined true bit count
11395
// of mask register pair.
11396
Label exit, mask_clipping;
11397
__ pdepl($dst$$Register, $src$$Register, $mask$$Register);
11398
__ pdepl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register));
11399
__ popcntl($rtmp$$Register, $mask$$Register);
11400
// If true bit count of lower mask register is 32 then none of bit of lower source register
11401
// will feed to upper destination register.
11402
__ cmpl($rtmp$$Register, 32);
11403
__ jccb(Assembler::equal, exit);
11404
// Due to constraint on number of GPRs on 32 bit target, using XMM register as potential spill slot.
11405
__ movdl($xtmp$$XMMRegister, $rtmp$$Register);
11406
// Shift right the contents of lower source register to remove already consumed bits.
11407
__ shrxl($rtmp$$Register, $src$$Register, $rtmp$$Register);
11408
// Extract the bits from lower source register starting from LSB under the influence
11409
// of upper mask register.
11410
__ pdepl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register, HIGH_FROM_LOW($mask$$Register));
11411
__ movdl($rtmp$$Register, $xtmp$$XMMRegister);
11412
__ subl($rtmp$$Register, 32);
11413
__ negl($rtmp$$Register);
11414
__ movdl($xtmp$$XMMRegister, $mask$$Register);
11415
__ movl($mask$$Register, HIGH_FROM_LOW($mask$$Register));
11416
// Clear the set bits in upper mask register which have been used to extract the contents
11417
// from lower source register.
11418
__ bind(mask_clipping);
11419
__ blsrl($mask$$Register, $mask$$Register);
11420
__ decrementl($rtmp$$Register, 1);
11421
__ jccb(Assembler::greater, mask_clipping);
11422
// Starting from LSB extract the bits from upper source register under the influence of
11423
// remaining set bits in upper mask register.
11424
__ pdepl($rtmp$$Register, HIGH_FROM_LOW($src$$Register), $mask$$Register);
11425
// Merge the partial results extracted from lower and upper source register bits.
11426
__ orl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
11427
__ movdl($mask$$Register, $xtmp$$XMMRegister);
11430
ins_pipe( pipe_slow );
11433
// =======================================================================
11434
// Fast clearing of an array
11435
// Small non-constant length ClearArray for non-AVX512 targets.
11436
instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11437
predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
11438
match(Set dummy (ClearArray cnt base));
11439
effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11441
format %{ $$template
11442
$$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
11443
$$emit$$"CMP InitArrayShortSize,rcx\n\t"
11444
$$emit$$"JG LARGE\n\t"
11445
$$emit$$"SHL ECX, 1\n\t"
11446
$$emit$$"DEC ECX\n\t"
11447
$$emit$$"JS DONE\t# Zero length\n\t"
11448
$$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t"
11449
$$emit$$"DEC ECX\n\t"
11450
$$emit$$"JGE LOOP\n\t"
11451
$$emit$$"JMP DONE\n\t"
11452
$$emit$$"# LARGE:\n\t"
11453
if (UseFastStosb) {
11454
$$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
11455
$$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11456
} else if (UseXMMForObjInit) {
11457
$$emit$$"MOV RDI,RAX\n\t"
11458
$$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
11459
$$emit$$"JMPQ L_zero_64_bytes\n\t"
11460
$$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11461
$$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11462
$$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11463
$$emit$$"ADD 0x40,RAX\n\t"
11464
$$emit$$"# L_zero_64_bytes:\n\t"
11465
$$emit$$"SUB 0x8,RCX\n\t"
11466
$$emit$$"JGE L_loop\n\t"
11467
$$emit$$"ADD 0x4,RCX\n\t"
11468
$$emit$$"JL L_tail\n\t"
11469
$$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11470
$$emit$$"ADD 0x20,RAX\n\t"
11471
$$emit$$"SUB 0x4,RCX\n\t"
11472
$$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11473
$$emit$$"ADD 0x4,RCX\n\t"
11474
$$emit$$"JLE L_end\n\t"
11475
$$emit$$"DEC RCX\n\t"
11476
$$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11477
$$emit$$"VMOVQ XMM0,(RAX)\n\t"
11478
$$emit$$"ADD 0x8,RAX\n\t"
11479
$$emit$$"DEC RCX\n\t"
11480
$$emit$$"JGE L_sloop\n\t"
11481
$$emit$$"# L_end:\n\t"
11483
$$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
11484
$$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11489
__ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11490
$tmp$$XMMRegister, false, knoreg);
11492
ins_pipe( pipe_slow );
11495
// Small non-constant length ClearArray for AVX512 targets.
11496
instruct rep_stos_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11497
predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
11498
match(Set dummy (ClearArray cnt base));
11500
effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11502
format %{ $$template
11503
$$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
11504
$$emit$$"CMP InitArrayShortSize,rcx\n\t"
11505
$$emit$$"JG LARGE\n\t"
11506
$$emit$$"SHL ECX, 1\n\t"
11507
$$emit$$"DEC ECX\n\t"
11508
$$emit$$"JS DONE\t# Zero length\n\t"
11509
$$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t"
11510
$$emit$$"DEC ECX\n\t"
11511
$$emit$$"JGE LOOP\n\t"
11512
$$emit$$"JMP DONE\n\t"
11513
$$emit$$"# LARGE:\n\t"
11514
if (UseFastStosb) {
11515
$$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
11516
$$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11517
} else if (UseXMMForObjInit) {
11518
$$emit$$"MOV RDI,RAX\n\t"
11519
$$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
11520
$$emit$$"JMPQ L_zero_64_bytes\n\t"
11521
$$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11522
$$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11523
$$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11524
$$emit$$"ADD 0x40,RAX\n\t"
11525
$$emit$$"# L_zero_64_bytes:\n\t"
11526
$$emit$$"SUB 0x8,RCX\n\t"
11527
$$emit$$"JGE L_loop\n\t"
11528
$$emit$$"ADD 0x4,RCX\n\t"
11529
$$emit$$"JL L_tail\n\t"
11530
$$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11531
$$emit$$"ADD 0x20,RAX\n\t"
11532
$$emit$$"SUB 0x4,RCX\n\t"
11533
$$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11534
$$emit$$"ADD 0x4,RCX\n\t"
11535
$$emit$$"JLE L_end\n\t"
11536
$$emit$$"DEC RCX\n\t"
11537
$$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11538
$$emit$$"VMOVQ XMM0,(RAX)\n\t"
11539
$$emit$$"ADD 0x8,RAX\n\t"
11540
$$emit$$"DEC RCX\n\t"
11541
$$emit$$"JGE L_sloop\n\t"
11542
$$emit$$"# L_end:\n\t"
11544
$$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
11545
$$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11550
__ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11551
$tmp$$XMMRegister, false, $ktmp$$KRegister);
11553
ins_pipe( pipe_slow );
11556
// Large non-constant length ClearArray for non-AVX512 targets.
11557
instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11558
predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
11559
match(Set dummy (ClearArray cnt base));
11560
effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11561
format %{ $$template
11562
if (UseFastStosb) {
11563
$$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
11564
$$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
11565
$$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11566
} else if (UseXMMForObjInit) {
11567
$$emit$$"MOV RDI,RAX\t# ClearArray:\n\t"
11568
$$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
11569
$$emit$$"JMPQ L_zero_64_bytes\n\t"
11570
$$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11571
$$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11572
$$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11573
$$emit$$"ADD 0x40,RAX\n\t"
11574
$$emit$$"# L_zero_64_bytes:\n\t"
11575
$$emit$$"SUB 0x8,RCX\n\t"
11576
$$emit$$"JGE L_loop\n\t"
11577
$$emit$$"ADD 0x4,RCX\n\t"
11578
$$emit$$"JL L_tail\n\t"
11579
$$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11580
$$emit$$"ADD 0x20,RAX\n\t"
11581
$$emit$$"SUB 0x4,RCX\n\t"
11582
$$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11583
$$emit$$"ADD 0x4,RCX\n\t"
11584
$$emit$$"JLE L_end\n\t"
11585
$$emit$$"DEC RCX\n\t"
11586
$$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11587
$$emit$$"VMOVQ XMM0,(RAX)\n\t"
11588
$$emit$$"ADD 0x8,RAX\n\t"
11589
$$emit$$"DEC RCX\n\t"
11590
$$emit$$"JGE L_sloop\n\t"
11591
$$emit$$"# L_end:\n\t"
11593
$$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
11594
$$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
11595
$$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11600
__ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11601
$tmp$$XMMRegister, true, knoreg);
11603
ins_pipe( pipe_slow );
11606
// Large non-constant length ClearArray for AVX512 targets.
11607
instruct rep_stos_large_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11608
predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
11609
match(Set dummy (ClearArray cnt base));
11610
effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11611
format %{ $$template
11612
if (UseFastStosb) {
11613
$$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
11614
$$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
11615
$$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11616
} else if (UseXMMForObjInit) {
11617
$$emit$$"MOV RDI,RAX\t# ClearArray:\n\t"
11618
$$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
11619
$$emit$$"JMPQ L_zero_64_bytes\n\t"
11620
$$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11621
$$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11622
$$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11623
$$emit$$"ADD 0x40,RAX\n\t"
11624
$$emit$$"# L_zero_64_bytes:\n\t"
11625
$$emit$$"SUB 0x8,RCX\n\t"
11626
$$emit$$"JGE L_loop\n\t"
11627
$$emit$$"ADD 0x4,RCX\n\t"
11628
$$emit$$"JL L_tail\n\t"
11629
$$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11630
$$emit$$"ADD 0x20,RAX\n\t"
11631
$$emit$$"SUB 0x4,RCX\n\t"
11632
$$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11633
$$emit$$"ADD 0x4,RCX\n\t"
11634
$$emit$$"JLE L_end\n\t"
11635
$$emit$$"DEC RCX\n\t"
11636
$$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11637
$$emit$$"VMOVQ XMM0,(RAX)\n\t"
11638
$$emit$$"ADD 0x8,RAX\n\t"
11639
$$emit$$"DEC RCX\n\t"
11640
$$emit$$"JGE L_sloop\n\t"
11641
$$emit$$"# L_end:\n\t"
11643
$$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
11644
$$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
11645
$$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11650
__ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11651
$tmp$$XMMRegister, true, $ktmp$$KRegister);
11653
ins_pipe( pipe_slow );
11656
// Small constant length ClearArray for AVX512 targets.
11657
instruct rep_stos_im(immI cnt, kReg ktmp, eRegP base, regD tmp, rRegI zero, Universe dummy, eFlagsReg cr)
11659
predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
11660
match(Set dummy (ClearArray cnt base));
11662
effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
11663
format %{ "clear_mem_imm $base , $cnt \n\t" %}
11665
__ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
11667
ins_pipe(pipe_slow);
11670
instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11671
eAXRegI result, regD tmp1, eFlagsReg cr) %{
11672
predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11673
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11674
effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11676
format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
11678
__ string_compare($str1$$Register, $str2$$Register,
11679
$cnt1$$Register, $cnt2$$Register, $result$$Register,
11680
$tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
11682
ins_pipe( pipe_slow );
11685
instruct string_compareL_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11686
eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11687
predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11688
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11689
effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11691
format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
11693
__ string_compare($str1$$Register, $str2$$Register,
11694
$cnt1$$Register, $cnt2$$Register, $result$$Register,
11695
$tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
11697
ins_pipe( pipe_slow );
11700
instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11701
eAXRegI result, regD tmp1, eFlagsReg cr) %{
11702
predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11703
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11704
effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11706
format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
11708
__ string_compare($str1$$Register, $str2$$Register,
11709
$cnt1$$Register, $cnt2$$Register, $result$$Register,
11710
$tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
11712
ins_pipe( pipe_slow );
11715
instruct string_compareU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11716
eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11717
predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11718
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11719
effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11721
format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
11723
__ string_compare($str1$$Register, $str2$$Register,
11724
$cnt1$$Register, $cnt2$$Register, $result$$Register,
11725
$tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
11727
ins_pipe( pipe_slow );
11730
instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11731
eAXRegI result, regD tmp1, eFlagsReg cr) %{
11732
predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11733
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11734
effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11736
format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
11738
__ string_compare($str1$$Register, $str2$$Register,
11739
$cnt1$$Register, $cnt2$$Register, $result$$Register,
11740
$tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
11742
ins_pipe( pipe_slow );
11745
instruct string_compareLU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11746
eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11747
predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11748
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11749
effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11751
format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
11753
__ string_compare($str1$$Register, $str2$$Register,
11754
$cnt1$$Register, $cnt2$$Register, $result$$Register,
11755
$tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
11757
ins_pipe( pipe_slow );
11760
instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11761
eAXRegI result, regD tmp1, eFlagsReg cr) %{
11762
predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11763
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11764
effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11766
format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
11768
__ string_compare($str2$$Register, $str1$$Register,
11769
$cnt2$$Register, $cnt1$$Register, $result$$Register,
11770
$tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
11772
ins_pipe( pipe_slow );
11775
instruct string_compareUL_evex(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11776
eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11777
predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11778
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11779
effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11781
format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
11783
__ string_compare($str2$$Register, $str1$$Register,
11784
$cnt2$$Register, $cnt1$$Register, $result$$Register,
11785
$tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
11787
ins_pipe( pipe_slow );
11790
// fast string equals
11791
// NOTE(review): numeric lines / missing `%}` delimiters below are extraction artifacts.
// StrEquals intrinsic, SSE path. First arg to arrays_equals() is false (string, not array variant);
// the trailing false selects byte rather than char element width.
instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11792
regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11793
predicate(!VM_Version::supports_avx512vlbw());
11794
match(Set result (StrEquals (Binary str1 str2) cnt));
11795
effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11797
format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
11799
__ arrays_equals(false, $str1$$Register, $str2$$Register,
11800
$cnt$$Register, $result$$Register, $tmp3$$Register,
11801
$tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
11804
ins_pipe( pipe_slow );
11807
// AVX-512 variant: requires avx512vlbw; adds opmask temp $ktmp.
instruct string_equals_evex(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11808
regD tmp1, regD tmp2, kReg ktmp, eBXRegI tmp3, eFlagsReg cr) %{
11809
predicate(VM_Version::supports_avx512vlbw());
11810
match(Set result (StrEquals (Binary str1 str2) cnt));
11811
effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11813
format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
11815
__ arrays_equals(false, $str1$$Register, $str2$$Register,
11816
$cnt$$Register, $result$$Register, $tmp3$$Register,
11817
$tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
11820
ins_pipe( pipe_slow );
11824
// fast search of substring with known size.
11825
// StrIndexOf with a constant substring length (immI int_cnt2), LL encoding.
// NOTE(review): both string_indexofC8 (large constant substrings) and string_indexof
// (small substrings) are emitted back-to-back with no visible selection — the
// `if (icnt2 >= …) { … } else { … }` guard appears to have been lost in extraction; restore it.
instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11826
eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11827
predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11828
match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11829
effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11831
format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11833
int icnt2 = (int)$int_cnt2$$constant;
11835
// IndexOf for constant substrings with size >= 16 elements
11836
// which don't need to be loaded through stack.
11837
__ string_indexofC8($str1$$Register, $str2$$Register,
11838
$cnt1$$Register, $cnt2$$Register,
11839
icnt2, $result$$Register,
11840
$vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11842
// Small strings are loaded through stack if they cross page boundary.
11843
__ string_indexof($str1$$Register, $str2$$Register,
11844
$cnt1$$Register, $cnt2$$Register,
11845
icnt2, $result$$Register,
11846
$vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11849
ins_pipe( pipe_slow );
11852
// fast search of substring with known size.
11853
// Same as above for the UU (char[]) encoding; same lost size-selection guard applies.
instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11854
eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11855
predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11856
match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11857
effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11859
format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11861
int icnt2 = (int)$int_cnt2$$constant;
11863
// IndexOf for constant substrings with size >= 8 elements
11864
// which don't need to be loaded through stack.
11865
__ string_indexofC8($str1$$Register, $str2$$Register,
11866
$cnt1$$Register, $cnt2$$Register,
11867
icnt2, $result$$Register,
11868
$vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11870
// Small strings are loaded through stack if they cross page boundary.
11871
__ string_indexof($str1$$Register, $str2$$Register,
11872
$cnt1$$Register, $cnt2$$Register,
11873
icnt2, $result$$Register,
11874
$vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11877
ins_pipe( pipe_slow );
11880
// fast search of substring with known size.
11881
// Same again for the mixed UL encoding; same lost size-selection guard applies.
instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11882
eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11883
predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11884
match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11885
effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11887
format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11889
int icnt2 = (int)$int_cnt2$$constant;
11891
// IndexOf for constant substrings with size >= 8 elements
11892
// which don't need to be loaded through stack.
11893
__ string_indexofC8($str1$$Register, $str2$$Register,
11894
$cnt1$$Register, $cnt2$$Register,
11895
icnt2, $result$$Register,
11896
$vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11898
// Small strings are loaded through stack if they cross page boundary.
11899
__ string_indexof($str1$$Register, $str2$$Register,
11900
$cnt1$$Register, $cnt2$$Register,
11901
icnt2, $result$$Register,
11902
$vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11905
ins_pipe( pipe_slow );
11908
// StrIndexOf with a runtime substring length: the (-1) constant-count argument tells
// string_indexof() the length is not a compile-time constant. One instruct per encoding (LL/UU/UL).
instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11909
eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
11910
predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11911
match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11912
effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11914
format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
11916
__ string_indexof($str1$$Register, $str2$$Register,
11917
$cnt1$$Register, $cnt2$$Register,
11918
(-1), $result$$Register,
11919
$vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11921
ins_pipe( pipe_slow );
11924
// UU (char[]) encoding variant.
instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11925
eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
11926
predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11927
match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11928
effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11930
format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
11932
__ string_indexof($str1$$Register, $str2$$Register,
11933
$cnt1$$Register, $cnt2$$Register,
11934
(-1), $result$$Register,
11935
$vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11937
ins_pipe( pipe_slow );
11940
// Mixed UL encoding variant.
instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11941
eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
11942
predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11943
match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11944
effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11946
format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
11948
__ string_indexof($str1$$Register, $str2$$Register,
11949
$cnt1$$Register, $cnt2$$Register,
11950
(-1), $result$$Register,
11951
$vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11953
ins_pipe( pipe_slow );
11956
// StrIndexOfChar, U encoding — single-char search in a UTF-16 string (per the format string).
instruct string_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
11957
eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
11958
predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
11959
match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
11960
effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
11961
format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
11963
__ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
11964
$vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
11966
ins_pipe( pipe_slow );
11969
// L-encoding counterpart — single-char search in a Latin-1 string (per the format string).
instruct stringL_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
11970
eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
11971
predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
11972
match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
11973
effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
11974
format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
11976
__ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
11977
$vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
11979
ins_pipe( pipe_slow );
11983
// fast array equals
11984
// AryEq intrinsic, byte arrays (LL), SSE path. First arg to arrays_equals() is true
// (array variant, length read from the arrays); trailing false selects byte elements.
instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11985
regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11987
predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
11988
match(Set result (AryEq ary1 ary2));
11989
effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11992
format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11994
__ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11995
$tmp3$$Register, $result$$Register, $tmp4$$Register,
11996
$tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
11998
ins_pipe( pipe_slow );
12001
// AVX-512 byte-array variant: adds opmask temp $ktmp.
instruct array_equalsB_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12002
regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12004
predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
12005
match(Set result (AryEq ary1 ary2));
12006
effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12009
format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12011
__ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12012
$tmp3$$Register, $result$$Register, $tmp4$$Register,
12013
$tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
12015
ins_pipe( pipe_slow );
12018
// char[] (UU) variant: trailing true selects char (16-bit) elements.
instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12019
regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12021
predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
12022
match(Set result (AryEq ary1 ary2));
12023
effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12026
format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12028
__ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12029
$tmp3$$Register, $result$$Register, $tmp4$$Register,
12030
$tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
12032
ins_pipe( pipe_slow );
12035
// AVX-512 char[] variant.
instruct array_equalsC_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12036
regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12038
predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
12039
match(Set result (AryEq ary1 ary2));
12040
effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12043
format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12045
__ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12046
$tmp3$$Register, $result$$Register, $tmp4$$Register,
12047
$tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
12049
ins_pipe( pipe_slow );
12052
// CountPositives intrinsic, SSE/fallback path — used when either avx512vlbw or bmi2 is missing;
// passes knoreg twice since no opmask registers are available.
instruct count_positives(eSIRegP ary1, eCXRegI len, eAXRegI result,
12053
regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
12055
predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12056
match(Set result (CountPositives ary1 len));
12057
effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
12059
format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
12061
__ count_positives($ary1$$Register, $len$$Register,
12062
$result$$Register, $tmp3$$Register,
12063
$tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
12065
ins_pipe( pipe_slow );
12068
// EVEX path: requires both avx512vlbw and bmi2; supplies two opmask temps.
instruct count_positives_evex(eSIRegP ary1, eCXRegI len, eAXRegI result,
12069
regD tmp1, regD tmp2, kReg ktmp1, kReg ktmp2, eBXRegI tmp3, eFlagsReg cr)
12071
predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12072
match(Set result (CountPositives ary1 len));
12073
effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
12075
format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
12077
__ count_positives($ary1$$Register, $len$$Register,
12078
$result$$Register, $tmp3$$Register,
12079
$tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
12081
ins_pipe( pipe_slow );
12085
// fast char[] to byte[] compression
12086
// StrCompressedCopy intrinsic, non-EVEX path.
// NOTE(review): the char_array_compress() call below ends with a trailing comma and no
// closing `);` — the final argument line (presumably the knoreg opmask args, mirroring the
// _evex variant's ktmp line) was lost in extraction; restore from upstream.
instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
12087
regD tmp3, regD tmp4, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12088
predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12089
match(Set result (StrCompressedCopy src (Binary dst len)));
12090
effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12092
format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
12094
__ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
12095
$tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12096
$tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
12099
ins_pipe( pipe_slow );
12102
// EVEX path: requires avx512vlbw + bmi2; passes two opmask temps to char_array_compress().
instruct string_compress_evex(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
12103
regD tmp3, regD tmp4, kReg ktmp1, kReg ktmp2, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12104
predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12105
match(Set result (StrCompressedCopy src (Binary dst len)));
12106
effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12108
format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
12110
__ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
12111
$tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12112
$tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
12113
$ktmp1$$KRegister, $ktmp2$$KRegister);
12115
ins_pipe( pipe_slow );
12118
// fast byte[] to char[] inflation
12119
// StrInflatedCopy intrinsic, non-EVEX path; produces no value (Universe dummy).
instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
12120
regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
12121
predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12122
match(Set dummy (StrInflatedCopy src (Binary dst len)));
12123
effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
12125
format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
12127
__ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12128
$tmp1$$XMMRegister, $tmp2$$Register, knoreg);
12130
ins_pipe( pipe_slow );
12133
// EVEX inflation path with one opmask temp.
instruct string_inflate_evex(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
12134
regD tmp1, kReg ktmp, eCXRegI tmp2, eFlagsReg cr) %{
12135
predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12136
match(Set dummy (StrInflatedCopy src (Binary dst len)));
12137
effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
12139
format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
12141
__ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12142
$tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
12144
ins_pipe( pipe_slow );
12147
// encode char[] to byte[] in ISO_8859_1
12148
// EncodeISOArray intrinsic; the trailing bool to encode_iso_array() selects
// ASCII mode (false here = ISO-8859-1), dispatched on the node's is_ascii() flag.
instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
12149
regD tmp1, regD tmp2, regD tmp3, regD tmp4,
12150
eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12151
predicate(!((EncodeISOArrayNode*)n)->is_ascii());
12152
match(Set result (EncodeISOArray src (Binary dst len)));
12153
effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12155
format %{ "Encode iso array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
12157
__ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12158
$tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12159
$tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
12161
ins_pipe( pipe_slow );
12164
// encode char[] to byte[] in ASCII
12165
// ASCII twin of the above (predicate is_ascii(), trailing bool true).
instruct encode_ascii_array(eSIRegP src, eDIRegP dst, eDXRegI len,
12166
regD tmp1, regD tmp2, regD tmp3, regD tmp4,
12167
eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12168
predicate(((EncodeISOArrayNode*)n)->is_ascii());
12169
match(Set result (EncodeISOArray src (Binary dst len)));
12170
effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12172
format %{ "Encode ascii array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
12174
__ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12175
$tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12176
$tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
12178
ins_pipe( pipe_slow );
12181
//----------Control Flow Instructions------------------------------------------
12182
// Signed compare Instructions
12183
// CMP r32, r32 (opcode 0x3B /r).
instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
12184
match(Set cr (CmpI op1 op2));
12185
effect( DEF cr, USE op1, USE op2 );
12186
format %{ "CMP $op1,$op2" %}
12187
opcode(0x3B); /* Opcode 3B /r */
12188
ins_encode( OpcP, RegReg( op1, op2) );
12189
ins_pipe( ialu_cr_reg_reg );
12192
// CMP r32, imm (opcode 0x81 /7, sign-extended 8- or 32-bit immediate via Con8or32).
instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
12193
match(Set cr (CmpI op1 op2));
12194
effect( DEF cr, USE op1 );
12195
format %{ "CMP $op1,$op2" %}
12196
opcode(0x81,0x07); /* Opcode 81 /7 */
12197
// ins_encode( RegImm( op1, op2) ); /* Was CmpImm */
12198
ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12199
ins_pipe( ialu_cr_reg_imm );
12202
// Cisc-spilled version of cmpI_eReg
12203
// CMP r32, m32 — folds the load into the compare.
instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
12204
match(Set cr (CmpI op1 (LoadI op2)));
12206
format %{ "CMP $op1,$op2" %}
12208
opcode(0x3B); /* Opcode 3B /r */
12209
ins_encode( SetInstMark, OpcP, RegMem( op1, op2), ClearInstMark );
12210
ins_pipe( ialu_cr_reg_mem );
12213
// Compare against zero lowered to TEST r,r (no memory operand, shorter encoding).
instruct testI_reg( eFlagsReg cr, rRegI src, immI_0 zero ) %{
12214
match(Set cr (CmpI src zero));
12215
effect( DEF cr, USE src );
12217
format %{ "TEST $src,$src" %}
12219
ins_encode( OpcP, RegReg( src, src ) );
12220
ins_pipe( ialu_cr_reg_imm );
12223
// (AndI src con) compared to zero lowered to TEST r, imm32.
instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI_0 zero ) %{
12224
match(Set cr (CmpI (AndI src con) zero));
12226
format %{ "TEST $src,$con" %}
12228
ins_encode( OpcP, RegOpc(src), Con32(con) );
12229
ins_pipe( ialu_cr_reg_imm );
12232
// (AndI src mem) compared to zero lowered to TEST r, m32.
instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI_0 zero ) %{
12233
match(Set cr (CmpI (AndI src mem) zero));
12235
format %{ "TEST $src,$mem" %}
12237
ins_encode( SetInstMark, OpcP, RegMem( src, mem ), ClearInstMark );
12238
ins_pipe( ialu_cr_reg_mem );
12241
// Unsigned compare Instructions; really, same as signed except they
12242
// produce an eFlagsRegU instead of eFlagsReg.
12243
// CMP r32, r32 producing unsigned-flavored flags.
instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
12244
match(Set cr (CmpU op1 op2));
12246
format %{ "CMPu $op1,$op2" %}
12247
opcode(0x3B); /* Opcode 3B /r */
12248
ins_encode( OpcP, RegReg( op1, op2) );
12249
ins_pipe( ialu_cr_reg_reg );
12252
// CMP r32, imm (unsigned flags result).
instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
12253
match(Set cr (CmpU op1 op2));
12255
format %{ "CMPu $op1,$op2" %}
12256
opcode(0x81,0x07); /* Opcode 81 /7 */
12257
ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12258
ins_pipe( ialu_cr_reg_imm );
12261
// // Cisc-spilled version of cmpU_eReg
12262
// CMP r32, m32 (unsigned flags result).
instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
12263
match(Set cr (CmpU op1 (LoadI op2)));
12265
format %{ "CMPu $op1,$op2" %}
12267
opcode(0x3B); /* Opcode 3B /r */
12268
ins_encode( SetInstMark, OpcP, RegMem( op1, op2), ClearInstMark );
12269
ins_pipe( ialu_cr_reg_mem );
12272
// // Cisc-spilled version of cmpU_eReg
12273
//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
12274
// match(Set cr (CmpU (LoadI op1) op2));
12276
// format %{ "CMPu $op1,$op2" %}
12278
// opcode(0x39); /* Opcode 39 /r */
12279
// ins_encode( OpcP, RegMem( op1, op2) );
12282
// Unsigned compare against zero lowered to TEST r,r.
instruct testU_reg( eFlagsRegU cr, rRegI src, immI_0 zero ) %{
12283
match(Set cr (CmpU src zero));
12285
format %{ "TESTu $src,$src" %}
12287
ins_encode( OpcP, RegReg( src, src ) );
12288
ins_pipe( ialu_cr_reg_imm );
12291
// Unsigned pointer compare Instructions
12292
// CmpP (pointer compare) — pointers compare unsigned; CMP r32, r32.
instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
12293
match(Set cr (CmpP op1 op2));
12295
format %{ "CMPu $op1,$op2" %}
12296
opcode(0x3B); /* Opcode 3B /r */
12297
ins_encode( OpcP, RegReg( op1, op2) );
12298
ins_pipe( ialu_cr_reg_reg );
12301
// CMP r32, ptr-imm; SetInstMark/ClearInstMark bracket the encoding (immP may need relocation).
instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
12302
match(Set cr (CmpP op1 op2));
12304
format %{ "CMPu $op1,$op2" %}
12305
opcode(0x81,0x07); /* Opcode 81 /7 */
12306
ins_encode( SetInstMark, OpcSErm( op1, op2 ), Con8or32( op2 ), ClearInstMark );
12307
ins_pipe( ialu_cr_reg_imm );
12310
// // Cisc-spilled version of cmpP_eReg
12311
// CMP r32, m32 with the pointer load folded in.
instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
12312
match(Set cr (CmpP op1 (LoadP op2)));
12314
format %{ "CMPu $op1,$op2" %}
12316
opcode(0x3B); /* Opcode 3B /r */
12317
ins_encode( SetInstMark, OpcP, RegMem( op1, op2), ClearInstMark );
12318
ins_pipe( ialu_cr_reg_mem );
12321
// // Cisc-spilled version of cmpP_eReg
12322
//instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
12323
// match(Set cr (CmpP (LoadP op1) op2));
12325
// format %{ "CMPu $op1,$op2" %}
12327
// opcode(0x39); /* Opcode 39 /r */
12328
// ins_encode( OpcP, RegMem( op1, op2) );
12331
// Compare raw pointer (used in out-of-heap check).
12332
// Only works because non-oop pointers must be raw pointers
12333
// and raw pointers have no anti-dependencies.
12334
// Guarded by the predicate: the loaded pointer must need no relocation (raw, non-oop).
instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
12335
predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
12336
match(Set cr (CmpP op1 (LoadP op2)));
12338
format %{ "CMPu $op1,$op2" %}
12339
opcode(0x3B); /* Opcode 3B /r */
12340
ins_encode( SetInstMark, OpcP, RegMem( op1, op2), ClearInstMark );
12341
ins_pipe( ialu_cr_reg_mem );
12345
// This will generate a signed flags result. This should be ok
12346
// since any compare to a zero should be eq/neq.
12347
// Pointer null-check lowered to TEST r,r.
instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
12348
match(Set cr (CmpP src zero));
12350
format %{ "TEST $src,$src" %}
12352
ins_encode( OpcP, RegReg( src, src ) );
12353
ins_pipe( ialu_cr_reg_imm );
12356
// Cisc-spilled version of testP_reg
12357
// This will generate a signed flags result. This should be ok
12358
// since any compare to a zero should be eq/neq.
12359
// Null-check a pointer loaded from memory: TEST m32, 0xFFFFFFFF (F7 /0).
instruct testP_Reg_mem( eFlagsReg cr, memory op, immI_0 zero ) %{
12360
match(Set cr (CmpP (LoadP op) zero));
12362
format %{ "TEST $op,0xFFFFFFFF" %}
12364
opcode(0xF7); /* Opcode F7 /0 */
12365
ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF), ClearInstMark );
12366
ins_pipe( ialu_cr_reg_imm );
12369
// Yanked all unsigned pointer compare operations.
12370
// Pointer compares are done with CmpP which is already unsigned.
12372
//----------Max and Min--------------------------------------------------------
12375
// *** Min and Max using the conditional move are slower than the
12376
// *** branch version on a Pentium III.
12377
// // Conditional move for min
12378
//instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12379
// effect( USE_DEF op2, USE op1, USE cr );
12380
// format %{ "CMOVlt $op2,$op1\t! min" %}
12381
// opcode(0x4C,0x0F);
12382
// ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12383
// ins_pipe( pipe_cmov_reg );
12386
//// Min Register with Register (P6 version)
12387
//instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
12388
// predicate(VM_Version::supports_cmov() );
12389
// match(Set op2 (MinI op1 op2));
12393
// compI_eReg(cr,op1,op2);
12394
// cmovI_reg_lt(op2,op1,cr);
12398
// Min Register with Register (generic version)
12399
// MinI via the min_enc encoding class; clobbers flags.
instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12400
match(Set dst (MinI dst src));
12401
effect(KILL flags);
12404
format %{ "MIN $dst,$src" %}
12406
ins_encode( min_enc(dst,src) );
12407
ins_pipe( pipe_slow );
12410
// Max Register with Register
12411
// *** Min and Max using the conditional move are slower than the
12412
// *** branch version on a Pentium III.
12413
// // Conditional move for max
12414
//instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12415
// effect( USE_DEF op2, USE op1, USE cr );
12416
// format %{ "CMOVgt $op2,$op1\t! max" %}
12417
// opcode(0x4F,0x0F);
12418
// ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12419
// ins_pipe( pipe_cmov_reg );
12422
// // Max Register with Register (P6 version)
12423
//instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
12424
// predicate(VM_Version::supports_cmov() );
12425
// match(Set op2 (MaxI op1 op2));
12429
// compI_eReg(cr,op1,op2);
12430
// cmovI_reg_gt(op2,op1,cr);
12434
// Max Register with Register (generic version)
12435
// MaxI via the max_enc encoding class; clobbers flags.
instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12436
match(Set dst (MaxI dst src));
12437
effect(KILL flags);
12440
format %{ "MAX $dst,$src" %}
12442
ins_encode( max_enc(dst,src) );
12443
ins_pipe( pipe_slow );
12446
// ============================================================================
12447
// Counted Loop limit node which represents exact final iterator value.
12448
// Note: the resulting value should fit into integer range since
12449
// counted loops have limit check on overflow.
12450
// Computes limit = init + stride * ceil((limit - init) / stride) using 64-bit
// (EDX:EAX) arithmetic and idivl, per the format string below.
// NOTE(review): the arithmetic below cannot be correct as shown — it unconditionally
// performs both the positive-stride (add strd-1) and negative-stride (add strd+1 / lneg)
// adjustments; the `if (strd > 0) { … } else { … }` branches were evidently lost in
// extraction (note the non-contiguous original line numbers). Restore from upstream.
instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
12451
match(Set limit (LoopLimit (Binary init limit) stride));
12452
effect(TEMP limit_hi, TEMP tmp, KILL flags);
12455
format %{ "loopLimit $init,$limit,$stride # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
12457
int strd = (int)$stride$$constant;
12458
assert(strd != 1 && strd != -1, "sanity");
12459
int m1 = (strd > 0) ? 1 : -1;
12460
// Convert limit to long (EAX:EDX)
12462
// Convert init to long (init:tmp)
12463
__ movl($tmp$$Register, $init$$Register);
12464
__ sarl($tmp$$Register, 31);
12466
__ subl($limit$$Register, $init$$Register);
12467
__ sbbl($limit_hi$$Register, $tmp$$Register);
12470
__ addl($limit$$Register, (strd - 1));
12471
__ adcl($limit_hi$$Register, 0);
12472
__ movl($tmp$$Register, strd);
12474
__ addl($limit$$Register, (strd + 1));
12475
__ adcl($limit_hi$$Register, -1);
12476
__ lneg($limit_hi$$Register, $limit$$Register);
12477
__ movl($tmp$$Register, -strd);
12479
// signed division: (EAX:EDX) / pos_stride
12480
__ idivl($tmp$$Register);
12483
__ negl($tmp$$Register);
12486
__ mull($tmp$$Register);
12487
// + init (ignore upper bits)
12488
__ addl($limit$$Register, $init$$Register);
12490
ins_pipe( pipe_slow );
12493
// ============================================================================
12494
// Branch Instructions
12496
// Computed jump through the constant-table jump table, indexed by switch_val.
instruct jumpXtnd(rRegI switch_val) %{
12497
match(Jump switch_val);
12499
format %{ "JMP [$constantaddress](,$switch_val,1)\n\t" %}
12501
// Jump to Address(table_base + switch_reg)
12502
Address index(noreg, $switch_val$$Register, Address::times_1);
12503
__ jump(ArrayAddress($constantaddress, index), noreg);
12505
ins_pipe(pipe_jmp);
12508
// Jump Direct - Label defines a relative address from JMP+1
12509
// Unconditional long jump to a label.
instruct jmpDir(label labl) %{
12514
format %{ "JMP $labl" %}
12517
Label* L = $labl$$label;
12518
__ jmp(*L, false); // Always long jump
12520
ins_pipe( pipe_jmp );
12523
// Jump Direct Conditional - Label defines a relative address from Jcc+1
12524
// Conditional long jump on signed flags.
instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
12529
format %{ "J$cop $labl" %}
12532
Label* L = $labl$$label;
12533
__ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12535
ins_pipe( pipe_jcc );
12538
// Jump Direct Conditional - Label defines a relative address from Jcc+1
12539
// Counted-loop back-branch, long form.
instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
12540
match(CountedLoopEnd cop cr);
12544
format %{ "J$cop $labl\t# Loop end" %}
12547
Label* L = $labl$$label;
12548
__ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12550
ins_pipe( pipe_jcc );
12553
// Jump Direct Conditional - using unsigned comparison
12554
instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12559
format %{ "J$cop,u $labl" %}
12562
Label* L = $labl$$label;
12563
__ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12565
ins_pipe(pipe_jcc);
12568
// Conditional jump on unordered-compare flags (no PF fixup needed for these conditions).
instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12573
format %{ "J$cop,u $labl" %}
12576
Label* L = $labl$$label;
12577
__ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12579
ins_pipe(pipe_jcc);
12582
// Conditional jump on unordered-compare flags where the parity flag must be consulted:
// for notEqual, an unordered result (PF set) also takes the branch; for equal, PF set
// skips it.
// NOTE(review): the `Label done;` declaration and its `bind(done)` referenced by the
// jccb below are missing — lost in extraction (original line-number gaps); restore.
instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12587
format %{ $$template
12588
if ($cop$$cmpcode == Assembler::notEqual) {
12589
$$emit$$"JP,u $labl\n\t"
12590
$$emit$$"J$cop,u $labl"
12592
$$emit$$"JP,u done\n\t"
12593
$$emit$$"J$cop,u $labl\n\t"
12598
Label* l = $labl$$label;
12599
if ($cop$$cmpcode == Assembler::notEqual) {
12600
__ jcc(Assembler::parity, *l, false);
12601
__ jcc(Assembler::notEqual, *l, false);
12602
} else if ($cop$$cmpcode == Assembler::equal) {
12604
__ jccb(Assembler::parity, done);
12605
__ jcc(Assembler::equal, *l, false);
12608
ShouldNotReachHere();
12611
ins_pipe(pipe_jcc);
12614
// ============================================================================
12615
// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass
12616
// array for an instance of the superklass. Set a hidden internal cache on a
12617
// hit (cache is checked with exposed code in gen_subtype_check()). Return
12618
// NZ for a miss or zero for a hit. The encoding ALSO sets flags.
12619
// Result-producing form: opcode(0x1) asks enc_PartialSubtypeCheck to XOR EDI on a hit
// (per the format comment), so $result is zero for hit / non-zero for miss.
instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
12620
match(Set result (PartialSubtypeCheck sub super));
12621
effect( KILL rcx, KILL cr );
12623
ins_cost(1100); // slightly larger than the next version
12624
format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
12625
"MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12626
"ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12627
"REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12628
"JNE,s miss\t\t# Missed: EDI not-zero\n\t"
12629
"MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
12630
"XOR $result,$result\t\t Hit: EDI zero\n\t"
12633
opcode(0x1); // Force a XOR of EDI
12634
ins_encode( enc_PartialSubtypeCheck() );
12635
ins_pipe( pipe_slow );
12638
// Flags-only form (result compared against zero by the matcher): opcode(0x0) skips the XOR.
instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
12639
match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12640
effect( KILL rcx, KILL result );
12643
format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
12644
"MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12645
"ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12646
"REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12647
"JNE,s miss\t\t# Missed: flags NZ\n\t"
12648
"MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12651
opcode(0x0); // No need to XOR EDI
12652
ins_encode( enc_PartialSubtypeCheck() );
12653
ins_pipe( pipe_slow );
12656
// ============================================================================
// Branch Instructions -- short offset versions
//
// These instructions are used to replace jumps of a long offset (the default
// match) with jumps of a shorter offset. These instructions are all tagged
// with the ins_short_branch attribute, which causes the ADLC to suppress the
// match rules in general matching. Instead, the ADLC generates a conversion
// method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant. The compiler will determine if a
// branch can be taken by the is_short_branch_offset() predicate in the machine
// specific code section of the file.
12668
// Jump Direct - Label defines a relative address from JMP+1
12669
instruct jmpDir_short(label labl) %{
12674
format %{ "JMP,s $labl" %}
12677
Label* L = $labl$$label;
12680
ins_pipe( pipe_jmp );
12681
ins_short_branch(1);
12684
// Jump Direct Conditional - Label defines a relative address from Jcc+1
12685
instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
12690
format %{ "J$cop,s $labl" %}
12693
Label* L = $labl$$label;
12694
__ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12696
ins_pipe( pipe_jcc );
12697
ins_short_branch(1);
12700
// Jump Direct Conditional - Label defines a relative address from Jcc+1
12701
instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
12702
match(CountedLoopEnd cop cr);
12706
format %{ "J$cop,s $labl\t# Loop end" %}
12709
Label* L = $labl$$label;
12710
__ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12712
ins_pipe( pipe_jcc );
12713
ins_short_branch(1);
12716
// Jump Direct Conditional - using unsigned comparison
12717
instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12722
format %{ "J$cop,us $labl" %}
12725
Label* L = $labl$$label;
12726
__ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12728
ins_pipe( pipe_jcc );
12729
ins_short_branch(1);
12732
instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12737
format %{ "J$cop,us $labl" %}
12740
Label* L = $labl$$label;
12741
__ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12743
ins_pipe( pipe_jcc );
12744
ins_short_branch(1);
12747
instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12752
format %{ $$template
12753
if ($cop$$cmpcode == Assembler::notEqual) {
12754
$$emit$$"JP,u,s $labl\n\t"
12755
$$emit$$"J$cop,u,s $labl"
12757
$$emit$$"JP,u,s done\n\t"
12758
$$emit$$"J$cop,u,s $labl\n\t"
12764
Label* l = $labl$$label;
12765
if ($cop$$cmpcode == Assembler::notEqual) {
12766
__ jccb(Assembler::parity, *l);
12767
__ jccb(Assembler::notEqual, *l);
12768
} else if ($cop$$cmpcode == Assembler::equal) {
12770
__ jccb(Assembler::parity, done);
12771
__ jccb(Assembler::equal, *l);
12774
ShouldNotReachHere();
12777
ins_pipe(pipe_jcc);
12778
ins_short_branch(1);
12781
// ============================================================================
//
// Currently we hold longs in 2 registers. Comparing such values efficiently
// is tricky. The flavor of compare used depends on whether we are testing
// for LT, LE, or EQ. For a simple LT test we can check just the sign bit.
// The GE test is the negated LT test. The LE test can be had by commuting
// the operands (yielding a GE test) and then negating; negate again for the
// GT test. The EQ test is done by ORcc'ing the high and low halves, and the
// NE test is negated from that.
//
// Due to a shortcoming in the ADLC, it mixes up expressions like:
// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the
// difference between 'Y' and '0L'. The tree-matches for the CmpI sections
// are collapsed internally in the ADLC's dfa-gen code. The match for
// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
// foo match ends up with the wrong leaf. One fix is to not match both
// reg-reg and reg-zero forms of long-compare. This is unfortunate because
// both forms beat the trinary form of long-compare and both are very useful
// on Intel which has so few registers.
//
// Manifest a CmpL result in an integer register. Very painful.
// This is the test to avoid.
12804
instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
12805
match(Set dst (CmpL3 src1 src2));
12806
effect( KILL flags );
12808
format %{ "XOR $dst,$dst\n\t"
12809
"CMP $src1.hi,$src2.hi\n\t"
12812
"CMP $src1.lo,$src2.lo\n\t"
12815
"p_one:\tINC $dst\n\t"
12817
"m_one:\tDEC $dst\n"
12820
Label p_one, m_one, done;
12821
__ xorptr($dst$$Register, $dst$$Register);
12822
__ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
12823
__ jccb(Assembler::less, m_one);
12824
__ jccb(Assembler::greater, p_one);
12825
__ cmpl($src1$$Register, $src2$$Register);
12826
__ jccb(Assembler::below, m_one);
12827
__ jccb(Assembler::equal, done);
12829
__ incrementl($dst$$Register);
12832
__ decrementl($dst$$Register);
12835
ins_pipe( pipe_slow );
12839
// Manifest a CmpL result in the normal flags. Only good for LT or GE
12840
// compares. Can be used for LE or GT compares by reversing arguments.
12841
// NOT GOOD FOR EQ/NE tests.
12842
instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
12843
match( Set flags (CmpL src zero ));
12845
format %{ "TEST $src.hi,$src.hi" %}
12847
ins_encode( OpcP, RegReg_Hi2( src, src ) );
12848
ins_pipe( ialu_cr_reg_reg );
12851
// Manifest a CmpL result in the normal flags. Only good for LT or GE
12852
// compares. Can be used for LE or GT compares by reversing arguments.
12853
// NOT GOOD FOR EQ/NE tests.
12854
instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
12855
match( Set flags (CmpL src1 src2 ));
12856
effect( TEMP tmp );
12858
format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12859
"MOV $tmp,$src1.hi\n\t"
12860
"SBB $tmp,$src2.hi\t! Compute flags for long compare" %}
12861
ins_encode( long_cmp_flags2( src1, src2, tmp ) );
12862
ins_pipe( ialu_cr_reg_reg );
12865
// Long compares reg < zero/req OR reg >= zero/req.
12866
// Just a wrapper for a normal branch, plus the predicate test.
12867
instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
12868
match(If cmp flags);
12870
predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
12872
jmpCon(cmp,flags,labl); // JLT or JGE...
12877
// Manifest a CmpUL result in the normal flags. Only good for LT or GE
12878
// compares. Can be used for LE or GT compares by reversing arguments.
12879
// NOT GOOD FOR EQ/NE tests.
12880
instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
12881
match(Set flags (CmpUL src zero));
12883
format %{ "TEST $src.hi,$src.hi" %}
12885
ins_encode(OpcP, RegReg_Hi2(src, src));
12886
ins_pipe(ialu_cr_reg_reg);
12889
// Manifest a CmpUL result in the normal flags. Only good for LT or GE
12890
// compares. Can be used for LE or GT compares by reversing arguments.
12891
// NOT GOOD FOR EQ/NE tests.
12892
instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
12893
match(Set flags (CmpUL src1 src2));
12896
format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
12897
"MOV $tmp,$src1.hi\n\t"
12898
"SBB $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
12899
ins_encode(long_cmp_flags2(src1, src2, tmp));
12900
ins_pipe(ialu_cr_reg_reg);
12903
// Unsigned long compares reg < zero/req OR reg >= zero/req.
12904
// Just a wrapper for a normal branch, plus the predicate test.
12905
instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
12906
match(If cmp flags);
12908
predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
12910
jmpCon(cmp, flags, labl); // JLT or JGE...
12914
// Compare 2 longs and CMOVE longs.
12915
instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
12916
match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12917
predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12919
format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12920
"CMOV$cmp $dst.hi,$src.hi" %}
12922
ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12923
ins_pipe( pipe_cmov_reg_long );
12926
instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
12927
match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12928
predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12930
format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12931
"CMOV$cmp $dst.hi,$src.hi" %}
12933
ins_encode( SetInstMark, enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src), ClearInstMark );
12934
ins_pipe( pipe_cmov_reg_long );
12937
instruct cmovLL_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, eRegL src) %{
12938
match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12939
predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12942
cmovLL_reg_LTGE(cmp, flags, dst, src);
12946
instruct cmovLL_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, load_long_memory src) %{
12947
match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12948
predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12951
cmovLL_mem_LTGE(cmp, flags, dst, src);
12955
// Compare 2 longs and CMOVE ints.
12956
instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
12957
predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12958
match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12960
format %{ "CMOV$cmp $dst,$src" %}
12962
ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12963
ins_pipe( pipe_cmov_reg );
12966
instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
12967
predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12968
match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12970
format %{ "CMOV$cmp $dst,$src" %}
12972
ins_encode( SetInstMark, enc_cmov(cmp), RegMem( dst, src ), ClearInstMark );
12973
ins_pipe( pipe_cmov_mem );
12976
instruct cmovII_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, rRegI src) %{
12977
predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12978
match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12981
cmovII_reg_LTGE(cmp, flags, dst, src);
12985
instruct cmovII_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, memory src) %{
12986
predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12987
match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12990
cmovII_mem_LTGE(cmp, flags, dst, src);
12994
// Compare 2 longs and CMOVE ptrs.
12995
instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
12996
predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12997
match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12999
format %{ "CMOV$cmp $dst,$src" %}
13001
ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13002
ins_pipe( pipe_cmov_reg );
13005
// Compare 2 unsigned longs and CMOVE ptrs.
13006
instruct cmovPP_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegP dst, eRegP src) %{
13007
predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13008
match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13011
cmovPP_reg_LTGE(cmp,flags,dst,src);
13015
// Compare 2 longs and CMOVE doubles
13016
instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
13017
predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13018
match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13021
fcmovDPR_regS(cmp,flags,dst,src);
13025
// Compare 2 longs and CMOVE doubles
13026
instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
13027
predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13028
match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13031
fcmovD_regS(cmp,flags,dst,src);
13035
instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
13036
predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13037
match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13040
fcmovFPR_regS(cmp,flags,dst,src);
13044
instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
13045
predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13046
match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13049
fcmovF_regS(cmp,flags,dst,src);
13054
// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
13055
instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
13056
match( Set flags (CmpL src zero ));
13059
format %{ "MOV $tmp,$src.lo\n\t"
13060
"OR $tmp,$src.hi\t! Long is EQ/NE 0?" %}
13061
ins_encode( long_cmp_flags0( src, tmp ) );
13062
ins_pipe( ialu_reg_reg_long );
13065
// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
13066
instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
13067
match( Set flags (CmpL src1 src2 ));
13069
format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
13071
"CMP $src1.hi,$src2.hi\n\t"
13073
ins_encode( long_cmp_flags1( src1, src2 ) );
13074
ins_pipe( ialu_cr_reg_reg );
13077
// Long compare reg == zero/reg OR reg != zero/reg
13078
// Just a wrapper for a normal branch, plus the predicate test.
13079
instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
13080
match(If cmp flags);
13082
predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
13084
jmpCon(cmp,flags,labl); // JEQ or JNE...
13089
// Manifest a CmpUL result in the normal flags. Only good for EQ/NE compares.
13090
instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
13091
match(Set flags (CmpUL src zero));
13094
format %{ "MOV $tmp,$src.lo\n\t"
13095
"OR $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
13096
ins_encode(long_cmp_flags0(src, tmp));
13097
ins_pipe(ialu_reg_reg_long);
13100
// Manifest a CmpUL result in the normal flags. Only good for EQ/NE compares.
13101
instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
13102
match(Set flags (CmpUL src1 src2));
13104
format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
13106
"CMP $src1.hi,$src2.hi\n\t"
13108
ins_encode(long_cmp_flags1(src1, src2));
13109
ins_pipe(ialu_cr_reg_reg);
13112
// Unsigned long compare reg == zero/reg OR reg != zero/reg
13113
// Just a wrapper for a normal branch, plus the predicate test.
13114
instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
13115
match(If cmp flags);
13117
predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
13119
jmpCon(cmp, flags, labl); // JEQ or JNE...
13123
// Compare 2 longs and CMOVE longs.
13124
instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
13125
match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13126
predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13128
format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13129
"CMOV$cmp $dst.hi,$src.hi" %}
13131
ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13132
ins_pipe( pipe_cmov_reg_long );
13135
instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
13136
match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13137
predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13139
format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13140
"CMOV$cmp $dst.hi,$src.hi" %}
13142
ins_encode( SetInstMark, enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src), ClearInstMark );
13143
ins_pipe( pipe_cmov_reg_long );
13146
// Compare 2 longs and CMOVE ints.
13147
instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
13148
predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13149
match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13151
format %{ "CMOV$cmp $dst,$src" %}
13153
ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13154
ins_pipe( pipe_cmov_reg );
13157
instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
13158
predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13159
match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13161
format %{ "CMOV$cmp $dst,$src" %}
13163
ins_encode( SetInstMark, enc_cmov(cmp), RegMem( dst, src ), ClearInstMark );
13164
ins_pipe( pipe_cmov_mem );
13167
instruct cmovII_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, rRegI src) %{
13168
predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13169
match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13172
cmovII_reg_EQNE(cmp, flags, dst, src);
13176
instruct cmovII_mem_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, memory src) %{
13177
predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13178
match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13181
cmovII_mem_EQNE(cmp, flags, dst, src);
13185
// Compare 2 longs and CMOVE ptrs.
13186
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
13187
predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13188
match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13190
format %{ "CMOV$cmp $dst,$src" %}
13192
ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13193
ins_pipe( pipe_cmov_reg );
13196
// Compare 2 unsigned longs and CMOVE ptrs.
13197
instruct cmovPP_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, eRegP dst, eRegP src) %{
13198
predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13199
match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13202
cmovPP_reg_EQNE(cmp,flags,dst,src);
13206
// Compare 2 longs and CMOVE doubles
13207
instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
13208
predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13209
match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13212
fcmovDPR_regS(cmp,flags,dst,src);
13216
// Compare 2 longs and CMOVE doubles
13217
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
13218
predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13219
match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13222
fcmovD_regS(cmp,flags,dst,src);
13226
instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
13227
predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13228
match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13231
fcmovFPR_regS(cmp,flags,dst,src);
13235
instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
13236
predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13237
match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13240
fcmovF_regS(cmp,flags,dst,src);
13245
// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
13246
// Same as cmpL_reg_flags_LEGT except must negate src
13247
instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
13248
match( Set flags (CmpL src zero ));
13249
effect( TEMP tmp );
13251
format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
13252
"CMP $tmp,$src.lo\n\t"
13253
"SBB $tmp,$src.hi\n\t" %}
13254
ins_encode( long_cmp_flags3(src, tmp) );
13255
ins_pipe( ialu_reg_reg_long );
13258
// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
13259
// Same as cmpL_reg_flags_LTGE except operands swapped. Swapping operands
13260
// requires a commuted test to get the same result.
13261
instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
13262
match( Set flags (CmpL src1 src2 ));
13263
effect( TEMP tmp );
13265
format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
13266
"MOV $tmp,$src2.hi\n\t"
13267
"SBB $tmp,$src1.hi\t! Compute flags for long compare" %}
13268
ins_encode( long_cmp_flags2( src2, src1, tmp ) );
13269
ins_pipe( ialu_cr_reg_reg );
13272
// Long compares reg < zero/req OR reg >= zero/req.
13273
// Just a wrapper for a normal branch, plus the predicate test
13274
instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
13275
match(If cmp flags);
13277
predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
13280
jmpCon(cmp,flags,labl); // JGT or JLE...
13285
// Manifest a CmpUL result in the normal flags. Only good for LE or GT compares.
13286
// Same as cmpUL_reg_flags_LEGT except must negate src
13287
instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
13288
match(Set flags (CmpUL src zero));
13291
format %{ "XOR $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
13292
"CMP $tmp,$src.lo\n\t"
13293
"SBB $tmp,$src.hi\n\t" %}
13294
ins_encode(long_cmp_flags3(src, tmp));
13295
ins_pipe(ialu_reg_reg_long);
13298
// Manifest a CmpUL result in the normal flags. Only good for LE or GT compares.
13299
// Same as cmpUL_reg_flags_LTGE except operands swapped. Swapping operands
13300
// requires a commuted test to get the same result.
13301
instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
13302
match(Set flags (CmpUL src1 src2));
13305
format %{ "CMP $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
13306
"MOV $tmp,$src2.hi\n\t"
13307
"SBB $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
13308
ins_encode(long_cmp_flags2( src2, src1, tmp));
13309
ins_pipe(ialu_cr_reg_reg);
13312
// Unsigned long compares reg < zero/req OR reg >= zero/req.
13313
// Just a wrapper for a normal branch, plus the predicate test
13314
instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
13315
match(If cmp flags);
13317
predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
13320
jmpCon(cmp, flags, labl); // JGT or JLE...
13324
// Compare 2 longs and CMOVE longs.
13325
instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
13326
match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13327
predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13329
format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13330
"CMOV$cmp $dst.hi,$src.hi" %}
13332
ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13333
ins_pipe( pipe_cmov_reg_long );
13336
instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
13337
match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13338
predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13340
format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13341
"CMOV$cmp $dst.hi,$src.hi+4" %}
13343
ins_encode( SetInstMark, enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src), ClearInstMark );
13344
ins_pipe( pipe_cmov_reg_long );
13347
instruct cmovLL_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, eRegL src) %{
13348
match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13349
predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13352
cmovLL_reg_LEGT(cmp, flags, dst, src);
13356
instruct cmovLL_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, load_long_memory src) %{
13357
match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13358
predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13361
cmovLL_mem_LEGT(cmp, flags, dst, src);
13365
// Compare 2 longs and CMOVE ints.
13366
instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
13367
predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13368
match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13370
format %{ "CMOV$cmp $dst,$src" %}
13372
ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13373
ins_pipe( pipe_cmov_reg );
13376
instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
13377
predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13378
match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13380
format %{ "CMOV$cmp $dst,$src" %}
13382
ins_encode( SetInstMark, enc_cmov(cmp), RegMem( dst, src ), ClearInstMark );
13383
ins_pipe( pipe_cmov_mem );
13386
instruct cmovII_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, rRegI src) %{
13387
predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13388
match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13391
cmovII_reg_LEGT(cmp, flags, dst, src);
13395
instruct cmovII_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, memory src) %{
13396
predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13397
match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13400
cmovII_mem_LEGT(cmp, flags, dst, src);
13404
// Compare 2 longs and CMOVE ptrs.
13405
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
13406
predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13407
match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13409
format %{ "CMOV$cmp $dst,$src" %}
13411
ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13412
ins_pipe( pipe_cmov_reg );
13415
// Compare 2 unsigned longs and CMOVE ptrs.
13416
instruct cmovPP_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegP dst, eRegP src) %{
13417
predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13418
match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13421
cmovPP_reg_LEGT(cmp,flags,dst,src);
13425
// Compare 2 longs and CMOVE doubles
13426
instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
13427
predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13428
match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13431
fcmovDPR_regS(cmp,flags,dst,src);
13435
// Compare 2 longs and CMOVE doubles
13436
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
13437
predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13438
match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13441
fcmovD_regS(cmp,flags,dst,src);
13445
instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
13446
predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13447
match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13450
fcmovFPR_regS(cmp,flags,dst,src);
13455
instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
13456
predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13457
match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13460
fcmovF_regS(cmp,flags,dst,src);
13465
// ============================================================================
13466
// Procedure Call/Return Instructions
13467
// Call Java Static Instruction
13468
// Note: If this code changes, the corresponding ret_addr_offset() and
13469
// compute_padding() functions will have to be adjusted.
13470
// Direct static Java call: x86 CALL rel32 (opcode 0xE8).
// NOTE(review): effect/ins_cost lines and the encode list's tail (the
// Java_To_Interpreter/post-call pieces and closing paren) are not visible in
// this extract -- treat this block as truncated.
instruct CallStaticJavaDirect(method meth) %{
13471
match(CallStaticJava);
13475
format %{ "CALL,static " %}
13476
opcode(0xE8); /* E8 cd */
13477
ins_encode( pre_call_resets,
13478
Java_Static_Call( meth ),
13481
ins_pipe( pipe_slow );
13485
// Call Java Dynamic Instruction
13486
// Note: If this code changes, the corresponding ret_addr_offset() and
13487
// compute_padding() functions will have to be adjusted.
13488
// Direct dynamic (inline-cache) Java call: loads the IC sentinel into EAX,
// then CALL rel32. The format string's continuation is missing here.
instruct CallDynamicJavaDirect(method meth) %{
13489
match(CallDynamicJava);
13493
format %{ "MOV EAX,(oop)-1\n\t"
13495
opcode(0xE8); /* E8 cd */
13496
ins_encode( pre_call_resets,
13497
Java_Dynamic_Call( meth ),
13500
ins_pipe( pipe_slow );
13504
// Call Runtime Instruction
13505
// Call into the VM runtime (safepoint-aware); clears the x87 stack first via
// FFree_Float_Stack_All so the callee sees an empty FPU stack.
instruct CallRuntimeDirect(method meth) %{
13506
match(CallRuntime );
13510
format %{ "CALL,runtime " %}
13511
opcode(0xE8); /* E8 cd */
13512
// Use FFREEs to clear entries in float stack
13513
ins_encode( pre_call_resets,
13514
FFree_Float_Stack_All,
13515
Java_To_Runtime( meth ),
13517
ins_pipe( pipe_slow );
13520
// Call runtime without safepoint
13521
// Leaf runtime call: no safepoint polling. Clears the x87 stack before the
// call and verifies/restores FPU state afterwards (Verify_FPU_For_Leaf,
// post_call_FPU). NOTE(review): the match(CallLeaf) line appears to have
// been dropped by extraction.
instruct CallLeafDirect(method meth) %{
13526
format %{ "CALL_LEAF,runtime " %}
13527
opcode(0xE8); /* E8 cd */
13528
ins_encode( pre_call_resets,
13529
FFree_Float_Stack_All,
13530
Java_To_Runtime( meth ),
13531
Verify_FPU_For_Leaf, post_call_FPU );
13532
ins_pipe( pipe_slow );
13535
// Leaf runtime call that touches no FP state: skips the FPU save/clear
// that CallLeafDirect performs.
instruct CallLeafNoFPDirect(method meth) %{
13536
match(CallLeafNoFP);
13540
format %{ "CALL_LEAF_NOFP,runtime " %}
13541
opcode(0xE8); /* E8 cd */
13542
ins_encode(pre_call_resets, Java_To_Runtime(meth));
13543
ins_pipe( pipe_slow );
13547
// Return Instruction
13548
// Remove the return address & jump to it.
13554
// NOTE(review): the Ret instruct's header, match, format and encode lines are
// missing from this extract; only its trailing ins_pipe survives below.
ins_pipe( pipe_jmp );
13557
// Tail Call; Jump from runtime stub to Java code.
13558
// Also known as an 'interprocedural jump'.
13559
// Target of jump will eventually return to caller.
13560
// TailJump below removes the return address.
13561
// Don't use ebp for 'jump_target' because a MachEpilogNode has already been
13562
// emitted just above the TailCall which has reset ebp to the caller state.
13563
// Indirect jump (FF /4) to jump_target; EBX carries the Method*.
instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_ptr) %{
13564
match(TailCall jump_target method_ptr);
13566
format %{ "JMP $jump_target \t# EBX holds method" %}
13567
opcode(0xFF, 0x4); /* Opcode FF /4 */
13568
ins_encode( OpcP, RegOpc(jump_target) );
13569
ins_pipe( pipe_jmp );
13573
// Tail Jump; remove the return address; jump to target.
13574
// TailCall above leaves the return address around.
13575
// Pops the return address into EDX (discarded), then JMP (FF /4) to the
// target; EAX carries the exception oop.
instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
13576
match( TailJump jump_target ex_oop );
13578
format %{ "POP EDX\t# pop return address into dummy\n\t"
13579
"JMP $jump_target " %}
13580
opcode(0xFF, 0x4); /* Opcode FF /4 */
13581
ins_encode( enc_pop_rdx,
13582
OpcP, RegOpc(jump_target) );
13583
ins_pipe( pipe_jmp );
13586
// Create exception oop: created by stack-crawling runtime code.
13587
// Created exception is now available to this handler, and is setup
13588
// just prior to jumping to this handler. No code emitted.
13589
// Pure bookkeeping node: tells the allocator the exception oop is in EAX.
instruct CreateException( eAXRegP ex_oop )
13591
match(Set ex_oop (CreateEx));
13594
// use the following format syntax
13595
format %{ "# exception oop is in EAX; no code emitted" %}
13601
// Rethrow exception:
13602
// The exception oop will come in the first argument position.
13603
// Then JUMP (not call) to the rethrow stub code.
13604
instruct RethrowException()
13608
// use the following format syntax
13609
format %{ "JMP rethrow_stub" %}
13610
ins_encode(enc_rethrow);
13611
ins_pipe( pipe_jmp );
13614
// inlined locking and unlocking
13616
// Inlined monitor-enter fast path (legacy locking modes only; lightweight
// locking has its own rule below). Sets flags for the slow-path branch.
instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr, eRegP thread) %{
13617
predicate(LockingMode != LM_LIGHTWEIGHT);
13618
match(Set cr (FastLock object box));
13619
// box is destroyed; tmp/scr/thread are scratch registers.
effect(TEMP tmp, TEMP scr, USE_KILL box, TEMP thread);
13621
format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
13623
// 32-bit x86 has no thread register, so materialize the current thread first.
__ get_thread($thread$$Register);
13624
__ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13625
$scr$$Register, noreg, noreg, $thread$$Register, nullptr);
13627
ins_pipe(pipe_slow);
13630
// Inlined monitor-exit fast path, legacy locking modes. EAX (box) is killed.
instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
13631
predicate(LockingMode != LM_LIGHTWEIGHT);
13632
match(Set cr (FastUnlock object box));
13633
effect(TEMP tmp, USE_KILL box);
13635
format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
13637
__ fast_unlock($object$$Register, $box$$Register, $tmp$$Register);
13639
ins_pipe(pipe_slow);
13642
// Inlined monitor-enter fast path for LM_LIGHTWEIGHT locking.
// Requires EAX (eax_reg) as a fixed temp for the lightweight-lock CAS protocol.
instruct cmpFastLockLightweight(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI eax_reg, eRegP tmp, eRegP thread) %{
13643
predicate(LockingMode == LM_LIGHTWEIGHT);
13644
match(Set cr (FastLock object box));
13645
effect(TEMP eax_reg, TEMP tmp, USE_KILL box, TEMP thread);
13647
format %{ "FASTLOCK $object,$box\t! kills $box,$eax_reg,$tmp" %}
13649
// Materialize the current JavaThread (no dedicated thread register on x86_32).
__ get_thread($thread$$Register);
13650
__ fast_lock_lightweight($object$$Register, $box$$Register, $eax_reg$$Register, $tmp$$Register, $thread$$Register);
13652
ins_pipe(pipe_slow);
13655
// Inlined monitor-exit fast path for LM_LIGHTWEIGHT locking; the box/EAX
// operand doubles as the killed fixed register.
instruct cmpFastUnlockLightweight(eFlagsReg cr, eRegP object, eAXRegP eax_reg, eRegP tmp, eRegP thread) %{
13656
predicate(LockingMode == LM_LIGHTWEIGHT);
13657
match(Set cr (FastUnlock object eax_reg));
13658
effect(TEMP tmp, USE_KILL eax_reg, TEMP thread);
13660
format %{ "FASTUNLOCK $object,$eax_reg\t! kills $eax_reg,$tmp" %}
13662
__ get_thread($thread$$Register);
13663
__ fast_unlock_lightweight($object$$Register, $eax_reg$$Register, $tmp$$Register, $thread$$Register);
13665
ins_pipe(pipe_slow);
13668
// Broadcast a long's low bits into an AVX-512 opmask register (MaskAll),
// for vector lengths <= 32 lanes.
// NOTE(review): name says LT32 but predicate and format say <=32 -- the
// predicate (<= 32) is what governs matching.
instruct mask_all_evexL_LT32(kReg dst, eRegL src) %{
13669
predicate(Matcher::vector_length(n) <= 32);
13670
match(Set dst (MaskAll src));
13671
format %{ "mask_all_evexL_LE32 $dst, $src \t" %}
13673
int mask_len = Matcher::vector_length(this);
13674
__ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
13676
ins_pipe( pipe_slow );
13679
// MaskAll from a long source for vector lengths > 32 lanes: the 64-bit mask
// must be assembled from two 32-bit halves, so a kReg temp is needed.
instruct mask_all_evexL_GT32(kReg dst, eRegL src, kReg ktmp) %{
13680
predicate(Matcher::vector_length(n) > 32);
13681
match(Set dst (MaskAll src));
13683
format %{ "mask_all_evexL_GT32 $dst, $src \t! using $ktmp as TEMP " %}
13685
int mask_len = Matcher::vector_length(this);
13686
__ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
13688
ins_pipe( pipe_slow );
13691
// MaskAll from a 32-bit int source for vector lengths > 32 lanes; same
// two-half assembly as the long variant above.
instruct mask_all_evexI_GT32(kReg dst, rRegI src, kReg ktmp) %{
13692
predicate(Matcher::vector_length(n) > 32);
13693
match(Set dst (MaskAll src));
13695
format %{ "mask_all_evexI_GT32 $dst, $src \t! using $ktmp as TEMP" %}
13697
int mask_len = Matcher::vector_length(this);
13698
__ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
13700
ins_pipe( pipe_slow );
13703
// ============================================================================
13704
// Safepoint Instruction
13705
// Safepoint poll against the thread-local polling page: TEST EAX,[poll]
// faults when the page is armed. The emitted instruction must be exactly
// 2 bytes (0x85 /r) so the signal handler can recognize it.
instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
13706
match(SafePoint poll);
13707
effect(KILL cr, USE poll);
13709
format %{ "TSTL #EAX,[$poll]\t! Safepoint: poll for GC" %}
13711
// EBP would need size(3)
13712
size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
13714
// Mark and relocate the poll site before emitting, so the relocation
// points at the test instruction itself.
__ set_inst_mark();
13715
__ relocate(relocInfo::poll_type);
13716
__ clear_inst_mark();
13717
address pre_pc = __ pc();
13718
__ testl(rax, Address($poll$$Register, 0));
13719
address post_pc = __ pc();
13720
// Sanity: first emitted byte must be the TEST r32,r/m32 opcode (0x85).
guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
13722
ins_pipe(ialu_reg_mem);
13726
// ============================================================================
13727
// This name is KNOWN by the ADLC and cannot be changed.
13728
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13730
// Loads the current JavaThread pointer into dst (get_thread clobbers flags,
// hence KILL cr).
instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
13731
match(Set dst (ThreadLocal));
13732
effect(DEF dst, KILL cr);
13734
format %{ "MOV $dst, Thread::current()" %}
13736
Register dstReg = as_Register($dst$$reg);
13737
__ get_thread(dstReg);
13739
ins_pipe( ialu_reg_fat );
13744
//----------PEEPHOLE RULES-----------------------------------------------------
13745
// These must follow all instruction definitions as they use the names
13746
// defined in the instructions definitions.
13748
// peepmatch ( root_instr_name [preceding_instruction]* );
13750
// peepconstraint %{
13751
// (instruction_number.operand_name relational_op instruction_number.operand_name
13753
// // instruction numbers are zero-based using left to right order in peepmatch
13755
// peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
13756
// // provide an instruction_number.operand_name for each operand that appears
13757
// // in the replacement instruction's match rule
13759
// ---------VM FLAGS---------------------------------------------------------
13761
// All peephole optimizations can be turned off using -XX:-OptoPeephole
13763
// Each peephole rule is given an identifying number starting with zero and
13764
// increasing by one in the order seen by the parser. An individual peephole
13765
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13766
// on the command-line.
13768
// ---------CURRENT LIMITATIONS----------------------------------------------
13770
// Only match adjacent instructions in same basic block
13771
// Only equality constraints
13772
// Only constraints between operands, not (0.dest_reg == EAX_enc)
13773
// Only one replacement instruction
13775
// ---------EXAMPLE----------------------------------------------------------
13777
// // pertinent parts of existing instructions in architecture description
13778
// instruct movI(rRegI dst, rRegI src) %{
13779
// match(Set dst (CopyI src));
13782
// instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
13783
// match(Set dst (AddI dst src));
13787
// // Change (inc mov) to lea
13789
// // increment preceded by register-register move
13790
// peepmatch ( incI_eReg movI );
13791
// // require that the destination register of the increment
13792
// // match the destination register of the move
13793
// peepconstraint ( 0.dst == 1.dst );
13794
// // construct a replacement instruction that sets
13795
// // the destination to ( move's source register + one )
13796
// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13799
// Implementation no longer uses movX instructions since
13800
// machine-independent system no longer uses CopyX nodes.
13803
// peepmatch ( incI_eReg movI );
13804
// peepconstraint ( 0.dst == 1.dst );
13805
// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13809
// peepmatch ( decI_eReg movI );
13810
// peepconstraint ( 0.dst == 1.dst );
13811
// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13815
// peepmatch ( addI_eReg_imm movI );
13816
// peepconstraint ( 0.dst == 1.dst );
13817
// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13821
// peepmatch ( addP_eReg_imm movP );
13822
// peepconstraint ( 0.dst == 1.dst );
13823
// peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13826
// // Change load of spilled value to only a spill
13827
// instruct storeI(memory mem, rRegI src) %{
13828
// match(Set mem (StoreI mem src));
13831
// instruct loadI(rRegI dst, memory mem) %{
13832
// match(Set dst (LoadI mem));
13836
// Peephole: a loadI immediately following a storeI of the same value to the
// same memory slot is redundant -- replace the pair with just the store
// (see the "Change load of spilled value to only a spill" example above).
// NOTE(review): the enclosing peephole %{ ... %} wrapper is not visible in
// this extract.
peepmatch ( loadI storeI );
13837
peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
13838
peepreplace ( storeI( 1.mem 1.mem 1.src ) );
13841
//----------SMARTSPILL RULES---------------------------------------------------
13842
// These must follow all instruction definitions as they use the names
13843
// defined in the instructions definitions.