jdk

Форк
0
/
x86_32.ad 
13843 строки · 440.3 Кб
1
//
2
// Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved.
3
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
//
5
// This code is free software; you can redistribute it and/or modify it
6
// under the terms of the GNU General Public License version 2 only, as
7
// published by the Free Software Foundation.
8
//
9
// This code is distributed in the hope that it will be useful, but WITHOUT
10
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12
// version 2 for more details (a copy is included in the LICENSE file that
13
// accompanied this code).
14
//
15
// You should have received a copy of the GNU General Public License version
16
// 2 along with this work; if not, write to the Free Software Foundation,
17
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18
//
19
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20
// or visit www.oracle.com if you need additional information or have any
21
// questions.
22
//
23
//
24

25
// X86 Architecture Description File
26

27
//----------REGISTER DEFINITION BLOCK------------------------------------------
28
// This information is used by the matcher and the register allocator to
29
// describe individual registers and classes of registers within the target
30
// architecture.
31

32
register %{
33
//----------Architecture Description Register Definitions----------------------
34
// General Registers
35
// "reg_def"  name ( register save type, C convention save type,
36
//                   ideal register type, encoding );
37
// Register Save Types:
38
//
39
// NS  = No-Save:       The register allocator assumes that these registers
40
//                      can be used without saving upon entry to the method, &
41
//                      that they do not need to be saved at call sites.
42
//
43
// SOC = Save-On-Call:  The register allocator assumes that these registers
44
//                      can be used without saving upon entry to the method,
45
//                      but that they must be saved at call sites.
46
//
47
// SOE = Save-On-Entry: The register allocator assumes that these registers
48
//                      must be saved before using them upon entry to the
49
//                      method, but they do not need to be saved at call
50
//                      sites.
51
//
52
// AS  = Always-Save:   The register allocator assumes that these registers
53
//                      must be saved before using them upon entry to the
54
//                      method, & that they must be saved at call sites.
55
//
56
// Ideal Register Type is used to determine how to save & restore a
57
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
58
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
59
//
60
// The encoding number is the actual bit-pattern placed into the opcodes.
61

62
// General Registers
63
// Previously set EBX, ESI, and EDI as save-on-entry for java code
64
// Turn off SOE in java-code due to frequent use of uncommon-traps.
65
// Now that allocator is better, turn on ESI and EDI as SOE registers.
66

67
reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
68
reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
69
reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
70
reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
71
// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
72
reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
73
reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
74
reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
75
reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
76

77
// Float registers.  We treat TOS/FPR0 special.  It is invisible to the
78
// allocator, and only shows up in the encodings.
79
reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
80
reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
81
// Ok, so here's the trick: FPR1 is really st(0), except in the midst
82
// of emission of assembly for a machnode. During the emission the fpu stack
83
// is pushed making FPR1 == st(1) temporarily. However at any safepoint
84
// the stack will not have this element so FPR1 == st(0) from the
85
// oopMap viewpoint. This same weirdness with numbering causes
86
// instruction encoding to have to play games with the register
87
// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
88
// where it does flt->flt moves to see an example
89
//
90
reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
91
reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
92
reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
93
reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
94
reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
95
reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
96
reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
97
reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
98
reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
99
reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
100
reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
101
reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
102
reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
103
reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
104
//
105
// Empty fill registers, which are never used, but supply alignment to xmm regs
106
//
107
reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
108
reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
109
reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
110
reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
111
reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
112
reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
113
reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
114
reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());
115

116
// Specify priority of register selection within phases of register
117
// allocation.  Highest priority is first.  A useful heuristic is to
118
// give registers a low priority when they are required by machine
119
// instructions, like EAX and EDX.  Registers which are used as
120
// pairs must fall on an even boundary (witness the FPR#L's in this list).
121
// For the Intel integer registers, the equivalent Long pairs are
122
// EDX:EAX, EBX:ECX, and EDI:EBP.
123
alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
124
                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
125
                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
126
                    FPR6L, FPR6H, FPR7L, FPR7H,
127
                    FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
128

129

130
//----------Architecture Description Register Classes--------------------------
131
// Several register classes are automatically defined based upon information in
132
// this architecture description.
133
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
134
// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
135
//
136
// Class for no registers (empty set).
137
reg_class no_reg();
138

139
// Class for all registers
140
reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
141
// Class for all registers (excluding EBP)
142
reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
143
// Dynamic register class that selects at runtime between register classes
144
// any_reg_with_ebp and any_reg_no_ebp (depending on the value of the flag PreserveFramePointer).
145
// Equivalent to: return PreserveFramePointer ? any_reg_no_ebp : any_reg_with_ebp;
146
reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
147

148
// Class for general registers
149
reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
150
// Class for general registers (excluding EBP).
151
// It is also safe for use by tailjumps (we don't want to allocate in ebp).
152
// Used also if the PreserveFramePointer flag is true.
153
reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
154
// Dynamic register class that selects between int_reg_no_ebp and int_reg_with_ebp.
155
reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
156

157
// Class of "X" registers
158
reg_class int_x_reg(EBX, ECX, EDX, EAX);
159

160
// Class of registers that can appear in an address with no offset.
161
// EBP and ESP require an extra instruction byte for zero offset.
162
// Used in fast-unlock
163
reg_class p_reg(EDX, EDI, ESI, EBX);
164

165
// Class for general registers excluding ECX
166
reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
167
// Class for general registers excluding ECX (and EBP)
168
reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
169
// Dynamic register class that selects between ncx_reg_no_ebp and ncx_reg_with_ebp.
170
reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
171

172
// Class for general registers excluding EAX
173
reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
174

175
// Class for general registers excluding EAX and EBX.
176
reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
177
// Class for general registers excluding EAX and EBX (and EBP)
178
reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
179
// Dynamic register class that selects between nabx_reg_no_ebp and nabx_reg_with_ebp.
180
reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
181

182
// Class of EAX (for multiply and divide operations)
183
reg_class eax_reg(EAX);
184

185
// Class of EBX (for atomic add)
186
reg_class ebx_reg(EBX);
187

188
// Class of ECX (for shift and JCXZ operations and cmpLTMask)
189
reg_class ecx_reg(ECX);
190

191
// Class of EDX (for multiply and divide operations)
192
reg_class edx_reg(EDX);
193

194
// Class of EDI (for synchronization)
195
reg_class edi_reg(EDI);
196

197
// Class of ESI (for synchronization)
198
reg_class esi_reg(ESI);
199

200
// Singleton class for stack pointer
201
reg_class sp_reg(ESP);
202

203
// Singleton class for instruction pointer
204
// reg_class ip_reg(EIP);
205

206
// Class of integer register pairs
207
reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
208
// Class of integer register pairs (excluding EBP and EDI);
209
reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
210
// Dynamic register class that selects between long_reg_no_ebp and long_reg_with_ebp.
211
reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
212

213
// Class of integer register pairs that aligns with calling convention
214
reg_class eadx_reg( EAX,EDX );
215
reg_class ebcx_reg( ECX,EBX );
216
reg_class ebpd_reg( EBP,EDI );
217

218
// Not AX or DX, used in divides
219
reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
220
// Not AX or DX (and neither EBP), used in divides
221
reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
222
// Dynamic register class that selects between nadx_reg_no_ebp and nadx_reg_with_ebp.
223
reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
224

225
// Floating point registers.  Notice FPR0 is not a choice.
226
// FPR0 is not ever allocated; we use clever encodings to fake
227
// 2-address instructions out of Intel's FP stack.
228
reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
229

230
reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
231
                      FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
232
                      FPR7L,FPR7H );
233

234
reg_class fp_flt_reg0( FPR1L );
235
reg_class fp_dbl_reg0( FPR1L,FPR1H );
236
reg_class fp_dbl_reg1( FPR2L,FPR2H );
237
reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
238
                          FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
239

240
%}
241

242

243
//----------SOURCE BLOCK-------------------------------------------------------
244
// This is a block of C++ code which provides values, functions, and
245
// definitions necessary in the rest of the architecture description
246
source_hpp %{
247
// Must be visible to the DFA in dfa_x86_32.cpp
248
extern bool is_operand_hi32_zero(Node* n);
249
%}
250

251
source %{
252
#define   RELOC_IMM32    Assembler::imm_operand
253
#define   RELOC_DISP32   Assembler::disp32_operand
254

255
#define __ masm->
256

257
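// How to find the high register of a Long pair, given the low register:
// the high half is the register whose encoding is the low half's encoding
// plus 2 (EAX=0 -> EDX=2, ECX=1 -> EBX=3, EBP=5 -> EDI=7).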
258
#define   HIGH_FROM_LOW(x) (as_Register((x)->encoding()+2))
259
#define   HIGH_FROM_LOW_ENC(x) ((x)+2)
260

261
// These masks are used to provide 128-bit aligned bitmasks to the XMM
262
// instructions, to allow sign-masking or sign-bit flipping.  They allow
263
// fast versions of NegF/NegD and AbsF/AbsD.
264

265
// Register-mask initialization hook.  Nothing is set up here: the body is
// intentionally empty.
void reg_mask_init() {
  // no-op
}
266

267
// Note: 'double' and 'long long' have 32-bits alignment on x86.
268
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
269
  // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
270
  // of 128-bits operands for SSE instructions.
271
  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
272
  // Store the value to a 128-bits operand.
273
  operand[0] = lo;
274
  operand[1] = hi;
275
  return operand;
276
}
277

278
// Buffer for 128-bits masks used by SSE instructions.
279
static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
280

281
// Static initialization during VM startup.
282
static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
283
static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
284
static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
285
static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
286

287
// Offset hacking within calls.
288
static int pre_call_resets_size() {
289
  int size = 0;
290
  Compile* C = Compile::current();
291
  if (C->in_24_bit_fp_mode()) {
292
    size += 6; // fldcw
293
  }
294
  if (VM_Version::supports_vzeroupper()) {
295
    size += 3; // vzeroupper
296
  }
297
  return size;
298
}
299

300
// !!!!! Special hack to get all type of calls to specify the byte offset
301
//       from the start of the call to the point where the return address
302
//       will point.
303
int MachCallStaticJavaNode::ret_addr_offset() {
304
  return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
305
}
306

307
int MachCallDynamicJavaNode::ret_addr_offset() {
308
  return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
309
}
310

311
static int sizeof_FFree_Float_Stack_All = -1;
312

313
int MachCallRuntimeNode::ret_addr_offset() {
314
  assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
315
  return 5 + pre_call_resets_size() + (_leaf_no_fp ? 0 : sizeof_FFree_Float_Stack_All);
316
}
317

318
//
319
// Compute padding required for nodes which need alignment
320
//
321

322
// The address of the call instruction needs to be 4-byte aligned to
323
// ensure that it does not span a cache line so that it can be patched.
324
int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
325
  current_offset += pre_call_resets_size();  // skip fldcw, if any
326
  current_offset += 1;      // skip call opcode byte
327
  return align_up(current_offset, alignment_required()) - current_offset;
328
}
329

330
// The address of the call instruction needs to be 4-byte aligned to
331
// ensure that it does not span a cache line so that it can be patched.
332
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
333
  current_offset += pre_call_resets_size();  // skip fldcw, if any
334
  current_offset += 5;      // skip MOV instruction
335
  current_offset += 1;      // skip call opcode byte
336
  return align_up(current_offset, alignment_required()) - current_offset;
337
}
338

339
// EMIT_RM()
340
void emit_rm(C2_MacroAssembler *masm, int f1, int f2, int f3) {
341
  unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
342
  __ emit_int8(c);
343
}
344

345
// EMIT_CC()
346
void emit_cc(C2_MacroAssembler *masm, int f1, int f2) {
347
  unsigned char c = (unsigned char)( f1 | f2 );
348
  __ emit_int8(c);
349
}
350

351
// EMIT_OPCODE()
352
void emit_opcode(C2_MacroAssembler *masm, int code) {
353
  __ emit_int8((unsigned char) code);
354
}
355

356
// EMIT_OPCODE() w/ relocation information
357
void emit_opcode(C2_MacroAssembler *masm, int code, relocInfo::relocType reloc, int offset = 0) {
358
  __ relocate(__ inst_mark() + offset, reloc);
359
  emit_opcode(masm, code);
360
}
361

362
// EMIT_D8()
363
void emit_d8(C2_MacroAssembler *masm, int d8) {
364
  __ emit_int8((unsigned char) d8);
365
}
366

367
// EMIT_D16()
368
void emit_d16(C2_MacroAssembler *masm, int d16) {
369
  __ emit_int16(d16);
370
}
371

372
// EMIT_D32()
373
void emit_d32(C2_MacroAssembler *masm, int d32) {
374
  __ emit_int32(d32);
375
}
376

377
// emit 32 bit value and construct relocation entry from relocInfo::relocType
378
void emit_d32_reloc(C2_MacroAssembler *masm, int d32, relocInfo::relocType reloc,
379
        int format) {
380
  __ relocate(__ inst_mark(), reloc, format);
381
  __ emit_int32(d32);
382
}
383

384
// emit 32 bit value and construct relocation entry from RelocationHolder
385
void emit_d32_reloc(C2_MacroAssembler *masm, int d32, RelocationHolder const& rspec,
386
        int format) {
387
#ifdef ASSERT
388
  if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
389
    assert(oopDesc::is_oop(cast_to_oop(d32)), "cannot embed broken oops in code");
390
  }
391
#endif
392
  __ relocate(__ inst_mark(), rspec, format);
393
  __ emit_int32(d32);
394
}
395

396
// Access stack slot for load or store
397
void store_to_stackslot(C2_MacroAssembler *masm, int opcode, int rm_field, int disp) {
398
  emit_opcode( masm, opcode );               // (e.g., FILD   [ESP+src])
399
  if( -128 <= disp && disp <= 127 ) {
400
    emit_rm( masm, 0x01, rm_field, ESP_enc );  // R/M byte
401
    emit_rm( masm, 0x00, ESP_enc, ESP_enc);    // SIB byte
402
    emit_d8 (masm, disp);     // Displacement  // R/M byte
403
  } else {
404
    emit_rm( masm, 0x02, rm_field, ESP_enc );  // R/M byte
405
    emit_rm( masm, 0x00, ESP_enc, ESP_enc);    // SIB byte
406
    emit_d32(masm, disp);     // Displacement  // R/M byte
407
  }
408
}
409

410
   // rRegI ereg, memory mem) %{    // emit_reg_mem
411
void encode_RegMem( C2_MacroAssembler *masm, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
412
  // There is no index & no scale, use form without SIB byte
413
  if ((index == 0x4) &&
414
      (scale == 0) && (base != ESP_enc)) {
415
    // If no displacement, mode is 0x0; unless base is [EBP]
416
    if ( (displace == 0) && (base != EBP_enc) ) {
417
      emit_rm(masm, 0x0, reg_encoding, base);
418
    }
419
    else {                    // If 8-bit displacement, mode 0x1
420
      if ((displace >= -128) && (displace <= 127)
421
          && (disp_reloc == relocInfo::none) ) {
422
        emit_rm(masm, 0x1, reg_encoding, base);
423
        emit_d8(masm, displace);
424
      }
425
      else {                  // If 32-bit displacement
426
        if (base == -1) { // Special flag for absolute address
427
          emit_rm(masm, 0x0, reg_encoding, 0x5);
428
          // (manual lies; no SIB needed here)
429
          if ( disp_reloc != relocInfo::none ) {
430
            emit_d32_reloc(masm, displace, disp_reloc, 1);
431
          } else {
432
            emit_d32      (masm, displace);
433
          }
434
        }
435
        else {                // Normal base + offset
436
          emit_rm(masm, 0x2, reg_encoding, base);
437
          if ( disp_reloc != relocInfo::none ) {
438
            emit_d32_reloc(masm, displace, disp_reloc, 1);
439
          } else {
440
            emit_d32      (masm, displace);
441
          }
442
        }
443
      }
444
    }
445
  }
446
  else {                      // Else, encode with the SIB byte
447
    // If no displacement, mode is 0x0; unless base is [EBP]
448
    if (displace == 0 && (base != EBP_enc)) {  // If no displacement
449
      emit_rm(masm, 0x0, reg_encoding, 0x4);
450
      emit_rm(masm, scale, index, base);
451
    }
452
    else {                    // If 8-bit displacement, mode 0x1
453
      if ((displace >= -128) && (displace <= 127)
454
          && (disp_reloc == relocInfo::none) ) {
455
        emit_rm(masm, 0x1, reg_encoding, 0x4);
456
        emit_rm(masm, scale, index, base);
457
        emit_d8(masm, displace);
458
      }
459
      else {                  // If 32-bit displacement
460
        if (base == 0x04 ) {
461
          emit_rm(masm, 0x2, reg_encoding, 0x4);
462
          emit_rm(masm, scale, index, 0x04);
463
        } else {
464
          emit_rm(masm, 0x2, reg_encoding, 0x4);
465
          emit_rm(masm, scale, index, base);
466
        }
467
        if ( disp_reloc != relocInfo::none ) {
468
          emit_d32_reloc(masm, displace, disp_reloc, 1);
469
        } else {
470
          emit_d32      (masm, displace);
471
        }
472
      }
473
    }
474
  }
475
}
476

477

478
// Emits a register-to-register copy: opcode 0x8B followed by a mode-0x3
// operand byte naming dst and src.  Nothing is emitted when both encodings
// are the same.
void encode_Copy( C2_MacroAssembler *masm, int dst_encoding, int src_encoding ) {
  if (dst_encoding == src_encoding) {
    return;   // reg-reg copy onto itself: empty encoding
  }
  emit_opcode(masm, 0x8B);
  emit_rm(masm, 0x3, dst_encoding, src_encoding);
}
486

487
// Adjusts the flags after a floating-point compare so that a compare
// involving NaN reads as 'less than'.
//
// comiss/ucomiss set ZF, PF and CF and zero OF, AF and SF for NaN values.
// When the parity flag is set, the flags are pushed, the saved low flag byte
// is AND-ed with 0x2B, and the flags are popped again:
//
//    7 6 5 4 3 2 1 0
//   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
//    0 0 1 0 1 0 1 1   (0x2B)
//
// i.e. ZF and PF are cleared while CF is kept, so 'below' holds.  When the
// parity flag is clear, the whole fixup is skipped.
void emit_cmpfp_fixup(MacroAssembler* masm) {
  Label no_fixup_needed;
  masm->jccb(Assembler::noParity, no_fixup_needed);
  masm->pushf();
  masm->andl(Address(rsp, 0), 0xffffff2b);
  masm->popf();
  masm->bind(no_fixup_needed);
}
506

507
// Turns the flags of a preceding floating-point compare into a three-way
// result in 'dst'.  dst is preset to -1 and stays -1 when the parity or the
// 'below' condition holds; otherwise dst becomes 1 if 'notEqual' holds and
// 0 if it does not (set-byte followed by zero extension).
static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
  Label finished;
  masm->movl(dst, -1);
  masm->jcc(Assembler::parity, finished);
  masm->jcc(Assembler::below, finished);
  masm->setb(Assembler::notEqual, dst);
  masm->movzbl(dst, dst);
  masm->bind(finished);
}
516

517

518
//=============================================================================
519
// Output register mask of the constant-table base node: bound to the empty mask.
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
520

521
// The constant table is addressed absolutely here, so the table base needs
// no offset.
int ConstantTable::calculate_table_base_offset() const {
  const int no_offset = 0;
  return no_offset;
}
524

525
// The constant-table base node is never expanded after register allocation.
bool MachConstantBaseNode::requires_postalloc_expand() const {
  return false;
}
526
// Must never be called: requires_postalloc_expand() answers false for this node.
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  // Reaching this point is a VM bug.
  ShouldNotReachHere();
}
529

530
// Emits no code for the constant-table base node.
void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
  // Intentionally empty encoding.
}
533

534
// Zero bytes: matches the empty encoding produced by emit().
uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  const uint empty_encoding_size = 0;
  return empty_encoding_size;
}
537

538
#ifndef PRODUCT
539
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
540
  st->print("# MachConstantBaseNode (empty encoding)");
541
}
542
#endif
543

544

545
//=============================================================================
546
#ifndef PRODUCT
547
void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
548
  Compile* C = ra_->C;
549

550
  int framesize = C->output()->frame_size_in_bytes();
551
  int bangsize = C->output()->bang_size_in_bytes();
552
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
553
  // Remove wordSize for return addr which is already pushed.
554
  framesize -= wordSize;
555

556
  if (C->output()->need_stack_bang(bangsize)) {
557
    framesize -= wordSize;
558
    st->print("# stack bang (%d bytes)", bangsize);
559
    st->print("\n\t");
560
    st->print("PUSH   EBP\t# Save EBP");
561
    if (PreserveFramePointer) {
562
      st->print("\n\t");
563
      st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
564
    }
565
    if (framesize) {
566
      st->print("\n\t");
567
      st->print("SUB    ESP, #%d\t# Create frame",framesize);
568
    }
569
  } else {
570
    st->print("SUB    ESP, #%d\t# Create frame",framesize);
571
    st->print("\n\t");
572
    framesize -= wordSize;
573
    st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
574
    if (PreserveFramePointer) {
575
      st->print("\n\t");
576
      st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
577
      if (framesize > 0) {
578
        st->print("\n\t");
579
        st->print("ADD    EBP, #%d", framesize);
580
      }
581
    }
582
  }
583

584
  if (VerifyStackAtCalls) {
585
    st->print("\n\t");
586
    framesize -= wordSize;
587
    st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
588
  }
589

590
  if( C->in_24_bit_fp_mode() ) {
591
    st->print("\n\t");
592
    st->print("FLDCW  \t# load 24 bit fpu control word");
593
  }
594
  if (UseSSE >= 2 && VerifyFPU) {
595
    st->print("\n\t");
596
    st->print("# verify FPU stack (must be clean on entry)");
597
  }
598

599
#ifdef ASSERT
600
  if (VerifyStackAtCalls) {
601
    st->print("\n\t");
602
    st->print("# stack alignment check");
603
  }
604
#endif
605
  st->cr();
606
}
607
#endif
608

609

610
// Emits the method prolog through verified_entry(), records the code offset
// at which the frame is complete and, if the compilation has a constant-table
// base node, fixes the constant table's base offset.
void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile* const comp = ra_->C;

  const int frame_bytes = comp->output()->frame_size_in_bytes();
  const int bang_bytes  = comp->output()->bang_size_in_bytes();

  // A bang size of 0 tells verified_entry() that no stack bang is needed.
  const int  bang_arg = comp->output()->need_stack_bang(bang_bytes) ? bang_bytes : 0;
  const bool fp_24bit = comp->in_24_bit_fp_mode();
  const bool is_stub  = comp->stub_function() != nullptr;
  masm->verified_entry(frame_bytes, bang_arg, fp_24bit, is_stub);

  comp->output()->set_frame_complete(masm->offset());

  if (!comp->has_mach_constant_base_node()) {
    return;
  }
  // NOTE: We set the table base offset here because users might be
  // emitted before MachConstantBaseNode.
  ConstantTable& table = comp->output()->constant_table();
  table.set_table_base_offset(table.calculate_table_base_offset());
}
627

628
// The prolog length depends on too many variables to compute directly, so
// it is obtained the hard way via the generic MachNode::size().
uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  const uint generic_size = MachNode::size(ra_);
  return generic_size;
}
631

632
// Relocation count reported for the prolog: always 0.
// NOTE(review): the previous comment called this "a large enough number" -
// confirm that 0 is the intended value.
int MachPrologNode::reloc() const {
  const int reloc_count = 0;
  return reloc_count;
}
635

636
//=============================================================================
637
#ifndef PRODUCT
638
void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
639
  Compile *C = ra_->C;
640
  int framesize = C->output()->frame_size_in_bytes();
641
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
642
  // Remove two words for return addr and rbp,
643
  framesize -= 2*wordSize;
644

645
  if (C->max_vector_size() > 16) {
646
    st->print("VZEROUPPER");
647
    st->cr(); st->print("\t");
648
  }
649
  if (C->in_24_bit_fp_mode()) {
650
    st->print("FLDCW  standard control word");
651
    st->cr(); st->print("\t");
652
  }
653
  if (framesize) {
654
    st->print("ADD    ESP,%d\t# Destroy frame",framesize);
655
    st->cr(); st->print("\t");
656
  }
657
  st->print_cr("POPL   EBP"); st->print("\t");
658
  if (do_polling() && C->is_method_compilation()) {
659
    st->print("CMPL    rsp, poll_offset[thread]  \n\t"
660
              "JA      #safepoint_stub\t"
661
              "# Safepoint: poll for GC");
662
  }
663
}
664
#endif
665

666
// Emits the method epilog: optional vzeroupper, optional restore of the
// standard FPU control word, frame teardown (ADD ESP + POP EBP), the
// reserved-stack check when applicable and, for polling method compilations,
// the return safepoint poll.
void MachEpilogNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile* const comp = ra_->C;

  if (comp->max_vector_size() > 16) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    masm->vzeroupper();
  }
  // If method set FPU control word, restore to standard control word
  if (comp->in_24_bit_fp_mode()) {
    masm->fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
  }

  int framesize = comp->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if (framesize != 0) {
    // add  SP, #framesize: opcode 0x81 takes a 32-bit immediate, 0x83 an
    // 8-bit one.
    const bool wide_imm = framesize >= 128;
    emit_opcode(masm, wide_imm ? 0x81 : 0x83);
    emit_rm(masm, 0x3, 0x00, ESP_enc);
    if (wide_imm) {
      emit_d32(masm, framesize);
    } else {
      emit_d8(masm, framesize);
    }
  }

  // Single-byte opcode 0x58 | EBP_enc (the listing shows this as POPL EBP).
  emit_opcode(masm, 0x58 | EBP_enc);

  if (StackReservedPages > 0 && comp->has_reserved_stack_access()) {
    masm->reserved_stack_check();
  }

  if (!(do_polling() && comp->is_method_compilation())) {
    return;
  }

  Register thread = as_Register(EBX_enc);
  masm->get_thread(thread);

  // While only measuring the emit size, a throwaway label stands in for the
  // real stub entry and no stub is registered.
  Label placeholder;
  Label* poll_target = &placeholder;
  if (!comp->output()->in_scratch_emit_size()) {
    C2SafepointPollStub* stub = new (comp->comp_arena()) C2SafepointPollStub(masm->offset());
    comp->output()->add_stub(stub);
    poll_target = &stub->entry();
  }
  masm->set_inst_mark();
  masm->relocate(relocInfo::poll_return_type);
  masm->clear_inst_mark();
  masm->safepoint_poll(*poll_target, thread, true /* at_return */, true /* in_nmethod */);
}
718

719
// The epilog length depends on too many variables to compute directly, so
// it is obtained the hard way via the generic MachNode::size().
uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  const uint generic_size = MachNode::size(ra_);
  return generic_size;
}
723

724
// Relocation count reported for the epilog: always 0.
// NOTE(review): the previous comment called this "a large enough number" -
// confirm that 0 is the intended value.
int MachEpilogNode::reloc() const {
  const int reloc_count = 0;
  return reloc_count;
}
727

728
// The epilog uses the generic MachNode pipeline class.
const Pipeline * MachEpilogNode::pipeline() const {
  const Pipeline* generic_pipeline = MachNode::pipeline_class();
  return generic_pipeline;
}
731

732
//=============================================================================
733

734
enum RC { rc_bad, rc_int, rc_kreg, rc_float, rc_xmm, rc_stack };
735
static enum RC rc_class( OptoReg::Name reg ) {
736

737
  if( !OptoReg::is_valid(reg)  ) return rc_bad;
738
  if (OptoReg::is_stack(reg)) return rc_stack;
739

740
  VMReg r = OptoReg::as_VMReg(reg);
741
  if (r->is_Register()) return rc_int;
742
  if (r->is_FloatRegister()) {
743
    assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
744
    return rc_float;
745
  }
746
  if (r->is_KRegister()) return rc_kreg;
747
  assert(r->is_XMMRegister(), "must be");
748
  return rc_xmm;
749
}
750

751
// Helper for a single [ESP + offset] access.
//
// With an assembler ('masm' non-null) it emits 'opcode' followed by the
// operand bytes for register 'reg' (looked up in Matcher::_regEncode) and
// memory [ESP + offset].  Without an assembler, and when not merely sizing
// ('do_size' false), non-product builds print the instruction to 'st',
// starting a new line first if 'size' is non-zero.
//
// Returns 'size' plus the size of this instruction: 3 bytes plus 0, 1 or 4
// displacement bytes.
// NOTE(review): the displacement estimate only tests offset == 0 and
// offset <= 127, so it presumably relies on 'offset' being non-negative -
// confirm.
static int impl_helper( C2_MacroAssembler *masm, bool do_size, bool is_load, int offset, int reg,
                        int opcode, const char *op_str, int size, outputStream* st ) {
  if (masm != nullptr) {
    masm->set_inst_mark();
    emit_opcode(masm, opcode);
    encode_RegMem(masm, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
    masm->clear_inst_mark();
  }
#ifndef PRODUCT
  else if (!do_size) {
    if (size != 0) {
      st->print("\n\t");
    }
    const bool is_mov = (opcode == 0x8B) || (opcode == 0x89);
    if (!is_mov) {
      // FLD, FST, PUSH, POP
      st->print("%s [ESP + #%d]",op_str,offset);
    } else if (is_load) {
      st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
    } else {
      st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
    }
  }
#endif

  int disp_bytes;
  if (offset == 0) {
    disp_bytes = 0;
  } else if (offset <= 127) {
    disp_bytes = 1;
  } else {
    disp_bytes = 4;
  }
  return size + 3 + disp_bytes;
}
772

773
// Helper for XMM registers.  Extra opcode bits, limited syntax.
774
static int impl_x_helper( C2_MacroAssembler *masm, bool do_size, bool is_load,
775
                         int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
776
  int in_size_in_bits = Assembler::EVEX_32bit;
777
  int evex_encoding = 0;
778
  if (reg_lo+1 == reg_hi) {
779
    in_size_in_bits = Assembler::EVEX_64bit;
780
    evex_encoding = Assembler::VEX_W;
781
  }
782
  if (masm) {
783
    // EVEX spills remain EVEX: Compressed displacemement is better than AVX on spill mem operations,
784
    //                          it maps more cases to single byte displacement
785
    __ set_managed();
786
    if (reg_lo+1 == reg_hi) { // double move?
787
      if (is_load) {
788
        __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
789
      } else {
790
        __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
791
      }
792
    } else {
793
      if (is_load) {
794
        __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
795
      } else {
796
        __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
797
      }
798
    }
799
#ifndef PRODUCT
800
  } else if (!do_size) {
801
    if (size != 0) st->print("\n\t");
802
    if (reg_lo+1 == reg_hi) { // double move?
803
      if (is_load) st->print("%s %s,[ESP + #%d]",
804
                              UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
805
                              Matcher::regName[reg_lo], offset);
806
      else         st->print("MOVSD  [ESP + #%d],%s",
807
                              offset, Matcher::regName[reg_lo]);
808
    } else {
809
      if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
810
                              Matcher::regName[reg_lo], offset);
811
      else         st->print("MOVSS  [ESP + #%d],%s",
812
                              offset, Matcher::regName[reg_lo]);
813
    }
814
#endif
815
  }
816
  bool is_single_byte = false;
817
  if ((UseAVX > 2) && (offset != 0)) {
818
    is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
819
  }
820
  int offset_size = 0;
821
  if (UseAVX > 2 ) {
822
    offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
823
  } else {
824
    offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
825
  }
826
  size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
827
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
828
  return size+5+offset_size;
829
}
830

831

832
// XMM -> XMM register copy.  Both pairs adjacent (lo+1 == hi) means a double
// move, otherwise a float move.  Emits when masm is non-null, prints in
// non-PRODUCT builds when masm is null and !do_size; returns the updated size.
static int impl_movx_helper( C2_MacroAssembler *masm, bool do_size, int src_lo, int dst_lo,
                            int src_hi, int dst_hi, int size, outputStream* st ) {
  const bool is_double = (src_lo+1 == src_hi && dst_lo+1 == dst_hi);

  if (masm != nullptr) {
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    __ set_managed();
    const XMMRegister dst = as_XMMRegister(Matcher::_regEncode[dst_lo]);
    const XMMRegister src = as_XMMRegister(Matcher::_regEncode[src_lo]);
    if (is_double) {
      __ movdbl(dst, src);
    } else {
      __ movflt(dst, src);
    }
  }
#ifndef PRODUCT
  else if (!do_size) {
    if (size != 0) {
      st->print("\n\t");
    }
    const char* dst_name = Matcher::regName[dst_lo];
    const char* src_name = Matcher::regName[src_lo];
    if (UseXmmRegToRegMoveAll) {
      // movaps/movapd are used to move between xmm registers
      if (is_double) {
        st->print("MOVAPD %s,%s",dst_name,src_name);
      } else {
        st->print("MOVAPS %s,%s",dst_name,src_name);
      }
    } else if (is_double) {
      st->print("MOVSD  %s,%s",dst_name,src_name);
    } else {
      st->print("MOVSS  %s,%s",dst_name,src_name);
    }
  }
#endif

  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
  int insn_bytes;
  if (!is_double && UseXmmRegToRegMoveAll && (UseAVX == 0)) {
    insn_bytes = 3;
  } else {
    insn_bytes = (UseAVX > 2) ? 6 : 4;
  }
  return size + insn_bytes;
}
869

870
// 32-bit move from a general purpose register into an XMM register (MOVD).
//   masm          - when non-null the instruction is emitted.
//   do_size       - suppresses printing when set.
//   src_lo/dst_lo - allocator numbers of the GPR source and XMM destination.
//   src_hi/dst_hi - unused; only 32-bit moves are handled here.
//   size          - bytes accumulated so far by the caller.
//   st            - stream for the non-PRODUCT listing (used when masm is null).
// Returns size plus the instruction length (6 bytes with EVEX, else 4).
// The incoming size is now carried through, and the listing is separated from
// earlier output, exactly like the sibling impl_* helpers; with size == 0
// (the only value the current caller passes) the result is unchanged.
static int impl_movgpr2x_helper( C2_MacroAssembler *masm, bool do_size, int src_lo, int dst_lo,
                            int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (masm) {
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    __ set_managed();
    __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
             as_Register(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return size + ((UseAVX > 2) ? 6 : 4);
}
885

886

887
// 32-bit move from an XMM register into a general purpose register (MOVD).
//   masm          - when non-null the instruction is emitted.
//   do_size       - suppresses printing when set.
//   src_lo/dst_lo - allocator numbers of the XMM source and GPR destination.
//   src_hi/dst_hi - unused; only 32-bit moves are handled here.
//   size          - bytes accumulated so far by the caller.
//   st            - stream for the non-PRODUCT listing (used when masm is null).
// Returns size plus the instruction length (6 bytes with EVEX, else 4).
// The incoming size is now carried through, and the listing is separated from
// earlier output, exactly like the sibling impl_* helpers; with size == 0
// (the only value the current caller passes) the result is unchanged.
static int impl_movx2gpr_helper( C2_MacroAssembler *masm, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (masm) {
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    __ set_managed();
    __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
             as_XMMRegister(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return size + ((UseAVX > 2) ? 6 : 4);
}
902

903
// Integer register-to-register copy: opcode 0x8B followed by one ModRM byte
// (mode 0x3, reg field = dst, rm field = src).  Emits when masm is non-null,
// prints "MOV dst,src" in non-PRODUCT builds when masm is null and !do_size.
// Returns size + 2.
static int impl_mov_helper( C2_MacroAssembler *masm, bool do_size, int src, int dst, int size, outputStream* st ) {
  if (masm != nullptr) {
    const int dst_enc = Matcher::_regEncode[dst];
    const int src_enc = Matcher::_regEncode[src];
    emit_opcode(masm, 0x8B);
    emit_rm(masm, 0x3, dst_enc, src_enc);
  }
#ifndef PRODUCT
  else if (!do_size) {
    if (size != 0) {
      st->print("\n\t");
    }
    st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
  }
#endif
  return size + 2;
}
915

916
// Store an x87 register to [ESP + offset].  A source other than FPR1L is
// first pushed with FLD and then stored with the popping form (FSTP);
// FPR1L itself is stored without popping (FST).  Adjacent src and dst pairs
// select a 64-bit store, otherwise a 32-bit store.  The store itself is
// produced by impl_helper; the return value is the updated byte size.
static int impl_fp_store_helper( C2_MacroAssembler *masm, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
                                 int offset, int size, outputStream* st ) {
  const bool push_first = (src_lo != FPR1L_num);

  if (push_first) {      // Move value to top of FP stack, if not already there
    if (masm != nullptr) {
      emit_opcode(masm, 0xD9);  // FLD (i.e., push it)
      emit_d8(masm, 0xC0 - 1 + Matcher::_regEncode[src_lo]);
    }
#ifndef PRODUCT
    else if (!do_size) {
      if (size != 0) {
        st->print("\n\t");
      }
      st->print("FLD    %s",Matcher::regName[src_lo]);
    }
#endif
    size += 2;
  }

  const bool is_double = (src_lo+1 == src_hi && dst_lo+1 == dst_hi);
  if (!is_double) {
    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  }

  // The register handed to impl_helper is used only for its encoding:
  // EBX selects store & pop, EDX selects store without pop.
  const int   store_kind   = push_first ? EBX_num : EDX_num;
  const int   store_opcode = is_double ? 0xDD : 0xD9;
  const char* mnemonic     = is_double ? (push_first ? "FSTP_D" : "FST_D ")
                                       : (push_first ? "FSTP_S" : "FST_S ");

  return impl_helper(masm, do_size, false, offset, store_kind, store_opcode, mnemonic, size, st);
}
945

946
// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
946
// Vector register-to-register copy; MachSpillCopyNode::implementation calls it
// for xmm -> xmm vector spill copies.
static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
947
                          int src_hi, int dst_hi, uint ireg, outputStream* st);
948

949
// Vector move between an XMM register and a stack slot;
// MachSpillCopyNode::implementation passes is_load == false for
// xmm -> stack and is_load == true for stack -> xmm.
void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
950
                            int stack_offset, int reg, uint ireg, outputStream* st);
952

953
// Copy a vector value from one stack slot to another (mem -> mem spill).
//   masm       - when non-null the copy is emitted; when null the equivalent
//                listing is printed to 'st' (non-PRODUCT builds only).
//   src_offset - ESP-relative offset of the source slot.
//   dst_offset - ESP-relative offset of the destination slot.
//   ireg       - ideal register kind: Op_VecS/VecD/VecX/VecY/VecZ.
//   st         - output stream for the listing.
// VecS and VecD are moved 32 bits at a time with push/pop.  Larger vectors
// borrow xmm0 as scratch: its contents are saved just below ESP, the value is
// routed through xmm0, and xmm0 is then restored.
// The printed mnemonics mirror the instructions actually emitted (popl for
// VecD, evmovdquq for VecZ).
static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
                                     int dst_offset, uint ireg, outputStream* st) {
  if (masm) {
    switch (ireg) {
    case Op_VecS:
      __ pushl(Address(rsp, src_offset));
      __ popl (Address(rsp, dst_offset));
      break;
    case Op_VecD:
      // Two 32-bit halves, low word first.
      __ pushl(Address(rsp, src_offset));
      __ popl (Address(rsp, dst_offset));
      __ pushl(Address(rsp, src_offset+4));
      __ popl (Address(rsp, dst_offset+4));
      break;
    case Op_VecX:
      __ movdqu(Address(rsp, -16), xmm0);          // save xmm0 below ESP
      __ movdqu(xmm0, Address(rsp, src_offset));
      __ movdqu(Address(rsp, dst_offset), xmm0);
      __ movdqu(xmm0, Address(rsp, -16));          // restore xmm0
      break;
    case Op_VecY:
      __ vmovdqu(Address(rsp, -32), xmm0);         // save xmm0 below ESP
      __ vmovdqu(xmm0, Address(rsp, src_offset));
      __ vmovdqu(Address(rsp, dst_offset), xmm0);
      __ vmovdqu(xmm0, Address(rsp, -32));         // restore xmm0
      break;
    case Op_VecZ:
      __ evmovdquq(Address(rsp, -64), xmm0, 2);    // save xmm0 below ESP
      __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
      __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, -64), 2);    // restore xmm0
      break;
    default:
      ShouldNotReachHere();
    }
#ifndef PRODUCT
  } else {
    switch (ireg) {
    case Op_VecS:
      st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
                "popl    [rsp + #%d]",
                src_offset, dst_offset);
      break;
    case Op_VecD:
      st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
                "popl    [rsp + #%d]\n\t"
                "pushl   [rsp + #%d]\n\t"
                "popl    [rsp + #%d]",
                src_offset, dst_offset, src_offset+4, dst_offset+4);
      break;
    case Op_VecX:
      st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
                "movdqu  xmm0, [rsp + #%d]\n\t"
                "movdqu  [rsp + #%d], xmm0\n\t"
                "movdqu  xmm0, [rsp - #16]",
                src_offset, dst_offset);
      break;
    case Op_VecY:
      st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #32]",
                src_offset, dst_offset);
      break;
    case Op_VecZ:
      st->print("evmovdquq [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
                "evmovdquq xmm0, [rsp + #%d]\n\t"
                "evmovdquq [rsp + #%d], xmm0\n\t"
                "evmovdquq xmm0, [rsp - #64]",
                src_offset, dst_offset);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
}
1030

1031
// Emits, or in non-PRODUCT builds prints, the code for one spill copy chosen
// by the register allocator.
//   masm    - assembler to emit into; when null nothing is emitted and the
//             textual form goes to 'st' instead (format() calls it this way).
//   ra_     - register allocator, queried for the source/destination
//             registers of the copy and for their stack offsets.
//   do_size - forwarded to the impl_* helpers, which skip printing when set.
//   st      - stream used by the printing paths.
// Returns the byte size accumulated by the impl_* helpers; the vector and
// AVX-512 opmask (kreg) paths return 0.
// The copy is described by up to two 32-bit halves ("first" and "second") on
// each side; each case below is selected by the register class of the halves.
uint MachSpillCopyNode::implementation( C2_MacroAssembler *masm, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
1032
  // Get registers to move
1033
  OptoReg::Name src_second = ra_->get_reg_second(in(1));
1034
  OptoReg::Name src_first = ra_->get_reg_first(in(1));
1035
  OptoReg::Name dst_second = ra_->get_reg_second(this );
1036
  OptoReg::Name dst_first = ra_->get_reg_first(this );
1037

1038
  enum RC src_second_rc = rc_class(src_second);
1039
  enum RC src_first_rc = rc_class(src_first);
1040
  enum RC dst_second_rc = rc_class(dst_second);
1041
  enum RC dst_first_rc = rc_class(dst_first);
1042

1043
  assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
1044

1045
  // Generate spill code!
1046
  int size = 0;
1047

1048
  if( src_first == dst_first && src_second == dst_second )
1049
    return size;            // Self copy, no move
1050

1051
  // Vector values (vector masks excluded) are handled entirely by the vec_*
  // helpers; no size is accumulated for them.
  if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
1052
    uint ireg = ideal_reg();
1053
    assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
1054
    assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
1055
    assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
1056
    if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1057
      // mem -> mem
1058
      int src_offset = ra_->reg2offset(src_first);
1059
      int dst_offset = ra_->reg2offset(dst_first);
1060
      vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
1061
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1062
      vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
1063
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1064
      int stack_offset = ra_->reg2offset(dst_first);
1065
      vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
1066
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
1067
      int stack_offset = ra_->reg2offset(src_first);
1068
      vec_spill_helper(masm, true,  stack_offset, dst_first, ireg, st);
1069
    } else {
1070
      ShouldNotReachHere();
1071
    }
1072
    return 0;
1073
  }
1074

1075
  // --------------------------------------
1076
  // Check for mem-mem move.  push/pop to move.
1077
  if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1078
    if( src_second == dst_first ) { // overlapping stack copy ranges
1079
      // Move the second half first so that writing dst_first below does not
      // overwrite src_second before it has been read.
      assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
1080
      size = impl_helper(masm,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1081
      size = impl_helper(masm,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1082
      src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
1083
    }
1084
    // move low bits
1085
    size = impl_helper(masm,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
1086
    size = impl_helper(masm,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
1087
    if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
1088
      size = impl_helper(masm,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1089
      size = impl_helper(masm,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1090
    }
1091
    return size;
1092
  }
1093

1094
  // --------------------------------------
1095
  // The three integer cases below do not return: they fall through to the
  // "second bits" handling at the end of this function.
  // Check for integer reg-reg copy
1096
  if( src_first_rc == rc_int && dst_first_rc == rc_int )
1097
    size = impl_mov_helper(masm,do_size,src_first,dst_first,size, st);
1098

1099
  // Check for integer store
1100
  if( src_first_rc == rc_int && dst_first_rc == rc_stack )
1101
    size = impl_helper(masm,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
1102

1103
  // Check for integer load
1104
  if( src_first_rc == rc_stack && dst_first_rc == rc_int )
1105
    size = impl_helper(masm,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
1106

1107
  // Check for integer reg-xmm reg copy
1108
  if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
1109
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1110
            "no 64 bit integer-float reg moves" );
1111
    return impl_movgpr2x_helper(masm,do_size,src_first,dst_first,src_second, dst_second, size, st);
1112
  }
1113
  // --------------------------------------
1114
  // Check for float reg-reg copy
1115
  if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
1116
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1117
            (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
1118
    if( masm ) {
1119

1120
      // Note the mucking with the register encode to compensate for the 0/1
1121
      // indexing issue mentioned in a comment in the reg_def sections
1122
      // for FPR registers many lines above here.
1123

1124
      if( src_first != FPR1L_num ) {
1125
        emit_opcode  (masm, 0xD9 );           // FLD    ST(i)
1126
        emit_d8      (masm, 0xC0+Matcher::_regEncode[src_first]-1 );
1127
        emit_opcode  (masm, 0xDD );           // FSTP   ST(i)
1128
        emit_d8      (masm, 0xD8+Matcher::_regEncode[dst_first] );
1129
     } else {
1130
        emit_opcode  (masm, 0xDD );           // FST    ST(i)
1131
        emit_d8      (masm, 0xD0+Matcher::_regEncode[dst_first]-1 );
1132
     }
1133
#ifndef PRODUCT
1134
    } else if( !do_size ) {
1135
      if( size != 0 ) st->print("\n\t");
1136
      if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
1137
      else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
1138
#endif
1139
    }
1140
    return size + ((src_first != FPR1L_num) ? 2+2 : 2);
1141
  }
1142

1143
  // Check for float store
1144
  if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
1145
    return impl_fp_store_helper(masm,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
1146
  }
1147

1148
  // Check for float load
1149
  if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
1150
    int offset = ra_->reg2offset(src_first);
1151
    const char *op_str;
1152
    int op;
1153
    if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
1154
      op_str = "FLD_D";
1155
      op = 0xDD;
1156
    } else {                   // 32-bit load
1157
      op_str = "FLD_S";
1158
      op = 0xD9;
1159
      assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
1160
    }
1161
    if( masm ) {
1162
      masm->set_inst_mark();
1163
      emit_opcode  (masm, op );
1164
      encode_RegMem(masm, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
1165
      emit_opcode  (masm, 0xDD );           // FSTP   ST(i)
1166
      emit_d8      (masm, 0xD8+Matcher::_regEncode[dst_first] );
1167
      masm->clear_inst_mark();
1168
#ifndef PRODUCT
1169
    } else if( !do_size ) {
1170
      if( size != 0 ) st->print("\n\t");
1171
      st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
1172
#endif
1173
    }
1174
    // Load: 3 bytes plus displacement; the FSTP that follows adds 2 more.
    int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
1175
    return size + 3+offset_size+2;
1176
  }
1177

1178
  // Check for xmm reg-reg copy
1179
  if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1180
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1181
            (src_first+1 == src_second && dst_first+1 == dst_second),
1182
            "no non-adjacent float-moves" );
1183
    return impl_movx_helper(masm,do_size,src_first,dst_first,src_second, dst_second, size, st);
1184
  }
1185

1186
  // Check for xmm reg-integer reg copy
1187
  if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
1188
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1189
            "no 64 bit float-integer reg moves" );
1190
    return impl_movx2gpr_helper(masm,do_size,src_first,dst_first,src_second, dst_second, size, st);
1191
  }
1192

1193
  // Check for xmm store
1194
  if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1195
    return impl_x_helper(masm,do_size,false,ra_->reg2offset(dst_first), src_first, src_second, size, st);
1196
  }
1197

1198
  // Check for float xmm load
1199
  if( src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
1200
    return impl_x_helper(masm,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
1201
  }
1202

1203
  // Copy from float reg to xmm reg
1204
  if( src_first_rc == rc_float && dst_first_rc == rc_xmm ) {
1205
    // copy to the top of stack from floating point reg
1206
    // and use LEA to preserve flags
1207
    if( masm ) {
1208
      emit_opcode(masm,0x8D);  // LEA  ESP,[ESP-8]
1209
      emit_rm(masm, 0x1, ESP_enc, 0x04);
1210
      emit_rm(masm, 0x0, 0x04, ESP_enc);
1211
      emit_d8(masm,0xF8);
1212
#ifndef PRODUCT
1213
    } else if( !do_size ) {
1214
      if( size != 0 ) st->print("\n\t");
1215
      st->print("LEA    ESP,[ESP-8]");
1216
#endif
1217
    }
1218
    size += 4;
1219

1220
    // Store the x87 value into the temporary slot at [ESP + 0].
    size = impl_fp_store_helper(masm,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
1221

1222
    // Copy from the temp memory to the xmm reg.
1223
    size = impl_x_helper(masm,do_size,true ,0,dst_first, dst_second, size, st);
1224

1225
    if( masm ) {
1226
      emit_opcode(masm,0x8D);  // LEA  ESP,[ESP+8]
1227
      emit_rm(masm, 0x1, ESP_enc, 0x04);
1228
      emit_rm(masm, 0x0, 0x04, ESP_enc);
1229
      emit_d8(masm,0x08);
1230
#ifndef PRODUCT
1231
    } else if( !do_size ) {
1232
      if( size != 0 ) st->print("\n\t");
1233
      st->print("LEA    ESP,[ESP+8]");
1234
#endif
1235
    }
1236
    size += 4;
1237
    return size;
1238
  }
1239

1240
  // AVX-512 opmask specific spilling.
1241
  // NOTE(review): unlike the cases above, the printing branches of the kreg
  // cases are not guarded by !do_size and do not add to size.
  if (src_first_rc == rc_stack && dst_first_rc == rc_kreg) {
1242
    assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
1243
    assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
1244
    int offset = ra_->reg2offset(src_first);
1245
    if (masm != nullptr) {
1246
      __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
1247
#ifndef PRODUCT
1248
    } else {
1249
      st->print("KMOV    %s, [ESP + %d]", Matcher::regName[dst_first], offset);
1250
#endif
1251
    }
1252
    return 0;
1253
  }
1254

1255
  if (src_first_rc == rc_kreg && dst_first_rc == rc_stack) {
1256
    assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
1257
    assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
1258
    int offset = ra_->reg2offset(dst_first);
1259
    if (masm != nullptr) {
1260
      __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
1261
#ifndef PRODUCT
1262
    } else {
1263
      st->print("KMOV    [ESP + %d], %s", offset, Matcher::regName[src_first]);
1264
#endif
1265
    }
1266
    return 0;
1267
  }
1268

1269
  if (src_first_rc == rc_kreg && dst_first_rc == rc_int) {
1270
    Unimplemented();
1271
    return 0;
1272
  }
1273

1274
  if (src_first_rc == rc_int && dst_first_rc == rc_kreg) {
1275
    Unimplemented();
1276
    return 0;
1277
  }
1278

1279
  if (src_first_rc == rc_kreg && dst_first_rc == rc_kreg) {
1280
    assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
1281
    assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
1282
    if (masm != nullptr) {
1283
      __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
1284
#ifndef PRODUCT
1285
    } else {
1286
      st->print("KMOV    %s, %s", Matcher::regName[dst_first], Matcher::regName[src_first]);
1287
#endif
1288
    }
1289
    return 0;
1290
  }
1291

1292
  // Only the integer cases above get here without returning, and each of
  // them has added to size.
  assert( size > 0, "missed a case" );
1293

1294
  // --------------------------------------------------------------------
1295
  // Check for second bits still needing moving.
1296
  if( src_second == dst_second )
1297
    return size;               // Self copy; no move
1298
  assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
1299

1300
  // Check for second word int-int move
1301
  if( src_second_rc == rc_int && dst_second_rc == rc_int )
1302
    return impl_mov_helper(masm,do_size,src_second,dst_second,size, st);
1303

1304
  // Check for second word integer store
1305
  if( src_second_rc == rc_int && dst_second_rc == rc_stack )
1306
    return impl_helper(masm,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
1307

1308
  // Check for second word integer load
1309
  if( dst_second_rc == rc_int && src_second_rc == rc_stack )
1310
    return impl_helper(masm,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
1311

1312
  Unimplemented();
1313
  return 0; // Mute compiler
1314
}
1315

1316
#ifndef PRODUCT
1317
// Print the spill copy: implementation() is run without an assembler so that
// it takes its printing paths and writes to 'st'.
void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
  C2_MacroAssembler* const no_assembler = nullptr;
  const bool do_size = false;
  implementation(no_assembler, ra_, do_size, st);
}
1320
#endif
1321

1322
// Emit the spill copy into 'masm'; no output stream is supplied because the
// emitting paths of implementation() do not print.
void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  outputStream* const no_stream = nullptr;
  const bool do_size = false;
  implementation(masm, ra_, do_size, no_stream);
}
1325

1326
// Size of the spill copy, as computed by the generic MachNode::size().
uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  const uint computed_size = MachNode::size(ra_);
  return computed_size;
}
1329

1330

1331
//=============================================================================
1332
#ifndef PRODUCT
1333
// Print the LEA that loads the address of the node's stack slot
// ([ESP + offset]) into its assigned register.
void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  const int slot_offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  const int dst_reg     = ra_->get_reg_first(this);
  const char* dst_name  = Matcher::regName[dst_reg];
  st->print("LEA    %s,[ESP + #%d]",dst_name,slot_offset);
}
1338
#endif
1339

1340
// Emit "LEA reg,[ESP + offset]".  Offsets of 128 and above use ModRM mode 0x2
// with a 32-bit displacement; smaller offsets use mode 0x1 with an 8-bit one.
void BoxLockNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  const int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  const int reg = ra_->get_encode(this);
  const bool wide_disp = (offset >= 128);

  emit_opcode(masm, 0x8D);      // LEA  reg,[SP+offset]
  emit_rm(masm, wide_disp ? 0x2 : 0x1, reg, 0x04);
  emit_rm(masm, 0x0, 0x04, ESP_enc);
  if (wide_disp) {
    emit_d32(masm, offset);
  } else {
    emit_d8(masm, offset);
  }
}
1356

1357
// Byte size of the LEA produced by emit(): 7 bytes when the offset is 128 or
// more (32-bit displacement), 4 bytes otherwise (8-bit displacement).
uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
  const int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  return (offset >= 128) ? 7 : 4;
}
1366

1367
//=============================================================================
1368
#ifndef PRODUCT
1369
// Print the inline cache check: the compare, the branch to the IC miss stub
// and the trailing NOPs (two when OptoBreakpoint is set, three otherwise).
void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
  st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");

  const int nop_count = OptoBreakpoint ? 2 : 3;
  for (int i = 0; i < nop_count; i++) {
    st->print_cr("\tNOP");
  }
}
1377
#endif
1378

1379
// Emits the inline cache check through the macro assembler's ic_check(),
// passing CodeEntryAlignment as its argument.  ra_ is not used.
void MachUEPNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1380
  __ ic_check(CodeEntryAlignment);
1381
}
1382

1383
// Size of the entry-point check.  Too many variables influence it to predict
// it here, so the generic MachNode::size() computes it the hard way.
uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
  const uint computed_size = MachNode::size(ra_);
  return computed_size;
}
1387

1388

1389
//=============================================================================
1390

1391
// Vector calling convention not supported.
1392
bool Matcher::supports_vector_calling_convention() {
1393
  return false;
1394
}
1395

1396
// Not implemented here (supports_vector_calling_convention() returns false).
// The pair returned after Unimplemented() is only a placeholder.
OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
  Unimplemented();
  const OptoRegPair placeholder = OptoRegPair(0, 0);
  return placeholder;
}
1400

1401
// Is this branch offset short enough that a short branch can be used?
1402
//
1403
// NOTE: If the platform does not provide any short branch variants, then
1404
//       this method should return false for offset 0.
1405
bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
1406
  // The passed offset is relative to address of the branch.
1407
  // On 86 a branch displacement is calculated relative to address
1408
  // of a next instruction.
1409
  offset -= br_size;
1410

1411
  // the short version of jmpConUCF2 contains multiple branches,
1412
  // making the reach slightly less
1413
  if (rule == jmpConUCF2_rule)
1414
    return (-126 <= offset && offset <= 125);
1415
  return (-128 <= offset && offset <= 127);
1416
}
1417

1418
// Return whether or not this register is ever used as an argument.  This
1419
// function is used on startup to build the trampoline stubs in generateOptoStub.
1420
// Registers not mentioned will be killed by the VM call in the trampoline, and
1421
// arguments in those registers not be available to the callee.
1422
bool Matcher::can_be_java_arg( int reg ) {
1423
  if(  reg == ECX_num   || reg == EDX_num   ) return true;
1424
  if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
1425
  if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
1426
  return false;
1427
}
1428

1429
bool Matcher::is_spillable_arg( int reg ) {
1430
  return can_be_java_arg(reg);
1431
}
1432

1433
// Integer register pressure limit: INTPRESSURE, or 6 when the flag is left at -1.
uint Matcher::int_pressure_limit()
{
  if (INTPRESSURE == -1) {
    return 6;
  }
  return INTPRESSURE;
}
1437

1438
// Float register pressure limit: FLOATPRESSURE, or 6 when the flag is left at -1.
uint Matcher::float_pressure_limit()
{
  if (FLOATPRESSURE == -1) {
    return 6;
  }
  return FLOATPRESSURE;
}
1442

1443
// Decide whether a long division by the constant 'divisor' should use the
// hardware integer DIV instruction instead of multiply-based code.
bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Only worthwhile when the hardware DIV is the faster choice.
  if (!VM_Version::has_fast_idiv()) {
    return false;
  }
  // The constant must fit into 32 bits; min_jint is excluded so that only
  // correct positive 32 bit values are obtained from negative ones.
  const bool fits_in_32_bits = (divisor == (int)divisor);
  return fits_in_32_bits && divisor != min_jint;
}
1452

1453
// Register for DIVI projection of divmodI
1454
RegMask Matcher::divI_proj_mask() {
1455
  return EAX_REG_mask();
1456
}
1457

1458
// Register for MODI projection of divmodI
1459
RegMask Matcher::modI_proj_mask() {
1460
  return EDX_REG_mask();
1461
}
1462

1463
// Register for DIVL projection of divmodL
1464
RegMask Matcher::divL_proj_mask() {
1465
  ShouldNotReachHere();
1466
  return RegMask();
1467
}
1468

1469
// Register for MODL projection of divmodL
1470
RegMask Matcher::modL_proj_mask() {
1471
  ShouldNotReachHere();
1472
  return RegMask();
1473
}
1474

1475
// Mask returned for saving SP across method handle invokes: the
// NO_REG mask on this platform.
const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  const RegMask& no_reg = NO_REG_mask();
  return no_reg;
}
1478

1479
// Returns true if the high 32 bits of the value is known to be zero.
1480
bool is_operand_hi32_zero(Node* n) {
1481
  int opc = n->Opcode();
1482
  if (opc == Op_AndL) {
1483
    Node* o2 = n->in(2);
1484
    if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1485
      return true;
1486
    }
1487
  }
1488
  if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1489
    return true;
1490
  }
1491
  return false;
1492
}
1493

1494
%}
1495

1496
//----------ENCODING BLOCK-----------------------------------------------------
1497
// This block specifies the encoding classes used by the compiler to output
1498
// byte streams.  Encoding classes generate functions which are called by
1499
// Machine Instruction Nodes in order to generate the bit encoding of the
1500
// instruction.  Operands specify their base encoding interface with the
1501
// interface keyword.  Four interfaces are currently supported:
1502
// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
1503
// operand to generate a function which returns its register number when
1504
// queried.   CONST_INTER causes an operand to generate a function which
1505
// returns the value of the constant when queried.  MEMORY_INTER causes an
1506
// operand to generate four functions which return the Base Register, the
1507
// Index Register, the Scale Value, and the Offset Value of the operand when
1508
// queried.  COND_INTER causes an operand to generate six functions which
1509
// return the encoding code (ie - encoding bits for the instruction)
1510
// associated with each basic boolean condition for a conditional instruction.
1511
// Instructions specify two basic values for encoding.  They use the
1512
// ins_encode keyword to specify their encoding class (which must be one of
1513
// the class names specified in the encoding block), and they use the
1514
// opcode keyword to specify, in order, their primary, secondary, and
1515
// tertiary opcode.  Only the opcode sections which a particular instruction
1516
// needs for encoding need to be specified.
1517
encode %{
1518
  // Build emit functions for each basic byte or larger field in the intel
1519
  // encoding scheme (opcode, rm, sib, immediate), and call them from C++
1520
  // code in the enc_class source block.  Emit functions will live in the
1521
  // main source block for now.  In future, we can generalize this by
1522
  // adding a syntax that specifies the sizes of fields in an order,
1523
  // so that the adlc can build the emit functions automagically
1524

1525
  // Set instruction mark in MacroAssembler. This is used only in
1526
  // instructions that emit bytes directly to the CodeBuffer wrapped
1527
  // in the MacroAssembler. Should go away once all "instruct" are
1528
  // patched to emit bytes only using methods in MacroAssembler.
1529
  // Calls set_inst_mark() on the MacroAssembler.  Counterpart of
  // ClearInstMark; the call encodings in this block show the usual
  // set / emit bytes / clear sequence.
  enc_class SetInstMark %{
1530
    __ set_inst_mark();
1531
  %}
1532

1533
  // Calls clear_inst_mark() on the MacroAssembler (counterpart of SetInstMark).
  enc_class ClearInstMark %{
1534
    __ clear_inst_mark();
1535
  %}
1536

1537
  // Emit primary opcode
1538
  // Passes the instruction's primary opcode ($primary) to emit_opcode.
  enc_class OpcP %{
1539
    emit_opcode(masm, $primary);
1540
  %}
1541

1542
  // Emit secondary opcode
1543
  // Passes the instruction's secondary opcode ($secondary) to emit_opcode.
  enc_class OpcS %{
1544
    emit_opcode(masm, $secondary);
1545
  %}
1546

1547
  // Emit opcode directly
1548
  // Emits the value of the constant operand d8 as an opcode, independent of
  // the instruction's primary/secondary/tertiary opcodes.
  enc_class Opcode(immI d8) %{
1549
    emit_opcode(masm, $d8$$constant);
1550
  %}
1551

1552
  // Emits the 0x66 prefix byte (the same byte enc_cmpxchgw emits to select
  // its 16-bit form).
  enc_class SizePrefix %{
1553
    emit_opcode(masm,0x66);
1554
  %}
1555

1556
  // Emits only the R/M byte for a register-register form: mod = 0x3, followed
  // by the encodings of $dst and $src.  The opcode must be emitted separately.
  // NOTE(review): an enc_class with this same name and body appears again
  // later in this encode block.
  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
1557
    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
1558
  %}
1559

1560
  // Emits the constant operand 'opcode' followed by a register-register
  // R/M byte (mod = 0x3) built from $dst and $src.
  enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
1561
    emit_opcode(masm,$opcode$$constant);
1562
    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
1563
  %}
1564

1565
  // Loads the constant zero into $dst using the MOV r32,imm32 form; the
  // destination register number is folded into the opcode byte.
  enc_class mov_r32_imm0( rRegI dst ) %{
1566
    emit_opcode( masm, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
1567
    emit_d32   ( masm, 0x0  );             //                         imm32==0x0
1568
  %}
1569

1570
  // Guard sequence emitted ahead of a 32-bit signed divide.  It handles the
  // min_int / -1 case by hand and otherwise falls into CDQ.  The IDIV itself
  // is NOT emitted here and must follow immediately: the 'je done'
  // displacement of 3 covers the 1-byte CDQ plus a 2-byte IDIV.
  enc_class cdq_enc %{
1571
    // Full implementation of Java idiv and irem; checks for
1572
    // special case as described in JVM spec., p.243 & p.271.
1573
    //
1574
    //         normal case                           special case
1575
    //
1576
    // input : eax: dividend                         min_int
1577
    //         reg: divisor                          -1
1578
    //
1579
    // output: eax: quotient  (= eax idiv reg)       min_int
1580
    //         edx: remainder (= eax irem reg)       0
1581
    //
1582
    //  Code sequence (the divisor compare is hard-coded against ecx):
1583
    //
1584
    //  81 F8 00 00 00 80    cmp         eax,80000000h
1585
    //  0F 85 0B 00 00 00    jne         normal_case
1586
    //  33 D2                xor         edx,edx
1587
    //  83 F9 FF             cmp         ecx,-1       (imm8 0FFh, sign-extended)
1588
    //  0F 84 03 00 00 00    je          done
1589
    //                  normal_case:
1590
    //  99                   cdq
1591
    //  F7 F9                idiv        ecx
1592
    //                  done:
1593
    //
1594
    emit_opcode(masm,0x81); emit_d8(masm,0xF8);
1595
    emit_opcode(masm,0x00); emit_d8(masm,0x00);
1596
    emit_opcode(masm,0x00); emit_d8(masm,0x80);                     // cmp eax,80000000h
1597
    emit_opcode(masm,0x0F); emit_d8(masm,0x85);
1598
    emit_opcode(masm,0x0B); emit_d8(masm,0x00);
1599
    emit_opcode(masm,0x00); emit_d8(masm,0x00);                     // jne normal_case (+11: xor, cmp, je)
1600
    emit_opcode(masm,0x33); emit_d8(masm,0xD2);                     // xor edx,edx
1601
    emit_opcode(masm,0x83); emit_d8(masm,0xF9); emit_d8(masm,0xFF); // cmp ecx,-1
1602
    emit_opcode(masm,0x0F); emit_d8(masm,0x84);
1603
    emit_opcode(masm,0x03); emit_d8(masm,0x00);
1604
    emit_opcode(masm,0x00); emit_d8(masm,0x00);                     // je done (+3: cdq, idiv)
1605
    // normal_case:
1606
    emit_opcode(masm,0x99);                                         // cdq
1607
    // idiv (note: must be emitted by the user of this rule)
1608
    // done:
1609
  %}
1610

1611
  // Dense encoding for older common ops
1612
  // Emits one opcode formed by adding the encoding of $reg to the constant
  // operand 'opcode' (register number folded into the opcode byte).
  enc_class Opc_plus(immI opcode, rRegI reg) %{
1613
    emit_opcode(masm, $opcode$$constant + $reg$$reg);
1614
  %}
1615

1616

1617
  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
1618
  enc_class OpcSE (immI imm) %{ // Primary opcode, plus sign-extend bit for 8-bit immediates
    // Immediates in [-128, 127] get the sign-extend bit (0x02) OR-ed into the
    // primary opcode; wider immediates use the primary opcode unchanged.
    const bool fits_in_imm8 = ($imm$$constant >= -128) && ($imm$$constant <= 127);
    emit_opcode(masm, fits_in_imm8 ? ($primary | 0x02) : $primary);
  %}
1627

1628
  enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
    // Opcode byte: primary opcode, with the sign-extend bit (0x02) set when
    // the immediate fits in a signed 8-bit value.
    const bool fits_in_imm8 = ($imm$$constant >= -128) && ($imm$$constant <= 127);
    emit_opcode(masm, fits_in_imm8 ? ($primary | 0x02) : $primary);
    // R/M byte: register-direct, carrying the secondary opcode and $dst.
    emit_rm(masm, 0x3, $secondary, $dst$$reg);
  %}
1639

1640
  enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
    // Immediates outside [-128, 127] are written in the 32-bit form;
    // everything else is written in the 8-bit form.
    if (($imm$$constant < -128) || ($imm$$constant > 127)) {
      $$$emit32$imm$$constant;
    } else {
      $$$emit8$imm$$constant;
    }
  %}
1650

1651
  enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
    // Operates on the low register of the long pair using the low 32 bits
    // of the immediate.
    const int low_word = (int)$imm$$constant; // upper 32 bits are discarded
    const bool fits_in_imm8 = (low_word >= -128) && (low_word <= 127);

    // Opcode byte, with the sign-extend bit set for an 8-bit immediate.
    emit_opcode(masm, fits_in_imm8 ? ($primary | 0x02) : $primary);
    // R/M byte carrying the secondary opcode and the low register.
    emit_rm(masm, 0x3, $secondary, $dst$$reg);
    // Immediate, in the width chosen above.
    if (fits_in_imm8) {
      emit_d8(masm, low_word);
    } else {
      emit_d32(masm, low_word);
    }
  %}
1661

1662
  enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
    // Operates on the high register of the long pair using the upper 32 bits
    // of the immediate.
    const int high_word = (int)($imm$$constant >> 32); // lower 32 bits are discarded
    const bool fits_in_imm8 = (high_word >= -128) && (high_word <= 127);

    // Opcode byte, with the sign-extend bit set for an 8-bit immediate.
    emit_opcode(masm, fits_in_imm8 ? ($primary | 0x02) : $primary);
    // R/M byte carrying the tertiary opcode and the high register.
    emit_rm(masm, 0x3, $tertiary, HIGH_FROM_LOW_ENC($dst$$reg));
    // Immediate, in the width chosen above.
    if (fits_in_imm8) {
      emit_d8(masm, high_word);
    } else {
      emit_d32(masm, high_word);
    }
  %}
1672

1673
  // Passes the secondary opcode and the encoding of $dst to emit_cc.
  // bswap_long_bytes uses the same helper with 0xC8 for BSWAP.
  enc_class OpcSReg (rRegI dst) %{    // BSWAP
1674
    emit_cc(masm, $secondary, $dst$$reg );
1675
  %}
1676

1677
  enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
    // Byte-reverses a 64-bit value held in a register pair: each half is
    // byte-swapped, then the two halves are exchanged.
    const int lo_enc = $dst$$reg;
    const int hi_enc = HIGH_FROM_LOW_ENC(lo_enc);

    // BSWAP low half
    emit_opcode(masm, 0x0F);
    emit_cc(masm, 0xC8, lo_enc);

    // BSWAP high half
    emit_opcode(masm, 0x0F);
    emit_cc(masm, 0xC8, hi_enc);

    // XCHG low half, high half
    emit_opcode(masm, 0x87);
    emit_rm(masm, 0x3, lo_enc, hi_enc);
  %}
1690

1691
  // Emits a register-direct R/M byte (mod = 0x3) carrying the secondary
  // opcode and the encoding of $div.  The opcode byte is emitted separately.
  enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
1692
    emit_rm(masm, 0x3, $secondary, $div$$reg );
1693
  %}
1694

1695
  // Emits the primary opcode, then combines the secondary opcode with the
  // condition code of $cop via emit_cc.
  enc_class enc_cmov(cmpOp cop ) %{ // CMOV
1696
    $$$emit8$primary;
1697
    emit_cc(masm, $secondary, $cop$$cmpcode);
1698
  %}
1699

1700
  enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
    // Builds a two-byte encoding from the 0xDA00 base, the condition code and
    // the source register number less one, then emits it high byte first.
    const int encoding = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
    emit_d8(masm, encoding >> 8);
    emit_d8(masm, encoding & 0xFF);
  %}
1705

1706
  // emulate a CMOV with a conditional branch around a MOV
1707
  // Emits the conditional branch half of an emulated CMOV: a branch built
  // from base 0x70 and the inverted condition, followed by the 8-bit offset
  // brOffs.  The MOV being skipped is emitted by the instruction using this
  // encoding.
  enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
1708
    // Invert sense of branch from sense of CMOV
1709
    emit_cc( masm, 0x70, ($cop$$cmpcode^1) );
1710
    emit_d8( masm, $brOffs$$constant );
1711
  %}
1712

1713
  enc_class enc_PartialSubtypeCheck( ) %{
    // Runs the slow-path subtype check with fixed registers.  On a miss the
    // helper branches to 'check_missed', skipping the optional clearing of
    // the result register.
    Label check_missed;
    const Register sub_klass   = as_Register(ESI_enc); // sub class
    const Register super_klass = as_Register(EAX_enc); // super class
    const Register scratch     = as_Register(ECX_enc); // killed
    const Register result      = as_Register(EDI_enc); // result register

    __ check_klass_subtype_slow_path(sub_klass, super_klass, scratch, result,
                                     nullptr, &check_missed,
                                     /*set_cond_codes:*/ true);
    // A non-zero primary opcode requests a zeroed result register when the
    // check did not branch to the miss label.
    if ($primary) {
      __ xorptr(result, result);
    }
    __ bind(check_missed);
  %}
1728

1729
  enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
    const int begin_offset = __ offset();

    if (UseSSE < 2) {
      // External c_calling_convention expects the FPU stack to be 'clean'.
      // Compiled code leaves it dirty.  Do cleanup now.
      __ empty_FPU_stack();
    } else if (VerifyFPU) {
      // With SSE2+ only a verification is emitted, and only on request.
      __ verify_FPU(0, "must be empty in SSE2+ mode");
    }

    // The first emission records the size of this sequence; every later
    // emission must produce exactly the same number of bytes.
    const int emitted_bytes = __ offset() - begin_offset;
    if (sizeof_FFree_Float_Stack_All == -1) {
      sizeof_FFree_Float_Stack_All = emitted_bytes;
    } else {
      assert(emitted_bytes == sizeof_FFree_Float_Stack_All, "wrong size");
    }
  %}
1746

1747
  // Emits an FPU verification after a runtime leaf call, but only when the
  // VerifyFPU flag is set; otherwise emits nothing.
  enc_class Verify_FPU_For_Leaf %{
1748
    if( VerifyFPU ) {
1749
      __ verify_FPU( -3, "Returning from Runtime Leaf call");
1750
    }
1751
  %}
1752

1753
  enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
    // The instruction mark records the start of the call for relocation info.
    __ set_inst_mark();
    $$$emit8$primary;
    // Displacement of a direct call into the runtime, relative to the end of
    // the 4-byte field being emitted.
    emit_d32_reloc(masm, ($meth$$method - (int)(__ pc()) - 4),
                   runtime_call_Relocation::spec(), RELOC_IMM32 );
    __ clear_inst_mark();
    __ post_call_nop();

    if (UseSSE >= 2) {
      const BasicType ret_type = tf()->return_type();
      const bool returns_float  = (ret_type == T_FLOAT);
      const bool returns_double = (ret_type == T_DOUBLE);

      if ((returns_float || returns_double) && !return_value_is_used()) {
        // A C runtime call where the return value is unused.  In SSE2+
        // mode the result needs to be removed from the FPU stack.  It's
        // likely that this function call could be removed by the
        // optimizer if the C function is a pure function.
        __ ffree(0);
      } else if (returns_float || returns_double) {
        // Move the result from the FPU stack into xmm0 through a temporary
        // stack slot of the matching width.
        const int slot_bytes = returns_float ? 4 : 8;
        __ lea(rsp, Address(rsp, -slot_bytes));
        if (returns_float) {
          __ fstp_s(Address(rsp, 0));
          __ movflt(xmm0, Address(rsp, 0));
        } else {
          __ fstp_d(Address(rsp, 0));
          __ movdbl(xmm0, Address(rsp, 0));
        }
        __ lea(rsp, Address(rsp, slot_bytes));
      }
    }
  %}
1785

1786
  // State resets emitted just before a call: reloads the standard FPU control
  // word when the method runs in 24-bit FP mode, then emits vzeroupper.  The
  // emitted size is asserted to equal pre_call_resets_size().
  enc_class pre_call_resets %{
1787
    // If method sets FPU control word restore it here
1788
    debug_only(int off0 = __ offset());
1789
    if (ra_->C->in_24_bit_fp_mode()) {
1790
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
1791
    }
1792
    // Clear upper bits of YMM registers when current compiled code uses
1793
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
1794
    __ vzeroupper();
1795
    debug_only(int off1 = __ offset());
1796
    // off0/off1 exist only under debug_only, matching the assert below.
    assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
1797
  %}
1798

1799
  // Emitted after a call: when the current compilation runs in 24-bit FP
  // mode, reloads the 24-bit FPU control word (the counterpart of the
  // standard control word loaded by pre_call_resets).
  enc_class post_call_FPU %{
1800
    // If method sets FPU control word do it here also
1801
    if (Compile::current()->in_24_bit_fp_mode()) {
1802
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
1803
    }
1804
  %}
1805

1806
  // Emits a direct call.  Without a resolved _method the target gets a
  // runtime-call relocation.  Otherwise it gets a static or opt-virtual call
  // relocation plus a to-interpreter stub, either shared or freshly emitted.
  enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
1807
    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
1808
    // who we intended to call.
1809
    __ set_inst_mark();
1810
    $$$emit8$primary;
1811

1812
    if (!_method) {
1813
      // No resolved method: treat the target as a runtime call.
      emit_d32_reloc(masm, ($meth$$method - (int)(__ pc()) - 4),
1814
                     runtime_call_Relocation::spec(),
1815
                     RELOC_IMM32);
1816
      __ clear_inst_mark();
1817
      __ post_call_nop();
1818
    } else {
1819
      int method_index = resolved_method_index(masm);
1820
      RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
1821
                                                  : static_call_Relocation::spec(method_index);
1822
      emit_d32_reloc(masm, ($meth$$method - (int)(__ pc()) - 4),
1823
                     rspec, RELOC_DISP32);
1824
      __ post_call_nop();
1825
      // The mark set at the top of this encoding is still in effect here; it
      // is cleared on each path below once the stub has been handled.
      address mark = __ inst_mark();
1826
      if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
1827
        // Calls of the same statically bound method can share
1828
        // a stub to the interpreter.
1829
        __ code()->shared_stub_to_interp_for(_method, __ code()->insts()->mark_off());
1830
        __ clear_inst_mark();
1831
      } else {
1832
        // Emit stubs for static call.
1833
        address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
1834
        __ clear_inst_mark();
1835
        if (stub == nullptr) {
1836
          // Stub emission failed: record the failure and stop emitting.
          ciEnv::current()->record_failure("CodeCache is full");
1837
          return;
1838
        }
1839
      }
1840
    }
1841
  %}
1842

1843
  // Emits an inline-cache call to $meth via MacroAssembler::ic_call, passing
  // the resolved method index, followed by the post-call nop.
  enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
1844
    __ ic_call((address)$meth$$method, resolved_method_index(masm));
1845
    __ post_call_nop();
1846
  %}
1847

1848
  // Emits an indirect call through [EAX + disp8], where disp8 is
  // Method::from_compiled_offset().  The offset is asserted to fit in a
  // signed byte because the 8-bit displacement form is used.
  enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
1849
    int disp = in_bytes(Method::from_compiled_offset());
1850
    assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
1851

1852
    // CALL *[EAX+in_bytes(Method::from_compiled_offset())]
1853
    __ set_inst_mark();
1854
    $$$emit8$primary;
1855
    emit_rm(masm, 0x01, $secondary, EAX_enc );  // R/M byte
1856
    emit_d8(masm, disp);             // Displacement
1857
    __ clear_inst_mark();
1858
    __ post_call_nop();
1859
  %}
1860

1861
  // Shift-by-immediate form: primary opcode, a register-direct R/M byte
  // carrying the secondary opcode and $dst, then the 8-bit shift count.
  enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
1862
    $$$emit8$primary;
1863
    emit_rm(masm, 0x3, $secondary, $dst$$reg);
1864
    $$$emit8$shift$$constant;
1865
  %}
1866

1867
  // Loads a 32-bit immediate into $dst.  The opcode is fixed at 0xB8 plus
  // the destination register number, and the immediate is always 32 bits.
  enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
1868
    // Load immediate does not have a zero or sign extended version
1869
    // for 8-bit immediates
1870
    emit_opcode(masm, 0xB8 + $dst$$reg);
1871
    $$$emit32$src$$constant;
1872
  %}
1873

1874
  // Same shape as LdImmI, except that the base opcode comes from the
  // instruction's primary opcode instead of a hard-coded 0xB8.
  enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
1875
    // Load immediate does not have a zero or sign extended version
1876
    // for 8-bit immediates
1877
    emit_opcode(masm, $primary + $dst$$reg);
1878
    $$$emit32$src$$constant;
1879
  %}
1880

1881
  enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
    // Loads the low 32 bits of a long constant into the low register of the
    // pair.  There is no 8-bit immediate form of this load, so a non-zero
    // value always takes the full 32-bit encoding.
    const int lo_enc  = $dst$$reg;
    const int lo_bits = $src$$constant & 0x0FFFFFFFFL;
    if (lo_bits != 0) {
      emit_opcode(masm, $primary + lo_enc);
      emit_d32(masm, lo_bits);
    } else {
      // Zero is produced with XOR reg,reg instead of an immediate load.
      emit_opcode(masm, 0x33);
      emit_rm(masm, 0x3, lo_enc, lo_enc);
    }
  %}
1895

1896
  enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
    // Loads the upper 32 bits of a long constant into the high register of
    // the pair.  There is no 8-bit immediate form of this load, so a non-zero
    // value always takes the full 32-bit encoding.
    // NOTE(review): sibling encodings derive the high register with
    // HIGH_FROM_LOW_ENC; the literal '+ 2' here presumably matches that
    // macro -- confirm and consider unifying.
    const int hi_enc  = $dst$$reg + 2;
    const int hi_bits = ((julong)($src$$constant)) >> 32;
    if (hi_bits != 0) {
      emit_opcode(masm, $primary + hi_enc);
      emit_d32(masm, hi_bits);
    } else {
      // Zero is produced with XOR reg,reg instead of an immediate load.
      emit_opcode(masm, 0x33);
      emit_rm(masm, 0x3, hi_enc, hi_enc);
    }
  %}
1910

1911

1912
  // Encode a reg-reg copy.  If it is useless, then empty encoding.
1913
  // Delegates a register-to-register copy from $src to $dst to encode_Copy.
  enc_class enc_Copy( rRegI dst, rRegI src ) %{
1914
    encode_Copy( masm, $dst$$reg, $src$$reg );
1915
  %}
1916

1917
  // Same as enc_Copy, but the source operand is a long register pair; the
  // encoding passed to encode_Copy is that of $src itself (the low half, per
  // this class's name).
  enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
1918
    encode_Copy( masm, $dst$$reg, $src$$reg );
1919
  %}
1920

1921
  // Emits only the R/M byte for a register-register form: mod = 0x3, followed
  // by the encodings of $dst and $src.
  // NOTE(review): an enc_class with this same name and body is also defined
  // earlier in this encode block; one of the two looks redundant.
  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
1922
    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
1923
  %}
1924

1925
  // Low-half step of a long register-register operation: primary opcode,
  // then an R/M byte built from the (low) encodings of $dst and $src.
  enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
1926
    $$$emit8$primary;
1927
    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
1928
  %}
1929

1930
  // High-half step of a long register-register operation: secondary opcode,
  // then an R/M byte built from the high registers of $dst and $src.
  enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
1931
    $$$emit8$secondary;
1932
    emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg));
1933
  %}
1934

1935
  // Like RegReg_Lo but emits only the R/M byte; no opcode is emitted here.
  enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
1936
    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
1937
  %}
1938

1939
  // Like RegReg_Hi but emits only the R/M byte; no opcode is emitted here.
  enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
1940
    emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg));
1941
  %}
1942

1943
  // R/M byte pairing the int register $dst with the HIGH register of the
  // long pair $src.  Note the parameter order: src first, then dst.
  enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
1944
    emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($src$$reg));
1945
  %}
1946

1947
  // Emits the constant operand as a 32-bit immediate, regardless of its value.
  enc_class Con32 (immI src) %{    // Con32(storeImmI)
1948
    // Output immediate
1949
    $$$emit32$src$$constant;
1950
  %}
1951

1952
  enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
    // Emits the raw 32-bit pattern of the float constant as an immediate.
    const jfloat float_value = $src$$constant;
    emit_d32(masm, jint_cast( float_value ));
  %}
1958

1959
  enc_class Con32F_as_bits(immF src) %{      // storeX_imm
    // Emits the raw 32-bit pattern of the float constant as an immediate.
    const jfloat float_value = $src$$constant;
    emit_d32(masm, jint_cast( float_value ));
  %}
1965

1966
  // Emits the constant operand as a 16-bit immediate.
  enc_class Con16 (immI src) %{    // Con16(storeImmI)
1967
    // Output immediate
1968
    $$$emit16$src$$constant;
1969
  %}
1970

1971
  // Emits the constant operand as 32 bits through emit_d32 (Con32 does the
  // same through the $$$emit32 form).
  enc_class Con_d32(immI src) %{
1972
    emit_d32(masm,$src$$constant);
1973
  %}
1974

1975
  // Emits an R/M byte with mod = 0x00 and r/m = 0x05, carrying the encoding
  // of $t1 in the middle field, followed by a 32-bit zero.
  // NOTE(review): presumably the absolute [disp32] addressing form with a
  // zero displacement -- confirm against the instruction that uses it.
  enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
1976
    // Output immediate memory reference
1977
    emit_rm(masm, 0x00, $t1$$reg, 0x05 );
1978
    emit_d32(masm, 0x00);
1979
  %}
1980

1981
  // Emits the 0xF0 LOCK prefix byte on its own.
  enc_class lock_prefix( ) %{
1982
    emit_opcode(masm,0xF0);         // [Lock]
1983
  %}
1984

1985
  // Cmp-xchg long value.
1986
  // Note: we need to swap ebx and ecx before and after the
1987
  //       cmpxchg8 instruction because the instruction uses
1988
  //       ecx as the high order word of the new value to store but
1989
  //       our register encoding uses ebx.
1990
  enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
1991

1992
    // XCHG  ebx,ecx
1993
    emit_opcode(masm,0x87);
1994
    emit_opcode(masm,0xD9);
1995
    // [Lock]
1996
    emit_opcode(masm,0xF0);
1997
    // CMPXCHG8 [Eptr]
1998
    emit_opcode(masm,0x0F);
1999
    emit_opcode(masm,0xC7);
2000
    emit_rm( masm, 0x0, 1, $mem_ptr$$reg );
2001
    // XCHG  ebx,ecx  (undo the swap done above)
2002
    emit_opcode(masm,0x87);
2003
    emit_opcode(masm,0xD9);
2004
  %}
2005

2006
  // Locked compare-and-exchange on the memory addressed by $mem_ptr:
  // LOCK prefix, the two-byte opcode 0F B1, then an R/M byte with mod = 0
  // whose middle field is the constant 1.
  enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
2007
    // [Lock]
2008
    emit_opcode(masm,0xF0);
2009

2010
    // CMPXCHG [Eptr]
2011
    emit_opcode(masm,0x0F);
2012
    emit_opcode(masm,0xB1);
2013
    emit_rm( masm, 0x0, 1, $mem_ptr$$reg );
2014
  %}
2015

2016
  // Byte-sized variant of enc_cmpxchg: identical sequence except that the
  // second opcode byte is 0xB0 instead of 0xB1.
  enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
2017
    // [Lock]
2018
    emit_opcode(masm,0xF0);
2019

2020
    // CMPXCHGB [Eptr]
2021
    emit_opcode(masm,0x0F);
2022
    emit_opcode(masm,0xB0);
2023
    emit_rm( masm, 0x0, 1, $mem_ptr$$reg );
2024
  %}
2025

2026
  // 16-bit variant of enc_cmpxchg: the same bytes with a 0x66 prefix inserted
  // between the LOCK prefix and the 0F B1 opcode.
  enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
2027
    // [Lock]
2028
    emit_opcode(masm,0xF0);
2029

2030
    // 16-bit mode
2031
    emit_opcode(masm, 0x66);
2032

2033
    // CMPXCHGW [Eptr]
2034
    emit_opcode(masm,0x0F);
2035
    emit_opcode(masm,0xB1);
2036
    emit_rm( masm, 0x0, 1, $mem_ptr$$reg );
2037
  %}
2038

2039
  // Turns the current condition flags into 0/1 in $res: the register is
  // first set to 0, and the following 'MOV res,1' is skipped when the flags
  // say "not equal".  MOV is used for the zero so the flags stay intact for
  // the JNE.
  enc_class enc_flags_ne_to_boolean( iRegI res ) %{
2040
    int res_encoding = $res$$reg;
2041

2042
    // MOV  res,0
2043
    emit_opcode( masm, 0xB8 + res_encoding);
2044
    emit_d32( masm, 0 );
2045
    // JNE,s  fail
2046
    emit_opcode(masm,0x75);
2047
    emit_d8(masm, 5 );   // 5 = size of the MOV below (1 opcode byte + 4 immediate bytes)
2048
    // MOV  res,1
2049
    emit_opcode( masm, 0xB8 + res_encoding);
2050
    emit_d32( masm, 1 );
2051
    // fail:
2052
  %}
2053

2054
  enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
    // Gathers the parts of the memory operand and hands them, together with
    // the register encoding, to encode_RegMem.  The displacement keeps
    // whatever relocation type the operand reports.
    const relocInfo::relocType mem_reloc = $mem->disp_reloc();
    const int mem_disp  = $mem$$disp;
    const int mem_scale = $mem$$scale;
    const int mem_index = $mem$$index;
    const int mem_base  = $mem$$base;
    const int reg_enc   = $ereg$$reg;
    encode_RegMem(masm, reg_enc, mem_base, mem_index, mem_scale, mem_disp, mem_reloc);
  %}
2063

2064
  enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
    // Accesses the upper half of a long in memory: the high register of the
    // pair is paired with the operand's address plus 4.  Because the
    // displacement is modified, it must not carry relocation info.
    assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
    const int hi_reg_enc   = HIGH_FROM_LOW_ENC($ereg$$reg);  // high register, derived from the low one
    const int mem_base     = $mem$$base;
    const int mem_index    = $mem$$index;
    const int mem_scale    = $mem$$scale;
    const int hi_word_disp = $mem$$disp + 4;                 // upper word is 4 bytes further on
    encode_RegMem(masm, hi_reg_enc, mem_base, mem_index, mem_scale, hi_word_disp, relocInfo::none);
  %}
2073

2074
  enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
    // Two-instruction shift of a long register pair.  First a double-register
    // shift (0x0F, $tertiary) using both halves, then a plain shift
    // ($primary/$secondary) of the first register.  A tertiary opcode of 0xA4
    // makes the low half the first register; otherwise the high half is.
    const int lo_enc = $dst$$reg;
    const int hi_enc = HIGH_FROM_LOW_ENC($dst$$reg);
    const bool low_half_first = ($tertiary == 0xA4);
    const int first_enc  = low_half_first ? lo_enc : hi_enc;
    const int second_enc = low_half_first ? hi_enc : lo_enc;

    // Double-register shift by the immediate count.
    emit_opcode(masm,0x0F);
    emit_opcode(masm,$tertiary);
    emit_rm(masm, 0x3, first_enc, second_enc);
    emit_d8(masm,$cnt$$constant);

    // Plain shift of the first register by the same count.
    emit_d8(masm,$primary);
    emit_rm(masm, 0x3, $secondary, first_enc);
    emit_d8(masm,$cnt$$constant);
  %}
2086

2087
  // Long shift by a constant in [32, 63] that fills with the sign.  The low
  // register first receives the high register, then is shifted by (cnt - 32)
  // when that is non-zero, and finally the high register is shifted by 31.
  // Both shifts use the instruction's primary/secondary opcodes.
  enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
2088
    emit_opcode( masm, 0x8B ); // Move
2089
    emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
2090
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
2091
      emit_d8(masm,$primary);
2092
      emit_rm(masm, 0x3, $secondary, $dst$$reg);
2093
      emit_d8(masm,$cnt$$constant-32);
2094
    }
2095
    // Shift the high register by 31.
    emit_d8(masm,$primary);
2096
    emit_rm(masm, 0x3, $secondary, HIGH_FROM_LOW_ENC($dst$$reg));
2097
    emit_d8(masm,31);
2098
  %}
2099

2100
  enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
    // Long shift by a constant in [32, 63] that fills with zeros.  One half
    // is copied into the other, shifted by (cnt - 32) if that is non-zero,
    // and the source half is cleared.  A secondary opcode of 0x5 makes the
    // low half the receiver; otherwise the high half is.
    const int lo_enc = $dst$$reg;
    const int hi_enc = HIGH_FROM_LOW_ENC($dst$$reg);
    const bool low_half_receives = ($secondary == 0x5);
    const int receiver_enc = low_half_receives ? lo_enc : hi_enc;
    const int cleared_enc  = low_half_receives ? hi_enc : lo_enc;

    // MOV receiver, other half
    emit_opcode( masm, 0x8B );
    emit_rm(masm, 0x3, receiver_enc, cleared_enc);

    // Remaining shift, skipped when the count is exactly 32.
    if( $cnt$$constant > 32 ) {
      emit_opcode(masm,$primary);
      emit_rm(masm, 0x3, $secondary, receiver_enc);
      emit_d8(masm,$cnt$$constant-32);
    }

    // XOR other half, other half
    emit_opcode(masm,0x33);
    emit_rm(masm, 0x3, cleared_enc, cleared_enc);
  %}
2115

2116
  // Clone of RegMem but accepts an extra parameter to access each
2117
  // half of a double in memory; it never needs relocation info.
2118
  enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
    // Emits the given opcode, then a register/memory operand whose
    // displacement is the operand's own displacement plus disp_for_half.
    // No relocation info is attached.
    emit_opcode(masm,$opcode$$constant);
    const int reg_enc   = $rm_reg$$reg;
    const int mem_base  = $mem$$base;
    const int mem_index = $mem$$index;
    const int mem_scale = $mem$$scale;
    const int half_disp = $mem$$disp + $disp_for_half$$constant;
    encode_RegMem(masm, reg_enc, mem_base, mem_index, mem_scale, half_disp, relocInfo::none);
  %}
2128

2129
  // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
2130
  //
2131
  // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
2132
  // and it never needs relocation information.
2133
  // Frequently used to move data between FPU's Stack Top and memory.
2134
  enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
    // Memory-operand encoding whose reg/opcode field is the constant
    // rm_opcode.  The operand must not need relocation; this is asserted.
    assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
    const int opcode_field = $rm_opcode$$constant;
    const int mem_base     = $mem$$base;
    const int mem_index    = $mem$$index;
    const int mem_scale    = $mem$$scale;
    const int mem_disp     = $mem$$disp;
    encode_RegMem(masm, opcode_field, mem_base, mem_index, mem_scale, mem_disp, relocInfo::none);
  %}
2143

2144
  enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
    // Memory-operand encoding whose reg/opcode field is the constant
    // rm_opcode.  Unlike RMopc_Mem_no_oop, the displacement keeps the
    // operand's relocation type (disp-as-oop when working with static globals).
    const relocInfo::relocType mem_reloc = $mem->disp_reloc();
    const int opcode_field = $rm_opcode$$constant;
    const int mem_base     = $mem$$base;
    const int mem_index    = $mem$$index;
    const int mem_scale    = $mem$$scale;
    const int mem_disp     = $mem$$disp;
    encode_RegMem(masm, opcode_field, mem_base, mem_index, mem_scale, mem_disp, mem_reloc);
  %}
2153

2154
  enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
    // Encodes a [src0 + src1] address for $dst: base register plus constant
    // displacement, with no index, no scale and no relocation.
    const int no_index = 0x04;               // 0x04 indicates no index
    const int no_scale = 0x00;               // 0x00 indicates no scale
    const int dst_enc  = $dst$$reg;
    const int base_enc = $src0$$reg;         // 0xFFFFFFFF indicates no base
    const int disp     = $src1$$constant;    // 0x00 indicates no displacement
    encode_RegMem(masm, dst_enc, base_enc, no_index, no_scale, disp, relocInfo::none);
  %}
2163

2164
  // dst = min(dst, src) using compare / short branch / move: the 2-byte MOV
  // is skipped when dst is already the smaller value.
  enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
2165
    // Compare dst,src
2166
    emit_opcode(masm,0x3B);
2167
    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
2168
    // jmp dst < src around move
2169
    emit_opcode(masm,0x7C);
2170
    emit_d8(masm,2);   // 2 = size of the MOV below
2171
    // move dst,src
2172
    emit_opcode(masm,0x8B);
2173
    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
2174
  %}
2175

2176
  // dst = max(dst, src) using compare / short branch / move: the 2-byte MOV
  // is skipped when dst is already the larger value.
  enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
2177
    // Compare dst,src
2178
    emit_opcode(masm,0x3B);
2179
    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
2180
    // jmp dst > src around move
2181
    emit_opcode(masm,0x7F);
2182
    emit_d8(masm,2);   // 2 = size of the MOV below
2183
    // move dst,src
2184
    emit_opcode(masm,0x8B);
2185
    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
2186
  %}
2187

2188
  enc_class enc_FPR_store(memory mem, regDPR src) %{
    // Stores an x87 register to memory.  A value already in FPR1 is stored
    // with a plain store; any other register is first pushed with FLD and
    // then stored with the popping form, so the pushed copy is removed again.
    const int mem_base  = $mem$$base;
    const int mem_index = $mem$$index;
    const int mem_scale = $mem$$scale;
    const int mem_disp  = $mem$$disp;
    const relocInfo::relocType mem_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals

    const bool src_is_fpr1 = ($src$$reg == FPR1L_enc);
    if (!src_is_fpr1) {
      emit_opcode( masm, 0xD9 ); // FLD (i.e., push it)
      emit_d8( masm, 0xC0-1+$src$$reg );
    }
    // reg/opcode field: 0x2 = just store, 0x3 = store & pop
    const int store_opcode_field = src_is_fpr1 ? 0x2 : 0x3;

    __ set_inst_mark();       // Mark start of opcode for reloc info in mem operand
    emit_opcode(masm,$primary);
    encode_RegMem(masm, store_opcode_field, mem_base, mem_index, mem_scale, mem_disp, mem_reloc);
    __ clear_inst_mark();
  %}
2207

2208
  // Negates $dst in place: opcode 0xF7 with a register-direct R/M byte whose
  // opcode-extension field is 0x03.
  enc_class neg_reg(rRegI dst) %{
2209
    // NEG $dst
2210
    emit_opcode(masm,0xF7);
2211
    emit_rm(masm, 0x3, 0x03, $dst$$reg );
2212
  %}
2213

2214
  // Emits the two-byte opcode 0F 9C followed by a register-direct R/M byte
  // for $dst (an eCXRegI operand); the middle field of the R/M byte is 0x4.
  enc_class setLT_reg(eCXRegI dst) %{
2215
    // SETLT $dst
2216
    emit_opcode(masm,0x0F);
2217
    emit_opcode(masm,0x9C);
2218
    emit_rm( masm, 0x3, 0x4, $dst$$reg );
2219
  %}
2220

2221
  // Branch-free conditional add.  After SUB, the SBB of $tmp with itself
  // turns the borrow into a mask (all ones or zero); the mask is ANDed with
  // $y and the result is added back into $p.
  enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
2222
    int tmpReg = $tmp$$reg;
2223

2224
    // SUB $p,$q
2225
    emit_opcode(masm,0x2B);
2226
    emit_rm(masm, 0x3, $p$$reg, $q$$reg);
2227
    // SBB $tmp,$tmp
2228
    emit_opcode(masm,0x1B);
2229
    emit_rm(masm, 0x3, tmpReg, tmpReg);
2230
    // AND $tmp,$y
2231
    emit_opcode(masm,0x23);
2232
    emit_rm(masm, 0x3, tmpReg, $y$$reg);
2233
    // ADD $p,$tmp
2234
    emit_opcode(masm,0x03);
2235
    emit_rm(masm, 0x3, $p$$reg, tmpReg);
2236
  %}
2237

2238
  // Variable left shift of a long register pair by the count in ECX.  When
  // bit 5 of the count is set, the low half is first moved into the high half
  // and cleared; the SHLD/SHL pair at 'small' then performs the actual shift.
  // The shift operand is fixed to ECX (ECX_enc is hard-coded in the TEST).
  enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
2239
    // TEST shift,32
2240
    emit_opcode(masm,0xF7);
2241
    emit_rm(masm, 0x3, 0, ECX_enc);
2242
    emit_d32(masm,0x20);
2243
    // JEQ,s small
2244
    emit_opcode(masm, 0x74);
2245
    emit_d8(masm, 0x04);   // skips the 2-byte MOV and the 2-byte CLR below
2246
    // MOV    $dst.hi,$dst.lo
2247
    emit_opcode( masm, 0x8B );
2248
    emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );
2249
    // CLR    $dst.lo
2250
    emit_opcode(masm, 0x33);
2251
    emit_rm(masm, 0x3, $dst$$reg, $dst$$reg);
2252
// small:
2253
    // SHLD   $dst.hi,$dst.lo,$shift
2254
    emit_opcode(masm,0x0F);
2255
    emit_opcode(masm,0xA5);
2256
    emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
2257
    // SHL    $dst.lo,$shift
2258
    emit_opcode(masm,0xD3);
2259
    emit_rm(masm, 0x3, 0x4, $dst$$reg );
2260
  %}
2261

2262
  // Variable logical right shift of a long register pair by the count in ECX.
  // When bit 5 of the count is set, the high half is first moved into the low
  // half and cleared; the SHRD/SHR pair at 'small' then performs the actual
  // shift.  The shift operand is fixed to ECX (ECX_enc is hard-coded in the TEST).
  enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
2263
    // TEST shift,32
2264
    emit_opcode(masm,0xF7);
2265
    emit_rm(masm, 0x3, 0, ECX_enc);
2266
    emit_d32(masm,0x20);
2267
    // JEQ,s small
2268
    emit_opcode(masm, 0x74);
2269
    emit_d8(masm, 0x04);   // skips the 2-byte MOV and the 2-byte CLR below
2270
    // MOV    $dst.lo,$dst.hi
2271
    emit_opcode( masm, 0x8B );
2272
    emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
2273
    // CLR    $dst.hi
2274
    emit_opcode(masm, 0x33);
2275
    emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($dst$$reg));
2276
// small:
2277
    // SHRD   $dst.lo,$dst.hi,$shift
2278
    emit_opcode(masm,0x0F);
2279
    emit_opcode(masm,0xAD);
2280
    emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
2281
    // SHR    $dst.hi,$shift
2282
    emit_opcode(masm,0xD3);
2283
    emit_rm(masm, 0x3, 0x5, HIGH_FROM_LOW_ENC($dst$$reg) );
2284
  %}
2285

2286
  // Variable arithmetic right shift of a long register pair by the count in
  // ECX.  When bit 5 of the count is set, the high half is first moved into
  // the low half and then replaced by its sign (SAR by 31); the SHRD/SAR pair
  // at 'small' then performs the actual shift.  The shift operand is fixed to
  // ECX (ECX_enc is hard-coded in the TEST).
  enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
2287
    // TEST shift,32
2288
    emit_opcode(masm,0xF7);
2289
    emit_rm(masm, 0x3, 0, ECX_enc);
2290
    emit_d32(masm,0x20);
2291
    // JEQ,s small
2292
    emit_opcode(masm, 0x74);
2293
    emit_d8(masm, 0x05);   // skips the 2-byte MOV and the 3-byte SAR below
2294
    // MOV    $dst.lo,$dst.hi
2295
    emit_opcode( masm, 0x8B );
2296
    emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
2297
    // SAR    $dst.hi,31
2298
    emit_opcode(masm, 0xC1);
2299
    emit_rm(masm, 0x3, 7, HIGH_FROM_LOW_ENC($dst$$reg) );
2300
    emit_d8(masm, 0x1F );
2301
// small:
2302
    // SHRD   $dst.lo,$dst.hi,$shift
2303
    emit_opcode(masm,0x0F);
2304
    emit_opcode(masm,0xAD);
2305
    emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
2306
    // SAR    $dst.hi,$shift
2307
    emit_opcode(masm,0xD3);
2308
    emit_rm(masm, 0x3, 0x7, HIGH_FROM_LOW_ENC($dst$$reg) );
2309
  %}
2310

2311

2312
  // ----------------- Encodings for floating point unit -----------------
2313
  // May leave result in FPU-TOS or FPU reg depending on opcodes
2314
  enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
2315
    $$$emit8$primary;
2316
    emit_rm(masm, 0x3, $secondary, $src$$reg );
2317
  %}
2318

2319
  // Pop argument in FPR0 with FSTP ST(0)
2320
  enc_class PopFPU() %{
2321
    emit_opcode( masm, 0xDD );
2322
    emit_d8( masm, 0xD8 );
2323
  %}
2324

2325
  // !!!!! equivalent to Pop_Reg_F
2326
  enc_class Pop_Reg_DPR( regDPR dst ) %{
2327
    emit_opcode( masm, 0xDD );           // FSTP   ST(i)
2328
    emit_d8( masm, 0xD8+$dst$$reg );
2329
  %}
2330

2331
  // Push a copy of double register $dst onto the FPU stack (FLD ST(i-1)).
  enc_class Push_Reg_DPR( regDPR dst ) %{
    const int fld_sti = 0xC0-1+$dst$$reg;   // the -1 turns the encoding into ST(i-1)
    emit_opcode( masm, 0xD9 );
    emit_d8( masm, fld_sti );
  %}
2335

2336
  // Load the 80-bit value stored at addr_fpu_subnormal_bias1() and multiply
  // it into $dst, popping the loaded value again.
  enc_class strictfp_bias1( regDPR dst ) %{
    const int bias_addr = (int)StubRoutines::x86::addr_fpu_subnormal_bias1();
    // FLD m80real [bias1]
    emit_opcode( masm, 0xDB );
    emit_opcode( masm, 0x2D );
    emit_d32( masm, bias_addr );
    // FMULP ST(dst), ST0
    emit_opcode( masm, 0xDE );
    emit_opcode( masm, 0xC8+$dst$$reg );
  %}
2343

2344
  // Load the 80-bit value stored at addr_fpu_subnormal_bias2() and multiply
  // it into $dst, popping the loaded value again.
  enc_class strictfp_bias2( regDPR dst ) %{
    const int bias_addr = (int)StubRoutines::x86::addr_fpu_subnormal_bias2();
    // FLD m80real [bias2]
    emit_opcode( masm, 0xDB );
    emit_opcode( masm, 0x2D );
    emit_d32( masm, bias_addr );
    // FMULP ST(dst), ST0
    emit_opcode( masm, 0xDE );
    emit_opcode( masm, 0xC8+$dst$$reg );
  %}
2351

2352
  // Special case for moving an integer register to a stack slot.
2353
  enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2354
    store_to_stackslot( masm, $primary, $src$$reg, $dst$$disp );
2355
  %}
2356

2357
  // Special case for moving a register to a stack slot.
2358
  enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2359
    // Opcode already emitted
2360
    emit_rm( masm, 0x02, $src$$reg, ESP_enc );   // R/M byte
2361
    emit_rm( masm, 0x00, ESP_enc, ESP_enc);          // SIB byte
2362
    emit_d32(masm, $dst$$disp);   // Displacement
2363
  %}
2364

2365
  // Push the integer in stackSlot 'src' onto FP-stack
2366
  enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
2367
    store_to_stackslot( masm, $primary, $secondary, $src$$disp );
2368
  %}
2369

2370
  // Push FPU's TOS float to a stack-slot, and pop FPU-stack
2371
  enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
2372
    store_to_stackslot( masm, 0xD9, 0x03, $dst$$disp );
2373
  %}
2374

2375
  // Same as Pop_Mem_F except for opcode
2376
  // Push FPU's TOS double to a stack-slot, and pop FPU-stack
2377
  enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
2378
    store_to_stackslot( masm, 0xDD, 0x03, $dst$$disp );
2379
  %}
2380

2381
  // Pop the FPU top-of-stack into float register $dst (FSTP ST(i)).
  enc_class Pop_Reg_FPR( regFPR dst ) %{
    const int fstp_sti = 0xD8+$dst$$reg;   // second byte selects ST(i)
    emit_opcode( masm, 0xDD );
    emit_d8( masm, fstp_sti );
  %}
2385

2386
  // Push a copy of float register $dst onto the FPU stack (FLD ST(i-1)).
  enc_class Push_Reg_FPR( regFPR dst ) %{
    const int fld_sti = 0xC0-1+$dst$$reg;   // the -1 turns the encoding into ST(i-1)
    emit_opcode( masm, 0xD9 );
    emit_d8( masm, fld_sti );
  %}
2390

2391
  // Push FPU's float to a stack-slot, and pop FPU-stack
2392
  enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
2393
    int pop = 0x02;
2394
    if ($src$$reg != FPR1L_enc) {
2395
      emit_opcode( masm, 0xD9 );         // FLD    ST(i-1)
2396
      emit_d8( masm, 0xC0-1+$src$$reg );
2397
      pop = 0x03;
2398
    }
2399
    store_to_stackslot( masm, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
2400
  %}
2401

2402
  // Push FPU's double to a stack-slot, and pop FPU-stack
2403
  enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
2404
    int pop = 0x02;
2405
    if ($src$$reg != FPR1L_enc) {
2406
      emit_opcode( masm, 0xD9 );         // FLD    ST(i-1)
2407
      emit_d8( masm, 0xC0-1+$src$$reg );
2408
      pop = 0x03;
2409
    }
2410
    store_to_stackslot( masm, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
2411
  %}
2412

2413
  // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
2414
  enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
2415
    int pop = 0xD0 - 1; // -1 since we skip FLD
2416
    if ($src$$reg != FPR1L_enc) {
2417
      emit_opcode( masm, 0xD9 );         // FLD    ST(src-1)
2418
      emit_d8( masm, 0xC0-1+$src$$reg );
2419
      pop = 0xD8;
2420
    }
2421
    emit_opcode( masm, 0xDD );
2422
    emit_d8( masm, pop+$dst$$reg );      // FST<P> ST(i)
2423
  %}
2424

2425

2426
  // Push $dst into FPR0 and, unless $src is FPR1, exchange $src with FPR1.
  // The exchange is bracketed by fincstp/fdecstp so FXCH operates one level
  // below the freshly pushed value.
  enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
    const int  x87_escape = 0xD9;                     // first byte of every instruction here
    const bool swap_src   = ($src$$reg != FPR1L_enc);

    // FLD ST(dst-1): load dst in FPR0
    emit_opcode( masm, x87_escape );
    emit_d8( masm, 0xC0-1+$dst$$reg );
    if (swap_src) {
      // fincstp
      emit_opcode (masm, x87_escape);
      emit_opcode (masm, 0xF7);
      // FXCH FPR1 with src
      emit_opcode(masm, x87_escape);
      emit_d8(masm, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (masm, x87_escape);
      emit_opcode (masm, 0xF6);
    }
  %}
2443

2444
  // Reserve 8 bytes below rsp and push $src1, then $src0, onto the FPU stack
  // by bouncing each through that slot.  The slot stays allocated.
  enc_class Push_ModD_encoding(regD src0, regD src1) %{
    const Address scratch(rsp, 0);
    __ subptr(rsp, 8);
    __ movdbl(scratch, $src1$$XMMRegister);
    __ fld_d(scratch);
    __ movdbl(scratch, $src0$$XMMRegister);
    __ fld_d(scratch);
  %}
2451

2452
  // Reserve 4 bytes below rsp and push $src1, then $src0, onto the FPU stack
  // by bouncing each through that slot.  The slot stays allocated.
  enc_class Push_ModF_encoding(regF src0, regF src1) %{
    const Address scratch(rsp, 0);
    __ subptr(rsp, 4);
    __ movflt(scratch, $src1$$XMMRegister);
    __ fld_s(scratch);
    __ movflt(scratch, $src0$$XMMRegister);
    __ fld_s(scratch);
  %}
2459

2460
  // Pop the FPU top-of-stack double into $dst via the slot at [rsp], then
  // release 8 bytes of stack.
  enc_class Push_ResultD(regD dst) %{
    const Address scratch(rsp, 0);
    __ fstp_d(scratch);
    __ movdbl($dst$$XMMRegister, scratch);
    __ addptr(rsp, 8);
  %}
2465

2466
  // Pop the FPU top-of-stack float into $dst via the slot at [rsp], then
  // release $d8 bytes of stack.
  enc_class Push_ResultF(regF dst, immI d8) %{
    const Address scratch(rsp, 0);
    __ fstp_s(scratch);
    __ movflt($dst$$XMMRegister, scratch);
    __ addptr(rsp, $d8$$constant);
  %}
2471

2472
  // Reserve 8 bytes below rsp and push $src onto the FPU stack through that
  // slot.  The slot stays allocated.
  enc_class Push_SrcD(regD src) %{
    const Address scratch(rsp, 0);
    __ subptr(rsp, 8);
    __ movdbl(scratch, $src$$XMMRegister);
    __ fld_d(scratch);
  %}
2477

2478
  // Reserve an 8-byte temporary on the stack.
  enc_class push_stack_temp_qword() %{
    const int qword_bytes = 8;
    __ subptr(rsp, qword_bytes);
  %}
2481

2482
  // Release the 8-byte temporary reserved by push_stack_temp_qword.
  enc_class pop_stack_temp_qword() %{
    const int qword_bytes = 8;
    __ addptr(rsp, qword_bytes);
  %}
2485

2486
  // Push XMM double $src onto the FPU stack through the slot at [rsp].
  // Does not adjust rsp itself; the slot at [rsp] is overwritten.
  enc_class push_xmm_to_fpr1(regD src) %{
    const Address scratch(rsp, 0);
    __ movdbl(scratch, $src$$XMMRegister);
    __ fld_d(scratch);
  %}
2490

2491
  // Unless $src is FPR1, exchange $src with FPR1.  The exchange is bracketed
  // by fincstp/fdecstp so FXCH operates one level below the stack top.
  // Emits nothing when $src is FPR1.
  enc_class Push_Result_Mod_DPR( regDPR src) %{
    const int  x87_escape = 0xD9;                     // first byte of every instruction here
    const bool swap_src   = ($src$$reg != FPR1L_enc);
    if (swap_src) {
      // fincstp
      emit_opcode (masm, x87_escape);
      emit_opcode (masm, 0xF7);
      // FXCH FPR1 with src
      emit_opcode(masm, x87_escape);
      emit_d8(masm, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (masm, x87_escape);
      emit_opcode (masm, 0xF6);
    }
  %}
2504

2505
  // Copy the FPU status word into the CPU flags (fnstsw ax; sahf) and emit a
  // short JNP with displacement 5 that skips whatever is encoded next when
  // parity is clear.
  enc_class fnstsw_sahf_skip_parity() %{
    const int skip_bytes = 0x05;   // displacement of the JNP
    // fnstsw ax
    emit_opcode( masm, 0xDF );
    emit_opcode( masm, 0xE0 );
    // sahf
    emit_opcode( masm, 0x9E );
    // jnp  ::skip
    emit_opcode( masm, 0x7B );
    emit_opcode( masm, skip_bytes );
  %}
2515

2516
  // fprem must be iterative: emits a loop that repeats FPREM until the
  // parity flag taken from the FPU status word is clear.
  enc_class emitModDPR() %{
    static const int fprem_loop[] = {
      0xD9, 0xF8,                           // loop: fprem
      0x9b,                                 // wait
      0xDF, 0xE0,                           // fnstsw ax
      0x9E,                                 // sahf
      0x0F, 0x8A, 0xF4, 0xFF, 0xFF, 0xFF    // jp loop (rel32 = -12, back to fprem)
    };
    const int byte_count = (int)(sizeof(fprem_loop) / sizeof(fprem_loop[0]));
    for (int i = 0; i < byte_count; i++) {
      emit_opcode( masm, fprem_loop[i] );
    }
  %}
2537

2538
  // Move the FPU compare result into the CPU flags.  If bit 0x0400 of the
  // status word is set (unordered comparison) AH is forced to 1 first, so
  // the result is treated as the LT case (carry set).
  enc_class fpu_flags() %{
    const int unordered_bit = 0x0400;
    // fnstsw_ax
    emit_opcode( masm, 0xDF );
    emit_opcode( masm, 0xE0 );
    // test ax,0x0400  (0x66 = operand-size prefix for the 16-bit immediate)
    emit_opcode( masm, 0x66 );
    emit_opcode( masm, 0xA9 );
    emit_d16   ( masm, unordered_bit );
    // Disabled 32-bit alternative, kept from the original for reference:
    //   // This sequence works, but stalls for 12-16 cycles on PPro
    //   // test rax,0x0400
    //   emit_opcode( masm, 0xA9 );
    //   emit_d32   ( masm, 0x00000400 );
    //
    // jz exit (no unordered comparison) -- skips the 2-byte mov below
    emit_opcode( masm, 0x74 );
    emit_d8    ( masm, 0x02 );
    // mov ah,1 - treat as LT case (set carry flag)
    emit_opcode( masm, 0xB4 );
    emit_d8    ( masm, 0x01 );
    // sahf
    emit_opcode( masm, 0x9E );
  %}
2560

2561
  // Fix up the integer flags in case the comparison involved a NaN: when the
  // P-flag is set, AH is loaded with 1 and SAHF makes it the LT case.
  enc_class cmpF_P6_fixup() %{
    const int fixup_bytes = 0x03;   // MOV AH,1 (2 bytes) + SAHF (1 byte)
    // JNP exit (no unordered comparison, P-flag is set by NaN)
    emit_opcode( masm, 0x7B );
    emit_d8    ( masm, fixup_bytes );
    // MOV AH,1 - treat as LT case (set carry flag)
    emit_opcode( masm, 0xB4 );
    emit_d8    ( masm, 0x01 );
    // SAHF
    emit_opcode( masm, 0x9E );
    // exit: NOP, so the JNP above does not branch straight to another branch
    emit_opcode( masm, 0x90 );
  %}
2575

2576
//     fnstsw_ax();
2577
//     sahf();
2578
//     movl(dst, nan_result);
2579
//     jcc(Assembler::parity, exit);
2580
//     movl(dst, less_result);
2581
//     jcc(Assembler::below, exit);
2582
//     movl(dst, equal_result);
2583
//     jcc(Assembler::equal, exit);
2584
//     movl(dst, greater_result);
2585

2586
// less_result     = -1;
2587
// greater_result  =  1;
2588
// equal_result    = 0;
2589
// nan_result      = -1;
2590

2591
  // Materialize a three-way FPU compare result in $dst:
  //   unordered (parity) -> -1, below -> -1, equal -> 0, otherwise -> 1.
  // Each case loads its value and then jumps to the end if its flag is set;
  // MOV r32,imm32 is used throughout, which leaves the flags from SAHF intact.
  enc_class CmpF_Result(rRegI dst) %{
    const int mov_dst_imm32 = 0xB8 + $dst$$reg;   // MOV $dst,imm32

    // fnstsw_ax();
    emit_opcode( masm, 0xDF );
    emit_opcode( masm, 0xE0 );
    // sahf
    emit_opcode( masm, 0x9E );
    // movl(dst, nan_result);
    emit_opcode( masm, mov_dst_imm32 );
    emit_d32( masm, -1 );
    // jcc(Assembler::parity, exit);      skips 5+2+5+2+5 bytes
    emit_opcode( masm, 0x7A );
    emit_d8    ( masm, 0x13 );
    // movl(dst, less_result);
    emit_opcode( masm, mov_dst_imm32 );
    emit_d32( masm, -1 );
    // jcc(Assembler::below, exit);       skips 5+2+5 bytes
    emit_opcode( masm, 0x72 );
    emit_d8    ( masm, 0x0C );
    // movl(dst, equal_result);
    emit_opcode( masm, mov_dst_imm32 );
    emit_d32( masm, 0 );
    // jcc(Assembler::equal, exit);       skips 5 bytes
    emit_opcode( masm, 0x74 );
    emit_d8    ( masm, 0x05 );
    // movl(dst, greater_result);
    emit_opcode( masm, mov_dst_imm32 );
    emit_d32( masm, 1 );
    // exit:
  %}
2619

2620

2621
  // Compare the longs and set flags
2622
  // BROKEN!  Do Not use as-is
  // Emits: CMP of the two high words, a short JNE over the next compare,
  // then CMP of the two low words.  The flags therefore come from the high
  // words when they differ and from the low words otherwise.
  // NOTE(review): the original authors flag this encoding as broken without
  // saying why -- confirm before reviving it.
2623
  enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
2624
    // CMP    $src1.hi,$src2.hi
2625
    emit_opcode( masm, 0x3B );
2626
    emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
2627
    // JNE,s  done   (displacement 2 = size of the CMP that follows)
2628
    emit_opcode(masm,0x75);
2629
    emit_d8(masm, 2 );
2630
    // CMP    $src1.lo,$src2.lo
2631
    emit_opcode( masm, 0x3B );
2632
    emit_rm(masm, 0x3, $src1$$reg, $src2$$reg );
2633
// done:
2634
  %}
2635

2636
  // Sign-extend int $src into the long pair $dst: both halves receive $src,
  // then the high half is shifted right arithmetically by 31.
  enc_class convert_int_long( regL dst, rRegI src ) %{
    const int lo_enc  = $dst$$reg;
    const int hi_enc  = HIGH_FROM_LOW_ENC(lo_enc);
    const int src_enc = $src$$reg;

    // mov $dst.lo,$src
    encode_Copy( masm, lo_enc, src_enc );
    // mov $dst.hi,$src
    encode_Copy( masm, hi_enc, src_enc );
    // sar $dst.hi,31
    emit_opcode( masm, 0xC1 );
    emit_rm(masm, 0x3, 7, hi_enc );
    emit_d8(masm, 0x1F );
  %}
2648

2649
  // Load the long pair $src onto the FPU stack: push both halves, FILD the
  // 64-bit value at [SP], then drop the two pushed words again.
  enc_class convert_long_double( eRegL src ) %{
    const int push_reg = 0x50;   // PUSH r32 base opcode
    // push $src.hi
    emit_opcode(masm, push_reg+HIGH_FROM_LOW_ENC($src$$reg));
    // push $src.lo
    emit_opcode(masm, push_reg+$src$$reg  );
    // fild 64-bits at [SP]
    emit_opcode(masm, 0xdf);
    emit_d8(masm, 0x6C);
    emit_d8(masm, 0x24);
    emit_d8(masm, 0x00);
    // pop stack: add  SP, #8
    emit_opcode(masm, 0x83);
    emit_rm(masm, 0x3, 0x00, ESP_enc);
    emit_d8(masm, 0x8);
  %}
2664

2665
  // High half of a widening multiply, shifted: IMUL by $src1 leaves the
  // product in EDX:EAX, then $dst is shifted right arithmetically by
  // ($cnt - 32).  No shift is emitted when $cnt is exactly 32.
  enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
    // IMUL   EDX:EAX,$src1
    emit_opcode( masm, 0xF7 );
    emit_rm( masm, 0x3, 0x5, $src1$$reg );
    // SAR    EDX,$cnt-32
    const int extra_shift = ((int)$cnt$$constant) - 32;
    if (extra_shift > 0) {
      emit_opcode(masm, 0xC1);
      emit_rm(masm, 0x3, 7, $dst$$reg );
      emit_d8(masm, extra_shift);
    }
  %}
2677

2678
  // this version doesn't have add sp, 8
2679
  enc_class convert_long_double2( eRegL src ) %{
2680
    // push $src.hi
2681
    emit_opcode(masm, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
2682
    // push $src.lo
2683
    emit_opcode(masm, 0x50+$src$$reg  );
2684
    // fild 64-bits at [SP]
2685
    emit_opcode(masm,0xdf);
2686
    emit_d8(masm, 0x6C);
2687
    emit_d8(masm, 0x24);
2688
    emit_d8(masm, 0x00);
2689
  %}
2690

2691
  // Signed widening multiply.  Basic idea: long = (long)int * (long)int
  enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
    const int imul_ext = 0x5;   // /5 under opcode F7
    // IMUL EDX:EAX, src
    emit_opcode( masm, 0xF7 );
    emit_rm( masm, 0x3, imul_ext, $src$$reg);
  %}
2697

2698
  // Unsigned widening multiply.
  // Basic idea: long = (int & 0xffffffffL) * (int & 0xffffffffL)
  enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
    const int mul_ext = 0x4;   // /4 under opcode F7
    // MUL EDX:EAX, src
    emit_opcode( masm, 0xF7 );
    emit_rm( masm, 0x3, mul_ext, $src$$reg);
  %}
2704

2705
  // 64x64 -> 64 bit multiply of $dst (EDX:EAX) by $src, using $tmp for the
  // cross products.
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
    const int dst_lo  = $dst$$reg;
    const int dst_hi  = HIGH_FROM_LOW_ENC($dst$$reg);
    const int src_lo  = $src$$reg;
    const int src_hi  = HIGH_FROM_LOW_ENC($src$$reg);
    const int tmp_enc = $tmp$$reg;

    // MOV    $tmp,$src.lo
    encode_Copy( masm, tmp_enc, src_lo );
    // IMUL   $tmp,EDX
    emit_opcode( masm, 0x0F );
    emit_opcode( masm, 0xAF );
    emit_rm( masm, 0x3, tmp_enc, dst_hi );
    // MOV    EDX,$src.hi
    encode_Copy( masm, dst_hi, src_hi );
    // IMUL   EDX,EAX
    emit_opcode( masm, 0x0F );
    emit_opcode( masm, 0xAF );
    emit_rm( masm, 0x3, dst_hi, dst_lo );
    // ADD    $tmp,EDX          ($tmp now holds both cross products)
    emit_opcode( masm, 0x03 );
    emit_rm( masm, 0x3, tmp_enc, dst_hi );
    // MUL    EDX:EAX,$src.lo
    emit_opcode( masm, 0xF7 );
    emit_rm( masm, 0x3, 0x4, src_lo );
    // ADD    EDX,$tmp
    emit_opcode( masm, 0x03 );
    emit_rm( masm, 0x3, dst_hi, tmp_enc );
  %}
2730

2731
  // Multiply the long in $dst (EDX:EAX) by the small constant $src, using
  // $tmp for the high-word product.
  // Basic idea: lo(result) = lo(src * y_lo)
  //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
  enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
    const int factor  = (int)$src$$constant;
    const int tmp_enc = $tmp$$reg;

    // IMUL   $tmp,EDX,$src
    emit_opcode( masm, 0x6B );
    emit_rm( masm, 0x3, tmp_enc, HIGH_FROM_LOW_ENC($dst$$reg) );
    emit_d8( masm, factor );
    // MOV    EDX,$src
    emit_opcode(masm, 0xB8 + EDX_enc);
    emit_d32( masm, factor );
    // MUL    EDX:EAX,EDX
    emit_opcode( masm, 0xF7 );
    emit_rm( masm, 0x3, 0x4, EDX_enc );
    // ADD    EDX,$tmp
    emit_opcode( masm, 0x03 );
    emit_rm( masm, 0x3, EDX_enc, tmp_enc );
  %}
2748

2749
  // 64-bit division through the runtime: pushes $src1 then $src2 (high word
  // before low word each time), calls SharedRuntime::ldiv and afterwards
  // removes the four pushed words from the stack.
  //
  // The high-word PUSH opcodes are formed as 0x50 + HIGH_FROM_LOW_ENC(reg),
  // matching convert_long_double; the macro is applied to the register
  // encoding, not to the finished opcode byte.
  enc_class long_div( eRegL src1, eRegL src2 ) %{
    // PUSH src1.hi
    emit_opcode(masm, 0x50+HIGH_FROM_LOW_ENC($src1$$reg) );
    // PUSH src1.lo
    emit_opcode(masm, 0x50+$src1$$reg );
    // PUSH src2.hi
    emit_opcode(masm, 0x50+HIGH_FROM_LOW_ENC($src2$$reg) );
    // PUSH src2.lo
    emit_opcode(masm, 0x50+$src2$$reg );
    // CALL directly to the runtime
    __ set_inst_mark();
    emit_opcode(masm,0xE8);       // Call into runtime
    // rel32 is relative to the end of the 4-byte displacement, hence the -4
    emit_d32_reloc(masm, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - __ pc()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    __ clear_inst_mark();
    __ post_call_nop();
    // Restore stack: add  SP, #framesize (4 pushed words of 4 bytes)
    emit_opcode(masm, 0x83);
    emit_rm(masm, 0x3, 0x00, ESP_enc);
    emit_d8(masm, 4*4);
  %}
2769

2770
  // 64-bit remainder through the runtime: pushes $src1 then $src2 (high word
  // before low word each time), calls SharedRuntime::lrem and afterwards
  // removes the four pushed words from the stack.
  //
  // The high-word PUSH opcodes are formed as 0x50 + HIGH_FROM_LOW_ENC(reg),
  // matching convert_long_double; the macro is applied to the register
  // encoding, not to the finished opcode byte.
  enc_class long_mod( eRegL src1, eRegL src2 ) %{
    // PUSH src1.hi
    emit_opcode(masm, 0x50+HIGH_FROM_LOW_ENC($src1$$reg) );
    // PUSH src1.lo
    emit_opcode(masm, 0x50+$src1$$reg );
    // PUSH src2.hi
    emit_opcode(masm, 0x50+HIGH_FROM_LOW_ENC($src2$$reg) );
    // PUSH src2.lo
    emit_opcode(masm, 0x50+$src2$$reg );
    // CALL directly to the runtime
    __ set_inst_mark();
    emit_opcode(masm,0xE8);       // Call into runtime
    // rel32 is relative to the end of the 4-byte displacement, hence the -4
    emit_d32_reloc(masm, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - __ pc()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    __ clear_inst_mark();
    __ post_call_nop();
    // Restore stack: add  SP, #framesize (4 pushed words of 4 bytes)
    emit_opcode(masm, 0x83);
    emit_rm(masm, 0x3, 0x00, ESP_enc);
    emit_d8(masm, 4*4);
  %}
2790

2791
  // Set the flags for a long-versus-zero test by OR-ing both halves of $src
  // together in $tmp.
  enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
    const int tmp_enc = $tmp$$reg;
    // MOV   $tmp,$src.lo
    emit_opcode(masm, 0x8B);
    emit_rm(masm, 0x3, tmp_enc, $src$$reg);
    // OR    $tmp,$src.hi
    emit_opcode(masm, 0x0B);
    emit_rm(masm, 0x3, tmp_enc, HIGH_FROM_LOW_ENC($src$$reg));
  %}
2799

2800
  // Compare two longs for (in)equality: the low words are compared first and
  // the high words only when the low words were equal.
  enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
    const int cmp_opcode = 0x3B;
    // CMP    $src1.lo,$src2.lo
    emit_opcode( masm, cmp_opcode );
    emit_rm(masm, 0x3, $src1$$reg, $src2$$reg );
    // JNE,s  skip   (displacement 2 = size of the CMP that follows)
    emit_cc(masm, 0x70, 0x5);
    emit_d8(masm,2);
    // CMP    $src1.hi,$src2.hi
    emit_opcode( masm, cmp_opcode );
    emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
    // skip:
  %}
2811

2812
  // Long compare via subtract-with-borrow: CMP on the low words produces the
  // borrow, then $tmp = $src1.hi - $src2.hi - borrow sets the final flags.
  enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
    const int tmp_enc = $tmp$$reg;
    // CMP    $src1.lo,$src2.lo   ! Long compare; set flags for low bits
    emit_opcode( masm, 0x3B );
    emit_rm(masm, 0x3, $src1$$reg, $src2$$reg );
    // MOV    $tmp,$src1.hi
    emit_opcode( masm, 0x8B );
    emit_rm(masm, 0x3, tmp_enc, HIGH_FROM_LOW_ENC($src1$$reg) );
    // SBB    $tmp,$src2.hi       ! Compute flags for long compare
    emit_opcode( masm, 0x1B );
    emit_rm(masm, 0x3, tmp_enc, HIGH_FROM_LOW_ENC($src2$$reg) );
  %}
2823

2824
  // Set flags for the comparison 0 versus $src: $tmp is zeroed, compared
  // with the low word, then the high word is subtracted with borrow.
  enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
    const int tmp_enc = $tmp$$reg;
    // XOR    $tmp,$tmp
    emit_opcode(masm, 0x33);
    emit_rm(masm, 0x3, tmp_enc, tmp_enc);
    // CMP    $tmp,$src.lo
    emit_opcode( masm, 0x3B );
    emit_rm(masm, 0x3, tmp_enc, $src$$reg );
    // SBB    $tmp,$src.hi
    emit_opcode( masm, 0x1B );
    emit_rm(masm, 0x3, tmp_enc, HIGH_FROM_LOW_ENC($src$$reg) );
  %}
2835

2836
 // Sniff, sniff... smells like Gnu Superoptimizer
2837
  enc_class neg_long( eRegL dst ) %{
    // Negate the 64-bit pair in place with three instructions:
    //   NEG hi ; NEG lo ; SBB hi,0
    const int lo_enc = $dst$$reg;
    const int hi_enc = HIGH_FROM_LOW_ENC($dst$$reg);

    // NEG hi
    emit_opcode(masm, 0xF7);
    emit_rm    (masm, 0x3, 0x3, hi_enc);
    // NEG lo
    emit_opcode(masm, 0xF7);
    emit_rm    (masm, 0x3, 0x3, lo_enc);
    // SBB hi,0   (takes the borrow of the low-word NEG out of the high word)
    emit_opcode(masm, 0x83);
    emit_rm    (masm, 0x3, 0x3, hi_enc);
    emit_d8    (masm, 0);
  %}
2846

2847
  // Emit the single byte 0x5A (POP EDX).
  enc_class enc_pop_rdx() %{
    const int pop_edx = 0x5A;
    emit_opcode(masm, pop_edx);
  %}
2850

2851
  // Jump (JMP rel32, with a runtime-call relocation) to the rethrow stub.
  enc_class enc_rethrow() %{
    __ set_inst_mark();
    emit_opcode(masm, 0xE9);        // jmp    entry
    // Displacement is relative to the end of the 4-byte operand, hence the -4.
    const int rel32 = (int)OptoRuntime::rethrow_stub() - ((int)__ pc())-4;
    emit_d32_reloc(masm, rel32,
                   runtime_call_Relocation::spec(), RELOC_IMM32 );
    __ clear_inst_mark();
    __ post_call_nop();
  %}
2859

2860

2861
  // Convert a double to an int.  Java semantics require we do complex
2862
  // manglelations in the corner cases.  So we set the rounding mode to
2863
  // 'zero', store the darned double down as an int, and reset the
2864
  // rounding mode to 'nearest'.  The hardware throws an exception which
2865
  // patches up the correct value directly to the stack.
2866
  enc_class DPR2I_encoding( regDPR src ) %{
2867
    // Flip to round-to-zero mode.  We attempted to allow invalid-op
2868
    // exceptions here, so that a NAN or other corner-case value will
2869
    // thrown an exception (but normal values get converted at full speed).
2870
    // However, I2C adapters and other float-stack manglers leave pending
2871
    // invalid-op exceptions hanging.  We would have to clear them before
2872
    // enabling them and that is more expensive than just testing for the
2873
    // invalid value Intel stores down in the corner cases.
2874
    emit_opcode(masm,0xD9);            // FLDCW  trunc
2875
    emit_opcode(masm,0x2D);
2876
    emit_d32(masm,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
2877
    // Allocate a word
2878
    emit_opcode(masm,0x83);            // SUB ESP,4
2879
    emit_opcode(masm,0xEC);
2880
    emit_d8(masm,0x04);
2881
    // Encoding assumes a double has been pushed into FPR0.
2882
    // Store down the double as an int, popping the FPU stack
2883
    emit_opcode(masm,0xDB);            // FISTP [ESP]
2884
    emit_opcode(masm,0x1C);
2885
    emit_d8(masm,0x24);
2886
    // Restore the rounding mode; mask the exception
2887
    emit_opcode(masm,0xD9);            // FLDCW   std/24-bit mode
2888
    emit_opcode(masm,0x2D);
2889
    emit_d32( masm, Compile::current()->in_24_bit_fp_mode()
2890
        ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
2891
        : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());
2892

2893
    // Load the converted int; adjust CPU stack
2894
    emit_opcode(masm,0x58);       // POP EAX
2895
    emit_opcode(masm,0x3D);       // CMP EAX,imm
2896
    emit_d32   (masm,0x80000000); //         0x80000000
2897
    emit_opcode(masm,0x75);       // JNE around_slow_call
2898
    emit_d8    (masm,0x07);       // Size of slow_call
2899
    // Push src onto stack slow-path
2900
    emit_opcode(masm,0xD9 );      // FLD     ST(i)
2901
    emit_d8    (masm,0xC0-1+$src$$reg );
2902
    // CALL directly to the runtime
2903
    __ set_inst_mark();
2904
    emit_opcode(masm,0xE8);       // Call into runtime
2905
    emit_d32_reloc(masm, (StubRoutines::x86::d2i_wrapper() - __ pc()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2906
    __ clear_inst_mark();
2907
    __ post_call_nop();
2908
    // Carry on here...
2909
  %}
2910

2911
  // Convert a double to a long; the result ends up in EDX:EAX.
  // Same scheme as DPR2I_encoding with a 64-bit FISTP: the slow path through
  // d2l_wrapper is taken only when the stored result is exactly
  // 0x80000000:00000000 (EDX == 0x80000000 and EAX == 0).
  enc_class DPR2L_encoding( regDPR src ) %{
    int restore_cw;
    if (Compile::current()->in_24_bit_fp_mode()) {
      restore_cw = (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24();
    } else {
      restore_cw = (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std();
    }

    // FLDCW  trunc
    emit_opcode(masm, 0xD9);
    emit_opcode(masm, 0x2D);
    emit_d32(masm, (int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
    // SUB ESP,8 -- allocate two words
    emit_opcode(masm, 0x83);
    emit_opcode(masm, 0xEC);
    emit_d8(masm, 0x08);
    // FISTP [ESP] -- encoding assumes a double has been pushed into FPR0;
    // store it down as a long, popping the FPU stack
    emit_opcode(masm, 0xDF);
    emit_opcode(masm, 0x3C);
    emit_d8(masm, 0x24);
    // FLDCW  std/24-bit mode -- restore the rounding mode; mask the exception
    emit_opcode(masm, 0xD9);
    emit_opcode(masm, 0x2D);
    emit_d32(masm, restore_cw);

    // Load the converted long; adjust CPU stack
    emit_opcode(masm, 0x58);       // POP EAX
    emit_opcode(masm, 0x5A);       // POP EDX
    emit_opcode(masm, 0x81);       // CMP EDX,imm
    emit_d8    (masm, 0xFA);       // rdx
    emit_d32   (masm,0x80000000);  //         0x80000000
    emit_opcode(masm, 0x75);       // JNE around_slow_call
    emit_d8    (masm, 0x07+4);     // Size of slow_call + TEST (2) + JNE (2)
    emit_opcode(masm, 0x85);       // TEST EAX,EAX
    emit_opcode(masm, 0xC0);       // 2/rax,/rax,
    emit_opcode(masm, 0x75);       // JNE around_slow_call
    emit_d8    (masm, 0x07);       // Size of slow_call: FLD (2) + CALL (5)
    // Slow path: push src onto the FPU stack
    emit_opcode(masm, 0xD9);       // FLD     ST(i)
    emit_d8    (masm, 0xC0-1+$src$$reg );
    // CALL directly to the runtime
    __ set_inst_mark();
    emit_opcode(masm, 0xE8);       // Call into runtime
    emit_d32_reloc(masm, (StubRoutines::x86::d2l_wrapper() - __ pc()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    __ clear_inst_mark();
    __ post_call_nop();
    // around_slow_call: carry on here...
  %}
2954

2955
  // Multiply the FPU stack top by $src1.  The other operand was loaded from
  // memory into fp ST (stack top) beforehand.
  enc_class FMul_ST_reg( eRegFPR src1 ) %{
    const int fmul_sti = 0xC8 + $src1$$reg;
    // FMUL   ST,$src  /* D8 C8+i */
    emit_opcode(masm, 0xD8);
    emit_opcode(masm, fmul_sti);
  %}
2961

2962
  // Add $src2 to the FPU stack top using the non-popping form.
  enc_class FAdd_ST_reg( eRegFPR src2 ) %{
    const int fadd_sti = 0xC0 + $src2$$reg;
    // FADD   ST,src2  /* D8 C0+i */
    emit_opcode(masm, 0xD8);
    emit_opcode(masm, fadd_sti);
    // could use FADDP  src2,fpST  /* DE C0+i */
  %}
2968

2969
  // Add the FPU stack top into $src2 and pop (FADDP).
  enc_class FAddP_reg_ST( eRegFPR src2 ) %{
    const int faddp_sti = 0xC0 + $src2$$reg;
    // FADDP  src2,ST  /* DE C0+i */
    emit_opcode(masm, 0xDE);
    emit_opcode(masm, faddp_sti);
  %}
2974

2975
  // Subtract $src1 from the FPU stack top, then divide the result by $src2.
  // The first operand has already been loaded into fp ST (stack top).
  enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
    const int st_op_sti = 0xD8;   // shared first byte of both instructions
    // FSUB   ST,$src1
    emit_opcode(masm, st_op_sti);
    emit_opcode(masm, 0xE0 + $src1$$reg);
    // FDIV   ST,$src2
    emit_opcode(masm, st_op_sti);
    emit_opcode(masm, 0xF0 + $src2$$reg);
  %}
2985

2986
  // Emits FADD with $src1 followed by FMUL with $src2, both against the FPU
  // stack top (note the order: add first, then multiply).  The first operand
  // was loaded from memory into fp ST (stack top) beforehand.
  enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
    const int st_op_sti = 0xD8;   // shared first byte of both instructions
    // FADD   ST,$src1  /* D8 C0+i */
    emit_opcode(masm, st_op_sti);
    emit_opcode(masm, 0xC0 + $src1$$reg);
    // FMUL   ST,$src2  /* D8 C8+i */
    emit_opcode(masm, st_op_sti);
    emit_opcode(masm, 0xC8 + $src2$$reg);
  %}
2996

2997

2998
  // Like MulFAddF, but the multiply is the popping form that leaves its
  // result in $src2: FADD ST,$src1 followed by FMULP $src2,ST.
  // The first operand was loaded from memory into fp ST (stack top).
  enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
    const int fadd_sti  = 0xC0 + $src1$$reg;
    const int fmulp_sti = 0xC8 + $src2$$reg;
    // FADD   ST,$src1  /* D8 C0+i */
    emit_opcode(masm, 0xD8);
    emit_opcode(masm, fadd_sti);
    // FMULP  $src2,ST  /* DE C8+i */
    emit_opcode(masm, 0xDE);
    emit_opcode(masm, fmulp_sti);
  %}
3008

3009
  // Atomically load the volatile long
3010
  enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
3011
    emit_opcode(masm,0xDF);
3012
    int rm_byte_opcode = 0x05;
3013
    int base     = $mem$$base;
3014
    int index    = $mem$$index;
3015
    int scale    = $mem$$scale;
3016
    int displace = $mem$$disp;
3017
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3018
    encode_RegMem(masm, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3019
    store_to_stackslot( masm, 0x0DF, 0x07, $dst$$disp );
3020
  %}
3021

3022
  // Volatile Store Long.  Must be atomic, so move it into
3023
  // the FP TOS and then do a 64-bit FIST.  Has to probe the
3024
  // target address before the store (for null-ptr checks)
3025
  // so the memory operand is used twice in the encoding.
3026
  enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
3027
    store_to_stackslot( masm, 0x0DF, 0x05, $src$$disp );
3028
    __ set_inst_mark();            // Mark start of FIST in case $mem has an oop
3029
    emit_opcode(masm,0xDF);
3030
    int rm_byte_opcode = 0x07;
3031
    int base     = $mem$$base;
3032
    int index    = $mem$$index;
3033
    int scale    = $mem$$scale;
3034
    int displace = $mem$$disp;
3035
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3036
    encode_RegMem(masm, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3037
    __ clear_inst_mark();
3038
  %}
3039

3040
%}
3041

3042

3043
//----------FRAME--------------------------------------------------------------
3044
// Definition of frame structure and management information.
3045
//
3046
//  S T A C K   L A Y O U T    Allocators stack-slot number
3047
//                             |   (to get allocators register number
3048
//  G  Owned by    |        |  v    add OptoReg::stack0())
3049
//  r   CALLER     |        |
3050
//  o     |        +--------+      pad to even-align allocators stack-slot
3051
//  w     V        |  pad0  |        numbers; owned by CALLER
3052
//  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
3053
//  h     ^        |   in   |  5
3054
//        |        |  args  |  4   Holes in incoming args owned by SELF
3055
//  |     |        |        |  3
3056
//  |     |        +--------+
3057
//  V     |        | old out|      Empty on Intel, window on Sparc
3058
//        |    old |preserve|      Must be even aligned.
3059
//        |     SP-+--------+----> Matcher::_old_SP, even aligned
3060
//        |        |   in   |  3   area for Intel ret address
3061
//     Owned by    |preserve|      Empty on Sparc.
3062
//       SELF      +--------+
3063
//        |        |  pad2  |  2   pad to align old SP
3064
//        |        +--------+  1
3065
//        |        | locks  |  0
3066
//        |        +--------+----> OptoReg::stack0(), even aligned
3067
//        |        |  pad1  | 11   pad to align new SP
3068
//        |        +--------+
3069
//        |        |        | 10
3070
//        |        | spills |  9   spills
3071
//        V        |        |  8   (pad0 slot for callee)
3072
//      -----------+--------+----> Matcher::_out_arg_limit, unaligned
3073
//        ^        |  out   |  7
3074
//        |        |  args  |  6   Holes in outgoing args owned by CALLEE
3075
//     Owned by    +--------+
3076
//      CALLEE     | new out|  6   Empty on Intel, window on Sparc
3077
//        |    new |preserve|      Must be even-aligned.
3078
//        |     SP-+--------+----> Matcher::_new_SP, even aligned
3079
//        |        |        |
3080
//
3081
// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
3082
//         known from SELF's arguments and the Java calling convention.
3083
//         Region 6-7 is determined per call site.
3084
// Note 2: If the calling convention leaves holes in the incoming argument
3085
//         area, those holes are owned by SELF.  Holes in the outgoing area
3086
//         are owned by the CALLEE.  Holes should not be necessary in the
3087
//         incoming area, as the Java calling convention is completely under
3088
//         the control of the AD file.  Doubles can be sorted and packed to
3089
//         avoid holes.  Holes in the outgoing arguments may be necessary for
3090
//         varargs C calling conventions.
3091
// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
3092
//         even aligned with pad0 as needed.
3093
//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
3094
//         region 6-11 is even aligned; it may be padded out more so that
3095
//         the region from SP to FP meets the minimum stack alignment.
3096

3097
// Frame description: special registers, stack alignment, the return-address
// slot and the registers used for return values.
frame %{
3098
  // Registers that define part of the calling convention
3099
  // between compiled code and the interpreter.
  // NOTE(review): this comment used to say "three registers", but only
  // inline_cache_reg is declared directly below -- confirm which others were meant.
3100
  inline_cache_reg(EAX);                // Inline Cache Register
3101

3102
  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
3103
  cisc_spilling_operand_name(indOffset32);
3104

3105
  // Number of stack slots consumed by locking an object
3106
  sync_stack_slots(1);
3107

3108
  // Compiled code's Frame Pointer
3109
  frame_pointer(ESP);
3110
  // Interpreter stores its frame pointer in a register which is
3111
  // stored to the stack by I2CAdaptors.
3112
  // I2CAdaptors convert from interpreted java to compiled java.
3113
  interpreter_frame_pointer(EBP);
3114

3115
  // Stack alignment requirement
3116
  // Alignment size in bytes (128-bit -> 16 bytes)
3117
  stack_alignment(StackAlignmentInBytes);
3118

3119
  // Number of outgoing stack slots killed above the out_preserve_stack_slots
3120
  // for calls to C.  Supports the var-args backing area for register parms.
3121
  varargs_C_out_slots_killed(0);
3122

3123
  // The after-PROLOG location of the return address.  Location of
3124
  // return address specifies a type (REG or STACK) and a number
3125
  // representing the register number (i.e. - use a register name) or
3126
  // stack slot.
3127
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
3128
  // Otherwise, it is above the locks and verification slot and alignment word
  // The slot is computed as: (in_preserve_stack_slots + fixed_slots), rounded
  // up to the stack alignment in slots, minus one.
3129
  return_addr(STACK - 1 +
3130
              align_up((Compile::current()->in_preserve_stack_slots() +
3131
                        Compile::current()->fixed_slots()),
3132
                       stack_alignment_in_slots()));
3133

3134
  // Location of C & interpreter return values
3135
  c_return_value %{
3136
    // Only ideal register kinds in [Op_RegI, Op_RegL] are accepted.
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3137
    // lo[]/hi[] are indexed by ideal_reg and supply the two halves of the
    // returned OptoRegPair(hi, lo); OptoReg::Bad presumably marks an unused half.
    // NOTE(review): presumably the last five entries correspond to Op_RegI,
    // Op_RegP, Op_RegF, Op_RegD, Op_RegL in that order -- confirm against the
    // opcode enum.
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3138
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3139

3140
    // in SSE2+ mode we want to keep the FPU stack clean so pretend
3141
    // that C functions return float and double results in XMM0.
3142
    if( ideal_reg == Op_RegD && UseSSE>=2 )
3143
      return OptoRegPair(XMM0b_num,XMM0_num);
3144
    if( ideal_reg == Op_RegF && UseSSE>=2 )
3145
      return OptoRegPair(OptoReg::Bad,XMM0_num);
3146

3147
    // Everything else comes from the tables above.
    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3148
  %}
3149

3150
  // Location of return values
3151
  return_value %{
3152
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3153
    // Same tables as in c_return_value.
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3154
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3155
    // Doubles use XMM0 when UseSSE>=2.
    if( ideal_reg == Op_RegD && UseSSE>=2 )
3156
      return OptoRegPair(XMM0b_num,XMM0_num);
3157
    // Floats use XMM0 as soon as UseSSE>=1; note that c_return_value
    // requires UseSSE>=2 for the same case.
    if( ideal_reg == Op_RegF && UseSSE>=1 )
3158
      return OptoRegPair(OptoReg::Bad,XMM0_num);
3159
    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3160
  %}
3161

3162
%}
3163

3164
//----------ATTRIBUTES---------------------------------------------------------
3165
//----------Operand Attributes-------------------------------------------------
3166
op_attrib op_cost(0);        // Required cost attribute
3167

3168
//----------Instruction Attributes---------------------------------------------
3169
ins_attrib ins_cost(100);       // Required cost attribute
3170
ins_attrib ins_size(8);         // Required size attribute (in bits)
3171
ins_attrib ins_short_branch(0); // Required flag: is this instruction a
3172
                                // non-matching short branch variant of some
3173
                                // long branch?
3174
ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
3175
                                // specifies the alignment that some part of the instruction (not
3176
                                // necessarily the start) requires.  If > 1, a compute_padding()
3177
                                // function must be provided for the instruction
3178

3179
//----------OPERANDS-----------------------------------------------------------
3180
// Operand definitions must precede instruction definitions for correct parsing
3181
// in the ADLC because operands constitute user defined types which are used in
3182
// instruction definitions.
3183

3184
//----------Simple Operands----------------------------------------------------
3185
// Immediate Operands
3186
// Integer Immediate
3187
operand immI() %{
3188
  match(ConI);
3189

3190
  op_cost(10);
3191
  format %{ %}
3192
  interface(CONST_INTER);
3193
%}
3194

3195
// Constant for test vs zero
3196
operand immI_0() %{
3197
  predicate(n->get_int() == 0);
3198
  match(ConI);
3199

3200
  op_cost(0);
3201
  format %{ %}
3202
  interface(CONST_INTER);
3203
%}
3204

3205
// Constant for increment
3206
operand immI_1() %{
3207
  predicate(n->get_int() == 1);
3208
  match(ConI);
3209

3210
  op_cost(0);
3211
  format %{ %}
3212
  interface(CONST_INTER);
3213
%}
3214

3215
// Constant for decrement
3216
operand immI_M1() %{
3217
  predicate(n->get_int() == -1);
3218
  match(ConI);
3219

3220
  op_cost(0);
3221
  format %{ %}
3222
  interface(CONST_INTER);
3223
%}
3224

3225
// Valid scale values for addressing modes
3226
operand immI2() %{
3227
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
3228
  match(ConI);
3229

3230
  format %{ %}
3231
  interface(CONST_INTER);
3232
%}
3233

3234
operand immI8() %{
3235
  predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
3236
  match(ConI);
3237

3238
  op_cost(5);
3239
  format %{ %}
3240
  interface(CONST_INTER);
3241
%}
3242

3243
operand immU8() %{
3244
  predicate((0 <= n->get_int()) && (n->get_int() <= 255));
3245
  match(ConI);
3246

3247
  op_cost(5);
3248
  format %{ %}
3249
  interface(CONST_INTER);
3250
%}
3251

3252
operand immI16() %{
3253
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
3254
  match(ConI);
3255

3256
  op_cost(10);
3257
  format %{ %}
3258
  interface(CONST_INTER);
3259
%}
3260

3261
// Int Immediate non-negative
3262
operand immU31()
3263
%{
3264
  predicate(n->get_int() >= 0);
3265
  match(ConI);
3266

3267
  op_cost(0);
3268
  format %{ %}
3269
  interface(CONST_INTER);
3270
%}
3271

3272
// Constant for long shifts
3273
operand immI_32() %{
3274
  predicate( n->get_int() == 32 );
3275
  match(ConI);
3276

3277
  op_cost(0);
3278
  format %{ %}
3279
  interface(CONST_INTER);
3280
%}
3281

3282
operand immI_1_31() %{
3283
  predicate( n->get_int() >= 1 && n->get_int() <= 31 );
3284
  match(ConI);
3285

3286
  op_cost(0);
3287
  format %{ %}
3288
  interface(CONST_INTER);
3289
%}
3290

3291
operand immI_32_63() %{
3292
  predicate( n->get_int() >= 32 && n->get_int() <= 63 );
3293
  match(ConI);
3294
  op_cost(0);
3295

3296
  format %{ %}
3297
  interface(CONST_INTER);
3298
%}
3299

3300
operand immI_2() %{
3301
  predicate( n->get_int() == 2 );
3302
  match(ConI);
3303

3304
  op_cost(0);
3305
  format %{ %}
3306
  interface(CONST_INTER);
3307
%}
3308

3309
operand immI_3() %{
3310
  predicate( n->get_int() == 3 );
3311
  match(ConI);
3312

3313
  op_cost(0);
3314
  format %{ %}
3315
  interface(CONST_INTER);
3316
%}
3317

3318
operand immI_4()
3319
%{
3320
  predicate(n->get_int() == 4);
3321
  match(ConI);
3322

3323
  op_cost(0);
3324
  format %{ %}
3325
  interface(CONST_INTER);
3326
%}
3327

3328
operand immI_8()
3329
%{
3330
  predicate(n->get_int() == 8);
3331
  match(ConI);
3332

3333
  op_cost(0);
3334
  format %{ %}
3335
  interface(CONST_INTER);
3336
%}
3337

3338
// Pointer Immediate
3339
operand immP() %{
3340
  match(ConP);
3341

3342
  op_cost(10);
3343
  format %{ %}
3344
  interface(CONST_INTER);
3345
%}
3346

3347
// Null Pointer Immediate
3348
operand immP0() %{
3349
  predicate( n->get_ptr() == 0 );
3350
  match(ConP);
3351
  op_cost(0);
3352

3353
  format %{ %}
3354
  interface(CONST_INTER);
3355
%}
3356

3357
// Long Immediate
3358
operand immL() %{
3359
  match(ConL);
3360

3361
  op_cost(20);
3362
  format %{ %}
3363
  interface(CONST_INTER);
3364
%}
3365

3366
// Long Immediate zero
3367
operand immL0() %{
3368
  predicate( n->get_long() == 0L );
3369
  match(ConL);
3370
  op_cost(0);
3371

3372
  format %{ %}
3373
  interface(CONST_INTER);
3374
%}
3375

3376
// Long Immediate minus one
3377
operand immL_M1() %{
3378
  predicate( n->get_long() == -1L );
3379
  match(ConL);
3380
  op_cost(0);
3381

3382
  format %{ %}
3383
  interface(CONST_INTER);
3384
%}
3385

3386
// Long immediate from 0 to 127.
3387
// Used for a shorter form of long mul by 10.
3388
operand immL_127() %{
3389
  predicate((0 <= n->get_long()) && (n->get_long() <= 127));
3390
  match(ConL);
3391
  op_cost(0);
3392

3393
  format %{ %}
3394
  interface(CONST_INTER);
3395
%}
3396

3397
// Long Immediate: low 32-bit mask
3398
operand immL_32bits() %{
3399
  predicate(n->get_long() == 0xFFFFFFFFL);
3400
  match(ConL);
3401
  op_cost(0);
3402

3403
  format %{ %}
3404
  interface(CONST_INTER);
3405
%}
3406

3407
// Long Immediate: value fits in a sign-extended 32-bit int
3408
operand immL32() %{
3409
  predicate(n->get_long() == (int)(n->get_long()));
3410
  match(ConL);
3411
  op_cost(20);
3412

3413
  format %{ %}
3414
  interface(CONST_INTER);
3415
%}
3416

3417
// Double Immediate zero
3418
operand immDPR0() %{
3419
  // Do additional (and counter-intuitive) test against NaN to work around VC++
3420
  // bug that generates code such that NaNs compare equal to 0.0
  // This is a value comparison against 0.0 (so -0.0 also satisfies it), and
  // it only applies when UseSSE<=1.
3421
  predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
3422
  match(ConD);
3423

3424
  op_cost(5);
3425
  format %{ %}
3426
  interface(CONST_INTER);
3427
%}
3428

3429
// Double Immediate one
3430
operand immDPR1() %{
3431
  predicate( UseSSE<=1 && n->getd() == 1.0 );
3432
  match(ConD);
3433

3434
  op_cost(5);
3435
  format %{ %}
3436
  interface(CONST_INTER);
3437
%}
3438

3439
// Double Immediate
3440
operand immDPR() %{
3441
  predicate(UseSSE<=1);
3442
  match(ConD);
3443

3444
  op_cost(5);
3445
  format %{ %}
3446
  interface(CONST_INTER);
3447
%}
3448

3449
operand immD() %{
3450
  predicate(UseSSE>=2);
3451
  match(ConD);
3452

3453
  op_cost(5);
3454
  format %{ %}
3455
  interface(CONST_INTER);
3456
%}
3457

3458
// Double Immediate zero
3459
operand immD0() %{
3460
  // Do additional (and counter-intuitive) test against NaN to work around VC++
3461
  // bug that generates code such that NaNs compare equal to 0.0 AND do not
3462
  // compare equal to -0.0.
  // NOTE(review): the predicate no longer tests for NaN explicitly; it compares
  // jlong_cast(value) with 0.  Presumably jlong_cast exposes the raw 64-bit
  // pattern, so only +0.0 matches (not -0.0 or NaN) -- confirm against its
  // definition.  Only applies when UseSSE>=2.
3463
  predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
3464
  match(ConD);
3465

3466
  format %{ %}
3467
  interface(CONST_INTER);
3468
%}
3469

3470
// Float Immediate zero
3471
operand immFPR0() %{
3472
  predicate(UseSSE == 0 && n->getf() == 0.0F);
3473
  match(ConF);
3474

3475
  op_cost(5);
3476
  format %{ %}
3477
  interface(CONST_INTER);
3478
%}
3479

3480
// Float Immediate one
3481
operand immFPR1() %{
3482
  predicate(UseSSE == 0 && n->getf() == 1.0F);
3483
  match(ConF);
3484

3485
  op_cost(5);
3486
  format %{ %}
3487
  interface(CONST_INTER);
3488
%}
3489

3490
// Float Immediate
3491
operand immFPR() %{
3492
  predicate( UseSSE == 0 );
3493
  match(ConF);
3494

3495
  op_cost(5);
3496
  format %{ %}
3497
  interface(CONST_INTER);
3498
%}
3499

3500
// Float Immediate
3501
operand immF() %{
3502
  predicate(UseSSE >= 1);
3503
  match(ConF);
3504

3505
  op_cost(5);
3506
  format %{ %}
3507
  interface(CONST_INTER);
3508
%}
3509

3510
// Float Immediate zero.  Zero and not -0.0
3511
operand immF0() %{
3512
  predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
3513
  match(ConF);
3514

3515
  op_cost(5);
3516
  format %{ %}
3517
  interface(CONST_INTER);
3518
%}
3519

3520
// Immediates for special shifts (sign extend)
3521

3522
// Shift-count constants (16 and 24)
3523
operand immI_16() %{
3524
  predicate( n->get_int() == 16 );
3525
  match(ConI);
3526

3527
  format %{ %}
3528
  interface(CONST_INTER);
3529
%}
3530

3531
operand immI_24() %{
3532
  predicate( n->get_int() == 24 );
3533
  match(ConI);
3534

3535
  format %{ %}
3536
  interface(CONST_INTER);
3537
%}
3538

3539
// Constant for byte-wide masking
3540
operand immI_255() %{
3541
  predicate( n->get_int() == 255 );
3542
  match(ConI);
3543

3544
  format %{ %}
3545
  interface(CONST_INTER);
3546
%}
3547

3548
// Constant for short-wide masking
3549
operand immI_65535() %{
3550
  predicate(n->get_int() == 65535);
3551
  match(ConI);
3552

3553
  format %{ %}
3554
  interface(CONST_INTER);
3555
%}
3556

3557
// Register operand for RegVectMask values, allocated from the vectmask_reg class.
operand kReg()
3558
%{
3559
  constraint(ALLOC_IN_RC(vectmask_reg));
3560
  match(RegVectMask);
3561
  format %{%}
3562
  interface(REG_INTER);
3563
%}
3564

3565
// Register Operands
3566
// Integer Register
3567
operand rRegI() %{
3568
  constraint(ALLOC_IN_RC(int_reg));
3569
  match(RegI);
3570
  match(xRegI);
3571
  match(eAXRegI);
3572
  match(eBXRegI);
3573
  match(eCXRegI);
3574
  match(eDXRegI);
3575
  match(eDIRegI);
3576
  match(eSIRegI);
3577

3578
  format %{ %}
3579
  interface(REG_INTER);
3580
%}
3581

3582
// Subset of Integer Register
3583
operand xRegI(rRegI reg) %{
3584
  constraint(ALLOC_IN_RC(int_x_reg));
3585
  match(reg);
3586
  match(eAXRegI);
3587
  match(eBXRegI);
3588
  match(eCXRegI);
3589
  match(eDXRegI);
3590

3591
  format %{ %}
3592
  interface(REG_INTER);
3593
%}
3594

3595
// Special Registers
3596
operand eAXRegI(xRegI reg) %{
3597
  constraint(ALLOC_IN_RC(eax_reg));
3598
  match(reg);
3599
  match(rRegI);
3600

3601
  format %{ "EAX" %}
3602
  interface(REG_INTER);
3603
%}
3604

3605
// Special Registers
3606
operand eBXRegI(xRegI reg) %{
3607
  constraint(ALLOC_IN_RC(ebx_reg));
3608
  match(reg);
3609
  match(rRegI);
3610

3611
  format %{ "EBX" %}
3612
  interface(REG_INTER);
3613
%}
3614

3615
operand eCXRegI(xRegI reg) %{
3616
  constraint(ALLOC_IN_RC(ecx_reg));
3617
  match(reg);
3618
  match(rRegI);
3619

3620
  format %{ "ECX" %}
3621
  interface(REG_INTER);
3622
%}
3623

3624
operand eDXRegI(xRegI reg) %{
3625
  constraint(ALLOC_IN_RC(edx_reg));
3626
  match(reg);
3627
  match(rRegI);
3628

3629
  format %{ "EDX" %}
3630
  interface(REG_INTER);
3631
%}
3632

3633
operand eDIRegI(xRegI reg) %{
3634
  constraint(ALLOC_IN_RC(edi_reg));
3635
  match(reg);
3636
  match(rRegI);
3637

3638
  format %{ "EDI" %}
3639
  interface(REG_INTER);
3640
%}
3641

3642
// Integer register allocated from the nadx_reg class.  Besides RegI it also
// matches the eBXRegI, eCXRegI, eSIRegI and eDIRegI operands; no EAX or EDX
// operand is listed.
operand nadxRegI() %{
3643
  constraint(ALLOC_IN_RC(nadx_reg));
3644
  match(RegI);
3645
  match(eBXRegI);
3646
  match(eCXRegI);
3647
  match(eSIRegI);
3648
  match(eDIRegI);
3649

3650
  format %{ %}
3651
  interface(REG_INTER);
3652
%}
3653

3654
// Integer register allocated from the ncx_reg class.  Besides RegI it also
// matches the eAXRegI, eDXRegI, eSIRegI and eDIRegI operands; no ECX operand
// is listed (and eBXRegI is not listed either).
operand ncxRegI() %{
3655
  constraint(ALLOC_IN_RC(ncx_reg));
3656
  match(RegI);
3657
  match(eAXRegI);
3658
  match(eDXRegI);
3659
  match(eSIRegI);
3660
  match(eDIRegI);
3661

3662
  format %{ %}
3663
  interface(REG_INTER);
3664
%}
3665

3666
// This operand was used by cmpFastUnlock, but conflicted with 'object' reg
3668
operand eSIRegI(xRegI reg) %{
3669
   constraint(ALLOC_IN_RC(esi_reg));
3670
   match(reg);
3671
   match(rRegI);
3672

3673
   format %{ "ESI" %}
3674
   interface(REG_INTER);
3675
%}
3676

3677
// Pointer Register
3678
operand anyRegP() %{
3679
  constraint(ALLOC_IN_RC(any_reg));
3680
  match(RegP);
3681
  match(eAXRegP);
3682
  match(eBXRegP);
3683
  match(eCXRegP);
3684
  match(eDIRegP);
3685
  match(eRegP);
3686

3687
  format %{ %}
3688
  interface(REG_INTER);
3689
%}
3690

3691
operand eRegP() %{
3692
  constraint(ALLOC_IN_RC(int_reg));
3693
  match(RegP);
3694
  match(eAXRegP);
3695
  match(eBXRegP);
3696
  match(eCXRegP);
3697
  match(eDIRegP);
3698

3699
  format %{ %}
3700
  interface(REG_INTER);
3701
%}
3702

3703
// Pointer register in the int_reg class.  Its constraint and match rules are
// identical to those of eRegP.
operand rRegP() %{
3704
  constraint(ALLOC_IN_RC(int_reg));
3705
  match(RegP);
3706
  match(eAXRegP);
3707
  match(eBXRegP);
3708
  match(eCXRegP);
3709
  match(eDIRegP);
3710

3711
  format %{ %}
3712
  interface(REG_INTER);
3713
%}
3714

3715
// On windows95, EBP is not safe to use for implicit null tests.
3716
operand eRegP_no_EBP() %{
3717
  constraint(ALLOC_IN_RC(int_reg_no_ebp));
3718
  match(RegP);
3719
  match(eAXRegP);
3720
  match(eBXRegP);
3721
  match(eCXRegP);
3722
  match(eDIRegP);
3723

3724
  op_cost(100);
3725
  format %{ %}
3726
  interface(REG_INTER);
3727
%}
3728

3729
// Pointer register allocated from the p_reg class.  Besides RegP it also
// matches the eBXRegP, eDXRegP, eSIRegP and eDIRegP operands.
operand pRegP() %{
3730
  constraint(ALLOC_IN_RC(p_reg));
3731
  match(RegP);
3732
  match(eBXRegP);
3733
  match(eDXRegP);
3734
  match(eSIRegP);
3735
  match(eDIRegP);
3736

3737
  format %{ %}
3738
  interface(REG_INTER);
3739
%}
3740

3741
// Special Registers
3742
// Return a pointer value
3743
operand eAXRegP(eRegP reg) %{
3744
  constraint(ALLOC_IN_RC(eax_reg));
3745
  match(reg);
3746
  format %{ "EAX" %}
3747
  interface(REG_INTER);
3748
%}
3749

3750
// Used in AtomicAdd
3751
operand eBXRegP(eRegP reg) %{
3752
  constraint(ALLOC_IN_RC(ebx_reg));
3753
  match(reg);
3754
  format %{ "EBX" %}
3755
  interface(REG_INTER);
3756
%}
3757

3758
// Tail-call (interprocedural jump) to interpreter
3759
operand eCXRegP(eRegP reg) %{
3760
  constraint(ALLOC_IN_RC(ecx_reg));
3761
  match(reg);
3762
  format %{ "ECX" %}
3763
  interface(REG_INTER);
3764
%}
3765

3766
operand eDXRegP(eRegP reg) %{
3767
  constraint(ALLOC_IN_RC(edx_reg));
3768
  match(reg);
3769
  format %{ "EDX" %}
3770
  interface(REG_INTER);
3771
%}
3772

3773
operand eSIRegP(eRegP reg) %{
3774
  constraint(ALLOC_IN_RC(esi_reg));
3775
  match(reg);
3776
  format %{ "ESI" %}
3777
  interface(REG_INTER);
3778
%}
3779

3780
// Used in rep stosw
3781
operand eDIRegP(eRegP reg) %{
3782
  constraint(ALLOC_IN_RC(edi_reg));
3783
  match(reg);
3784
  format %{ "EDI" %}
3785
  interface(REG_INTER);
3786
%}
3787

3788
operand eRegL() %{
3789
  constraint(ALLOC_IN_RC(long_reg));
3790
  match(RegL);
3791
  match(eADXRegL);
3792

3793
  format %{ %}
3794
  interface(REG_INTER);
3795
%}
3796

3797
operand eADXRegL( eRegL reg ) %{
3798
  constraint(ALLOC_IN_RC(eadx_reg));
3799
  match(reg);
3800

3801
  format %{ "EDX:EAX" %}
3802
  interface(REG_INTER);
3803
%}
3804

3805
operand eBCXRegL( eRegL reg ) %{
3806
  constraint(ALLOC_IN_RC(ebcx_reg));
3807
  match(reg);
3808

3809
  format %{ "EBX:ECX" %}
3810
  interface(REG_INTER);
3811
%}
3812

3813
operand eBDPRegL( eRegL reg ) %{
3814
  constraint(ALLOC_IN_RC(ebpd_reg));
3815
  match(reg);
3816

3817
  format %{ "EBP:EDI" %}
3818
  interface(REG_INTER);
3819
%}
3820
// Special case for integer high multiply
3821
// Long register constrained to the eadx_reg class (same class as eADXRegL),
// but matched directly against RegL and printed as just "EAX".
// NOTE(review): presumably users only care about the low half -- confirm
// against the instructions that reference this operand.
operand eADXRegL_low_only() %{
3822
  constraint(ALLOC_IN_RC(eadx_reg));
3823
  match(RegL);
3824

3825
  format %{ "EAX" %}
3826
  interface(REG_INTER);
3827
%}
3828

3829
// Flags register, used as output of compare instructions
3830
operand rFlagsReg() %{
3831
  constraint(ALLOC_IN_RC(int_flags));
3832
  match(RegFlags);
3833

3834
  format %{ "EFLAGS" %}
3835
  interface(REG_INTER);
3836
%}
3837

3838
// Flags register, used as output of compare instructions
3839
operand eFlagsReg() %{
3840
  constraint(ALLOC_IN_RC(int_flags));
3841
  match(RegFlags);
3842

3843
  format %{ "EFLAGS" %}
3844
  interface(REG_INTER);
3845
%}
3846

3847
// Flags register, used as output of FLOATING POINT compare instructions
3848
operand eFlagsRegU() %{
3849
  constraint(ALLOC_IN_RC(int_flags));
3850
  match(RegFlags);
3851

3852
  format %{ "EFLAGS_U" %}
3853
  interface(REG_INTER);
3854
%}
3855

3856
// Flags register variant printed as "EFLAGS_U_CF".
operand eFlagsRegUCF() %{
3857
  constraint(ALLOC_IN_RC(int_flags));
3858
  match(RegFlags);
3859
  // The predicate is constant false, so matching a RegFlags node never selects
  // this operand by itself.
  // NOTE(review): presumably it is only produced where an instruction names it
  // explicitly -- confirm against its users.
  predicate(false);
3860

3861
  format %{ "EFLAGS_U_CF" %}
3862
  interface(REG_INTER);
3863
%}
3864

3865
// Condition Code Register used by long compare
3866
operand flagsReg_long_LTGE() %{
3867
  constraint(ALLOC_IN_RC(int_flags));
3868
  match(RegFlags);
3869
  format %{ "FLAGS_LTGE" %}
3870
  interface(REG_INTER);
3871
%}
3872
operand flagsReg_long_EQNE() %{
3873
  constraint(ALLOC_IN_RC(int_flags));
3874
  match(RegFlags);
3875
  format %{ "FLAGS_EQNE" %}
3876
  interface(REG_INTER);
3877
%}
3878
operand flagsReg_long_LEGT() %{
3879
  constraint(ALLOC_IN_RC(int_flags));
3880
  match(RegFlags);
3881
  format %{ "FLAGS_LEGT" %}
3882
  interface(REG_INTER);
3883
%}
3884

3885
// Condition Code Register used by unsigned long compare
3886
operand flagsReg_ulong_LTGE() %{
3887
  constraint(ALLOC_IN_RC(int_flags));
3888
  match(RegFlags);
3889
  format %{ "FLAGS_U_LTGE" %}
3890
  interface(REG_INTER);
3891
%}
3892
operand flagsReg_ulong_EQNE() %{
3893
  constraint(ALLOC_IN_RC(int_flags));
3894
  match(RegFlags);
3895
  format %{ "FLAGS_U_EQNE" %}
3896
  interface(REG_INTER);
3897
%}
3898
operand flagsReg_ulong_LEGT() %{
3899
  constraint(ALLOC_IN_RC(int_flags));
3900
  match(RegFlags);
3901
  format %{ "FLAGS_U_LEGT" %}
3902
  interface(REG_INTER);
3903
%}
3904

3905
// Float register operands
3906
operand regDPR() %{
3907
  predicate( UseSSE < 2 );
3908
  constraint(ALLOC_IN_RC(fp_dbl_reg));
3909
  match(RegD);
3910
  match(regDPR1);
3911
  match(regDPR2);
3912
  format %{ %}
3913
  interface(REG_INTER);
3914
%}
3915

3916
operand regDPR1(regDPR reg) %{
3917
  predicate( UseSSE < 2 );
3918
  constraint(ALLOC_IN_RC(fp_dbl_reg0));
3919
  match(reg);
3920
  format %{ "FPR1" %}
3921
  interface(REG_INTER);
3922
%}
3923

3924
operand regDPR2(regDPR reg) %{
3925
  predicate( UseSSE < 2 );
3926
  constraint(ALLOC_IN_RC(fp_dbl_reg1));
3927
  match(reg);
3928
  format %{ "FPR2" %}
3929
  interface(REG_INTER);
3930
%}
3931

3932
operand regnotDPR1(regDPR reg) %{
3933
  predicate( UseSSE < 2 );
3934
  constraint(ALLOC_IN_RC(fp_dbl_notreg0));
3935
  match(reg);
3936
  format %{ %}
3937
  interface(REG_INTER);
3938
%}
3939

3940
// Float register operands
3941
operand regFPR() %{
3942
  predicate( UseSSE < 2 );
3943
  constraint(ALLOC_IN_RC(fp_flt_reg));
3944
  match(RegF);
3945
  match(regFPR1);
3946
  format %{ %}
3947
  interface(REG_INTER);
3948
%}
3949

3950
// Float register operands
3951
operand regFPR1(regFPR reg) %{
3952
  predicate( UseSSE < 2 );
3953
  constraint(ALLOC_IN_RC(fp_flt_reg0));
3954
  match(reg);
3955
  format %{ "FPR1" %}
3956
  interface(REG_INTER);
3957
%}
3958

3959
// XMM Float register operands
3960
operand regF() %{
3961
  predicate( UseSSE>=1 );
3962
  constraint(ALLOC_IN_RC(float_reg_legacy));
3963
  match(RegF);
3964
  format %{ %}
3965
  interface(REG_INTER);
3966
%}
3967

3968
// XMM float register operand (UseSSE>=1) in the float_reg_legacy class.
// Its predicate, constraint and match rule are identical to those of regF.
operand legRegF() %{
3969
  predicate( UseSSE>=1 );
3970
  constraint(ALLOC_IN_RC(float_reg_legacy));
3971
  match(RegF);
3972
  format %{ %}
3973
  interface(REG_INTER);
3974
%}
3975

3976
// Float register operands
3977
// Float register operand allocated from the float_reg_vl class.  Unlike
// regF/legRegF it carries no UseSSE predicate.
operand vlRegF() %{
3978
   constraint(ALLOC_IN_RC(float_reg_vl));
3979
   match(RegF);
3980

3981
   format %{ %}
3982
   interface(REG_INTER);
3983
%}
3984

3985
// XMM Double register operands
3986
operand regD() %{
3987
  predicate( UseSSE>=2 );
3988
  constraint(ALLOC_IN_RC(double_reg_legacy));
3989
  match(RegD);
3990
  format %{ %}
3991
  interface(REG_INTER);
3992
%}
3993

3994
// Double register operands
3995
// XMM double register operand (UseSSE>=2) in the double_reg_legacy class.
// Its predicate, constraint and match rule are identical to those of regD.
operand legRegD() %{
3996
  predicate( UseSSE>=2 );
3997
  constraint(ALLOC_IN_RC(double_reg_legacy));
3998
  match(RegD);
3999
  format %{ %}
4000
  interface(REG_INTER);
4001
%}
4002

4003
// Double register operand allocated from the double_reg_vl class.  Unlike
// regD/legRegD it carries no UseSSE predicate.
operand vlRegD() %{
4004
   constraint(ALLOC_IN_RC(double_reg_vl));
4005
   match(RegD);
4006

4007
   format %{ %}
4008
   interface(REG_INTER);
4009
%}
4010

4011
//----------Memory Operands----------------------------------------------------
4012
// Direct Memory Operand
4013
// Absolute address: the pointer constant itself is the displacement.
operand direct(immP addr) %{
4014
  match(addr);
4015

4016
  format %{ "[$addr]" %}
4017
  interface(MEMORY_INTER) %{
4018
    base(0xFFFFFFFF);  // NOTE(review): presumably "no base register" -- confirm
4019
    index(0x4);        // No Index
4020
    scale(0x0);        // No Scale
4021
    disp($addr);       // The constant address
4022
  %}
4023
%}
4024

4025
// Indirect Memory Operand
4026
// Address held entirely in a pointer register: [reg].
operand indirect(eRegP reg) %{
4027
  constraint(ALLOC_IN_RC(int_reg));
4028
  match(reg);
4029

4030
  format %{ "[$reg]" %}
4031
  interface(MEMORY_INTER) %{
4032
    base($reg);
4033
    index(0x4);  // No Index
4034
    scale(0x0);  // No Scale
4035
    disp(0x0);   // No Displacement
4036
  %}
4037
%}
4038

4039
// Indirect Memory Plus Short Offset Operand
4040
// Pointer register plus a constant offset that satisfies immI8: [reg + off].
operand indOffset8(eRegP reg, immI8 off) %{
4041
  match(AddP reg off);
4042

4043
  format %{ "[$reg + $off]" %}
4044
  interface(MEMORY_INTER) %{
4045
    base($reg);
4046
    index(0x4);  // No Index
4047
    scale(0x0);  // No Scale
4048
    disp($off);
4049
  %}
4050
%}
4051

4052
// Indirect Memory Plus Long Offset Operand
4053
// Pointer register plus an arbitrary int constant offset: [reg + off].
// The frame block names this operand as the cisc-spilling operand.
operand indOffset32(eRegP reg, immI off) %{
4054
  match(AddP reg off);
4055

4056
  format %{ "[$reg + $off]" %}
4057
  interface(MEMORY_INTER) %{
4058
    base($reg);
4059
    index(0x4);  // No Index
4060
    scale(0x0);  // No Scale
4061
    disp($off);
4062
  %}
4063
%}
4064

4065
// Indirect Memory Plus Long Offset Operand
4066
// Constant pointer plus an integer register.  The AddP has the pointer
// constant as its first input and the int register as its second; the
// register is encoded as the base and the constant as the displacement.
operand indOffset32X(rRegI reg, immP off) %{
4067
  match(AddP off reg);
4068

4069
  format %{ "[$reg + $off]" %}
4070
  interface(MEMORY_INTER) %{
4071
    base($reg);
4072
    index(0x4);  // No Index
4073
    scale(0x0);  // No Scale
4074
    disp($off);
4075
  %}
4076
%}
4077

4078
// Indirect Memory Plus Index Register Plus Offset Operand
4079
// Pointer register plus unscaled index register plus constant offset.
operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
4080
  match(AddP (AddP reg ireg) off);
4081

4082
  op_cost(10);
4083
  format %{"[$reg + $off + $ireg]" %}
4084
  interface(MEMORY_INTER) %{
4085
    base($reg);
4086
    index($ireg);
4087
    scale(0x0);  // No Scale
4088
    disp($off);
4089
  %}
4090
%}
4091

4092
// Indirect Memory Plus Index Register Operand
4093
// Pointer register plus unscaled index register, no displacement.
operand indIndex(eRegP reg, rRegI ireg) %{
4094
  match(AddP reg ireg);
4095

4096
  op_cost(10);
4097
  format %{"[$reg + $ireg]" %}
4098
  interface(MEMORY_INTER) %{
4099
    base($reg);
4100
    index($ireg);
4101
    scale(0x0);  // No Scale
4102
    disp(0x0);   // No Displacement
4103
  %}
4104
%}
4105

4106
// // -------------------------------------------------------------------------
4107
// // 486 architecture doesn't support "scale * index + offset" without a base
4108
// // -------------------------------------------------------------------------
4109
// // Scaled Memory Operands
4110
// // Indirect Memory Times Scale Plus Offset Operand
4111
// operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
4112
//   match(AddP off (LShiftI ireg scale));
4113
//
4114
//   op_cost(10);
4115
//   format %{"[$off + $ireg << $scale]" %}
4116
//   interface(MEMORY_INTER) %{
4117
//     base(0x4);
4118
//     index($ireg);
4119
//     scale($scale);
4120
//     disp($off);
4121
//   %}
4122
// %}
4123

4124
// Indirect Memory Times Scale Plus Index Register
4125
// Pointer register plus index register shifted left by a constant that
// satisfies immI2, no displacement.
operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
4126
  match(AddP reg (LShiftI ireg scale));
4127

4128
  op_cost(10);
4129
  format %{"[$reg + $ireg << $scale]" %}
4130
  interface(MEMORY_INTER) %{
4131
    base($reg);
4132
    index($ireg);
4133
    scale($scale);  // Shift count taken from the LShiftI
4134
    disp(0x0);      // No Displacement
4135
  %}
4136
%}
4137

4138
// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4139
// Pointer register plus index register shifted left by a constant that
// satisfies immI2, plus a constant offset.
operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
4140
  match(AddP (AddP reg (LShiftI ireg scale)) off);
4141

4142
  op_cost(10);
4143
  format %{"[$reg + $off + $ireg << $scale]" %}
4144
  interface(MEMORY_INTER) %{
4145
    base($reg);
4146
    index($ireg);
4147
    scale($scale);  // Shift count taken from the LShiftI
4148
    disp($off);
4149
  %}
4150
%}
4151

4152
//----------Load Long Memory Operands------------------------------------------
4153
// The load-long idiom will use its address expression again after loading
4154
// the first word of the long.  If the load-long destination overlaps with
4155
// registers used in the addressing expression, the 2nd half will be loaded
4156
// from a clobbered address.  Fix this by requiring that load-long use
4157
// address registers that do not overlap with the load-long target.
4158

4159
// load-long support
4160
// Pointer register used as the address base for long loads.  Allocation is
// constrained to the esi_reg class, and the operand also matches eSIRegP.
// NOTE(review): presumably ESI is chosen because it never overlaps the
// load-long destination registers -- confirm against the long register classes.
operand load_long_RegP() %{
4161
  constraint(ALLOC_IN_RC(esi_reg));
4162
  match(RegP);
4163
  match(eSIRegP);
4164
  op_cost(100);
4165
  format %{  %}
4166
  interface(REG_INTER);
4167
%}
4168

4169
// Indirect Memory Operand Long
4170
// [reg] addressing for long loads, with the base taken from a load_long_RegP
// (esi_reg class) instead of a general pointer register.
operand load_long_indirect(load_long_RegP reg) %{
4171
  constraint(ALLOC_IN_RC(esi_reg));
4172
  match(reg);
4173

4174
  format %{ "[$reg]" %}
4175
  interface(MEMORY_INTER) %{
4176
    base($reg);
4177
    index(0x4);  // No Index
4178
    scale(0x0);  // No Scale
4179
    disp(0x0);   // No Displacement
4180
  %}
4181
%}
4182

4183
// Indirect Memory Plus Long Offset Operand
4184
// [reg + off] addressing for long loads, with the base taken from a
// load_long_RegP instead of a general pointer register.
operand load_long_indOffset32(load_long_RegP reg, immI off) %{
4185
  match(AddP reg off);
4186

4187
  format %{ "[$reg + $off]" %}
4188
  interface(MEMORY_INTER) %{
4189
    base($reg);
4190
    index(0x4);  // No Index
4191
    scale(0x0);  // No Scale
4192
    disp($off);
4193
  %}
4194
%}
4195

4196
opclass load_long_memory(load_long_indirect, load_long_indOffset32);
4197

4198

4199
//----------Special Memory Operands--------------------------------------------
4200
// Stack Slot Operand - This operand is used for loading and storing temporary
4201
//                      values on the stack where a match requires a value to
4202
//                      flow through memory.
4203
operand stackSlotP(sRegP reg) %{
4204
  constraint(ALLOC_IN_RC(stack_slots));
4205
  // No match rule because this operand is only generated in matching
4206
  format %{ "[$reg]" %}
4207
  interface(MEMORY_INTER) %{
4208
    base(0x4);   // ESP
4209
    index(0x4);  // No Index
4210
    scale(0x0);  // No Scale
4211
    disp($reg);  // Stack Offset
4212
  %}
4213
%}
4214

4215
operand stackSlotI(sRegI reg) %{
4216
  constraint(ALLOC_IN_RC(stack_slots));
4217
  // No match rule because this operand is only generated in matching
4218
  format %{ "[$reg]" %}
4219
  interface(MEMORY_INTER) %{
4220
    base(0x4);   // ESP
4221
    index(0x4);  // No Index
4222
    scale(0x0);  // No Scale
4223
    disp($reg);  // Stack Offset
4224
  %}
4225
%}
4226

4227
operand stackSlotF(sRegF reg) %{
4228
  constraint(ALLOC_IN_RC(stack_slots));
4229
  // No match rule because this operand is only generated in matching
4230
  format %{ "[$reg]" %}
4231
  interface(MEMORY_INTER) %{
4232
    base(0x4);   // ESP
4233
    index(0x4);  // No Index
4234
    scale(0x0);  // No Scale
4235
    disp($reg);  // Stack Offset
4236
  %}
4237
%}
4238

4239
operand stackSlotD(sRegD reg) %{
4240
  constraint(ALLOC_IN_RC(stack_slots));
4241
  // No match rule because this operand is only generated in matching
4242
  format %{ "[$reg]" %}
4243
  interface(MEMORY_INTER) %{
4244
    base(0x4);   // ESP
4245
    index(0x4);  // No Index
4246
    scale(0x0);  // No Scale
4247
    disp($reg);  // Stack Offset
4248
  %}
4249
%}
4250

4251
operand stackSlotL(sRegL reg) %{
4252
  constraint(ALLOC_IN_RC(stack_slots));
4253
  // No match rule because this operand is only generated in matching
4254
  format %{ "[$reg]" %}
4255
  interface(MEMORY_INTER) %{
4256
    base(0x4);   // ESP
4257
    index(0x4);  // No Index
4258
    scale(0x0);  // No Scale
4259
    disp($reg);  // Stack Offset
4260
  %}
4261
%}
4262

4263
//----------Conditional Branch Operands----------------------------------------
4264
// Comparison Op  - This is the operation of the comparison, and is limited to
4265
//                  the following set of codes:
4266
//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
4267
//
4268
// Other attributes of the comparison, such as unsignedness, are specified
4269
// by the comparison instruction that sets a condition code flags register.
4270
// That result is represented by a flags operand whose subtype is appropriate
4271
// to the unsignedness (etc.) of the comparison.
4272
//
4273
// Later, the instruction which matches both the Comparison Op (a Bool) and
4274
// the flags (produced by the Cmp) specifies the coding of the comparison op
4275
// by matching a specific subtype of Bool operand below, such as cmpOpU.
4276

4277
// Comparison Code.  Matches a Bool node and, through COND_INTER, gives each
// test a condition encoding plus a short mnemonic string (presumably the
// suffix shown in printed assembly).  The ordered tests use l/ge/le/g.
4278
operand cmpOp() %{
4279
  match(Bool);
4280

4281
  format %{ "" %}
4282
  interface(COND_INTER) %{
4283
    equal(0x4, "e");
4284
    not_equal(0x5, "ne");
4285
    less(0xC, "l");
4286
    greater_equal(0xD, "ge");
4287
    less_equal(0xE, "le");
4288
    greater(0xF, "g");
4289
    overflow(0x0, "o");
4290
    no_overflow(0x1, "no");
4291
  %}
4292
%}
4293

4294
// Comparison Code, unsigned compare.  Used by FP also, with
4295
// C2 (unordered) turned into GT or LT already.  The other bits
4296
// C0 and C3 are turned into Carry & Zero flags.
// The ordered tests therefore use the b/nb/be/nbe encodings; equal,
// not_equal, overflow and no_overflow carry the same values as in cmpOp.
4297
operand cmpOpU() %{
4298
  match(Bool);
4299

4300
  format %{ "" %}
4301
  interface(COND_INTER) %{
4302
    equal(0x4, "e");
4303
    not_equal(0x5, "ne");
4304
    less(0x2, "b");
4305
    greater_equal(0x3, "nb");
4306
    less_equal(0x6, "be");
4307
    greater(0x7, "nbe");
4308
    overflow(0x0, "o");
4309
    no_overflow(0x1, "no");
4310
  %}
4311
%}
4312

4313
// Floating comparisons that don't require any fixup for the unordered case.
// The predicate limits this operand to the ordered tests lt, ge, le and gt;
// the encoding table itself is the same as cmpOpU's.
4314
operand cmpOpUCF() %{
4315
  match(Bool);
4316
  predicate(n->as_Bool()->_test._test == BoolTest::lt ||
4317
            n->as_Bool()->_test._test == BoolTest::ge ||
4318
            n->as_Bool()->_test._test == BoolTest::le ||
4319
            n->as_Bool()->_test._test == BoolTest::gt);
4320
  format %{ "" %}
4321
  interface(COND_INTER) %{
4322
    equal(0x4, "e");
4323
    not_equal(0x5, "ne");
4324
    less(0x2, "b");
4325
    greater_equal(0x3, "nb");
4326
    less_equal(0x6, "be");
4327
    greater(0x7, "nbe");
4328
    overflow(0x0, "o");
4329
    no_overflow(0x1, "no");
4330
  %}
4331
%}
4332

4333

4334
// Floating comparisons that can be fixed up with extra conditional jumps.
// The predicate limits this operand to the eq and ne tests; the encoding
// table itself is the same as cmpOpU's.
4335
operand cmpOpUCF2() %{
4336
  match(Bool);
4337
  predicate(n->as_Bool()->_test._test == BoolTest::ne ||
4338
            n->as_Bool()->_test._test == BoolTest::eq);
4339
  format %{ "" %}
4340
  interface(COND_INTER) %{
4341
    equal(0x4, "e");
4342
    not_equal(0x5, "ne");
4343
    less(0x2, "b");
4344
    greater_equal(0x3, "nb");
4345
    less_equal(0x6, "be");
4346
    greater(0x7, "nbe");
4347
    overflow(0x0, "o");
4348
    no_overflow(0x1, "no");
4349
  %}
4350
%}
4351

4352
// Comparison Code for FP conditional move.
// The predicate rejects overflow/no_overflow tests.  The six real tests carry
// wider encodings (0x0C0..0x1D0) and no mnemonic string, unlike the other
// comparison operands; the overflow entries exist only to fill the table.
4353
operand cmpOp_fcmov() %{
4354
  match(Bool);
4355

4356
  predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
4357
            n->as_Bool()->_test._test != BoolTest::no_overflow);
4358
  format %{ "" %}
4359
  interface(COND_INTER) %{
4360
    equal        (0x0C8);
4361
    not_equal    (0x1C8);
4362
    less         (0x0C0);
4363
    greater_equal(0x1C0);
4364
    less_equal   (0x0D0);
4365
    greater      (0x1D0);
4366
    overflow(0x0, "o"); // not really supported by the instruction
4367
    no_overflow(0x1, "no"); // not really supported by the instruction
4368
  %}
4369
%}
4370

4371
// Comparison Code used in long compares.
// Each ordered test is given the encoding of its mirror image, as if the
// compare operands had been swapped: less -> g, greater_equal -> le,
// less_equal -> ge, greater -> l.  The remaining entries match cmpOp.
4372
operand cmpOp_commute() %{
4373
  match(Bool);
4374

4375
  format %{ "" %}
4376
  interface(COND_INTER) %{
4377
    equal(0x4, "e");
4378
    not_equal(0x5, "ne");
4379
    less(0xF, "g");
4380
    greater_equal(0xE, "le");
4381
    less_equal(0xD, "ge");
4382
    greater(0xC, "l");
4383
    overflow(0x0, "o");
4384
    no_overflow(0x1, "no");
4385
  %}
4386
%}
4387

4388
// Comparison Code used in unsigned long compares.
// Each ordered test is given the encoding of its mirror image, as if the
// compare operands had been swapped: less -> nbe, greater_equal -> be,
// less_equal -> nb, greater -> b.  The remaining entries match cmpOpU.
4389
operand cmpOpU_commute() %{
4390
  match(Bool);
4391

4392
  format %{ "" %}
4393
  interface(COND_INTER) %{
4394
    equal(0x4, "e");
4395
    not_equal(0x5, "ne");
4396
    less(0x7, "nbe");
4397
    greater_equal(0x6, "be");
4398
    less_equal(0x3, "nb");
4399
    greater(0x2, "b");
4400
    overflow(0x0, "o");
4401
    no_overflow(0x1, "no");
4402
  %}
4403
%}
4404

4405
//----------OPERAND CLASSES----------------------------------------------------
4406
// Operand Classes are groups of operands that are used to simplify
4407
// instruction definitions by not requiring the AD writer to specify separate
4408
// instructions for every form of operand when the instruction accepts
4409
// multiple operand types with the same basic encoding and format.  The classic
4410
// case of this is memory operands.
4411

4412
// General memory operand class: an instruction declared with a 'memory'
// operand accepts any of the addressing-mode operands listed here.
opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
4413
               indIndex, indIndexScale, indIndexScaleOffset);
4414

4415
// Long memory operations are encoded in 2 instructions and a +4 offset.
4416
// This means some kind of offset is always required and you cannot use
4417
// an oop as the offset (done when working on static globals).
// The list is the same as 'memory' except that indOffset32X is left out.
4418
opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
4419
                    indIndex, indIndexScale, indIndexScaleOffset);
4420

4421

4422
//----------PIPELINE-----------------------------------------------------------
4423
// Rules which define the behavior of the target architectures pipeline.
4424
pipeline %{
4425

4426
//----------ATTRIBUTES---------------------------------------------------------
4427
// Global pipeline attributes: instruction sizing, bundle width, fetch
// geometry and the nop instruction used for padding.
attributes %{
4428
  variable_size_instructions;        // Variable size instructions
4429
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
4430
  instruction_unit_size = 1;         // Instruction size is measured in units of 1 byte
4431
  instruction_fetch_unit_size = 16;  // The processor fetches one line
4432
  instruction_fetch_units = 1;       // of 16 bytes
4433

4434
  // List of nop instructions
4435
  nops( MachNop );
4436
%}
4437

4438
//----------RESOURCES----------------------------------------------------------
4439
// Resources are the functional units available to the machine
4440

4441
// Generic P2/P3 pipeline
4442
// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
4443
// 3 instructions decoded per cycle.
4444
// 2 load/store ops per cycle, 1 branch, 1 FPU,
4445
// 2 ALU op, only ALU0 handles mul/div instructions.
4446
// Functional units: decoders D0-D2 (DECODE = any of them), memory units
// MS0/MS1 (MEM = either), one branch unit BR, one FPU, and integer units
// ALU0/ALU1 (ALU = either).
resources( D0, D1, D2, DECODE = D0 | D1 | D2,
4447
           MS0, MS1, MEM = MS0 | MS1,
4448
           BR, FPU,
4449
           ALU0, ALU1, ALU = ALU0 | ALU1 );
4450

4451
//----------PIPELINE DESCRIPTION-----------------------------------------------
4452
// Pipeline Description specifies the stages in the machine's pipeline
4453

4454
// Generic P2/P3 pipeline: six stages, S0 through S5.  The pipe classes refer
// to these stage names when stating where operands are read/written and where
// resources are occupied.
4455
pipe_desc(S0, S1, S2, S3, S4, S5);
4456

4457
//----------PIPELINE CLASSES---------------------------------------------------
4458
// Pipeline Classes describe the stages in which input and output are
4459
// referenced by the hardware pipeline.
4460

4461
// Naming convention: ialu or fpu
4462
// Then: _reg
4463
// Then: _reg if there is a 2nd register
4464
// Then: _long if it's a pair of instructions implementing a long
4465
// Then: _fat if it requires the big decoder
4466
//   Or: _mem if it requires the big decoder and a memory unit.
4467

4468
// Integer ALU reg operation: a single instruction that reads dst in S3 and
// writes it in S4.
4469
pipe_class ialu_reg(rRegI dst) %{
4470
    single_instruction;
4471
    dst    : S4(write);
4472
    dst    : S3(read);
4473
    DECODE : S0;        // any decoder
4474
    ALU    : S3;        // any alu
4475
%}
4476

4477
// Long ALU reg operation: two instructions on a long register pair, taking
// two decoders in S0 and both ALUs in S3.
4478
pipe_class ialu_reg_long(eRegL dst) %{
4479
    instruction_count(2);
4480
    dst    : S4(write);
4481
    dst    : S3(read);
4482
    DECODE : S0(2);     // any 2 decoders
4483
    ALU    : S3(2);     // both alus
4484
%}
4485

4486
// Integer ALU reg operation using big decoder: same as ialu_reg except that
// decoding is restricted to D0.
4487
pipe_class ialu_reg_fat(rRegI dst) %{
4488
    single_instruction;
4489
    dst    : S4(write);
4490
    dst    : S3(read);
4491
    D0     : S0;        // big decoder only
4492
    ALU    : S3;        // any alu
4493
%}
4494

4495
// Long ALU reg operation using big decoder: two instructions, each decoded
// by D0.
4496
pipe_class ialu_reg_long_fat(eRegL dst) %{
4497
    instruction_count(2);
4498
    dst    : S4(write);
4499
    dst    : S3(read);
4500
    D0     : S0(2);     // big decoder only; twice
4501
    ALU    : S3(2);     // any 2 alus
4502
%}
4503

4504
// Integer ALU reg-reg operation: a single instruction that reads src in S3
// and writes dst in S4.
4505
pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
4506
    single_instruction;
4507
    dst    : S4(write);
4508
    src    : S3(read);
4509
    DECODE : S0;        // any decoder
4510
    ALU    : S3;        // any alu
4511
%}
4512

4513
// Long ALU reg-reg operation: two instructions over long register pairs,
// taking two decoders in S0 and both ALUs in S3.
4514
pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
4515
    instruction_count(2);
4516
    dst    : S4(write);
4517
    src    : S3(read);
4518
    DECODE : S0(2);     // any 2 decoders
4519
    ALU    : S3(2);     // both alus
4520
%}
4521

4522
// Integer ALU operation using big decoder.  Despite the reg_reg name, the
// second parameter is declared as a 'memory' operand, yet no MEM resource is
// reserved.  NOTE(review): confirm whether this is intentional.
4523
pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
4524
    single_instruction;
4525
    dst    : S4(write);
4526
    src    : S3(read);
4527
    D0     : S0;        // big decoder only
4528
    ALU    : S3;        // any alu
4529
%}
4530

4531
// Long ALU reg-reg operation using big decoder: two instructions, each
// decoded by D0.
4532
pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
4533
    instruction_count(2);
4534
    dst    : S4(write);
4535
    src    : S3(read);
4536
    D0     : S0(2);     // big decoder only; twice
4537
    ALU    : S3(2);     // both alus
4538
%}
4539

4540
// Integer ALU reg-mem operation: the memory operand is read in S3, the ALU is
// used one stage later (S4) and dst is written in S5.
4541
pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
4542
    single_instruction;
4543
    dst    : S5(write);
4544
    mem    : S3(read);
4545
    D0     : S0;        // big decoder only
4546
    ALU    : S4;        // any alu
4547
    MEM    : S3;        // any mem
4548
%}
4549

4550
// Long ALU reg-mem operation: two instructions, each needing the big decoder,
// an ALU and a memory unit.  The memory parameter is declared with the
// load_long_memory operand class, which is defined outside this section.
4551
pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
4552
    instruction_count(2);
4553
    dst    : S5(write);
4554
    mem    : S3(read);
4555
    D0     : S0(2);     // big decoder only; twice
4556
    ALU    : S4(2);     // any 2 alus
4557
    MEM    : S3(2);     // both mems
4558
%}
4559

4560
// Integer mem operation (prefetch): touches memory only; no register result
// and no ALU is reserved.
4561
pipe_class ialu_mem(memory mem)
4562
%{
4563
    single_instruction;
4564
    mem    : S3(read);
4565
    D0     : S0;        // big decoder only
4566
    MEM    : S3;        // any mem
4567
%}
4568

4569
// Integer Store to Memory from a register: the address operand is read in S3
// and the source register in S5.
4570
pipe_class ialu_mem_reg(memory mem, rRegI src) %{
4571
    single_instruction;
4572
    mem    : S3(read);
4573
    src    : S5(read);
4574
    D0     : S0;        // big decoder only
4575
    ALU    : S4;        // any alu
4576
    MEM    : S3;
4577
%}
4578

4579
// Long Store to Memory: two instructions, each needing the big decoder, an
// ALU and a memory unit.
4580
pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
4581
    instruction_count(2);
4582
    mem    : S3(read);
4583
    src    : S5(read);
4584
    D0     : S0(2);     // big decoder only; twice
4585
    ALU    : S4(2);     // any 2 alus
4586
    MEM    : S3(2);     // Both mems
4587
%}
4588

4589
// Integer Store to Memory with no register source (the _imm suffix suggests
// the stored value is an immediate).  Same resources as ialu_mem_reg.
4590
pipe_class ialu_mem_imm(memory mem) %{
4591
    single_instruction;
4592
    mem    : S3(read);
4593
    D0     : S0;        // big decoder only
4594
    ALU    : S4;        // any alu
4595
    MEM    : S3;
4596
%}
4597

4598
// Integer ALU0 reg-reg operation: like ialu_reg_reg but pinned to the big
// decoder and to ALU0 (the resource comments earlier in the pipeline section
// say only ALU0 handles mul/div).
4599
pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
4600
    single_instruction;
4601
    dst    : S4(write);
4602
    src    : S3(read);
4603
    D0     : S0;        // Big decoder only
4604
    ALU0   : S3;        // only alu0
4605
%}
4606

4607
// Integer ALU0 reg-mem operation: like ialu_reg_mem but the ALU stage is
// pinned to ALU0.
4608
pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
4609
    single_instruction;
4610
    dst    : S5(write);
4611
    mem    : S3(read);
4612
    D0     : S0;        // big decoder only
4613
    ALU0   : S4;        // ALU0 only
4614
    MEM    : S3;        // any mem
4615
%}
4616

4617
// Integer ALU reg-reg operation whose result is the flags register: src1 and
// src2 are read in S3 and cr is written in S4.
4618
pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
4619
    single_instruction;
4620
    cr     : S4(write);
4621
    src1   : S3(read);
4622
    src2   : S3(read);
4623
    DECODE : S0;        // any decoder
4624
    ALU    : S3;        // any alu
4625
%}
4626

4627
// Integer ALU reg-imm operation whose result is the flags register: src1 is
// read in S3 and cr is written in S4.
4628
pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
4629
    single_instruction;
4630
    cr     : S4(write);
4631
    src1   : S3(read);
4632
    DECODE : S0;        // any decoder
4633
    ALU    : S3;        // any alu
4634
%}
4635

4636
// Integer ALU reg-mem operation whose result is the flags register: needs the
// big decoder, a memory unit in S3 and an ALU in S4.
4637
pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
4638
    single_instruction;
4639
    cr     : S4(write);
4640
    src1   : S3(read);
4641
    src2   : S3(read);
4642
    D0     : S0;        // big decoder only
4643
    ALU    : S4;        // any alu
4644
    MEM    : S3;
4645
%}
4646

4647
// Four-instruction sequence over three integer registers (p, q, y); only the
// decoders are reserved.  NOTE(review): the header here used to read
// "Conditional move reg-reg", but the class name suggests a compare-less-than
// idiom - confirm against the instructions that use it.
4648
pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
4649
    instruction_count(4);
4650
    y      : S4(read);
4651
    q      : S3(read);
4652
    p      : S3(read);
4653
    DECODE : S0(4);     // any decoder
4654
%}
4655

4656
// Conditional move reg-reg: reads src and the flags in S3, writes dst in S4.
// Only a decoder is reserved; no ALU resource is listed.
4657
pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
4658
    single_instruction;
4659
    dst    : S4(write);
4660
    src    : S3(read);
4661
    cr     : S3(read);
4662
    DECODE : S0;        // any decoder
4663
%}
4664

4665
// Conditional move reg-mem: like pipe_cmov_reg, plus a memory unit in S3 for
// the memory source.  Note the parameter order (cr first) differs from
// pipe_cmov_reg.
4666
pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
4667
    single_instruction;
4668
    dst    : S4(write);
4669
    src    : S3(read);
4670
    cr     : S3(read);
4671
    DECODE : S0;        // any decoder
4672
    MEM    : S3;
4673
%}
4674

4675
// Conditional move reg-reg long.
// NOTE(review): declared single_instruction although two decoders are
// reserved; the other *_long classes in this section use instruction_count(2).
// Confirm which is intended.
4676
pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
4677
    single_instruction;
4678
    dst    : S4(write);
4679
    src    : S3(read);
4680
    cr     : S3(read);
4681
    DECODE : S0(2);     // any 2 decoders
4682
%}
4683

4684
// Conditional move double reg-reg: dst is a regDPR1 operand and src a regDPR
// operand; only a decoder is reserved (no FPU resource is listed).
4685
pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
4686
    single_instruction;
4687
    dst    : S4(write);
4688
    src    : S3(read);
4689
    cr     : S3(read);
4690
    DECODE : S0;        // any decoder
4691
%}
4692

4693
// Float single-register operation: two instructions; dst is only declared as
// read (S3) here, with no write stage.
4694
pipe_class fpu_reg(regDPR dst) %{
4695
    instruction_count(2);
4696
    dst    : S3(read);
4697
    DECODE : S0(2);     // any 2 decoders
4698
    FPU    : S3;
4699
%}
4700

4701
// Float reg-reg operation: two instructions; src is read in S3 and dst is
// written in S4, using the FPU in S3.
4702
pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
4703
    instruction_count(2);
4704
    dst    : S4(write);
4705
    src    : S3(read);
4706
    DECODE : S0(2);     // any 2 decoders
4707
    FPU    : S3;
4708
%}
4709

4710
// Float reg-reg-reg operation: three instructions with two source registers;
// the FPU is reserved twice in S3.
4711
pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
4712
    instruction_count(3);
4713
    dst    : S4(write);
4714
    src1   : S3(read);
4715
    src2   : S3(read);
4716
    DECODE : S0(3);     // any 3 decoders
4717
    FPU    : S3(2);
4718
%}
4719

4720
// Float operation with three register sources: four instructions; the FPU is
// reserved twice in S3.
4721
pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
4722
    instruction_count(4);
4723
    dst    : S4(write);
4724
    src1   : S3(read);
4725
    src2   : S3(read);
4726
    src3   : S3(read);
4727
    DECODE : S0(4);     // 4 decoder slots (more than the 3 decoders available per bundle)
4728
    FPU    : S3(2);
4729
%}
4730

4731
// Float operation with one memory source and two register sources: four
// instructions.  The big decoder is used in S0 (presumably for the memory
// access) and three more decoder slots in S1.
4732
pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
4733
    instruction_count(4);
4734
    dst    : S4(write);
4735
    src1   : S3(read);
4736
    src2   : S3(read);
4737
    src3   : S3(read);
4738
    DECODE : S1(3);     // any 3 decoders
4739
    D0     : S0;        // Big decoder only
4740
    FPU    : S3(2);
4741
    MEM    : S3;
4742
%}
4743

4744
// Float reg-mem operation: two instructions; memory is read in S3, the FPU is
// used in S4 and dst is written in S5.
4745
pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
4746
    instruction_count(2);
4747
    dst    : S5(write);
4748
    mem    : S3(read);
4749
    D0     : S0;        // big decoder only
4750
    DECODE : S1;        // any decoder for FPU POP
4751
    FPU    : S4;
4752
    MEM    : S3;        // any mem
4753
%}
4754

4755
// Float reg-reg-mem operation: three instructions with one register source
// and one memory source; dst is written in S5.
4756
pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
4757
    instruction_count(3);
4758
    dst    : S5(write);
4759
    src1   : S3(read);
4760
    mem    : S3(read);
4761
    D0     : S0;        // big decoder only
4762
    DECODE : S1(2);     // any decoder for FPU POP
4763
    FPU    : S4;
4764
    MEM    : S3;        // any mem
4765
%}
4766

4767
// Float mem-reg operation (store direction): two instructions; the register
// source is read in S5.  Decoder order is the reverse of fpu_reg_mem: any
// decoder in S0, then the big decoder in S1.
4768
pipe_class fpu_mem_reg(memory mem, regDPR src) %{
4769
    instruction_count(2);
4770
    src    : S5(read);
4771
    mem    : S3(read);
4772
    DECODE : S0;        // any decoder for FPU PUSH
4773
    D0     : S1;        // big decoder only
4774
    FPU    : S4;
4775
    MEM    : S3;        // any mem
4776
%}
4777

4778
// Float mem-reg-reg operation: three instructions with a memory operand and
// two register sources, all read in S3.
pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
4779
    instruction_count(3);
4780
    src1   : S3(read);
4781
    src2   : S3(read);
4782
    mem    : S3(read);
4783
    DECODE : S0(2);     // any decoder for FPU PUSH
4784
    D0     : S1;        // big decoder only
4785
    FPU    : S4;
4786
    MEM    : S3;        // any mem
4787
%}
4788

4789
// Float mem-reg-mem operation: three instructions with a memory operand, a
// register source and a second memory source; two memory-unit slots are
// reserved in S3.
pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
4790
    instruction_count(3);
4791
    src1   : S3(read);
4792
    src2   : S3(read);
4793
    mem    : S4(read);
4794
    DECODE : S0;        // any decoder for FPU PUSH
4795
    D0     : S0(2);     // big decoder only
4796
    FPU    : S4;
4797
    MEM    : S3(2);     // any mem
4798
%}
4799

4800
// Float mem-mem operation: two instructions, each needing the big decoder and
// a memory unit.  No FPU resource is reserved, and dst is declared with a
// read stage only.
pipe_class fpu_mem_mem(memory dst, memory src1) %{
4801
    instruction_count(2);
4802
    src1   : S3(read);
4803
    dst    : S4(read);
4804
    D0     : S0(2);     // big decoder only
4805
    MEM    : S3(2);     // any mem
4806
%}
4807

4808
// Float mem-mem-mem operation: three instructions, each needing the big
// decoder and a memory unit, plus the FPU in S4.
pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
4809
    instruction_count(3);
4810
    src1   : S3(read);
4811
    src2   : S3(read);
4812
    dst    : S4(read);
4813
    D0     : S0(3);     // big decoder only
4814
    FPU    : S4;
4815
    MEM    : S3(3);     // any mem
4816
%}
4817

4818
// Float mem-reg operation that also involves a constant (per the _con
// suffix): three instructions; two memory-unit slots are reserved in S3.
pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
4819
    instruction_count(3);
4820
    src1   : S4(read);
4821
    mem    : S4(read);
4822
    DECODE : S0;        // any decoder for FPU PUSH
4823
    D0     : S0(2);     // big decoder only
4824
    FPU    : S4;
4825
    MEM    : S3(2);     // any mem
4826
%}
4827

4828
// Float load constant: two instructions; the constant load uses the big
// decoder and a memory unit, and dst is written in S5.
4829
pipe_class fpu_reg_con(regDPR dst) %{
4830
    instruction_count(2);
4831
    dst    : S5(write);
4832
    D0     : S0;        // big decoder only for the load
4833
    DECODE : S1;        // any decoder for FPU POP
4834
    FPU    : S4;
4835
    MEM    : S3;        // any mem
4836
%}
4837

4838
// Float load constant combined with a register source: three instructions;
// src is read in S3 and dst is written in S5.
4839
pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
4840
    instruction_count(3);
4841
    dst    : S5(write);
4842
    src    : S3(read);
4843
    D0     : S0;        // big decoder only for the load
4844
    DECODE : S1(2);     // any decoder for FPU POP
4845
    FPU    : S4;
4846
    MEM    : S3;        // any mem
4847
%}
4848

4849
// Unconditional branch: a single instruction that occupies the branch unit
// in S3; no decoder resource is listed.
4850
pipe_class pipe_jmp( label labl ) %{
4851
    single_instruction;
4852
    BR   : S3;
4853
%}
4854

4855
// Conditional branch: a single instruction that reads the flags in S1 and
// occupies the branch unit in S3.
4856
pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
4857
    single_instruction;
4858
    cr    : S1(read);
4859
    BR    : S3;
4860
%}
4861

4862
// Allocation idiom (compare-and-exchange on a heap pointer): modelled as one
// serializing instruction with a fixed latency of 6 that ties up decoders,
// a memory unit, both ALU slots and the branch unit.
4863
pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
4864
    instruction_count(1); force_serialization;
4865
    fixed_latency(6);
4866
    heap_ptr : S3(read);
4867
    DECODE   : S0(3);
4868
    D0       : S2;
4869
    MEM      : S3;
4870
    ALU      : S3(2);
4871
    dst      : S5(write);
4872
    BR       : S5;
4873
%}
4874

4875
// Generic big/slow expanded idiom: a catch-all class modelled as 10
// instructions spanning multiple bundles, serializing, with a fixed latency
// of 100.
4876
pipe_class pipe_slow(  ) %{
4877
    instruction_count(10); multiple_bundles; force_serialization;
4878
    fixed_latency(100);
4879
    D0  : S0(2);
4880
    MEM : S3(2);
4881
%}
4882

4883
// The real do-nothing guy: zero instructions and no resources.  This is the
// class bound to MachNop by the define block in this pipeline section.
4884
pipe_class empty( ) %{
4885
    instruction_count(0);
4886
%}
4887

4888
// Define the class for the Nop node: MachNop (the nop named in the
// attributes block) is scheduled with the 'empty' pipe class.
4889
define %{
4890
   MachNop = empty;
4891
%}
4892

4893
%}
4894

4895
//----------INSTRUCTIONS-------------------------------------------------------
4896
//
4897
// match      -- States which machine-independent subtree may be replaced
4898
//               by this instruction.
4899
// ins_cost   -- The estimated cost of this instruction is used by instruction
4900
//               selection to identify a minimum cost tree of machine
4901
//               instructions that matches a tree of machine-independent
4902
//               instructions.
4903
// format     -- A string providing the disassembly for this instruction.
4904
//               The value of an instruction's operand may be inserted
4905
//               by referring to it with a '$' prefix.
4906
// opcode     -- Three instruction opcodes may be provided.  These are referred
4907
//               to within an encode class as $primary, $secondary, and $tertiary
4908
//               respectively.  The primary opcode is commonly used to
4909
//               indicate the type of machine instruction, while secondary
4910
//               and tertiary are often used for prefix options or addressing
4911
//               modes.
4912
// ins_encode -- A list of encode classes with parameters. The encode class
4913
//               name must have been defined in an 'enc_class' specification
4914
//               in the encode section of the architecture description.
4915

4916
// Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
4917
// Dummy float move from a regF source to a legRegF destination.  It exists
// only for matching; the encoding is ShouldNotReachHere(), so it is not
// expected to be emitted.
4918
instruct MoveF2LEG(legRegF dst, regF src) %{
4919
  match(Set dst src);
4920
  format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
4921
  ins_encode %{
4922
    ShouldNotReachHere();
4923
  %}
4924
  ins_pipe( fpu_reg_reg );
4925
%}
4926

4927
// Dummy float move from a legRegF source to a regF destination.  It exists
// only for matching; the encoding is ShouldNotReachHere(), so it is not
// expected to be emitted.
4928
instruct MoveLEG2F(regF dst, legRegF src) %{
4929
  match(Set dst src);
4930
  format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
4931
  ins_encode %{
4932
    ShouldNotReachHere();
4933
  %}
4934
  ins_pipe( fpu_reg_reg );
4935
%}
4936

4937
// Dummy float move from a regF source to a vlRegF destination.  It exists
// only for matching; the encoding is ShouldNotReachHere(), so it is not
// expected to be emitted.
4938
instruct MoveF2VL(vlRegF dst, regF src) %{
4939
  match(Set dst src);
4940
  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
4941
  ins_encode %{
4942
    ShouldNotReachHere();
4943
  %}
4944
  ins_pipe( fpu_reg_reg );
4945
%}
4946

4947
// Dummy float move from a vlRegF source to a regF destination.  It exists
// only for matching; the encoding is ShouldNotReachHere(), so it is not
// expected to be emitted.
4948
instruct MoveVL2F(regF dst, vlRegF src) %{
4949
  match(Set dst src);
4950
  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
4951
  ins_encode %{
4952
    ShouldNotReachHere();
4953
  %}
4954
  ins_pipe( fpu_reg_reg );
4955
%}
4956

4957

4958

4959
// Dummy double move from a regD source to a legRegD destination.  It exists
// only for matching; the encoding is ShouldNotReachHere(), so it is not
// expected to be emitted.
4960
instruct MoveD2LEG(legRegD dst, regD src) %{
4961
  match(Set dst src);
4962
  format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
4963
  ins_encode %{
4964
    ShouldNotReachHere();
4965
  %}
4966
  ins_pipe( fpu_reg_reg );
4967
%}
4968

4969
// Dummy double move from a legRegD source to a regD destination.  It exists
// only for matching; the encoding is ShouldNotReachHere(), so it is not
// expected to be emitted.
4970
instruct MoveLEG2D(regD dst, legRegD src) %{
4971
  match(Set dst src);
4972
  format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
4973
  ins_encode %{
4974
    ShouldNotReachHere();
4975
  %}
4976
  ins_pipe( fpu_reg_reg );
4977
%}
4978

4979
// Dummy double move from a regD source to a vlRegD destination.  It exists
// only for matching; the encoding is ShouldNotReachHere(), so it is not
// expected to be emitted.
4980
instruct MoveD2VL(vlRegD dst, regD src) %{
4981
  match(Set dst src);
4982
  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
4983
  ins_encode %{
4984
    ShouldNotReachHere();
4985
  %}
4986
  ins_pipe( fpu_reg_reg );
4987
%}
4988

4989
// Dummy double move from a vlRegD source to a regD destination.  It exists
// only for matching; the encoding is ShouldNotReachHere(), so it is not
// expected to be emitted.
4990
instruct MoveVL2D(regD dst, vlRegD src) %{
4991
  match(Set dst src);
4992
  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
4993
  ins_encode %{
4994
    ShouldNotReachHere();
4995
  %}
4996
  ins_pipe( fpu_reg_reg );
4997
%}
4998

4999
//----------BSWAP-Instruction--------------------------------------------------
5000
// Reverse the bytes of an int in place (dst is both input and output) with
// BSWAP.  The encoding is built from the opcode bytes 0x0F, 0xC8 through the
// OpcP and OpcSReg(dst) encode classes, which are defined elsewhere.
instruct bytes_reverse_int(rRegI dst) %{
5001
  match(Set dst (ReverseBytesI dst));
5002

5003
  format %{ "BSWAP  $dst" %}
5004
  opcode(0x0F, 0xC8);
5005
  ins_encode( OpcP, OpcSReg(dst) );
5006
  ins_pipe( ialu_reg );
5007
%}
5008

5009
// Reverse the bytes of a long in place.  Per the format string, each 32-bit
// half is byte-swapped and the halves are then exchanged; the actual bytes
// come from the bswap_long_bytes encode class, which is defined elsewhere.
instruct bytes_reverse_long(eRegL dst) %{
5010
  match(Set dst (ReverseBytesL dst));
5011

5012
  format %{ "BSWAP  $dst.lo\n\t"
5013
            "BSWAP  $dst.hi\n\t"
5014
            "XCHG   $dst.lo $dst.hi" %}
5015

5016
  ins_cost(125);
5017
  ins_encode( bswap_long_bytes(dst) );
5018
  ins_pipe( ialu_reg_reg);
5019
%}
5020

5021
// Reverse the two bytes of an unsigned 16-bit value held in an int register.
// BSWAP reverses all four bytes, which leaves the swapped pair in the upper
// half; the logical right shift by 16 brings it back down with zero fill.
instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesUS dst));
  effect(KILL cr);  // the shift modifies the flags

  // The last line of the listing carries no trailing "\n\t", matching the
  // other multi-line formats in this file.
  format %{ "BSWAP  $dst\n\t"
            "SHR    $dst,16" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ shrl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}
5033

5034
// Reverse the two bytes of a signed 16-bit value held in an int register.
// BSWAP reverses all four bytes, which leaves the swapped pair in the upper
// half; the arithmetic right shift by 16 brings it back down with sign
// extension.
instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesS dst));
  effect(KILL cr);  // the shift modifies the flags

  // The last line of the listing carries no trailing "\n\t", matching the
  // other multi-line formats in this file.
  format %{ "BSWAP  $dst\n\t"
            "SAR    $dst,16" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ sarl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}
5046

5047

5048
//---------- Zeros Count Instructions ------------------------------------------
5049

5050
// Count leading zeros of an int with a single LZCNT.  Selected only when
// UseCountLeadingZerosInstruction is set; the flags are killed.
instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5051
  predicate(UseCountLeadingZerosInstruction);
5052
  match(Set dst (CountLeadingZerosI src));
5053
  effect(KILL cr);
5054

5055
  format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
5056
  ins_encode %{
5057
    __ lzcntl($dst$$Register, $src$$Register);
5058
  %}
5059
  ins_pipe(ialu_reg);
5060
%}
5061

5062
// Count leading zeros of an int without LZCNT (selected when
// UseCountLeadingZerosInstruction is off).  BSR yields the index of the
// highest set bit, so the result is 31 - index, computed as NEG then ADD 31.
// When BSR reports a zero source, dst is forced to -1 so the same arithmetic
// produces 32.
instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
5063
  predicate(!UseCountLeadingZerosInstruction);
5064
  match(Set dst (CountLeadingZerosI src));
5065
  effect(KILL cr);
5066

5067
  format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
5068
            "JNZ    skip\n\t"
5069
            "MOV    $dst, -1\n"
5070
      "skip:\n\t"
5071
            "NEG    $dst\n\t"
5072
            "ADD    $dst, 31" %}
5073
  ins_encode %{
5074
    Register Rdst = $dst$$Register;
5075
    Register Rsrc = $src$$Register;
5076
    Label skip;
5077
    __ bsrl(Rdst, Rsrc);
5078
    __ jccb(Assembler::notZero, skip);  // source was non-zero: Rdst holds the bit index
5079
    __ movl(Rdst, -1);                  // zero source: -(-1) + 31 == 32
5080
    __ bind(skip);
5081
    __ negl(Rdst);
5082
    __ addl(Rdst, BitsPerInt - 1);
5083
  %}
5084
  ins_pipe(ialu_reg);
5085
%}
5086

5087
// Count leading zeros of a long using LZCNT on each 32-bit half.  The high
// word is counted first; if carry is clear afterwards that count is the
// answer, otherwise the low word is counted and 32 is added.  (This relies on
// LZCNT setting CF for a zero source - see the Intel SDM.)
// dst is declared TEMP, presumably so it is not allocated on top of src,
// whose low word is still read after dst has been written.
instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5088
  predicate(UseCountLeadingZerosInstruction);
5089
  match(Set dst (CountLeadingZerosL src));
5090
  effect(TEMP dst, KILL cr);
5091

5092
  format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
5093
            "JNC    done\n\t"
5094
            "LZCNT  $dst, $src.lo\n\t"
5095
            "ADD    $dst, 32\n"
5096
      "done:" %}
5097
  ins_encode %{
5098
    Register Rdst = $dst$$Register;
5099
    Register Rsrc = $src$$Register;
5100
    Label done;
5101
    __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5102
    __ jccb(Assembler::carryClear, done);  // high word was non-zero
5103
    __ lzcntl(Rdst, Rsrc);
5104
    __ addl(Rdst, BitsPerInt);
5105
    __ bind(done);
5106
  %}
5107
  ins_pipe(ialu_reg);
5108
%}
5109

5110
// Count leading zeros of a long without LZCNT (selected when
// UseCountLeadingZerosInstruction is off), using BSR on each 32-bit half.
// The bit index found is turned into a 64-bit position (high-word hits get
// +32) and the result is 63 - position, computed as NEG then ADD 63.  When
// both halves are zero, dst is forced to -1 so the same arithmetic gives 64.
// dst is declared TEMP, presumably so it is not allocated on top of src,
// whose low word is still read after dst has been written.
instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  // The last line of the listing carries no trailing "\n", matching the
  // other multi-line formats in this file.
  format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JZ     msw_is_zero\n\t"
            "ADD    $dst, 32\n\t"
            "JMP    not_zero\n"
      "msw_is_zero:\n\t"
            "BSR    $dst, $src.lo\n\t"
            "JNZ    not_zero\n\t"
            "MOV    $dst, -1\n"
      "not_zero:\n\t"
            "NEG    $dst\n\t"
            "ADD    $dst, 63" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_is_zero;
    Label not_zero;
    __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::zero, msw_is_zero);
    __ addl(Rdst, BitsPerInt);             // hit in the high word: position = index + 32
    __ jmpb(not_zero);
    __ bind(msw_is_zero);
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, not_zero);
    __ movl(Rdst, -1);                     // whole long is zero: -(-1) + 63 == 64
    __ bind(not_zero);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerLong - 1);
  %}
  ins_pipe(ialu_reg);
%}
5145

5146
// Count trailing zeros of an int with a single TZCNT.  Selected only when
// UseCountTrailingZerosInstruction is set; the flags are killed.
instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5147
  predicate(UseCountTrailingZerosInstruction);
5148
  match(Set dst (CountTrailingZerosI src));
5149
  effect(KILL cr);
5150

5151
  format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
5152
  ins_encode %{
5153
    __ tzcntl($dst$$Register, $src$$Register);
5154
  %}
5155
  ins_pipe(ialu_reg);
5156
%}
5157

5158
// Count trailing zeros of an int without TZCNT (selected when
// UseCountTrailingZerosInstruction is off).  BSF yields the index of the
// lowest set bit, which is the answer; when BSF reports a zero source the
// result is set to 32 explicitly.
instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
5159
  predicate(!UseCountTrailingZerosInstruction);
5160
  match(Set dst (CountTrailingZerosI src));
5161
  effect(KILL cr);
5162

5163
  format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
5164
            "JNZ    done\n\t"
5165
            "MOV    $dst, 32\n"
5166
      "done:" %}
5167
  ins_encode %{
5168
    Register Rdst = $dst$$Register;
5169
    Label done;
5170
    __ bsfl(Rdst, $src$$Register);
5171
    __ jccb(Assembler::notZero, done);  // source was non-zero: Rdst is the answer
5172
    __ movl(Rdst, BitsPerInt);
5173
    __ bind(done);
5174
  %}
5175
  ins_pipe(ialu_reg);
5176
%}
5177

5178
// Count trailing zeros of a long using TZCNT on each 32-bit half.  The low
// word is counted first; if carry is clear afterwards that count is the
// answer, otherwise the high word is counted and 32 is added.  (This relies
// on TZCNT setting CF for a zero source - see the Intel SDM.)
// dst is declared TEMP, presumably so it is not allocated on top of src,
// whose high word is still read after dst has been written.
instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5179
  predicate(UseCountTrailingZerosInstruction);
5180
  match(Set dst (CountTrailingZerosL src));
5181
  effect(TEMP dst, KILL cr);
5182

5183
  format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long) \n\t"
5184
            "JNC    done\n\t"
5185
            "TZCNT  $dst, $src.hi\n\t"
5186
            "ADD    $dst, 32\n"
5187
            "done:" %}
5188
  ins_encode %{
5189
    Register Rdst = $dst$$Register;
5190
    Register Rsrc = $src$$Register;
5191
    Label done;
5192
    __ tzcntl(Rdst, Rsrc);
5193
    __ jccb(Assembler::carryClear, done);  // low word was non-zero
5194
    __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5195
    __ addl(Rdst, BitsPerInt);
5196
    __ bind(done);
5197
  %}
5198
  ins_pipe(ialu_reg);
5199
%}
5200

5201
// Count trailing zeros of a long without TZCNT (selected when
// UseCountTrailingZerosInstruction is off), using BSF on each 32-bit half.
// A hit in the low word is the answer directly; a hit in the high word gets
// +32; if both halves are zero, dst is set to 32 and then the shared +32
// makes the result 64.
// dst is declared TEMP, presumably so it is not allocated on top of src,
// whose high word is still read after dst has been written.
instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
5202
  predicate(!UseCountTrailingZerosInstruction);
5203
  match(Set dst (CountTrailingZerosL src));
5204
  effect(TEMP dst, KILL cr);
5205

5206
  format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
5207
            "JNZ    done\n\t"
5208
            "BSF    $dst, $src.hi\n\t"
5209
            "JNZ    msw_not_zero\n\t"
5210
            "MOV    $dst, 32\n"
5211
      "msw_not_zero:\n\t"
5212
            "ADD    $dst, 32\n"
5213
      "done:" %}
5214
  ins_encode %{
5215
    Register Rdst = $dst$$Register;
5216
    Register Rsrc = $src$$Register;
5217
    Label msw_not_zero;
5218
    Label done;
5219
    __ bsfl(Rdst, Rsrc);
5220
    __ jccb(Assembler::notZero, done);          // hit in the low word
5221
    __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
5222
    __ jccb(Assembler::notZero, msw_not_zero);  // hit in the high word
5223
    __ movl(Rdst, BitsPerInt);                  // whole long is zero: 32 + 32 == 64
5224
    __ bind(msw_not_zero);
5225
    __ addl(Rdst, BitsPerInt);
5226
    __ bind(done);
5227
  %}
5228
  ins_pipe(ialu_reg);
5229
%}
5230

5231

5232
//---------- Population Count Instructions -------------------------------------
5233

5234
// Population count of an int register with a single POPCNT.  Selected only
// when UsePopCountInstruction is set; the flags are killed.
instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
5235
  predicate(UsePopCountInstruction);
5236
  match(Set dst (PopCountI src));
5237
  effect(KILL cr);
5238

5239
  format %{ "POPCNT $dst, $src" %}
5240
  ins_encode %{
5241
    __ popcntl($dst$$Register, $src$$Register);
5242
  %}
5243
  ins_pipe(ialu_reg);
5244
%}
5245

5246
// Population count of an int loaded from memory: the LoadI is folded into
// POPCNT's memory operand.  Selected only when UsePopCountInstruction is set.
// NOTE(review): scheduled with ialu_reg, which reserves no memory unit,
// although this form reads memory - confirm this is intended.
instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
5247
  predicate(UsePopCountInstruction);
5248
  match(Set dst (PopCountI (LoadI mem)));
5249
  effect(KILL cr);
5250

5251
  format %{ "POPCNT $dst, $mem" %}
5252
  ins_encode %{
5253
    __ popcntl($dst$$Register, $mem$$Address);
5254
  %}
5255
  ins_pipe(ialu_reg);
5256
%}
5257

5258
// Note: Long.bitCount(long) returns an int.
5259
instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
5260
  predicate(UsePopCountInstruction);
5261
  match(Set dst (PopCountL src));
5262
  effect(KILL cr, TEMP tmp, TEMP dst);
5263

5264
  format %{ "POPCNT $dst, $src.lo\n\t"
5265
            "POPCNT $tmp, $src.hi\n\t"
5266
            "ADD    $dst, $tmp" %}
5267
  ins_encode %{
5268
    __ popcntl($dst$$Register, $src$$Register);
5269
    __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
5270
    __ addl($dst$$Register, $tmp$$Register);
5271
  %}
5272
  ins_pipe(ialu_reg);
5273
%}
5274

5275
// Note: Long.bitCount(long) returns an int.
5276
instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
5277
  predicate(UsePopCountInstruction);
5278
  match(Set dst (PopCountL (LoadL mem)));
5279
  effect(KILL cr, TEMP tmp, TEMP dst);
5280

5281
  format %{ "POPCNT $dst, $mem\n\t"
5282
            "POPCNT $tmp, $mem+4\n\t"
5283
            "ADD    $dst, $tmp" %}
5284
  ins_encode %{
5285
    //__ popcntl($dst$$Register, $mem$$Address$$first);
5286
    //__ popcntl($tmp$$Register, $mem$$Address$$second);
5287
    __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
5288
    __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
5289
    __ addl($dst$$Register, $tmp$$Register);
5290
  %}
5291
  ins_pipe(ialu_reg);
5292
%}
5293

5294

5295
//----------Load/Store/Move Instructions---------------------------------------
5296
//----------Load Instructions--------------------------------------------------
5297
// Load Byte (8bit signed)
5298
instruct loadB(xRegI dst, memory mem) %{
5299
  match(Set dst (LoadB mem));
5300

5301
  ins_cost(125);
5302
  format %{ "MOVSX8 $dst,$mem\t# byte" %}
5303

5304
  ins_encode %{
5305
    __ movsbl($dst$$Register, $mem$$Address);
5306
  %}
5307

5308
  ins_pipe(ialu_reg_mem);
5309
%}
5310

5311
// Load Byte (8bit signed) into Long Register
5312
instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5313
  match(Set dst (ConvI2L (LoadB mem)));
5314
  effect(KILL cr);
5315

5316
  ins_cost(375);
5317
  format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
5318
            "MOV    $dst.hi,$dst.lo\n\t"
5319
            "SAR    $dst.hi,7" %}
5320

5321
  ins_encode %{
5322
    __ movsbl($dst$$Register, $mem$$Address);
5323
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5324
    __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended.
5325
  %}
5326

5327
  ins_pipe(ialu_reg_mem);
5328
%}
5329

5330
// Load Unsigned Byte (8bit UNsigned)
5331
instruct loadUB(xRegI dst, memory mem) %{
5332
  match(Set dst (LoadUB mem));
5333

5334
  ins_cost(125);
5335
  format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
5336

5337
  ins_encode %{
5338
    __ movzbl($dst$$Register, $mem$$Address);
5339
  %}
5340

5341
  ins_pipe(ialu_reg_mem);
5342
%}
5343

5344
// Load Unsigned Byte (8 bit UNsigned) into Long Register
5345
instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5346
  match(Set dst (ConvI2L (LoadUB mem)));
5347
  effect(KILL cr);
5348

5349
  ins_cost(250);
5350
  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
5351
            "XOR    $dst.hi,$dst.hi" %}
5352

5353
  ins_encode %{
5354
    Register Rdst = $dst$$Register;
5355
    __ movzbl(Rdst, $mem$$Address);
5356
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5357
  %}
5358

5359
  ins_pipe(ialu_reg_mem);
5360
%}
5361

5362
// Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
5363
instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5364
  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
5365
  effect(KILL cr);
5366

5367
  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
5368
            "XOR    $dst.hi,$dst.hi\n\t"
5369
            "AND    $dst.lo,right_n_bits($mask, 8)" %}
5370
  ins_encode %{
5371
    Register Rdst = $dst$$Register;
5372
    __ movzbl(Rdst, $mem$$Address);
5373
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5374
    __ andl(Rdst, $mask$$constant & right_n_bits(8));
5375
  %}
5376
  ins_pipe(ialu_reg_mem);
5377
%}
5378

5379
// Load Short (16bit signed)
5380
instruct loadS(rRegI dst, memory mem) %{
5381
  match(Set dst (LoadS mem));
5382

5383
  ins_cost(125);
5384
  format %{ "MOVSX  $dst,$mem\t# short" %}
5385

5386
  ins_encode %{
5387
    __ movswl($dst$$Register, $mem$$Address);
5388
  %}
5389

5390
  ins_pipe(ialu_reg_mem);
5391
%}
5392

5393
// Load Short (16 bit signed) to Byte (8 bit signed)
5394
instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5395
  match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
5396

5397
  ins_cost(125);
5398
  format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
5399
  ins_encode %{
5400
    __ movsbl($dst$$Register, $mem$$Address);
5401
  %}
5402
  ins_pipe(ialu_reg_mem);
5403
%}
5404

5405
// Load Short (16bit signed) into Long Register
5406
instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5407
  match(Set dst (ConvI2L (LoadS mem)));
5408
  effect(KILL cr);
5409

5410
  ins_cost(375);
5411
  format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
5412
            "MOV    $dst.hi,$dst.lo\n\t"
5413
            "SAR    $dst.hi,15" %}
5414

5415
  ins_encode %{
5416
    __ movswl($dst$$Register, $mem$$Address);
5417
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5418
    __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended.
5419
  %}
5420

5421
  ins_pipe(ialu_reg_mem);
5422
%}
5423

5424
// Load Unsigned Short/Char (16bit unsigned)
5425
instruct loadUS(rRegI dst, memory mem) %{
5426
  match(Set dst (LoadUS mem));
5427

5428
  ins_cost(125);
5429
  format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
5430

5431
  ins_encode %{
5432
    __ movzwl($dst$$Register, $mem$$Address);
5433
  %}
5434

5435
  ins_pipe(ialu_reg_mem);
5436
%}
5437

5438
// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
5439
instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5440
  match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
5441

5442
  ins_cost(125);
5443
  format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
5444
  ins_encode %{
5445
    __ movsbl($dst$$Register, $mem$$Address);
5446
  %}
5447
  ins_pipe(ialu_reg_mem);
5448
%}
5449

5450
// Load Unsigned Short/Char (16 bit UNsigned) into Long Register
5451
instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5452
  match(Set dst (ConvI2L (LoadUS mem)));
5453
  effect(KILL cr);
5454

5455
  ins_cost(250);
5456
  format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
5457
            "XOR    $dst.hi,$dst.hi" %}
5458

5459
  ins_encode %{
5460
    __ movzwl($dst$$Register, $mem$$Address);
5461
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5462
  %}
5463

5464
  ins_pipe(ialu_reg_mem);
5465
%}
5466

5467
// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
5468
instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5469
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5470
  effect(KILL cr);
5471

5472
  format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
5473
            "XOR    $dst.hi,$dst.hi" %}
5474
  ins_encode %{
5475
    Register Rdst = $dst$$Register;
5476
    __ movzbl(Rdst, $mem$$Address);
5477
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5478
  %}
5479
  ins_pipe(ialu_reg_mem);
5480
%}
5481

5482
// Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
5483
instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5484
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5485
  effect(KILL cr);
5486

5487
  format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
5488
            "XOR    $dst.hi,$dst.hi\n\t"
5489
            "AND    $dst.lo,right_n_bits($mask, 16)" %}
5490
  ins_encode %{
5491
    Register Rdst = $dst$$Register;
5492
    __ movzwl(Rdst, $mem$$Address);
5493
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5494
    __ andl(Rdst, $mask$$constant & right_n_bits(16));
5495
  %}
5496
  ins_pipe(ialu_reg_mem);
5497
%}
5498

5499
// Load Integer
5500
instruct loadI(rRegI dst, memory mem) %{
5501
  match(Set dst (LoadI mem));
5502

5503
  ins_cost(125);
5504
  format %{ "MOV    $dst,$mem\t# int" %}
5505

5506
  ins_encode %{
5507
    __ movl($dst$$Register, $mem$$Address);
5508
  %}
5509

5510
  ins_pipe(ialu_reg_mem);
5511
%}
5512

5513
// Load Integer (32 bit signed) to Byte (8 bit signed)
5514
instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5515
  match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
5516

5517
  ins_cost(125);
5518
  format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
5519
  ins_encode %{
5520
    __ movsbl($dst$$Register, $mem$$Address);
5521
  %}
5522
  ins_pipe(ialu_reg_mem);
5523
%}
5524

5525
// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
5526
instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
5527
  match(Set dst (AndI (LoadI mem) mask));
5528

5529
  ins_cost(125);
5530
  format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
5531
  ins_encode %{
5532
    __ movzbl($dst$$Register, $mem$$Address);
5533
  %}
5534
  ins_pipe(ialu_reg_mem);
5535
%}
5536

5537
// Load Integer (32 bit signed) to Short (16 bit signed)
5538
instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
5539
  match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
5540

5541
  ins_cost(125);
5542
  format %{ "MOVSX  $dst, $mem\t# int -> short" %}
5543
  ins_encode %{
5544
    __ movswl($dst$$Register, $mem$$Address);
5545
  %}
5546
  ins_pipe(ialu_reg_mem);
5547
%}
5548

5549
// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
5550
instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
5551
  match(Set dst (AndI (LoadI mem) mask));
5552

5553
  ins_cost(125);
5554
  format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
5555
  ins_encode %{
5556
    __ movzwl($dst$$Register, $mem$$Address);
5557
  %}
5558
  ins_pipe(ialu_reg_mem);
5559
%}
5560

5561
// Load Integer into Long Register
5562
instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
5563
  match(Set dst (ConvI2L (LoadI mem)));
5564
  effect(KILL cr);
5565

5566
  ins_cost(375);
5567
  format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
5568
            "MOV    $dst.hi,$dst.lo\n\t"
5569
            "SAR    $dst.hi,31" %}
5570

5571
  ins_encode %{
5572
    __ movl($dst$$Register, $mem$$Address);
5573
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5574
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
5575
  %}
5576

5577
  ins_pipe(ialu_reg_mem);
5578
%}
5579

5580
// Load Integer with mask 0xFF into Long Register
5581
instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5582
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5583
  effect(KILL cr);
5584

5585
  format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
5586
            "XOR    $dst.hi,$dst.hi" %}
5587
  ins_encode %{
5588
    Register Rdst = $dst$$Register;
5589
    __ movzbl(Rdst, $mem$$Address);
5590
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5591
  %}
5592
  ins_pipe(ialu_reg_mem);
5593
%}
5594

5595
// Load Integer with mask 0xFFFF into Long Register
5596
instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
5597
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5598
  effect(KILL cr);
5599

5600
  format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
5601
            "XOR    $dst.hi,$dst.hi" %}
5602
  ins_encode %{
5603
    Register Rdst = $dst$$Register;
5604
    __ movzwl(Rdst, $mem$$Address);
5605
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5606
  %}
5607
  ins_pipe(ialu_reg_mem);
5608
%}
5609

5610
// Load Integer with 31-bit mask into Long Register
5611
instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
5612
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5613
  effect(KILL cr);
5614

5615
  format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
5616
            "XOR    $dst.hi,$dst.hi\n\t"
5617
            "AND    $dst.lo,$mask" %}
5618
  ins_encode %{
5619
    Register Rdst = $dst$$Register;
5620
    __ movl(Rdst, $mem$$Address);
5621
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5622
    __ andl(Rdst, $mask$$constant);
5623
  %}
5624
  ins_pipe(ialu_reg_mem);
5625
%}
5626

5627
// Load Unsigned Integer into Long Register
5628
instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
5629
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
5630
  effect(KILL cr);
5631

5632
  ins_cost(250);
5633
  format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
5634
            "XOR    $dst.hi,$dst.hi" %}
5635

5636
  ins_encode %{
5637
    __ movl($dst$$Register, $mem$$Address);
5638
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5639
  %}
5640

5641
  ins_pipe(ialu_reg_mem);
5642
%}
5643

5644
// Load Long.  Cannot clobber address while loading, so restrict address
5645
// register to ESI
5646
instruct loadL(eRegL dst, load_long_memory mem) %{
5647
  predicate(!((LoadLNode*)n)->require_atomic_access());
5648
  match(Set dst (LoadL mem));
5649

5650
  ins_cost(250);
5651
  format %{ "MOV    $dst.lo,$mem\t# long\n\t"
5652
            "MOV    $dst.hi,$mem+4" %}
5653

5654
  ins_encode %{
5655
    Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
5656
    Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
5657
    __ movl($dst$$Register, Amemlo);
5658
    __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
5659
  %}
5660

5661
  ins_pipe(ialu_reg_long_mem);
5662
%}
5663

5664
// Volatile Load Long.  Must be atomic, so do 64-bit FILD
5665
// then store it down to the stack and reload on the int
5666
// side.
5667
// Selected only when UseSSE<=1 and the LoadL node requires atomic access;
// the result is produced in a long stack slot, not a register pair.
instruct loadL_volatile(stackSlotL dst, memory mem) %{
5668
  predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
5669
  match(Set dst (LoadL mem));
5670

5671
  ins_cost(200);
5672
  format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
5673
            "FISTp  $dst" %}
5674
  // enc_loadL_volatile is an enc_class defined elsewhere in this file;
  // per the format above it presumably emits the FILD/FISTP pair.
  ins_encode(enc_loadL_volatile(mem,dst));
5675
  ins_pipe( fpu_reg_mem );
5676
%}
5677

5678
// Atomic volatile long load into a stack slot when SSE2 is available:
// one 64-bit XMM load from memory, then one 64-bit XMM store to the slot.
instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(180);
  format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVSD  $dst,$tmp" %}
  ins_encode %{
    XMMRegister Xtmp = $tmp$$XMMRegister;
    Address Aslot(rsp, $dst$$disp);  // destination stack slot, rsp-relative
    __ movdbl(Xtmp, $mem$$Address);
    __ movdbl(Aslot, Xtmp);
  %}
  ins_pipe( pipe_slow );
%}
5691

5692
// Atomic volatile long load into a register pair when SSE2 is available:
// one 64-bit XMM load, then the two 32-bit halves are moved out of the XMM
// register (low half, shift right by 32, high half).
instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(160);
  format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVD   $dst.lo,$tmp\n\t"
            "PSRLQ  $tmp,32\n\t"
            "MOVD   $dst.hi,$tmp" %}
  ins_encode %{
    XMMRegister Xtmp = $tmp$$XMMRegister;
    Register Rlo = $dst$$Register;
    Register Rhi = HIGH_FROM_LOW(Rlo);
    __ movdbl(Xtmp, $mem$$Address);
    __ movdl(Rlo, Xtmp);
    __ psrlq(Xtmp, 32);
    __ movdl(Rhi, Xtmp);
  %}
  ins_pipe( pipe_slow );
%}
5709

5710
// Load Range: matches a LoadRange node and emits a plain 32-bit
// register-from-memory MOV.
5711
instruct loadRange(rRegI dst, memory mem) %{
5712
  match(Set dst (LoadRange mem));
5713

5714
  ins_cost(125);
5715
  format %{ "MOV    $dst,$mem" %}
5716
  opcode(0x8B);  // x86 MOV r32, r/m32
5717
  // OpcP/RegMem are enc_classes defined elsewhere in this file; presumably
  // they emit the primary opcode followed by the ModRM/SIB/disp bytes.
  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
5718
  ins_pipe( ialu_reg_mem );
5719
%}
5720

5721

5722
// Load Pointer: matches a LoadP node and emits a plain 32-bit
// register-from-memory MOV into a pointer register.
5723
instruct loadP(eRegP dst, memory mem) %{
5724
  match(Set dst (LoadP mem));
5725

5726
  ins_cost(125);
5727
  format %{ "MOV    $dst,$mem" %}
5728
  opcode(0x8B);  // x86 MOV r32, r/m32
5729
  // OpcP/RegMem are enc_classes defined elsewhere in this file; presumably
  // they emit the primary opcode followed by the ModRM/SIB/disp bytes.
  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
5730
  ins_pipe( ialu_reg_mem );
5731
%}
5732

5733
// Load Klass Pointer: matches a LoadKlass node; same encoding as loadP
// (a plain 32-bit register-from-memory MOV).
5734
instruct loadKlass(eRegP dst, memory mem) %{
5735
  match(Set dst (LoadKlass mem));
5736

5737
  ins_cost(125);
5738
  format %{ "MOV    $dst,$mem" %}
5739
  opcode(0x8B);  // x86 MOV r32, r/m32
5740
  // OpcP/RegMem are enc_classes defined elsewhere in this file; presumably
  // they emit the primary opcode followed by the ModRM/SIB/disp bytes.
  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
5741
  ins_pipe( ialu_reg_mem );
5742
%}
5743

5744
// Load Double into an x87 register (used only when UseSSE<=1):
// push the 64-bit value onto the FPU stack, then pop it into $dst.
5745
instruct loadDPR(regDPR dst, memory mem) %{
5746
  predicate(UseSSE<=1);
5747
  match(Set dst (LoadD mem));
5748

5749
  ins_cost(150);
5750
  format %{ "FLD_D  ST,$mem\n\t"
5751
            "FSTP   $dst" %}
5752
  opcode(0xDD);               /* DD /0 */
5753
  // RMopc_Mem(0x00,mem) supplies the /0 opcode extension for the memory
  // operand; Pop_Reg_DPR is an enc_class defined elsewhere in this file
  // (presumably the FSTP shown in the format).
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
5754
              Pop_Reg_DPR(dst), ClearInstMark );
5755
  ins_pipe( fpu_reg_mem );
5756
%}
5757

5758
// Load Double to XMM
5759
instruct loadD(regD dst, memory mem) %{
5760
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
5761
  match(Set dst (LoadD mem));
5762
  ins_cost(145);
5763
  format %{ "MOVSD  $dst,$mem" %}
5764
  ins_encode %{
5765
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
5766
  %}
5767
  ins_pipe( pipe_slow );
5768
%}
5769

5770
// Load Double to XMM when UseXmmLoadAndClearUpper is off.
// The encoding calls the same movdbl helper as loadD; only the predicate and
// the printed mnemonic differ in this description.
instruct loadD_partial(regD dst, memory mem) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVLPD $dst,$mem" %}
  ins_encode %{
    XMMRegister Xdst = $dst$$XMMRegister;
    __ movdbl(Xdst, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}
5780

5781
// Load to XMM register (single-precision floating point)
5782
// MOVSS instruction
5783
instruct loadF(regF dst, memory mem) %{
5784
  predicate(UseSSE>=1);
5785
  match(Set dst (LoadF mem));
5786
  ins_cost(145);
5787
  format %{ "MOVSS  $dst,$mem" %}
5788
  ins_encode %{
5789
    __ movflt ($dst$$XMMRegister, $mem$$Address);
5790
  %}
5791
  ins_pipe( pipe_slow );
5792
%}
5793

5794
// Load Float into an x87 register (used only when UseSSE==0):
// push the 32-bit value onto the FPU stack, then pop it into $dst.
5795
instruct loadFPR(regFPR dst, memory mem) %{
5796
  predicate(UseSSE==0);
5797
  match(Set dst (LoadF mem));
5798

5799
  ins_cost(150);
5800
  format %{ "FLD_S  ST,$mem\n\t"
5801
            "FSTP   $dst" %}
5802
  opcode(0xD9);               /* D9 /0 */
5803
  // RMopc_Mem(0x00,mem) supplies the /0 opcode extension for the memory
  // operand; Pop_Reg_FPR is an enc_class defined elsewhere in this file
  // (presumably the FSTP shown in the format).
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
5804
              Pop_Reg_FPR(dst), ClearInstMark );
5805
  ins_pipe( fpu_reg_mem );
5806
%}
5807

5808
// Load Effective Address: materialize an indOffset8 address expression
// into a pointer register with LEA.
5809
instruct leaP8(eRegP dst, indOffset8 mem) %{
5810
  match(Set dst mem);
5811

5812
  ins_cost(110);
5813
  format %{ "LEA    $dst,$mem" %}
5814
  opcode(0x8D);  // x86 LEA r32, m
5815
  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
5816
  ins_pipe( ialu_reg_reg_fat );
5817
%}
5818

5819
// Load Effective Address: same as leaP8 but for the indOffset32 operand
// form.
instruct leaP32(eRegP dst, indOffset32 mem) %{
5820
  match(Set dst mem);
5821

5822
  ins_cost(110);
5823
  format %{ "LEA    $dst,$mem" %}
5824
  opcode(0x8D);  // x86 LEA r32, m
5825
  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
5826
  ins_pipe( ialu_reg_reg_fat );
5827
%}
5828

5829
// Load Effective Address for the indIndexOffset operand form.
instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
5830
  match(Set dst mem);
5831

5832
  ins_cost(110);
5833
  format %{ "LEA    $dst,$mem" %}
5834
  opcode(0x8D);  // x86 LEA r32, m
5835
  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
5836
  ins_pipe( ialu_reg_reg_fat );
5837
%}
5838

5839
// Load Effective Address for the indIndexScale operand form.
instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
5840
  match(Set dst mem);
5841

5842
  ins_cost(110);
5843
  format %{ "LEA    $dst,$mem" %}
5844
  opcode(0x8D);  // x86 LEA r32, m
5845
  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
5846
  ins_pipe( ialu_reg_reg_fat );
5847
%}
5848

5849
// Load Effective Address for the indIndexScaleOffset operand form.
instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
5850
  match(Set dst mem);
5851

5852
  ins_cost(110);
5853
  format %{ "LEA    $dst,$mem" %}
5854
  opcode(0x8D);  // x86 LEA r32, m
5855
  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
5856
  ins_pipe( ialu_reg_reg_fat );
5857
%}
5858

5859
// Load Constant: materialize an arbitrary int immediate into a register.
5860
instruct loadConI(rRegI dst, immI src) %{
5861
  match(Set dst src);
5862

5863
  format %{ "MOV    $dst,$src" %}
5864
  // LdImmI is an enc_class defined elsewhere in this file; per the format it
  // presumably emits a MOV of the immediate into $dst.
  ins_encode( SetInstMark, LdImmI(dst, src), ClearInstMark );
5865
  ins_pipe( ialu_reg_fat );
5866
%}
5867

5868
// Load Constant zero: cheaper special case of loadConI (ins_cost 50) that
// clears the register with XOR; flags are clobbered, hence KILL cr.
5869
instruct loadConI0(rRegI dst, immI_0 src, eFlagsReg cr) %{
5870
  match(Set dst src);
5871
  effect(KILL cr);
5872

5873
  ins_cost(50);
5874
  format %{ "XOR    $dst,$dst" %}
5875
  opcode(0x33);  /* + rd */
5876
  // Both register fields name dst, giving XOR dst,dst.
  ins_encode( OpcP, RegReg( dst, dst ) );
5877
  ins_pipe( ialu_reg );
5878
%}
5879

5880
// Load a pointer constant into a pointer register.
instruct loadConP(eRegP dst, immP src) %{
5881
  match(Set dst src);
5882

5883
  format %{ "MOV    $dst,$src" %}
5884
  opcode(0xB8);  /* + rd */
5885
  // LdImmP is an enc_class defined elsewhere in this file; per the format it
  // presumably emits a MOV of the pointer immediate into $dst.
  ins_encode( SetInstMark, LdImmP(dst, src), ClearInstMark );
5886
  ins_pipe( ialu_reg_fat );
5887
%}
5888

5889
// Load a long constant into a register pair, one 32-bit half at a time.
instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
5890
  match(Set dst src);
5891
  effect(KILL cr);
5892
  ins_cost(200);
5893
  format %{ "MOV    $dst.lo,$src.lo\n\t"
5894
            "MOV    $dst.hi,$src.hi" %}
5895
  opcode(0xB8);
5896
  // LdImmL_Lo / LdImmL_Hi are enc_classes defined elsewhere in this file;
  // presumably each loads one half of the constant, as the format shows.
  ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
5897
  ins_pipe( ialu_reg_long_fat );
5898
%}
5899

5900
// Load long constant zero: cheaper special case of loadConL (ins_cost 150)
// that clears both halves of the register pair with XOR.
instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
5901
  match(Set dst src);
5902
  effect(KILL cr);
5903
  ins_cost(150);
5904
  format %{ "XOR    $dst.lo,$dst.lo\n\t"
5905
            "XOR    $dst.hi,$dst.hi" %}
5906
  opcode(0x33,0x33);
5907
  // RegReg_Lo / RegReg_Hi are enc_classes defined elsewhere in this file;
  // presumably one XOR per half, as the format shows.
  ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
5908
  ins_pipe( ialu_reg_long );
5909
%}
5910

5911
// The instruction usage is guarded by predicate in operand immFPR().
5912
instruct loadConFPR(regFPR dst, immFPR con) %{
5913
  match(Set dst con);
5914
  ins_cost(125);
5915
  format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
5916
            "FSTP   $dst" %}
5917
  ins_encode %{
5918
    __ fld_s($constantaddress($con));
5919
    __ fstp_d($dst$$reg);
5920
  %}
5921
  ins_pipe(fpu_reg_con);
5922
%}
5923

5924
// The instruction usage is guarded by predicate in operand immFPR0().
5925
instruct loadConFPR0(regFPR dst, immFPR0 con) %{
5926
  match(Set dst con);
5927
  ins_cost(125);
5928
  format %{ "FLDZ   ST\n\t"
5929
            "FSTP   $dst" %}
5930
  ins_encode %{
5931
    __ fldz();
5932
    __ fstp_d($dst$$reg);
5933
  %}
5934
  ins_pipe(fpu_reg_con);
5935
%}
5936

5937
// The instruction usage is guarded by predicate in operand immFPR1().
5938
instruct loadConFPR1(regFPR dst, immFPR1 con) %{
5939
  match(Set dst con);
5940
  ins_cost(125);
5941
  format %{ "FLD1   ST\n\t"
5942
            "FSTP   $dst" %}
5943
  ins_encode %{
5944
    __ fld1();
5945
    __ fstp_d($dst$$reg);
5946
  %}
5947
  ins_pipe(fpu_reg_con);
5948
%}
5949

5950
// The instruction usage is guarded by predicate in operand immF().
5951
instruct loadConF(regF dst, immF con) %{
5952
  match(Set dst con);
5953
  ins_cost(125);
5954
  format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
5955
  ins_encode %{
5956
    __ movflt($dst$$XMMRegister, $constantaddress($con));
5957
  %}
5958
  ins_pipe(pipe_slow);
5959
%}
5960

5961
// The instruction usage is guarded by predicate in operand immF0().
5962
instruct loadConF0(regF dst, immF0 src) %{
5963
  match(Set dst src);
5964
  ins_cost(100);
5965
  format %{ "XORPS  $dst,$dst\t# float 0.0" %}
5966
  ins_encode %{
5967
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
5968
  %}
5969
  ins_pipe(pipe_slow);
5970
%}
5971

5972
// The instruction usage is guarded by predicate in operand immDPR().
5973
instruct loadConDPR(regDPR dst, immDPR con) %{
5974
  match(Set dst con);
5975
  ins_cost(125);
5976

5977
  format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
5978
            "FSTP   $dst" %}
5979
  ins_encode %{
5980
    __ fld_d($constantaddress($con));
5981
    __ fstp_d($dst$$reg);
5982
  %}
5983
  ins_pipe(fpu_reg_con);
5984
%}
5985

5986
// The instruction usage is guarded by predicate in operand immDPR0().
5987
instruct loadConDPR0(regDPR dst, immDPR0 con) %{
5988
  match(Set dst con);
5989
  ins_cost(125);
5990

5991
  format %{ "FLDZ   ST\n\t"
5992
            "FSTP   $dst" %}
5993
  ins_encode %{
5994
    __ fldz();
5995
    __ fstp_d($dst$$reg);
5996
  %}
5997
  ins_pipe(fpu_reg_con);
5998
%}
5999

6000
// The instruction usage is guarded by predicate in operand immDPR1().
6001
instruct loadConDPR1(regDPR dst, immDPR1 con) %{
6002
  match(Set dst con);
6003
  ins_cost(125);
6004

6005
  format %{ "FLD1   ST\n\t"
6006
            "FSTP   $dst" %}
6007
  ins_encode %{
6008
    __ fld1();
6009
    __ fstp_d($dst$$reg);
6010
  %}
6011
  ins_pipe(fpu_reg_con);
6012
%}
6013

6014
// The instruction usage is guarded by predicate in operand immD().
6015
instruct loadConD(regD dst, immD con) %{
6016
  match(Set dst con);
6017
  ins_cost(125);
6018
  format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
6019
  ins_encode %{
6020
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
6021
  %}
6022
  ins_pipe(pipe_slow);
6023
%}
6024

6025
// The instruction usage is guarded by predicate in operand immD0().
6026
instruct loadConD0(regD dst, immD0 src) %{
6027
  match(Set dst src);
6028
  ins_cost(100);
6029
  format %{ "XORPD  $dst,$dst\t# double 0.0" %}
6030
  ins_encode %{
6031
    __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
6032
  %}
6033
  ins_pipe( pipe_slow );
6034
%}
6035

6036
// Load Stack Slot: move an int from a stack slot into a register.
6037
instruct loadSSI(rRegI dst, stackSlotI src) %{
6038
  match(Set dst src);
6039
  ins_cost(125);
6040

6041
  format %{ "MOV    $dst,$src" %}
6042
  opcode(0x8B);  // x86 MOV r32, r/m32
6043
  // The stack slot is passed to RegMem as the memory operand.
  ins_encode( SetInstMark, OpcP, RegMem(dst,src), ClearInstMark);
6044
  ins_pipe( ialu_reg_mem );
6045
%}
6046

6047
// Load Stack Slot: move a long from a stack slot into a register pair using
// two 32-bit MOVs (opcode 0x8B twice: once as the primary opcode, once as the
// secondary). RegMem / RegMem_Hi are enc_classes defined elsewhere in this
// file; presumably they address the low word and the word at +4 respectively.
instruct loadSSL(eRegL dst, stackSlotL src) %{
  match(Set dst src);

  ins_cost(200);
  // dst is the register pair and src is the memory (stack slot) operand, so
  // the .lo/.hi halves belong to dst and the +4 offset belongs to src. This
  // mirrors the loadL format; the printed text previously had them swapped.
  format %{ "MOV    $dst.lo,$src\n\t"
            "MOV    $dst.hi,$src+4" %}
  opcode(0x8B, 0x8B);
  ins_encode( SetInstMark, OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ), ClearInstMark );
  ins_pipe( ialu_mem_long_reg );
%}
6057

6058
// Load Stack Slot: move a pointer from a stack slot into a pointer register.
6059
instruct loadSSP(eRegP dst, stackSlotP src) %{
6060
  match(Set dst src);
6061
  ins_cost(125);
6062

6063
  format %{ "MOV    $dst,$src" %}
6064
  opcode(0x8B);  // x86 MOV r32, r/m32
6065
  // The stack slot is passed to RegMem as the memory operand.
  ins_encode( SetInstMark, OpcP, RegMem(dst,src), ClearInstMark);
6066
  ins_pipe( ialu_reg_mem );
6067
%}
6068

6069
// Load Stack Slot: move a float from a stack slot into an x87 register by
// pushing it onto the FPU stack and popping it into $dst.
6070
instruct loadSSF(regFPR dst, stackSlotF src) %{
6071
  match(Set dst src);
6072
  ins_cost(125);
6073

6074
  format %{ "FLD_S  $src\n\t"
6075
            "FSTP   $dst" %}
6076
  opcode(0xD9);               /* D9 /0, FLD m32real */
6077
  // Uses the _no_oop variant of the memory encoding (enc_class defined
  // elsewhere in this file), unlike the heap load in loadFPR.
  ins_encode( SetInstMark, OpcP, RMopc_Mem_no_oop(0x00,src),
6078
              Pop_Reg_FPR(dst), ClearInstMark );
6079
  ins_pipe( fpu_reg_mem );
6080
%}
6081

6082
// Load Stack Slot: move a double from a stack slot into an x87 register by
// pushing it onto the FPU stack and popping it into $dst.
6083
instruct loadSSD(regDPR dst, stackSlotD src) %{
6084
  match(Set dst src);
6085
  ins_cost(125);
6086

6087
  format %{ "FLD_D  $src\n\t"
6088
            "FSTP   $dst" %}
6089
  opcode(0xDD);               /* DD /0, FLD m64real */
6090
  // Uses the _no_oop variant of the memory encoding (enc_class defined
  // elsewhere in this file), unlike the heap load in loadDPR.
  ins_encode( SetInstMark, OpcP, RMopc_Mem_no_oop(0x00,src),
6091
              Pop_Reg_DPR(dst), ClearInstMark );
6092
  ins_pipe( fpu_reg_mem );
6093
%}
6094

6095
// Prefetch instructions for allocation.
6096
// Must be safe to execute with invalid address (cannot fault).
6097

6098
// Allocation prefetch when no SSE is available and PREFETCHW was not
// requested (AllocatePrefetchInstr != 3): matches the PrefetchAllocation node
// but emits nothing (zero cost, zero size, empty encoding).
instruct prefetchAlloc0( memory mem ) %{
6099
  predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
6100
  match(PrefetchAllocation mem);
6101
  ins_cost(0);
6102
  size(0);
6103
  format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
6104
  ins_encode();
6105
  ins_pipe(empty);
6106
%}
6107

6108
// Allocation prefetch using PREFETCHW, selected by AllocatePrefetchInstr==3.
instruct prefetchAlloc( memory mem ) %{
  predicate(AllocatePrefetchInstr==3);
  match( PrefetchAllocation mem );
  ins_cost(100);

  format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
  ins_encode %{
    Address Atarget = $mem$$Address;
    __ prefetchw(Atarget);
  %}
  ins_pipe(ialu_mem);
%}
6119

6120
// Allocation prefetch using PREFETCHNTA, selected by AllocatePrefetchInstr==0
// when SSE is available.
instruct prefetchAllocNTA( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
  ins_encode %{
    Address Atarget = $mem$$Address;
    __ prefetchnta(Atarget);
  %}
  ins_pipe(ialu_mem);
%}
6131

6132
// Allocation prefetch using PREFETCHT0, selected by AllocatePrefetchInstr==1
// when SSE is available.
instruct prefetchAllocT0( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
  ins_encode %{
    Address Atarget = $mem$$Address;
    __ prefetcht0(Atarget);
  %}
  ins_pipe(ialu_mem);
%}
6143

6144
// Allocation prefetch using PREFETCHT2, selected by AllocatePrefetchInstr==2
// when SSE is available.
instruct prefetchAllocT2( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
  ins_encode %{
    Address Atarget = $mem$$Address;
    __ prefetcht2(Atarget);
  %}
  ins_pipe(ialu_mem);
%}
6155

6156
//----------Store Instructions-------------------------------------------------
6157

6158
// Store Byte: 8-bit register-to-memory MOV. The source uses the xRegI
// register class rather than rRegI (presumably the byte-addressable
// registers; the class is defined elsewhere in this file).
6159
instruct storeB(memory mem, xRegI src) %{
6160
  match(Set mem (StoreB mem src));
6161

6162
  ins_cost(125);
6163
  format %{ "MOV8   $mem,$src" %}
6164
  opcode(0x88);  // x86 MOV r/m8, r8
6165
  ins_encode( SetInstMark, OpcP, RegMem( src, mem ), ClearInstMark );
6166
  ins_pipe( ialu_mem_reg );
6167
%}
6168

6169
// Store Char/Short: 16-bit register-to-memory MOV.
6170
instruct storeC(memory mem, rRegI src) %{
6171
  match(Set mem (StoreC mem src));
6172

6173
  ins_cost(125);
6174
  format %{ "MOV16  $mem,$src" %}
6175
  // Primary opcode 0x89 (MOV r/m, r); secondary 0x66 is the x86
  // operand-size prefix that narrows the store to 16 bits.
  opcode(0x89, 0x66);
6176
  // OpcS is listed before OpcP so the 0x66 prefix precedes the opcode byte
  // (assuming OpcS/OpcP emit the secondary/primary opcode values).
  ins_encode( SetInstMark, OpcS, OpcP, RegMem( src, mem ), ClearInstMark );
6177
  ins_pipe( ialu_mem_reg );
6178
%}
6179

6180
// Store Integer: 32-bit register-to-memory MOV.
6181
instruct storeI(memory mem, rRegI src) %{
6182
  match(Set mem (StoreI mem src));
6183

6184
  ins_cost(125);
6185
  format %{ "MOV    $mem,$src" %}
6186
  opcode(0x89);  // x86 MOV r/m32, r32
6187
  ins_encode( SetInstMark, OpcP, RegMem( src, mem ), ClearInstMark );
6188
  ins_pipe( ialu_mem_reg );
6189
%}
6190

6191
// Store Long (no atomicity required): two 32-bit register-to-memory MOVs,
// low word to $mem and high word to $mem+4.
6192
instruct storeL(long_memory mem, eRegL src) %{
6193
  predicate(!((StoreLNode*)n)->require_atomic_access());
6194
  match(Set mem (StoreL mem src));
6195

6196
  ins_cost(200);
6197
  format %{ "MOV    $mem,$src.lo\n\t"
6198
            "MOV    $mem+4,$src.hi" %}
6199
  opcode(0x89, 0x89);  // MOV r/m32, r32 for both halves
6200
  // RegMem_Hi is an enc_class defined elsewhere in this file; per the format
  // it presumably encodes the high register against the address + 4.
  ins_encode( SetInstMark, OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ), ClearInstMark );
6201
  ins_pipe( ialu_mem_long_reg );
6202
%}
6203

6204
// Store Long to Integer
6205
instruct storeL2I(memory mem, eRegL src) %{
6206
  match(Set mem (StoreI mem (ConvL2I src)));
6207

6208
  format %{ "MOV    $mem,$src.lo\t# long -> int" %}
6209
  ins_encode %{
6210
    __ movl($mem$$Address, $src$$Register);
6211
  %}
6212
  ins_pipe(ialu_mem_reg);
6213
%}
6214

6215
// Volatile Store Long.  Must be atomic, so move it into
6216
// the FP TOS and then do a 64-bit FIST.  Has to probe the
6217
// target address before the store (for null-ptr checks)
6218
// so the memory operand is used twice in the encoding.
6219
// Selected only when UseSSE<=1 and the StoreL node requires atomic access;
// the source value comes from a long stack slot.
instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
6220
  predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
6221
  match(Set mem (StoreL mem src));
6222
  effect( KILL cr );
6223
  ins_cost(400);
6224
  format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6225
            "FILD   $src\n\t"
6226
            "FISTp  $mem\t # 64-bit atomic volatile long store" %}
6227
  opcode(0x3B);  // x86 CMP r32, r/m32 -- the probe; it sets flags, hence KILL cr
6228
  // First the CMP probe against EAX, then enc_storeL_volatile (enc_class
  // defined elsewhere in this file; presumably the FILD/FISTP pair).
  ins_encode( SetInstMark, OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src), ClearInstMark);
6229
  ins_pipe( fpu_reg_mem );
6230
%}
6231

6232
// Atomic (volatile) long store from a stack slot, used when UseSSE>=2.
// Probes the target address with a compare against rax, loads the 64-bit
// value from the stack slot (rsp + $src$$disp) into the XMM temp, then
// stores the temp to memory with a single movdbl.  Kills the flags.
instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
6233
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6234
  match(Set mem (StoreL mem src));
6235
  effect( TEMP tmp, KILL cr );
6236
  ins_cost(380);
6237
  format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6238
            "MOVSD  $tmp,$src\n\t"
6239
            "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6240
  ins_encode %{
6241
    __ cmpl(rax, $mem$$Address);
6242
    __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
6243
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
6244
  %}
6245
  ins_pipe( pipe_slow );
6246
%}
6247

6248
// Atomic (volatile) long store from a register pair, used when UseSSE>=2.
// After probing the address, the low half ($src$$Register) and the high
// half (HIGH_FROM_LOW of it) are moved into two XMM temps, merged with
// punpckldq into $tmp, and written to memory with one movdbl.
// Cheaper (360) than the stack-slot variant (380).
instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
6249
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6250
  match(Set mem (StoreL mem src));
6251
  effect( TEMP tmp2 , TEMP tmp, KILL cr );
6252
  ins_cost(360);
6253
  format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6254
            "MOVD   $tmp,$src.lo\n\t"
6255
            "MOVD   $tmp2,$src.hi\n\t"
6256
            "PUNPCKLDQ $tmp,$tmp2\n\t"
6257
            "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6258
  ins_encode %{
6259
    __ cmpl(rax, $mem$$Address);
6260
    __ movdl($tmp$$XMMRegister, $src$$Register);
6261
    __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
6262
    __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
6263
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
6264
  %}
6265
  ins_pipe( pipe_slow );
6266
%}
6267

6268
// Store Pointer; for storing unknown oops and raw pointers
// Matches StoreP from an anyRegP register; same 0x89 + RegMem encoding as
// the integer store.
6269
instruct storeP(memory mem, anyRegP src) %{
6270
  match(Set mem (StoreP mem src));
6271

6272
  ins_cost(125);
6273
  format %{ "MOV    $mem,$src" %}
6274
  opcode(0x89);
6275
  ins_encode( SetInstMark, OpcP, RegMem( src, mem ), ClearInstMark );
6276
  ins_pipe( ialu_mem_reg );
6277
%}
6278

6279
// Store Integer Immediate
// Matches StoreI of an immI constant.  Encoded as opcode 0xC7 with /0 in the
// ModRM reg field (RMopc_Mem(0x00, mem)) followed by the 32-bit constant.
6280
instruct storeImmI(memory mem, immI src) %{
6281
  match(Set mem (StoreI mem src));
6282

6283
  ins_cost(150);
6284
  format %{ "MOV    $mem,$src" %}
6285
  opcode(0xC7);               /* C7 /0 */
6286
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con32(src), ClearInstMark);
6287
  ins_pipe( ialu_mem_imm );
6288
%}
6289

6290
// Store Short/Char Immediate
// Matches StoreC of an immI16 constant; only enabled when UseStoreImmI16 is
// set.  Encoding is SizePrefix, opcode 0xC7 /0, then the 16-bit constant.
6291
instruct storeImmI16(memory mem, immI16 src) %{
6292
  predicate(UseStoreImmI16);
6293
  match(Set mem (StoreC mem src));
6294

6295
  ins_cost(150);
6296
  format %{ "MOV16  $mem,$src" %}
6297
  opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
6298
  ins_encode( SetInstMark, SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16(src), ClearInstMark);
6299
  ins_pipe( ialu_mem_imm );
6300
%}
6301

6302
// Store Pointer Immediate; null pointers or constant oops that do not
6303
// need card-mark barriers.
// Matches StoreP of an immP constant; encoded as 0xC7 /0 plus a 32-bit
// constant, identical in shape to the integer-immediate store.
6304
instruct storeImmP(memory mem, immP src) %{
6305
  match(Set mem (StoreP mem src));
6306

6307
  ins_cost(150);
6308
  format %{ "MOV    $mem,$src" %}
6309
  opcode(0xC7);               /* C7 /0 */
6310
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con32( src ), ClearInstMark);
6311
  ins_pipe( ialu_mem_imm );
6312
%}
6313

6314
// Store Byte Immediate
// Matches StoreB of an immI8 constant; encoded as 0xC6 /0 followed by the
// constant emitted through Con8or32.
6315
instruct storeImmB(memory mem, immI8 src) %{
6316
  match(Set mem (StoreB mem src));
6317

6318
  ins_cost(150);
6319
  format %{ "MOV8   $mem,$src" %}
6320
  opcode(0xC6);               /* C6 /0 */
6321
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con8or32(src), ClearInstMark);
6322
  ins_pipe( ialu_mem_imm );
6323
%}
6324

6325
// Store CMS card-mark Immediate
// Matches a StoreCM (card-mark store) of an immI8 constant.  The encoding is
// the same 0xC6 /0 byte-immediate store used by storeImmB; only the matched
// ideal node and the format text differ.
6326
instruct storeImmCM(memory mem, immI8 src) %{
6327
  match(Set mem (StoreCM mem src));
6328

6329
  ins_cost(150);
6330
  format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
6331
  opcode(0xC6);               /* C6 /0 */
6332
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con8or32(src), ClearInstMark);
6333
  ins_pipe( ialu_mem_imm );
6334
%}
6335

6336
// Store Double
// FPU-register variant (regDPR1 source), selected when UseSSE<=1.  Uses
// opcode 0xDD with the shared enc_FPR_store(mem, src) encoding.
6337
instruct storeDPR( memory mem, regDPR1 src) %{
6338
  predicate(UseSSE<=1);
6339
  match(Set mem (StoreD mem src));
6340

6341
  ins_cost(100);
6342
  format %{ "FST_D  $mem,$src" %}
6343
  opcode(0xDD);       /* DD /2 */
6344
  ins_encode( enc_FPR_store(mem,src) );
6345
  ins_pipe( fpu_mem_reg );
6346
%}
6347

6348
// Store double does rounding on x86
// Matches StoreD of (RoundDouble src) so the explicit rounding node is
// absorbed into the store; the encoding is identical to storeDPR.
6349
instruct storeDPR_rounded( memory mem, regDPR1 src) %{
6350
  predicate(UseSSE<=1);
6351
  match(Set mem (StoreD mem (RoundDouble src)));
6352

6353
  ins_cost(100);
6354
  format %{ "FST_D  $mem,$src\t# round" %}
6355
  opcode(0xDD);       /* DD /2 */
6356
  ins_encode( enc_FPR_store(mem,src) );
6357
  ins_pipe( fpu_mem_reg );
6358
%}
6359

6360
// Store XMM register to memory (double-precision floating points)
6361
// MOVSD instruction
// Selected when UseSSE>=2; emitted via the macro assembler's movdbl.
6362
instruct storeD(memory mem, regD src) %{
6363
  predicate(UseSSE>=2);
6364
  match(Set mem (StoreD mem src));
6365
  ins_cost(95);
6366
  format %{ "MOVSD  $mem,$src" %}
6367
  ins_encode %{
6368
    __ movdbl($mem$$Address, $src$$XMMRegister);
6369
  %}
6370
  ins_pipe( pipe_slow );
6371
%}
6372

6373
// Store XMM register to memory (single-precision floating point)
6374
// MOVSS instruction
// Selected when UseSSE>=1; emitted via the macro assembler's movflt.
6375
instruct storeF(memory mem, regF src) %{
6376
  predicate(UseSSE>=1);
6377
  match(Set mem (StoreF mem src));
6378
  ins_cost(95);
6379
  format %{ "MOVSS  $mem,$src" %}
6380
  ins_encode %{
6381
    __ movflt($mem$$Address, $src$$XMMRegister);
6382
  %}
6383
  ins_pipe( pipe_slow );
6384
%}
6385

6386

6387
// Store Float
// FPU-register variant (regFPR1 source), selected only when UseSSE==0.
// Uses opcode 0xD9 with the shared enc_FPR_store(mem, src) encoding.
6388
instruct storeFPR( memory mem, regFPR1 src) %{
6389
  predicate(UseSSE==0);
6390
  match(Set mem (StoreF mem src));
6391

6392
  ins_cost(100);
6393
  format %{ "FST_S  $mem,$src" %}
6394
  opcode(0xD9);       /* D9 /2 */
6395
  ins_encode( enc_FPR_store(mem,src) );
6396
  ins_pipe( fpu_mem_reg );
6397
%}
6398

6399
// Store Float does rounding on x86
// Matches StoreF of (RoundFloat src) so the explicit rounding node is
// absorbed into the store; the encoding is identical to storeFPR.
6400
instruct storeFPR_rounded( memory mem, regFPR1 src) %{
6401
  predicate(UseSSE==0);
6402
  match(Set mem (StoreF mem (RoundFloat src)));
6403

6404
  ins_cost(100);
6405
  format %{ "FST_S  $mem,$src\t# round" %}
6406
  opcode(0xD9);       /* D9 /2 */
6407
  ins_encode( enc_FPR_store(mem,src) );
6408
  ins_pipe( fpu_mem_reg );
6409
%}
6410

6411
// Store a double FPU register as a float: matches StoreF of (ConvD2F src)
// with a regDPR1 source, so the double-to-float conversion is absorbed into
// the single-precision store (opcode 0xD9, same encoding as storeFPR).
// Selected when UseSSE<=1.
6412
instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
6413
  predicate(UseSSE<=1);
6414
  match(Set mem (StoreF mem (ConvD2F src)));
6415

6416
  ins_cost(100);
6417
  format %{ "FST_S  $mem,$src\t# D-round" %}
6418
  opcode(0xD9);       /* D9 /2 */
6419
  ins_encode( enc_FPR_store(mem,src) );
6420
  ins_pipe( fpu_mem_reg );
6421
%}
6422

6423
// Store immediate Float value (it is faster than store from FPU register)
6424
// The instruction usage is guarded by predicate in operand immFPR().
// Encoded as an integer immediate store (0xC7 /0) whose 32-bit constant is
// produced by Con32FPR_as_bits(src).
6425
instruct storeFPR_imm( memory mem, immFPR src) %{
6426
  match(Set mem (StoreF mem src));
6427

6428
  ins_cost(50);
6429
  format %{ "MOV    $mem,$src\t# store float" %}
6430
  opcode(0xC7);               /* C7 /0 */
6431
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits(src), ClearInstMark);
6432
  ins_pipe( ialu_mem_imm );
6433
%}
6434

6435
// Store immediate Float value (it is faster than store from XMM register)
6436
// The instruction usage is guarded by predicate in operand immF().
// Encoded as an integer immediate store (0xC7 /0) whose 32-bit constant is
// produced by Con32F_as_bits(src).
6437
instruct storeF_imm( memory mem, immF src) %{
6438
  match(Set mem (StoreF mem src));
6439

6440
  ins_cost(50);
6441
  format %{ "MOV    $mem,$src\t# store float" %}
6442
  opcode(0xC7);               /* C7 /0 */
6443
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits(src), ClearInstMark);
6444
  ins_pipe( ialu_mem_imm );
6445
%}
6446

6447
// Store Integer to stack slot
// Matches a plain "Set dst src" where dst is an integer stack slot and src
// an rRegI register; encoded by OpcPRegSS with primary opcode 0x89.
6448
instruct storeSSI(stackSlotI dst, rRegI src) %{
6449
  match(Set dst src);
6450

6451
  ins_cost(100);
6452
  format %{ "MOV    $dst,$src" %}
6453
  opcode(0x89);
6454
  ins_encode( OpcPRegSS( dst, src ) );
6455
  ins_pipe( ialu_mem_reg );
6456
%}
6457

6458
// Store Pointer to stack slot
// Matches a plain "Set dst src" where dst is a pointer stack slot and src an
// eRegP register; same OpcPRegSS / 0x89 encoding as storeSSI.
6459
instruct storeSSP(stackSlotP dst, eRegP src) %{
6460
  match(Set dst src);
6461

6462
  ins_cost(100);
6463
  format %{ "MOV    $dst,$src" %}
6464
  opcode(0x89);
6465
  ins_encode( OpcPRegSS( dst, src ) );
6466
  ins_pipe( ialu_mem_reg );
6467
%}
6468

6469
// Store Long to stack slot
// Matches "Set dst src" for a long stack slot and an eRegL register pair.
// Emits two 0x89 moves: RegMem for the low half and RegMem_Hi for the high
// half (shown as $dst and $dst+4 in the format).
6470
instruct storeSSL(stackSlotL dst, eRegL src) %{
6471
  match(Set dst src);
6472

6473
  ins_cost(200);
6474
  format %{ "MOV    $dst,$src.lo\n\t"
6475
            "MOV    $dst+4,$src.hi" %}
6476
  opcode(0x89, 0x89);
6477
  ins_encode( SetInstMark, OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ), ClearInstMark );
6478
  ins_pipe( ialu_mem_long_reg );
6479
%}
6480

6481
//----------MemBar Instructions-----------------------------------------------
6482
// Memory barrier flavors
6483

6484
// Acquire barrier: matches both MemBarAcquire and LoadFence.  The encoding
// is empty and the declared size is 0, so no machine code is emitted.
instruct membar_acquire() %{
6485
  match(MemBarAcquire);
6486
  match(LoadFence);
6487
  ins_cost(400);
6488

6489
  size(0);
6490
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
6491
  ins_encode();
6492
  ins_pipe(empty);
6493
%}
6494

6495
// Acquire barrier associated with a lock (MemBarAcquireLock).  Emits nothing:
// per the format text, the CMPXCHG in the preceding FastLock already
// provides the ordering.  Zero cost and zero size.
instruct membar_acquire_lock() %{
6496
  match(MemBarAcquireLock);
6497
  ins_cost(0);
6498

6499
  size(0);
6500
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
6501
  ins_encode( );
6502
  ins_pipe(empty);
6503
%}
6504

6505
// Release barrier: matches both MemBarRelease and StoreFence.  The encoding
// is empty and the declared size is 0, so no machine code is emitted.
instruct membar_release() %{
6506
  match(MemBarRelease);
6507
  match(StoreFence);
6508
  ins_cost(400);
6509

6510
  size(0);
6511
  format %{ "MEMBAR-release ! (empty encoding)" %}
6512
  ins_encode( );
6513
  ins_pipe(empty);
6514
%}
6515

6516
// Release barrier associated with an unlock (MemBarReleaseLock).  Emits
// nothing: per the format text, a FastUnlock follows.  Zero cost and size.
instruct membar_release_lock() %{
6517
  match(MemBarReleaseLock);
6518
  ins_cost(0);
6519

6520
  size(0);
6521
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
6522
  ins_encode( );
6523
  ins_pipe(empty);
6524
%}
6525

6526
// Volatile (StoreLoad) barrier: the only membar flavor in this group that
// emits code.  Calls the macro assembler's membar(Assembler::StoreLoad);
// the format describes the result as a LOCK ADDL on [ESP + 0], which is why
// the flags register is declared killed.
instruct membar_volatile(eFlagsReg cr) %{
6527
  match(MemBarVolatile);
6528
  effect(KILL cr);
6529
  ins_cost(400);
6530

6531
  format %{
6532
    $$template
6533
    $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
6534
  %}
6535
  ins_encode %{
6536
    __ membar(Assembler::StoreLoad);
6537
  %}
6538
  ins_pipe(pipe_slow);
6539
%}
6540

6541
// Elided volatile barrier: matches MemBarVolatile only when
// Matcher::post_store_load_barrier(n) holds.  With cost 0 it is preferred
// over membar_volatile (cost 400) whenever the predicate is true, and it
// emits no code.
instruct unnecessary_membar_volatile() %{
6542
  match(MemBarVolatile);
6543
  predicate(Matcher::post_store_load_barrier(n));
6544
  ins_cost(0);
6545

6546
  size(0);
6547
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
6548
  ins_encode( );
6549
  ins_pipe(empty);
6550
%}
6551

6552
// StoreStore barrier: matches both MemBarStoreStore and StoreStoreFence.
// Empty encoding, zero cost and zero size.
instruct membar_storestore() %{
6553
  match(MemBarStoreStore);
6554
  match(StoreStoreFence);
6555
  ins_cost(0);
6556

6557
  size(0);
6558
  format %{ "MEMBAR-storestore (empty encoding)" %}
6559
  ins_encode( );
6560
  ins_pipe(empty);
6561
%}
6562

6563
//----------Move Instructions--------------------------------------------------
6564
// Integer-to-pointer cast (CastX2P).  Both operands are constrained to the
// EAX register classes (eAXRegP / eAXRegI), so no code is emitted: the
// encoding is empty and the cost is 0.
instruct castX2P(eAXRegP dst, eAXRegI src) %{
6565
  match(Set dst (CastX2P src));
6566
  format %{ "# X2P  $dst, $src" %}
6567
  ins_encode( /*empty encoding*/ );
6568
  ins_cost(0);
6569
  ins_pipe(empty);
6570
%}
6571

6572
// Pointer-to-integer cast (CastP2X).  Unlike castX2P the operands may live
// in different registers, so a register copy is emitted via enc_Copy.
instruct castP2X(rRegI dst, eRegP src ) %{
6573
  match(Set dst (CastP2X src));
6574
  ins_cost(50);
6575
  format %{ "MOV    $dst, $src\t# CastP2X" %}
6576
  ins_encode( enc_Copy( dst, src) );
6577
  ins_pipe( ialu_reg_reg );
6578
%}
6579

6580
//----------Conditional Move---------------------------------------------------
6581
// Conditional move
// Branch-based emulation of an integer CMOV (signed compare flags), used
// only when the CPU does not support CMOV.  A short jump on the inverted
// condition skips over a plain register move.
// NOTE: the format text prints "J$cop", but the encoding branches on the
// inverted condition ($cop$$cmpcode^1).
6582
instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
6583
  predicate(!VM_Version::supports_cmov() );
6584
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6585
  ins_cost(200);
6586
  format %{ "J$cop,us skip\t# signed cmove\n\t"
6587
            "MOV    $dst,$src\n"
6588
      "skip:" %}
6589
  ins_encode %{
6590
    Label Lskip;
6591
    // Invert sense of branch from sense of CMOV
6592
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6593
    __ movl($dst$$Register, $src$$Register);
6594
    __ bind(Lskip);
6595
  %}
6596
  ins_pipe( pipe_cmov_reg );
6597
%}
6598

6599
// Unsigned-flags counterpart of jmovI_reg: branch-based emulation of an
// integer CMOV when the CPU does not support CMOV.
// NOTE: the format text prints "J$cop", but the encoding branches on the
// inverted condition ($cop$$cmpcode^1).
instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
6600
  predicate(!VM_Version::supports_cmov() );
6601
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6602
  ins_cost(200);
6603
  format %{ "J$cop,us skip\t# unsigned cmove\n\t"
6604
            "MOV    $dst,$src\n"
6605
      "skip:" %}
6606
  ins_encode %{
6607
    Label Lskip;
6608
    // Invert sense of branch from sense of CMOV
6609
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6610
    __ movl($dst$$Register, $src$$Register);
6611
    __ bind(Lskip);
6612
  %}
6613
  ins_pipe( pipe_cmov_reg );
6614
%}
6615

6616
// Integer register-to-register CMOV on signed compare flags; requires CMOV
// support.  opcode(0x0F,0x40) supplies the two opcode bytes used by
// enc_cmov(cop) (defined elsewhere; presumably it folds the condition code
// into the second byte), followed by the RegReg operand byte.
instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
6617
  predicate(VM_Version::supports_cmov() );
6618
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6619
  ins_cost(200);
6620
  format %{ "CMOV$cop $dst,$src" %}
6621
  opcode(0x0F,0x40);
6622
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6623
  ins_pipe( pipe_cmov_reg );
6624
%}
6625

6626
// Integer register-to-register CMOV on unsigned compare flags; requires
// CMOV support.  Same encoding as cmovI_reg, differing only in the compare
// operand and flags register classes.  Also the expansion target of
// cmovI_regUCF.
instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
6627
  predicate(VM_Version::supports_cmov() );
6628
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6629
  ins_cost(200);
6630
  format %{ "CMOV$cop $dst,$src" %}
6631
  opcode(0x0F,0x40);
6632
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6633
  ins_pipe( pipe_cmov_reg );
6634
%}
6635

6636
// Integer CMOV for the cmpOpUCF / eFlagsRegUCF operand classes.  Has no
// encoding of its own: it expands to cmovI_regU with the same operands.
instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
6637
  predicate(VM_Version::supports_cmov() );
6638
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6639
  ins_cost(200);
6640
  expand %{
6641
    cmovI_regU(cop, cr, dst, src);
6642
  %}
6643
%}
6644

6645
// Conditional move
// Integer CMOV with a memory source on signed compare flags: the LoadI is
// folded into the instruction (RegMem operand).  Requires CMOV support and
// costs more (250) than the register form (200).
6646
instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
6647
  predicate(VM_Version::supports_cmov() );
6648
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6649
  ins_cost(250);
6650
  format %{ "CMOV$cop $dst,$src" %}
6651
  opcode(0x0F,0x40);
6652
  ins_encode( SetInstMark, enc_cmov(cop), RegMem( dst, src ), ClearInstMark );
6653
  ins_pipe( pipe_cmov_mem );
6654
%}
6655

6656
// Conditional move
// Unsigned-flags counterpart of cmovI_mem: integer CMOV with a folded LoadI
// memory source.  Also the expansion target of cmovI_memUCF.
6657
instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
6658
  predicate(VM_Version::supports_cmov() );
6659
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6660
  ins_cost(250);
6661
  format %{ "CMOV$cop $dst,$src" %}
6662
  opcode(0x0F,0x40);
6663
  ins_encode( SetInstMark, enc_cmov(cop), RegMem( dst, src ), ClearInstMark );
6664
  ins_pipe( pipe_cmov_mem );
6665
%}
6666

6667
// Integer CMOV from memory for the cmpOpUCF / eFlagsRegUCF operand classes.
// Has no encoding of its own: it expands to cmovI_memU with the same operands.
instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
6668
  predicate(VM_Version::supports_cmov() );
6669
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6670
  ins_cost(250);
6671
  expand %{
6672
    cmovI_memU(cop, cr, dst, src);
6673
  %}
6674
%}
6675

6676
// Conditional move
// Pointer register-to-register CMOV on signed compare flags; requires CMOV
// support.  Same 0x0F,0x40 / enc_cmov / RegReg encoding as cmovI_reg.
6677
instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6678
  predicate(VM_Version::supports_cmov() );
6679
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6680
  ins_cost(200);
6681
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6682
  opcode(0x0F,0x40);
6683
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6684
  ins_pipe( pipe_cmov_reg );
6685
%}
6686

6687
// Conditional move (non-P6 version)
6688
// Note:  a CMoveP is generated for  stubs and native wrappers
6689
//        regardless of whether we are on a P6, so we
6690
//        emulate a cmov here
// This rule has no predicate, so it is always available; its higher cost
// (300 vs 200) lets cmovP_reg win when CMOV is supported.  The encoding is
// enc_cmov_branch(cop, 0x2) followed by opcode 0x8B and a RegReg operand
// byte (presumably a short branch over the 2-byte MOV -- confirm against
// enc_cmov_branch).
6691
instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6692
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6693
  ins_cost(300);
6694
  format %{ "Jn$cop   skip\n\t"
6695
          "MOV    $dst,$src\t# pointer\n"
6696
      "skip:" %}
6697
  opcode(0x8b);
6698
  ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
6699
  ins_pipe( pipe_cmov_reg );
6700
%}
6701

6702
// Conditional move
// Unsigned-flags counterpart of cmovP_reg: pointer register-to-register
// CMOV; requires CMOV support.  Also the expansion target of cmovP_regUCF.
6703
instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
6704
  predicate(VM_Version::supports_cmov() );
6705
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6706
  ins_cost(200);
6707
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6708
  opcode(0x0F,0x40);
6709
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6710
  ins_pipe( pipe_cmov_reg );
6711
%}
6712

6713
// Pointer CMOV for the cmpOpUCF / eFlagsRegUCF operand classes.  Has no
// encoding of its own: it expands to cmovP_regU with the same operands.
instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
6714
  predicate(VM_Version::supports_cmov() );
6715
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6716
  ins_cost(200);
6717
  expand %{
6718
    cmovP_regU(cop, cr, dst, src);
6719
  %}
6720
%}
6721

6722
// DISABLED: Requires the ADLC to emit a bottom_type call that
6723
// correctly meets the two pointer arguments; one is an incoming
6724
// register but the other is a memory operand.  ALSO appears to
6725
// be buggy with implicit null checks.
6726
//
6727
//// Conditional move
6728
//instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
6729
//  predicate(VM_Version::supports_cmov() );
6730
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6731
//  ins_cost(250);
6732
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6733
//  opcode(0x0F,0x40);
6734
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6735
//  ins_pipe( pipe_cmov_mem );
6736
//%}
6737
//
6738
//// Conditional move
6739
//instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
6740
//  predicate(VM_Version::supports_cmov() );
6741
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6742
//  ins_cost(250);
6743
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6744
//  opcode(0x0F,0x40);
6745
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6746
//  ins_pipe( pipe_cmov_mem );
6747
//%}
6748

6749
// Conditional move
// Double FPU-register conditional move on unsigned compare flags, selected
// when UseSSE<=1.  The destination is constrained to regDPR1; the encoding
// is enc_cmov_dpr(cop, src) with primary opcode 0xDA.
6750
instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
6751
  predicate(UseSSE<=1);
6752
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6753
  ins_cost(200);
6754
  format %{ "FCMOV$cop $dst,$src\t# double" %}
6755
  opcode(0xDA);
6756
  ins_encode( enc_cmov_dpr(cop,src) );
6757
  ins_pipe( pipe_cmovDPR_reg );
6758
%}
6759

6760
// Conditional move
// Float FPU-register conditional move on unsigned compare flags, selected
// only when UseSSE==0.  Shares the enc_cmov_dpr encoding and the
// pipe_cmovDPR_reg pipeline class with the double variant.
6761
instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
6762
  predicate(UseSSE==0);
6763
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6764
  ins_cost(200);
6765
  format %{ "FCMOV$cop $dst,$src\t# float" %}
6766
  opcode(0xDA);
6767
  ins_encode( enc_cmov_dpr(cop,src) );
6768
  ins_pipe( pipe_cmovDPR_reg );
6769
%}
6770

6771
// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
// So the signed-flags double case (UseSSE<=1) is emulated: the encoding is
// enc_cmov_branch(cop, 0x4), then Push_Reg_DPR(src), then opcode 0xDD with
// RegOpc(dst).  The 0x4 argument is presumably the byte length of the
// skipped sequence -- confirm against enc_cmov_branch.
6772
instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
6773
  predicate(UseSSE<=1);
6774
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6775
  ins_cost(200);
6776
  format %{ "Jn$cop   skip\n\t"
6777
            "MOV    $dst,$src\t# double\n"
6778
      "skip:" %}
6779
  opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
6780
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
6781
  ins_pipe( pipe_cmovDPR_reg );
6782
%}
6783

6784
// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
// Single-precision counterpart of fcmovDPR_regS, selected only when
// UseSSE==0: enc_cmov_branch(cop, 0x4), Push_Reg_FPR(src), then opcode 0xDD
// with RegOpc(dst).
6785
instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
6786
  predicate(UseSSE==0);
6787
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6788
  ins_cost(200);
6789
  format %{ "Jn$cop    skip\n\t"
6790
            "MOV    $dst,$src\t# float\n"
6791
      "skip:" %}
6792
  opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
6793
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
6794
  ins_pipe( pipe_cmovDPR_reg );
6795
%}
6796

6797
// No CMOVE with SSE/SSE2
// Float conditional move between XMM registers on signed compare flags
// (UseSSE>=1), emulated with a short branch on the inverted condition
// around a movflt.
6798
instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
6799
  predicate (UseSSE>=1);
6800
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6801
  ins_cost(200);
6802
  format %{ "Jn$cop   skip\n\t"
6803
            "MOVSS  $dst,$src\t# float\n"
6804
      "skip:" %}
6805
  ins_encode %{
6806
    Label skip;
6807
    // Invert sense of branch from sense of CMOV
6808
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6809
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6810
    __ bind(skip);
6811
  %}
6812
  ins_pipe( pipe_slow );
6813
%}
6814

6815
// No CMOVE with SSE/SSE2
// Double conditional move between XMM registers on signed compare flags
// (UseSSE>=2), emulated with a short branch on the inverted condition
// around a movdbl.  The format annotation says "double" to match the
// CMoveD / MOVSD being described.
6816
instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
6817
  predicate (UseSSE>=2);
6818
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6819
  ins_cost(200);
6820
  format %{ "Jn$cop   skip\n\t"
6821
            "MOVSD  $dst,$src\t# double\n"
6822
      "skip:" %}
6823
  ins_encode %{
6824
    Label skip;
6825
    // Invert sense of branch from sense of CMOV
6826
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6827
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6828
    __ bind(skip);
6829
  %}
6830
  ins_pipe( pipe_slow );
6831
%}
6832

6833
// unsigned version
// Float conditional move between XMM registers on unsigned compare flags
// (UseSSE>=1); same branch-around-movflt emulation as fcmovF_regS.  Also
// the expansion target of fcmovF_regUCF.
6834
instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
6835
  predicate (UseSSE>=1);
6836
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6837
  ins_cost(200);
6838
  format %{ "Jn$cop   skip\n\t"
6839
            "MOVSS  $dst,$src\t# float\n"
6840
      "skip:" %}
6841
  ins_encode %{
6842
    Label skip;
6843
    // Invert sense of branch from sense of CMOV
6844
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6845
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6846
    __ bind(skip);
6847
  %}
6848
  ins_pipe( pipe_slow );
6849
%}
6850

6851
// Float XMM conditional move for the cmpOpUCF / eFlagsRegUCF operand
// classes.  Has no encoding of its own: it expands to fcmovF_regU.
instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
6852
  predicate (UseSSE>=1);
6853
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6854
  ins_cost(200);
6855
  expand %{
6856
    fcmovF_regU(cop, cr, dst, src);
6857
  %}
6858
%}
6859

6860
// unsigned version
// Double conditional move between XMM registers on unsigned compare flags
// (UseSSE>=2); same branch-around-movdbl emulation as the signed variant.
// Also the expansion target of fcmovD_regUCF.  The format annotation says
// "double" to match the CMoveD / MOVSD being described.
6861
instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
6862
  predicate (UseSSE>=2);
6863
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6864
  ins_cost(200);
6865
  format %{ "Jn$cop   skip\n\t"
6866
            "MOVSD  $dst,$src\t# double\n"
6867
      "skip:" %}
6868
  ins_encode %{
6869
    Label skip;
6870
    // Invert sense of branch from sense of CMOV
6871
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6872
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6873
    __ bind(skip);
6874
  %}
6875
  ins_pipe( pipe_slow );
6876
%}
6877

6878
// Double XMM conditional move for the cmpOpUCF / eFlagsRegUCF operand
// classes.  Has no encoding of its own: it expands to fcmovD_regU.
instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
6879
  predicate (UseSSE>=2);
6880
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6881
  ins_cost(200);
6882
  expand %{
6883
    fcmovD_regU(cop, cr, dst, src);
6884
  %}
6885
%}
6886

6887
// Long conditional move on signed compare flags; requires CMOV support.
// A long occupies a register pair, so two CMOVs are emitted with the same
// condition: one for the low halves (RegReg_Lo2) and one for the high
// halves (RegReg_Hi2).
instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
6888
  predicate(VM_Version::supports_cmov() );
6889
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6890
  ins_cost(200);
6891
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
6892
            "CMOV$cop $dst.hi,$src.hi" %}
6893
  opcode(0x0F,0x40);
6894
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
6895
  ins_pipe( pipe_cmov_reg_long );
6896
%}
6897

6898
// Unsigned-flags counterpart of cmovL_reg: a pair of CMOVs covering the low
// and high halves of the long.  Also the expansion target of cmovL_regUCF.
instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
6899
  predicate(VM_Version::supports_cmov() );
6900
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6901
  ins_cost(200);
6902
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
6903
            "CMOV$cop $dst.hi,$src.hi" %}
6904
  opcode(0x0F,0x40);
6905
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
6906
  ins_pipe( pipe_cmov_reg_long );
6907
%}
6908

6909
// Long conditional move for the cmpOpUCF / eFlagsRegUCF operand classes.
// Has no encoding of its own: it expands to cmovL_regU.
instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
6910
  predicate(VM_Version::supports_cmov() );
6911
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6912
  ins_cost(200);
6913
  expand %{
6914
    cmovL_regU(cop, cr, dst, src);
6915
  %}
6916
%}
6917

6918
//----------Arithmetic Instructions--------------------------------------------
6919
//----------Addition Instructions----------------------------------------------
6920

6921
// Integer Addition Instructions
6922
// Register-register integer add (dst += src).  Two-byte instruction:
// opcode 0x03 plus the RegReg operand byte.  Kills the flags register.
instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
6923
  match(Set dst (AddI dst src));
6924
  effect(KILL cr);
6925

6926
  size(2);
6927
  format %{ "ADD    $dst,$src" %}
6928
  opcode(0x03);
6929
  ins_encode( OpcP, RegReg( dst, src) );
6930
  ins_pipe( ialu_reg_reg );
6931
%}
6932

6933
// Integer add of an immediate to a register (dst += imm).  Uses the 0x81 /0
// opcode group via OpcSErm, with the constant emitted by Con8or32
// (presumably choosing the short sign-extended form when the value fits in
// 8 bits -- confirm against OpcSErm / Con8or32).  Kills the flags register.
instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
6934
  match(Set dst (AddI dst src));
6935
  effect(KILL cr);
6936

6937
  format %{ "ADD    $dst,$src" %}
6938
  opcode(0x81, 0x00); /* /0 id */
6939
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
6940
  ins_pipe( ialu_reg );
6941
%}
6942

6943
// Integer increment: matches AddI of the constant 1 (immI_1) when UseIncDec
// is enabled.  One-byte instruction built from base opcode 0x40 by
// Opc_plus(primary, dst).  Kills the flags register.
instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
6944
  predicate(UseIncDec);
6945
  match(Set dst (AddI dst src));
6946
  effect(KILL cr);
6947

6948
  size(1);
6949
  format %{ "INC    $dst" %}
6950
  opcode(0x40); /*  */
6951
  ins_encode( Opc_plus( primary, dst ) );
6952
  ins_pipe( ialu_reg );
6953
%}
6954

6955
// Three-operand integer add of register + immediate via LEA (opcode 0x8D):
// dst need not equal src0, and no flags effect is declared.  Costed at 110.
instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
6956
  match(Set dst (AddI src0 src1));
6957
  ins_cost(110);
6958

6959
  format %{ "LEA    $dst,[$src0 + $src1]" %}
6960
  opcode(0x8D); /* 0x8D /r */
6961
  ins_encode( SetInstMark, OpcP, RegLea( dst, src0, src1 ), ClearInstMark );
6962
  ins_pipe( ialu_reg_reg );
6963
%}
6964

6965
// Pointer counterpart of leaI_eReg_immI: three-operand AddP of a pointer
// register and an immediate via LEA (opcode 0x8D); no flags effect declared.
instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
6966
  match(Set dst (AddP src0 src1));
6967
  ins_cost(110);
6968

6969
  format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
6970
  opcode(0x8D); /* 0x8D /r */
6971
  ins_encode( SetInstMark, OpcP, RegLea( dst, src0, src1 ), ClearInstMark );
6972
  ins_pipe( ialu_reg_reg );
6973
%}
6974

6975
// Integer decrement: matches AddI of the constant -1 (immI_M1) when
// UseIncDec is enabled.  One-byte instruction built from base opcode 0x48
// by Opc_plus(primary, dst).  Kills the flags register.
instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
6976
  predicate(UseIncDec);
6977
  match(Set dst (AddI dst src));
6978
  effect(KILL cr);
6979

6980
  size(1);
6981
  format %{ "DEC    $dst" %}
6982
  opcode(0x48); /*  */
6983
  ins_encode( Opc_plus( primary, dst ) );
6984
  ins_pipe( ialu_reg );
6985
%}
6986

6987
// Pointer plus integer register (dst += src) via AddP.  Same two-byte
// 0x03 + RegReg encoding as addI_eReg.  Kills the flags register.
instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
6988
  match(Set dst (AddP dst src));
6989
  effect(KILL cr);
6990

6991
  size(2);
6992
  format %{ "ADD    $dst,$src" %}
6993
  opcode(0x03);
6994
  ins_encode( OpcP, RegReg( dst, src) );
6995
  ins_pipe( ialu_reg_reg );
6996
%}
6997

6998
// Pointer plus immediate (dst += imm) via AddP.  Same OpcSErm / Con8or32
// encoding as addI_eReg_imm; the commented-out RegImm encoding below is an
// older alternative left in place.  Kills the flags register.
instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
6999
  match(Set dst (AddP dst src));
7000
  effect(KILL cr);
7001

7002
  format %{ "ADD    $dst,$src" %}
7003
  opcode(0x81,0x00); /* Opcode 81 /0 id */
7004
  // ins_encode( RegImm( dst, src) );
7005
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7006
  ins_pipe( ialu_reg );
7007
%}
7008

7009
// Integer add with a memory source: the LoadI is folded into the ADD
// (opcode 0x03 with a RegMem operand).  Kills the flags register.
instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7010
  match(Set dst (AddI dst (LoadI src)));
7011
  effect(KILL cr);
7012

7013
  ins_cost(150);
7014
  format %{ "ADD    $dst,$src" %}
7015
  opcode(0x03);
7016
  ins_encode( SetInstMark, OpcP, RegMem( dst, src), ClearInstMark );
7017
  ins_pipe( ialu_reg_mem );
7018
%}
7019

7020
// Read-modify-write integer add to memory: matches a StoreI back to dst of
// (LoadI dst) + src and emits a single ADD with a memory destination
// (opcode 0x01).  Kills the flags register.
instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7021
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7022
  effect(KILL cr);
7023

7024
  ins_cost(150);
7025
  format %{ "ADD    $dst,$src" %}
7026
  opcode(0x01);  /* Opcode 01 /r */
7027
  ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark );
7028
  ins_pipe( ialu_mem_reg );
7029
%}
7030

7031
// Add Memory with Immediate
// Read-modify-write add of an immediate to memory: matches a StoreI back to
// dst of (LoadI dst) + imm.  Encoded as OpcSE(src), the /0 ModRM form for
// the memory operand, and the constant via Con8or32.  Kills the flags.
7032
instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
7033
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7034
  effect(KILL cr);
7035

7036
  ins_cost(125);
7037
  format %{ "ADD    $dst,$src" %}
7038
  opcode(0x81);               /* Opcode 81 /0 id */
7039
  ins_encode( SetInstMark, OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32(src), ClearInstMark );
7040
  ins_pipe( ialu_mem_imm );
7041
%}
7042

7043
// Increment a memory word in place: matches a StoreI back to dst of
// (LoadI dst) + 1 and emits opcode 0xFF with /0 in the ModRM reg field.
// Unlike incI_eReg, this rule carries no UseIncDec predicate.
instruct incI_mem(memory dst, immI_1 src, eFlagsReg cr) %{
7044
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7045
  effect(KILL cr);
7046

7047
  ins_cost(125);
7048
  format %{ "INC    $dst" %}
7049
  opcode(0xFF);               /* Opcode FF /0 */
7050
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,dst), ClearInstMark);
7051
  ins_pipe( ialu_mem_imm );
7052
%}
7053

7054
// Decrement a memory word in place: matches a StoreI back to dst of
// (LoadI dst) + (-1) and emits opcode 0xFF with /1 in the ModRM reg field.
// Unlike decI_eReg, this rule carries no UseIncDec predicate.
instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
7055
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7056
  effect(KILL cr);
7057

7058
  ins_cost(125);
7059
  format %{ "DEC    $dst" %}
7060
  opcode(0xFF);               /* Opcode FF /1 */
7061
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x01,dst), ClearInstMark);
7062
  ins_pipe( ialu_mem_imm );
7063
%}
7064

7065

7066
// CheckCastPP on a pointer register: a type-only node for the compiler.
// Source and destination are the same operand, the encoding is empty and
// the size is 0, so no machine code is emitted.
instruct checkCastPP( eRegP dst ) %{
7067
  match(Set dst (CheckCastPP dst));
7068

7069
  size(0);
7070
  format %{ "#checkcastPP of $dst" %}
7071
  ins_encode( /*empty encoding*/ );
7072
  ins_pipe( empty );
7073
%}
7074

7075
// CastPP on a pointer register: in-place, empty encoding, emits no code.
// Unlike the other cast rules below, no explicit ins_cost(0) is given here.
instruct castPP( eRegP dst ) %{
7076
  match(Set dst (CastPP dst));
7077
  format %{ "#castPP of $dst" %}
7078
  ins_encode( /*empty encoding*/ );
7079
  ins_pipe( empty );
7080
%}
7081

7082
// CastII on an integer register: in-place, empty encoding, zero cost.
instruct castII( rRegI dst ) %{
7083
  match(Set dst (CastII dst));
7084
  format %{ "#castII of $dst" %}
7085
  ins_encode( /*empty encoding*/ );
7086
  ins_cost(0);
7087
  ins_pipe( empty );
7088
%}
7089

7090
// CastLL on a long register pair: in-place, empty encoding, zero cost.
instruct castLL( eRegL dst ) %{
7091
  match(Set dst (CastLL dst));
7092
  format %{ "#castLL of $dst" %}
7093
  ins_encode( /*empty encoding*/ );
7094
  ins_cost(0);
7095
  ins_pipe( empty );
7096
%}
7097

7098
// CastFF on an XMM float register (UseSSE >= 1): in-place, empty encoding,
// zero cost.  castFF_PR covers the UseSSE < 1 case.
instruct castFF( regF dst ) %{
7099
  predicate(UseSSE >= 1);
7100
  match(Set dst (CastFF dst));
7101
  format %{ "#castFF of $dst" %}
7102
  ins_encode( /*empty encoding*/ );
7103
  ins_cost(0);
7104
  ins_pipe( empty );
7105
%}
7106

7107
// CastDD on an XMM double register (UseSSE >= 2): in-place, empty encoding,
// zero cost.  castDD_PR covers the UseSSE < 2 case.
instruct castDD( regD dst ) %{
7108
  predicate(UseSSE >= 2);
7109
  match(Set dst (CastDD dst));
7110
  format %{ "#castDD of $dst" %}
7111
  ins_encode( /*empty encoding*/ );
7112
  ins_cost(0);
7113
  ins_pipe( empty );
7114
%}
7115

7116
// CastFF on an FPU float register (UseSSE < 1): in-place, empty encoding,
// zero cost.  Complements castFF, which handles UseSSE >= 1.
instruct castFF_PR( regFPR dst ) %{
7117
  predicate(UseSSE < 1);
7118
  match(Set dst (CastFF dst));
7119
  format %{ "#castFF of $dst" %}
7120
  ins_encode( /*empty encoding*/ );
7121
  ins_cost(0);
7122
  ins_pipe( empty );
7123
%}
7124

7125
// CastDD on an FPU double register (UseSSE < 2): in-place, empty encoding,
// zero cost.  Complements castDD, which handles UseSSE >= 2.
instruct castDD_PR( regDPR dst ) %{
7126
  predicate(UseSSE < 2);
7127
  match(Set dst (CastDD dst));
7128
  format %{ "#castDD of $dst" %}
7129
  ins_encode( /*empty encoding*/ );
7130
  ins_cost(0);
7131
  ins_pipe( empty );
7132
%}
7133

7134
// No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
7135

7136
// 64-bit compare-and-swap producing a boolean in res. The same rule serves
// both CompareAndSwapL and WeakCompareAndSwapL. Per the format string, CMPXCHG8
// compares EDX:EAX (oldval) with [$mem_ptr] and stores newval on equality;
// res is then set to 1 on success and 0 on failure. Flags and oldval are
// declared killed. The two encoding classes are not shown in this section.
instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7137
  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
7138
  match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
7139
  effect(KILL cr, KILL oldval);
7140
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7141
            "MOV    $res,0\n\t"
7142
            "JNE,s  fail\n\t"
7143
            "MOV    $res,1\n"
7144
          "fail:" %}
7145
  ins_encode( enc_cmpxchg8(mem_ptr),
7146
              enc_flags_ne_to_boolean(res) );
7147
  ins_pipe( pipe_cmpxchg );
7148
%}
7149

7150
// Pointer compare-and-swap producing a boolean in res; serves both
// CompareAndSwapP and WeakCompareAndSwapP. Per the format string, CMPXCHG
// compares EAX (oldval) with [$mem_ptr] and stores newval on equality; res
// becomes 1 on success, 0 on failure. Flags and oldval are declared killed.
instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
7151
  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
7152
  match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
7153
  effect(KILL cr, KILL oldval);
7154
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7155
            "MOV    $res,0\n\t"
7156
            "JNE,s  fail\n\t"
7157
            "MOV    $res,1\n"
7158
          "fail:" %}
7159
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7160
  ins_pipe( pipe_cmpxchg );
7161
%}
7162

7163
// Byte compare-and-swap producing a boolean in res; serves both
// CompareAndSwapB and WeakCompareAndSwapB. Uses the byte-sized encoding
// enc_cmpxchgb, then converts the resulting flags into 0/1 in res as shown in
// the format string. Flags and oldval are declared killed.
instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
7164
  match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
7165
  match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
7166
  effect(KILL cr, KILL oldval);
7167
  format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7168
            "MOV    $res,0\n\t"
7169
            "JNE,s  fail\n\t"
7170
            "MOV    $res,1\n"
7171
          "fail:" %}
7172
  ins_encode( enc_cmpxchgb(mem_ptr),
7173
              enc_flags_ne_to_boolean(res) );
7174
  ins_pipe( pipe_cmpxchg );
7175
%}
7176

7177
// Short (16-bit) compare-and-swap producing a boolean in res; serves both
// CompareAndSwapS and WeakCompareAndSwapS. Uses the word-sized encoding
// enc_cmpxchgw, then converts the resulting flags into 0/1 in res as shown in
// the format string. Flags and oldval are declared killed.
instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
7178
  match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
7179
  match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
7180
  effect(KILL cr, KILL oldval);
7181
  format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7182
            "MOV    $res,0\n\t"
7183
            "JNE,s  fail\n\t"
7184
            "MOV    $res,1\n"
7185
          "fail:" %}
7186
  ins_encode( enc_cmpxchgw(mem_ptr),
7187
              enc_flags_ne_to_boolean(res) );
7188
  ins_pipe( pipe_cmpxchg );
7189
%}
7190

7191
// Int compare-and-swap producing a boolean in res; serves both
// CompareAndSwapI and WeakCompareAndSwapI. Shares the enc_cmpxchg encoding
// with compareAndSwapP; res becomes 1 on success, 0 on failure per the format
// string. Flags and oldval are declared killed.
instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7192
  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
7193
  match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
7194
  effect(KILL cr, KILL oldval);
7195
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7196
            "MOV    $res,0\n\t"
7197
            "JNE,s  fail\n\t"
7198
            "MOV    $res,1\n"
7199
          "fail:" %}
7200
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7201
  ins_pipe( pipe_cmpxchg );
7202
%}
7203

7204
// 64-bit compare-and-exchange. The result is delivered in oldval (EDX:EAX
// operand class), so unlike compareAndSwapL there is no boolean conversion
// and oldval is a result rather than a killed input. Only flags are killed.
instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7205
  match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
7206
  effect(KILL cr);
7207
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7208
  ins_encode( enc_cmpxchg8(mem_ptr) );
7209
  ins_pipe( pipe_cmpxchg );
7210
%}
7211

7212
// Pointer compare-and-exchange. The result is delivered in oldval (EAX
// operand class); only flags are declared killed.
instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
7213
  match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
7214
  effect(KILL cr);
7215
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7216
  ins_encode( enc_cmpxchg(mem_ptr) );
7217
  ins_pipe( pipe_cmpxchg );
7218
%}
7219

7220
// Byte compare-and-exchange using the byte-sized encoding enc_cmpxchgb. The
// result is delivered in oldval; only flags are declared killed.
instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7221
  match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
7222
  effect(KILL cr);
7223
  format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7224
  ins_encode( enc_cmpxchgb(mem_ptr) );
7225
  ins_pipe( pipe_cmpxchg );
7226
%}
7227

7228
// Short (16-bit) compare-and-exchange using the word-sized encoding
// enc_cmpxchgw. The result is delivered in oldval; only flags are killed.
instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7229
  match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
7230
  effect(KILL cr);
7231
  format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7232
  ins_encode( enc_cmpxchgw(mem_ptr) );
7233
  ins_pipe( pipe_cmpxchg );
7234
%}
7235

7236
// Int compare-and-exchange; shares the enc_cmpxchg encoding with
// compareAndExchangeP. The result is delivered in oldval; only flags are
// declared killed.
instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7237
  match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
7238
  effect(KILL cr);
7239
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7240
  ins_encode( enc_cmpxchg(mem_ptr) );
7241
  ins_pipe( pipe_cmpxchg );
7242
%}
7243

7244
// GetAndAddB whose result is unused (predicate result_not_used()) and whose
// addend is an immediate: emitted as a LOCK-prefixed byte ADD to memory
// instead of XADD. The value is bound to a dummy Universe operand.
instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7245
  predicate(n->as_LoadStore()->result_not_used());
7246
  match(Set dummy (GetAndAddB mem add));
7247
  effect(KILL cr);
7248
  format %{ "ADDB  [$mem],$add" %}
7249
  ins_encode %{
7250
    __ lock();
7251
    __ addb($mem$$Address, $add$$constant);
7252
  %}
7253
  ins_pipe( pipe_cmpxchg );
7254
%}
7255

7256
// Important to match to xRegI: only 8-bit regs.
7257
// GetAndAddB with the result used: LOCK XADDB between memory and newval, with
// newval serving as both the addend and the returned value. newval is an
// xRegI operand (see the comment preceding this rule). Flags are killed.
instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
7258
  match(Set newval (GetAndAddB mem newval));
7259
  effect(KILL cr);
7260
  format %{ "XADDB  [$mem],$newval" %}
7261
  ins_encode %{
7262
    __ lock();
7263
    __ xaddb($mem$$Address, $newval$$Register);
7264
  %}
7265
  ins_pipe( pipe_cmpxchg );
7266
%}
7267

7268
// GetAndAddS whose result is unused (predicate result_not_used()) and whose
// addend is an immediate: emitted as a LOCK-prefixed 16-bit ADD to memory.
// NOTE(review): the format string prints "ADDS" while the encoding emits
// addw; the format text is only the debug listing.
instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7269
  predicate(n->as_LoadStore()->result_not_used());
7270
  match(Set dummy (GetAndAddS mem add));
7271
  effect(KILL cr);
7272
  format %{ "ADDS  [$mem],$add" %}
7273
  ins_encode %{
7274
    __ lock();
7275
    __ addw($mem$$Address, $add$$constant);
7276
  %}
7277
  ins_pipe( pipe_cmpxchg );
7278
%}
7279

7280
// GetAndAddS with the result used: LOCK XADD (16-bit form, xaddw) between
// memory and newval, with newval as both addend and result. Flags are killed.
// NOTE(review): the format string prints "XADDS" while the encoding emits
// xaddw.
instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
7281
  match(Set newval (GetAndAddS mem newval));
7282
  effect(KILL cr);
7283
  format %{ "XADDS  [$mem],$newval" %}
7284
  ins_encode %{
7285
    __ lock();
7286
    __ xaddw($mem$$Address, $newval$$Register);
7287
  %}
7288
  ins_pipe( pipe_cmpxchg );
7289
%}
7290

7291
// GetAndAddI whose result is unused (predicate result_not_used()) and whose
// addend is an immediate: emitted as a LOCK-prefixed 32-bit ADD to memory
// instead of XADD.
instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7292
  predicate(n->as_LoadStore()->result_not_used());
7293
  match(Set dummy (GetAndAddI mem add));
7294
  effect(KILL cr);
7295
  format %{ "ADDL  [$mem],$add" %}
7296
  ins_encode %{
7297
    __ lock();
7298
    __ addl($mem$$Address, $add$$constant);
7299
  %}
7300
  ins_pipe( pipe_cmpxchg );
7301
%}
7302

7303
// GetAndAddI with the result used: LOCK XADDL between memory and newval, with
// newval as both addend and result. Flags are killed.
instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
7304
  match(Set newval (GetAndAddI mem newval));
7305
  effect(KILL cr);
7306
  format %{ "XADDL  [$mem],$newval" %}
7307
  ins_encode %{
7308
    __ lock();
7309
    __ xaddl($mem$$Address, $newval$$Register);
7310
  %}
7311
  ins_pipe( pipe_cmpxchg );
7312
%}
7313

7314
// Important to match to xRegI: only 8-bit regs.
7315
// GetAndSetB: byte XCHG between newval and memory; newval carries the value
// to store in and the previous memory value out. No LOCK prefix is emitted
// and no flags effect is declared.
instruct xchgB( memory mem, xRegI newval) %{
7316
  match(Set newval (GetAndSetB mem newval));
7317
  format %{ "XCHGB  $newval,[$mem]" %}
7318
  ins_encode %{
7319
    __ xchgb($newval$$Register, $mem$$Address);
7320
  %}
7321
  ins_pipe( pipe_cmpxchg );
7322
%}
7323

7324
// GetAndSetS: 16-bit XCHG between newval and memory; newval carries the value
// to store in and the previous memory value out.
instruct xchgS( memory mem, rRegI newval) %{
7325
  match(Set newval (GetAndSetS mem newval));
7326
  format %{ "XCHGW  $newval,[$mem]" %}
7327
  ins_encode %{
7328
    __ xchgw($newval$$Register, $mem$$Address);
7329
  %}
7330
  ins_pipe( pipe_cmpxchg );
7331
%}
7332

7333
// GetAndSetI: 32-bit XCHG between newval and memory; newval carries the value
// to store in and the previous memory value out.
instruct xchgI( memory mem, rRegI newval) %{
7334
  match(Set newval (GetAndSetI mem newval));
7335
  format %{ "XCHGL  $newval,[$mem]" %}
7336
  ins_encode %{
7337
    __ xchgl($newval$$Register, $mem$$Address);
7338
  %}
7339
  ins_pipe( pipe_cmpxchg );
7340
%}
7341

7342
// GetAndSetP: pointer exchange between newval and memory, emitted with the
// same 32-bit xchgl as xchgI.
instruct xchgP( memory mem, pRegP newval) %{
7343
  match(Set newval (GetAndSetP mem newval));
7344
  format %{ "XCHGL  $newval,[$mem]" %}
7345
  ins_encode %{
7346
    __ xchgl($newval$$Register, $mem$$Address);
7347
  %}
7348
  ins_pipe( pipe_cmpxchg );
7349
%}
7350

7351
//----------Subtraction Instructions-------------------------------------------
7352

7353
// Integer Subtraction Instructions
7354
// dst = dst - src, register/register form (opcode 0x2B followed by a RegReg
// ModRM byte, 2 bytes total). Flags are killed.
instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7355
  match(Set dst (SubI dst src));
7356
  effect(KILL cr);
7357

7358
  size(2);
7359
  format %{ "SUB    $dst,$src" %}
7360
  opcode(0x2B);
7361
  ins_encode( OpcP, RegReg( dst, src) );
7362
  ins_pipe( ialu_reg_reg );
7363
%}
7364

7365
// dst = dst - immediate (opcode 81 /5). No size() is declared; the encoding
// uses OpcSErm and Con8or32, whose names suggest the immediate is emitted as
// either 8 or 32 bits -- presumably chosen by the constant's value; confirm
// against the encoding class definitions. Flags are killed.
instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7366
  match(Set dst (SubI dst src));
7367
  effect(KILL cr);
7368

7369
  format %{ "SUB    $dst,$src" %}
7370
  opcode(0x81,0x05);  /* Opcode 81 /5 */
7371
  // ins_encode( RegImm( dst, src) );
7372
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7373
  ins_pipe( ialu_reg );
7374
%}
7375

7376
// dst = dst - [src]: folds the LoadI into the SUB (opcode 0x2B with a memory
// operand). Costed at 150; flags are killed. The encoding is bracketed by
// SetInstMark / ClearInstMark like the other memory-operand forms here.
instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7377
  match(Set dst (SubI dst (LoadI src)));
7378
  effect(KILL cr);
7379

7380
  ins_cost(150);
7381
  format %{ "SUB    $dst,$src" %}
7382
  opcode(0x2B);
7383
  ins_encode( SetInstMark, OpcP, RegMem( dst, src), ClearInstMark );
7384
  ins_pipe( ialu_reg_mem );
7385
%}
7386

7387
// [dst] = [dst] - src: read-modify-write form matching a StoreI of
// (LoadI dst) - src back to the same memory operand (opcode 29 /r).
// Costed at 150; flags are killed.
instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7388
  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
7389
  effect(KILL cr);
7390

7391
  ins_cost(150);
7392
  format %{ "SUB    $dst,$src" %}
7393
  opcode(0x29);  /* Opcode 29 /r */
7394
  ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark );
7395
  ins_pipe( ialu_mem_reg );
7396
%}
7397

7398
// Subtract from a pointer
7399
// Pointer minus int: matches AddP dst (0 - src) and emits a single SUB with
// the same opcode/encoding as subI_eReg. The immI_0 operand exists only to
// match the zero in the SubI. Flags are killed.
instruct subP_eReg(eRegP dst, rRegI src, immI_0 zero, eFlagsReg cr) %{
7400
  match(Set dst (AddP dst (SubI zero src)));
7401
  effect(KILL cr);
7402

7403
  size(2);
7404
  format %{ "SUB    $dst,$src" %}
7405
  opcode(0x2B);
7406
  ins_encode( OpcP, RegReg( dst, src) );
7407
  ins_pipe( ialu_reg_reg );
7408
%}
7409

7410
// Integer negate: matches 0 - dst and emits NEG dst (opcode F7 /3, 2 bytes).
// The immI_0 operand exists only to match the zero. Flags are killed.
instruct negI_eReg(rRegI dst, immI_0 zero, eFlagsReg cr) %{
7411
  match(Set dst (SubI zero dst));
7412
  effect(KILL cr);
7413

7414
  size(2);
7415
  format %{ "NEG    $dst" %}
7416
  opcode(0xF7,0x03);  // Opcode F7 /3
7417
  ins_encode( OpcP, RegOpc( dst ) );
7418
  ins_pipe( ialu_reg );
7419
%}
7420

7421
//----------Multiplication/Division Instructions-------------------------------
7422
// Integer Multiplication Instructions
7423
// Multiply Register
7424
// dst = dst * src, register/register IMUL. opcode() lists 0xAF as primary and
// 0x0F as secondary; the encoding emits OpcS then OpcP, i.e. the byte
// sequence 0F AF followed by the RegReg ModRM byte (3 bytes). Flags killed.
instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7425
  match(Set dst (MulI dst src));
7426
  effect(KILL cr);
7427

7428
  size(3);
7429
  ins_cost(300);
7430
  format %{ "IMUL   $dst,$src" %}
7431
  opcode(0xAF, 0x0F);
7432
  ins_encode( OpcS, OpcP, RegReg( dst, src) );
7433
  ins_pipe( ialu_reg_reg_alu0 );
7434
%}
7435

7436
// Multiply 32-bit Immediate
7437
// dst = src * imm, three-operand IMUL (opcode 69 /r id). dst need not equal
// src. The OpcSE(imm) / Con8or32(imm) pair suggests the short imm8 form is
// selected when the constant allows it -- confirm against the encoding
// classes. Flags are killed.
instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
7438
  match(Set dst (MulI src imm));
7439
  effect(KILL cr);
7440

7441
  ins_cost(300);
7442
  format %{ "IMUL   $dst,$src,$imm" %}
7443
  opcode(0x69);  /* 69 /r id */
7444
  ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
7445
  ins_pipe( ialu_reg_reg_alu0 );
7446
%}
7447

7448
// Loads only the low word of a 32-bit-representable long constant (immL32)
// into an eADXRegL_low_only operand; the format shows a single MOV EAX,$src.
// The cost is deliberately inflated (see the note below) and is compensated
// for in mulI_imm_high / mulI_imm_RShift_high, which consume this operand.
instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
7449
  match(Set dst src);
7450
  effect(KILL cr);
7451

7452
  // Note that this is artificially increased to make it more expensive than loadConL
7453
  ins_cost(250);
7454
  format %{ "MOV    EAX,$src\t// low word only" %}
7455
  opcode(0xB8);
7456
  ins_encode( LdImmL_Lo(dst, src) );
7457
  ins_pipe( ialu_reg_fat );
7458
%}
7459

7460
// Multiply by 32-bit Immediate, taking the shifted high order results
7461
//  (special case for shift by 32)
7462
// High 32 bits of (long)src1 * constant, for a right shift of exactly 32.
// The predicate walks the match tree (ConvL2I -> RShiftL -> MulL -> second
// input) and requires that input to be a ConL whose value lies within
// [min_jint, max_jint]. The result is produced in EDX (eDXRegI dst).
instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
7463
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7464
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7465
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7466
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7467
  effect(USE src1, KILL cr);
7468

7469
  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7470
  ins_cost(0*100 + 1*400 - 150);
7471
  format %{ "IMUL   EDX:EAX,$src1" %}
7472
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7473
  ins_pipe( pipe_slow );
7474
%}
7475

7476
// Multiply by 32-bit Immediate, taking the shifted high order results
7477
// Same pattern and predicate as mulI_imm_high, but for shift counts in the
// immI_32_63 operand class: per the format string, the IMUL is followed by
// SAR EDX,$cnt-32. Costs 100 more than mulI_imm_high for the extra shift.
instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
7478
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7479
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7480
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7481
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7482
  effect(USE src1, KILL cr);
7483

7484
  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7485
  ins_cost(1*100 + 1*400 - 150);
7486
  format %{ "IMUL   EDX:EAX,$src1\n\t"
7487
            "SAR    EDX,$cnt-32" %}
7488
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7489
  ins_pipe( pipe_slow );
7490
%}
7491

7492
// Multiply Memory 32-bit Immediate
7493
// dst = [src] * imm: three-operand IMUL (opcode 69 /r id) with the LoadI
// folded into the memory operand. Flags are killed.
instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
7494
  match(Set dst (MulI (LoadI src) imm));
7495
  effect(KILL cr);
7496

7497
  ins_cost(300);
7498
  format %{ "IMUL   $dst,$src,$imm" %}
7499
  opcode(0x69);  /* 69 /r id */
7500
  ins_encode( SetInstMark, OpcSE(imm), RegMem( dst, src ), Con8or32( imm ), ClearInstMark );
7501
  ins_pipe( ialu_reg_mem_alu0 );
7502
%}
7503

7504
// Multiply Memory
7505
// dst = dst * [src]: two-operand IMUL with the LoadI folded in. Same opcode
// bytes as mulI_eReg (OpcS then OpcP, i.e. 0F AF) but with a RegMem operand.
// Costed at 350; flags are killed.
instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
7506
  match(Set dst (MulI dst (LoadI src)));
7507
  effect(KILL cr);
7508

7509
  ins_cost(350);
7510
  format %{ "IMUL   $dst,$src" %}
7511
  opcode(0xAF, 0x0F);
7512
  ins_encode( SetInstMark, OpcS, OpcP, RegMem( dst, src), ClearInstMark );
7513
  ins_pipe( ialu_reg_mem_alu0 );
7514
%}
7515

7516
// MulAddS2I: dst = dst*src1 + src2*src3. Has no encoding of its own; it
// expands into two mulI_eReg instructions and one addI_eReg. src2 is used as
// scratch for the second product, hence KILL src2.
instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr)
7517
%{
7518
  match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
7519
  effect(KILL cr, KILL src2);
7520

7521
  expand %{ mulI_eReg(dst, src1, cr);
7522
           mulI_eReg(src2, src3, cr);
7523
           addI_eReg(dst, src2, cr); %}
7524
%}
7525

7526
// Multiply Register Int to Long
7527
// Signed 32x32 -> 64 multiply: matches MulL of two ConvI2L inputs. src is
// constrained to EAX (eAXRegI), src1 to a register outside EAX/EDX
// (nadxRegI), and the result goes to the eADXRegL operand. Only src1 is
// passed to the encoding; src is used implicitly through its register class.
instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
7528
  // Basic Idea: long = (long)int * (long)int
7529
  match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
7530
  effect(DEF dst, USE src, USE src1, KILL flags);
7531

7532
  ins_cost(300);
7533
  format %{ "IMUL   $dst,$src1" %}
7534

7535
  ins_encode( long_int_multiply( dst, src1 ) );
7536
  ins_pipe( ialu_reg_reg_alu0 );
7537
%}
7538

7539
// Unsigned 32x32 -> 64 multiply: matches MulL where both inputs are
// (ConvI2L x) & mask, with mask in the immL_32bits operand class. Emitted as
// MUL (format string) through long_uint_multiply. Flags are killed.
instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
7540
  // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
7541
  match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
7542
  effect(KILL flags);
7543

7544
  ins_cost(300);
7545
  format %{ "MUL    $dst,$src1" %}
7546

7547
  ins_encode( long_uint_multiply(dst, src1) );
7548
  ins_pipe( ialu_reg_reg_alu0 );
7549
%}
7550

7551
// Multiply Register Long
7552
// General 64x64 -> 64 multiply with dst in EDX:EAX. The format string shows
// the sequence: two IMULs form the cross products into tmp, one MUL forms
// the full low product in EDX:EAX, and tmp is then added into EDX. tmp is a
// TEMP register; flags are killed. The cost expression counts 4 simple ops
// and 3 multiplies.
instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7553
  match(Set dst (MulL dst src));
7554
  effect(KILL cr, TEMP tmp);
7555
  ins_cost(4*100+3*400);
7556
// Basic idea: lo(result) = lo(x_lo * y_lo)
7557
//             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
7558
  format %{ "MOV    $tmp,$src.lo\n\t"
7559
            "IMUL   $tmp,EDX\n\t"
7560
            "MOV    EDX,$src.hi\n\t"
7561
            "IMUL   EDX,EAX\n\t"
7562
            "ADD    $tmp,EDX\n\t"
7563
            "MUL    EDX:EAX,$src.lo\n\t"
7564
            "ADD    EDX,$tmp" %}
7565
  ins_encode( long_multiply( dst, src, tmp ) );
7566
  ins_pipe( pipe_slow );
7567
%}
7568

7569
// Multiply Register Long where the left operand's high 32 bits are zero
7570
// 64x64 multiply specialised for the case where is_operand_hi32_zero()
// holds for the first input (n->in(1)): the x_hi * y_lo cross term is
// dropped, leaving one IMUL, one MUL and one ADD. Cheaper than mulL_eReg
// (2*100+2*400 versus 4*100+3*400).
instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7571
  predicate(is_operand_hi32_zero(n->in(1)));
7572
  match(Set dst (MulL dst src));
7573
  effect(KILL cr, TEMP tmp);
7574
  ins_cost(2*100+2*400);
7575
// Basic idea: lo(result) = lo(x_lo * y_lo)
7576
//             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
7577
  format %{ "MOV    $tmp,$src.hi\n\t"
7578
            "IMUL   $tmp,EAX\n\t"
7579
            "MUL    EDX:EAX,$src.lo\n\t"
7580
            "ADD    EDX,$tmp" %}
7581
  ins_encode %{
7582
    // tmp = y_hi * x_lo (low 32 bits)
7583
    __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
7584
    __ imull($tmp$$Register, rax);
7585
    // EDX:EAX = x_lo * y_lo, then fold the cross term into the high word
7586
    __ mull($src$$Register);
7587
    __ addl(rdx, $tmp$$Register);
7588
  %}
7589
  ins_pipe( pipe_slow );
7590
%}
7589

7590
// Multiply Register Long where the right operand's high 32 bits are zero
7591
// 64x64 multiply specialised for the case where is_operand_hi32_zero()
// holds for the second input (n->in(2)): the x_lo * y_hi cross term is
// dropped, leaving one IMUL, one MUL and one ADD.
instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7592
  predicate(is_operand_hi32_zero(n->in(2)));
7593
  match(Set dst (MulL dst src));
7594
  effect(KILL cr, TEMP tmp);
7595
  ins_cost(2*100+2*400);
7596
// Basic idea: lo(result) = lo(x_lo * y_lo)
7597
//             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
7598
  format %{ "MOV    $tmp,$src.lo\n\t"
7599
            "IMUL   $tmp,EDX\n\t"
7600
            "MUL    EDX:EAX,$src.lo\n\t"
7601
            "ADD    EDX,$tmp" %}
7602
  ins_encode %{
7603
    // tmp = y_lo * x_hi (low 32 bits); EDX still holds x_hi at this point
7604
    __ movl($tmp$$Register, $src$$Register);
7605
    __ imull($tmp$$Register, rdx);
7606
    // EDX:EAX = x_lo * y_lo, then fold the cross term into the high word
7607
    __ mull($src$$Register);
7608
    __ addl(rdx, $tmp$$Register);
7609
  %}
7610
  ins_pipe( pipe_slow );
7611
%}
7610

7611
// Multiply Register Long where the left and the right operands' high 32 bits are zero
7612
// 64x64 multiply specialised for the case where is_operand_hi32_zero()
// holds for both inputs: a single unsigned MUL of the low words yields the
// whole result in EDX:EAX, so no TEMP register is needed.
instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
7613
  predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
7614
  match(Set dst (MulL dst src));
7615
  effect(KILL cr);
7616
  ins_cost(1*400);
7617
// Basic idea: lo(result) = lo(x_lo * y_lo)
7618
//             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
7619
  format %{ "MUL    EDX:EAX,$src.lo\n\t" %}
7620
  ins_encode %{
7621
    __ mull($src$$Register);
7622
  %}
7623
  ins_pipe( pipe_slow );
7624
%}
7625

7626
// Multiply Register Long by small constant
7627
// 64-bit multiply by a small constant (immL_127 operand class). Per the
// format string: tmp = EDX * src via three-operand IMUL, EDX is loaded with
// the constant, MUL forms EDX:EAX from EAX * constant, and tmp is added into
// EDX. Declared as exactly 12 bytes.
instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
7628
  match(Set dst (MulL dst src));
7629
  effect(KILL cr, TEMP tmp);
7630
  ins_cost(2*100+2*400);
7631
  size(12);
7632
// Basic idea: lo(result) = lo(src * EAX)
7633
//             hi(result) = hi(src * EAX) + lo(src * EDX)
7634
  format %{ "IMUL   $tmp,EDX,$src\n\t"
7635
            "MOV    EDX,$src\n\t"
7636
            "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
7637
            "ADD    EDX,$tmp" %}
7638
  ins_encode( long_multiply_con( dst, src, tmp ) );
7639
  ins_pipe( pipe_slow );
7640
%}
7641

7642
// Integer DIV with Register
7643
// Signed 32-bit divide: quotient in EAX, divisor fixed to ECX, EDX killed.
// The format string shows a guard before the IDIV: when EAX is 0x80000000
// and ECX is -1 the IDIV is skipped (EDX is zeroed and EAX is left as is);
// otherwise CDQ + IDIV run. The whole sequence is declared as 26 bytes.
instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7644
  match(Set rax (DivI rax div));
7645
  effect(KILL rdx, KILL cr);
7646
  size(26);
7647
  ins_cost(30*100+10*100);
7648
  format %{ "CMP    EAX,0x80000000\n\t"
7649
            "JNE,s  normal\n\t"
7650
            "XOR    EDX,EDX\n\t"
7651
            "CMP    ECX,-1\n\t"
7652
            "JE,s   done\n"
7653
    "normal: CDQ\n\t"
7654
            "IDIV   $div\n\t"
7655
    "done:"        %}
7656
  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7657
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
7658
  ins_pipe( ialu_reg_reg_alu0 );
7659
%}
7660

7661
// Divide Register Long
7662
// General 64-bit divide, implemented as a runtime call (effect(CALL)). Per
// the format string both operands are pushed as hi/lo word pairs,
// SharedRuntime::ldiv is called, and 16 bytes of stack are released
// afterwards. The very high cost (10000) lets any cheaper matching rule win.
instruct divL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
7663
  match(Set dst (DivL src1 src2));
7664
  effect(CALL);
7665
  ins_cost(10000);
7666
  format %{ "PUSH   $src1.hi\n\t"
7667
            "PUSH   $src1.lo\n\t"
7668
            "PUSH   $src2.hi\n\t"
7669
            "PUSH   $src2.lo\n\t"
7670
            "CALL   SharedRuntime::ldiv\n\t"
7671
            "ADD    ESP,16" %}
7672
  ins_encode( long_div(src1,src2) );
7673
  ins_pipe( pipe_slow );
7674
%}
7675

7676
// Integer DIVMOD with Register, both quotient and mod results
7677
// Combined signed 32-bit divide and remainder (DivModI). The match rule has
// no "Set": both EAX and EDX are operands and, unlike divI_eReg, EDX is not
// declared killed. Uses the same guarded CDQ/IDIV sequence and encoding as
// divI_eReg.
instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7678
  match(DivModI rax div);
7679
  effect(KILL cr);
7680
  size(26);
7681
  ins_cost(30*100+10*100);
7682
  format %{ "CMP    EAX,0x80000000\n\t"
7683
            "JNE,s  normal\n\t"
7684
            "XOR    EDX,EDX\n\t"
7685
            "CMP    ECX,-1\n\t"
7686
            "JE,s   done\n"
7687
    "normal: CDQ\n\t"
7688
            "IDIV   $div\n\t"
7689
    "done:"        %}
7690
  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7691
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
7692
  ins_pipe( pipe_slow );
7693
%}
7694

7695
// Integer MOD with Register
7696
// Signed 32-bit remainder: result in EDX, dividend in EAX (killed), divisor
// fixed to ECX.
// NOTE(review): size(26) and the encoding (cdq_enc, OpcP, RegOpc) are
// identical to divI_eReg, whose format string lists the full guarded
// sequence; the format here shows only CDQ/IDIV and so appears to be an
// abbreviated listing of the same code -- confirm against cdq_enc.
instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
7697
  match(Set rdx (ModI rax div));
7698
  effect(KILL rax, KILL cr);
7699

7700
  size(26);
7701
  ins_cost(300);
7702
  format %{ "CDQ\n\t"
7703
            "IDIV   $div" %}
7704
  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7705
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
7706
  ins_pipe( ialu_reg_reg_alu0 );
7707
%}
7708

7709
// Remainder Register Long
7710
// General 64-bit remainder, implemented as a runtime call (effect(CALL)).
// Mirrors divL_eReg: both operands are pushed as hi/lo word pairs,
// SharedRuntime::lrem is called, and 16 bytes of stack are released.
instruct modL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
7711
  match(Set dst (ModL src1 src2));
7712
  effect(CALL);
7713
  ins_cost(10000);
7714
  format %{ "PUSH   $src1.hi\n\t"
7715
            "PUSH   $src1.lo\n\t"
7716
            "PUSH   $src2.hi\n\t"
7717
            "PUSH   $src2.lo\n\t"
7718
            "CALL   SharedRuntime::lrem\n\t"
7719
            "ADD    ESP,16" %}
7720
  ins_encode( long_mod(src1,src2) );
7721
  ins_pipe( pipe_slow );
7722
%}
7723

7724
// Divide Register Long (no special case since divisor != -1)
7725
// 64-bit divide of EDX:EAX by a 32-bit constant, done inline with unsigned
// 32-bit DIVs instead of a runtime call. The divisor's absolute value is
// loaded into tmp; tmp2 accumulates the high word of the quotient. Three
// paths are taken depending on the dividend's high word:
//   fast - |imm| is unsigned-above the high word: one DIV, high result 0;
//   pos  - non-negative dividend needing two chained DIVs (high, then low);
//   else - negative dividend: negate, two chained DIVs, negate the result.
// If the constant itself is negative the final quotient is negated again.
// The assert rejects divisors 0, -1 and min_jint.
instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
7726
  match(Set dst (DivL dst imm));
7727
  effect( TEMP tmp, TEMP tmp2, KILL cr );
7728
  ins_cost(1000);
7729
  format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
7730
            "XOR    $tmp2,$tmp2\n\t"
7731
            "CMP    $tmp,EDX\n\t"
7732
            "JA,s   fast\n\t"
7733
            "MOV    $tmp2,EAX\n\t"
7734
            "MOV    EAX,EDX\n\t"
7735
            "MOV    EDX,0\n\t"
7736
            "JLE,s  pos\n\t"
7737
            "LNEG   EAX : $tmp2\n\t"
7738
            "DIV    $tmp # unsigned division\n\t"
7739
            "XCHG   EAX,$tmp2\n\t"
7740
            "DIV    $tmp\n\t"
7741
            "LNEG   $tmp2 : EAX\n\t"
7742
            "JMP,s  done\n"
7743
    "pos:\n\t"
7744
            "DIV    $tmp\n\t"
7745
            "XCHG   EAX,$tmp2\n"
7746
    "fast:\n\t"
7747
            "DIV    $tmp\n"
7748
    "done:\n\t"
7749
            "MOV    EDX,$tmp2\n\t"
7750
            "NEG    EDX:EAX # if $imm < 0" %}
7751
  ins_encode %{
7752
    int con = (int)$imm$$constant;
7753
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
7754
    int pcon = (con > 0) ? con : -con;
7755
    Label Lfast, Lpos, Ldone;
7756

7757
    __ movl($tmp$$Register, pcon);
7758
    __ xorl($tmp2$$Register,$tmp2$$Register);
7759
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
7760
    __ jccb(Assembler::above, Lfast); // result fits into 32 bit
7761

7762
    __ movl($tmp2$$Register, $dst$$Register); // save
7763
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7764
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
7765
    __ jccb(Assembler::lessEqual, Lpos); // result is positive
7766

7767
    // Negative dividend.
7768
    // convert value to positive to use unsigned division
7769
    __ lneg($dst$$Register, $tmp2$$Register);
7770
    __ divl($tmp$$Register);
7771
    __ xchgl($dst$$Register, $tmp2$$Register);
7772
    __ divl($tmp$$Register);
7773
    // revert result back to negative
7774
    __ lneg($tmp2$$Register, $dst$$Register);
7775
    __ jmpb(Ldone);
7776

7777
    __ bind(Lpos);
7778
    __ divl($tmp$$Register); // Use unsigned division
7779
    __ xchgl($dst$$Register, $tmp2$$Register);
7780
    // Fall through for final divide, tmp2 has 32 bit hi result
7781

7782
    __ bind(Lfast);
7783
    // fast path: src is positive
7784
    __ divl($tmp$$Register); // Use unsigned division
7785

7786
    __ bind(Ldone);
7787
    __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
7788
    if (con < 0) {
7789
      __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
7790
    }
7791
  %}
7792
  ins_pipe( pipe_slow );
7793
%}
7794

7795
// Remainder Register Long (remainder fits into 32 bits)
7796
// 64-bit remainder of EDX:EAX by a 32-bit constant, done inline with unsigned
// 32-bit DIVs. Follows the same three-path structure as divL_eReg_imm32
// (fast / non-negative / negative dividend), but keeps the remainder that
// DIV leaves in EDX rather than the quotient. On the negative-dividend path
// the remainder is negated. At the end the 32-bit remainder is moved to the
// low word and sign-extended into the high word with SAR 31. The sign of the
// constant is not used after taking its absolute value.
instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
7797
  match(Set dst (ModL dst imm));
7798
  effect( TEMP tmp, TEMP tmp2, KILL cr );
7799
  ins_cost(1000);
7800
  format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
7801
            "CMP    $tmp,EDX\n\t"
7802
            "JA,s   fast\n\t"
7803
            "MOV    $tmp2,EAX\n\t"
7804
            "MOV    EAX,EDX\n\t"
7805
            "MOV    EDX,0\n\t"
7806
            "JLE,s  pos\n\t"
7807
            "LNEG   EAX : $tmp2\n\t"
7808
            "DIV    $tmp # unsigned division\n\t"
7809
            "MOV    EAX,$tmp2\n\t"
7810
            "DIV    $tmp\n\t"
7811
            "NEG    EDX\n\t"
7812
            "JMP,s  done\n"
7813
    "pos:\n\t"
7814
            "DIV    $tmp\n\t"
7815
            "MOV    EAX,$tmp2\n"
7816
    "fast:\n\t"
7817
            "DIV    $tmp\n"
7818
    "done:\n\t"
7819
            "MOV    EAX,EDX\n\t"
7820
            "SAR    EDX,31\n\t" %}
7821
  ins_encode %{
7822
    int con = (int)$imm$$constant;
7823
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
7824
    int pcon = (con > 0) ? con : -con;
7825
    Label  Lfast, Lpos, Ldone;
7826

7827
    __ movl($tmp$$Register, pcon);
7828
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
7829
    __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit
7830

7831
    __ movl($tmp2$$Register, $dst$$Register); // save
7832
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7833
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
7834
    __ jccb(Assembler::lessEqual, Lpos); // result is positive
7835

7836
    // Negative dividend.
7837
    // convert value to positive to use unsigned division
7838
    __ lneg($dst$$Register, $tmp2$$Register);
7839
    __ divl($tmp$$Register);
7840
    __ movl($dst$$Register, $tmp2$$Register);
7841
    __ divl($tmp$$Register);
7842
    // revert remainder back to negative
7843
    __ negl(HIGH_FROM_LOW($dst$$Register));
7844
    __ jmpb(Ldone);
7845

7846
    __ bind(Lpos);
7847
    __ divl($tmp$$Register);
7848
    __ movl($dst$$Register, $tmp2$$Register);
7849

7850
    __ bind(Lfast);
7851
    // fast path: src is positive
7852
    __ divl($tmp$$Register);
7853

7854
    __ bind(Ldone);
7855
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7856
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
7857

7858
  %}
7859
  ins_pipe( pipe_slow );
7860
%}
7861

7862
// Integer Shift Instructions
7863
// Shift Left by one
7864
// dst <<= 1 using the dedicated shift-by-one form (D1 /4, 2 bytes, no
// immediate byte). Matches only when the shift operand is immI_1.
instruct shlI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
7865
  match(Set dst (LShiftI dst shift));
7866
  effect(KILL cr);
7867

7868
  size(2);
7869
  format %{ "SHL    $dst,$shift" %}
7870
  opcode(0xD1, 0x4);  /* D1 /4 */
7871
  ins_encode( OpcP, RegOpc( dst ) );
7872
  ins_pipe( ialu_reg );
7873
%}
7874

7875
// Shift Left by 8-bit immediate
7876
// dst <<= imm8 (C1 /4 ib, 3 bytes). Flags are killed.
instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
7877
  match(Set dst (LShiftI dst shift));
7878
  effect(KILL cr);
7879

7880
  size(3);
7881
  format %{ "SHL    $dst,$shift" %}
7882
  opcode(0xC1, 0x4);  /* C1 /4 ib */
7883
  ins_encode( RegOpcImm( dst, shift) );
7884
  ins_pipe( ialu_reg );
7885
%}
7886

7887
// Shift Left by variable
7888
// dst <<= ECX (D3 /4, 2 bytes). The shift count is constrained to ECX by the
// eCXRegI operand class and is not part of the encoding. Flags are killed.
instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
7889
  match(Set dst (LShiftI dst shift));
7890
  effect(KILL cr);
7891

7892
  size(2);
7893
  format %{ "SHL    $dst,$shift" %}
7894
  opcode(0xD3, 0x4);  /* D3 /4 */
7895
  ins_encode( OpcP, RegOpc( dst ) );
7896
  ins_pipe( ialu_reg_reg );
7897
%}
7898

7899
// Arithmetic shift right by one
7900
// dst >>= 1 (arithmetic) using the shift-by-one form (D1 /7, 2 bytes).
// Matches only when the shift operand is immI_1.
instruct sarI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
7901
  match(Set dst (RShiftI dst shift));
7902
  effect(KILL cr);
7903

7904
  size(2);
7905
  format %{ "SAR    $dst,$shift" %}
7906
  opcode(0xD1, 0x7);  /* D1 /7 */
7907
  ins_encode( OpcP, RegOpc( dst ) );
7908
  ins_pipe( ialu_reg );
7909
%}
7910

7911
// Arithmetic shift right by one
7912
// [dst] >>= 1 (arithmetic), read-modify-write on memory: matches a StoreI of
// (LoadI dst) >> 1 back to the same memory operand (D1 /7 with a memory
// ModRM). No size() is declared. Flags are killed.
instruct sarI_mem_1(memory dst, immI_1 shift, eFlagsReg cr) %{
7913
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
7914
  effect(KILL cr);
7915
  format %{ "SAR    $dst,$shift" %}
7916
  opcode(0xD1, 0x7);  /* D1 /7 */
7917
  ins_encode( SetInstMark, OpcP, RMopc_Mem(secondary,dst), ClearInstMark );
7918
  ins_pipe( ialu_mem_imm );
7919
%}
7920

7921
// Arithmetic Shift Right by 8-bit immediate
7922
// dst >>= imm8 (arithmetic; C1 /7 ib, 3 bytes). Flags are killed.
// NOTE(review): this register form is assigned pipeline class ialu_mem_imm,
// whereas the sibling register/imm8 shifts salI_eReg_imm and shrI_eReg_imm
// use ialu_reg -- confirm whether that is intentional.
instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
7923
  match(Set dst (RShiftI dst shift));
7924
  effect(KILL cr);
7925

7926
  size(3);
7927
  format %{ "SAR    $dst,$shift" %}
7928
  opcode(0xC1, 0x7);  /* C1 /7 ib */
7929
  ins_encode( RegOpcImm( dst, shift ) );
7930
  ins_pipe( ialu_mem_imm );
7931
%}
7932

7933
// Arithmetic Shift Right by 8-bit immediate
7934
// [dst] >>= imm8 (arithmetic), read-modify-write on memory: matches a StoreI
// of (LoadI dst) >> shift back to the same memory operand (C1 /7 ib with a
// memory ModRM, followed by the shift constant). Flags are killed.
instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
7935
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
7936
  effect(KILL cr);
7937

7938
  format %{ "SAR    $dst,$shift" %}
7939
  opcode(0xC1, 0x7);  /* C1 /7 ib */
7940
  ins_encode( SetInstMark, OpcP, RMopc_Mem(secondary, dst ), Con8or32(shift), ClearInstMark );
7941
  ins_pipe( ialu_mem_imm );
7942
%}
7943

7944
// Arithmetic Shift Right by variable
7945
// dst >>= ECX (arithmetic; D3 /7, 2 bytes). The shift count is constrained
// to ECX by the eCXRegI operand class. Flags are killed.
instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
7946
  match(Set dst (RShiftI dst shift));
7947
  effect(KILL cr);
7948

7949
  size(2);
7950
  format %{ "SAR    $dst,$shift" %}
7951
  opcode(0xD3, 0x7);  /* D3 /7 */
7952
  ins_encode( OpcP, RegOpc( dst ) );
7953
  ins_pipe( ialu_reg_reg );
7954
%}
7955

7956
// Logical shift right by one
7957
// dst >>>= 1 (logical) using the shift-by-one form (D1 /5, 2 bytes).
// Matches only when the shift operand is immI_1.
instruct shrI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
7958
  match(Set dst (URShiftI dst shift));
7959
  effect(KILL cr);
7960

7961
  size(2);
7962
  format %{ "SHR    $dst,$shift" %}
7963
  opcode(0xD1, 0x5);  /* D1 /5 */
7964
  ins_encode( OpcP, RegOpc( dst ) );
7965
  ins_pipe( ialu_reg );
7966
%}
7967

7968
// Logical Shift Right by 8-bit immediate
7969
// dst >>>= imm8 (logical; C1 /5 ib, 3 bytes). Flags are killed.
instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
7970
  match(Set dst (URShiftI dst shift));
7971
  effect(KILL cr);
7972

7973
  size(3);
7974
  format %{ "SHR    $dst,$shift" %}
7975
  opcode(0xC1, 0x5);  /* C1 /5 ib */
7976
  ins_encode( RegOpcImm( dst, shift) );
7977
  ins_pipe( ialu_reg );
7978
%}
7979

7980

7981
// Shift Left by 24, followed by Arithmetic Shift Right by 24.
7982
// This idiom is used by the compiler for the i2b bytecode.
7983
// Matches (src << 24) >> 24 (LShiftI feeding RShiftI, both by the immI_24
// operand) and replaces the pair with a single movsbl from src into dst.
// No flags operand is declared for this instruct.
// NOTE(review): src uses the xRegI class, presumably because a byte source
// register is required -- confirm against the register class definitions.
instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
7984
  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
7985

7986
  size(3);
7987
  format %{ "MOVSX  $dst,$src :8" %}
7988
  ins_encode %{
7989
    __ movsbl($dst$$Register, $src$$Register);
7990
  %}
7991
  ins_pipe(ialu_reg_reg);
7992
%}
7993

7994
// Shift Left by 16, followed by Arithmetic Shift Right by 16.
7995
// This idiom is used by the compiler for the i2s bytecode.
7996
// Matches (src << 16) >> 16 (LShiftI feeding RShiftI, both by the immI_16
// operand) and replaces the pair with a single movswl from src into dst.
// No flags operand is declared for this instruct.
instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
7997
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
7998

7999
  size(3);
8000
  format %{ "MOVSX  $dst,$src :16" %}
8001
  ins_encode %{
8002
    __ movswl($dst$$Register, $src$$Register);
8003
  %}
8004
  ins_pipe(ialu_reg_reg);
8005
%}
8006

8007

8008
// Logical Shift Right by variable
8009
// Matches URShiftI of a register by a variable count held in an eCXRegI
// operand. Encoded as D3 /5 (declared size 2); the flags register is
// declared killed.
instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
8010
  match(Set dst (URShiftI dst shift));
8011
  effect(KILL cr);
8012

8013
  size(2);
8014
  format %{ "SHR    $dst,$shift" %}
8015
  opcode(0xD3, 0x5);  /* D3 /5 */
8016
  ins_encode( OpcP, RegOpc( dst ) );
8017
  ins_pipe( ialu_reg_reg );
8018
%}
8019

8020

8021
//----------Logical Instructions-----------------------------------------------
8022
//----------Integer Logical Instructions---------------------------------------
8023
// And Instructions
8024
// And Register with Register
8025
// Matches AndI with dst as both an input and the result (two-address form).
// Emits primary opcode 0x23 followed by a register-register byte
// (declared size 2). The flags register is declared killed.
instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8026
  match(Set dst (AndI dst src));
8027
  effect(KILL cr);
8028

8029
  size(2);
8030
  format %{ "AND    $dst,$src" %}
8031
  opcode(0x23);
8032
  ins_encode( OpcP, RegReg( dst, src) );
8033
  ins_pipe( ialu_reg_reg );
8034
%}
8035

8036
// And Register with Immediate
8037
// Matches AndI of a register with an int immediate, in place.
// No size() is declared here.
// NOTE(review): OpcSErm/Con8or32 presumably pick the sign-extended 8-bit
// immediate form when the constant fits, making the length variable --
// confirm against the encoding class definitions.
instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8038
  match(Set dst (AndI dst src));
8039
  effect(KILL cr);
8040

8041
  format %{ "AND    $dst,$src" %}
8042
  opcode(0x81,0x04);  /* Opcode 81 /4 */
8043
  // ins_encode( RegImm( dst, src) );
8044
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8045
  ins_pipe( ialu_reg );
8046
%}
8047

8048
// And Register with Memory
8049
// Matches AndI of a register with an int loaded from memory, folding the
// LoadI into the instruction. Costed at 150; flags are declared killed.
instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8050
  match(Set dst (AndI dst (LoadI src)));
8051
  effect(KILL cr);
8052

8053
  ins_cost(150);
8054
  format %{ "AND    $dst,$src" %}
8055
  opcode(0x23);
8056
  ins_encode( SetInstMark, OpcP, RegMem( dst, src), ClearInstMark );
8057
  ins_pipe( ialu_reg_mem );
8058
%}
8059

8060
// And Memory with Register
8061
// Read-modify-write form: matches StoreI of (AndI (LoadI dst) src) back to
// the same memory operand. Note the RegMem arguments are (src, dst): the
// register is the source and the memory operand is the destination.
instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8062
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8063
  effect(KILL cr);
8064

8065
  ins_cost(150);
8066
  format %{ "AND    $dst,$src" %}
8067
  opcode(0x21);  /* Opcode 21 /r */
8068
  ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark );
8069
  ins_pipe( ialu_mem_reg );
8070
%}
8071

8072
// And Memory with Immediate
8073
// Read-modify-write form: matches StoreI of (AndI (LoadI dst) src) with an
// int immediate. Costed at 125, i.e. cheaper than the register-source
// memory form (150). The flags register is declared killed.
instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8074
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8075
  effect(KILL cr);
8076

8077
  ins_cost(125);
8078
  format %{ "AND    $dst,$src" %}
8079
  opcode(0x81, 0x4);  /* Opcode 81 /4 id */
8080
  // ins_encode( MemImm( dst, src) );
8081
  ins_encode( SetInstMark, OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32(src), ClearInstMark );
8082
  ins_pipe( ialu_mem_imm );
8083
%}
8084

8085
// BMI1 instructions
8086
// Matches (src1 ^ -1) & src2, i.e. (~src1) & src2, and emits one
// three-operand andnl. Only selected when UseBMI1Instructions is set.
// The flags register is declared killed.
instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
8087
  match(Set dst (AndI (XorI src1 minus_1) src2));
8088
  predicate(UseBMI1Instructions);
8089
  effect(KILL cr);
8090

8091
  format %{ "ANDNL  $dst, $src1, $src2" %}
8092

8093
  ins_encode %{
8094
    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
8095
  %}
8096
  ins_pipe(ialu_reg);
8097
%}
8098

8099
// Memory-operand variant of andnI_rReg_rReg_rReg: matches
// (src1 ^ -1) & LoadI(src2) and folds the load into andnl.
// Only selected when UseBMI1Instructions is set; costed at 125.
instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
8100
  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
8101
  predicate(UseBMI1Instructions);
8102
  effect(KILL cr);
8103

8104
  ins_cost(125);
8105
  format %{ "ANDNL  $dst, $src1, $src2" %}
8106

8107
  ins_encode %{
8108
    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
8109
  %}
8110
  ins_pipe(ialu_reg_mem);
8111
%}
8112

8113
// Matches (0 - src) & src, the "isolate lowest set bit" idiom, and emits a
// single blsil. Only selected when UseBMI1Instructions is set.
// The flags register is declared killed.
instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, eFlagsReg cr) %{
8114
  match(Set dst (AndI (SubI imm_zero src) src));
8115
  predicate(UseBMI1Instructions);
8116
  effect(KILL cr);
8117

8118
  format %{ "BLSIL  $dst, $src" %}
8119

8120
  ins_encode %{
8121
    __ blsil($dst$$Register, $src$$Register);
8122
  %}
8123
  ins_pipe(ialu_reg);
8124
%}
8125

8126
// Memory-operand variant of blsiI_rReg_rReg: matches
// (0 - LoadI(src)) & LoadI(src), where both loads name the same memory
// operand, and emits one blsil with a memory source. Costed at 125.
instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, eFlagsReg cr) %{
8127
  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
8128
  predicate(UseBMI1Instructions);
8129
  effect(KILL cr);
8130

8131
  ins_cost(125);
8132
  format %{ "BLSIL  $dst, $src" %}
8133

8134
  ins_encode %{
8135
    __ blsil($dst$$Register, $src$$Address);
8136
  %}
8137
  ins_pipe(ialu_reg_mem);
8138
%}
8139

8140
// Matches (src + -1) ^ src, i.e. (src - 1) ^ src, and emits a single
// blsmskl. Only selected when UseBMI1Instructions is set.
// The flags register is declared killed.
instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8141
%{
8142
  match(Set dst (XorI (AddI src minus_1) src));
8143
  predicate(UseBMI1Instructions);
8144
  effect(KILL cr);
8145

8146
  format %{ "BLSMSKL $dst, $src" %}
8147

8148
  ins_encode %{
8149
    __ blsmskl($dst$$Register, $src$$Register);
8150
  %}
8151

8152
  ins_pipe(ialu_reg);
8153
%}
8154

8155
// Memory-operand variant of blsmskI_rReg_rReg: matches
// (LoadI(src) + -1) ^ LoadI(src), where both loads name the same memory
// operand, and emits one blsmskl with a memory source. Costed at 125.
instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8156
%{
8157
  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
8158
  predicate(UseBMI1Instructions);
8159
  effect(KILL cr);
8160

8161
  ins_cost(125);
8162
  format %{ "BLSMSKL $dst, $src" %}
8163

8164
  ins_encode %{
8165
    __ blsmskl($dst$$Register, $src$$Address);
8166
  %}
8167

8168
  ins_pipe(ialu_reg_mem);
8169
%}
8170

8171
// Matches (src + -1) & src, i.e. (src - 1) & src (clear lowest set bit), and
// emits a single blsrl. Only selected when UseBMI1Instructions is set.
// The flags register is declared killed.
instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8172
%{
8173
  match(Set dst (AndI (AddI src minus_1) src) );
8174
  predicate(UseBMI1Instructions);
8175
  effect(KILL cr);
8176

8177
  format %{ "BLSRL  $dst, $src" %}
8178

8179
  ins_encode %{
8180
    __ blsrl($dst$$Register, $src$$Register);
8181
  %}
8182

8183
  ins_pipe(ialu_reg);
8184
%}
8185

8186
// Memory-operand variant of blsrI_rReg_rReg: matches
// (LoadI(src) + -1) & LoadI(src), where both loads name the same memory
// operand, and emits one blsrl with a memory source. Costed at 125.
instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8187
%{
8188
  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
8189
  predicate(UseBMI1Instructions);
8190
  effect(KILL cr);
8191

8192
  ins_cost(125);
8193
  format %{ "BLSRL  $dst, $src" %}
8194

8195
  ins_encode %{
8196
    __ blsrl($dst$$Register, $src$$Address);
8197
  %}
8198

8199
  ins_pipe(ialu_reg_mem);
8200
%}
8201

8202
// Or Instructions
8203
// Or Register with Register
8204
// Matches OrI with dst as both an input and the result (two-address form).
// Emits primary opcode 0x0B followed by a register-register byte
// (declared size 2). The flags register is declared killed.
instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8205
  match(Set dst (OrI dst src));
8206
  effect(KILL cr);
8207

8208
  size(2);
8209
  format %{ "OR     $dst,$src" %}
8210
  opcode(0x0B);
8211
  ins_encode( OpcP, RegReg( dst, src) );
8212
  ins_pipe( ialu_reg_reg );
8213
%}
8214

8215
// Or Register with a pointer register reinterpreted as an integer: matches
// OrI of dst with (CastP2X src), so the cast needs no separate instruction.
// Encoding is identical to orI_eReg (opcode 0x0B, declared size 2).
instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
8216
  match(Set dst (OrI dst (CastP2X src)));
8217
  effect(KILL cr);
8218

8219
  size(2);
8220
  format %{ "OR     $dst,$src" %}
8221
  opcode(0x0B);
8222
  ins_encode( OpcP, RegReg( dst, src) );
8223
  ins_pipe( ialu_reg_reg );
8224
%}
8225

8226

8227
// Or Register with Immediate
8228
// Matches OrI of a register with an int immediate, in place (81 /1).
// No size() is declared; the encoding mirrors andI_eReg_imm with a
// different opcode extension. The flags register is declared killed.
instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8229
  match(Set dst (OrI dst src));
8230
  effect(KILL cr);
8231

8232
  format %{ "OR     $dst,$src" %}
8233
  opcode(0x81,0x01);  /* Opcode 81 /1 id */
8234
  // ins_encode( RegImm( dst, src) );
8235
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8236
  ins_pipe( ialu_reg );
8237
%}
8238

8239
// Or Register with Memory
8240
// Matches OrI of a register with an int loaded from memory, folding the
// LoadI into the instruction. Costed at 150; flags are declared killed.
instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8241
  match(Set dst (OrI dst (LoadI src)));
8242
  effect(KILL cr);
8243

8244
  ins_cost(150);
8245
  format %{ "OR     $dst,$src" %}
8246
  opcode(0x0B);
8247
  ins_encode( SetInstMark, OpcP, RegMem( dst, src), ClearInstMark );
8248
  ins_pipe( ialu_reg_mem );
8249
%}
8250

8251
// Or Memory with Register
8252
// Read-modify-write form: matches StoreI of (OrI (LoadI dst) src) back to
// the same memory operand. The RegMem arguments are (src, dst): the register
// is the source and the memory operand is the destination.
instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8253
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8254
  effect(KILL cr);
8255

8256
  ins_cost(150);
8257
  format %{ "OR     $dst,$src" %}
8258
  opcode(0x09);  /* Opcode 09 /r */
8259
  ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark );
8260
  ins_pipe( ialu_mem_reg );
8261
%}
8262

8263
// Or Memory with Immediate
8264
// Read-modify-write form: matches StoreI of (OrI (LoadI dst) src) with an
// int immediate (81 /1). Costed at 125; flags are declared killed.
instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8265
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8266
  effect(KILL cr);
8267

8268
  ins_cost(125);
8269
  format %{ "OR     $dst,$src" %}
8270
  opcode(0x81,0x1);  /* Opcode 81 /1 id */
8271
  // ins_encode( MemImm( dst, src) );
8272
  ins_encode( SetInstMark, OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32(src), ClearInstMark );
8273
  ins_pipe( ialu_mem_imm );
8274
%}
8275

8276
// ROL/ROR
8277
// ROL expand
8278
// Expand-only helper: it has no match rule, so it is only reachable through
// the expand blocks of other instructs (rolI_eReg_i1 in this file).
// Rotates dst left in place using the D1 /0 form; flags are declared killed.
instruct rolI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
8279
  effect(USE_DEF dst, USE shift, KILL cr);
8280

8281
  format %{ "ROL    $dst, $shift" %}
8282
  opcode(0xD1, 0x0); /* Opcode D1 /0 */
8283
  ins_encode( OpcP, RegOpc( dst ));
8284
  ins_pipe( ialu_reg );
8285
%}
8286

8287
// Expand-only helper (no match rule), used by rolI_eReg_i8.
// Rotates dst left in place by an 8-bit immediate; flags are declared killed.
instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8288
  effect(USE_DEF dst, USE shift, KILL cr);
8289

8290
  format %{ "ROL    $dst, $shift" %}
8291
  opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
8292
  ins_encode( RegOpcImm(dst, shift) );
8293
  ins_pipe(ialu_reg);
8294
%}
8295

8296
// Expand-only helper (no match rule), used by rolI_eReg_Var_C0 and
// rolI_eReg_Var_C32. Rotates dst left by the count in an eCXRegI operand.
// NOTE(review): dst is ncxRegI, presumably so it can never be allocated to
// the count register -- confirm against the register class definitions.
instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
8297
  effect(USE_DEF dst, USE shift, KILL cr);
8298

8299
  format %{ "ROL    $dst, $shift" %}
8300
  opcode(0xD3, 0x0);    /* Opcode D3 /0 */
8301
  ins_encode(OpcP, RegOpc(dst));
8302
  ins_pipe( ialu_reg_reg );
8303
%}
8304
// end of ROL expand
8305

8306
// ROL 32bit by one once
8307
// Matches (dst << 1) | (dst >>> -1) and expands to rolI_eReg_imm1.
// NOTE(review): the -1 right-shift count presumably relies on int shift
// counts being taken modulo 32 (so -1 acts as 31) -- confirm.
instruct rolI_eReg_i1(rRegI dst, immI_1 lshift, immI_M1 rshift, eFlagsReg cr) %{
8308
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8309

8310
  expand %{
8311
    rolI_eReg_imm1(dst, lshift, cr);
8312
  %}
8313
%}
8314

8315
// ROL 32bit var by imm8 once
8316
// Matches (dst << lshift) | (dst >>> rshift) with constant counts and expands
// to rolI_eReg_imm8 using the left-shift count.
// The predicate reads the two shift constants from the matched node's inputs
// and only accepts pairs whose sum has its low five bits clear (a multiple
// of 32), which is what makes the OR of the two shifts a rotate.
instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
8317
  predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8318
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8319

8320
  expand %{
8321
    rolI_eReg_imm8(dst, lshift, cr);
8322
  %}
8323
%}
8324

8325
// ROL 32bit var by var once
8326
// Matches (dst << shift) | (dst >>> (0 - shift)) with a variable count and
// expands to rolI_eReg_CL.
instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
8327
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
8328

8329
  expand %{
8330
    rolI_eReg_CL(dst, shift, cr);
8331
  %}
8332
%}
8333

8334
// ROL 32bit var by var once
8335
// Matches (dst << shift) | (dst >>> (32 - shift)) with a variable count and
// expands to rolI_eReg_CL. Same expansion as rolI_eReg_Var_C0; only the
// constant in the subtraction differs.
instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8336
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
8337

8338
  expand %{
8339
    rolI_eReg_CL(dst, shift, cr);
8340
  %}
8341
%}
8342

8343
// ROR expand
8344
// Expand-only helper (no match rule), used by rorI_eReg_i1.
// Rotates dst right in place using the D1 /1 form; flags are declared killed.
instruct rorI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
8345
  effect(USE_DEF dst, USE shift, KILL cr);
8346

8347
  format %{ "ROR    $dst, $shift" %}
8348
  opcode(0xD1,0x1);  /* Opcode D1 /1 */
8349
  ins_encode( OpcP, RegOpc( dst ) );
8350
  ins_pipe( ialu_reg );
8351
%}
8352

8353
// Expand-only helper (no match rule), used by rorI_eReg_i8.
// Rotates dst right in place by an 8-bit immediate; flags are declared killed.
instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8354
  effect (USE_DEF dst, USE shift, KILL cr);
8355

8356
  format %{ "ROR    $dst, $shift" %}
8357
  opcode(0xC1, 0x1); /* Opcode C1 /1 ib */
8358
  ins_encode( RegOpcImm(dst, shift) );
8359
  ins_pipe( ialu_reg );
8360
%}
8361

8362
// Expand-only helper (no match rule), used by rorI_eReg_Var_C0 and
// rorI_eReg_Var_C32. Rotates dst right by the count in an eCXRegI operand;
// flags are declared killed.
instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
8363
  effect(USE_DEF dst, USE shift, KILL cr);
8364

8365
  format %{ "ROR    $dst, $shift" %}
8366
  opcode(0xD3, 0x1);    /* Opcode D3 /1 */
8367
  ins_encode(OpcP, RegOpc(dst));
8368
  ins_pipe( ialu_reg_reg );
8369
%}
8370
// end of ROR expand
8371

8372
// ROR right once
8373
// Matches (dst >>> 1) | (dst << -1) and expands to rorI_eReg_imm1.
// NOTE(review): as with rolI_eReg_i1, the -1 count presumably relies on int
// shift counts being taken modulo 32 -- confirm.
instruct rorI_eReg_i1(rRegI dst, immI_1 rshift, immI_M1 lshift, eFlagsReg cr) %{
8374
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8375

8376
  expand %{
8377
    rorI_eReg_imm1(dst, rshift, cr);
8378
  %}
8379
%}
8380

8381
// ROR 32bit by immI8 once
8382
// Matches (dst >>> rshift) | (dst << lshift) with constant counts and expands
// to rorI_eReg_imm8 using the right-shift count.
// The predicate only accepts constant pairs whose sum has its low five bits
// clear (a multiple of 32), which is what makes the OR a rotate.
instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
8383
  predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8384
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8385

8386
  expand %{
8387
    rorI_eReg_imm8(dst, rshift, cr);
8388
  %}
8389
%}
8390

8391
// ROR 32bit var by var once
8392
// Matches (dst >>> shift) | (dst << (0 - shift)) with a variable count and
// expands to rorI_eReg_CL.
instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
8393
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
8394

8395
  expand %{
8396
    rorI_eReg_CL(dst, shift, cr);
8397
  %}
8398
%}
8399

8400
// ROR 32bit var by var once
8401
// Matches (dst >>> shift) | (dst << (32 - shift)) with a variable count and
// expands to rorI_eReg_CL. Same expansion as rorI_eReg_Var_C0; only the
// constant in the subtraction differs.
instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8402
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
8403

8404
  expand %{
8405
    rorI_eReg_CL(dst, shift, cr);
8406
  %}
8407
%}
8408

8409
// Xor Instructions
8410
// Xor Register with Register
8411
// Matches XorI with dst as both an input and the result (two-address form).
// Emits primary opcode 0x33 followed by a register-register byte
// (declared size 2). The flags register is declared killed.
instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8412
  match(Set dst (XorI dst src));
8413
  effect(KILL cr);
8414

8415
  size(2);
8416
  format %{ "XOR    $dst,$src" %}
8417
  opcode(0x33);
8418
  ins_encode( OpcP, RegReg( dst, src) );
8419
  ins_pipe( ialu_reg_reg );
8420
%}
8421

8422
// Xor Register with Immediate -1
8423
// Matches XorI of a register with the constant -1 and emits notl instead.
// Unlike the other Xor forms here, no flags operand is declared or killed.
instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
8424
  match(Set dst (XorI dst imm));
8425

8426
  size(2);
8427
  format %{ "NOT    $dst" %}
8428
  ins_encode %{
8429
     __ notl($dst$$Register);
8430
  %}
8431
  ins_pipe( ialu_reg );
8432
%}
8433

8434
// Xor Register with Immediate
8435
// Matches XorI of a register with an int immediate, in place (81 /6).
// No size() is declared; the encoding mirrors andI_eReg_imm with a
// different opcode extension. The flags register is declared killed.
instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8436
  match(Set dst (XorI dst src));
8437
  effect(KILL cr);
8438

8439
  format %{ "XOR    $dst,$src" %}
8440
  opcode(0x81,0x06);  /* Opcode 81 /6 id */
8441
  // ins_encode( RegImm( dst, src) );
8442
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8443
  ins_pipe( ialu_reg );
8444
%}
8445

8446
// Xor Register with Memory
8447
// Matches XorI of a register with an int loaded from memory, folding the
// LoadI into the instruction. Costed at 150; flags are declared killed.
instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8448
  match(Set dst (XorI dst (LoadI src)));
8449
  effect(KILL cr);
8450

8451
  ins_cost(150);
8452
  format %{ "XOR    $dst,$src" %}
8453
  opcode(0x33);
8454
  ins_encode( SetInstMark, OpcP, RegMem(dst, src), ClearInstMark );
8455
  ins_pipe( ialu_reg_mem );
8456
%}
8457

8458
// Xor Memory with Register
8459
// Read-modify-write form: matches StoreI of (XorI (LoadI dst) src) back to
// the same memory operand. The RegMem arguments are (src, dst): the register
// is the source and the memory operand is the destination.
instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8460
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8461
  effect(KILL cr);
8462

8463
  ins_cost(150);
8464
  format %{ "XOR    $dst,$src" %}
8465
  opcode(0x31);  /* Opcode 31 /r */
8466
  ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark );
8467
  ins_pipe( ialu_mem_reg );
8468
%}
8469

8470
// Xor Memory with Immediate
8471
// Read-modify-write form: matches StoreI of (XorI (LoadI dst) src) with an
// int immediate (81 /6). Costed at 125; flags are declared killed.
instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8472
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8473
  effect(KILL cr);
8474

8475
  ins_cost(125);
8476
  format %{ "XOR    $dst,$src" %}
8477
  opcode(0x81,0x6);  /* Opcode 81 /6 id */
8478
  ins_encode( SetInstMark, OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32(src), ClearInstMark );
8479
  ins_pipe( ialu_mem_imm );
8480
%}
8481

8482
//----------Convert Int to Boolean---------------------------------------------
8483

8484
// Expand-only helper (no match rule): a plain register-to-register move that
// defines dst from src. Used as the first step of convI2B's expansion.
instruct movI_nocopy(rRegI dst, rRegI src) %{
8485
  effect( DEF dst, USE src );
8486
  format %{ "MOV    $dst,$src" %}
8487
  ins_encode( enc_Copy( dst, src) );
8488
  ins_pipe( ialu_reg_reg );
8489
%}
8490

8491
// Expand-only helper (no match rule): second step of convI2B.
// Formatted as NEG dst followed by ADC dst,src (opcode 0x13), 4 bytes total.
// In convI2B's expansion dst already holds a copy of src, so the sequence
// computes (-src) + src + carry, where NEG sets carry exactly when its
// operand is non-zero: the result is 1 for non-zero src and 0 for zero.
instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
8492
  effect( USE_DEF dst, USE src, KILL cr );
8493

8494
  size(4);
8495
  format %{ "NEG    $dst\n\t"
8496
            "ADC    $dst,$src" %}
8497
  ins_encode( neg_reg(dst),
8498
              OpcRegReg(0x13,dst,src) );
8499
  ins_pipe( ialu_reg_reg_long );
8500
%}
8501

8502
// Matches Conv2B on an int register and expands into two helper instructs:
// movI_nocopy copies src into dst, then ci2b turns that copy into 0 or 1.
instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
8503
  match(Set dst (Conv2B src));
8504

8505
  expand %{
8506
    movI_nocopy(dst,src);
8507
    ci2b(dst,src,cr);
8508
  %}
8509
%}
8510

8511
// Expand-only helper (no match rule): pointer-source counterpart of
// movI_nocopy. Defines the int register dst from the pointer register src.
// Used as the first step of convP2B's expansion.
instruct movP_nocopy(rRegI dst, eRegP src) %{
8512
  effect( DEF dst, USE src );
8513
  format %{ "MOV    $dst,$src" %}
8514
  ins_encode( enc_Copy( dst, src) );
8515
  ins_pipe( ialu_reg_reg );
8516
%}
8517

8518
// Expand-only helper (no match rule): pointer-source counterpart of ci2b and
// the second step of convP2B. Formatted as NEG dst followed by ADC dst,src;
// with dst holding a copy of src this yields 1 for a non-null src and 0 for
// null, because NEG sets carry exactly when its operand is non-zero.
instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
8519
  effect( USE_DEF dst, USE src, KILL cr );
  // Same two encoders as ci2b, so declare the same fixed size for consistency.
  size(4);
8520
  format %{ "NEG    $dst\n\t"
8521
            "ADC    $dst,$src" %}
8522
  ins_encode( neg_reg(dst),
8523
              OpcRegReg(0x13,dst,src) );
8524
  ins_pipe( ialu_reg_reg_long );
8525
%}
8526

8527
// Matches Conv2B on a pointer register and expands into two helper
// instructs: movP_nocopy copies src into dst, then cp2b turns that copy
// into 0 or 1.
instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
8528
  match(Set dst (Conv2B src));
8529

8530
  expand %{
8531
    movP_nocopy(dst,src);
8532
    cp2b(dst,src,cr);
8533
  %}
8534
%}
8535

8536
// Matches CmpLTMask: dst becomes -1 (all ones) when p < q (signed), else 0.
// dst is zeroed first, the compare then sets the flags, SETlt writes 0/1 into
// dst and NEG turns 1 into -1. The flags register is declared killed.
// NOTE(review): p and q are ncxRegI while dst is eCXRegI, presumably so that
// zeroing dst before the compare can never clobber an input -- confirm.
instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
8537
  match(Set dst (CmpLTMask p q));
8538
  effect(KILL cr);
8539
  ins_cost(400);
8540

8541
  // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
8542
  format %{ "XOR    $dst,$dst\n\t"
8543
            "CMP    $p,$q\n\t"
8544
            "SETlt  $dst\n\t"
8545
            "NEG    $dst" %}
8546
  ins_encode %{
8547
    Register Rp = $p$$Register;
8548
    Register Rq = $q$$Register;
8549
    Register Rd = $dst$$Register;
8551
    __ xorl(Rd, Rd);
8552
    __ cmpl(Rp, Rq);
8553
    __ setb(Assembler::less, Rd);
8554
    __ negl(Rd);
8555
  %}
8556

8557
  ins_pipe(pipe_slow);
8558
%}
8559

8560
// Special case of CmpLTMask against the constant zero, computed in place:
// an arithmetic right shift by 31 replicates the sign bit, giving -1 for a
// negative dst and 0 otherwise. Costed at 100, versus 400 for the general
// cmpLTMask form. The flags register is declared killed.
instruct cmpLTMask0(rRegI dst, immI_0 zero, eFlagsReg cr) %{
8561
  match(Set dst (CmpLTMask dst zero));
8562
  effect(DEF dst, KILL cr);
8563
  ins_cost(100);
8564

8565
  format %{ "SAR    $dst,31\t# cmpLTMask0" %}
8566
  ins_encode %{
8567
  __ sarl($dst$$Register, 31);
8568
  %}
8569
  ins_pipe(ialu_reg);
8570
%}
8571

8572
/* better to save a register than avoid a branch */
8573
// Matches p = ((p < q ? -1 : 0) & y) + (p - q), i.e. "subtract q, then add y
// back if p was less than q". Implemented with a short forward branch instead
// of materializing the mask, so no extra register is needed.
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8574
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
8575
  effect(KILL cr);
8576
  ins_cost(400);
8577
  format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
8578
            "JGE    done\n\t"
8579
            "ADD    $p,$y\n"
8580
            "done:  " %}
8581
  ins_encode %{
8582
    Register Rp = $p$$Register;
8583
    Register Rq = $q$$Register;
8584
    Register Ry = $y$$Register;
8585
    Label done;
    // The flags from the subtraction double as the p-versus-q comparison.
8586
    __ subl(Rp, Rq);
    // Original p >= q: the mask would be zero, so skip the add.
8587
    __ jccb(Assembler::greaterEqual, done);
8588
    __ addl(Rp, Ry);
8589
    __ bind(done);
8590
  %}
8591

8592
  ins_pipe(pipe_cmplt);
8593
%}
8594

8595
/* better to save a register than avoid a branch */
8596
// Matches y = (p < q ? -1 : 0) & y, i.e. "keep y if p < q, otherwise zero
// it". Implemented with a compare and a short forward branch instead of
// materializing the mask. The flags register is declared killed.
instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8597
  match(Set y (AndI (CmpLTMask p q) y));
8598
  effect(KILL cr);
8599

8600
  ins_cost(300);
8601

8602
  format %{ "CMPL     $p, $q\t# and_cmpLTMask\n\t"
8603
            "JLT      done\n\t"
8604
            "XORL     $y, $y\n"
8605
            "done:  " %}
8606
  ins_encode %{
8607
    Register Rp = $p$$Register;
8608
    Register Rq = $q$$Register;
8609
    Register Ry = $y$$Register;
8610
    Label done;
8611
    __ cmpl(Rp, Rq);
    // p < q: the mask would be all ones, so y is left unchanged.
8612
    __ jccb(Assembler::less, done);
    // Otherwise the mask would be zero: clear y.
8613
    __ xorl(Ry, Ry);
8614
    __ bind(done);
8615
  %}
8616

8617
  ins_pipe(pipe_cmplt);
8618
%}
8619

8620
/* If I enable this, I encourage spilling in the inner loop of compress.
8621
instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
8622
  match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
8623
*/
8624
//----------Overflow Math Instructions-----------------------------------------
8625

8626
// Matches OverflowAddI: the result of this instruct is the flags register,
// produced by an actual addl. The add overwrites op1, so op1 is declared
// USE_KILL (and is constrained to the eAXRegI class); op2 is only read.
instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8627
%{
8628
  match(Set cr (OverflowAddI op1 op2));
8629
  effect(DEF cr, USE_KILL op1, USE op2);
8630

8631
  format %{ "ADD    $op1, $op2\t# overflow check int" %}
8632

8633
  ins_encode %{
8634
    __ addl($op1$$Register, $op2$$Register);
8635
  %}
8636
  ins_pipe(ialu_reg_reg);
8637
%}
8638

8639
// Immediate variant of overflowAddI_eReg: matches OverflowAddI with a
// constant second operand and produces the flags with addl reg, imm.
// op1 is overwritten by the add, hence USE_KILL.
instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
8640
%{
8641
  match(Set cr (OverflowAddI op1 op2));
8642
  effect(DEF cr, USE_KILL op1, USE op2);
8643

8644
  format %{ "ADD    $op1, $op2\t# overflow check int" %}
8645

8646
  ins_encode %{
8647
    __ addl($op1$$Register, $op2$$constant);
8648
  %}
8649
  ins_pipe(ialu_reg_reg);
8650
%}
8651

8652
// Matches OverflowSubI and produces the flags with cmpl rather than a real
// subtraction, so neither operand is overwritten and no effect() clause
// is declared.
instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
8653
%{
8654
  match(Set cr (OverflowSubI op1 op2));
8655

8656
  format %{ "CMP    $op1, $op2\t# overflow check int" %}
8657
  ins_encode %{
8658
    __ cmpl($op1$$Register, $op2$$Register);
8659
  %}
8660
  ins_pipe(ialu_reg_reg);
8661
%}
8662

8663
// Immediate variant of overflowSubI_rReg: matches OverflowSubI with a
// constant second operand and produces the flags with cmpl reg, imm.
// No operand is overwritten.
instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
8664
%{
8665
  match(Set cr (OverflowSubI op1 op2));
8666

8667
  format %{ "CMP    $op1, $op2\t# overflow check int" %}
8668
  ins_encode %{
8669
    __ cmpl($op1$$Register, $op2$$constant);
8670
  %}
8671
  ins_pipe(ialu_reg_reg);
8672
%}
8673

8674
// Matches OverflowSubI with a constant-zero first operand (negation
// overflow check) and produces the flags with negl. The negation overwrites
// op2, so op2 is declared USE_KILL (and is constrained to eAXRegI).
instruct overflowNegI_rReg(eFlagsReg cr, immI_0 zero, eAXRegI op2)
8675
%{
8676
  match(Set cr (OverflowSubI zero op2));
8677
  effect(DEF cr, USE_KILL op2);
8678

8679
  format %{ "NEG    $op2\t# overflow check int" %}
8680
  ins_encode %{
8681
    __ negl($op2$$Register);
8682
  %}
8683
  ins_pipe(ialu_reg_reg);
8684
%}
8685

8686
// Matches OverflowMulI and produces the flags with a two-operand imull.
// The multiply overwrites op1, so op1 is declared USE_KILL (and is
// constrained to eAXRegI); op2 is only read.
instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8687
%{
8688
  match(Set cr (OverflowMulI op1 op2));
8689
  effect(DEF cr, USE_KILL op1, USE op2);
8690

8691
  format %{ "IMUL    $op1, $op2\t# overflow check int" %}
8692
  ins_encode %{
8693
    __ imull($op1$$Register, $op2$$Register);
8694
  %}
8695
  ins_pipe(ialu_reg_reg_alu0);
8696
%}
8697

8698
// Immediate variant of overflowMulI_rReg. Uses the three-operand imull,
// which writes the product into the TEMP register tmp, so op1 is only read
// (USE) and does not have to be killed.
instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
8699
%{
8700
  match(Set cr (OverflowMulI op1 op2));
8701
  effect(DEF cr, TEMP tmp, USE op1, USE op2);
8702

8703
  format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
8704
  ins_encode %{
8705
    __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
8706
  %}
8707
  ins_pipe(ialu_reg_reg_alu0);
8708
%}
8709

8710
// Integer Absolute Instructions
8711
// Matches AbsI and computes it without a branch:
//   tmp = src >> 31 (arithmetic), so tmp is -1 for negative src, else 0;
//   dst = (src ^ tmp) - tmp.
// dst and tmp are both declared TEMP and the flags register is declared
// killed.
instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr)
8712
%{
8713
  match(Set dst (AbsI src));
8714
  effect(TEMP dst, TEMP tmp, KILL cr);
8715
  format %{ "movl $tmp, $src\n\t"
8716
            "sarl $tmp, 31\n\t"
8717
            "movl $dst, $src\n\t"
8718
            "xorl $dst, $tmp\n\t"
8719
            "subl $dst, $tmp\n"
8720
          %}
8721
  ins_encode %{
8722
    __ movl($tmp$$Register, $src$$Register);
8723
    __ sarl($tmp$$Register, 31);
8724
    __ movl($dst$$Register, $src$$Register);
8725
    __ xorl($dst$$Register, $tmp$$Register);
8726
    __ subl($dst$$Register, $tmp$$Register);
8727
  %}
8728

8729
  ins_pipe(ialu_reg_reg);
8730
%}
8731

8732
//----------Long Instructions------------------------------------------------
8733
// Add Long Register with Register
8734
// Matches AddL on long register operands, in place. Formatted as ADD on the
// low halves followed by ADC on the high halves; the two opcode bytes
// (0x03, 0x13) are consumed by the RegReg_Lo / RegReg_Hi encoders.
// The flags register is declared killed.
instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8735
  match(Set dst (AddL dst src));
8736
  effect(KILL cr);
8737
  ins_cost(200);
8738
  format %{ "ADD    $dst.lo,$src.lo\n\t"
8739
            "ADC    $dst.hi,$src.hi" %}
8740
  opcode(0x03, 0x13);
8741
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8742
  ins_pipe( ialu_reg_reg_long );
8743
%}
8744

8745
// Add Long Register with Immediate
8746
// Matches AddL of a long register with a long immediate, in place.
// Formatted as ADD on the low half and ADC on the high half, each with the
// corresponding half of the constant (81 /0 then 81 /2).
instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8747
  match(Set dst (AddL dst src));
8748
  effect(KILL cr);
8749
  format %{ "ADD    $dst.lo,$src.lo\n\t"
8750
            "ADC    $dst.hi,$src.hi" %}
8751
  opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
8752
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8753
  ins_pipe( ialu_reg_long );
8754
%}
8755

8756
// Add Long Register with Memory
8757
// Matches AddL of a long register with a long loaded from memory, folding
// the LoadL into the two instructions: ADD with the word at $mem, then ADC
// with the word at $mem+4. The memory operand uses the load_long_memory
// operand class rather than plain memory.
instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8758
  match(Set dst (AddL dst (LoadL mem)));
8759
  effect(KILL cr);
8760
  ins_cost(125);
8761
  format %{ "ADD    $dst.lo,$mem\n\t"
8762
            "ADC    $dst.hi,$mem+4" %}
8763
  opcode(0x03, 0x13);
8764
  ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark );
8765
  ins_pipe( ialu_reg_long_mem );
8766
%}
8767

8768
// Subtract Long Register with Register.
8769
// Matches SubL on long register operands, in place. Formatted as SUB on the
// low halves followed by SBB (subtract with borrow) on the high halves,
// using opcode bytes 0x2B and 0x1B. The flags register is declared killed.
instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8770
  match(Set dst (SubL dst src));
8771
  effect(KILL cr);
8772
  ins_cost(200);
8773
  format %{ "SUB    $dst.lo,$src.lo\n\t"
8774
            "SBB    $dst.hi,$src.hi" %}
8775
  opcode(0x2B, 0x1B);
8776
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8777
  ins_pipe( ialu_reg_reg_long );
8778
%}
8779

8780
// Subtract Long Register with Immediate
8781
// Matches SubL of a long register with a long immediate, in place.
// Formatted as SUB on the low half and SBB on the high half, each with the
// corresponding half of the constant (81 /5 then 81 /3).
instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8782
  match(Set dst (SubL dst src));
8783
  effect(KILL cr);
8784
  format %{ "SUB    $dst.lo,$src.lo\n\t"
8785
            "SBB    $dst.hi,$src.hi" %}
8786
  opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
8787
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8788
  ins_pipe( ialu_reg_long );
8789
%}
8790

8791
// Subtract Long Register with Memory
8792
// Matches SubL of a long register with a long loaded from memory, folding
// the LoadL into the two instructions: SUB with the word at $mem, then SBB
// with the word at $mem+4.
instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8793
  match(Set dst (SubL dst (LoadL mem)));
8794
  effect(KILL cr);
8795
  ins_cost(125);
8796
  format %{ "SUB    $dst.lo,$mem\n\t"
8797
            "SBB    $dst.hi,$mem+4" %}
8798
  opcode(0x2B, 0x1B);
8799
  ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark );
8800
  ins_pipe( ialu_reg_long_mem );
8801
%}
8802

8803
// Matches long negation expressed as (0 - dst), in place. Formatted as
// NEG hi, NEG lo, SBB hi,0: the final SBB subtracts the borrow produced by
// negating the low half from the high half. The instruction bytes come from
// the neg_long encoding class, which is defined outside this section.
instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
8804
  match(Set dst (SubL zero dst));
8805
  effect(KILL cr);
8806
  ins_cost(300);
8807
  format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
8808
  ins_encode( neg_long(dst) );
8809
  ins_pipe( ialu_reg_reg_long );
8810
%}
8811

8812
// And Long Register with Register
8813
// Matches AndL on long register operands, in place. Formatted as two
// independent ANDs, one per 32-bit half; both use opcode byte 0x23.
// The flags register is declared killed.
instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8814
  match(Set dst (AndL dst src));
8815
  effect(KILL cr);
8816
  format %{ "AND    $dst.lo,$src.lo\n\t"
8817
            "AND    $dst.hi,$src.hi" %}
8818
  opcode(0x23,0x23);
8819
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
8820
  ins_pipe( ialu_reg_reg_long );
8821
%}
8822

8823
// And Long Register with Immediate
8824
// Matches AndL of a long register with a long immediate, in place.
// Formatted as two ANDs, one per 32-bit half, each with the corresponding
// half of the constant (81 /4 for both).
instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8825
  match(Set dst (AndL dst src));
8826
  effect(KILL cr);
8827
  format %{ "AND    $dst.lo,$src.lo\n\t"
8828
            "AND    $dst.hi,$src.hi" %}
8829
  opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
8830
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8831
  ins_pipe( ialu_reg_long );
8832
%}
8833

8834
// And Long Register with Memory
8835
// Matches AndL of a long register with a long loaded from memory, folding
// the LoadL into two ANDs: one with the word at $mem and one with the word
// at $mem+4.
instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8836
  match(Set dst (AndL dst (LoadL mem)));
8837
  effect(KILL cr);
8838
  ins_cost(125);
8839
  format %{ "AND    $dst.lo,$mem\n\t"
8840
            "AND    $dst.hi,$mem+4" %}
8841
  opcode(0x23, 0x23);
8842
  ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark );
8843
  ins_pipe( ialu_reg_long_mem );
8844
%}
8845

8846
// BMI1 instructions
8847
// Long version of the ANDN idiom: matches (src1 ^ -1) & src2 on long
// operands and emits two 32-bit andnl instructions, one per half.
// Only selected when UseBMI1Instructions is set. dst is declared TEMP and
// the flags register is declared killed.
instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
8848
  match(Set dst (AndL (XorL src1 minus_1) src2));
8849
  predicate(UseBMI1Instructions);
8850
  effect(KILL cr, TEMP dst);
8851

8852
  format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
8853
            "ANDNL  $dst.hi, $src1.hi, $src2.hi"
8854
         %}
8855

8856
  ins_encode %{
8857
    Register Rdst = $dst$$Register;
8858
    Register Rsrc1 = $src1$$Register;
8859
    Register Rsrc2 = $src2$$Register;
    // Low halves first, then the high halves.
    // NOTE(review): HIGH_FROM_LOW is defined elsewhere; presumably it maps the
    // register holding the low word to the one holding the high word.
8860
    __ andnl(Rdst, Rsrc1, Rsrc2);
8861
    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
8862
  %}
8863
  ins_pipe(ialu_reg_reg_long);
8864
%}
8865

8866
// Memory-operand variant of andnL_eReg_eReg_eReg: matches
// (src1 ^ -1) & LoadL(src2) and emits two andnl instructions, reading the
// low word at the operand's address and the high word 4 bytes above it.
// Only selected when UseBMI1Instructions is set; dst is declared TEMP.
instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
8867
  match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
8868
  predicate(UseBMI1Instructions);
8869
  effect(KILL cr, TEMP dst);
8870

8871
  ins_cost(125);
8872
  format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
8873
            "ANDNL  $dst.hi, $src1.hi, $src2+4"
8874
         %}
8875

8876
  ins_encode %{
8877
    Register Rdst = $dst$$Register;
8878
    Register Rsrc1 = $src1$$Register;
    // Address of the high word: same base/index/scale, displacement + 4.
8879
    Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
8880

8881
    __ andnl(Rdst, Rsrc1, $src2$$Address);
8882
    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
8883
  %}
8884
  ins_pipe(ialu_reg_mem);
8885
%}
8886

8887
// Long version of the BLSI idiom: matches (0 - src) & src on long operands.
// The lowest set bit lies in exactly one 32-bit half, so the high half of dst
// is zeroed first and the high-word blsil only runs when the low-word result
// was zero. Only selected when UseBMI1Instructions is set; dst is declared
// TEMP and the flags register is declared killed.
instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
8888
  match(Set dst (AndL (SubL imm_zero src) src));
8889
  predicate(UseBMI1Instructions);
8890
  effect(KILL cr, TEMP dst);
8891

8892
  format %{ "MOVL   $dst.hi, 0\n\t"
8893
            "BLSIL  $dst.lo, $src.lo\n\t"
8894
            "JNZ    done\n\t"
8895
            "BLSIL  $dst.hi, $src.hi\n"
8896
            "done:"
8897
         %}
8898

8899
  ins_encode %{
8900
    Label done;
8901
    Register Rdst = $dst$$Register;
8902
    Register Rsrc = $src$$Register;
    // Zero the high half before blsil so the move cannot disturb the flags
    // that the branch below tests.
8903
    __ movl(HIGH_FROM_LOW(Rdst), 0);
8904
    __ blsil(Rdst, Rsrc);
    // Non-zero low result: the lowest set bit was in the low word; done.
8905
    __ jccb(Assembler::notZero, done);
8906
    __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8907
    __ bind(done);
8908
  %}
8909
  ins_pipe(ialu_reg);
8910
%}
8911

8912
// Memory-operand variant of blsiL_eReg_eReg: matches
// (0 - LoadL(src)) & LoadL(src), where both loads name the same memory
// operand. The low word is read at the operand's address and the high word
// 4 bytes above it; the high-word blsil only runs when the low-word result
// was zero. Only selected when UseBMI1Instructions is set.
instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
8913
  match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
8914
  predicate(UseBMI1Instructions);
8915
  effect(KILL cr, TEMP dst);
8916

8917
  ins_cost(125);
8918
  format %{ "MOVL   $dst.hi, 0\n\t"
8919
            "BLSIL  $dst.lo, $src\n\t"
8920
            "JNZ    done\n\t"
8921
            "BLSIL  $dst.hi, $src+4\n"
8922
            "done:"
8923
         %}
8924

8925
  ins_encode %{
8926
    Label done;
8927
    Register Rdst = $dst$$Register;
    // Address of the high word: same base/index/scale, displacement + 4.
8928
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
8929

8930
    __ movl(HIGH_FROM_LOW(Rdst), 0);
8931
    __ blsil(Rdst, $src$$Address);
    // Non-zero low result: the lowest set bit was in the low word; done.
8932
    __ jccb(Assembler::notZero, done);
8933
    __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
8934
    __ bind(done);
8935
  %}
8936
  ins_pipe(ialu_reg_mem);
8937
%}
8938

8939
// Long version of the BLSMSK idiom: matches (src + -1) ^ src on long
// operands. The high half of dst is zeroed, blsmskl runs on the low word, and
// the high-word blsmskl only runs when carry is set afterwards.
// NOTE(review): this relies on BLSMSK setting the carry flag exactly when its
// source is zero (i.e. the mask must extend into the high word) -- confirm
// against the instruction reference.
instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
8940
%{
8941
  match(Set dst (XorL (AddL src minus_1) src));
8942
  predicate(UseBMI1Instructions);
8943
  effect(KILL cr, TEMP dst);
8944

8945
  format %{ "MOVL    $dst.hi, 0\n\t"
8946
            "BLSMSKL $dst.lo, $src.lo\n\t"
8947
            "JNC     done\n\t"
8948
            "BLSMSKL $dst.hi, $src.hi\n"
8949
            "done:"
8950
         %}
8951

8952
  ins_encode %{
8953
    Label done;
8954
    Register Rdst = $dst$$Register;
8955
    Register Rsrc = $src$$Register;
    // Zero the high half before blsmskl so the move cannot disturb the carry
    // flag that the branch below tests.
8956
    __ movl(HIGH_FROM_LOW(Rdst), 0);
8957
    __ blsmskl(Rdst, Rsrc);
8958
    __ jccb(Assembler::carryClear, done);
8959
    __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8960
    __ bind(done);
8961
  %}
8962

8963
  ins_pipe(ialu_reg);
8964
%}
8965

8966
// Memory-operand variant of blsmskL_eReg_eReg: matches
// (LoadL(src) + -1) ^ LoadL(src), where both loads name the same memory
// operand. The low word is read at the operand's address and the high word
// 4 bytes above it; the high-word blsmskl only runs when carry is set after
// the low-word one.
instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
8967
%{
8968
  match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
8969
  predicate(UseBMI1Instructions);
8970
  effect(KILL cr, TEMP dst);
8971

8972
  ins_cost(125);
8973
  format %{ "MOVL    $dst.hi, 0\n\t"
8974
            "BLSMSKL $dst.lo, $src\n\t"
8975
            "JNC     done\n\t"
8976
            "BLSMSKL $dst.hi, $src+4\n"
8977
            "done:"
8978
         %}
8979

8980
  ins_encode %{
8981
    Label done;
8982
    Register Rdst = $dst$$Register;
    // Address of the high word: same base/index/scale, displacement + 4.
8983
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
8984

8985
    __ movl(HIGH_FROM_LOW(Rdst), 0);
8986
    __ blsmskl(Rdst, $src$$Address);
8987
    __ jccb(Assembler::carryClear, done);
8988
    __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
8989
    __ bind(done);
8990
  %}
8991

8992
  ins_pipe(ialu_reg_mem);
8993
%}
8994

8995
// Long version of the BLSR idiom: matches (src + -1) & src on long operands.
// The high half of dst starts as a copy of src's high half (unchanged when
// the cleared bit is in the low word); blsrl runs on the low word, and the
// high-word blsrl only runs when carry is set afterwards.
// NOTE(review): this relies on BLSR setting the carry flag exactly when its
// source is zero -- confirm against the instruction reference.
instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
8996
%{
8997
  match(Set dst (AndL (AddL src minus_1) src) );
8998
  predicate(UseBMI1Instructions);
8999
  effect(KILL cr, TEMP dst);
9000

9001
  format %{ "MOVL   $dst.hi, $src.hi\n\t"
9002
            "BLSRL  $dst.lo, $src.lo\n\t"
9003
            "JNC    done\n\t"
9004
            "BLSRL  $dst.hi, $src.hi\n"
9005
            "done:"
9006
  %}
9007

9008
  ins_encode %{
9009
    Label done;
9010
    Register Rdst = $dst$$Register;
9011
    Register Rsrc = $src$$Register;
    // Copy the high half before blsrl so the move cannot disturb the carry
    // flag that the branch below tests.
9012
    __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9013
    __ blsrl(Rdst, Rsrc);
9014
    __ jccb(Assembler::carryClear, done);
9015
    __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9016
    __ bind(done);
9017
  %}
9018

9019
  ins_pipe(ialu_reg);
9020
%}
9021

9022
// Memory-operand variant of blsrL_eReg_eReg: matches
// (LoadL(src) + -1) & LoadL(src), where both loads name the same memory
// operand. The high half of dst is first loaded from the word 4 bytes above
// the operand's address; the high-word blsrl only runs when carry is set
// after the low-word one.
instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
9023
%{
9024
  match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
9025
  predicate(UseBMI1Instructions);
9026
  effect(KILL cr, TEMP dst);
9027

9028
  ins_cost(125);
9029
  format %{ "MOVL   $dst.hi, $src+4\n\t"
9030
            "BLSRL  $dst.lo, $src\n\t"
9031
            "JNC    done\n\t"
9032
            "BLSRL  $dst.hi, $src+4\n"
9033
            "done:"
9034
  %}
9035

9036
  ins_encode %{
9037
    Label done;
9038
    Register Rdst = $dst$$Register;
    // Address of the high word: same base/index/scale, displacement + 4.
9039
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
9040
    __ movl(HIGH_FROM_LOW(Rdst), src_hi);
9041
    __ blsrl(Rdst, $src$$Address);
9042
    __ jccb(Assembler::carryClear, done);
9043
    __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
9044
    __ bind(done);
9045
  %}
9046

9047
  ins_pipe(ialu_reg_mem);
9048
%}
9049

9050
// Or Long Register with Register
9051
instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9052
  match(Set dst (OrL dst src));
9053
  effect(KILL cr);
9054
  format %{ "OR     $dst.lo,$src.lo\n\t"
9055
            "OR     $dst.hi,$src.hi" %}
9056
  opcode(0x0B,0x0B);
9057
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9058
  ins_pipe( ialu_reg_reg_long );
9059
%}
9060

9061
// Or Long Register with Immediate
9062
instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9063
  match(Set dst (OrL dst src));
9064
  effect(KILL cr);
9065
  format %{ "OR     $dst.lo,$src.lo\n\t"
9066
            "OR     $dst.hi,$src.hi" %}
9067
  opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
9068
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9069
  ins_pipe( ialu_reg_long );
9070
%}
9071

9072
// Or Long Register with Memory
9073
instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9074
  match(Set dst (OrL dst (LoadL mem)));
9075
  effect(KILL cr);
9076
  ins_cost(125);
9077
  format %{ "OR     $dst.lo,$mem\n\t"
9078
            "OR     $dst.hi,$mem+4" %}
9079
  opcode(0x0B,0x0B);
9080
  ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark );
9081
  ins_pipe( ialu_reg_long_mem );
9082
%}
9083

9084
// Xor Long Register with Register
9085
instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9086
  match(Set dst (XorL dst src));
9087
  effect(KILL cr);
9088
  format %{ "XOR    $dst.lo,$src.lo\n\t"
9089
            "XOR    $dst.hi,$src.hi" %}
9090
  opcode(0x33,0x33);
9091
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9092
  ins_pipe( ialu_reg_reg_long );
9093
%}
9094

9095
// Xor Long Register with Immediate -1
9096
instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
9097
  match(Set dst (XorL dst imm));
9098
  format %{ "NOT    $dst.lo\n\t"
9099
            "NOT    $dst.hi" %}
9100
  ins_encode %{
9101
     __ notl($dst$$Register);
9102
     __ notl(HIGH_FROM_LOW($dst$$Register));
9103
  %}
9104
  ins_pipe( ialu_reg_long );
9105
%}
9106

9107
// Xor Long Register with Immediate
9108
instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9109
  match(Set dst (XorL dst src));
9110
  effect(KILL cr);
9111
  format %{ "XOR    $dst.lo,$src.lo\n\t"
9112
            "XOR    $dst.hi,$src.hi" %}
9113
  opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
9114
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9115
  ins_pipe( ialu_reg_long );
9116
%}
9117

9118
// Xor Long Register with Memory
9119
instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9120
  match(Set dst (XorL dst (LoadL mem)));
9121
  effect(KILL cr);
9122
  ins_cost(125);
9123
  format %{ "XOR    $dst.lo,$mem\n\t"
9124
            "XOR    $dst.hi,$mem+4" %}
9125
  opcode(0x33,0x33);
9126
  ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark );
9127
  ins_pipe( ialu_reg_long_mem );
9128
%}
9129

9130
// Shift Left Long by 1
9131
instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
9132
  predicate(UseNewLongLShift);
9133
  match(Set dst (LShiftL dst cnt));
9134
  effect(KILL cr);
9135
  ins_cost(100);
9136
  format %{ "ADD    $dst.lo,$dst.lo\n\t"
9137
            "ADC    $dst.hi,$dst.hi" %}
9138
  ins_encode %{
9139
    __ addl($dst$$Register,$dst$$Register);
9140
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9141
  %}
9142
  ins_pipe( ialu_reg_long );
9143
%}
9144

9145
// Shift Left Long by 2
9146
instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
9147
  predicate(UseNewLongLShift);
9148
  match(Set dst (LShiftL dst cnt));
9149
  effect(KILL cr);
9150
  ins_cost(100);
9151
  format %{ "ADD    $dst.lo,$dst.lo\n\t"
9152
            "ADC    $dst.hi,$dst.hi\n\t"
9153
            "ADD    $dst.lo,$dst.lo\n\t"
9154
            "ADC    $dst.hi,$dst.hi" %}
9155
  ins_encode %{
9156
    __ addl($dst$$Register,$dst$$Register);
9157
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9158
    __ addl($dst$$Register,$dst$$Register);
9159
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9160
  %}
9161
  ins_pipe( ialu_reg_long );
9162
%}
9163

9164
// Shift Left Long by 3
9165
instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
9166
  predicate(UseNewLongLShift);
9167
  match(Set dst (LShiftL dst cnt));
9168
  effect(KILL cr);
9169
  ins_cost(100);
9170
  format %{ "ADD    $dst.lo,$dst.lo\n\t"
9171
            "ADC    $dst.hi,$dst.hi\n\t"
9172
            "ADD    $dst.lo,$dst.lo\n\t"
9173
            "ADC    $dst.hi,$dst.hi\n\t"
9174
            "ADD    $dst.lo,$dst.lo\n\t"
9175
            "ADC    $dst.hi,$dst.hi" %}
9176
  ins_encode %{
9177
    __ addl($dst$$Register,$dst$$Register);
9178
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9179
    __ addl($dst$$Register,$dst$$Register);
9180
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9181
    __ addl($dst$$Register,$dst$$Register);
9182
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9183
  %}
9184
  ins_pipe( ialu_reg_long );
9185
%}
9186

9187
// Shift Left Long by 1-31
9188
instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9189
  match(Set dst (LShiftL dst cnt));
9190
  effect(KILL cr);
9191
  ins_cost(200);
9192
  format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
9193
            "SHL    $dst.lo,$cnt" %}
9194
  opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
9195
  ins_encode( move_long_small_shift(dst,cnt) );
9196
  ins_pipe( ialu_reg_long );
9197
%}
9198

9199
// Shift Left Long by 32-63
9200
instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9201
  match(Set dst (LShiftL dst cnt));
9202
  effect(KILL cr);
9203
  ins_cost(300);
9204
  format %{ "MOV    $dst.hi,$dst.lo\n"
9205
          "\tSHL    $dst.hi,$cnt-32\n"
9206
          "\tXOR    $dst.lo,$dst.lo" %}
9207
  opcode(0xC1, 0x4);  /* C1 /4 ib */
9208
  ins_encode( move_long_big_shift_clr(dst,cnt) );
9209
  ins_pipe( ialu_reg_long );
9210
%}
9211

9212
// Shift Left Long by variable
9213
instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9214
  match(Set dst (LShiftL dst shift));
9215
  effect(KILL cr);
9216
  ins_cost(500+200);
9217
  size(17);
9218
  format %{ "TEST   $shift,32\n\t"
9219
            "JEQ,s  small\n\t"
9220
            "MOV    $dst.hi,$dst.lo\n\t"
9221
            "XOR    $dst.lo,$dst.lo\n"
9222
    "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
9223
            "SHL    $dst.lo,$shift" %}
9224
  ins_encode( shift_left_long( dst, shift ) );
9225
  ins_pipe( pipe_slow );
9226
%}
9227

9228
// Shift Right Long by 1-31
9229
instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9230
  match(Set dst (URShiftL dst cnt));
9231
  effect(KILL cr);
9232
  ins_cost(200);
9233
  format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9234
            "SHR    $dst.hi,$cnt" %}
9235
  opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
9236
  ins_encode( move_long_small_shift(dst,cnt) );
9237
  ins_pipe( ialu_reg_long );
9238
%}
9239

9240
// Shift Right Long by 32-63
9241
instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9242
  match(Set dst (URShiftL dst cnt));
9243
  effect(KILL cr);
9244
  ins_cost(300);
9245
  format %{ "MOV    $dst.lo,$dst.hi\n"
9246
          "\tSHR    $dst.lo,$cnt-32\n"
9247
          "\tXOR    $dst.hi,$dst.hi" %}
9248
  opcode(0xC1, 0x5);  /* C1 /5 ib */
9249
  ins_encode( move_long_big_shift_clr(dst,cnt) );
9250
  ins_pipe( ialu_reg_long );
9251
%}
9252

9253
// Shift Right Long by variable
9254
instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9255
  match(Set dst (URShiftL dst shift));
9256
  effect(KILL cr);
9257
  ins_cost(600);
9258
  size(17);
9259
  format %{ "TEST   $shift,32\n\t"
9260
            "JEQ,s  small\n\t"
9261
            "MOV    $dst.lo,$dst.hi\n\t"
9262
            "XOR    $dst.hi,$dst.hi\n"
9263
    "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9264
            "SHR    $dst.hi,$shift" %}
9265
  ins_encode( shift_right_long( dst, shift ) );
9266
  ins_pipe( pipe_slow );
9267
%}
9268

9269
// Shift Right Long by 1-31
9270
instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9271
  match(Set dst (RShiftL dst cnt));
9272
  effect(KILL cr);
9273
  ins_cost(200);
9274
  format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9275
            "SAR    $dst.hi,$cnt" %}
9276
  opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
9277
  ins_encode( move_long_small_shift(dst,cnt) );
9278
  ins_pipe( ialu_reg_long );
9279
%}
9280

9281
// Shift Right Long by 32-63
9282
instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9283
  match(Set dst (RShiftL dst cnt));
9284
  effect(KILL cr);
9285
  ins_cost(300);
9286
  format %{ "MOV    $dst.lo,$dst.hi\n"
9287
          "\tSAR    $dst.lo,$cnt-32\n"
9288
          "\tSAR    $dst.hi,31" %}
9289
  opcode(0xC1, 0x7);  /* C1 /7 ib */
9290
  ins_encode( move_long_big_shift_sign(dst,cnt) );
9291
  ins_pipe( ialu_reg_long );
9292
%}
9293

9294
// Shift Right arithmetic Long by variable
9295
instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9296
  match(Set dst (RShiftL dst shift));
9297
  effect(KILL cr);
9298
  ins_cost(600);
9299
  size(18);
9300
  format %{ "TEST   $shift,32\n\t"
9301
            "JEQ,s  small\n\t"
9302
            "MOV    $dst.lo,$dst.hi\n\t"
9303
            "SAR    $dst.hi,31\n"
9304
    "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9305
            "SAR    $dst.hi,$shift" %}
9306
  ins_encode( shift_right_arith_long( dst, shift ) );
9307
  ins_pipe( pipe_slow );
9308
%}
9309

9310

9311
//----------Double Instructions------------------------------------------------
9312
// Double Math
9313

9314
// Compare & branch
9315

9316
// P6 version of float compare, sets condition codes in EFLAGS
9317
instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9318
  predicate(VM_Version::supports_cmov() && UseSSE <=1);
9319
  match(Set cr (CmpD src1 src2));
9320
  effect(KILL rax);
9321
  ins_cost(150);
9322
  format %{ "FLD    $src1\n\t"
9323
            "FUCOMIP ST,$src2  // P6 instruction\n\t"
9324
            "JNP    exit\n\t"
9325
            "MOV    ah,1       // saw a NaN, set CF\n\t"
9326
            "SAHF\n"
9327
     "exit:\tNOP               // avoid branch to branch" %}
9328
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9329
  ins_encode( Push_Reg_DPR(src1),
9330
              OpcP, RegOpc(src2),
9331
              cmpF_P6_fixup );
9332
  ins_pipe( pipe_slow );
9333
%}
9334

9335
// x87 double compare into an eFlagsRegUCF result using FUCOMIP (needs CMOV
// support and UseSSE <= 1).  Same push/compare as cmpDPR_cc_P6 but with no
// fixup encoding after the compare and no EAX kill.
instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
  match(Set cr (CmpD src1 src2));
  predicate(VM_Version::supports_cmov() && UseSSE <=1);
  ins_cost(150);

  format %{
    "FLD    $src1\n\t"
    "FUCOMIP ST,$src2  // P6 instruction"
  %}

  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode(Push_Reg_DPR(src1),
             OpcP, RegOpc(src2));
  ins_pipe(pipe_slow);
%}
9346

9347
// Compare & branch
9348
instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9349
  predicate(UseSSE<=1);
9350
  match(Set cr (CmpD src1 src2));
9351
  effect(KILL rax);
9352
  ins_cost(200);
9353
  format %{ "FLD    $src1\n\t"
9354
            "FCOMp  $src2\n\t"
9355
            "FNSTSW AX\n\t"
9356
            "TEST   AX,0x400\n\t"
9357
            "JZ,s   flags\n\t"
9358
            "MOV    AH,1\t# unordered treat as LT\n"
9359
    "flags:\tSAHF" %}
9360
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9361
  ins_encode( Push_Reg_DPR(src1),
9362
              OpcP, RegOpc(src2),
9363
              fpu_flags);
9364
  ins_pipe( pipe_slow );
9365
%}
9366

9367
// Compare vs zero into -1,0,1
9368
instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
9369
  predicate(UseSSE<=1);
9370
  match(Set dst (CmpD3 src1 zero));
9371
  effect(KILL cr, KILL rax);
9372
  ins_cost(280);
9373
  format %{ "FTSTD  $dst,$src1" %}
9374
  opcode(0xE4, 0xD9);
9375
  ins_encode( Push_Reg_DPR(src1),
9376
              OpcS, OpcP, PopFPU,
9377
              CmpF_Result(dst));
9378
  ins_pipe( pipe_slow );
9379
%}
9380

9381
// Compare into -1,0,1
9382
instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
9383
  predicate(UseSSE<=1);
9384
  match(Set dst (CmpD3 src1 src2));
9385
  effect(KILL cr, KILL rax);
9386
  ins_cost(300);
9387
  format %{ "FCMPD  $dst,$src1,$src2" %}
9388
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9389
  ins_encode( Push_Reg_DPR(src1),
9390
              OpcP, RegOpc(src2),
9391
              CmpF_Result(dst));
9392
  ins_pipe( pipe_slow );
9393
%}
9394

9395
// float compare and set condition codes in EFLAGS by XMM regs
9396
instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
9397
  predicate(UseSSE>=2);
9398
  match(Set cr (CmpD src1 src2));
9399
  ins_cost(145);
9400
  format %{ "UCOMISD $src1,$src2\n\t"
9401
            "JNP,s   exit\n\t"
9402
            "PUSHF\t# saw NaN, set CF\n\t"
9403
            "AND     [rsp], #0xffffff2b\n\t"
9404
            "POPF\n"
9405
    "exit:" %}
9406
  ins_encode %{
9407
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9408
    emit_cmpfp_fixup(masm);
9409
  %}
9410
  ins_pipe( pipe_slow );
9411
%}
9412

9413
// SSE2 double compare of two XMM registers into an eFlagsRegUCF result
// (UseSSE >= 2).  A bare UCOMISD with no fixup sequence afterwards.
instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
  match(Set cr (CmpD src1 src2));
  predicate(UseSSE>=2);
  ins_cost(100);

  format %{
    "UCOMISD $src1,$src2"
  %}

  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ ucomisd(lhs, rhs);
  %}

  ins_pipe(pipe_slow);
%}
9423

9424
// float compare and set condition codes in EFLAGS by XMM regs
9425
instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
9426
  predicate(UseSSE>=2);
9427
  match(Set cr (CmpD src1 (LoadD src2)));
9428
  ins_cost(145);
9429
  format %{ "UCOMISD $src1,$src2\n\t"
9430
            "JNP,s   exit\n\t"
9431
            "PUSHF\t# saw NaN, set CF\n\t"
9432
            "AND     [rsp], #0xffffff2b\n\t"
9433
            "POPF\n"
9434
    "exit:" %}
9435
  ins_encode %{
9436
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
9437
    emit_cmpfp_fixup(masm);
9438
  %}
9439
  ins_pipe( pipe_slow );
9440
%}
9441

9442
// SSE2 double compare of an XMM register against a loaded double, into an
// eFlagsRegUCF result (UseSSE >= 2).  A bare UCOMISD with a memory operand.
instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
  match(Set cr (CmpD src1 (LoadD src2)));
  predicate(UseSSE>=2);
  ins_cost(100);

  format %{
    "UCOMISD $src1,$src2"
  %}

  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    __ ucomisd(lhs, $src2$$Address);
  %}

  ins_pipe(pipe_slow);
%}
9452

9453
// Compare into -1,0,1 in XMM
9454
instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
9455
  predicate(UseSSE>=2);
9456
  match(Set dst (CmpD3 src1 src2));
9457
  effect(KILL cr);
9458
  ins_cost(255);
9459
  format %{ "UCOMISD $src1, $src2\n\t"
9460
            "MOV     $dst, #-1\n\t"
9461
            "JP,s    done\n\t"
9462
            "JB,s    done\n\t"
9463
            "SETNE   $dst\n\t"
9464
            "MOVZB   $dst, $dst\n"
9465
    "done:" %}
9466
  ins_encode %{
9467
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9468
    emit_cmpfp3(masm, $dst$$Register);
9469
  %}
9470
  ins_pipe( pipe_slow );
9471
%}
9472

9473
// Compare into -1,0,1 in XMM and memory
9474
instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
9475
  predicate(UseSSE>=2);
9476
  match(Set dst (CmpD3 src1 (LoadD src2)));
9477
  effect(KILL cr);
9478
  ins_cost(275);
9479
  format %{ "UCOMISD $src1, $src2\n\t"
9480
            "MOV     $dst, #-1\n\t"
9481
            "JP,s    done\n\t"
9482
            "JB,s    done\n\t"
9483
            "SETNE   $dst\n\t"
9484
            "MOVZB   $dst, $dst\n"
9485
    "done:" %}
9486
  ins_encode %{
9487
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
9488
    emit_cmpfp3(masm, $dst$$Register);
9489
  %}
9490
  ins_pipe( pipe_slow );
9491
%}
9492

9493

9494
// x87 double subtract, two-address form: dst = dst - src (UseSSE <= 1).
// src is pushed onto the FPU stack and then subtracted-and-popped into dst
// with the DE /5 opcode.
instruct subDPR_reg(regDPR dst, regDPR src) %{
  match(Set dst (SubD dst src));
  predicate (UseSSE <=1);
  ins_cost(150);

  format %{
    "FLD    $src\n\t"
    "DSUBp  $dst,ST"
  %}

  opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
  ins_encode(Push_Reg_DPR(src),
             OpcP, RegOpc(dst));
  ins_pipe(fpu_reg_reg);
%}
9506

9507
// x87 double subtract whose result is rounded by storing it to a stack slot:
// matches RoundDouble(src1 - src2) (UseSSE <= 1).  src2 is pushed, combined
// with src1 using the D8 /5 opcode, and the result is popped into dst.
instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
  match(Set dst (RoundDouble (SubD src1 src2)));
  predicate (UseSSE <=1);
  ins_cost(250);

  format %{
    "FLD    $src2\n\t"
    "DSUB   ST,$src1\n\t"
    "FSTP_D $dst\t# D-round"
  %}

  opcode(0xD8, 0x5);
  ins_encode(Push_Reg_DPR(src2),
             OpcP, RegOpc(src1), Pop_Mem_DPR(dst));
  ins_pipe(fpu_mem_reg_reg);
%}
9520

9521

9522
// x87 double subtract with a memory operand: dst = dst - [src] (UseSSE <= 1).
// The double is loaded with the tertiary opcode (DD /0) and then
// subtracted-and-popped into dst with DE /5.
instruct subDPR_reg_mem(regDPR dst, memory src) %{
  match(Set dst (SubD dst (LoadD src)));
  predicate (UseSSE <=1);
  ins_cost(150);

  format %{
    "FLD    $src\n\t"
    "DSUBp  $dst,ST"
  %}

  opcode(0xDE, 0x5, 0xDD); /* DE E8+i or DE /5 */  /* LoadD  DD /0 */
  ins_encode(SetInstMark,
             Opcode(tertiary), RMopc_Mem(0x00,src),
             OpcP, RegOpc(dst),
             ClearInstMark);
  ins_pipe(fpu_reg_mem);
%}
9534

9535
// x87 double absolute value (UseSSE <= 1).  Both operands use the regDPR1
// class; the encoding is just the two opcode bytes D9 E1 (FABS).
instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
  match(Set dst (AbsD src));
  predicate (UseSSE<=1);
  ins_cost(100);

  format %{
    "FABS"
  %}

  opcode(0xE1, 0xD9);
  ins_encode(OpcS, OpcP);
  ins_pipe(fpu_reg_reg);
%}
9544

9545
// x87 double negate (UseSSE <= 1).  Both operands use the regDPR1 class;
// the encoding is just the two opcode bytes D9 E0 (FCHS).
instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
  match(Set dst (NegD src));
  predicate(UseSSE<=1);
  ins_cost(100);

  format %{
    "FCHS"
  %}

  opcode(0xE0, 0xD9);
  ins_encode(OpcS, OpcP);
  ins_pipe(fpu_reg_reg);
%}
9554

9555
// x87 double add, two-address form: dst = dst + src (UseSSE <= 1).
// src is pushed onto the FPU stack and then added-and-popped into dst with
// the DE /0 opcode.  The encoding size is declared as a fixed 4 bytes.
instruct addDPR_reg(regDPR dst, regDPR src) %{
  match(Set dst (AddD dst src));
  predicate(UseSSE<=1);
  ins_cost(150);
  size(4);

  format %{
    "FLD    $src\n\t"
    "DADD   $dst,ST"
  %}

  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode(Push_Reg_DPR(src),
             OpcP, RegOpc(dst));
  ins_pipe(fpu_reg_reg);
%}
9567

9568

9569
// x87 double add whose result is rounded by storing it to a stack slot:
// matches RoundDouble(src1 + src2) (UseSSE <= 1).  src2 is pushed, src1 is
// added with the D8 /0 opcode, and the result is popped into dst.
instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
  match(Set dst (RoundDouble (AddD src1 src2)));
  predicate(UseSSE<=1);
  ins_cost(250);

  format %{
    "FLD    $src2\n\t"
    "DADD   ST,$src1\n\t"
    "FSTP_D $dst\t# D-round"
  %}

  opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
  ins_encode(Push_Reg_DPR(src2),
             OpcP, RegOpc(src1), Pop_Mem_DPR(dst));
  ins_pipe(fpu_mem_reg_reg);
%}
9582

9583

9584
// x87 double add with a memory operand: dst = dst + [src] (UseSSE <= 1).
// The double is loaded with the tertiary opcode (DD /0) and then
// added-and-popped into dst with DE /0.
instruct addDPR_reg_mem(regDPR dst, memory src) %{
  match(Set dst (AddD dst (LoadD src)));
  predicate(UseSSE<=1);
  ins_cost(150);

  format %{
    "FLD    $src\n\t"
    "DADDp  $dst,ST"
  %}

  opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
  ins_encode(SetInstMark,
             Opcode(tertiary), RMopc_Mem(0x00,src),
             OpcP, RegOpc(dst),
             ClearInstMark);
  ins_pipe(fpu_reg_mem);
%}
9596

9597
// add-to-memory
9598
instruct addDPR_mem_reg(memory dst, regDPR src) %{
9599
  predicate(UseSSE<=1);
9600
  match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
9601
  ins_cost(150);
9602

9603
  format %{ "FLD_D  $dst\n\t"
9604
            "DADD   ST,$src\n\t"
9605
            "FST_D  $dst" %}
9606
  opcode(0xDD, 0x0);
9607
  ins_encode( SetInstMark, Opcode(0xDD), RMopc_Mem(0x00,dst),
9608
              Opcode(0xD8), RegOpc(src), ClearInstMark,
9609
              SetInstMark,
9610
              Opcode(0xDD), RMopc_Mem(0x03,dst),
9611
              ClearInstMark);
9612
  ins_pipe( fpu_reg_mem );
9613
%}
9614

9615
// x87 double add of the constant 1.0 (immDPR1 operand): dst = dst + 1.0
// (UseSSE <= 1).  FLD1 pushes the constant, FADDP adds it into dst and pops.
instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
  match(Set dst (AddD dst con));
  predicate(UseSSE<=1);
  ins_cost(125);

  format %{
    "FLD1\n\t"
    "DADDp  $dst,ST"
  %}

  ins_encode %{
    int dst_slot = $dst$$reg;
    __ fld1();
    __ faddp(dst_slot);
  %}

  ins_pipe(fpu_reg);
%}
9627

9628
// x87 double add of a general constant: dst = dst + con (UseSSE <= 1).
// The predicate excludes the constants 0.0 and 1.0.  The constant is loaded
// from the constant table with FLD_D, then FADDP adds it into dst and pops.
instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
  match(Set dst (AddD dst con));
  predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
  ins_cost(200);

  format %{
    "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
    "DADDp  $dst,ST"
  %}

  ins_encode %{
    int dst_slot = $dst$$reg;
    __ fld_d($constantaddress($con));
    __ faddp(dst_slot);
  %}

  ins_pipe(fpu_reg_mem);
%}
9640

9641
// x87 double add of a general constant, rounded through a stack slot:
// matches RoundDouble(src + con) (UseSSE <= 1).  The predicate excludes the
// constants 0.0 and 1.0.  The constant is loaded from the constant table,
// src is added to the stack top, and the result is popped to [rsp + disp].
instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
  match(Set dst (RoundDouble (AddD src con)));
  predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
  ins_cost(200);

  format %{
    "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
    "DADD   ST,$src\n\t"
    "FSTP_D $dst\t# D-round"
  %}

  ins_encode %{
    int src_slot = $src$$reg;
    __ fld_d($constantaddress($con));
    __ fadd(src_slot);
    __ fstp_d(Address(rsp, $dst$$disp));
  %}

  ins_pipe(fpu_mem_reg_con);
%}
9655

9656
// x87 double multiply, two-address form: dst = dst * src (UseSSE <= 1).
// src is pushed onto the FPU stack and then multiplied-and-popped into dst
// with the DE /1 opcode.
instruct mulDPR_reg(regDPR dst, regDPR src) %{
  match(Set dst (MulD dst src));
  predicate(UseSSE<=1);
  ins_cost(150);

  format %{
    "FLD    $src\n\t"
    "DMULp  $dst,ST"
  %}

  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
  ins_encode(Push_Reg_DPR(src),
             OpcP, RegOpc(dst));
  ins_pipe(fpu_reg_reg);
%}
9667

9668
// Strict FP instruction biases argument before multiply then
9669
// biases result to avoid double rounding of subnormals.
9670
//
9671
// scale arg1 by multiplying arg1 by 2^(-15360)
9672
// load arg2
9673
// multiply scaled arg1 by arg2
9674
// rescale product by 2^(15360)
9675
//
9676
instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
9677
  predicate( UseSSE<=1 && Compile::current()->has_method() );
9678
  match(Set dst (MulD dst src));
9679
  ins_cost(1);   // Select this instruction for all FP double multiplies
9680

9681
  format %{ "FLD    StubRoutines::x86::_fpu_subnormal_bias1\n\t"
9682
            "DMULp  $dst,ST\n\t"
9683
            "FLD    $src\n\t"
9684
            "DMULp  $dst,ST\n\t"
9685
            "FLD    StubRoutines::x86::_fpu_subnormal_bias2\n\t"
9686
            "DMULp  $dst,ST\n\t" %}
9687
  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9688
  ins_encode( strictfp_bias1(dst),
9689
              Push_Reg_DPR(src),
9690
              OpcP, RegOpc(dst),
9691
              strictfp_bias2(dst) );
9692
  ins_pipe( fpu_reg_reg );
9693
%}
9694

9695
// x87 double multiply by a general constant: dst = dst * con (UseSSE <= 1).
// The predicate excludes the constants 0.0 and 1.0.  The constant is loaded
// from the constant table with FLD_D, then FMULP multiplies into dst and pops.
instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
  match(Set dst (MulD dst con));
  predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
  ins_cost(200);

  format %{
    "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
    "DMULp  $dst,ST"
  %}

  ins_encode %{
    int dst_slot = $dst$$reg;
    __ fld_d($constantaddress($con));
    __ fmulp(dst_slot);
  %}

  ins_pipe(fpu_reg_mem);
%}
9707

9708

9709
// x87 double multiply with a memory operand: dst = dst * [src] (UseSSE <= 1).
// The double is loaded with the tertiary opcode (DD /0) and then
// multiplied-and-popped into dst with DE /1.
instruct mulDPR_reg_mem(regDPR dst, memory src) %{
  match(Set dst (MulD dst (LoadD src)));
  predicate( UseSSE<=1 );
  ins_cost(200);

  format %{
    "FLD_D  $src\n\t"
    "DMULp  $dst,ST"
  %}

  opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
  ins_encode(SetInstMark,
             Opcode(tertiary), RMopc_Mem(0x00,src),
             OpcP, RegOpc(dst),
             ClearInstMark);
  ins_pipe(fpu_reg_mem);
%}
9720

9721
//
9722
// Cisc-alternate to reg-reg multiply
9723
instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
9724
  predicate( UseSSE<=1 );
9725
  match(Set dst (MulD src (LoadD mem)));
9726
  ins_cost(250);
9727
  format %{ "FLD_D  $mem\n\t"
9728
            "DMUL   ST,$src\n\t"
9729
            "FSTP_D $dst" %}
9730
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadD D9 /0 */
9731
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,mem),
9732
              OpcReg_FPR(src),
9733
              Pop_Reg_DPR(dst), ClearInstMark );
9734
  ins_pipe( fpu_reg_reg_mem );
9735
%}
9736

9737

9738
// MACRO3 -- addDPR a mulDPR
9739
// This instruction is a '2-address' instruction in that the result goes
9740
// back to src2.  This eliminates a move from the macro; possibly the
9741
// register allocator will have to add it back (and maybe not).
9742
instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9743
  predicate( UseSSE<=1 );
9744
  match(Set src2 (AddD (MulD src0 src1) src2));
9745
  format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9746
            "DMUL   ST,$src1\n\t"
9747
            "DADDp  $src2,ST" %}
9748
  ins_cost(250);
9749
  opcode(0xDD); /* LoadD DD /0 */
9750
  ins_encode( Push_Reg_FPR(src0),
9751
              FMul_ST_reg(src1),
9752
              FAddP_reg_ST(src2) );
9753
  ins_pipe( fpu_reg_reg_reg );
9754
%}
9755

9756

9757
// MACRO3 -- subDPR a mulDPR
9758
instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9759
  predicate( UseSSE<=1 );
9760
  match(Set src2 (SubD (MulD src0 src1) src2));
9761
  format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9762
            "DMUL   ST,$src1\n\t"
9763
            "DSUBRp $src2,ST" %}
9764
  ins_cost(250);
9765
  ins_encode( Push_Reg_FPR(src0),
9766
              FMul_ST_reg(src1),
9767
              Opcode(0xDE), Opc_plus(0xE0,src2));
9768
  ins_pipe( fpu_reg_reg_reg );
9769
%}
9770

9771

9772
// x87 double divide, two-address form: dst = dst / src (UseSSE <= 1).
// src is pushed onto the FPU stack and then divided-and-popped into dst with
// the DE /7 opcode.
instruct divDPR_reg(regDPR dst, regDPR src) %{
  match(Set dst (DivD dst src));
  predicate( UseSSE<=1 );
  ins_cost(150);

  format %{
    "FLD    $src\n\t"
    "FDIVp  $dst,ST"
  %}

  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode(Push_Reg_DPR(src),
             OpcP, RegOpc(dst));
  ins_pipe(fpu_reg_reg);
%}
9784

9785
// Strict FP instruction biases argument before division then
9786
// biases result, to avoid double rounding of subnormals.
9787
//
9788
// scale dividend by multiplying dividend by 2^(-15360)
9789
// load divisor
9790
// divide scaled dividend by divisor
9791
// rescale quotient by 2^(15360)
9792
//
9793
instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
9794
  predicate (UseSSE<=1);
9795
  match(Set dst (DivD dst src));
9796
  predicate( UseSSE<=1 && Compile::current()->has_method() );
9797
  ins_cost(01);
9798

9799
  format %{ "FLD    StubRoutines::x86::_fpu_subnormal_bias1\n\t"
9800
            "DMULp  $dst,ST\n\t"
9801
            "FLD    $src\n\t"
9802
            "FDIVp  $dst,ST\n\t"
9803
            "FLD    StubRoutines::x86::_fpu_subnormal_bias2\n\t"
9804
            "DMULp  $dst,ST\n\t" %}
9805
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
9806
  ins_encode( strictfp_bias1(dst),
9807
              Push_Reg_DPR(src),
9808
              OpcP, RegOpc(dst),
9809
              strictfp_bias2(dst) );
9810
  ins_pipe( fpu_reg_reg );
9811
%}
9812

9813
// x87 double remainder, two-address form: dst = dst % src (UseSSE <= 1).
// Operands are pushed by Push_Reg_Mod_DPR, emitModDPR produces the
// remainder, and the result is pushed and popped into dst.
// EAX and EFLAGS are killed because emitModDPR() uses them.
instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
  match(Set dst (ModD dst src));
  predicate(UseSSE<=1);
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
  ins_cost(250);

  format %{
    "DMOD   $dst,$src"
  %}

  ins_encode(Push_Reg_Mod_DPR(dst, src),
             emitModDPR(),
             Push_Result_Mod_DPR(src),
             Pop_Reg_DPR(dst));
  ins_pipe(pipe_slow);
%}
9826

9827
// Double remainder for XMM operands: dst = src0 % src1 (UseSSE >= 2).
// Per the format: both operands are spilled through an 8-byte stack scratch
// area onto the x87 stack, FPREM is repeated while the parity flag taken
// from the FPU status word stays set, and the result travels back through
// the scratch area into dst before the FPU stack is restored.
// EAX and EFLAGS are killed.
instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
  match(Set dst (ModD src0 src1));
  predicate(UseSSE>=2);
  effect(KILL rax, KILL cr);
  ins_cost(250);

  format %{
    "SUB    ESP,8\t # DMOD\n"
    "\tMOVSD  [ESP+0],$src1\n"
    "\tFLD_D  [ESP+0]\n"
    "\tMOVSD  [ESP+0],$src0\n"
    "\tFLD_D  [ESP+0]\n"
    "loop:\tFPREM\n"
    "\tFWAIT\n"
    "\tFNSTSW AX\n"
    "\tSAHF\n"
    "\tJP     loop\n"
    "\tFSTP_D [ESP+0]\n"
    "\tMOVSD  $dst,[ESP+0]\n"
    "\tADD    ESP,8\n"
    "\tFSTP   ST0\t # Restore FPU Stack"
  %}

  ins_encode(Push_ModD_encoding(src0, src1),
             emitModDPR(),
             Push_ResultD(dst),
             PopFPU);
  ins_pipe(pipe_slow);
%}
9851

9852
// x87 two-operand arctangent (AtanD dst src), two-address form
// (UseSSE <= 1).  src is pushed and the opcode bytes D9 F3 are emitted,
// followed by the RegOpc(dst) encoding.
instruct atanDPR_reg(regDPR dst, regDPR src) %{
  match(Set dst(AtanD dst src));
  predicate (UseSSE<=1);

  format %{
    "DATA   $dst,$src"
  %}

  opcode(0xD9, 0xF3);
  ins_encode(Push_Reg_DPR(src),
             OpcP, OpcS, RegOpc(dst));
  ins_pipe(pipe_slow);
%}
9861

9862
// Two-operand arctangent (AtanD dst src) for XMM operands (UseSSE >= 2).
// src is pushed via Push_SrcD, the opcode bytes D9 F3 are emitted, and the
// result is moved back with Push_ResultD.  EFLAGS is killed because the
// push/result helpers adjust ESP with SUB/ADD.
instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
  match(Set dst(AtanD dst src));
  predicate (UseSSE>=2);
  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"

  format %{
    "DATA   $dst,$src"
  %}

  opcode(0xD9, 0xF3);
  ins_encode(Push_SrcD(src),
             OpcP, OpcS, Push_ResultD(dst));
  ins_pipe(pipe_slow);
%}
9872

9873
// x87 double square root: dst = sqrt(src) (UseSSE <= 1).
// src is pushed, the opcode bytes D9 FA are emitted, and the result is
// popped into dst.
instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
  match(Set dst (SqrtD src));
  predicate (UseSSE<=1);

  format %{
    "DSQRT  $dst,$src"
  %}

  opcode(0xFA, 0xD9);
  ins_encode(Push_Reg_DPR(src),
             OpcS, OpcP, Pop_Reg_DPR(dst));
  ins_pipe(pipe_slow);
%}
9882

9883
//-------------Float Instructions-------------------------------
9884
// Float Math
9885

9886
// Code for float compare:
9887
//     fcompp();
9888
//     fwait(); fnstsw_ax();
9889
//     sahf();
9890
//     movl(dst, unordered_result);
9891
//     jcc(Assembler::parity, exit);
9892
//     movl(dst, less_result);
9893
//     jcc(Assembler::below, exit);
9894
//     movl(dst, equal_result);
9895
//     jcc(Assembler::equal, exit);
9896
//     movl(dst, greater_result);
9897
//   exit:
9898

9899
// P6 version of float compare, sets condition codes in EFLAGS
9900
instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
9901
  predicate(VM_Version::supports_cmov() && UseSSE == 0);
9902
  match(Set cr (CmpF src1 src2));
9903
  effect(KILL rax);
9904
  ins_cost(150);
9905
  format %{ "FLD    $src1\n\t"
9906
            "FUCOMIP ST,$src2  // P6 instruction\n\t"
9907
            "JNP    exit\n\t"
9908
            "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
9909
            "SAHF\n"
9910
     "exit:\tNOP               // avoid branch to branch" %}
9911
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9912
  ins_encode( Push_Reg_DPR(src1),
9913
              OpcP, RegOpc(src2),
9914
              cmpF_P6_fixup );
9915
  ins_pipe( pipe_slow );
9916
%}
9917

9918
// x87 float compare into an eFlagsRegUCF result using FUCOMIP (needs CMOV
// support and UseSSE == 0).  Same push/compare as cmpFPR_cc_P6 but with no
// fixup encoding after the compare and no EAX kill.
instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
  match(Set cr (CmpF src1 src2));
  predicate(VM_Version::supports_cmov() && UseSSE == 0);
  ins_cost(100);

  format %{
    "FLD    $src1\n\t"
    "FUCOMIP ST,$src2  // P6 instruction"
  %}

  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode(Push_Reg_DPR(src1),
             OpcP, RegOpc(src2));
  ins_pipe(pipe_slow);
%}
9929

9930

9931
// Compare & branch
9932
instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
9933
  predicate(UseSSE == 0);
9934
  match(Set cr (CmpF src1 src2));
9935
  effect(KILL rax);
9936
  ins_cost(200);
9937
  format %{ "FLD    $src1\n\t"
9938
            "FCOMp  $src2\n\t"
9939
            "FNSTSW AX\n\t"
9940
            "TEST   AX,0x400\n\t"
9941
            "JZ,s   flags\n\t"
9942
            "MOV    AH,1\t# unordered treat as LT\n"
9943
    "flags:\tSAHF" %}
9944
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9945
  ins_encode( Push_Reg_DPR(src1),
9946
              OpcP, RegOpc(src2),
9947
              fpu_flags);
9948
  ins_pipe( pipe_slow );
9949
%}
9950

9951
// Compare vs zero into -1,0,1
9952
// Three-way compare (CmpF3) of src1 against the float constant zero (immFPR0),
// result in integer register dst.  UseSSE == 0 only; EFLAGS and EAX are KILLed.
// NOTE(review): OpcS, OpcP with opcode(0xE4, 0xD9) presumably emits D9 E4
// (FTST) -- confirm against the enc_class definitions.
instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
9953
  predicate(UseSSE == 0);
9954
  match(Set dst (CmpF3 src1 zero));
9955
  effect(KILL cr, KILL rax);
9956
  ins_cost(280);
9957
  format %{ "FTSTF  $dst,$src1" %}
9958
  opcode(0xE4, 0xD9);
9959
  ins_encode( Push_Reg_DPR(src1),
9960
              OpcS, OpcP, PopFPU,
9961
              CmpF_Result(dst));
9962
  ins_pipe( pipe_slow );
9963
%}
9964

9965
// Compare into -1,0,1
9966
// Three-way compare (CmpF3) of two regFPR operands, result in integer
// register dst.  UseSSE == 0 only; EFLAGS and EAX are KILLed.  The compare
// uses the same opcode pair as cmpFPR_cc, followed by CmpF_Result(dst).
instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
9967
  predicate(UseSSE == 0);
9968
  match(Set dst (CmpF3 src1 src2));
9969
  effect(KILL cr, KILL rax);
9970
  ins_cost(300);
9971
  format %{ "FCMPF  $dst,$src1,$src2" %}
9972
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9973
  ins_encode( Push_Reg_DPR(src1),
9974
              OpcP, RegOpc(src2),
9975
              CmpF_Result(dst));
9976
  ins_pipe( pipe_slow );
9977
%}
9978

9979
// float compare and set condition codes in EFLAGS by XMM regs
9980
// CmpF on two XMM registers (regF) when UseSSE >= 1, producing unsigned flags.
// Emits UCOMISS followed by emit_cmpfp_fixup(masm); the format shows the
// fixup as: skip when parity is clear, otherwise PUSHF, mask the saved flags
// with 0xffffff2b, POPF.
instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
9981
  predicate(UseSSE>=1);
9982
  match(Set cr (CmpF src1 src2));
9983
  ins_cost(145);
9984
  format %{ "UCOMISS $src1,$src2\n\t"
9985
            "JNP,s   exit\n\t"
9986
            "PUSHF\t# saw NaN, set CF\n\t"
9987
            "AND     [rsp], #0xffffff2b\n\t"
9988
            "POPF\n"
9989
    "exit:" %}
9990
  ins_encode %{
9991
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
9992
    emit_cmpfp_fixup(masm);
9993
  %}
9994
  ins_pipe( pipe_slow );
9995
%}
9996

9997
// CmpF on two XMM registers for users that accept eFlagsRegUCF flags:
// a single UCOMISS with no NaN fixup (UseSSE >= 1).
instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
9998
  predicate(UseSSE>=1);
9999
  match(Set cr (CmpF src1 src2));
10000
  ins_cost(100);
10001
  format %{ "UCOMISS $src1,$src2" %}
10002
  ins_encode %{
10003
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10004
  %}
10005
  ins_pipe( pipe_slow );
10006
%}
10007

10008
// float compare and set condition codes in EFLAGS by XMM regs
10009
// CmpF of an XMM register against a float loaded from memory (LoadF folded
// into the UCOMISS memory operand), UseSSE >= 1.  Followed by
// emit_cmpfp_fixup(masm), shown in the format as the PUSHF/AND/POPF sequence.
instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
10010
  predicate(UseSSE>=1);
10011
  match(Set cr (CmpF src1 (LoadF src2)));
10012
  ins_cost(165);
10013
  format %{ "UCOMISS $src1,$src2\n\t"
10014
            "JNP,s   exit\n\t"
10015
            "PUSHF\t# saw NaN, set CF\n\t"
10016
            "AND     [rsp], #0xffffff2b\n\t"
10017
            "POPF\n"
10018
    "exit:" %}
10019
  ins_encode %{
10020
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
10021
    emit_cmpfp_fixup(masm);
10022
  %}
10023
  ins_pipe( pipe_slow );
10024
%}
10025

10026
// Register-vs-memory CmpF for users that accept eFlagsRegUCF flags:
// a single UCOMISS with a memory operand and no NaN fixup (UseSSE >= 1).
instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
10027
  predicate(UseSSE>=1);
10028
  match(Set cr (CmpF src1 (LoadF src2)));
10029
  ins_cost(100);
10030
  format %{ "UCOMISS $src1,$src2" %}
10031
  ins_encode %{
10032
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
10033
  %}
10034
  ins_pipe( pipe_slow );
10035
%}
10036

10037
// Compare into -1,0,1 in XMM
10038
// Three-way compare (CmpF3) of two XMM registers into xRegI dst, UseSSE >= 1.
// Emits UCOMISS then emit_cmpfp3(masm, dst).  Per the format, dst is preset
// to -1 and kept for unordered (JP) or below (JB); otherwise SETNE/MOVZB
// leaves 0 or 1.  EFLAGS is KILLed.
instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
10039
  predicate(UseSSE>=1);
10040
  match(Set dst (CmpF3 src1 src2));
10041
  effect(KILL cr);
10042
  ins_cost(255);
10043
  format %{ "UCOMISS $src1, $src2\n\t"
10044
            "MOV     $dst, #-1\n\t"
10045
            "JP,s    done\n\t"
10046
            "JB,s    done\n\t"
10047
            "SETNE   $dst\n\t"
10048
            "MOVZB   $dst, $dst\n"
10049
    "done:" %}
10050
  ins_encode %{
10051
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10052
    emit_cmpfp3(masm, $dst$$Register);
10053
  %}
10054
  ins_pipe( pipe_slow );
10055
%}
10056

10057
// Compare into -1,0,1 in XMM and memory
10058
// Three-way compare (CmpF3) of an XMM register against a float loaded from
// memory, UseSSE >= 1.  Same result sequence as the register form:
// UCOMISS with a memory operand, then emit_cmpfp3(masm, dst).  EFLAGS is KILLed.
instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
10059
  predicate(UseSSE>=1);
10060
  match(Set dst (CmpF3 src1 (LoadF src2)));
10061
  effect(KILL cr);
10062
  ins_cost(275);
10063
  format %{ "UCOMISS $src1, $src2\n\t"
10064
            "MOV     $dst, #-1\n\t"
10065
            "JP,s    done\n\t"
10066
            "JB,s    done\n\t"
10067
            "SETNE   $dst\n\t"
10068
            "MOVZB   $dst, $dst\n"
10069
    "done:" %}
10070
  ins_encode %{
10071
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
10072
    emit_cmpfp3(masm, $dst$$Register);
10073
  %}
10074
  ins_pipe( pipe_slow );
10075
%}
10076

10077
// Spill to obtain 24-bit precision
10078
// SubF (src1 - src2) for UseSSE==0 when the current compile selects 24-bit
// instructions.  The result is popped into a float stack slot (stackSlotF dst)
// instead of being left in an FPU register.
instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10079
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10080
  match(Set dst (SubF src1 src2));
10081

10082
  format %{ "FSUB   $dst,$src1 - $src2" %}
10083
  opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10084
  ins_encode( Push_Reg_FPR(src1),
10085
              OpcReg_FPR(src2),
10086
              Pop_Mem_FPR(dst) );
10087
  ins_pipe( fpu_mem_reg_reg );
10088
%}
10089
//
10090
// This instruction does not round to 24-bits
10091
// Two-address SubF (dst = dst - src) for UseSSE==0 when 24-bit instructions
// are not selected.  src is pushed and the DE-opcode form operates on dst.
instruct subFPR_reg(regFPR dst, regFPR src) %{
10092
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10093
  match(Set dst (SubF dst src));
10094

10095
  format %{ "FSUB   $dst,$src" %}
10096
  opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
10097
  ins_encode( Push_Reg_FPR(src),
10098
              OpcP, RegOpc(dst) );
10099
  ins_pipe( fpu_reg_reg );
10100
%}
10101

10102
// Spill to obtain 24-bit precision
10103
// AddF of two regFPR operands for UseSSE==0 in 24-bit mode; the sum is popped
// into a float stack slot.  The encoding pushes src2 and adds src1.
instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10104
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10105
  match(Set dst (AddF src1 src2));
10106

10107
  format %{ "FADD   $dst,$src1,$src2" %}
10108
  opcode(0xD8, 0x0); /* D8 C0+i */
10109
  ins_encode( Push_Reg_FPR(src2),
10110
              OpcReg_FPR(src1),
10111
              Pop_Mem_FPR(dst) );
10112
  ins_pipe( fpu_mem_reg_reg );
10113
%}
10114
//
10115
// This instruction does not round to 24-bits
10116
// Two-address AddF (dst = dst + src) for UseSSE==0 when 24-bit instructions
// are not selected.  Per the format: FLD src, then FADDP into dst.
instruct addFPR_reg(regFPR dst, regFPR src) %{
10117
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10118
  match(Set dst (AddF dst src));
10119

10120
  format %{ "FLD    $src\n\t"
10121
            "FADDp  $dst,ST" %}
10122
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10123
  ins_encode( Push_Reg_FPR(src),
10124
              OpcP, RegOpc(dst) );
10125
  ins_pipe( fpu_reg_reg );
10126
%}
10127

10128
// AbsF for UseSSE==0.  Both dst and src are constrained to regFPR1, and the
// format is a bare FABS with no operands.
// NOTE(review): OpcS, OpcP with opcode(0xE1, 0xD9) presumably emits D9 E1 -- confirm.
instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
10129
  predicate(UseSSE==0);
10130
  match(Set dst (AbsF src));
10131
  ins_cost(100);
10132
  format %{ "FABS" %}
10133
  opcode(0xE1, 0xD9);
10134
  ins_encode( OpcS, OpcP );
10135
  ins_pipe( fpu_reg_reg );
10136
%}
10137

10138
// NegF for UseSSE==0.  Both dst and src are constrained to regFPR1, and the
// format is a bare FCHS with no operands.
// NOTE(review): OpcS, OpcP with opcode(0xE0, 0xD9) presumably emits D9 E0 -- confirm.
instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
10139
  predicate(UseSSE==0);
10140
  match(Set dst (NegF src));
10141
  ins_cost(100);
10142
  format %{ "FCHS" %}
10143
  opcode(0xE0, 0xD9);
10144
  ins_encode( OpcS, OpcP );
10145
  ins_pipe( fpu_reg_reg );
10146
%}
10147

10148
// Cisc-alternate to addFPR_reg
10149
// Spill to obtain 24-bit precision
10150
// AddF of a regFPR and a float loaded from memory, UseSSE==0 in 24-bit mode.
// Per the format: FLD the memory operand (tertiary opcode D9 /0), FADD src1,
// then FSTP_S into the float stack slot dst.
instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10151
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10152
  match(Set dst (AddF src1 (LoadF src2)));
10153

10154
  format %{ "FLD    $src2\n\t"
10155
            "FADD   ST,$src1\n\t"
10156
            "FSTP_S $dst" %}
10157
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10158
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
10159
              OpcReg_FPR(src1),
10160
              Pop_Mem_FPR(dst), ClearInstMark );
10161
  ins_pipe( fpu_mem_reg_mem );
10162
%}
10163
//
10164
// Cisc-alternate to addFPR_reg
10165
// This instruction does not round to 24-bits
10166
// Two-address AddF (dst = dst + LoadF src) for UseSSE==0 when 24-bit
// instructions are not selected.  The memory operand is loaded with the
// tertiary opcode (D9 /0) and then added into dst with the DE-opcode form.
instruct addFPR_reg_mem(regFPR dst, memory src) %{
10167
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10168
  match(Set dst (AddF dst (LoadF src)));
10169

10170
  format %{ "FADD   $dst,$src" %}
10171
  opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
10172
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src),
10173
              OpcP, RegOpc(dst), ClearInstMark );
10174
  ins_pipe( fpu_reg_mem );
10175
%}
10176

10177
// // Following two instructions for _222_mpegaudio
10178
// Spill to obtain 24-bit precision
10179
// AddF where src1 is a memory operand (matched directly, without an explicit
// LoadF in the rule) and src2 is a regFPR; UseSSE==0 in 24-bit mode.
// src1 is loaded from memory, src2 is added, and the sum is popped to dst.
instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
10180
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10181
  match(Set dst (AddF src1 src2));
10182

10183
  format %{ "FADD   $dst,$src1,$src2" %}
10184
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10185
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src1),
10186
              OpcReg_FPR(src2),
10187
              Pop_Mem_FPR(dst), ClearInstMark );
10188
  ins_pipe( fpu_mem_reg_mem );
10189
%}
10190

10191
// Cisc-spill variant
10192
// Spill to obtain 24-bit precision
10193
// AddF of a memory operand src1 and a float loaded from memory src2,
// UseSSE==0 in 24-bit mode.  src2 is loaded first (tertiary opcode D9 /0),
// src1 is added from memory with the primary opcode and secondary /0 digit,
// and the sum is popped into the float stack slot dst.
instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
10194
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10195
  match(Set dst (AddF src1 (LoadF src2)));
10196

10197
  format %{ "FADD   $dst,$src1,$src2 cisc" %}
10198
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10199
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
10200
              OpcP, RMopc_Mem(secondary,src1),
10201
              Pop_Mem_FPR(dst),
10202
              ClearInstMark);
10203
  ins_pipe( fpu_mem_mem_mem );
10204
%}
10205

10206
// Spill to obtain 24-bit precision
10207
// AddF of two memory operands (neither wrapped in LoadF in the match rule),
// UseSSE==0 in 24-bit mode.  Encoding is identical to the cisc variant:
// load src2, add src1 from memory, pop the sum into dst.
instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10208
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10209
  match(Set dst (AddF src1 src2));
10210

10211
  format %{ "FADD   $dst,$src1,$src2" %}
10212
  opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
10213
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
10214
              OpcP, RMopc_Mem(secondary,src1),
10215
              Pop_Mem_FPR(dst),
10216
              ClearInstMark);
10217
  ins_pipe( fpu_mem_mem_mem );
10218
%}
10219

10220

10221
// Spill to obtain 24-bit precision
10222
// AddF of a regFPR and a float constant, UseSSE==0 in 24-bit mode.
// The constant is read from the constant table ($constantaddress) and the
// sum is stored as a single-precision value at rsp + dst's displacement.
instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10223
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10224
  match(Set dst (AddF src con));
10225
  format %{ "FLD    $src\n\t"
10226
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10227
            "FSTP_S $dst"  %}
10228
  ins_encode %{
10229
    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10230
    __ fadd_s($constantaddress($con));
10231
    __ fstp_s(Address(rsp, $dst$$disp));
10232
  %}
10233
  ins_pipe(fpu_mem_reg_con);
10234
%}
10235
//
10236
// This instruction does not round to 24-bits
10237
// AddF of a regFPR and a float constant, UseSSE==0 when 24-bit instructions
// are not selected.  Same load/add as the 24-bit form, but the result is
// popped into FPU register dst (fstp_d on the register encoding) instead of
// being stored to a stack slot.
instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10238
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10239
  match(Set dst (AddF src con));
10240
  format %{ "FLD    $src\n\t"
10241
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10242
            "FSTP   $dst"  %}
10243
  ins_encode %{
10244
    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10245
    __ fadd_s($constantaddress($con));
10246
    __ fstp_d($dst$$reg);
10247
  %}
10248
  ins_pipe(fpu_reg_reg_con);
10249
%}
10250

10251
// Spill to obtain 24-bit precision
10252
// MulF of two regFPR operands, UseSSE==0 in 24-bit mode.  Per the format:
// FLD src1, FMUL src2, then FSTP_S the product into the float stack slot dst.
instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10253
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10254
  match(Set dst (MulF src1 src2));
10255

10256
  format %{ "FLD    $src1\n\t"
10257
            "FMUL   $src2\n\t"
10258
            "FSTP_S $dst"  %}
10259
  opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
10260
  ins_encode( Push_Reg_FPR(src1),
10261
              OpcReg_FPR(src2),
10262
              Pop_Mem_FPR(dst) );
10263
  ins_pipe( fpu_mem_reg_reg );
10264
%}
10265
//
10266
// This instruction does not round to 24-bits
10267
// MulF of two regFPR operands into regFPR dst, UseSSE==0 when 24-bit
// instructions are not selected.  The encoding pushes src2, multiplies by
// src1 and pops the product into register dst; the format text mirrors that
// order and uses FSTP (register pop) rather than FSTP_S (single-precision
// memory store, used only by the stack-slot variants).
instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
10268
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10269
  match(Set dst (MulF src1 src2));
10270

10271
  format %{ "FLD    $src2\n\t"
10272
            "FMUL   $src1\n\t"
10273
            "FSTP   $dst"  %}
10274
  opcode(0xD8, 0x1); /* D8 C8+i */
10275
  ins_encode( Push_Reg_FPR(src2),
10276
              OpcReg_FPR(src1),
10277
              Pop_Reg_FPR(dst) );
10278
  ins_pipe( fpu_reg_reg_reg );
10279
%}
10280

10281

10282
// Spill to obtain 24-bit precision
10283
// Cisc-alternate to reg-reg multiply
10284
// MulF of a regFPR and a float loaded from memory, UseSSE==0 in 24-bit mode.
// Per the format: FLD_S the memory operand, FMUL src1, FSTP_S into the float
// stack slot dst.
instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10285
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10286
  match(Set dst (MulF src1 (LoadF src2)));
10287

10288
  format %{ "FLD_S  $src2\n\t"
10289
            "FMUL   $src1\n\t"
10290
            "FSTP_S $dst"  %}
10291
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10292
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
10293
              OpcReg_FPR(src1),
10294
              Pop_Mem_FPR(dst), ClearInstMark );
10295
  ins_pipe( fpu_mem_reg_mem );
10296
%}
10297
//
10298
// This instruction does not round to 24-bits
10299
// Cisc-alternate to reg-reg multiply
10300
// MulF of a regFPR and a float loaded from memory into regFPR dst, UseSSE==0
// when 24-bit instructions are not selected.  The memory operand is loaded,
// multiplied by src1, and the product is popped into register dst.
instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
10301
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10302
  match(Set dst (MulF src1 (LoadF src2)));
10303

10304
  format %{ "FMUL   $dst,$src1,$src2" %}
10305
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
10306
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
10307
              OpcReg_FPR(src1),
10308
              Pop_Reg_FPR(dst), ClearInstMark );
10309
  ins_pipe( fpu_reg_reg_mem );
10310
%}
10311

10312
// Spill to obtain 24-bit precision
10313
// MulF of two memory operands, UseSSE==0 in 24-bit mode.  src2 is loaded
// (tertiary opcode D9 /0), multiplied by src1 from memory (primary opcode with
// secondary /1 digit), and the product is popped into the float stack slot dst.
instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10314
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10315
  match(Set dst (MulF src1 src2));
10316

10317
  format %{ "FMUL   $dst,$src1,$src2" %}
10318
  opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
10319
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
10320
              OpcP, RMopc_Mem(secondary,src1),
10321
              Pop_Mem_FPR(dst),
10322
              ClearInstMark );
10323
  ins_pipe( fpu_mem_mem_mem );
10324
%}
10325

10326
// Spill to obtain 24-bit precision
10327
// MulF of a regFPR and a float constant, UseSSE==0 in 24-bit mode.
// The constant is read from the constant table ($constantaddress) and the
// product is stored as a single-precision value at rsp + dst's displacement.
instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10328
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10329
  match(Set dst (MulF src con));
10330

10331
  format %{ "FLD    $src\n\t"
10332
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10333
            "FSTP_S $dst"  %}
10334
  ins_encode %{
10335
    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10336
    __ fmul_s($constantaddress($con));
10337
    __ fstp_s(Address(rsp, $dst$$disp));
10338
  %}
10339
  ins_pipe(fpu_mem_reg_con);
10340
%}
10341
//
10342
// This instruction does not round to 24-bits
10343
// MulF of a regFPR and a float constant into regFPR dst, UseSSE==0 when
// 24-bit instructions are not selected.  Same load/multiply as the 24-bit
// form, but the product is popped into FPU register dst.
instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10344
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10345
  match(Set dst (MulF src con));
10346

10347
  format %{ "FLD    $src\n\t"
10348
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10349
            "FSTP   $dst"  %}
10350
  ins_encode %{
10351
    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10352
    __ fmul_s($constantaddress($con));
10353
    __ fstp_d($dst$$reg);
10354
  %}
10355
  ins_pipe(fpu_reg_reg_con);
10356
%}
10357

10358

10359
//
10360
// MACRO1 -- subsume unshared load into mulFPR
10361
// This instruction does not round to 24-bits
10362
// MulF whose left input is a LoadF: the load of mem1 is folded into this
// instruction.  Per the format: FLD mem1, FMUL by src, FSTP into register dst.
// UseSSE==0, non-24-bit compiles only.
instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
10363
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10364
  match(Set dst (MulF (LoadF mem1) src));
10365

10366
  format %{ "FLD    $mem1    ===MACRO1===\n\t"
10367
            "FMUL   ST,$src\n\t"
10368
            "FSTP   $dst" %}
10369
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10370
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,mem1),
10371
              OpcReg_FPR(src),
10372
              Pop_Reg_FPR(dst), ClearInstMark );
10373
  ins_pipe( fpu_reg_reg_mem );
10374
%}
10375
//
10376
// MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10377
// This instruction does not round to 24-bits
10378
// Fused pattern dst = (LoadF(mem1) * src1) + src2 for UseSSE==0, non-24-bit
// compiles.  Per the format: FLD mem1, FMUL by src1, FADD src2, FSTP into dst.
// Carries an explicit ins_cost(95).
instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
10379
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10380
  match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10381
  ins_cost(95);
10382

10383
  format %{ "FLD    $mem1     ===MACRO2===\n\t"
10384
            "FMUL   ST,$src1  subsume mulFPR left load\n\t"
10385
            "FADD   ST,$src2\n\t"
10386
            "FSTP   $dst" %}
10387
  opcode(0xD9); /* LoadF D9 /0 */
10388
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem1),
10389
              FMul_ST_reg(src1),
10390
              FAdd_ST_reg(src2),
10391
              Pop_Reg_FPR(dst), ClearInstMark );
10392
  ins_pipe( fpu_reg_mem_reg_reg );
10393
%}
10394

10395
// MACRO3 -- addFPR a mulFPR
10396
// This instruction does not round to 24-bits.  It is a '2-address'
10397
// instruction in that the result goes back to src2.  This eliminates
10398
// a move from the macro; possibly the register allocator will have
10399
// to add it back (and maybe not).
10400
// Fused pattern src2 = (src0 * src1) + src2, with src2 serving as both an
// input and the result.  Per the format: FLD src0, FMUL by src1, FADDP into
// src2.  UseSSE==0, non-24-bit compiles only.
instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
10401
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10402
  match(Set src2 (AddF (MulF src0 src1) src2));
10403

10404
  format %{ "FLD    $src0     ===MACRO3===\n\t"
10405
            "FMUL   ST,$src1\n\t"
10406
            "FADDP  $src2,ST" %}
10407
  opcode(0xD9); /* LoadF D9 /0 */
10408
  ins_encode( Push_Reg_FPR(src0),
10409
              FMul_ST_reg(src1),
10410
              FAddP_reg_ST(src2) );
10411
  ins_pipe( fpu_reg_reg_reg );
10412
%}
10413

10414
// MACRO4 -- divFPR subFPR
10415
// This instruction does not round to 24-bits
10416
// Fused pattern dst = (src2 - src1) / src3 for UseSSE==0, non-24-bit compiles.
// Note the operand order in the match rule: src2 is the minuend and is the
// value pushed first.  Per the format: FLD src2, FSUB src1, FDIV src3, FSTP dst.
instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
10417
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10418
  match(Set dst (DivF (SubF src2 src1) src3));
10419

10420
  format %{ "FLD    $src2   ===MACRO4===\n\t"
10421
            "FSUB   ST,$src1\n\t"
10422
            "FDIV   ST,$src3\n\t"
10423
            "FSTP  $dst" %}
10424
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10425
  ins_encode( Push_Reg_FPR(src2),
10426
              subFPR_divFPR_encode(src1,src3),
10427
              Pop_Reg_FPR(dst) );
10428
  ins_pipe( fpu_reg_reg_reg_reg );
10429
%}
10430

10431
// Spill to obtain 24-bit precision
10432
// DivF (src1 / src2) of two regFPR operands, UseSSE==0 in 24-bit mode.
// src1 is pushed, divided by src2, and the quotient is popped into the float
// stack slot dst.
instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10433
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10434
  match(Set dst (DivF src1 src2));
10435

10436
  format %{ "FDIV   $dst,$src1,$src2" %}
10437
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
10438
  ins_encode( Push_Reg_FPR(src1),
10439
              OpcReg_FPR(src2),
10440
              Pop_Mem_FPR(dst) );
10441
  ins_pipe( fpu_mem_reg_reg );
10442
%}
10443
//
10444
// This instruction does not round to 24-bits
10445
// Two-address DivF (dst = dst / src) for UseSSE==0 when 24-bit instructions
// are not selected.  src is pushed and the DE-opcode form operates on dst.
instruct divFPR_reg(regFPR dst, regFPR src) %{
10446
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10447
  match(Set dst (DivF dst src));
10448

10449
  format %{ "FDIV   $dst,$src" %}
10450
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10451
  ins_encode( Push_Reg_FPR(src),
10452
              OpcP, RegOpc(dst) );
10453
  ins_pipe( fpu_reg_reg );
10454
%}
10455

10456

10457
// Spill to obtain 24-bit precision
10458
// ModF (src1 mod src2) for UseSSE==0 in 24-bit mode; the result is popped into
// the float stack slot dst.  The remainder itself is produced by the shared
// emitModDPR() encoding, which is why EAX and EFLAGS are KILLed.
instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10459
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10460
  match(Set dst (ModF src1 src2));
10461
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10462

10463
  format %{ "FMOD   $dst,$src1,$src2" %}
10464
  ins_encode( Push_Reg_Mod_DPR(src1, src2),
10465
              emitModDPR(),
10466
              Push_Result_Mod_DPR(src2),
10467
              Pop_Mem_FPR(dst));
10468
  ins_pipe( pipe_slow );
10469
%}
10470
//
10471
// This instruction does not round to 24-bits
10472
// Two-address ModF (dst = dst mod src) for UseSSE==0 when 24-bit instructions
// are not selected; the result is popped back into register dst.  Uses the
// shared emitModDPR() encoding, so EAX and EFLAGS are KILLed.
instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
10473
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10474
  match(Set dst (ModF dst src));
10475
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10476

10477
  format %{ "FMOD   $dst,$src" %}
10478
  ins_encode(Push_Reg_Mod_DPR(dst, src),
10479
              emitModDPR(),
10480
              Push_Result_Mod_DPR(src),
10481
              Pop_Reg_FPR(dst));
10482
  ins_pipe( pipe_slow );
10483
%}
10484

10485
// ModF on XMM registers (UseSSE >= 1).  Per the format, both operands are
// spilled through a 4-byte stack temporary onto the x87 stack (src1 first,
// then src0), FPREM is repeated while the parity flag obtained via
// FNSTSW AX / SAHF is set, the result is moved back into $dst through the
// same temporary, and the remaining x87 entry is popped.  EAX and EFLAGS are
// KILLed.
instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
10486
  predicate(UseSSE>=1);
10487
  match(Set dst (ModF src0 src1));
10488
  effect(KILL rax, KILL cr);
10489
  format %{ "SUB    ESP,4\t # FMOD\n"
10490
          "\tMOVSS  [ESP+0],$src1\n"
10491
          "\tFLD_S  [ESP+0]\n"
10492
          "\tMOVSS  [ESP+0],$src0\n"
10493
          "\tFLD_S  [ESP+0]\n"
10494
     "loop:\tFPREM\n"
10495
          "\tFWAIT\n"
10496
          "\tFNSTSW AX\n"
10497
          "\tSAHF\n"
10498
          "\tJP     loop\n"
10499
          "\tFSTP_S [ESP+0]\n"
10500
          "\tMOVSS  $dst,[ESP+0]\n"
10501
          "\tADD    ESP,4\n"
10502
          "\tFSTP   ST0\t # Restore FPU Stack"
10503
    %}
10504
  ins_cost(250);
10505
  ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
10506
  ins_pipe( pipe_slow );
10507
%}
10508

10509

10510
//----------Arithmetic Conversion Instructions---------------------------------
10511
// The conversion operations are all alpha-sorted.  Please keep it that way!
10512

10513
// RoundFloat for UseSSE==0: stores the regFPR source into a float stack slot
// (shown as FST_S in the format).
instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
10514
  predicate(UseSSE==0);
10515
  match(Set dst (RoundFloat src));
10516
  ins_cost(125);
10517
  format %{ "FST_S  $dst,$src\t# F-round" %}
10518
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
10519
  ins_pipe( fpu_mem_reg );
10520
%}
10521

10522
// RoundDouble for UseSSE<=1: stores the regDPR source into a double stack
// slot (shown as FST_D in the format).
instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
10523
  predicate(UseSSE<=1);
10524
  match(Set dst (RoundDouble src));
10525
  ins_cost(125);
10526
  format %{ "FST_D  $dst,$src\t# D-round" %}
10527
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
10528
  ins_pipe( fpu_mem_reg );
10529
%}
10530

10531
// Force rounding to 24-bit precision and 8-bit exponent
10532
// ConvD2F for UseSSE==0.  Has no encoding of its own: it expands to
// roundFloat_mem_reg(dst, src), i.e. a single-precision store to a stack slot.
// NOTE(review): roundFloat_mem_reg declares its source as regFPR while src
// here is regDPR -- confirm the expand accepts this operand class.
instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
10533
  predicate(UseSSE==0);
10534
  match(Set dst (ConvD2F src));
10535
  format %{ "FST_S  $dst,$src\t# F-round" %}
10536
  expand %{
10537
    roundFloat_mem_reg(dst,src);
10538
  %}
10539
%}
10540

10541
// Force rounding to 24-bit precision and 8-bit exponent
10542
// ConvD2F for UseSSE==1: the double lives in an x87 register (regDPR) and the
// float result in an XMM register (regF).  The value is stored as a
// single-precision float to a 4-byte stack temporary and reloaded with MOVSS.
// EFLAGS is KILLed (the stack pointer is adjusted with subptr/addptr).
instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
10543
  predicate(UseSSE==1);
10544
  match(Set dst (ConvD2F src));
10545
  effect( KILL cr );
10546
  format %{ "SUB    ESP,4\n\t"
10547
            "FST_S  [ESP],$src\t# F-round\n\t"
10548
            "MOVSS  $dst,[ESP]\n\t"
10549
            "ADD ESP,4" %}
10550
  ins_encode %{
10551
    __ subptr(rsp, 4);
10552
    // When src is not the FPR1L_enc register, copy it with fld_s and store
    // with a popping fstp_s; otherwise store in place with a non-popping fst_s.
    if ($src$$reg != FPR1L_enc) {
10553
      __ fld_s($src$$reg-1);
10554
      __ fstp_s(Address(rsp, 0));
10555
    } else {
10556
      __ fst_s(Address(rsp, 0));
10557
    }
10558
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
10559
    __ addptr(rsp, 4);
10560
  %}
10561
  ins_pipe( pipe_slow );
10562
%}
10563

10564
// Force rounding double precision to single precision
10565
// ConvD2F for UseSSE>=2: both operands are XMM registers and the conversion
// is a single CVTSD2SS.
instruct convD2F_reg(regF dst, regD src) %{
10566
  predicate(UseSSE>=2);
10567
  match(Set dst (ConvD2F src));
10568
  format %{ "CVTSD2SS $dst,$src\t# F-round" %}
10569
  ins_encode %{
10570
    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10571
  %}
10572
  ins_pipe( pipe_slow );
10573
%}
10574

10575
// ConvF2D for UseSSE==0: register-to-register form from regFPR to regDPR,
// encoded with Pop_Reg_Reg_DPR(dst, src).
instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
10576
  predicate(UseSSE==0);
10577
  match(Set dst (ConvF2D src));
10578
  format %{ "FST_S  $dst,$src\t# D-round" %}
10579
  ins_encode( Pop_Reg_Reg_DPR(dst, src));
10580
  ins_pipe( fpu_reg_reg );
10581
%}
10582

10583
// ConvF2D for UseSSE==1 from a regFPR source to a double stack slot.  Has no
// encoding of its own: it expands to roundDouble_mem_reg(dst, src).
// NOTE(review): roundDouble_mem_reg declares its source as regDPR while src
// here is regFPR -- confirm the expand accepts this operand class.
instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
10584
  predicate(UseSSE==1);
10585
  match(Set dst (ConvF2D src));
10586
  format %{ "FST_D  $dst,$src\t# D-round" %}
10587
  expand %{
10588
    roundDouble_mem_reg(dst,src);
10589
  %}
10590
%}
10591

10592
// ConvF2D for UseSSE==1: the float lives in an XMM register (regF) and the
// double result in an x87 register (regDPR).  The float is spilled to a
// 4-byte stack temporary with MOVSS, loaded onto the x87 stack with FLD_S,
// and popped into dst.  EFLAGS is KILLed (the stack pointer is adjusted with
// subptr/addptr).
instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
10593
  predicate(UseSSE==1);
10594
  match(Set dst (ConvF2D src));
10595
  effect( KILL cr );
10596
  format %{ "SUB    ESP,4\n\t"
10597
            "MOVSS  [ESP],$src\n\t"
10598
            "FLD_S  [ESP]\n\t"
10599
            "ADD    ESP,4\n\t"
10600
            "FSTP   $dst\t# D-round" %}
10601
  ins_encode %{
10602
    __ subptr(rsp, 4);
10603
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
10604
    __ fld_s(Address(rsp, 0));
10605
    __ addptr(rsp, 4);
10606
    __ fstp_d($dst$$reg);
10607
  %}
10608
  ins_pipe( pipe_slow );
10609
%}
10610

10611
// ConvF2D for UseSSE>=2: both operands are XMM registers and the conversion
// is a single CVTSS2SD.
instruct convF2D_reg(regD dst, regF src) %{
10612
  predicate(UseSSE>=2);
10613
  match(Set dst (ConvF2D src));
10614
  format %{ "CVTSS2SD $dst,$src\t# D-round" %}
10615
  ins_encode %{
10616
    __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10617
  %}
10618
  ins_pipe( pipe_slow );
10619
%}
10620

10621
// Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10622
// ConvD2I for UseSSE<=1 with the double in an x87 register.  The result is
// pinned to EAX; EDX (tmp) and EFLAGS are KILLed.  Per the format: FISTP is
// done under the truncating control word, the normal control word is
// restored, and if the stored result equals 0x80000000 the source is reloaded
// and d2i_wrapper is called.
instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
10623
  predicate(UseSSE<=1);
10624
  match(Set dst (ConvD2I src));
10625
  effect( KILL tmp, KILL cr );
10626
  format %{ "FLD    $src\t# Convert double to int \n\t"
10627
            "FLDCW  trunc mode\n\t"
10628
            "SUB    ESP,4\n\t"
10629
            "FISTp  [ESP + #0]\n\t"
10630
            "FLDCW  std/24-bit mode\n\t"
10631
            "POP    EAX\n\t"
10632
            "CMP    EAX,0x80000000\n\t"
10633
            "JNE,s  fast\n\t"
10634
            "FLD_D  $src\n\t"
10635
            "CALL   d2i_wrapper\n"
10636
      "fast:" %}
10637
  ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
10638
  ins_pipe( pipe_slow );
10639
%}
10640

10641
// Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10642
// ConvD2I for UseSSE>=2 with the double in an XMM register.  The result is
// pinned to EAX; EDX (tmp) and EFLAGS are KILLed.  Fast path is CVTTSD2SI;
// only when it yields 0x80000000 is the value passed on the x87 stack to the
// d2i_wrapper stub.
instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
10643
  predicate(UseSSE>=2);
10644
  match(Set dst (ConvD2I src));
10645
  effect( KILL tmp, KILL cr );
10646
  format %{ "CVTTSD2SI $dst, $src\n\t"
10647
            "CMP    $dst,0x80000000\n\t"
10648
            "JNE,s  fast\n\t"
10649
            "SUB    ESP, 8\n\t"
10650
            "MOVSD  [ESP], $src\n\t"
10651
            "FLD_D  [ESP]\n\t"
10652
            "ADD    ESP, 8\n\t"
10653
            "CALL   d2i_wrapper\n"
10654
      "fast:" %}
10655
  ins_encode %{
10656
    Label fast;
10657
    __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
10658
    // Any result other than 0x80000000 is final; skip the slow path.
    __ cmpl($dst$$Register, 0x80000000);
10659
    __ jccb(Assembler::notEqual, fast);
10660
    // Slow path: move the double onto the x87 stack through an 8-byte stack
    // temporary, then call the wrapper stub.
    __ subptr(rsp, 8);
10661
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10662
    __ fld_d(Address(rsp, 0));
10663
    __ addptr(rsp, 8);
10664
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
10665
    __ post_call_nop();
10666
    __ bind(fast);
10667
  %}
10668
  ins_pipe( pipe_slow );
10669
%}
10670

10671
// ConvD2L for UseSSE<=1 with the double in an x87 register; the long result
// goes to the eADXRegL pair and EFLAGS is KILLed.  Per the format: FISTP
// under the truncating control word, pop the two halves into EAX/EDX, and if
// EDX == 0x80000000 and EAX == 0 reload the source and call d2l_wrapper.
instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
10672
  predicate(UseSSE<=1);
10673
  match(Set dst (ConvD2L src));
10674
  effect( KILL cr );
10675
  format %{ "FLD    $src\t# Convert double to long\n\t"
10676
            "FLDCW  trunc mode\n\t"
10677
            "SUB    ESP,8\n\t"
10678
            "FISTp  [ESP + #0]\n\t"
10679
            "FLDCW  std/24-bit mode\n\t"
10680
            "POP    EAX\n\t"
10681
            "POP    EDX\n\t"
10682
            "CMP    EDX,0x80000000\n\t"
10683
            "JNE,s  fast\n\t"
10684
            "TEST   EAX,EAX\n\t"
10685
            "JNE,s  fast\n\t"
10686
            "FLD    $src\n\t"
10687
            "CALL   d2l_wrapper\n"
10688
      "fast:" %}
10689
  ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
10690
  ins_pipe( pipe_slow );
10691
%}
10692

10693
// XMM lacks a float/double->long conversion, so use the old FPU stack.
10694
// ConvD2L for UseSSE>=2 with the double in an XMM register; the long result
// goes to the eADXRegL pair and EFLAGS is KILLed.  The value is moved to the
// x87 stack through an 8-byte stack temporary, converted with FISTP under the
// truncating control word, and popped into EAX/EDX.  Only when the result is
// EDX == 0x80000000 and EAX == 0 is the value reloaded and d2l_wrapper called.
instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
10695
  predicate (UseSSE>=2);
10696
  match(Set dst (ConvD2L src));
10697
  effect( KILL cr );
10698
  format %{ "SUB    ESP,8\t# Convert double to long\n\t"
10699
            "MOVSD  [ESP],$src\n\t"
10700
            "FLD_D  [ESP]\n\t"
10701
            "FLDCW  trunc mode\n\t"
10702
            "FISTp  [ESP + #0]\n\t"
10703
            "FLDCW  std/24-bit mode\n\t"
10704
            "POP    EAX\n\t"
10705
            "POP    EDX\n\t"
10706
            "CMP    EDX,0x80000000\n\t"
10707
            "JNE,s  fast\n\t"
10708
            "TEST   EAX,EAX\n\t"
10709
            "JNE,s  fast\n\t"
10710
            "SUB    ESP,8\n\t"
10711
            "MOVSD  [ESP],$src\n\t"
10712
            "FLD_D  [ESP]\n\t"
10713
            "ADD    ESP,8\n\t"
10714
            "CALL   d2l_wrapper\n"
10715
      "fast:" %}
10716
  ins_encode %{
10717
    Label fast;
10718
    // Spill the XMM double to an 8-byte stack temporary and load it onto the
    // x87 stack; the same temporary then receives the 64-bit integer result.
    __ subptr(rsp, 8);
10719
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10720
    __ fld_d(Address(rsp, 0));
10721
    __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
10722
    __ fistp_d(Address(rsp, 0));
10723
    // Restore the rounding mode, mask the exception
10724
    if (Compile::current()->in_24_bit_fp_mode()) {
10725
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
10726
    } else {
10727
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
10728
    }
10729
    // Load the converted long, adjust CPU stack
10730
    __ pop(rax);
10731
    __ pop(rdx);
10732
    // Take the slow path only when the result is exactly 0x80000000:00000000.
    __ cmpl(rdx, 0x80000000);
10733
    __ jccb(Assembler::notEqual, fast);
10734
    __ testl(rax, rax);
10735
    __ jccb(Assembler::notEqual, fast);
10736
    // Slow path: put the original double back on the x87 stack for the stub.
    __ subptr(rsp, 8);
10737
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10738
    __ fld_d(Address(rsp, 0));
10739
    __ addptr(rsp, 8);
10740
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
10741
    __ post_call_nop();
10742
    __ bind(fast);
10743
  %}
10744
  ins_pipe( pipe_slow );
10745
%}
10746

10747
// Convert a double to an int.  Java semantics require we do complex
10748
// manglations in the corner cases.  So we set the rounding mode to
10749
// 'zero', store the darned double down as an int, and reset the
10750
// rounding mode to 'nearest'.  The hardware stores a flag value down
10751
// if we would overflow or converted a NAN; we check for this
10751
// and go the slow path if needed.
10753
// ConvF2I for UseSSE==0 with the float in an x87 register.  The result is
// pinned to EAX; EDX (tmp) and EFLAGS are KILLed.  Per the format: FISTP
// under the truncating control word, then if the stored result equals
// 0x80000000 the source is reloaded and d2i_wrapper is called.
instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
10754
  predicate(UseSSE==0);
10755
  match(Set dst (ConvF2I src));
10756
  effect( KILL tmp, KILL cr );
10757
  format %{ "FLD    $src\t# Convert float to int \n\t"
10758
            "FLDCW  trunc mode\n\t"
10759
            "SUB    ESP,4\n\t"
10760
            "FISTp  [ESP + #0]\n\t"
10761
            "FLDCW  std/24-bit mode\n\t"
10762
            "POP    EAX\n\t"
10763
            "CMP    EAX,0x80000000\n\t"
10764
            "JNE,s  fast\n\t"
10765
            "FLD    $src\n\t"
10766
            "CALL   d2i_wrapper\n"
10767
      "fast:" %}
10768
  // DPR2I_encoding works for FPR2I
10769
  ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
10770
  ins_pipe( pipe_slow );
10771
%}
10772

10773
// Convert a float in xmm to an int reg (UseSSE>=1).
// CVTTSS2SI is tried first.  If it produces 0x80000000 the source is copied
// through a temporary stack slot onto the x87 stack and d2i_wrapper is
// called to compute the result.  Result is in EAX; EDX and flags are killed.
instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSS2SI $dst, $src\n\t"
            "CMP    $dst,0x80000000\n\t"
            "JNE,s  fast\n\t"
            "SUB    ESP, 4\n\t"
            "MOVSS  [ESP], $src\n\t"
            "FLD    [ESP]\n\t"
            "ADD    ESP, 4\n\t"
            "CALL   d2i_wrapper\n"
      "fast:" %}
  ins_encode %{
    Label fast;
    // Fast path: truncating SSE conversion.
    __ cvttss2sil($dst$$Register, $src$$XMMRegister);
    // 0x80000000 is the value that sends us to the slow path.
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    // Slow path: move the float from xmm to the x87 stack via a 4-byte
    // scratch slot, then let the stub do the conversion.
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
    __ post_call_nop();
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}
10802

10803
// Convert a float on the x87 stack to a long (UseSSE==0).
// The value is stored as an integer using the truncating control word and
// popped into EDX:EAX.  If the result is 0x80000000:00000000 the slow path
// reloads the source and calls d2l_wrapper.  Flags are killed.
instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "FLD    $src\t# Convert float to long\n\t"
            "FLDCW  trunc mode\n\t"
            "SUB    ESP,8\n\t"
            "FISTp  [ESP + #0]\n\t"
            "FLDCW  std/24-bit mode\n\t"
            "POP    EAX\n\t"
            "POP    EDX\n\t"
            "CMP    EDX,0x80000000\n\t"
            "JNE,s  fast\n\t"
            "TEST   EAX,EAX\n\t"
            "JNE,s  fast\n\t"
            "FLD    $src\n\t"
            "CALL   d2l_wrapper\n"
      "fast:" %}
  // DPR2L_encoding works for FPR2L
  ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}
10825

10826
// XMM lacks a float/double->long conversion, so use the old FPU stack.
// The float is spilled from xmm to the stack, loaded onto the x87 stack,
// stored back as an integer with the truncating control word and popped
// into EDX:EAX.  If the result is 0x80000000:00000000 the source is
// reloaded and d2l_wrapper is called.  Flags are killed.
instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "SUB    ESP,8\t# Convert float to long\n\t"
            "MOVSS  [ESP],$src\n\t"
            "FLD_S  [ESP]\n\t"
            "FLDCW  trunc mode\n\t"
            "FISTp  [ESP + #0]\n\t"
            "FLDCW  std/24-bit mode\n\t"
            "POP    EAX\n\t"
            "POP    EDX\n\t"
            "CMP    EDX,0x80000000\n\t"
            "JNE,s  fast\n\t"
            "TEST   EAX,EAX\n\t"
            "JNE,s  fast\n\t"
            "SUB    ESP,4\t# Convert float to long\n\t"
            "MOVSS  [ESP],$src\n\t"
            "FLD_S  [ESP]\n\t"
            "ADD    ESP,4\n\t"
            "CALL   d2l_wrapper\n"
      "fast:" %}
  ins_encode %{
    Label fast;
    // Reserve 8 bytes: used first for the float spill, then for the result.
    __ subptr(rsp, 8);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    // Switch to the truncating control word for the integer store.
    __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    // Only the exact pattern EDX==0x80000000 && EAX==0 takes the slow path.
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    // Slow path: put the float back on the x87 stack and call the stub.
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
    __ post_call_nop();
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}
10879

10880
// Convert an int held in a stack slot to a double in an x87 register
// (UseSSE<=1): FILD the slot, then pop into $dst.
instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
  predicate( UseSSE<=1 );
  match(Set dst (ConvI2D src));
  format %{ "FILD   $src\n\t"
            "FSTP   $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}
10889

10890
// Convert an int register to a double in xmm with CVTSI2SD.
// Selected when UseSSE>=2 and UseXmmI2D is off (convXI2D_reg covers the
// UseXmmI2D case).
instruct convI2D_reg(regD dst, rRegI src) %{
  predicate( UseSSE>=2 && !UseXmmI2D );
  match(Set dst (ConvI2D src));
  format %{ "CVTSI2SD $dst,$src" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
10899

10900
// Convert an int loaded from memory to a double in xmm (UseSSE>=2).
// Folds the LoadI into the memory-operand form of CVTSI2SD.
instruct convI2D_mem(regD dst, memory mem) %{
  predicate( UseSSE>=2 );
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "CVTSI2SD $dst,$mem" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}
10909

10910
// Convert an int register to a double in xmm when UseXmmI2D is set
// (UseSSE>=2): move the int into the xmm register with MOVD and convert
// in place with CVTDQ2PD.
instruct convXI2D_reg(regD dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2D );
  match(Set dst (ConvI2D src));

  format %{ "MOVD  $dst,$src\n\t"
            "CVTDQ2PD $dst,$dst\t# i2d" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}
10923

10924
// Convert an int loaded from memory to a double in an x87 register.
// Selected when UseSSE<=1 and the compilation is not selecting 24-bit
// instructions; FILD reads the memory operand directly.
instruct convI2DPR_mem(regDPR dst, memory mem) %{
  predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "FILD   $mem\n\t"
            "FSTP   $dst" %}
  opcode(0xDB);      /* DB /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst), ClearInstMark);
  ins_pipe( fpu_reg_mem );
%}
10934

10935
// Convert a byte to a float; no rounding step needed.
// The predicate recognizes the input as (AndI x 255), i.e. a value masked
// to 8 bits, and only applies on the x87 path (UseSSE==0).
instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
  match(Set dst (ConvI2F src));
  format %{ "FILD   $src\n\t"
            "FSTP   $dst" %}

  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}
10946

10947
// In 24-bit mode, force exponent rounding by storing back out
// Int in a stack slot -> float in a stack slot on the x87 path (UseSSE==0):
// FILD the source, then FSTP_S to the destination slot.
instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  ins_cost(200);
  format %{ "FILD   $src\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}
10959

10960
// In 24-bit mode, force exponent rounding by storing back out
// Same as convI2FPR_SSF, but the int comes from a folded LoadI so FILD
// reads the memory operand directly.
instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  ins_cost(200);
  format %{ "FILD   $mem\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB);  /* DB /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
              Pop_Mem_FPR(dst), ClearInstMark);
  ins_pipe( fpu_mem_mem );
%}
10972

10973
// This instruction does not round to 24-bits
// Int in a stack slot -> float in an x87 register, used when UseSSE==0 and
// the compilation is not selecting 24-bit instructions.
instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  format %{ "FILD   $src\n\t"
            "FSTP   $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}
10984

10985
// This instruction does not round to 24-bits
// Same as convI2FPR_reg, but the int comes from a folded LoadI so FILD
// reads the memory operand directly.
instruct convI2FPR_mem(regFPR dst, memory mem) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  format %{ "FILD   $mem\n\t"
            "FSTP   $dst" %}
  opcode(0xDB);      /* DB /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst), ClearInstMark);
  ins_pipe( fpu_reg_mem );
%}
10996

10997
// Convert an int to a float in xmm; no rounding step needed.
// Uses CVTSI2SS.  Selected for UseSSE==1, or for UseSSE>=2 when UseXmmI2F
// is off (convXI2F_reg covers the UseXmmI2F case).
instruct convI2F_reg(regF dst, rRegI src) %{
  predicate( UseSSE==1 || ( UseSSE>=2 && !UseXmmI2F ));
  match(Set dst (ConvI2F src));
  format %{ "CVTSI2SS $dst, $src" %}
  ins_encode %{
    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
11007

11008
 instruct convXI2F_reg(regF dst, rRegI src)
%{
  // Convert an int register to a float in xmm when UseXmmI2F is set
  // (UseSSE>=2): move the int into the xmm register with MOVD and convert
  // in place with CVTDQ2PS.
  predicate( UseSSE>=2 && UseXmmI2F );
  match(Set dst (ConvI2F src));

  format %{ "MOVD  $dst,$src\n\t"
            "CVTDQ2PS $dst,$dst\t# i2f" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}
11021

11022
// Sign-extend an int into a long register pair: copy the int into both
// halves, then arithmetic-shift the high half right by 31 so it holds the
// sign.  The flags are killed.
instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (ConvI2L src));
  effect(KILL cr);
  ins_cost(375);
  format %{ "MOV    $dst.lo,$src\n\t"
            "MOV    $dst.hi,$src\n\t"
            "SAR    $dst.hi,31" %}
  ins_encode(convert_int_long(dst,src));
  ins_pipe( ialu_reg_reg_long );
%}
11032

11033
// Zero-extend convert int to long
// Matches (AndL (ConvI2L src) mask) with a 32-bit mask operand
// (immL_32bits): copy the int into the low half and XOR the high half with
// itself to clear it.  Cheaper (cost 250) than the sign-extending
// convI2L_reg (cost 375).  The flags are killed.
instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL (ConvI2L src) mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV    $dst.lo,$src\n\t"
            "XOR    $dst.hi,$dst.hi" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}
11044

11045
// Zero-extend long
// Matches (AndL src mask) with a 32-bit mask operand (immL_32bits): copy
// the low half of the source and XOR the high half of the destination with
// itself to clear it.  The flags are killed.
instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL src mask) );
  effect( KILL flags );
  ins_cost(250);
  // No trailing "\n\t" after the last line, matching convI2L_reg_zex.
  format %{ "MOV    $dst.lo,$src.lo\n\t"
            "XOR    $dst.hi,$dst.hi" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}
11056

11057
// Convert a long register pair to a double on the x87 path (UseSSE<=1).
// The long is pushed on the CPU stack, loaded with FILD, and the result is
// stored to a double stack slot (the store performs the D-round).
// The flags are killed.
instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE<=1);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
            "PUSH   $src.lo\n\t"
            "FILD   ST,[ESP + #0]\n\t"
            "ADD    ESP,8\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
  ins_pipe( pipe_slow );
%}
11070

11071
// Convert a long register pair to a double in xmm (UseSSE>=2).
// The conversion itself still goes through the x87 unit: push the long,
// FILD it, store it back as a double and load that into $dst with MOVSD.
// The flags are killed.
instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
            "PUSH   $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_D [ESP]\n\t"
            "MOVSD  $dst,[ESP]\n\t"
            "ADD    ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultD(dst));
  ins_pipe( pipe_slow );
%}
11085

11086
// Convert a long register pair to a float in xmm (UseSSE>=1).
// The conversion goes through the x87 unit: push the long, FILD it, store
// it back as a single float and load that into $dst with MOVSS.
// The flags are killed.
instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
            "PUSH   $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_S [ESP]\n\t"
            "MOVSS  $dst,[ESP]\n\t"
            "ADD    ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  // 0x8 matches the 8 bytes pushed above (see "ADD ESP,8" in the format).
  ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
  ins_pipe( pipe_slow );
%}
11100

11101
// Convert a long register pair to a float stored in a stack slot via the
// x87 unit: push the long, FILD it, and store it out as a single float
// (the store performs the F-round).  The flags are killed.
// NOTE(review): unlike convL2F_reg this rule has no UseSSE predicate; it is
// distinguished only by its stackSlotF destination - confirm this is
// intentional.
instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
            "PUSH   $src.lo\n\t"
            "FILD   ST,[ESP + #0]\n\t"
            "ADD    ESP,8\n\t"
            "FSTP_S $dst\t# F-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}
11113

11114
// Truncate a long to an int: copy the low half of the register pair.
instruct convL2I_reg( rRegI dst, eRegL src ) %{
  match(Set dst (ConvL2I src));
  effect( DEF dst, USE src );
  format %{ "MOV    $dst,$src.lo" %}
  ins_encode(enc_CopyL_Lo(dst,src));
  ins_pipe( ialu_reg_reg );
%}
11121

11122
// MoveF2I, float in a stack slot -> int register: a plain 32-bit load from
// the slot at [rsp + disp].
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(100);
  format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    __ movl($dst$$Register, Address(rsp, $src$$disp));
  %}
  ins_pipe( ialu_reg_mem );
%}
11132

11133
// MoveF2I, x87 float register -> int stack slot (UseSSE==0): store the
// float to the slot with FST_S.
instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(125);
  format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}
11143

11144
// MoveF2I, xmm float register -> int stack slot (UseSSE>=1): store the
// float to the slot at [rsp + disp] with MOVSS.
instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
  ins_encode %{
    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
11156

11157
// MoveF2I, xmm float register -> int register (UseSSE>=2): direct MOVD,
// the cheapest (cost 85) of the MoveF2I rules.
instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(85);
  format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
11168

11169
// MoveI2F, int register -> float stack slot: a plain 32-bit store to the
// slot at [rsp + disp].
instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(100);
  format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
  ins_encode %{
    __ movl(Address(rsp, $dst$$disp), $src$$Register);
  %}
  ins_pipe( ialu_mem_reg );
%}
11180

11181

11182
// MoveI2F, int stack slot -> x87 float register (UseSSE==0): load the slot
// as a 32-bit real with FLD_S and pop it into $dst.
instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FLD_S  $src\n\t"
            "FSTP   $dst\t# MoveI2F_stack_reg" %}
  opcode(0xD9);               /* D9 /0, FLD m32real */
  ins_encode( SetInstMark, OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst), ClearInstMark );
  ins_pipe( fpu_reg_mem );
%}
11195

11196
// MoveI2F, int stack slot -> xmm float register (UseSSE>=1): load the slot
// at [rsp + disp] with MOVSS.
instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}
11208

11209
// MoveI2F, int register -> xmm float register (UseSSE>=2): direct MOVD,
// the cheapest (cost 85) of the MoveI2F rules.
instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(85);
  format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
11221

11222
// MoveD2L, double stack slot -> long register pair: two 32-bit loads
// (opcode 0x8B each), the low half from $src and the high half from $src+4.
instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(250);
  format %{ "MOV    $dst.lo,$src\n\t"
            "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
  opcode(0x8B, 0x8B);
  ins_encode( SetInstMark, OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src), ClearInstMark);
  ins_pipe( ialu_mem_long_reg );
%}
11233

11234
// MoveD2L, x87 double register -> long stack slot (UseSSE<=1): store the
// double to the slot with FST_D.
instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}
11244

11245
// MoveD2L, xmm double register -> long stack slot (UseSSE>=2): store the
// double to the slot at [rsp + disp] with MOVSD.
instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(95);
  format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
  ins_encode %{
    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
11256

11257
// MoveD2L, xmm double register -> long register pair (UseSSE>=2), without
// going through memory.  An xmm temporary is needed to bring the upper
// 32 bits of the double down to where MOVD can read them.
instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD   $dst.lo,$src\n\t"
            "PSHUFLW $tmp,$src,0x4E\n\t"
            "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
  ins_encode %{
    // Low 32 bits go straight to the low register of the pair.
    __ movdl($dst$$Register, $src$$XMMRegister);
    // 0x4e swaps the two dwords of the low quadword, so the high half of
    // the double ends up in the low dword of tmp.
    __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
11272

11273
// MoveL2D, long register pair -> double stack slot: two 32-bit stores
// (opcode 0x89 each), the low half to $dst and the high half to $dst+4.
instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(200);
  format %{ "MOV    $dst,$src.lo\n\t"
            "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
  opcode(0x89, 0x89);
  ins_encode( SetInstMark, OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ), ClearInstMark );
  ins_pipe( ialu_mem_long_reg );
%}
11284

11285

11286
// MoveL2D, long stack slot -> x87 double register (UseSSE<=1): load the
// slot as a 64-bit real with FLD_D and pop it into $dst.
instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(125);

  format %{ "FLD_D  $src\n\t"
            "FSTP   $dst\t# MoveL2D_stack_reg" %}
  opcode(0xDD);               /* DD /0, FLD m64real */
  ins_encode( SetInstMark, OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst), ClearInstMark );
  ins_pipe( fpu_reg_mem );
%}
11299

11300

11301
// MoveL2D, long stack slot -> xmm double register, for UseSSE>=2 with
// UseXmmLoadAndClearUpper set.  The format shows the load as MOVSD.
instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}
11313

11314
// MoveL2D, long stack slot -> xmm double register, for UseSSE>=2 with
// UseXmmLoadAndClearUpper off.  The encoding is the same movdbl() call as
// in MoveL2D_stack_reg_sse; only the format differs (MOVLPD).
// NOTE(review): presumably movdbl() picks the actual instruction from
// UseXmmLoadAndClearUpper - confirm against the macro assembler.
instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}
11326

11327
// MoveL2D, long register pair -> xmm double register (UseSSE>=2), without
// going through memory.  Each half is moved into its own xmm register with
// MOVD and the two are interleaved with PUNPCKLDQ.
// dst is declared TEMP (not DEF) because it is written before the high half
// of src is read.
instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveL2D src));
  effect(TEMP dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD   $dst,$src.lo\n\t"
            "MOVD   $tmp,$src.hi\n\t"
            "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
  ins_encode %{
    // Low half -> dst, high half -> tmp.
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    // Combine: dst = tmp[31:0] : dst[31:0].
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
11342

11343
//----------------------------- CompressBits/ExpandBits ------------------------
11344

11345
// 64-bit CompressBits on a 32-bit target, built from two 32-bit PEXTs.
// Operands are fixed register pairs (dst/src/mask), with one GPR ($rtmp)
// and one xmm register ($xtmp) as temporaries; the flags are killed.
instruct compressBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{
  predicate(n->bottom_type()->isa_long());
  match(Set dst (CompressBits src mask));
  effect(TEMP rtmp, TEMP xtmp, KILL cr);
  format %{ "compress_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %}
  ins_encode %{
    Label exit, partial_result;
    // Extract both the upper and the lower 32 bits of source into the destination register pair.
    // Merge the results of upper and lower destination registers such that upper destination
    // results are contiguously laid out after the lower destination result.
    __ pextl($dst$$Register, $src$$Register, $mask$$Register);
    __ pextl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register));
    __ popcntl($rtmp$$Register, $mask$$Register);
    // Skip merging if bit count of lower mask register is equal to 32 (register size).
    __ cmpl($rtmp$$Register, 32);
    __ jccb(Assembler::equal, exit);
    // Due to constraint on number of GPRs on 32 bit target, using XMM register as potential spill slot.
    __ movdl($xtmp$$XMMRegister, $rtmp$$Register);
    // Shift left the contents of upper destination register by true bit count of lower mask register
    // and merge with lower destination register.
    __ shlxl($rtmp$$Register, HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
    __ orl($dst$$Register, $rtmp$$Register);
    // Reload the saved bit count.
    __ movdl($rtmp$$Register, $xtmp$$XMMRegister);
    // Zero out upper destination register if true bit count of lower 32 bit mask is zero
    // since contents of upper destination have already been copied to lower destination
    // register.
    __ cmpl($rtmp$$Register, 0);
    __ jccb(Assembler::greater, partial_result);
    __ movl(HIGH_FROM_LOW($dst$$Register), 0);
    __ jmp(exit);
    __ bind(partial_result);
    // Perform right shift over upper destination register to move out bits already copied
    // to lower destination register.  The shift count is 32 - popcount(mask.lo).
    __ subl($rtmp$$Register, 32);
    __ negl($rtmp$$Register);
    __ shrxl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
    __ bind(exit);
  %}
  ins_pipe( pipe_slow );
%}
11385

11386
// 64-bit ExpandBits on a 32-bit target, built from 32-bit PDEPs.
// Operands are fixed register pairs (dst/src/mask), with one GPR ($rtmp)
// and one xmm register ($xtmp) as temporaries; the flags are killed.
// The low mask register is used as scratch inside the sequence and is
// restored from $xtmp before the exit label.
instruct expandBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{
  predicate(n->bottom_type()->isa_long());
  match(Set dst (ExpandBits src mask));
  effect(TEMP rtmp, TEMP xtmp, KILL cr);
  format %{ "expand_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %}
  ins_encode %{
    // The expand (deposit) operation sequentially reads the bits from source register starting
    // from LSB and lays them out into destination register at bit locations corresponding to
    // true bits in mask register. Thus number of source bits read are equal to combined true
    // bit count of mask register pair.
    Label exit, mask_clipping;
    __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
    __ pdepl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register));
    __ popcntl($rtmp$$Register, $mask$$Register);
    // If true bit count of lower mask register is 32 then none of bit of lower source register
    // will feed to upper destination register.
    __ cmpl($rtmp$$Register, 32);
    __ jccb(Assembler::equal, exit);
    // Due to constraint on number of GPRs on 32 bit target, using XMM register as potential spill slot.
    __ movdl($xtmp$$XMMRegister, $rtmp$$Register);
    // Shift right the contents of lower source register to remove already consumed bits.
    __ shrxl($rtmp$$Register, $src$$Register, $rtmp$$Register);
    // Extract the bits from lower source register starting from LSB under the influence
    // of upper mask register.
    __ pdepl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register, HIGH_FROM_LOW($mask$$Register));
    // rtmp = 32 - popcount(mask.lo): the number of lower source bits just deposited above.
    __ movdl($rtmp$$Register, $xtmp$$XMMRegister);
    __ subl($rtmp$$Register, 32);
    __ negl($rtmp$$Register);
    // Save the lower mask register in xtmp and reuse it as a working copy of the upper mask.
    __ movdl($xtmp$$XMMRegister, $mask$$Register);
    __ movl($mask$$Register, HIGH_FROM_LOW($mask$$Register));
    // Clear the set bits in upper mask register which have been used to extract the contents
    // from lower source register.  Each BLSR clears the lowest set bit; loop rtmp times.
    __ bind(mask_clipping);
    __ blsrl($mask$$Register, $mask$$Register);
    __ decrementl($rtmp$$Register, 1);
    __ jccb(Assembler::greater, mask_clipping);
    // Starting from LSB extract the bits from upper source register under the influence of
    // remaining set bits in upper mask register.
    __ pdepl($rtmp$$Register, HIGH_FROM_LOW($src$$Register), $mask$$Register);
    // Merge the partial results extracted from lower and upper source register bits.
    __ orl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
    // Restore the lower mask register.
    __ movdl($mask$$Register, $xtmp$$XMMRegister);
    __ bind(exit);
  %}
  ins_pipe( pipe_slow );
%}
11432

11433
// =======================================================================
11434
// Fast clearing of an array
11435
// Small non-constant length ClearArray for non-AVX512 targets.
// Selected when the ClearArray node is not marked large and UseAVX <= 2.
// cnt (ECX) and base (EDI) are consumed and killed, EAX is killed as the
// zero register, and an xmm temporary is reserved.  The format template
// below only prints a listing (it varies with UseFastStosb and
// UseXMMForObjInit); the code itself is emitted by clear_mem().
instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
    $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
    $$emit$$"JG     LARGE\n\t"
    $$emit$$"SHL    ECX, 1\n\t"
    $$emit$$"DEC    ECX\n\t"
    $$emit$$"JS     DONE\t# Zero length\n\t"
    $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
    $$emit$$"DEC    ECX\n\t"
    $$emit$$"JGE    LOOP\n\t"
    $$emit$$"JMP    DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
       $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
       $$emit$$"MOV     RDI,RAX\n\t"
       $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
       $$emit$$"JMPQ    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
       $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
       $$emit$$"ADD     0x40,RAX\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"SUB     0x8,RCX\n\t"
       $$emit$$"JGE     L_loop\n\t"
       $$emit$$"ADD     0x4,RCX\n\t"
       $$emit$$"JL      L_tail\n\t"
       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
       $$emit$$"ADD     0x20,RAX\n\t"
       $$emit$$"SUB     0x4,RCX\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"ADD     0x4,RCX\n\t"
       $$emit$$"JLE     L_end\n\t"
       $$emit$$"DEC     RCX\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
       $$emit$$"ADD     0x8,RAX\n\t"
       $$emit$$"DEC     RCX\n\t"
       $$emit$$"JGE     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
       $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // NOTE(review): the 'false' argument presumably is the is_large flag
    // (this rule's predicate requires !is_large()) and knoreg means no
    // opmask register is supplied - confirm against clear_mem().
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, false, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
11494

11495
// Small non-constant length ClearArray for AVX512 targets.
// Selected when the ClearArray node is not marked large and UseAVX > 2.
// Same shape as rep_stos, but with a legRegD temporary and an additional
// opmask (kReg) temporary that is passed on to clear_mem().  The format
// template below only prints a listing; the code itself is emitted by
// clear_mem().
instruct rep_stos_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
  match(Set dummy (ClearArray cnt base));
  ins_cost(125);
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
    $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
    $$emit$$"JG     LARGE\n\t"
    $$emit$$"SHL    ECX, 1\n\t"
    $$emit$$"DEC    ECX\n\t"
    $$emit$$"JS     DONE\t# Zero length\n\t"
    $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
    $$emit$$"DEC    ECX\n\t"
    $$emit$$"JGE    LOOP\n\t"
    $$emit$$"JMP    DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
       $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
       $$emit$$"MOV     RDI,RAX\n\t"
       $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
       $$emit$$"JMPQ    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
       $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
       $$emit$$"ADD     0x40,RAX\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"SUB     0x8,RCX\n\t"
       $$emit$$"JGE     L_loop\n\t"
       $$emit$$"ADD     0x4,RCX\n\t"
       $$emit$$"JL      L_tail\n\t"
       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
       $$emit$$"ADD     0x20,RAX\n\t"
       $$emit$$"SUB     0x4,RCX\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"ADD     0x4,RCX\n\t"
       $$emit$$"JLE     L_end\n\t"
       $$emit$$"DEC     RCX\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
       $$emit$$"ADD     0x8,RAX\n\t"
       $$emit$$"DEC     RCX\n\t"
       $$emit$$"JGE     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
       $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // NOTE(review): the 'false' argument presumably is the is_large flag
    // (this rule's predicate requires !is_large()) - confirm against
    // clear_mem().
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, false, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
11555

11556
// Large non-constant length ClearArray for non-AVX512 targets.
//
// Matches ClearArray when UseAVX <= 2 and the node reports is_large().
// cnt and base are consumed (USE_KILL); zero and the flags are clobbered;
// one XMM register is reserved as a temporary.
// NOTE(review): the UseXMMForObjInit branch of the format text prints
// 64-bit register names (RAX/RCX/RDI) while the operands use the
// eAX/eCX/eDI register classes. This only affects the debug listing --
// confirm whether the text should be aligned with the 32-bit names.
instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
  format %{ $$template
    if (UseFastStosb) {
       $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
       $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
       $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
       $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
       $$emit$$"JMPQ    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
       $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
       $$emit$$"ADD     0x40,RAX\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"SUB     0x8,RCX\n\t"
       $$emit$$"JGE     L_loop\n\t"
       $$emit$$"ADD     0x4,RCX\n\t"
       $$emit$$"JL      L_tail\n\t"
       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
       $$emit$$"ADD     0x20,RAX\n\t"
       $$emit$$"SUB     0x4,RCX\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"ADD     0x4,RCX\n\t"
       $$emit$$"JLE     L_end\n\t"
       $$emit$$"DEC     RCX\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
       $$emit$$"ADD     0x8,RAX\n\t"
       $$emit$$"DEC     RCX\n\t"
       $$emit$$"JGE     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
       $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
       $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // No opmask operand exists in this variant, so knoreg is passed.
    // NOTE(review): the literal `true` presumably selects the "large"
    // path, mirroring the is_large() predicate -- confirm against clear_mem.
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
11605

11606
// Large non-constant length ClearArray for AVX512 targets.
//
// Matches ClearArray when UseAVX > 2 and the node reports is_large().
// Compared with the UseAVX <= 2 predicate, this form additionally reserves
// a kReg temporary (ktmp) and forwards it to clear_mem.
// cnt and base are consumed (USE_KILL); zero and the flags are clobbered.
// NOTE(review): the UseXMMForObjInit branch of the format text prints
// 64-bit register names (RAX/RCX/RDI) while the operands use the
// eAX/eCX/eDI register classes. This only affects the debug listing --
// confirm whether the text should be aligned with the 32-bit names.
instruct rep_stos_large_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
  format %{ $$template
    if (UseFastStosb) {
       $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
       $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
       $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
       $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
       $$emit$$"JMPQ    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
       $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
       $$emit$$"ADD     0x40,RAX\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"SUB     0x8,RCX\n\t"
       $$emit$$"JGE     L_loop\n\t"
       $$emit$$"ADD     0x4,RCX\n\t"
       $$emit$$"JL      L_tail\n\t"
       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
       $$emit$$"ADD     0x20,RAX\n\t"
       $$emit$$"SUB     0x4,RCX\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"ADD     0x4,RCX\n\t"
       $$emit$$"JLE     L_end\n\t"
       $$emit$$"DEC     RCX\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
       $$emit$$"ADD     0x8,RAX\n\t"
       $$emit$$"DEC     RCX\n\t"
       $$emit$$"JGE     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
       $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
       $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // NOTE(review): the literal `true` presumably selects the "large"
    // path, mirroring the is_large() predicate -- confirm against clear_mem.
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
11655

11656
// Small constant length ClearArray for AVX512 targets.
//
// Matches ClearArray with an immediate count when the node is not
// is_large(), MaxVectorSize >= 32 and AVX512VL is supported.
// The count is passed to clear_mem as a compile-time constant
// ($cnt$$constant). base is not listed in the effect clause, so it is
// not declared as killed; tmp, zero and ktmp are temporaries and the
// flags are clobbered.
instruct rep_stos_im(immI cnt, kReg ktmp, eRegP base, regD tmp, rRegI zero, Universe dummy, eFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
  match(Set dummy (ClearArray cnt base));
  ins_cost(100);
  effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt  \n\t" %}
  ins_encode %{
   __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
11669

11670
// StrComp with encoding LL when AVX512VL+BW is not available.
//
// Both strings and both counts are consumed (USE_KILL); tmp1 is an XMM
// temporary and the flags are clobbered. knoreg is passed because this
// variant has no opmask operand.
instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
11684

11685
// StrComp with encoding LL when AVX512VL+BW is available.
//
// Same operand layout as the non-AVX512 LL form plus a kReg temporary
// (ktmp) that is forwarded to string_compare. Both strings and both
// counts are consumed (USE_KILL); the flags are clobbered.
instruct string_compareL_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                              eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
11699

11700
// StrComp with encoding UU when AVX512VL+BW is not available.
//
// Both strings and both counts are consumed (USE_KILL); tmp1 is an XMM
// temporary and the flags are clobbered. knoreg is passed because this
// variant has no opmask operand.
instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
11714

11715
// StrComp with encoding UU when AVX512VL+BW is available.
//
// Same operand layout as the non-AVX512 UU form plus a kReg temporary
// (ktmp) that is forwarded to string_compare. Both strings and both
// counts are consumed (USE_KILL); the flags are clobbered.
instruct string_compareU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                              eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
11729

11730
// StrComp with encoding LU when AVX512VL+BW is not available.
//
// Both strings and both counts are consumed (USE_KILL); tmp1 is an XMM
// temporary and the flags are clobbered. knoreg is passed because this
// variant has no opmask operand.
instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
11744

11745
// StrComp with encoding LU when AVX512VL+BW is available.
//
// Same operand layout as the non-AVX512 LU form plus a kReg temporary
// (ktmp) that is forwarded to string_compare. Both strings and both
// counts are consumed (USE_KILL); the flags are clobbered.
instruct string_compareLU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
11759

11760
// StrComp with encoding UL when AVX512VL+BW is not available.
//
// Unlike the LL/UU/LU forms, str1/cnt1 use the eSI/eDX register classes
// and str2/cnt2 use eDI/eCX, and string_compare is invoked with the two
// strings (and their counts) in swapped order: (str2, str1, cnt2, cnt1).
// Both strings and both counts are consumed (USE_KILL); the flags are
// clobbered.
instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    // Operands are deliberately passed as (str2, str1, cnt2, cnt1).
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
11774

11775
// StrComp with encoding UL when AVX512VL+BW is available.
//
// str1/cnt1 use the eSI/eDX register classes and str2/cnt2 use eDI/eCX,
// and string_compare is invoked with the two strings (and their counts)
// in swapped order: (str2, str1, cnt2, cnt1). A kReg temporary (ktmp) is
// forwarded to string_compare. Both strings and both counts are consumed
// (USE_KILL); the flags are clobbered.
instruct string_compareUL_evex(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    // Operands are deliberately passed as (str2, str1, cnt2, cnt1).
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
11789

11790
// fast string equals
//
// StrEquals when AVX512VL+BW is not available. Calls arrays_equals with
// its first argument false and an explicit element count (cnt).
// str1, str2 and cnt are consumed (USE_KILL); tmp1/tmp2 are XMM
// temporaries; tmp3 and the flags are clobbered. knoreg is passed because
// this variant has no opmask operand.
instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
  %}

  ins_pipe( pipe_slow );
%}
11806

11807
// StrEquals when AVX512VL+BW is available.
//
// Calls arrays_equals with its first argument false and an explicit
// element count (cnt), forwarding a kReg temporary (ktmp).
// str1, str2 and cnt are consumed (USE_KILL); tmp1/tmp2 are XMM
// temporaries; tmp3 and the flags are clobbered.
instruct string_equals_evex(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
                            regD tmp1, regD tmp2, kReg ktmp, eBXRegI tmp3, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
  %}

  ins_pipe( pipe_slow );
%}
11822

11823

11824
// fast search of substring with known size.
//
// StrIndexOf with encoding LL and a constant substring length
// (int_cnt2), enabled by UseSSE42Intrinsics. The constant selects between
// string_indexofC8 (length >= 16) and string_indexof (shorter).
// cnt2 is not an input here: it is a scratch register that is clobbered
// (KILL), as are tmp and the flags; str1, str2 and cnt1 are consumed.
instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 16) {
      // IndexOf for constant substrings with size >= 16 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    }
  %}
  ins_pipe( pipe_slow );
%}
11851

11852
// fast search of substring with known size.
//
// StrIndexOf with encoding UU and a constant substring length
// (int_cnt2), enabled by UseSSE42Intrinsics. The constant selects between
// string_indexofC8 (length >= 8) and string_indexof (shorter).
// cnt2 is not an input here: it is a scratch register that is clobbered
// (KILL), as are tmp and the flags; str1, str2 and cnt1 are consumed.
instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    }
  %}
  ins_pipe( pipe_slow );
%}
11879

11880
// fast search of substring with known size.
//
// StrIndexOf with encoding UL and a constant substring length
// (int_cnt2), enabled by UseSSE42Intrinsics. The constant selects between
// string_indexofC8 (length >= 8) and string_indexof (shorter).
// cnt2 is not an input here: it is a scratch register that is clobbered
// (KILL), as are tmp and the flags; str1, str2 and cnt1 are consumed.
instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    }
  %}
  ins_pipe( pipe_slow );
%}
11907

11908
// StrIndexOf with encoding LL and a non-constant substring length,
// enabled by UseSSE42Intrinsics.
//
// string_indexof receives (-1) in the slot where the constant-length
// forms pass the known substring length. All four inputs are consumed
// (USE_KILL); tmp and the flags are clobbered.
instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}
11923

11924
// StrIndexOf with encoding UU and a non-constant substring length,
// enabled by UseSSE42Intrinsics.
//
// string_indexof receives (-1) in the slot where the constant-length
// forms pass the known substring length. All four inputs are consumed
// (USE_KILL); tmp and the flags are clobbered.
instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe( pipe_slow );
%}
11939

11940
// StrIndexOf with encoding UL and a non-constant substring length,
// enabled by UseSSE42Intrinsics.
//
// string_indexof receives (-1) in the slot where the constant-length
// forms pass the known substring length. All four inputs are consumed
// (USE_KILL); tmp and the flags are clobbered.
instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe( pipe_slow );
%}
11955

11956
// StrIndexOfChar with encoding U, enabled by UseSSE42Intrinsics.
//
// str1, cnt1 and ch are consumed (USE_KILL); three XMM registers and tmp
// are temporaries; the flags are clobbered.
instruct string_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
                              eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
  ins_encode %{
    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                           $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}
11968

11969
// StrIndexOfChar with encoding L, enabled by UseSSE42Intrinsics.
//
// Same operand layout as the encoding-U form but dispatches to
// stringL_indexof_char. str1, cnt1 and ch are consumed (USE_KILL); three
// XMM registers and tmp are temporaries; the flags are clobbered.
instruct stringL_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
                              eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
  ins_encode %{
    __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                           $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}
11981

11982

11983
// fast array equals
//
// AryEq with encoding LL when AVX512VL+BW is not available. Calls
// arrays_equals with its first argument true; no count operand exists, so
// the scratch register tmp3 is passed in the count position.
// ary1 and ary2 are consumed (USE_KILL); tmp1/tmp2 are XMM temporaries;
// tmp3, tmp4 and the flags are clobbered.
instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
12000

12001
// AryEq with encoding LL when AVX512VL+BW is available.
//
// Calls arrays_equals with its first argument true, passing the scratch
// register tmp3 in the count position and forwarding a kReg temporary
// (ktmp). ary1 and ary2 are consumed (USE_KILL); tmp1/tmp2 are XMM
// temporaries; tmp3, tmp4 and the flags are clobbered.
instruct array_equalsB_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
12017

12018
// AryEq with encoding UU when AVX512VL+BW is not available.
//
// Calls arrays_equals with its first argument true and the `char` flag
// set to true, passing the scratch register tmp3 in the count position.
// ary1 and ary2 are consumed (USE_KILL); tmp1/tmp2 are XMM temporaries;
// tmp3, tmp4 and the flags are clobbered.
instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
12034

12035
// AryEq with encoding UU when AVX512VL+BW is available.
//
// Calls arrays_equals with its first argument true and the `char` flag
// set to true, passing the scratch register tmp3 in the count position
// and forwarding a kReg temporary (ktmp). ary1 and ary2 are consumed
// (USE_KILL); tmp1/tmp2 are XMM temporaries; tmp3, tmp4 and the flags are
// clobbered.
instruct array_equalsC_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                            regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
12051

12052
// CountPositives when AVX512VL+BW or BMI2 is missing.
//
// ary1 and len are consumed (USE_KILL); tmp1/tmp2 are XMM temporaries;
// tmp3 and the flags are clobbered. Both opmask arguments are knoreg
// because this variant has no kReg operands.
instruct count_positives(eSIRegP ary1, eCXRegI len, eAXRegI result,
                         regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set result (CountPositives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ count_positives($ary1$$Register, $len$$Register,
                       $result$$Register, $tmp3$$Register,
                       $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
12067

12068
// CountPositives when both AVX512VL+BW and BMI2 are available.
//
// Two kReg temporaries (ktmp1, ktmp2) are reserved and forwarded to
// count_positives. ary1 and len are consumed (USE_KILL); tmp1/tmp2 are
// XMM temporaries; tmp3 and the flags are clobbered.
instruct count_positives_evex(eSIRegP ary1, eCXRegI len, eAXRegI result,
                              regD tmp1, regD tmp2, kReg ktmp1, kReg ktmp2, eBXRegI tmp3, eFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set result (CountPositives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ count_positives($ary1$$Register, $len$$Register,
                       $result$$Register, $tmp3$$Register,
                       $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
12083

12084

12085
// fast char[] to byte[] compression
//
// StrCompressedCopy when AVX512VL+BW or BMI2 is missing. src, dst and len
// are consumed (USE_KILL); four XMM registers are temporaries; tmp5 and
// the flags are clobbered. Both opmask arguments are knoreg because this
// variant has no kReg operands.
// NOTE(review): the format text lists 64-bit register names (RAX, RCX,
// RDX) while the operands use the eAX/eCX/eDX register classes; this only
// affects the debug listing -- confirm whether it should be updated.
instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
                         regD tmp3, regD tmp4, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
                           knoreg, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
12101

12102
// StrCompressedCopy (char[] to byte[] compression) when both AVX512VL+BW
// and BMI2 are available.
//
// Two kReg temporaries (ktmp1, ktmp2) are reserved and forwarded to
// char_array_compress. src, dst and len are consumed (USE_KILL); four XMM
// registers are temporaries; tmp5 and the flags are clobbered.
// NOTE(review): the format text lists 64-bit register names (RAX, RCX,
// RDX) while the operands use the eAX/eCX/eDX register classes; this only
// affects the debug listing -- confirm whether it should be updated.
instruct string_compress_evex(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
                              regD tmp3, regD tmp4, kReg ktmp1, kReg ktmp2, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
                           $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
12117

12118
// fast byte[] to char[] inflation
//
// StrInflatedCopy when AVX512VL+BW or BMI2 is missing. The match produces
// only the Universe dummy, i.e. no value result. src, dst and len are
// consumed (USE_KILL); tmp1 (XMM) and tmp2 are temporaries; the flags are
// clobbered. knoreg is passed because this variant has no kReg operand.
instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
                        regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
12132

12133
// StrInflatedCopy (byte[] to char[] inflation) when both AVX512VL+BW and
// BMI2 are available.
//
// The match produces only the Universe dummy, i.e. no value result. A
// kReg temporary (ktmp) is forwarded to byte_array_inflate. src, dst and
// len are consumed (USE_KILL); tmp1 (XMM) and tmp2 are temporaries; the
// flags are clobbered.
instruct string_inflate_evex(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
                             regD tmp1, kReg ktmp, eCXRegI tmp2, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
12146

12147
// Encode char[] to byte[] in ISO_8859_1.  Selected when the EncodeISOArray
// node is not marked as ASCII; the trailing flag passed to the macro
// assembler is therefore false.
instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
                          regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  match(Set result (EncodeISOArray src (Binary dst len)));
  predicate(!((EncodeISOArrayNode*)n)->is_ascii());
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode iso array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister,
                        $tmp3$$XMMRegister, $tmp4$$XMMRegister,
                        $tmp5$$Register, $result$$Register,
                        false);
  %}
  ins_pipe(pipe_slow);
%}
12163

12164
// Encode char[] to byte[] in ASCII.  Selected when the EncodeISOArray node
// is marked as ASCII; the same macro-assembler routine is used as for
// ISO_8859_1, with its trailing flag set to true.
instruct encode_ascii_array(eSIRegP src, eDIRegP dst, eDXRegI len,
                            regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                            eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  match(Set result (EncodeISOArray src (Binary dst len)));
  predicate(((EncodeISOArrayNode*)n)->is_ascii());
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode ascii array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister,
                        $tmp3$$XMMRegister, $tmp4$$XMMRegister,
                        $tmp5$$Register, $result$$Register,
                        true);
  %}
  ins_pipe(pipe_slow);
%}
12180

12181
//----------Control Flow Instructions------------------------------------------
12182
// Signed compare Instructions
12183
// Signed int compare, register with register: sets cr from CmpI op1 op2.
instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpI op1 op2));
  effect(DEF cr, USE op1, USE op2);

  opcode(0x3B);  // Opcode 3B /r
  format %{ "CMP    $op1,$op2" %}
  ins_encode(OpcP, RegReg(op1, op2));
  ins_pipe(ialu_cr_reg_reg);
%}
12191

12192
// Signed int compare, register with immediate.  The immediate is emitted
// through Con8or32, paired with the OpcSErm opcode encoding.
instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
  match(Set cr (CmpI op1 op2));
  effect(DEF cr, USE op1);

  opcode(0x81, 0x07);  // Opcode 81 /7
  format %{ "CMP    $op1,$op2" %}
  // Earlier encoding, kept for reference:
  //   ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
  ins_encode(OpcSErm(op1, op2), Con8or32(op2));
  ins_pipe(ialu_cr_reg_imm);
%}
12201

12202
// Cisc-spilled version of compI_eReg: the second operand of the signed
// compare is a LoadI folded into the instruction as a memory operand.
instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
  match(Set cr (CmpI op1 (LoadI op2)));

  ins_cost(500);
  opcode(0x3B);  // Opcode 3B /r
  format %{ "CMP    $op1,$op2" %}
  ins_encode(SetInstMark, OpcP, RegMem(op1, op2), ClearInstMark);
  ins_pipe(ialu_cr_reg_mem);
%}
12212

12213
// Signed int compare against the constant zero, emitted as TEST src,src.
instruct testI_reg(eFlagsReg cr, rRegI src, immI_0 zero) %{
  match(Set cr (CmpI src zero));
  effect(DEF cr, USE src);

  opcode(0x85);
  format %{ "TEST   $src,$src" %}
  ins_encode(OpcP, RegReg(src, src));
  ins_pipe(ialu_cr_reg_imm);
%}
12222

12223
// Folds (src & con) compared against zero into a single TEST src,con.
instruct testI_reg_imm(eFlagsReg cr, rRegI src, immI con, immI_0 zero) %{
  match(Set cr (CmpI (AndI src con) zero));

  opcode(0xF7, 0x00);
  format %{ "TEST   $src,$con" %}
  ins_encode(OpcP, RegOpc(src), Con32(con));
  ins_pipe(ialu_cr_reg_imm);
%}
12231

12232
// Folds (src & mem) compared against zero into a single TEST src,mem
// with a memory operand.
instruct testI_reg_mem(eFlagsReg cr, rRegI src, memory mem, immI_0 zero) %{
  match(Set cr (CmpI (AndI src mem) zero));

  opcode(0x85);
  format %{ "TEST   $src,$mem" %}
  ins_encode(SetInstMark, OpcP, RegMem(src, mem), ClearInstMark);
  ins_pipe(ialu_cr_reg_mem);
%}
12240

12241
// Unsigned compare Instructions; really, same as signed except they
12242
// produce an eFlagsRegU instead of eFlagsReg.
12243
// Unsigned int compare, register with register.  Same encoding as the
// signed rule; the result is typed as eFlagsRegU.
instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpU op1 op2));

  opcode(0x3B);  // Opcode 3B /r
  format %{ "CMPu   $op1,$op2" %}
  ins_encode(OpcP, RegReg(op1, op2));
  ins_pipe(ialu_cr_reg_reg);
%}
12251

12252
// Unsigned int compare, register with immediate; produces eFlagsRegU.
instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
  match(Set cr (CmpU op1 op2));

  opcode(0x81, 0x07);  // Opcode 81 /7
  format %{ "CMPu   $op1,$op2" %}
  ins_encode(OpcSErm(op1, op2), Con8or32(op2));
  ins_pipe(ialu_cr_reg_imm);
%}
12260

12261
// Cisc-spilled version of compU_eReg: the second operand of the unsigned
// compare is a LoadI folded in as a memory operand.
instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
  match(Set cr (CmpU op1 (LoadI op2)));

  ins_cost(500);
  opcode(0x3B);  // Opcode 3B /r
  format %{ "CMPu   $op1,$op2" %}
  ins_encode(SetInstMark, OpcP, RegMem(op1, op2), ClearInstMark);
  ins_pipe(ialu_cr_reg_mem);
%}
12271

12272
// Cisc-spilled version of cmpU_eReg with the memory operand first (commented out)
12273
//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
12274
//  match(Set cr (CmpU (LoadI op1) op2));
12275
//
12276
//  format %{ "CMPu   $op1,$op2" %}
12277
//  ins_cost(500);
12278
//  opcode(0x39);  /* Opcode 39 /r */
12279
//  ins_encode( OpcP, RegMem( op1, op2) );
12280
//%}
12281

12282
// Unsigned int compare against the constant zero, emitted as TEST src,src.
instruct testU_reg(eFlagsRegU cr, rRegI src, immI_0 zero) %{
  match(Set cr (CmpU src zero));

  opcode(0x85);
  format %{ "TESTu  $src,$src" %}
  ins_encode(OpcP, RegReg(src, src));
  ins_pipe(ialu_cr_reg_imm);
%}
12290

12291
// Unsigned pointer compare Instructions
12292
// Pointer compare, register with register; the flags are typed unsigned.
instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
  match(Set cr (CmpP op1 op2));

  opcode(0x3B);  // Opcode 3B /r
  format %{ "CMPu   $op1,$op2" %}
  ins_encode(OpcP, RegReg(op1, op2));
  ins_pipe(ialu_cr_reg_reg);
%}
12300

12301
// Pointer compare, register with pointer immediate.  The encoding is
// bracketed by SetInstMark/ClearInstMark.
instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
  match(Set cr (CmpP op1 op2));

  opcode(0x81, 0x07);  // Opcode 81 /7
  format %{ "CMPu   $op1,$op2" %}
  ins_encode(SetInstMark, OpcSErm(op1, op2), Con8or32(op2), ClearInstMark);
  ins_pipe(ialu_cr_reg_imm);
%}
12309

12310
// Cisc-spilled version of compP_eReg: the second pointer operand is a LoadP
// folded in as a memory operand.
instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
  match(Set cr (CmpP op1 (LoadP op2)));

  ins_cost(500);
  opcode(0x3B);  // Opcode 3B /r
  format %{ "CMPu   $op1,$op2" %}
  ins_encode(SetInstMark, OpcP, RegMem(op1, op2), ClearInstMark);
  ins_pipe(ialu_cr_reg_mem);
%}
12320

12321
// Cisc-spilled version of cmpP_eReg with the memory operand first (commented out)
12322
//instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
12323
//  match(Set cr (CmpP (LoadP op1) op2));
12324
//
12325
//  format %{ "CMPu   $op1,$op2" %}
12326
//  ins_cost(500);
12327
//  opcode(0x39);  /* Opcode 39 /r */
12328
//  ins_encode( OpcP, RegMem( op1, op2) );
12329
//%}
12330

12331
// Compare against a raw pointer loaded from memory (used in the out-of-heap
// check).  The predicate admits only loads whose address input needs no
// relocation.  This only works because non-oop pointers must be raw
// pointers, and raw pointers have no anti-dependencies.
instruct compP_mem_eReg(eFlagsRegU cr, eRegP op1, memory op2) %{
  match(Set cr (CmpP op1 (LoadP op2)));
  predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none);

  opcode(0x3B);  // Opcode 3B /r
  format %{ "CMPu   $op1,$op2" %}
  ins_encode(SetInstMark, OpcP, RegMem(op1, op2), ClearInstMark);
  ins_pipe(ialu_cr_reg_mem);
%}
12343

12344
//
12345
// Pointer compare against null, emitted as TEST src,src.
// The flags produced are typed as signed (eFlagsReg).  That should be fine,
// because a compare against zero is only expected to be used for eq/neq.
instruct testP_reg(eFlagsReg cr, eRegP src, immP0 zero) %{
  match(Set cr (CmpP src zero));

  opcode(0x85);
  format %{ "TEST   $src,$src" %}
  ins_encode(OpcP, RegReg(src, src));
  ins_pipe(ialu_cr_reg_imm);
%}
12355

12356
// Cisc-spilled version of testP_reg: tests a pointer loaded from memory
// against null with TEST [mem],0xFFFFFFFF, without loading it first.
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
// The zero operand must be immP0 (as in testP_reg): the constant side of a
// CmpP is a pointer constant, so an int-zero operand can never be matched
// there and would leave this rule unreachable.
instruct testP_Reg_mem( eFlagsReg cr, memory op, immP0 zero ) %{
  match(Set cr (CmpP (LoadP op) zero));

  format %{ "TEST   $op,0xFFFFFFFF" %}
  ins_cost(500);
  opcode(0xF7);               /* Opcode F7 /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF), ClearInstMark );
  ins_pipe( ialu_cr_reg_imm );
%}
12368

12369
// Yanked all unsigned pointer compare operations.
12370
// Pointer compares are done with CmpP which is already unsigned.
12371

12372
//----------Max and Min--------------------------------------------------------
12373
// Min Instructions
12374
////
12375
//   *** Min and Max using the conditional move are slower than the
12376
//   *** branch version on a Pentium III.
12377
// // Conditional move for min
12378
//instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12379
//  effect( USE_DEF op2, USE op1, USE cr );
12380
//  format %{ "CMOVlt $op2,$op1\t! min" %}
12381
//  opcode(0x4C,0x0F);
12382
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12383
//  ins_pipe( pipe_cmov_reg );
12384
//%}
12385
//
12386
//// Min Register with Register (P6 version)
12387
//instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
12388
//  predicate(VM_Version::supports_cmov() );
12389
//  match(Set op2 (MinI op1 op2));
12390
//  ins_cost(200);
12391
//  expand %{
12392
//    eFlagsReg cr;
12393
//    compI_eReg(cr,op1,op2);
12394
//    cmovI_reg_lt(op2,op1,cr);
12395
//  %}
12396
//%}
12397

12398
// Integer minimum, register with register (generic version):
// dst = MinI(dst, src).  The flags register is killed.
instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MinI dst src));
  effect(KILL flags);

  format %{ "MIN    $dst,$src" %}
  ins_cost(300);
  opcode(0xCC);
  ins_encode(min_enc(dst, src));
  ins_pipe(pipe_slow);
%}
12409

12410
// Max Register with Register
12411
//   *** Min and Max using the conditional move are slower than the
12412
//   *** branch version on a Pentium III.
12413
// // Conditional move for max
12414
//instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12415
//  effect( USE_DEF op2, USE op1, USE cr );
12416
//  format %{ "CMOVgt $op2,$op1\t! max" %}
12417
//  opcode(0x4F,0x0F);
12418
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12419
//  ins_pipe( pipe_cmov_reg );
12420
//%}
12421
//
12422
// // Max Register with Register (P6 version)
12423
//instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
12424
//  predicate(VM_Version::supports_cmov() );
12425
//  match(Set op2 (MaxI op1 op2));
12426
//  ins_cost(200);
12427
//  expand %{
12428
//    eFlagsReg cr;
12429
//    compI_eReg(cr,op1,op2);
12430
//    cmovI_reg_gt(op2,op1,cr);
12431
//  %}
12432
//%}
12433

12434
// Integer maximum, register with register (generic version):
// dst = MaxI(dst, src).  The flags register is killed.
instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MaxI dst src));
  effect(KILL flags);

  format %{ "MAX    $dst,$src" %}
  ins_cost(300);
  opcode(0xCC);
  ins_encode(max_enc(dst, src));
  ins_pipe(pipe_slow);
%}
12445

12446
// ============================================================================
12447
// Counted Loop limit node which represents exact final iterator value.
// Note: the resulting value should fit into integer range since
// counted loops have limit check on overflow.
//
// The computation is carried out on a 64-bit intermediate held in
// limit_hi:limit (EDX:EAX).  tmp first receives the sign extension of init
// and is afterwards reused for the stride (divisor, then multiplier).
instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
  match(Set limit (LoopLimit (Binary init limit) stride));
  effect(TEMP limit_hi, TEMP tmp, KILL flags);
  ins_cost(300);

  format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
  ins_encode %{
    int strd = (int)$stride$$constant;
    assert(strd != 1 && strd != -1, "sanity");
    // Convert limit to long (EAX:EDX)
    __ cdql();
    // Convert init to long (init:tmp)
    __ movl($tmp$$Register, $init$$Register);
    __ sarl($tmp$$Register, 31);
    // $limit - $init
    __ subl($limit$$Register, $init$$Register);
    __ sbbl($limit_hi$$Register, $tmp$$Register);
    // + ($stride - 1)
    if (strd > 0) {
      __ addl($limit$$Register, (strd - 1));
      __ adcl($limit_hi$$Register, 0);
      __ movl($tmp$$Register, strd);
    } else {
      // Negative stride: add (stride + 1), then negate the 64-bit value so
      // that the division below is done by the positive -stride.
      __ addl($limit$$Register, (strd + 1));
      __ adcl($limit_hi$$Register, -1);
      __ lneg($limit_hi$$Register, $limit$$Register);
      __ movl($tmp$$Register, -strd);
    }
    // signed division: (EAX:EDX) / pos_stride
    __ idivl($tmp$$Register);
    if (strd < 0) {
      // restore sign
      __ negl($tmp$$Register);
    }
    // (EAX) * stride
    __ mull($tmp$$Register);
    // + init (ignore upper bits)
    __ addl($limit$$Register, $init$$Register);
  %}
  ins_pipe( pipe_slow );
%}
12492

12493
// ============================================================================
12494
// Branch Instructions
12495
// Jump Table: indirect jump through the constant table, indexed by
// switch_val with a scale of 1.
instruct jumpXtnd(rRegI switch_val) %{
  match(Jump switch_val);

  format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
  ins_cost(350);
  ins_encode %{
    // Target is Address(table_base + switch_reg)
    Address table_slot(noreg, $switch_val$$Register, Address::times_1);
    __ jump(ArrayAddress($constantaddress, table_slot), noreg);
  %}
  ins_pipe(pipe_jmp);
%}
12507

12508
// Jump Direct - Label defines a relative address from JMP+1.
// Long form: the declared size is 5 bytes.
instruct jmpDir(label labl) %{
  match(Goto);
  effect(USE labl);

  size(5);
  ins_cost(300);
  format %{ "JMP    $labl" %}
  ins_encode %{
    Label* target = $labl$$label;
    // false => always emit the long jump
    __ jmp(*target, false);
  %}
  ins_pipe(pipe_jmp);
%}
12522

12523
// Jump Direct Conditional - Label defines a relative address from Jcc+1.
// Long form: the declared size is 6 bytes.
instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  size(6);
  ins_cost(300);
  format %{ "J$cop    $labl" %}
  ins_encode %{
    Label* target = $labl$$label;
    // false => always emit the long jump
    __ jcc((Assembler::Condition)($cop$$cmpcode), *target, false);
  %}
  ins_pipe(pipe_jcc);
%}
12537

12538
// Conditional back-branch closing a counted loop (CountedLoopEnd).
// Label defines a relative address from Jcc+1; long form, 6 bytes.
instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  size(6);
  ins_cost(300);
  format %{ "J$cop    $labl\t# Loop end" %}
  ins_encode %{
    Label* target = $labl$$label;
    // false => always emit the long jump
    __ jcc((Assembler::Condition)($cop$$cmpcode), *target, false);
  %}
  ins_pipe(pipe_jcc);
%}
12552

12553
// Jump Direct Conditional - using unsigned comparison (cmpOpU with
// eFlagsRegU).  Long form, 6 bytes.
instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  size(6);
  ins_cost(300);
  format %{ "J$cop,u  $labl" %}
  ins_encode %{
    Label* target = $labl$$label;
    // false => always emit the long jump
    __ jcc((Assembler::Condition)($cop$$cmpcode), *target, false);
  %}
  ins_pipe(pipe_jcc);
%}
12567

12568
// Conditional jump on eFlagsRegUCF flags with a cmpOpUCF condition.
// Long form, 6 bytes.
instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  size(6);
  ins_cost(200);
  format %{ "J$cop,u  $labl" %}
  ins_encode %{
    Label* target = $labl$$label;
    // false => always emit the long jump
    __ jcc((Assembler::Condition)($cop$$cmpcode), *target, false);
  %}
  ins_pipe(pipe_jcc);
%}
12581

12582
// Conditional jump on eFlagsRegUCF flags for the cmpOpUCF2 conditions, which
// must also take the parity flag into account.  Only equal and notEqual are
// accepted:
//   notEqual: branch to the target on parity as well as on notEqual;
//   equal:    skip the branch on parity, otherwise branch on equal.
instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u   $labl\n\t"
      $$emit$$"J$cop,u   $labl"
    } else {
      $$emit$$"JP,u   done\n\t"
      $$emit$$"J$cop,u   $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* target = $labl$$label;
    const int cc = $cop$$cmpcode;
    if (cc == Assembler::equal) {
      Label skip;
      __ jccb(Assembler::parity, skip);
      __ jcc(Assembler::equal, *target, false);
      __ bind(skip);
    } else if (cc == Assembler::notEqual) {
      __ jcc(Assembler::parity, *target, false);
      __ jcc(Assembler::notEqual, *target, false);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}
12613

12614
// ============================================================================
12615
// The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary superklass
12616
// array for an instance of the superklass.  Set a hidden internal cache on a
12617
// hit (cache is checked with exposed code in gen_subtype_check()).  Return
12618
// NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
12619
// Slow half of the subtype check producing its result in EDI: per the
// listing below, zero on a hit and not-zero on a miss.  opcode(0x1) asks the
// shared encoding to XOR EDI on a hit.
instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
  match(Set result (PartialSubtypeCheck sub super));
  effect( KILL rcx, KILL cr );

  ins_cost(1100);  // slightly larger than the next version
  // The XOR line carries a '#' before its annotation, like the other lines.
  format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
            "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
            "XOR    $result,$result\t\t# Hit: EDI zero\n\t"
     "miss:\t" %}

  opcode(0x1); // Force a XOR of EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}
12637

12638
// Slow half of the subtype check when the result is only compared against
// null: the flags are the output, and EDI (result) is merely killed.
// opcode(0x0) tells the shared encoding that no XOR of EDI is needed.
instruct partialSubtypeCheck_vs_Zero(eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero) %{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect(KILL rcx, KILL result);

  ins_cost(1000);
  opcode(0x0);  // No need to XOR EDI
  format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s  miss\t\t# Missed: flags NZ\n\t"
            "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
            "miss:\t" %}

  ins_encode(enc_PartialSubtypeCheck());
  ins_pipe(pipe_slow);
%}
12655

12656
// ============================================================================
12657
// Branch Instructions -- short offset versions
12658
//
12659
// These instructions are used to replace jumps of a long offset (the default
12660
// match) with jumps of a shorter offset.  These instructions are all tagged
12661
// with the ins_short_branch attribute, which causes the ADLC to suppress the
12662
// match rules in general matching.  Instead, the ADLC generates a conversion
12663
// method in the MachNode which can be used to do in-place replacement of the
12664
// long variant with the shorter variant.  The compiler will determine if a
12665
// branch can be taken by the is_short_branch_offset() predicate in the machine
12666
// specific code section of the file.
12667

12668
// Jump Direct, short-offset variant - Label defines a relative address from
// JMP+1.  Declared size is 2 bytes; tagged as a short branch.
instruct jmpDir_short(label labl) %{
  match(Goto);
  effect(USE labl);

  size(2);
  ins_cost(300);
  format %{ "JMP,s  $labl" %}
  ins_encode %{
    Label* target = $labl$$label;
    __ jmpb(*target);
  %}
  ins_pipe(pipe_jmp);
  ins_short_branch(1);
%}
12683

12684
// Jump Direct Conditional, short-offset variant - Label defines a relative
// address from Jcc+1.  Declared size is 2 bytes; tagged as a short branch.
instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  size(2);
  ins_cost(300);
  format %{ "J$cop,s  $labl" %}
  ins_encode %{
    Label* target = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *target);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
12699

12700
// Counted-loop back-branch, short-offset variant - Label defines a relative
// address from Jcc+1.  Declared size is 2 bytes; tagged as a short branch.
instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  size(2);
  ins_cost(300);
  format %{ "J$cop,s  $labl\t# Loop end" %}
  ins_encode %{
    Label* target = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *target);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
12715

12716
// Jump Direct Conditional using unsigned comparison, short-offset variant.
// Declared size is 2 bytes; tagged as a short branch.
instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  size(2);
  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  ins_encode %{
    Label* target = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *target);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
12731

12732
// Conditional jump on eFlagsRegUCF flags with a cmpOpUCF condition,
// short-offset variant.  Declared size is 2 bytes; tagged as a short branch.
instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  size(2);
  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  ins_encode %{
    Label* target = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *target);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
12746

12747
// Short-offset variant of the parity-aware conditional jump for cmpOpUCF2.
// Both accepted conditions (equal, notEqual) emit two short branches, hence
// the declared size of 4 bytes.
instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  size(4);
  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u,s   $labl\n\t"
      $$emit$$"J$cop,u,s   $labl"
    } else {
      $$emit$$"JP,u,s   done\n\t"
      $$emit$$"J$cop,u,s  $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* target = $labl$$label;
    const int cc = $cop$$cmpcode;
    if (cc == Assembler::equal) {
      Label skip;
      __ jccb(Assembler::parity, skip);
      __ jccb(Assembler::equal, *target);
      __ bind(skip);
    } else if (cc == Assembler::notEqual) {
      __ jccb(Assembler::parity, *target);
      __ jccb(Assembler::notEqual, *target);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
12780

12781
// ============================================================================
12782
// Long Compare
12783
//
12784
// Currently we hold longs in 2 registers.  Comparing such values efficiently
12785
// is tricky.  The flavor of compare used depends on whether we are testing
12786
// for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
12787
// The GE test is the negated LT test.  The LE test can be had by commuting
12788
// the operands (yielding a GE test) and then negating; negate again for the
12789
// GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
12790
// NE test is negated from that.
12791

12792
// Due to a shortcoming in the ADLC, it mixes up expressions like:
12793
// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12794
// difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12795
// are collapsed internally in the ADLC's dfa-gen code.  The match for
12796
// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12797
// foo match ends up with the wrong leaf.  One fix is to not match both
12798
// reg-reg and reg-zero forms of long-compare.  This is unfortunate because
12799
// both forms beat the trinary form of long-compare and both are very useful
12800
// on Intel which has so few registers.
12801

12802
// Manifest a CmpL result in an integer register.  Very painful.
// This is the test to avoid.
// dst starts at 0.  The high halves are compared signed and decide the
// result unless they are equal; otherwise the low halves are compared
// unsigned.  dst ends up decremented, unchanged or incremented.
instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(1000);
  format %{ "XOR    $dst,$dst\n\t"
            "CMP    $src1.hi,$src2.hi\n\t"
            "JLT,s  m_one\n\t"
            "JGT,s  p_one\n\t"
            "CMP    $src1.lo,$src2.lo\n\t"
            "JB,s   m_one\n\t"
            "JEQ,s  done\n"
    "p_one:\tINC    $dst\n\t"
            "JMP,s  done\n"
    "m_one:\tDEC    $dst\n"
     "done:" %}
  ins_encode %{
    Label is_greater, is_less, finished;
    // Start from "equal"
    __ xorptr($dst$$Register, $dst$$Register);
    // High words: signed compare
    __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
    __ jccb(Assembler::less,    is_less);
    __ jccb(Assembler::greater, is_greater);
    // High words equal, low words: unsigned compare
    __ cmpl($src1$$Register, $src2$$Register);
    __ jccb(Assembler::below,   is_less);
    __ jccb(Assembler::equal,   finished);
    __ bind(is_greater);
    __ incrementl($dst$$Register);
    __ jmpb(finished);
    __ bind(is_less);
    __ decrementl($dst$$Register);
    __ bind(finished);
  %}
  ins_pipe(pipe_slow);
%}
12837

12838
//======
12839
// Long compare against zero, with the result in the normal flags: only the
// high word is tested.  Only good for LT or GE compares (LE or GT can be had
// by reversing arguments).  NOT GOOD FOR EQ/NE tests.
instruct cmpL_zero_flags_LTGE(flagsReg_long_LTGE flags, eRegL src, immL0 zero) %{
  match(Set flags (CmpL src zero));

  opcode(0x85);
  format %{ "TEST   $src.hi,$src.hi" %}
  ins_cost(100);
  ins_encode(OpcP, RegReg_Hi2(src, src));
  ins_pipe(ialu_cr_reg_reg);
%}
12850

12851
// Long compare of two register pairs, with the result in the normal flags:
// CMP on the low words, then SBB on the high words through a temp.
// Only good for LT or GE compares (LE or GT can be had by reversing
// arguments).  NOT GOOD FOR EQ/NE tests.
instruct cmpL_reg_flags_LTGE(flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpL src1 src2));
  effect(TEMP tmp);

  format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "MOV    $tmp,$src1.hi\n\t"
            "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
  ins_cost(300);
  ins_encode(long_cmp_flags2(src1, src2, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}
12864

12865
// Long compares reg < zero/reg OR reg >= zero/reg.
// Just a wrapper for a normal branch; the predicate restricts the rule to
// Bool tests of kind lt or ge.
instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
  match(If cmp flags);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
            _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
  effect(USE labl);
  expand %{
    jmpCon(cmp, flags, labl);    // JLT or JGE...
  %}
%}
12875

12876
//======
12877
// Unsigned long compare against zero, with the result in the normal flags:
// only the high word is tested.  Only good for LT or GE compares (LE or GT
// can be had by reversing arguments).  NOT GOOD FOR EQ/NE tests.
instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
  match(Set flags (CmpUL src zero));

  opcode(0x85);
  format %{ "TEST   $src.hi,$src.hi" %}
  ins_cost(100);
  ins_encode(OpcP, RegReg_Hi2(src, src));
  ins_pipe(ialu_cr_reg_reg);
%}
12888

12889
// Unsigned long compare of two register pairs, with the result in the
// normal flags: CMP on the low words, then SBB on the high words through a
// temp.  Only good for LT or GE compares (LE or GT can be had by reversing
// arguments).  NOT GOOD FOR EQ/NE tests.
instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpUL src1 src2));
  effect(TEMP tmp);

  format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
            "MOV    $tmp,$src1.hi\n\t"
            "SBB    $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
  ins_cost(300);
  ins_encode(long_cmp_flags2(src1, src2, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}
12902

12903
// Unsigned long compares reg < zero/reg OR reg >= zero/reg.
// Just a wrapper for a normal branch; the predicate restricts the rule to
// Bool tests of kind lt or ge.
instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
  match(If cmp flags);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
            _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
  effect(USE labl);
  expand %{
    jmpCon(cmp, flags, labl);    // JLT or JGE...
  %}
%}
12913

12914
// Compare 2 longs and CMOVE longs.
12915
// Conditional move of a long (register pair) driven by LT/GE long-compare
// flags: one CMOV for the low half and one for the high half.  Requires
// CMOV support and a Bool test of kind lt or ge.
instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() &&
            (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
             _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge));

  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  ins_cost(400);
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cmp), RegReg_Lo2(dst, src), enc_cmov(cmp), RegReg_Hi2(dst, src));
  ins_pipe(pipe_cmov_reg_long);
%}
12925

12926
// Conditional move of a long loaded from memory, driven by LT/GE
// long-compare flags: one CMOV per half, each with a memory source.
// Requires CMOV support and a Bool test of kind lt or ge.
instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() &&
            (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
             _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge));

  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  ins_cost(500);
  opcode(0x0F, 0x40);
  ins_encode(SetInstMark, enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src), ClearInstMark);
  ins_pipe(pipe_cmov_reg_long);
%}
12936

12937
// Unsigned-flags variant of cmovLL_reg_LTGE: same match shape with
// cmpOpU / flagsReg_ulong_LTGE operands, expanding to the signed rule.
instruct cmovLL_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() &&
            (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
             _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge));

  ins_cost(400);
  expand %{
    cmovLL_reg_LTGE(cmp, flags, dst, src);
  %}
%}
12945

12946
// Unsigned-flags variant of cmovLL_mem_LTGE: same match shape with
// cmpOpU / flagsReg_ulong_LTGE operands, expanding to the signed rule.
instruct cmovLL_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() &&
            (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt ||
             _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge));

  ins_cost(500);
  expand %{
    cmovLL_mem_LTGE(cmp, flags, dst, src);
  %}
%}
12954

12955
// Compare 2 longs and CMOVE ints.
12956
// Conditional move of an int register, steered by long-compare flags
// (flagsReg_long_LTGE).  Requires CMOV support and an lt or ge Bool test at
// _kids[0]->_kids[0] of the match tree.  A single register-to-register CMOV.
instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
12957
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12958
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12959
  ins_cost(200);
12960
  format %{ "CMOV$cmp $dst,$src" %}
12961
  // 0F 40 is the x86 CMOVcc opcode base.
  opcode(0x0F,0x40);
12962
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12963
  ins_pipe( pipe_cmov_reg );
12964
%}
12965

12966
// Memory-source form of cmovII_reg_LTGE: conditionally moves an int loaded
// with LoadI into dst, steered by long-compare flags.  Requires CMOV support
// and an lt or ge Bool test.  Costed at 250 versus 200 for the register form.
instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
12967
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12968
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12969
  ins_cost(250);
12970
  format %{ "CMOV$cmp $dst,$src" %}
12971
  opcode(0x0F,0x40);
12972
  // The memory-operand encoding is bracketed by SetInstMark / ClearInstMark.
  ins_encode( SetInstMark, enc_cmov(cmp), RegMem( dst, src ), ClearInstMark );
12973
  ins_pipe( pipe_cmov_mem );
12974
%}
12975

12976
// Unsigned-compare flavor of cmovII_reg_LTGE: identical match rule and
// predicate, but the operands are cmpOpU and flagsReg_ulong_LTGE.
// Expands to cmovII_reg_LTGE.
instruct cmovII_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, rRegI src) %{
12977
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12978
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12979
  ins_cost(200);
12980
  expand %{
12981
    cmovII_reg_LTGE(cmp, flags, dst, src);
12982
  %}
12983
%}
12984

12985
// Unsigned-compare flavor of cmovII_mem_LTGE (int source loaded via LoadI).
// Uses cmpOpU and flagsReg_ulong_LTGE; expands to cmovII_mem_LTGE.
instruct cmovII_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, memory src) %{
12986
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12987
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12988
  ins_cost(250);
12989
  expand %{
12990
    cmovII_mem_LTGE(cmp, flags, dst, src);
12991
  %}
12992
%}
12993

12994
// Compare 2 longs and CMOVE ptrs.
12995
// Conditional move of a pointer register (eRegP), steered by long-compare
// flags.  Requires CMOV support and an lt or ge Bool test at
// _kids[0]->_kids[0].  Encoded exactly like the int form: one reg-reg CMOV.
instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
12996
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12997
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12998
  ins_cost(200);
12999
  format %{ "CMOV$cmp $dst,$src" %}
13000
  opcode(0x0F,0x40);
13001
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13002
  ins_pipe( pipe_cmov_reg );
13003
%}
13004

13005
// Compare 2 unsigned longs and CMOVE ptrs.
13006
// Same CMoveP match rule and lt/ge predicate as cmovPP_reg_LTGE, but typed
// with cmpOpU and flagsReg_ulong_LTGE.  Expands to cmovPP_reg_LTGE.
instruct cmovPP_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegP dst, eRegP src) %{
13007
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13008
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13009
  ins_cost(200);
13010
  expand %{
13011
    cmovPP_reg_LTGE(cmp,flags,dst,src);
13012
  %}
13013
%}
13014

13015
// Compare 2 longs and CMOVE doubles
13016
// Variant for regDPR operands, selected when UseSSE<=1 and the Bool test at
// _kids[0]->_kids[0] is lt or ge.  Unlike the integer forms, the predicate
// does not check supports_cmov().  Expands to fcmovDPR_regS.
instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
13017
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13018
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13019
  ins_cost(200);
13020
  expand %{
13021
    fcmovDPR_regS(cmp,flags,dst,src);
13022
  %}
13023
%}
13024

13025
// Compare 2 longs and CMOVE doubles
13026
// Variant for regD operands, selected when UseSSE>=2 and the Bool test at
// _kids[0]->_kids[0] is lt or ge.  Expands to fcmovD_regS.
instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
13027
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13028
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13029
  ins_cost(200);
13030
  expand %{
13031
    fcmovD_regS(cmp,flags,dst,src);
13032
  %}
13033
%}
13034

13035
// Compare 2 longs and CMOVE floats.
// Variant for regFPR operands, selected when UseSSE==0 and the Bool test at
// _kids[0]->_kids[0] is lt or ge.  Expands to fcmovFPR_regS.
instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
13036
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13037
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13038
  ins_cost(200);
13039
  expand %{
13040
    fcmovFPR_regS(cmp,flags,dst,src);
13041
  %}
13042
%}
13043

13044
// Compare 2 longs and CMOVE floats.
// Variant for regF operands, selected when UseSSE>=1 and the Bool test at
// _kids[0]->_kids[0] is lt or ge.  Expands to fcmovF_regS.
instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
13045
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13046
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13047
  ins_cost(200);
13048
  expand %{
13049
    fcmovF_regS(cmp,flags,dst,src);
13050
  %}
13051
%}
13052

13053
//======
13054
// Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
13055
// Compares a long against the constant zero and produces flags that are only
// meaningful to eq/ne consumers (flagsReg_long_EQNE).  Per the format text,
// the low half is copied into tmp and the high half is ORed in, so the result
// is zero only when both halves are zero.
instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
13056
  match( Set flags (CmpL src zero ));
13057
  // tmp is a scratch register: it receives the OR of the two halves.
  effect(TEMP tmp);
13058
  ins_cost(200);
13059
  format %{ "MOV    $tmp,$src.lo\n\t"
13060
            "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
13061
  ins_encode( long_cmp_flags0( src, tmp ) );
13062
  ins_pipe( ialu_reg_reg_long );
13063
%}
13064

13065
// Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
13066
// Compares two long registers and produces flags that are only meaningful to
// eq/ne consumers (flagsReg_long_EQNE).  Per the format text: the low halves
// are compared first; if they differ the high-half compare is skipped,
// otherwise the high-half compare sets the final flags.
instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
13067
  match( Set flags (CmpL src1 src2 ));
13068
  // Costed 300 above the compare-with-zero form (which is 200).
  ins_cost(200+300);
13069
  format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
13070
            "JNE,s  skip\n\t"
13071
            "CMP    $src1.hi,$src2.hi\n\t"
13072
     "skip:\t" %}
13073
  ins_encode( long_cmp_flags1( src1, src2 ) );
13074
  ins_pipe( ialu_cr_reg_reg );
13075
%}
13076

13077
// Long compare reg == zero/reg OR reg != zero/reg
13078
// Just a wrapper for a normal branch, plus the predicate test.
13079
// Conditional branch on flagsReg_long_EQNE.  The predicate admits only If
// nodes whose Bool test (at _kids[0] of the match tree) is eq or ne; the
// instruction then expands to the ordinary jmpCon branch.
instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
13080
  match(If cmp flags);
13081
  effect(USE labl);
13082
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
13083
  expand %{
13084
    jmpCon(cmp,flags,labl);    // JEQ or JNE...
13085
  %}
13086
%}
13087

13088
//======
13089
// Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
13090
// CmpUL counterpart of cmpL_zero_flags_EQNE: compares an unsigned long with
// zero for eq/ne consumers (flagsReg_ulong_EQNE).  It uses the same encoding
// class, long_cmp_flags0; per the format text tmp = src.lo | src.hi.
instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
13091
  match(Set flags (CmpUL src zero));
13092
  // tmp is a scratch register: it receives the OR of the two halves.
  effect(TEMP tmp);
13093
  ins_cost(200);
13094
  format %{ "MOV    $tmp,$src.lo\n\t"
13095
            "OR     $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
13096
  ins_encode(long_cmp_flags0(src, tmp));
13097
  ins_pipe(ialu_reg_reg_long);
13098
%}
13099

13100
// Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
13101
// CmpUL counterpart of cmpL_reg_flags_EQNE: compares two unsigned longs for
// eq/ne consumers (flagsReg_ulong_EQNE) using the same encoding class,
// long_cmp_flags1.  Per the format text the low halves are compared first and
// the high-half compare runs only when the low halves are equal.
instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
13102
  match(Set flags (CmpUL src1 src2));
13103
  ins_cost(200+300);
13104
  format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
13105
            "JNE,s  skip\n\t"
13106
            "CMP    $src1.hi,$src2.hi\n\t"
13107
     "skip:\t" %}
13108
  ins_encode(long_cmp_flags1(src1, src2));
13109
  ins_pipe(ialu_cr_reg_reg);
13110
%}
13111

13112
// Unsigned long compare reg == zero/reg OR reg != zero/reg
13113
// Just a wrapper for a normal branch, plus the predicate test.
13114
// Conditional branch on flagsReg_ulong_EQNE.  Admits only If nodes whose Bool
// test (at _kids[0] of the match tree) is eq or ne, and expands to jmpCon.
// Note that cmp is typed cmpOpU here but is still handed to jmpCon.
instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
13115
  match(If cmp flags);
13116
  effect(USE labl);
13117
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
13118
  expand %{
13119
    jmpCon(cmp, flags, labl);    // JEQ or JNE...
13120
  %}
13121
%}
13122

13123
// Compare 2 longs and CMOVE longs.
13124
// Conditional move of a long register pair, steered by flagsReg_long_EQNE.
// Requires CMOV support and an eq or ne Bool test at _kids[0]->_kids[0] of
// the match tree.
instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
13125
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13126
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13127
  ins_cost(400);
13128
  // Two CMOVs with the same condition: one for the low half, one for the high.
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13129
            "CMOV$cmp $dst.hi,$src.hi" %}
13130
  opcode(0x0F,0x40);
13131
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13132
  ins_pipe( pipe_cmov_reg_long );
13133
%}
13134

13135
// Memory-source form of cmovLL_reg_EQNE: conditionally moves a long loaded
// with LoadL into dst.  Requires CMOV support and an eq or ne Bool test.
instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
13136
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13137
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13138
  ins_cost(500);
13139
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13140
            "CMOV$cmp $dst.hi,$src.hi" %}
13141
  opcode(0x0F,0x40);
13142
  // RegMem encodes the first CMOV and RegMem_Hi the second; the pair is
  // bracketed by SetInstMark / ClearInstMark.
  ins_encode( SetInstMark, enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src), ClearInstMark );
13143
  ins_pipe( pipe_cmov_reg_long );
13144
%}
13145

13146
// Compare 2 longs and CMOVE ints.
13147
// Conditional move of an int register, steered by flagsReg_long_EQNE.
// Requires CMOV support and an eq or ne Bool test at _kids[0]->_kids[0].
// A single register-to-register CMOV.
instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
13148
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13149
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13150
  ins_cost(200);
13151
  format %{ "CMOV$cmp $dst,$src" %}
13152
  opcode(0x0F,0x40);
13153
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13154
  ins_pipe( pipe_cmov_reg );
13155
%}
13156

13157
// Memory-source form of cmovII_reg_EQNE: conditionally moves an int loaded
// with LoadI into dst, steered by flagsReg_long_EQNE (eq/ne tests only).
instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
13158
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13159
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13160
  ins_cost(250);
13161
  format %{ "CMOV$cmp $dst,$src" %}
13162
  opcode(0x0F,0x40);
13163
  ins_encode( SetInstMark, enc_cmov(cmp), RegMem( dst, src ), ClearInstMark );
13164
  ins_pipe( pipe_cmov_mem );
13165
%}
13166

13167
// Unsigned-compare flavor of cmovII_reg_EQNE: typed with cmpOpU and
// flagsReg_ulong_EQNE, same eq/ne predicate.  Expands to cmovII_reg_EQNE.
instruct cmovII_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, rRegI src) %{
13168
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13169
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13170
  ins_cost(200);
13171
  expand %{
13172
    cmovII_reg_EQNE(cmp, flags, dst, src);
13173
  %}
13174
%}
13175

13176
// Unsigned-compare flavor of cmovII_mem_EQNE (int source loaded via LoadI):
// typed with cmpOpU and flagsReg_ulong_EQNE.  Expands to cmovII_mem_EQNE.
instruct cmovII_mem_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, memory src) %{
13177
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13178
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13179
  ins_cost(250);
13180
  expand %{
13181
    cmovII_mem_EQNE(cmp, flags, dst, src);
13182
  %}
13183
%}
13184

13185
// Compare 2 longs and CMOVE ptrs.
13186
// Conditional move of a pointer register (eRegP), steered by
// flagsReg_long_EQNE.  Requires CMOV support and an eq or ne Bool test at
// _kids[0]->_kids[0].  One register-to-register CMOV.
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
13187
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13188
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13189
  ins_cost(200);
13190
  format %{ "CMOV$cmp $dst,$src" %}
13191
  opcode(0x0F,0x40);
13192
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13193
  ins_pipe( pipe_cmov_reg );
13194
%}
13195

13196
// Compare 2 unsigned longs and CMOVE ptrs.
13197
// Same CMoveP match rule and eq/ne predicate as cmovPP_reg_EQNE, but typed
// with cmpOpU and flagsReg_ulong_EQNE.  Expands to cmovPP_reg_EQNE.
instruct cmovPP_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, eRegP dst, eRegP src) %{
13198
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13199
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13200
  ins_cost(200);
13201
  expand %{
13202
    cmovPP_reg_EQNE(cmp,flags,dst,src);
13203
  %}
13204
%}
13205

13206
// Compare 2 longs and CMOVE doubles
13207
// Variant for regDPR operands, selected when UseSSE<=1 and the Bool test at
// _kids[0]->_kids[0] is eq or ne.  Expands to fcmovDPR_regS.
instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
13208
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13209
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13210
  ins_cost(200);
13211
  expand %{
13212
    fcmovDPR_regS(cmp,flags,dst,src);
13213
  %}
13214
%}
13215

13216
// Compare 2 longs and CMOVE doubles
13217
// Variant for regD operands, selected when UseSSE>=2 and the Bool test at
// _kids[0]->_kids[0] is eq or ne.  Expands to fcmovD_regS.
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
13218
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13219
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13220
  ins_cost(200);
13221
  expand %{
13222
    fcmovD_regS(cmp,flags,dst,src);
13223
  %}
13224
%}
13225

13226
// Compare 2 longs and CMOVE floats.
// Variant for regFPR operands, selected when UseSSE==0 and the Bool test at
// _kids[0]->_kids[0] is eq or ne.  Expands to fcmovFPR_regS.
instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
13227
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13228
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13229
  ins_cost(200);
13230
  expand %{
13231
    fcmovFPR_regS(cmp,flags,dst,src);
13232
  %}
13233
%}
13234

13235
// Compare 2 longs and CMOVE floats.
// Variant for regF operands, selected when UseSSE>=1 and the Bool test at
// _kids[0]->_kids[0] is eq or ne.  Expands to fcmovF_regS.
instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
13236
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13237
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13238
  ins_cost(200);
13239
  expand %{
13240
    fcmovF_regS(cmp,flags,dst,src);
13241
  %}
13242
%}
13243

13244
//======
13245
// Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13246
// Same as cmpL_reg_flags_LEGT except must negate src
13247
// Compares a long against the constant zero and produces flags for le/gt
// consumers (flagsReg_long_LEGT).  Per the format text, tmp is cleared and
// src is then subtracted from it (CMP on the low half, SBB on the high half),
// so the flags describe 0 - src and consumers must use the commuted test.
instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
13248
  match( Set flags (CmpL src zero ));
13249
  // tmp is a scratch register: it is zeroed and then overwritten by the SBB.
  effect( TEMP tmp );
13250
  ins_cost(300);
13251
  format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
13252
            "CMP    $tmp,$src.lo\n\t"
13253
            "SBB    $tmp,$src.hi\n\t" %}
13254
  ins_encode( long_cmp_flags3(src, tmp) );
13255
  ins_pipe( ialu_reg_reg_long );
13256
%}
13257

13258
// Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13259
// Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
13260
// requires a commuted test to get the same result.
13261
// Compares two long registers and produces flags for le/gt consumers
// (flagsReg_long_LEGT).  Per the format text the comparison is computed as
// src2 - src1: CMP on the low halves, then the high half of src2 is copied to
// tmp and src1.hi is subtracted with borrow.
instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
13262
  match( Set flags (CmpL src1 src2 ));
13263
  effect( TEMP tmp );
13264
  ins_cost(300);
13265
  format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
13266
            "MOV    $tmp,$src2.hi\n\t"
13267
            "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
13268
  // The encoding class receives (src2, src1): swapped relative to the match rule.
  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
13269
  ins_pipe( ialu_cr_reg_reg );
13270
%}
13271

13272
// Long compares reg <= zero/reg OR reg > zero/reg.
13273
// Just a wrapper for a normal branch, plus the predicate test
13274
// Conditional branch on flagsReg_long_LEGT.  Admits only If nodes whose Bool
// test (at _kids[0] of the match tree) is gt or le.  cmp is typed
// cmpOp_commute, which pairs with the swapped-operand flag producers for
// flagsReg_long_LEGT.  Expands to jmpCon.
instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
13275
  match(If cmp flags);
13276
  effect(USE labl);
13277
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
13278
  ins_cost(300);
13279
  expand %{
13280
    jmpCon(cmp,flags,labl);    // JGT or JLE...
13281
  %}
13282
%}
13283

13284
//======
13285
// Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13286
// Same as cmpUL_reg_flags_LEGT except must negate src
13287
// CmpUL counterpart of cmpL_zero_flags_LEGT: compares an unsigned long with
// zero for le/gt consumers (flagsReg_ulong_LEGT) using the same encoding
// class, long_cmp_flags3.  Per the format text the flags describe 0 - src, so
// consumers must use the commuted test.
instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
13288
  match(Set flags (CmpUL src zero));
13289
  // tmp is a scratch register: it is zeroed and then overwritten by the SBB.
  effect(TEMP tmp);
13290
  ins_cost(300);
13291
  format %{ "XOR    $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
13292
            "CMP    $tmp,$src.lo\n\t"
13293
            "SBB    $tmp,$src.hi\n\t" %}
13294
  ins_encode(long_cmp_flags3(src, tmp));
13295
  ins_pipe(ialu_reg_reg_long);
13296
%}
13297

13298
// Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13299
// Same as cmpUL_reg_flags_LTGE except operands swapped.  Swapping operands
13300
// requires a commuted test to get the same result.
13301
// CmpUL counterpart of cmpL_reg_flags_LEGT: compares two unsigned longs for
// le/gt consumers (flagsReg_ulong_LEGT) using the same encoding class,
// long_cmp_flags2.  Per the format text the comparison is computed as
// src2 - src1, with tmp holding the high-half result.
instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
13302
  match(Set flags (CmpUL src1 src2));
13303
  effect(TEMP tmp);
13304
  ins_cost(300);
13305
  format %{ "CMP    $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
13306
            "MOV    $tmp,$src2.hi\n\t"
13307
            "SBB    $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
13308
  // The encoding class receives (src2, src1): swapped relative to the match rule.
  ins_encode(long_cmp_flags2( src2, src1, tmp));
13309
  ins_pipe(ialu_cr_reg_reg);
13310
%}
13311

13312
// Unsigned long compares reg <= zero/reg OR reg > zero/reg.
13313
// Just a wrapper for a normal branch, plus the predicate test
13314
// Conditional branch on flagsReg_ulong_LEGT.  Admits only If nodes whose Bool
// test (at _kids[0] of the match tree) is gt or le.  cmp is typed
// cmpOpU_commute, which pairs with the swapped-operand flag producers for
// flagsReg_ulong_LEGT.  Expands to jmpCon.
instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
13315
  match(If cmp flags);
13316
  effect(USE labl);
13317
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
13318
  ins_cost(300);
13319
  expand %{
13320
    jmpCon(cmp, flags, labl);    // JGT or JLE...
13321
  %}
13322
%}
13323

13324
// Compare 2 longs and CMOVE longs.
13325
// Conditional move of a long register pair, steered by flagsReg_long_LEGT.
// Requires CMOV support and an le or gt Bool test at _kids[0]->_kids[0] of
// the match tree.  cmp is typed cmpOp_commute to go with the swapped-operand
// flag producers for flagsReg_long_LEGT.
instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
13326
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13327
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13328
  ins_cost(400);
13329
  // Two CMOVs with the same condition: one for the low half, one for the high.
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13330
            "CMOV$cmp $dst.hi,$src.hi" %}
13331
  opcode(0x0F,0x40);
13332
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13333
  ins_pipe( pipe_cmov_reg_long );
13334
%}
13335

13336
// Memory-source form of cmovLL_reg_LEGT: conditionally moves a long loaded
// with LoadL into dst.  Requires CMOV support and an le or gt Bool test.
instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
13337
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13338
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13339
  ins_cost(500);
13340
  // NOTE(review): this format text prints "$src.hi+4" while cmovLL_mem_LTGE and
  // cmovLL_mem_EQNE print "$src.hi"; the encodings are the same, so this looks
  // like a display-only inconsistency -- confirm which spelling is intended.
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13341
            "CMOV$cmp $dst.hi,$src.hi+4" %}
13342
  opcode(0x0F,0x40);
13343
  ins_encode( SetInstMark, enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src), ClearInstMark );
13344
  ins_pipe( pipe_cmov_reg_long );
13345
%}
13346

13347
// Unsigned-compare flavor of cmovLL_reg_LEGT: typed with cmpOpU_commute and
// flagsReg_ulong_LEGT, same le/gt predicate.  Expands to cmovLL_reg_LEGT.
instruct cmovLL_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, eRegL src) %{
13348
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13349
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13350
  ins_cost(400);
13351
  expand %{
13352
    cmovLL_reg_LEGT(cmp, flags, dst, src);
13353
  %}
13354
%}
13355

13356
// Unsigned-compare flavor of cmovLL_mem_LEGT (long source loaded via LoadL):
// typed with cmpOpU_commute and flagsReg_ulong_LEGT.  Expands to
// cmovLL_mem_LEGT.
instruct cmovLL_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, load_long_memory src) %{
13357
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13358
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13359
  ins_cost(500);
13360
  expand %{
13361
    cmovLL_mem_LEGT(cmp, flags, dst, src);
13362
  %}
13363
%}
13364

13365
// Compare 2 longs and CMOVE ints.
13366
// Conditional move of an int register, steered by flagsReg_long_LEGT.
// Requires CMOV support and an le or gt Bool test at _kids[0]->_kids[0].
// cmp is typed cmpOp_commute; the encoding is a single reg-reg CMOV.
instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
13367
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13368
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13369
  ins_cost(200);
13370
  format %{ "CMOV$cmp $dst,$src" %}
13371
  opcode(0x0F,0x40);
13372
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13373
  ins_pipe( pipe_cmov_reg );
13374
%}
13375

13376
// Memory-source form of cmovII_reg_LEGT: conditionally moves an int loaded
// with LoadI into dst, steered by flagsReg_long_LEGT (le/gt tests only).
instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
13377
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13378
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13379
  ins_cost(250);
13380
  format %{ "CMOV$cmp $dst,$src" %}
13381
  opcode(0x0F,0x40);
13382
  ins_encode( SetInstMark, enc_cmov(cmp), RegMem( dst, src ), ClearInstMark );
13383
  ins_pipe( pipe_cmov_mem );
13384
%}
13385

13386
// Unsigned-compare flavor of cmovII_reg_LEGT: typed with cmpOpU_commute and
// flagsReg_ulong_LEGT, same le/gt predicate.  Expands to cmovII_reg_LEGT.
instruct cmovII_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, rRegI src) %{
13387
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13388
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13389
  ins_cost(200);
13390
  expand %{
13391
    cmovII_reg_LEGT(cmp, flags, dst, src);
13392
  %}
13393
%}
13394

13395
// Unsigned-compare flavor of cmovII_mem_LEGT (int source loaded via LoadI):
// typed with cmpOpU_commute and flagsReg_ulong_LEGT.  Expands to
// cmovII_mem_LEGT.
instruct cmovII_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, memory src) %{
13396
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13397
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13398
  ins_cost(250);
13399
  expand %{
13400
    cmovII_mem_LEGT(cmp, flags, dst, src);
13401
  %}
13402
%}
13403

13404
// Compare 2 longs and CMOVE ptrs.
13405
// Conditional move of a pointer register (eRegP), steered by
// flagsReg_long_LEGT.  Requires CMOV support and an le or gt Bool test at
// _kids[0]->_kids[0].  cmp is typed cmpOp_commute; one reg-reg CMOV.
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
13406
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13407
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13408
  ins_cost(200);
13409
  format %{ "CMOV$cmp $dst,$src" %}
13410
  opcode(0x0F,0x40);
13411
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13412
  ins_pipe( pipe_cmov_reg );
13413
%}
13414

13415
// Compare 2 unsigned longs and CMOVE ptrs.
13416
// Same CMoveP match rule and le/gt predicate as cmovPP_reg_LEGT, but typed
// with cmpOpU_commute and flagsReg_ulong_LEGT.  Expands to cmovPP_reg_LEGT.
instruct cmovPP_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegP dst, eRegP src) %{
13417
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13418
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13419
  ins_cost(200);
13420
  expand %{
13421
    cmovPP_reg_LEGT(cmp,flags,dst,src);
13422
  %}
13423
%}
13424

13425
// Compare 2 longs and CMOVE doubles
13426
// Variant for regDPR operands, selected when UseSSE<=1 and the Bool test at
// _kids[0]->_kids[0] is le or gt.  cmp is typed cmpOp_commute.  Expands to
// fcmovDPR_regS.
instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
13427
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13428
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13429
  ins_cost(200);
13430
  expand %{
13431
    fcmovDPR_regS(cmp,flags,dst,src);
13432
  %}
13433
%}
13434

13435
// Compare 2 longs and CMOVE doubles
13436
// Variant for regD operands, selected when UseSSE>=2 and the Bool test at
// _kids[0]->_kids[0] is le or gt.  cmp is typed cmpOp_commute.  Expands to
// fcmovD_regS.
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
13437
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13438
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13439
  ins_cost(200);
13440
  expand %{
13441
    fcmovD_regS(cmp,flags,dst,src);
13442
  %}
13443
%}
13444

13445
// Compare 2 longs and CMOVE floats.
// Variant for regFPR operands, selected when UseSSE==0 and the Bool test at
// _kids[0]->_kids[0] is le or gt.  cmp is typed cmpOp_commute.  Expands to
// fcmovFPR_regS.
instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
13446
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13447
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13448
  ins_cost(200);
13449
  expand %{
13450
    fcmovFPR_regS(cmp,flags,dst,src);
13451
  %}
13452
%}
13453

13454

13455
// Compare 2 longs and CMOVE floats.
// Variant for regF operands, selected when UseSSE>=1 and the Bool test at
// _kids[0]->_kids[0] is le or gt.  cmp is typed cmpOp_commute.  Expands to
// fcmovF_regS.
instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
13456
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13457
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13458
  ins_cost(200);
13459
  expand %{
13460
    fcmovF_regS(cmp,flags,dst,src);
13461
  %}
13462
%}
13463

13464

13465
// ============================================================================
13466
// Procedure Call/Return Instructions
13467
// Call Java Static Instruction
13468
// Note: If this code changes, the corresponding ret_addr_offset() and
13469
//       compute_padding() functions will have to be adjusted.
13470
// Matches CallStaticJava.  The encoding runs, in order: pre_call_resets,
// Java_Static_Call(meth), call_epilog, post_call_FPU.
instruct CallStaticJavaDirect(method meth) %{
13471
  match(CallStaticJava);
13472
  effect(USE meth);
13473

13474
  ins_cost(300);
13475
  format %{ "CALL,static " %}
13476
  opcode(0xE8); /* E8 cd */
13477
  ins_encode( pre_call_resets,
13478
              Java_Static_Call( meth ),
13479
              call_epilog,
13480
              post_call_FPU );
13481
  ins_pipe( pipe_slow );
13482
  // Alignment request for this instruction; see the note above about
  // compute_padding() having to stay in sync.
  ins_alignment(4);
13483
%}
13484

13485
// Call Java Dynamic Instruction
13486
// Note: If this code changes, the corresponding ret_addr_offset() and
13487
//       compute_padding() functions will have to be adjusted.
13488
// Matches CallDynamicJava.  Per the format text a MOV EAX,(oop)-1 precedes the
// call.  The encoding runs, in order: pre_call_resets,
// Java_Dynamic_Call(meth), call_epilog, post_call_FPU.
instruct CallDynamicJavaDirect(method meth) %{
13489
  match(CallDynamicJava);
13490
  effect(USE meth);
13491

13492
  ins_cost(300);
13493
  format %{ "MOV    EAX,(oop)-1\n\t"
13494
            "CALL,dynamic" %}
13495
  opcode(0xE8); /* E8 cd */
13496
  ins_encode( pre_call_resets,
13497
              Java_Dynamic_Call( meth ),
13498
              call_epilog,
13499
              post_call_FPU );
13500
  ins_pipe( pipe_slow );
13501
  // Alignment request for this instruction; see the note above about
  // compute_padding() having to stay in sync.
  ins_alignment(4);
13502
%}
13503

13504
// Call Runtime Instruction
13505
// Matches CallRuntime.  The encoding runs, in order: pre_call_resets,
// FFree_Float_Stack_All, Java_To_Runtime(meth), post_call_FPU.  Unlike the
// Java call forms above, there is no call_epilog step and no ins_alignment.
instruct CallRuntimeDirect(method meth) %{
13506
  match(CallRuntime );
13507
  effect(USE meth);
13508

13509
  ins_cost(300);
13510
  format %{ "CALL,runtime " %}
13511
  opcode(0xE8); /* E8 cd */
13512
  // Use FFREEs to clear entries in float stack
13513
  ins_encode( pre_call_resets,
13514
              FFree_Float_Stack_All,
13515
              Java_To_Runtime( meth ),
13516
              post_call_FPU );
13517
  ins_pipe( pipe_slow );
13518
%}
13519

13520
// Call runtime without safepoint
13521
// Matches CallLeaf.  Same encoding sequence as CallRuntimeDirect, with an
// extra Verify_FPU_For_Leaf step between Java_To_Runtime(meth) and
// post_call_FPU.
instruct CallLeafDirect(method meth) %{
13522
  match(CallLeaf);
13523
  effect(USE meth);
13524

13525
  ins_cost(300);
13526
  format %{ "CALL_LEAF,runtime " %}
13527
  opcode(0xE8); /* E8 cd */
13528
  ins_encode( pre_call_resets,
13529
              FFree_Float_Stack_All,
13530
              Java_To_Runtime( meth ),
13531
              Verify_FPU_For_Leaf, post_call_FPU );
13532
  ins_pipe( pipe_slow );
13533
%}
13534

13535
// Matches CallLeafNoFP.  The encoding is only pre_call_resets followed by
// Java_To_Runtime(meth): none of the float-stack or FPU steps used by
// CallLeafDirect (FFree_Float_Stack_All, Verify_FPU_For_Leaf, post_call_FPU)
// appear here.
instruct CallLeafNoFPDirect(method meth) %{
13536
  match(CallLeafNoFP);
13537
  effect(USE meth);
13538

13539
  ins_cost(300);
13540
  format %{ "CALL_LEAF_NOFP,runtime " %}
13541
  opcode(0xE8); /* E8 cd */
13542
  ins_encode(pre_call_resets, Java_To_Runtime(meth));
13543
  ins_pipe( pipe_slow );
13544
%}
13545

13546

13547
// Return Instruction
13548
// Remove the return address & jump to it.
13549
// Matches Return.  Takes no operands and is encoded through OpcP with the
// opcode byte 0xC3 (the x86 near RET).
instruct Ret() %{
13550
  match(Return);
13551
  format %{ "RET" %}
13552
  opcode(0xC3);
13553
  ins_encode(OpcP);
13554
  ins_pipe( pipe_jmp );
13555
%}
13556

13557
// Tail Call; Jump from runtime stub to Java code.
13558
// Also known as an 'interprocedural jump'.
13559
// Target of jump will eventually return to caller.
13560
// TailJump below removes the return address.
13561
// Don't use ebp for 'jump_target' because a MachEpilogNode has already been
13562
// emitted just above the TailCall which has reset ebp to the caller state.
13563
// Matches TailCall: an indirect JMP through jump_target.  jump_target is
// constrained to eRegP_no_EBP and method_ptr to eBXRegP; method_ptr takes part
// in the match rule but is not referenced by the encoding.
instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_ptr) %{
13564
  match(TailCall jump_target method_ptr);
13565
  ins_cost(300);
13566
  format %{ "JMP    $jump_target \t# EBX holds method" %}
13567
  opcode(0xFF, 0x4);  /* Opcode FF /4 */
13568
  ins_encode( OpcP, RegOpc(jump_target) );
13569
  ins_pipe( pipe_jmp );
13570
%}
13571

13572

13573
// Tail Jump; remove the return address; jump to target.
13574
// TailCall above leaves the return address around.
13575
// Matches TailJump.  Per the format text the return address is first popped
// into EDX and discarded, then control jumps indirectly through jump_target.
// ex_oop is constrained to eAXRegP and is not referenced by the encoding.
instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
13576
  match( TailJump jump_target ex_oop );
13577
  ins_cost(300);
13578
  format %{ "POP    EDX\t# pop return address into dummy\n\t"
13579
            "JMP    $jump_target " %}
13580
  opcode(0xFF, 0x4);  /* Opcode FF /4 */
13581
  // enc_pop_rdx is emitted before the same OpcP / RegOpc pair TailCalljmpInd uses.
  ins_encode( enc_pop_rdx,
13582
              OpcP, RegOpc(jump_target) );
13583
  ins_pipe( pipe_jmp );
13584
%}
13585

13586
// Create exception oop: created by stack-crawling runtime code.
13587
// Created exception is now available to this handler, and is setup
13588
// just prior to jumping to this handler.  No code emitted.
13589
// Matches CreateEx and binds its result to ex_oop (eAXRegP).  The instruction
// is declared with size(0) and an empty ins_encode, so it emits no bytes.
instruct CreateException( eAXRegP ex_oop )
13590
%{
13591
  match(Set ex_oop (CreateEx));
13592

13593
  size(0);
13594
  // use the following format syntax
13595
  format %{ "# exception oop is in EAX; no code emitted" %}
13596
  ins_encode();
13597
  ins_pipe( empty );
13598
%}
13599

13600

13601
// Rethrow exception:
13602
// The exception oop will come in the first argument position.
13603
// Then JUMP (not call) to the rethrow stub code.
13604
// Matches Rethrow.  Takes no operands; the whole encoding is delegated to the
// enc_rethrow encoding class, shown in the format text as a JMP to
// rethrow_stub.
instruct RethrowException()
13605
%{
13606
  match(Rethrow);
13607

13608
  // use the following format syntax
13609
  format %{ "JMP    rethrow_stub" %}
13610
  ins_encode(enc_rethrow);
13611
  ins_pipe( pipe_jmp );
13612
%}
13613

13614
// inlined locking and unlocking
13615

13616
// Inlined FastLock for every locking mode except LM_LIGHTWEIGHT (see predicate).
// The result of the match rule is the flags register $cr.
//   object - object being locked.
//   box    - eBXRegP operand; USE_KILL, i.e. read and then clobbered.
//   tmp    - eAXRegI scratch (TEMP).
//   scr    - scratch pointer register (TEMP).
//   thread - TEMP that is loaded with the current thread by get_thread()
//            before being passed to fast_lock().
instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr, eRegP thread) %{
  predicate(LockingMode != LM_LIGHTWEIGHT);
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, USE_KILL box, TEMP thread);
  ins_cost(300);
  // $thread is overwritten by get_thread(), so it is listed with the other killed registers.
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$thread" %}
  ins_encode %{
    __ get_thread($thread$$Register);
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, noreg, noreg, $thread$$Register, nullptr);
  %}
  ins_pipe(pipe_slow);
%}
13629

13630
// Inlined FastUnlock for every locking mode except LM_LIGHTWEIGHT (see predicate).
// The result of the match rule is the flags register $cr.
//   object - object being unlocked.
//   box    - eAXRegP operand; USE_KILL, i.e. read and then clobbered.
//   tmp    - scratch pointer register (TEMP).
instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
  predicate(LockingMode != LM_LIGHTWEIGHT);
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, USE_KILL box);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register);
  %}
  ins_pipe(pipe_slow);
%}
13641

13642
// Inlined FastLock used only when LockingMode == LM_LIGHTWEIGHT (see predicate).
// The result of the match rule is the flags register $cr.
//   object  - object being locked.
//   box     - eBXRegP operand; USE_KILL, i.e. read and then clobbered.
//   eax_reg - eAXRegI scratch (TEMP).
//   tmp     - scratch pointer register (TEMP).
//   thread  - TEMP that is loaded with the current thread by get_thread()
//             before being passed to fast_lock_lightweight().
instruct cmpFastLockLightweight(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI eax_reg, eRegP tmp, eRegP thread) %{
  predicate(LockingMode == LM_LIGHTWEIGHT);
  match(Set cr (FastLock object box));
  effect(TEMP eax_reg, TEMP tmp, USE_KILL box, TEMP thread);
  ins_cost(300);
  // $thread is overwritten by get_thread(), so it is listed with the other killed registers.
  format %{ "FASTLOCK $object,$box\t! kills $box,$eax_reg,$tmp,$thread" %}
  ins_encode %{
    __ get_thread($thread$$Register);
    __ fast_lock_lightweight($object$$Register, $box$$Register, $eax_reg$$Register, $tmp$$Register, $thread$$Register);
  %}
  ins_pipe(pipe_slow);
%}
13654

13655
// Inlined FastUnlock used only when LockingMode == LM_LIGHTWEIGHT (see predicate).
// The result of the match rule is the flags register $cr.
//   object  - object being unlocked.
//   eax_reg - eAXRegP operand that is the second input of FastUnlock;
//             USE_KILL, i.e. read and then clobbered.
//   tmp     - scratch pointer register (TEMP).
//   thread  - TEMP that is loaded with the current thread by get_thread()
//             before being passed to fast_unlock_lightweight().
instruct cmpFastUnlockLightweight(eFlagsReg cr, eRegP object, eAXRegP eax_reg, eRegP tmp, eRegP thread) %{
  predicate(LockingMode == LM_LIGHTWEIGHT);
  match(Set cr (FastUnlock object eax_reg));
  effect(TEMP tmp, USE_KILL eax_reg, TEMP thread);
  ins_cost(300);
  // $thread is overwritten by get_thread(), so it is listed with the other killed registers.
  format %{ "FASTUNLOCK $object,$eax_reg\t! kills $eax_reg,$tmp,$thread" %}
  ins_encode %{
    __ get_thread($thread$$Register);
    __ fast_unlock_lightweight($object$$Register, $eax_reg$$Register, $tmp$$Register, $thread$$Register);
  %}
  ins_pipe(pipe_slow);
%}
13667

13668
// MaskAll from a long source (eRegL) into a k-register, selected when the
// node's vector length is at most 32.  The vector length is passed to
// vector_maskall_operation() as the mask length.
// NOTE: the rule name says "LT32" while the predicate is "<= 32" and the
// format string prints "LE32"; the name and the string are left as they are.
instruct mask_all_evexL_LT32(kReg dst, eRegL src) %{
  predicate(Matcher::vector_length(n) <= 32);
  match(Set dst (MaskAll src));
  format %{ "mask_all_evexL_LE32 $dst, $src \t" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
  %}
  ins_pipe( pipe_slow );
%}
13678

13679
// MaskAll from a long source (eRegL) into a k-register, selected when the
// node's vector length is greater than 32.  Uses an extra k-register $ktmp
// (TEMP) and passes the vector length to vector_maskall_operation32() as the
// mask length.
instruct mask_all_evexL_GT32(kReg dst, eRegL src, kReg ktmp) %{
  predicate(Matcher::vector_length(n) > 32);
  match(Set dst (MaskAll src));
  effect(TEMP ktmp);
  format %{ "mask_all_evexL_GT32 $dst, $src \t! using $ktmp as TEMP " %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
  %}
  ins_pipe( pipe_slow );
%}
13690

13691
// MaskAll from an int source (rRegI) into a k-register, selected when the
// node's vector length is greater than 32.  Uses an extra k-register $ktmp
// (TEMP) and passes the vector length to vector_maskall_operation32() as the
// mask length.
instruct mask_all_evexI_GT32(kReg dst, rRegI src, kReg ktmp) %{
  predicate(Matcher::vector_length(n) > 32);
  match(Set dst (MaskAll src));
  effect(TEMP ktmp);
  format %{ "mask_all_evexI_GT32 $dst, $src \t! using $ktmp as TEMP" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
  %}
  ins_pipe( pipe_slow );
%}
13702

13703
// ============================================================================
// Safepoint Instruction
13705
// Safepoint poll: a TEST of EAX against the memory word addressed by $poll.
//   cr   - flags register; KILLed because TEST writes the flags.
//   poll - register holding the polling address (USE only); declared with
//          the eRegP_no_EBP operand class so that the fixed size(2) holds.
// A poll_type relocation is recorded at the instruction, and the first
// emitted byte is verified to be the 0x85 opcode.
instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
  match(SafePoint poll);
  effect(KILL cr, USE poll);

  format %{ "TSTL   #EAX,[$poll]\t! Safepoint: poll for GC" %}
  ins_cost(125);
  // EBP would need size(3)
  size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
  ins_encode %{
    __ set_inst_mark();
    __ relocate(relocInfo::poll_type);
    __ clear_inst_mark();
    // Remember where the instruction starts so its opcode byte can be checked.
    address pre_pc = __ pc();
    __ testl(rax, Address($poll$$Register, 0));
    guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
  %}
  ins_pipe(ialu_reg_mem);
%}
13724

13725

13726
// ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this guy.
13730
// Rule for (Set dst (ThreadLocal)): loads the current thread pointer into
// $dst by calling get_thread().
//   dst - destination pointer register (DEF).
//   cr  - flags register; declared KILLed by this rule.
instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
  match(Set dst (ThreadLocal));
  effect(DEF dst, KILL cr);

  format %{ "MOV    $dst, Thread::current()" %}
  ins_encode %{
    Register dstReg = as_Register($dst$$reg);
    __ get_thread(dstReg);
  %}
  ins_pipe( ialu_reg_fat );
%}
13741

13742

13743

13744
//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instruction definitions.
//
// peepmatch ( root_instr_name [preceding_instruction]* );
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...] );
// // instruction numbers are zero-based using left to right order in peepmatch
//
// peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS---------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser.  An individual peephole
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
// on the command-line.
//
// ---------CURRENT LIMITATIONS----------------------------------------------
//
// Only match adjacent instructions in same basic block
// Only equality constraints
// Only constraints between operands, not (0.dest_reg == EAX_enc)
// Only one replacement instruction
//
// ---------EXAMPLE----------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
// instruct movI(rRegI dst, rRegI src) %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}
//
// // Change (inc mov) to lea
// peephole %{
//   // increment preceded by register-register move
//   peepmatch ( incI_eReg movI );
//   // require that the destination register of the increment
//   // match the destination register of the move
//   peepconstraint ( 0.dst == 1.dst );
//   // construct a replacement instruction that sets
//   // the destination to ( move's source register + one )
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// Implementation no longer uses movX instructions since
// machine-independent system no longer uses CopyX nodes.
//
// peephole %{
//   peepmatch ( incI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( decI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addI_eReg_imm movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addP_eReg_imm movP );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
// %}

// // Change load of spilled value to only a spill
// instruct storeI(memory mem, rRegI src) %{
//   match(Set mem (StoreI mem src));
// %}
//
// instruct loadI(rRegI dst, memory mem) %{
//   match(Set dst (LoadI mem));
// %}
//
13835
// Peephole rule: a loadI (instruction 0, the root) that directly follows a
// storeI (instruction 1).  When the store's source register is the load's
// destination register and both use the same memory operand, the pair is
// replaced by a single storeI built from the store's own operands.
peephole %{
  peepmatch ( loadI storeI );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}
13840

13841
//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instruction definitions.
13844

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.