macroArrayCopy.cpp
/*
 * Copyright (c) 2012, 2024, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "gc/shared/barrierSet.hpp"
#include "gc/shared/tlab_globals.hpp"
#include "opto/arraycopynode.hpp"
#include "oops/objArrayKlass.hpp"
#include "opto/convertnode.hpp"
#include "opto/vectornode.hpp"
#include "opto/graphKit.hpp"
#include "opto/macro.hpp"
#include "opto/runtime.hpp"
#include "opto/castnode.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/align.hpp"
#include "utilities/powerOfTwo.hpp"

void PhaseMacroExpand::insert_mem_bar(Node** ctrl, Node** mem, int opcode, Node* precedent) {
  MemBarNode* mb = MemBarNode::make(C, opcode, Compile::AliasIdxBot, precedent);
  mb->init_req(TypeFunc::Control, *ctrl);
  mb->init_req(TypeFunc::Memory, *mem);
  transform_later(mb);
  *ctrl = new ProjNode(mb,TypeFunc::Control);
  transform_later(*ctrl);
  Node* mem_proj = new ProjNode(mb,TypeFunc::Memory);
  transform_later(mem_proj);
  *mem = mem_proj;
}

Node* PhaseMacroExpand::array_element_address(Node* ary, Node* idx, BasicType elembt) {
  uint shift  = exact_log2(type2aelembytes(elembt));
  uint header = arrayOopDesc::base_offset_in_bytes(elembt);
  Node* base =  basic_plus_adr(ary, header);
#ifdef _LP64
  // see comment in GraphKit::array_element_address
  int index_max = max_jint - 1;  // array size is max_jint, index is one less
  const TypeLong* lidxtype = TypeLong::make(CONST64(0), index_max, Type::WidenMax);
  idx = transform_later( new ConvI2LNode(idx, lidxtype) );
#endif
  Node* scale = new LShiftXNode(idx, intcon(shift));
  transform_later(scale);
  return basic_plus_adr(ary, base, scale);
}
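
// Illustrative sketch (not used by the compiler): the address arithmetic built
// above, on plain integers. Element address = array base + header +
// (index << log2(element size)). The 16-byte header below is an assumed
// example value; the real offset comes from arrayOopDesc::base_offset_in_bytes.
static inline intptr_t element_offset_sketch(intptr_t idx) {
  const intptr_t header = 16;      // assumed base offset of element 0 (T_INT)
  const int      shift  = 2;       // log2(sizeof(jint))
  return header + (idx << shift);  // byte offset of element idx
}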

Node* PhaseMacroExpand::ConvI2L(Node* offset) {
  return transform_later(new ConvI2LNode(offset));
}

Node* PhaseMacroExpand::make_leaf_call(Node* ctrl, Node* mem,
                                       const TypeFunc* call_type, address call_addr,
                                       const char* call_name,
                                       const TypePtr* adr_type,
                                       Node* parm0, Node* parm1,
                                       Node* parm2, Node* parm3,
                                       Node* parm4, Node* parm5,
                                       Node* parm6, Node* parm7) {
  Node* call = new CallLeafNoFPNode(call_type, call_addr, call_name, adr_type);
  call->init_req(TypeFunc::Control, ctrl);
  call->init_req(TypeFunc::I_O    , top());
  call->init_req(TypeFunc::Memory , mem);
  call->init_req(TypeFunc::ReturnAdr, top());
  call->init_req(TypeFunc::FramePtr, top());

  // Hook each parm in order.  Stop looking at the first null.
  if (parm0 != nullptr) { call->init_req(TypeFunc::Parms+0, parm0);
  if (parm1 != nullptr) { call->init_req(TypeFunc::Parms+1, parm1);
  if (parm2 != nullptr) { call->init_req(TypeFunc::Parms+2, parm2);
  if (parm3 != nullptr) { call->init_req(TypeFunc::Parms+3, parm3);
  if (parm4 != nullptr) { call->init_req(TypeFunc::Parms+4, parm4);
  if (parm5 != nullptr) { call->init_req(TypeFunc::Parms+5, parm5);
  if (parm6 != nullptr) { call->init_req(TypeFunc::Parms+6, parm6);
  if (parm7 != nullptr) { call->init_req(TypeFunc::Parms+7, parm7);
    /* close each nested if ===> */  } } } } } } } }
  assert(call->in(call->req()-1) != nullptr, "must initialize all parms");

  return call;
}


//------------------------------generate_guard---------------------------
// Helper function for generating guarded fast-slow graph structures.
// The given 'test', if true, guards a slow path.  If the test fails
// then a fast path can be taken.  (We generally hope it fails.)
// In all cases, GraphKit::control() is updated to the fast path.
// The returned value represents the control for the slow path.
// The return value is never 'top'; it is either a valid control
// or null if it is obvious that the slow path can never be taken.
// Also, if region and the slow control are not null, the slow edge
// is appended to the region.
Node* PhaseMacroExpand::generate_guard(Node** ctrl, Node* test, RegionNode* region, float true_prob) {
  if ((*ctrl)->is_top()) {
    // Already short circuited.
    return nullptr;
  }
  // Build an if node and its projections.
  // If test is true we take the slow path, which we assume is uncommon.
  if (_igvn.type(test) == TypeInt::ZERO) {
    // The slow branch is never taken.  No need to build this guard.
    return nullptr;
  }

  IfNode* iff = new IfNode(*ctrl, test, true_prob, COUNT_UNKNOWN);
  transform_later(iff);

  Node* if_slow = new IfTrueNode(iff);
  transform_later(if_slow);

  if (region != nullptr) {
    region->add_req(if_slow);
  }

  Node* if_fast = new IfFalseNode(iff);
  transform_later(if_fast);

  *ctrl = if_fast;

  return if_slow;
}
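
// Shape of the graph built above, in plain control flow (illustrative only):
//
//   if (test) {
//     // slow path: the returned control
//   }
//   // fast path: *ctrl continues here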

inline Node* PhaseMacroExpand::generate_slow_guard(Node** ctrl, Node* test, RegionNode* region) {
  return generate_guard(ctrl, test, region, PROB_UNLIKELY_MAG(3));
}

void PhaseMacroExpand::generate_negative_guard(Node** ctrl, Node* index, RegionNode* region) {
  if ((*ctrl)->is_top())
    return;                // already stopped
  if (_igvn.type(index)->higher_equal(TypeInt::POS)) // [0,maxint]
    return;                // index is already adequately typed
  Node* cmp_lt = new CmpINode(index, intcon(0));
  transform_later(cmp_lt);
  Node* bol_lt = new BoolNode(cmp_lt, BoolTest::lt);
  transform_later(bol_lt);
  generate_guard(ctrl, bol_lt, region, PROB_MIN);
}

void PhaseMacroExpand::generate_limit_guard(Node** ctrl, Node* offset, Node* subseq_length, Node* array_length, RegionNode* region) {
  if ((*ctrl)->is_top())
    return;                // already stopped
  bool zero_offset = _igvn.type(offset) == TypeInt::ZERO;
  if (zero_offset && subseq_length->eqv_uncast(array_length))
    return;                // common case of whole-array copy
  Node* last = subseq_length;
  if (!zero_offset) {            // last += offset
    last = new AddINode(last, offset);
    transform_later(last);
  }
  Node* cmp_lt = new CmpUNode(array_length, last);
  transform_later(cmp_lt);
  Node* bol_lt = new BoolNode(cmp_lt, BoolTest::lt);
  transform_later(bol_lt);
  generate_guard(ctrl, bol_lt, region, PROB_MIN);
}
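
// Illustrative sketch (not used by the compiler) of the guard's arithmetic on
// plain 32-bit integers: the copy [offset, offset + len) fits in the array iff
// (unsigned)(offset + len) <= (unsigned)array_length. The unsigned compare is
// what lets a single test also catch overflow of offset + len.
static inline bool limit_guard_sketch(int32_t offset, int32_t len, int32_t array_length) {
  uint32_t last = (uint32_t)offset + (uint32_t)len;
  return last <= (uint32_t)array_length;  // false means: take the slow path
}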

//
// Partial in-lining handling for smaller conjoint/disjoint array copies having
// length (in bytes) less than ArrayOperationPartialInlineSize.
//  if (length <= ArrayOperationPartialInlineSize) {
//    partial_inlining_block:
//      mask = Mask_Gen
//      vload = LoadVectorMasked src , mask
//      StoreVectorMasked dst, mask, vload
//  } else {
//    stub_block:
//      callstub array_copy
//  }
//  exit_block:
//    Phi = label partial_inlining_block:mem , label stub_block:mem (filled by caller)
//    mem = MergeMem (Phi)
//    control = stub_block
//
//  Exit_block and associated phi(memory) are partially initialized for the
//  partial_inlining_block edges. The remaining edges of exit_block, coming from
//  stub_block, are connected by the caller after the stub nodes are created.
//

void PhaseMacroExpand::generate_partial_inlining_block(Node** ctrl, MergeMemNode** mem, const TypePtr* adr_type,
                                                       RegionNode** exit_block, Node** result_memory, Node* length,
                                                       Node* src_start, Node* dst_start, BasicType type) {
  const TypePtr *src_adr_type = _igvn.type(src_start)->isa_ptr();
  Node* inline_block = nullptr;
  Node* stub_block = nullptr;

  int const_len = -1;
  const TypeInt* lty = nullptr;
  uint shift  = exact_log2(type2aelembytes(type));
  if (length->Opcode() == Op_ConvI2L) {
    lty = _igvn.type(length->in(1))->isa_int();
  } else  {
    lty = _igvn.type(length)->isa_int();
  }
  if (lty && lty->is_con()) {
    const_len = lty->get_con() << shift;
  }

  // Return if copy length is greater than partial inline size limit or
  // target does not support masked load/stores.
  int lane_count = ArrayCopyNode::get_partial_inline_vector_lane_count(type, const_len);
  if ( const_len > ArrayOperationPartialInlineSize ||
      !Matcher::match_rule_supported_vector(Op_LoadVectorMasked, lane_count, type)  ||
      !Matcher::match_rule_supported_vector(Op_StoreVectorMasked, lane_count, type) ||
      !Matcher::match_rule_supported_vector(Op_VectorMaskGen, lane_count, type)) {
    return;
  }

  int inline_limit = ArrayOperationPartialInlineSize / type2aelembytes(type);
  Node* casted_length = new CastLLNode(*ctrl, length, TypeLong::make(0, inline_limit, Type::WidenMin));
  transform_later(casted_length);
  Node* copy_bytes = new LShiftXNode(length, intcon(shift));
  transform_later(copy_bytes);

  Node* cmp_le = new CmpULNode(copy_bytes, longcon(ArrayOperationPartialInlineSize));
  transform_later(cmp_le);
  Node* bol_le = new BoolNode(cmp_le, BoolTest::le);
  transform_later(bol_le);
  inline_block  = generate_guard(ctrl, bol_le, nullptr, PROB_FAIR);
  stub_block = *ctrl;

  Node* mask_gen = VectorMaskGenNode::make(casted_length, type);
  transform_later(mask_gen);

  unsigned vec_size = lane_count * type2aelembytes(type);
  if (C->max_vector_size() < vec_size) {
    C->set_max_vector_size(vec_size);
  }

  const TypeVect * vt = TypeVect::make(type, lane_count);
  Node* mm = (*mem)->memory_at(C->get_alias_index(src_adr_type));
  Node* masked_load = new LoadVectorMaskedNode(inline_block, mm, src_start,
                                               src_adr_type, vt, mask_gen);
  transform_later(masked_load);

  mm = (*mem)->memory_at(C->get_alias_index(adr_type));
  Node* masked_store = new StoreVectorMaskedNode(inline_block, mm, dst_start,
                                                 masked_load, adr_type, mask_gen);
  transform_later(masked_store);

  // Convergence region for inline_block and stub_block.
  *exit_block = new RegionNode(3);
  transform_later(*exit_block);
  (*exit_block)->init_req(1, inline_block);
  *result_memory = new PhiNode(*exit_block, Type::MEMORY, adr_type);
  transform_later(*result_memory);
  (*result_memory)->init_req(1, masked_store);

  *ctrl = stub_block;
}
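
// Scalar model (illustrative only) of the masked copy emitted above: lanes
// whose index is below `length` are transferred, the remaining lanes of the
// vector are left untouched because their mask bit is clear.
static inline void masked_copy_sketch(const int8_t* src, int8_t* dst,
                                      size_t lane_count, size_t length) {
  for (size_t lane = 0; lane < lane_count; lane++) {
    if (lane < length) {      // mask bit set for this lane
      dst[lane] = src[lane];
    }
  }
}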


Node* PhaseMacroExpand::generate_nonpositive_guard(Node** ctrl, Node* index, bool never_negative) {
  if ((*ctrl)->is_top())  return nullptr;

  if (_igvn.type(index)->higher_equal(TypeInt::POS1)) // [1,maxint]
    return nullptr;                // index is already adequately typed
  Node* cmp_le = new CmpINode(index, intcon(0));
  transform_later(cmp_le);
  BoolTest::mask le_or_eq = (never_negative ? BoolTest::eq : BoolTest::le);
  Node* bol_le = new BoolNode(cmp_le, le_or_eq);
  transform_later(bol_le);
  Node* is_notp = generate_guard(ctrl, bol_le, nullptr, PROB_MIN);

  return is_notp;
}

void PhaseMacroExpand::finish_arraycopy_call(Node* call, Node** ctrl, MergeMemNode** mem, const TypePtr* adr_type) {
  transform_later(call);

  *ctrl = new ProjNode(call,TypeFunc::Control);
  transform_later(*ctrl);
  Node* newmem = new ProjNode(call, TypeFunc::Memory);
  transform_later(newmem);

  uint alias_idx = C->get_alias_index(adr_type);
  if (alias_idx != Compile::AliasIdxBot) {
    *mem = MergeMemNode::make(*mem);
    (*mem)->set_memory_at(alias_idx, newmem);
  } else {
    *mem = MergeMemNode::make(newmem);
  }
  transform_later(*mem);
}

address PhaseMacroExpand::basictype2arraycopy(BasicType t,
                                              Node* src_offset,
                                              Node* dest_offset,
                                              bool disjoint_bases,
                                              const char* &name,
                                              bool dest_uninitialized) {
  const TypeInt* src_offset_inttype  = _igvn.find_int_type(src_offset);
  const TypeInt* dest_offset_inttype = _igvn.find_int_type(dest_offset);

  bool aligned = false;
  bool disjoint = disjoint_bases;

  // if the offsets are the same, we can treat the memory regions as
  // disjoint, because either the memory regions are in different arrays,
  // or they are identical (which we can treat as disjoint.)  We can also
  // treat a copy with a destination index less than the source index
  // as disjoint since a low->high copy will work correctly in this case.
  if (src_offset_inttype != nullptr && src_offset_inttype->is_con() &&
      dest_offset_inttype != nullptr && dest_offset_inttype->is_con()) {
    // both indices are constants
    int s_offs = src_offset_inttype->get_con();
    int d_offs = dest_offset_inttype->get_con();
    int element_size = type2aelembytes(t);
    aligned = ((arrayOopDesc::base_offset_in_bytes(t) + (uint)s_offs * element_size) % HeapWordSize == 0) &&
              ((arrayOopDesc::base_offset_in_bytes(t) + (uint)d_offs * element_size) % HeapWordSize == 0);
    if (s_offs >= d_offs)  disjoint = true;
  } else if (src_offset == dest_offset && src_offset != nullptr) {
    // This can occur if the offsets are identical non-constants.
    disjoint = true;
  }

  return StubRoutines::select_arraycopy_function(t, aligned, disjoint, name, dest_uninitialized);
}
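
// Minimal sketch (illustrative only) of the disjointness argument above: a
// forward, low-to-high copy is correct even for overlapping ranges whenever
// the destination does not start after the source.
static inline void forward_copy_sketch(int* dst, const int* src, size_t n) {
  for (size_t i = 0; i < n; i++) {
    dst[i] = src[i];  // safe for overlap as long as dst <= src
  }
}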

#define XTOP LP64_ONLY(COMMA top())

// Generate an optimized call to arraycopy.
// Caller must guard against non-arrays.
// Caller must determine a common array basic-type for both arrays.
// Caller must validate offsets against array bounds.
// The slow_region has already collected guard failure paths
// (such as out of bounds length or non-conformable array types).
// The generated code has this shape, in general:
//
//     if (length == 0)  return   // via zero_path
//     slowval = -1
//     if (types unknown) {
//       slowval = call generic copy loop
//       if (slowval == 0)  return  // via checked_path
//     } else if (indexes in bounds) {
//       if ((is object array) && !(array type check)) {
//         slowval = call checked copy loop
//         if (slowval == 0)  return  // via checked_path
//       } else {
//         call bulk copy loop
//         return  // via fast_path
//       }
//     }
//     // adjust params for remaining work:
//     if (slowval != -1) {
//       n = -1^slowval; src_offset += n; dest_offset += n; length -= n
//     }
//   slow_region:
//     call slow arraycopy(src, src_offset, dest, dest_offset, length)
//     return  // via slow_call_path
//
// This routine is used from several intrinsics:  System.arraycopy,
// Object.clone (the array subcase), and Arrays.copyOf[Range].
//
Node* PhaseMacroExpand::generate_arraycopy(ArrayCopyNode *ac, AllocateArrayNode* alloc,
                                           Node** ctrl, MergeMemNode* mem, Node** io,
                                           const TypePtr* adr_type,
                                           BasicType basic_elem_type,
                                           Node* src,  Node* src_offset,
                                           Node* dest, Node* dest_offset,
                                           Node* copy_length,
                                           bool disjoint_bases,
                                           bool length_never_negative,
                                           RegionNode* slow_region) {
  if (slow_region == nullptr) {
    slow_region = new RegionNode(1);
    transform_later(slow_region);
  }

  Node* original_dest = dest;
  bool  dest_needs_zeroing   = false;
  bool  acopy_to_uninitialized = false;

  // See if this is the initialization of a newly-allocated array.
  // If so, we will take responsibility here for initializing it to zero.
  // (Note:  Because tightly_coupled_allocation performs checks on the
  // out-edges of the dest, we need to avoid making derived pointers
  // from it until we have checked its uses.)
  if (ReduceBulkZeroing
      && !(UseTLAB && ZeroTLAB) // pointless if already zeroed
      && basic_elem_type != T_CONFLICT // avoid corner case
      && !src->eqv_uncast(dest)
      && alloc != nullptr
      && _igvn.find_int_con(alloc->in(AllocateNode::ALength), 1) > 0) {
    assert(ac->is_alloc_tightly_coupled(), "sanity");
    // acopy to uninitialized tightly coupled allocations
    // needs zeroing outside the copy range
    // and the acopy itself will be to uninitialized memory
    acopy_to_uninitialized = true;
    if (alloc->maybe_set_complete(&_igvn)) {
      // "You break it, you buy it."
      InitializeNode* init = alloc->initialization();
      assert(init->is_complete(), "we just did this");
      init->set_complete_with_arraycopy();
      assert(dest->is_CheckCastPP(), "sanity");
      assert(dest->in(0)->in(0) == init, "dest pinned");
      adr_type = TypeRawPtr::BOTTOM;  // all initializations are into raw memory
      // From this point on, every exit path is responsible for
      // initializing any non-copied parts of the object to zero.
      // Also, if this flag is set we make sure that arraycopy interacts properly
      // with G1, eliding pre-barriers. See CR 6627983.
      dest_needs_zeroing = true;
    } else {
      // dest_needs_zeroing = false;
    }
  } else {
    // No zeroing elimination needed here.
    alloc                  = nullptr;
    acopy_to_uninitialized = false;
    //original_dest        = dest;
    //dest_needs_zeroing   = false;
  }

  uint alias_idx = C->get_alias_index(adr_type);

  // Results are placed here:
  enum { fast_path        = 1,  // normal void-returning assembly stub
         checked_path     = 2,  // special assembly stub with cleanup
         slow_call_path   = 3,  // something went wrong; call the VM
         zero_path        = 4,  // bypass when length of copy is zero
         bcopy_path       = 5,  // copy primitive array by 64-bit blocks
         PATH_LIMIT       = 6
  };
  RegionNode* result_region = new RegionNode(PATH_LIMIT);
  PhiNode*    result_i_o    = new PhiNode(result_region, Type::ABIO);
  PhiNode*    result_memory = new PhiNode(result_region, Type::MEMORY, adr_type);
  assert(adr_type != TypePtr::BOTTOM, "must be RawMem or a T[] slice");
  transform_later(result_region);
  transform_later(result_i_o);
  transform_later(result_memory);

  // The slow_control path:
  Node* slow_control;
  Node* slow_i_o = *io;
  Node* slow_mem = mem->memory_at(alias_idx);
  DEBUG_ONLY(slow_control = (Node*) badAddress);

  // Checked control path:
  Node* checked_control = top();
  Node* checked_mem     = nullptr;
  Node* checked_i_o     = nullptr;
  Node* checked_value   = nullptr;

  if (basic_elem_type == T_CONFLICT) {
    assert(!dest_needs_zeroing, "");
    Node* cv = generate_generic_arraycopy(ctrl, &mem,
                                          adr_type,
                                          src, src_offset, dest, dest_offset,
                                          copy_length, acopy_to_uninitialized);
    if (cv == nullptr)  cv = intcon(-1);  // failure (no stub available)
    checked_control = *ctrl;
    checked_i_o     = *io;
    checked_mem     = mem->memory_at(alias_idx);
    checked_value   = cv;
    *ctrl = top();
  }

  Node* not_pos = generate_nonpositive_guard(ctrl, copy_length, length_never_negative);
  if (not_pos != nullptr) {
    Node* local_ctrl = not_pos, *local_io = *io;
    MergeMemNode* local_mem = MergeMemNode::make(mem);
    transform_later(local_mem);

    // (6) length must not be negative.
    if (!length_never_negative) {
      generate_negative_guard(&local_ctrl, copy_length, slow_region);
    }

    // copy_length is 0.
    if (dest_needs_zeroing) {
      assert(!local_ctrl->is_top(), "no ctrl?");
      Node* dest_length = alloc->in(AllocateNode::ALength);
      if (copy_length->eqv_uncast(dest_length)
          || _igvn.find_int_con(dest_length, 1) <= 0) {
        // There is no zeroing to do. No need for a secondary raw memory barrier.
      } else {
        // Clear the whole thing since there are no source elements to copy.
        generate_clear_array(local_ctrl, local_mem,
                             adr_type, dest, basic_elem_type,
                             intcon(0), nullptr,
                             alloc->in(AllocateNode::AllocSize));
        // Use a secondary InitializeNode as raw memory barrier.
        // Currently it is needed only on this path since other
        // paths have stub or runtime calls as raw memory barriers.
        MemBarNode* mb = MemBarNode::make(C, Op_Initialize,
                                          Compile::AliasIdxRaw,
                                          top());
        transform_later(mb);
        mb->set_req(TypeFunc::Control,local_ctrl);
        mb->set_req(TypeFunc::Memory, local_mem->memory_at(Compile::AliasIdxRaw));
        local_ctrl = transform_later(new ProjNode(mb, TypeFunc::Control));
        local_mem->set_memory_at(Compile::AliasIdxRaw, transform_later(new ProjNode(mb, TypeFunc::Memory)));

        InitializeNode* init = mb->as_Initialize();
        init->set_complete(&_igvn);  // (there is no corresponding AllocateNode)
      }
    }

    // Present the results of the fast call.
    result_region->init_req(zero_path, local_ctrl);
    result_i_o   ->init_req(zero_path, local_io);
    result_memory->init_req(zero_path, local_mem->memory_at(alias_idx));
  }

  if (!(*ctrl)->is_top() && dest_needs_zeroing) {
    // We have to initialize the *uncopied* part of the array to zero.
    // The copy destination is the slice dest[off..off+len].  The other slices
    // are dest_head = dest[0..off] and dest_tail = dest[off+len..dest.length].
    Node* dest_size   = alloc->in(AllocateNode::AllocSize);
    Node* dest_length = alloc->in(AllocateNode::ALength);
    Node* dest_tail   = transform_later( new AddINode(dest_offset, copy_length));

    // If there is a head section that needs zeroing, do it now.
    if (_igvn.find_int_con(dest_offset, -1) != 0) {
      generate_clear_array(*ctrl, mem,
                           adr_type, dest, basic_elem_type,
                           intcon(0), dest_offset,
                           nullptr);
    }

    // Next, perform a dynamic check on the tail length.
    // It is often zero, and we can win big if we prove this.
    // There are two wins:  Avoid generating the ClearArray
    // with its attendant messy index arithmetic, and upgrade
    // the copy to a more hardware-friendly word size of 64 bits.
    Node* tail_ctl = nullptr;
    if (!(*ctrl)->is_top() && !dest_tail->eqv_uncast(dest_length)) {
      Node* cmp_lt   = transform_later( new CmpINode(dest_tail, dest_length) );
      Node* bol_lt   = transform_later( new BoolNode(cmp_lt, BoolTest::lt) );
      tail_ctl = generate_slow_guard(ctrl, bol_lt, nullptr);
      assert(tail_ctl != nullptr || !(*ctrl)->is_top(), "must be an outcome");
    }

    // At this point, let's assume there is no tail.
    if (!(*ctrl)->is_top() && alloc != nullptr && basic_elem_type != T_OBJECT) {
      // There is no tail.  Try an upgrade to a 64-bit copy.
      bool didit = false;
      {
        Node* local_ctrl = *ctrl, *local_io = *io;
        MergeMemNode* local_mem = MergeMemNode::make(mem);
        transform_later(local_mem);

        didit = generate_block_arraycopy(&local_ctrl, &local_mem, local_io,
                                         adr_type, basic_elem_type, alloc,
                                         src, src_offset, dest, dest_offset,
                                         dest_size, acopy_to_uninitialized);
        if (didit) {
          // Present the results of the block-copying fast call.
          result_region->init_req(bcopy_path, local_ctrl);
          result_i_o   ->init_req(bcopy_path, local_io);
          result_memory->init_req(bcopy_path, local_mem->memory_at(alias_idx));
        }
      }
      if (didit) {
        *ctrl = top();     // no regular fast path
      }
    }

    // Clear the tail, if any.
    if (tail_ctl != nullptr) {
      Node* notail_ctl = (*ctrl)->is_top() ? nullptr : *ctrl;
      *ctrl = tail_ctl;
      if (notail_ctl == nullptr) {
        generate_clear_array(*ctrl, mem,
                             adr_type, dest, basic_elem_type,
                             dest_tail, nullptr,
                             dest_size);
      } else {
        // Make a local merge.
        Node* done_ctl = transform_later(new RegionNode(3));
        Node* done_mem = transform_later(new PhiNode(done_ctl, Type::MEMORY, adr_type));
        done_ctl->init_req(1, notail_ctl);
        done_mem->init_req(1, mem->memory_at(alias_idx));
        generate_clear_array(*ctrl, mem,
                             adr_type, dest, basic_elem_type,
                             dest_tail, nullptr,
                             dest_size);
        done_ctl->init_req(2, *ctrl);
        done_mem->init_req(2, mem->memory_at(alias_idx));
        *ctrl = done_ctl;
        mem->set_memory_at(alias_idx, done_mem);
      }
    }
  }

  BasicType copy_type = basic_elem_type;
  assert(basic_elem_type != T_ARRAY, "caller must fix this");
  if (!(*ctrl)->is_top() && copy_type == T_OBJECT) {
    // If src and dest have compatible element types, we can copy bits.
    // Types S[] and D[] are compatible if D is a supertype of S.
    //
    // If they are not, we will use checked_oop_disjoint_arraycopy,
    // which performs a fast optimistic per-oop check, and backs off
    // further to JVM_ArrayCopy on the first per-oop check that fails.
    // (Actually, we don't move raw bits only; the GC requires card marks.)

    // We don't need a subtype check for validated copies and Object[].clone()
    bool skip_subtype_check = ac->is_arraycopy_validated() || ac->is_copyof_validated() ||
                              ac->is_copyofrange_validated() || ac->is_clone_oop_array();
    if (!skip_subtype_check) {
      // Get the klass* for both src and dest
      Node* src_klass  = ac->in(ArrayCopyNode::SrcKlass);
      Node* dest_klass = ac->in(ArrayCopyNode::DestKlass);

      assert(src_klass != nullptr && dest_klass != nullptr, "should have klasses");

      // Generate the subtype check.
      // This might fold up statically, or then again it might not.
      //
      // Non-static example:  Copying List<String>.elements to a new String[].
      // The backing store for a List<String> is always an Object[],
      // but its elements are always type String, if the generic types
      // are correct at the source level.
      //
      // Test S[] against D[], not S against D, because (probably)
      // the secondary supertype cache is less busy for S[] than S.
      // This usually only matters when D is an interface.
      Node* not_subtype_ctrl = Phase::gen_subtype_check(src_klass, dest_klass, ctrl, mem, _igvn, nullptr, -1);
      // Plug failing path into checked_oop_disjoint_arraycopy
      if (not_subtype_ctrl != top()) {
        Node* local_ctrl = not_subtype_ctrl;
        MergeMemNode* local_mem = MergeMemNode::make(mem);
        transform_later(local_mem);

        // (At this point we can assume disjoint_bases, since types differ.)
        int ek_offset = in_bytes(ObjArrayKlass::element_klass_offset());
        Node* p1 = basic_plus_adr(dest_klass, ek_offset);
        Node* n1 = LoadKlassNode::make(_igvn, nullptr, C->immutable_memory(), p1, TypeRawPtr::BOTTOM);
        Node* dest_elem_klass = transform_later(n1);
        Node* cv = generate_checkcast_arraycopy(&local_ctrl, &local_mem,
                                                adr_type,
                                                dest_elem_klass,
                                                src, src_offset, dest, dest_offset,
                                                ConvI2X(copy_length), acopy_to_uninitialized);
        if (cv == nullptr)  cv = intcon(-1);  // failure (no stub available)
        checked_control = local_ctrl;
        checked_i_o     = *io;
        checked_mem     = local_mem->memory_at(alias_idx);
        checked_value   = cv;
      }
    }
    // At this point we know we do not need type checks on oop stores.

    BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
    if (!bs->array_copy_requires_gc_barriers(alloc != nullptr, copy_type, false, false, BarrierSetC2::Expansion)) {
      // If we do not need gc barriers, copy using the jint or jlong stub.
      copy_type = LP64_ONLY(UseCompressedOops ? T_INT : T_LONG) NOT_LP64(T_INT);
      assert(type2aelembytes(basic_elem_type) == type2aelembytes(copy_type),
             "sizes agree");
    }
  }

  bool is_partial_array_copy = false;
  if (!(*ctrl)->is_top()) {
    // Generate the fast path, if possible.
    Node* local_ctrl = *ctrl;
    MergeMemNode* local_mem = MergeMemNode::make(mem);
    transform_later(local_mem);
    is_partial_array_copy = generate_unchecked_arraycopy(&local_ctrl, &local_mem,
                                                         adr_type, copy_type, disjoint_bases,
                                                         src, src_offset, dest, dest_offset,
                                                         ConvI2X(copy_length), acopy_to_uninitialized);

    // Present the results of the fast call.
    result_region->init_req(fast_path, local_ctrl);
    result_i_o   ->init_req(fast_path, *io);
    result_memory->init_req(fast_path, local_mem->memory_at(alias_idx));
  }

  // Here are all the slow paths up to this point, in one bundle:
  assert(slow_region != nullptr, "allocated on entry");
  slow_control = slow_region;
  DEBUG_ONLY(slow_region = (RegionNode*)badAddress);

  *ctrl = checked_control;
  if (!(*ctrl)->is_top()) {
    // Clean up after the checked call.
    // The returned value is either 0 or -1^K,
    // where K = number of partially transferred array elements.
    Node* cmp = new CmpINode(checked_value, intcon(0));
    transform_later(cmp);
    Node* bol = new BoolNode(cmp, BoolTest::eq);
    transform_later(bol);
    IfNode* iff = new IfNode(*ctrl, bol, PROB_MAX, COUNT_UNKNOWN);
    transform_later(iff);

    // If it is 0, we are done, so transfer to the end.
    Node* checks_done = new IfTrueNode(iff);
    transform_later(checks_done);
    result_region->init_req(checked_path, checks_done);
    result_i_o   ->init_req(checked_path, checked_i_o);
    result_memory->init_req(checked_path, checked_mem);

    // If it is not zero, merge into the slow call.
    *ctrl = new IfFalseNode(iff);
    transform_later(*ctrl);
    RegionNode* slow_reg2 = new RegionNode(3);
    PhiNode*    slow_i_o2 = new PhiNode(slow_reg2, Type::ABIO);
    PhiNode*    slow_mem2 = new PhiNode(slow_reg2, Type::MEMORY, adr_type);
    transform_later(slow_reg2);
    transform_later(slow_i_o2);
    transform_later(slow_mem2);
    slow_reg2  ->init_req(1, slow_control);
    slow_i_o2  ->init_req(1, slow_i_o);
    slow_mem2  ->init_req(1, slow_mem);
    slow_reg2  ->init_req(2, *ctrl);
    slow_i_o2  ->init_req(2, checked_i_o);
    slow_mem2  ->init_req(2, checked_mem);

    slow_control = slow_reg2;
    slow_i_o     = slow_i_o2;
    slow_mem     = slow_mem2;

    if (alloc != nullptr) {
      // We'll restart from the very beginning, after zeroing the whole thing.
      // This can cause double writes, but that's OK since dest is brand new.
      // So we ignore the low 31 bits of the value returned from the stub.
    } else {
      // We must continue the copy exactly where it failed, or else
      // another thread might see the wrong number of writes to dest.
      Node* checked_offset = new XorINode(checked_value, intcon(-1));
      Node* slow_offset    = new PhiNode(slow_reg2, TypeInt::INT);
      transform_later(checked_offset);
      transform_later(slow_offset);
      slow_offset->init_req(1, intcon(0));
      slow_offset->init_req(2, checked_offset);

      // Adjust the arguments by the conditionally incoming offset.
      Node* src_off_plus  = new AddINode(src_offset,  slow_offset);
      transform_later(src_off_plus);
      Node* dest_off_plus = new AddINode(dest_offset, slow_offset);
      transform_later(dest_off_plus);
      Node* length_minus  = new SubINode(copy_length, slow_offset);
      transform_later(length_minus);

      // Tweak the node variables to adjust the code produced below:
      src_offset  = src_off_plus;
      dest_offset = dest_off_plus;
      copy_length = length_minus;
    }
  }
  *ctrl = slow_control;
  if (!(*ctrl)->is_top()) {
    Node* local_ctrl = *ctrl, *local_io = slow_i_o;
    MergeMemNode* local_mem = MergeMemNode::make(mem);
    transform_later(local_mem);

    // Generate the slow path, if needed.
    local_mem->set_memory_at(alias_idx, slow_mem);

    if (dest_needs_zeroing) {
      generate_clear_array(local_ctrl, local_mem,
                           adr_type, dest, basic_elem_type,
                           intcon(0), nullptr,
                           alloc->in(AllocateNode::AllocSize));
    }

    local_mem = generate_slow_arraycopy(ac,
                                        &local_ctrl, local_mem, &local_io,
                                        adr_type,
                                        src, src_offset, dest, dest_offset,
                                        copy_length, /*dest_uninitialized*/false);

    result_region->init_req(slow_call_path, local_ctrl);
    result_i_o   ->init_req(slow_call_path, local_io);
    result_memory->init_req(slow_call_path, local_mem->memory_at(alias_idx));
  } else {
    ShouldNotReachHere(); // no call to generate_slow_arraycopy:
                          // projections were not extracted
  }

  // Remove unused edges.
  for (uint i = 1; i < result_region->req(); i++) {
    if (result_region->in(i) == nullptr) {
      result_region->init_req(i, top());
    }
  }

  // Finished; return the combined state.
  *ctrl = result_region;
  *io = result_i_o;
  mem->set_memory_at(alias_idx, result_memory);

  // mem no longer guaranteed to stay a MergeMemNode
  Node* out_mem = mem;
  DEBUG_ONLY(mem = nullptr);

  // The memory edges above are precise in order to model effects around
  // array copies accurately to allow value numbering of field loads around
  // arraycopy.  Such field loads, both before and after, are common in Java
  // collections and similar classes involving header/array data structures.
  //
  // But with a low number of registers, or when some registers are used or
  // killed by arraycopy calls, this causes register spilling on the stack. See 6544710.
  // The next memory barrier is added to avoid it. If the arraycopy can be
  // optimized away (which it can, sometimes) then we can manually remove
  // the membar also.
  //
  // Do not let reads from the cloned object float above the arraycopy.
  if (alloc != nullptr && !alloc->initialization()->does_not_escape()) {
    // Do not let stores that initialize this object be reordered with
    // a subsequent store that would make this object accessible by
    // other threads.
    insert_mem_bar(ctrl, &out_mem, Op_MemBarStoreStore);
  } else {
    insert_mem_bar(ctrl, &out_mem, Op_MemBarCPUOrder);
  }

  if (is_partial_array_copy) {
    assert((*ctrl)->is_Proj(), "MemBar control projection");
    assert((*ctrl)->in(0)->isa_MemBar(), "MemBar node");
    (*ctrl)->in(0)->isa_MemBar()->set_trailing_partial_array_copy();
  }

  _igvn.replace_node(_callprojs.fallthrough_memproj, out_mem);
  if (_callprojs.fallthrough_ioproj != nullptr) {
    _igvn.replace_node(_callprojs.fallthrough_ioproj, *io);
  }
  _igvn.replace_node(_callprojs.fallthrough_catchproj, *ctrl);

#ifdef ASSERT
  const TypeOopPtr* dest_t = _igvn.type(dest)->is_oopptr();
  if (dest_t->is_known_instance() && !is_partial_array_copy) {
    ArrayCopyNode* ac = nullptr;
    assert(ArrayCopyNode::may_modify(dest_t, (*ctrl)->in(0)->as_MemBar(), &_igvn, ac), "dependency on arraycopy lost");
    assert(ac == nullptr, "no arraycopy anymore");
  }
#endif

  return out_mem;
}
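
// Sketch (illustrative only) of the return-value protocol used by the checked
// copy stub above: 0 means the whole copy succeeded; otherwise the value is
// ~K, where K elements were copied before the failing element, so the slow
// path resumes at offset K.
static inline int32_t elements_already_copied_sketch(int32_t checked_value) {
  // checked_value is ~K when the stub stopped after K elements
  return checked_value ^ -1;  // == ~checked_value == K
}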

// Helper for initialization of arrays, creating a ClearArray.
// It writes zero bits in [start..end), within the body of an array object.
// The memory effects are all chained onto the 'adr_type' alias category.
//
// Since the object is otherwise uninitialized, we are free
// to put a little "slop" around the edges of the cleared area,
// as long as it does not go back into the array's header,
// or beyond the array end within the heap.
//
// The lower edge can be rounded down to the nearest jint and the
// upper edge can be rounded up to the nearest MinObjAlignmentInBytes.
//
// Arguments:
//   adr_type           memory slice where writes are generated
//   dest               oop of the destination array
//   basic_elem_type    element type of the destination
//   slice_idx          array index of first element to store
//   slice_len          number of elements to store (or null)
//   dest_size          total size in bytes of the array object
//
// Exactly one of slice_len or dest_size must be non-null.
// If dest_size is non-null, zeroing extends to the end of the object.
// If slice_len is non-null, the slice_idx value must be a constant.
void PhaseMacroExpand::generate_clear_array(Node* ctrl, MergeMemNode* merge_mem,
                                            const TypePtr* adr_type,
                                            Node* dest,
                                            BasicType basic_elem_type,
                                            Node* slice_idx,
                                            Node* slice_len,
                                            Node* dest_size) {
  // one or the other but not both of slice_len and dest_size:
  assert((slice_len != nullptr? 1: 0) + (dest_size != nullptr? 1: 0) == 1, "");
  if (slice_len == nullptr)  slice_len = top();
  if (dest_size == nullptr)  dest_size = top();

  uint alias_idx = C->get_alias_index(adr_type);

  // operate on this memory slice:
  Node* mem = merge_mem->memory_at(alias_idx); // memory slice to operate on

  // scaling and rounding of indexes:
  int scale = exact_log2(type2aelembytes(basic_elem_type));
  int abase = arrayOopDesc::base_offset_in_bytes(basic_elem_type);
  int clear_low = (-1 << scale) & (BytesPerInt  - 1);
  int bump_bit  = (-1 << scale) & BytesPerInt;

  // determine constant starts and ends
  const intptr_t BIG_NEG = -128;
  assert(BIG_NEG + 2*abase < 0, "neg enough");
  intptr_t slice_idx_con = (intptr_t) _igvn.find_int_con(slice_idx, BIG_NEG);
  intptr_t slice_len_con = (intptr_t) _igvn.find_int_con(slice_len, BIG_NEG);
  if (slice_len_con == 0) {
    return;                     // nothing to do here
  }
  intptr_t start_con = (abase + (slice_idx_con << scale)) & ~clear_low;
  intptr_t end_con   = _igvn.find_intptr_t_con(dest_size, -1);
  if (slice_idx_con >= 0 && slice_len_con >= 0) {
    assert(end_con < 0, "not two cons");
    end_con = align_up(abase + ((slice_idx_con + slice_len_con) << scale),
                       BytesPerLong);
  }

  if (start_con >= 0 && end_con >= 0) {
    // Constant start and end.  Simple.
    mem = ClearArrayNode::clear_memory(ctrl, mem, dest,
                                       start_con, end_con, &_igvn);
  } else if (start_con >= 0 && dest_size != top()) {
    // Constant start, pre-rounded end after the tail of the array.
    Node* end = dest_size;
    mem = ClearArrayNode::clear_memory(ctrl, mem, dest,
                                       start_con, end, &_igvn);
  } else if (start_con >= 0 && slice_len != top()) {
    // Constant start, non-constant end.  End needs rounding up.
    // End offset = round_up(abase + ((slice_idx_con + slice_len) << scale), 8)
    intptr_t end_base  = abase + (slice_idx_con << scale);
    int      end_round = (-1 << scale) & (BytesPerLong  - 1);
    Node*    end       = ConvI2X(slice_len);
    if (scale != 0)
      end = transform_later(new LShiftXNode(end, intcon(scale) ));
    end_base += end_round;
    end = transform_later(new AddXNode(end, MakeConX(end_base)) );
    end = transform_later(new AndXNode(end, MakeConX(~end_round)) );
    mem = ClearArrayNode::clear_memory(ctrl, mem, dest,
                                       start_con, end, &_igvn);
  } else if (start_con < 0 && dest_size != top()) {
    // Non-constant start, pre-rounded end after the tail of the array.
    // This is almost certainly a "round-to-end" operation.
    Node* start = slice_idx;
    start = ConvI2X(start);
    if (scale != 0)
      start = transform_later(new LShiftXNode( start, intcon(scale) ));
    start = transform_later(new AddXNode(start, MakeConX(abase)) );
    if ((bump_bit | clear_low) != 0) {
      int to_clear = (bump_bit | clear_low);
      // Align up mod 8, then store a jint zero unconditionally
      // just before the mod-8 boundary.
      if (((abase + bump_bit) & ~to_clear) - bump_bit
          < arrayOopDesc::length_offset_in_bytes() + BytesPerInt) {
        bump_bit = 0;
        assert((abase & to_clear) == 0, "array base must be long-aligned");
      } else {
        // Bump 'start' up to (or past) the next jint boundary:
        start = transform_later( new AddXNode(start, MakeConX(bump_bit)) );
        assert((abase & clear_low) == 0, "array base must be int-aligned");
      }
      // Round bumped 'start' down to jlong boundary in body of array.
      start = transform_later(new AndXNode(start, MakeConX(~to_clear)) );
      if (bump_bit != 0) {
        // Store a zero to the immediately preceding jint:
        Node* x1 = transform_later(new AddXNode(start, MakeConX(-bump_bit)) );
        Node* p1 = basic_plus_adr(dest, x1);
        mem = StoreNode::make(_igvn, ctrl, mem, p1, adr_type, intcon(0), T_INT, MemNode::unordered);
        mem = transform_later(mem);
      }
    }
    Node* end = dest_size; // pre-rounded
    mem = ClearArrayNode::clear_memory(ctrl, mem, dest,
                                       start, end, &_igvn);
  } else {
    // Non-constant start, unrounded non-constant end.
    // (Nobody zeroes a random midsection of an array using this routine.)
    ShouldNotReachHere();       // fix caller
  }

  // Done.
  merge_mem->set_memory_at(alias_idx, mem);
}
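
// Worked model (illustrative only, and simplified) of the rounding used above:
// the cleared byte range [start, end) may be widened down to a jint boundary
// and up to a jlong boundary, because the surrounding slop inside the new
// object is known to be dead. Plain-integer version of the constant case:
static inline void clear_bounds_sketch(intptr_t abase, int scale,
                                       intptr_t idx, intptr_t len,
                                       intptr_t* start, intptr_t* end) {
  *start = (abase + (idx << scale)) & ~(intptr_t)(BytesPerInt - 1);   // round down
  *end   = (abase + ((idx + len) << scale) + (BytesPerLong - 1))
           & ~(intptr_t)(BytesPerLong - 1);                           // round up
}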

bool PhaseMacroExpand::generate_block_arraycopy(Node** ctrl, MergeMemNode** mem, Node* io,
                                                const TypePtr* adr_type,
                                                BasicType basic_elem_type,
                                                AllocateNode* alloc,
                                                Node* src,  Node* src_offset,
                                                Node* dest, Node* dest_offset,
                                                Node* dest_size, bool dest_uninitialized) {
  // See if there is an advantage from block transfer.
  int scale = exact_log2(type2aelembytes(basic_elem_type));
  if (scale >= LogBytesPerLong)
    return false;               // it is already a block transfer

  // Look at the alignment of the starting offsets.
  int abase = arrayOopDesc::base_offset_in_bytes(basic_elem_type);

  intptr_t src_off_con  = (intptr_t) _igvn.find_int_con(src_offset, -1);
  intptr_t dest_off_con = (intptr_t) _igvn.find_int_con(dest_offset, -1);
  if (src_off_con < 0 || dest_off_con < 0) {
    // At present, we can only understand constants.
    return false;
  }

  intptr_t src_off  = abase + (src_off_con  << scale);
  intptr_t dest_off = abase + (dest_off_con << scale);

  if (((src_off | dest_off) & (BytesPerLong-1)) != 0) {
    // Non-aligned; too bad.
    // One more chance:  Pick off an initial 32-bit word.
    // This is a common case, since abase can be odd mod 8.
    if (((src_off | dest_off) & (BytesPerLong-1)) == BytesPerInt &&
        ((src_off ^ dest_off) & (BytesPerLong-1)) == 0) {
      Node* sptr = basic_plus_adr(src,  src_off);
      Node* dptr = basic_plus_adr(dest, dest_off);
      const TypePtr* s_adr_type = _igvn.type(sptr)->is_ptr();
      assert(s_adr_type->isa_aryptr(), "impossible slice");
      uint s_alias_idx = C->get_alias_index(s_adr_type);
      uint d_alias_idx = C->get_alias_index(adr_type);
      bool is_mismatched = (basic_elem_type != T_INT);
      Node* sval = transform_later(
          LoadNode::make(_igvn, *ctrl, (*mem)->memory_at(s_alias_idx), sptr, s_adr_type,
                         TypeInt::INT, T_INT, MemNode::unordered, LoadNode::DependsOnlyOnTest,
                         false /*require_atomic_access*/, false /*unaligned*/, is_mismatched));
      Node* st = transform_later(
          StoreNode::make(_igvn, *ctrl, (*mem)->memory_at(d_alias_idx), dptr, adr_type,
                          sval, T_INT, MemNode::unordered));
      if (is_mismatched) {
        st->as_Store()->set_mismatched_access();
      }
      (*mem)->set_memory_at(d_alias_idx, st);
      src_off += BytesPerInt;
      dest_off += BytesPerInt;
    } else {
      return false;
    }
  }
  assert(src_off % BytesPerLong == 0, "");
  assert(dest_off % BytesPerLong == 0, "");

  // Do this copy by giant steps.
  Node* sptr  = basic_plus_adr(src,  src_off);
  Node* dptr  = basic_plus_adr(dest, dest_off);
  Node* countx = dest_size;
  countx = transform_later(new SubXNode(countx, MakeConX(dest_off)));
  countx = transform_later(new URShiftXNode(countx, intcon(LogBytesPerLong)));

  bool disjoint_bases = true;   // since alloc isn't null
  generate_unchecked_arraycopy(ctrl, mem,
                               adr_type, T_LONG, disjoint_bases,
                               sptr, nullptr, dptr, nullptr, countx, dest_uninitialized);

  return true;
}
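
// Plain-integer model (illustrative only) of the alignment test above: a
// block (jlong) copy is possible directly when both starting byte offsets are
// 8-byte aligned, or after picking off one jint when both are 4-but-not-8
// aligned in the same way.
static inline bool can_block_copy_sketch(intptr_t src_off, intptr_t dest_off) {
  if (((src_off | dest_off) & (BytesPerLong - 1)) == 0) {
    return true;   // already jlong-aligned
  }
  return ((src_off | dest_off) & (BytesPerLong - 1)) == BytesPerInt &&
         ((src_off ^ dest_off) & (BytesPerLong - 1)) == 0;  // fixable with one jint
}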

// Helper function; generates code for the slow case.
// We make a call to a runtime method which emulates the native method,
// but without the native wrapper overhead.
MergeMemNode* PhaseMacroExpand::generate_slow_arraycopy(ArrayCopyNode *ac,
                                                        Node** ctrl, Node* mem, Node** io,
                                                        const TypePtr* adr_type,
                                                        Node* src,  Node* src_offset,
                                                        Node* dest, Node* dest_offset,
                                                        Node* copy_length, bool dest_uninitialized) {
  assert(!dest_uninitialized, "Invariant");

  const TypeFunc* call_type = OptoRuntime::slow_arraycopy_Type();
  CallNode* call = new CallStaticJavaNode(call_type, OptoRuntime::slow_arraycopy_Java(),
                                          "slow_arraycopy", TypePtr::BOTTOM);

  call->init_req(TypeFunc::Control, *ctrl);
  call->init_req(TypeFunc::I_O    , *io);
  call->init_req(TypeFunc::Memory , mem);
  call->init_req(TypeFunc::ReturnAdr, top());
  call->init_req(TypeFunc::FramePtr, top());
  call->init_req(TypeFunc::Parms+0, src);
  call->init_req(TypeFunc::Parms+1, src_offset);
  call->init_req(TypeFunc::Parms+2, dest);
  call->init_req(TypeFunc::Parms+3, dest_offset);
  call->init_req(TypeFunc::Parms+4, copy_length);
  call->copy_call_debug_info(&_igvn, ac);

  call->set_cnt(PROB_UNLIKELY_MAG(4));  // Same effect as RC_UNCOMMON.
  _igvn.replace_node(ac, call);
  transform_later(call);

  call->extract_projections(&_callprojs, false /*separate_io_proj*/, false /*do_asserts*/);
  *ctrl = _callprojs.fallthrough_catchproj->clone();
  transform_later(*ctrl);

  Node* m = _callprojs.fallthrough_memproj->clone();
  transform_later(m);

  uint alias_idx = C->get_alias_index(adr_type);
  MergeMemNode* out_mem;
  if (alias_idx != Compile::AliasIdxBot) {
    out_mem = MergeMemNode::make(mem);
    out_mem->set_memory_at(alias_idx, m);
  } else {
    out_mem = MergeMemNode::make(m);
  }
  transform_later(out_mem);

  // When src is negative and the arraycopy is before an infinite loop,
  // _callprojs.fallthrough_ioproj could be null. Skip the clone and leave
  // *io null in that case.
  if (_callprojs.fallthrough_ioproj != nullptr) {
    *io = _callprojs.fallthrough_ioproj->clone();
    transform_later(*io);
  } else {
    *io = nullptr;
  }

  return out_mem;
}

// Helper function; generates code for cases requiring runtime checks.
Node* PhaseMacroExpand::generate_checkcast_arraycopy(Node** ctrl, MergeMemNode** mem,
                                                     const TypePtr* adr_type,
                                                     Node* dest_elem_klass,
                                                     Node* src,  Node* src_offset,
                                                     Node* dest, Node* dest_offset,
                                                     Node* copy_length, bool dest_uninitialized) {
  if ((*ctrl)->is_top())  return nullptr;

  address copyfunc_addr = StubRoutines::checkcast_arraycopy(dest_uninitialized);
  if (copyfunc_addr == nullptr) { // Stub was not generated, go slow path.
    return nullptr;
  }

  // Pick out the parameters required to perform a store-check
  // for the target array.  This is an optimistic check.  It will
  // look in each non-null element's class, at the desired klass's
  // super_check_offset, for the desired klass.
  int sco_offset = in_bytes(Klass::super_check_offset_offset());
  Node* p3 = basic_plus_adr(dest_elem_klass, sco_offset);
  Node* n3 = new LoadINode(nullptr, *mem /*memory(p3)*/, p3, _igvn.type(p3)->is_ptr(), TypeInt::INT, MemNode::unordered);
  Node* check_offset = ConvI2X(transform_later(n3));
  Node* check_value  = dest_elem_klass;

  Node* src_start  = array_element_address(src,  src_offset,  T_OBJECT);
  Node* dest_start = array_element_address(dest, dest_offset, T_OBJECT);

  const TypeFunc* call_type = OptoRuntime::checkcast_arraycopy_Type();
  Node* call = make_leaf_call(*ctrl, *mem, call_type, copyfunc_addr, "checkcast_arraycopy", adr_type,
                              src_start, dest_start, copy_length XTOP, check_offset XTOP, check_value);

  finish_arraycopy_call(call, ctrl, mem, adr_type);

  Node* proj = new ProjNode(call, TypeFunc::Parms);
  transform_later(proj);

  return proj;
}

// Helper function; generates code for cases requiring runtime checks.
Node* PhaseMacroExpand::generate_generic_arraycopy(Node** ctrl, MergeMemNode** mem,
                                                   const TypePtr* adr_type,
                                                   Node* src,  Node* src_offset,
                                                   Node* dest, Node* dest_offset,
                                                   Node* copy_length, bool dest_uninitialized) {
  if ((*ctrl)->is_top()) return nullptr;
  assert(!dest_uninitialized, "Invariant");

  address copyfunc_addr = StubRoutines::generic_arraycopy();
  if (copyfunc_addr == nullptr) { // Stub was not generated, go slow path.
    return nullptr;
  }

  const TypeFunc* call_type = OptoRuntime::generic_arraycopy_Type();
  Node* call = make_leaf_call(*ctrl, *mem, call_type, copyfunc_addr, "generic_arraycopy", adr_type,
                              src, src_offset, dest, dest_offset, copy_length);

  finish_arraycopy_call(call, ctrl, mem, adr_type);

  Node* proj = new ProjNode(call, TypeFunc::Parms);
  transform_later(proj);

  return proj;
}
1176

1177
// Helper function; generates the fast out-of-line call to an arraycopy stub.
bool PhaseMacroExpand::generate_unchecked_arraycopy(Node** ctrl, MergeMemNode** mem,
                                                    const TypePtr* adr_type,
                                                    BasicType basic_elem_type,
                                                    bool disjoint_bases,
                                                    Node* src,  Node* src_offset,
                                                    Node* dest, Node* dest_offset,
                                                    Node* copy_length, bool dest_uninitialized) {
  if ((*ctrl)->is_top()) return false;

  Node* src_start  = src;
  Node* dest_start = dest;
  if (src_offset != nullptr || dest_offset != nullptr) {
    src_start  = array_element_address(src, src_offset, basic_elem_type);
    dest_start = array_element_address(dest, dest_offset, basic_elem_type);
  }

  // Figure out which arraycopy runtime method to call.
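  // basictype2arraycopy() maps the element type to a StubRoutines entry
  // point: a "disjoint" variant when the source and destination ranges are
  // known not to overlap, and, for copies into freshly allocated arrays,
  // an uninitialized-destination variant that can skip GC pre-barriers.
  // The chosen stub's name comes back via copyfunc_name.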
  const char* copyfunc_name = "arraycopy";
  address     copyfunc_addr =
      basictype2arraycopy(basic_elem_type, src_offset, dest_offset,
                          disjoint_bases, copyfunc_name, dest_uninitialized);

  Node* result_memory = nullptr;
  RegionNode* exit_block = nullptr;
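  // With ArrayOperationPartialInlineSize enabled and wide-enough vectors,
  // short copies of subword elements are emitted inline as a masked vector
  // copy, keeping the stub call only as the fallback for longer lengths;
  // exit_block and result_memory are the merge points where the inline and
  // stub paths rejoin further down.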
  if (ArrayOperationPartialInlineSize > 0 && is_subword_type(basic_elem_type) &&
      Matcher::vector_width_in_bytes(basic_elem_type) >= 16) {
    generate_partial_inlining_block(ctrl, mem, adr_type, &exit_block, &result_memory,
                                    copy_length, src_start, dest_start, basic_elem_type);
  }

  const TypeFunc* call_type = OptoRuntime::fast_arraycopy_Type();
  Node* call = make_leaf_call(*ctrl, *mem, call_type, copyfunc_addr, copyfunc_name, adr_type,
                              src_start, dest_start, copy_length XTOP);

  finish_arraycopy_call(call, ctrl, mem, adr_type);

  // Connect the remaining exit_block edges coming from the stub path.
  if (exit_block) {
    exit_block->init_req(2, *ctrl);

    // Memory edge corresponding to stub_region.
    result_memory->init_req(2, *mem);

    uint alias_idx = C->get_alias_index(adr_type);
    if (alias_idx != Compile::AliasIdxBot) {
      *mem = MergeMemNode::make(*mem);
      (*mem)->set_memory_at(alias_idx, result_memory);
    } else {
      *mem = MergeMemNode::make(result_memory);
    }
    transform_later(*mem);
    *ctrl = exit_block;
    return true;
  }
  return false;
}

#undef XTOP

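// Expand an ArrayCopyNode macro node into an explicit control/memory
// subgraph: basic clones are delegated to the GC's C2 barrier set,
// already-validated copies go straight to a typed fast path, and general
// arraycopies get the runtime guards below, falling back to a slow call
// whenever a guard fails.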
void PhaseMacroExpand::expand_arraycopy_node(ArrayCopyNode *ac) {
  Node* ctrl = ac->in(TypeFunc::Control);
  Node* io = ac->in(TypeFunc::I_O);
  Node* src = ac->in(ArrayCopyNode::Src);
  Node* src_offset = ac->in(ArrayCopyNode::SrcPos);
  Node* dest = ac->in(ArrayCopyNode::Dest);
  Node* dest_offset = ac->in(ArrayCopyNode::DestPos);
  Node* length = ac->in(ArrayCopyNode::Length);
  MergeMemNode* merge_mem = nullptr;

  if (ac->is_clonebasic()) {
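    // Expansion of a basic clone is delegated to the GC's barrier set,
    // since the best lowering (e.g. bulk copy vs. runtime call) depends
    // on the collector in use.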
    BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
    bs->clone_at_expansion(this, ac);
    return;
  } else if (ac->is_copyof() || ac->is_copyofrange() || ac->is_clone_oop_array()) {
    Node* mem = ac->in(TypeFunc::Memory);
    merge_mem = MergeMemNode::make(mem);
    transform_later(merge_mem);

    AllocateArrayNode* alloc = nullptr;
    if (ac->is_alloc_tightly_coupled()) {
      alloc = AllocateArrayNode::Ideal_array_allocation(dest);
      assert(alloc != nullptr, "expect alloc");
    }

    const TypePtr* adr_type = _igvn.type(dest)->is_oopptr()->add_offset(Type::OffsetBot);
    if (ac->_dest_type != TypeOopPtr::BOTTOM) {
      adr_type = ac->_dest_type->add_offset(Type::OffsetBot)->is_ptr();
    }
    generate_arraycopy(ac, alloc, &ctrl, merge_mem, &io,
                       adr_type, T_OBJECT,
                       src, src_offset, dest, dest_offset, length,
                       true, ac->has_negative_length_guard());

    return;
  }

  AllocateArrayNode* alloc = nullptr;
  if (ac->is_alloc_tightly_coupled()) {
    alloc = AllocateArrayNode::Ideal_array_allocation(dest);
    assert(alloc != nullptr, "expect alloc");
  }

  assert(ac->is_arraycopy() || ac->is_arraycopy_validated(), "should be an arraycopy");

  // Compile-time checks.  If any of these checks cannot be verified at compile time,
  // we do not make a fast path for this call.  Instead, we let the call remain as it
  // is.  The checks we choose to mandate at compile time are:
  //
  // (1) src and dest are arrays.
  const Type* src_type = src->Value(&_igvn);
  const Type* dest_type = dest->Value(&_igvn);
  const TypeAryPtr* top_src = src_type->isa_aryptr();
  const TypeAryPtr* top_dest = dest_type->isa_aryptr();

  BasicType src_elem = T_CONFLICT;
  BasicType dest_elem = T_CONFLICT;

  if (top_src != nullptr && top_src->elem() != Type::BOTTOM) {
    src_elem = top_src->elem()->array_element_basic_type();
  }
  if (top_dest != nullptr && top_dest->elem() != Type::BOTTOM) {
    dest_elem = top_dest->elem()->array_element_basic_type();
  }
  if (is_reference_type(src_elem, true)) src_elem = T_OBJECT;
  if (is_reference_type(dest_elem, true)) dest_elem = T_OBJECT;

  if (ac->is_arraycopy_validated() &&
      dest_elem != T_CONFLICT &&
      src_elem == T_CONFLICT) {
    src_elem = dest_elem;
  }

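  // T_CONFLICT at this point means the static type told us nothing useful
  // about an element type. An illustrative (hypothetical) example:
  //
  //   static void copy(Object src, Object dst, int n) {
  //     System.arraycopy(src, 0, dst, 0, n);  // operands statically just
  //   }                                       // Object: elem types unknown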
  if (src_elem == T_CONFLICT || dest_elem == T_CONFLICT) {
    // Conservatively insert a memory barrier on all memory slices.
    // Do not let writes into the source float below the arraycopy.
    {
      Node* mem = ac->in(TypeFunc::Memory);
      insert_mem_bar(&ctrl, &mem, Op_MemBarCPUOrder);

      merge_mem = MergeMemNode::make(mem);
      transform_later(merge_mem);
    }

    // Call StubRoutines::generic_arraycopy stub.
    Node* mem = generate_arraycopy(ac, nullptr, &ctrl, merge_mem, &io,
                                   TypeRawPtr::BOTTOM, T_CONFLICT,
                                   src, src_offset, dest, dest_offset, length,
                                   // If a negative length guard was generated for the ArrayCopyNode,
                                   // the length of the array can never be negative.
                                   false, ac->has_negative_length_guard());
    return;
  }

  assert(!ac->is_arraycopy_validated() || (src_elem == dest_elem && dest_elem != T_VOID), "validated but different basic types");

  // (2) src and dest arrays must have elements of the same BasicType
  // Figure out the size and type of the elements we will be copying.
  if (src_elem != dest_elem || dest_elem == T_VOID) {
    // The component types are not the same or are not recognized.  Punt.
    // (But, avoid the native method wrapper to JVM_ArrayCopy.)
    {
      Node* mem = ac->in(TypeFunc::Memory);
      merge_mem = generate_slow_arraycopy(ac, &ctrl, mem, &io, TypePtr::BOTTOM, src, src_offset, dest, dest_offset, length, false);
    }

    _igvn.replace_node(_callprojs.fallthrough_memproj, merge_mem);
    if (_callprojs.fallthrough_ioproj != nullptr) {
      _igvn.replace_node(_callprojs.fallthrough_ioproj, io);
    }
    _igvn.replace_node(_callprojs.fallthrough_catchproj, ctrl);
    return;
  }

  //---------------------------------------------------------------------------
  // We will make a fast path for this call to arraycopy.

  // We have the following tests left to perform:
  //
  // (3) src and dest must not be null.
  // (4) src_offset must not be negative.
  // (5) dest_offset must not be negative.
  // (6) length must not be negative.
  // (7) src_offset + length must not exceed length of src.
  // (8) dest_offset + length must not exceed length of dest.
  // (9) each element of an oop array must be assignable.

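  // Illustrative (hypothetical) calls that these guards would route to the
  // slow path:
  //
  //   System.arraycopy(a, -1, b, 0, n);            // violates (4)
  //   System.arraycopy(a, 0, b, 0, a.length + 1);  // violates (7)
  //
  // A fully in-range copy between compatible arrays passes every guard and
  // reaches the unchecked stub call.
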
  {
    Node* mem = ac->in(TypeFunc::Memory);
    merge_mem = MergeMemNode::make(mem);
    transform_later(merge_mem);
  }

  RegionNode* slow_region = new RegionNode(1);
  transform_later(slow_region);

  if (!ac->is_arraycopy_validated()) {
    // (3) operands must not be null
    // We currently perform our null checks with the null_check routine.
    // This means that the null exceptions will be reported in the caller
    // rather than (correctly) reported inside of the native arraycopy call.
    // This should be corrected, given time.  We do our null check with the
    // stack pointer restored.
    // Null checks are done in library_call.cpp.

    // (4) src_offset must not be negative.
    generate_negative_guard(&ctrl, src_offset, slow_region);

    // (5) dest_offset must not be negative.
    generate_negative_guard(&ctrl, dest_offset, slow_region);

    // (6) length must not be negative (moved to generate_arraycopy()).
    // generate_negative_guard(length, slow_region);

    // (7) src_offset + length must not exceed length of src.
    Node* alen = ac->in(ArrayCopyNode::SrcLen);
    assert(alen != nullptr, "need src len");
    generate_limit_guard(&ctrl,
                         src_offset, length,
                         alen,
                         slow_region);

    // (8) dest_offset + length must not exceed length of dest.
    alen = ac->in(ArrayCopyNode::DestLen);
    assert(alen != nullptr, "need dest len");
    generate_limit_guard(&ctrl,
                         dest_offset, length,
                         alen,
                         slow_region);

    // (9) each element of an oop array must be assignable
    // The generate_arraycopy subroutine checks this.
  }
  // This is where the memory effects are placed:
  const TypePtr* adr_type = nullptr;
  if (ac->_dest_type != TypeOopPtr::BOTTOM) {
    adr_type = ac->_dest_type->add_offset(Type::OffsetBot)->is_ptr();
  } else {
    adr_type = TypeAryPtr::get_array_body_type(dest_elem);
  }

  generate_arraycopy(ac, alloc, &ctrl, merge_mem, &io,
                     adr_type, dest_elem,
                     src, src_offset, dest, dest_offset, length,
                     // If a negative length guard was generated for the ArrayCopyNode,
                     // the length of the array can never be negative.
                     false, ac->has_negative_length_guard(), slow_region);
}