jdk

Форк
0
/
vtransform.cpp 
450 строк · 18.5 Кб
1
/*
2
 * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
3
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
 *
5
 * This code is free software; you can redistribute it and/or modify it
6
 * under the terms of the GNU General Public License version 2 only, as
7
 * published by the Free Software Foundation.
8
 *
9
 * This code is distributed in the hope that it will be useful, but WITHOUT
10
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12
 * version 2 for more details (a copy is included in the LICENSE file that
13
 * accompanied this code).
14
 *
15
 * You should have received a copy of the GNU General Public License version
16
 * 2 along with this work; if not, write to the Free Software Foundation,
17
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18
 *
19
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20
 * or visit www.oracle.com if you need additional information or have any
21
 * questions.
22
 */
23

24
#include "precompiled.hpp"
25
#include "opto/vtransform.hpp"
26
#include "opto/vectornode.hpp"
27
#include "opto/convertnode.hpp"
28

29
void VTransformGraph::add_vtnode(VTransformNode* vtnode) {
30
  assert(vtnode->_idx == _vtnodes.length(), "position must match idx");
31
  _vtnodes.push(vtnode);
32
}
33

34
// Compute a linearization of the graph. We do this with a reverse-post-order of a DFS.
35
// This only works if the graph is a directed acyclic graph (DAG). The C2 graph, and
36
// the VLoopDependencyGraph are both DAGs, but after introduction of vectors/packs, the
37
// graph has additional constraints which can introduce cycles. Example:
38
//
39
//                                                       +--------+
40
//  A -> X                                               |        v
41
//                     Pack [A,B] and [X,Y]             [A,B]    [X,Y]
42
//  Y -> B                                                 ^        |
43
//                                                         +--------+
44
//
45
// We return "true" IFF we find no cycle, i.e. if the linearization succeeds.
46
bool VTransformGraph::schedule() {
47
  assert(!is_scheduled(), "not yet scheduled");
48

49
#ifndef PRODUCT
50
  if (_trace._verbose) {
51
    print_vtnodes();
52
  }
53
#endif
54

55
  ResourceMark rm;
56
  GrowableArray<VTransformNode*> stack;
57
  VectorSet pre_visited;
58
  VectorSet post_visited;
59

60
  collect_nodes_without_req_or_dependency(stack);
61

62
  // We create a reverse-post-visit order. This gives us a linearization, if there are
63
  // no cycles. Then, we simply reverse the order, and we have a schedule.
64
  int rpo_idx = _vtnodes.length() - 1;
65
  while (!stack.is_empty()) {
66
    VTransformNode* vtn = stack.top();
67
    if (!pre_visited.test_set(vtn->_idx)) {
68
      // Forward arc in graph (pre-visit).
69
    } else if (!post_visited.test(vtn->_idx)) {
70
      // Forward arc in graph. Check if all uses were already visited:
71
      //   Yes -> post-visit.
72
      //   No  -> we are mid-visit.
73
      bool all_uses_already_visited = true;
74

75
      for (int i = 0; i < vtn->outs(); i++) {
76
        VTransformNode* use = vtn->out(i);
77
        if (post_visited.test(use->_idx)) { continue; }
78
        if (pre_visited.test(use->_idx)) {
79
          // Cycle detected!
80
          // The nodes that are pre_visited but not yet post_visited form a path from
81
          // the "root" to the current vtn. Now, we are looking at an edge (vtn, use),
82
          // and discover that use is also pre_visited but not post_visited. Thus, use
83
          // lies on that path from "root" to vtn, and the edge (vtn, use) closes a
84
          // cycle.
85
          NOT_PRODUCT(if (_trace._rejections) { trace_schedule_cycle(stack, pre_visited, post_visited); } )
86
          return false;
87
        }
88
        stack.push(use);
89
        all_uses_already_visited = false;
90
      }
91

92
      if (all_uses_already_visited) {
93
        stack.pop();
94
        post_visited.set(vtn->_idx);           // post-visit
95
        _schedule.at_put_grow(rpo_idx--, vtn); // assign rpo_idx
96
      }
97
    } else {
98
      stack.pop(); // Already post-visited. Ignore secondary edge.
99
    }
100
  }
101

102
#ifndef PRODUCT
103
  if (_trace._verbose) {
104
    print_schedule();
105
  }
106
#endif
107

108
  assert(rpo_idx == -1, "used up all rpo_idx, rpo_idx=%d", rpo_idx);
109
  return true;
110
}
111

112
// Push all "root" nodes, i.e. those that have no inputs (req or dependency):
113
void VTransformGraph::collect_nodes_without_req_or_dependency(GrowableArray<VTransformNode*>& stack) const {
114
  for (int i = 0; i < _vtnodes.length(); i++) {
115
    VTransformNode* vtn = _vtnodes.at(i);
116
    if (!vtn->has_req_or_dependency()) {
117
      stack.push(vtn);
118
    }
119
  }
120
}
121

122
#ifndef PRODUCT
123
void VTransformGraph::trace_schedule_cycle(const GrowableArray<VTransformNode*>& stack,
124
                                           const VectorSet& pre_visited,
125
                                           const VectorSet& post_visited) const {
126
  tty->print_cr("\nVTransform::schedule found a cycle on path (P), vectorization attempt fails.");
127
  for (int j = 0; j < stack.length(); j++) {
128
    VTransformNode* n = stack.at(j);
129
    bool on_path = pre_visited.test(n->_idx) && !post_visited.test(n->_idx);
130
    tty->print("  %s ", on_path ? "P" : "_");
131
    n->print();
132
  }
133
}
134

135
void VTransformApplyResult::trace(VTransformNode* vtnode) const {
136
  tty->print("  apply: ");
137
  vtnode->print();
138
  tty->print("    ->   ");
139
  if (_node == nullptr) {
140
    tty->print_cr("nullptr");
141
  } else {
142
    _node->dump();
143
  }
144
}
145
#endif
146

147
Node* VTransformNode::find_transformed_input(int i, const GrowableArray<Node*>& vnode_idx_to_transformed_node) const {
148
  Node* n = vnode_idx_to_transformed_node.at(in(i)->_idx);
149
  assert(n != nullptr, "must find input IR node");
150
  return n;
151
}
152

153
VTransformApplyResult VTransformScalarNode::apply(const VLoopAnalyzer& vloop_analyzer,
154
                                                  const GrowableArray<Node*>& vnode_idx_to_transformed_node) const {
155
  // This was just wrapped. Now we simply unwap without touching the inputs.
156
  return VTransformApplyResult::make_scalar(_node);
157
}
158

159
VTransformApplyResult VTransformReplicateNode::apply(const VLoopAnalyzer& vloop_analyzer,
160
                                                     const GrowableArray<Node*>& vnode_idx_to_transformed_node) const {
161
  Node* val = find_transformed_input(1, vnode_idx_to_transformed_node);
162
  VectorNode* vn = VectorNode::scalar2vector(val, _vlen, _element_type);
163
  register_new_node_from_vectorization(vloop_analyzer, vn, val);
164
  return VTransformApplyResult::make_vector(vn, _vlen, vn->length_in_bytes());
165
}
166

167
VTransformApplyResult VTransformConvI2LNode::apply(const VLoopAnalyzer& vloop_analyzer,
168
                                                   const GrowableArray<Node*>& vnode_idx_to_transformed_node) const {
169
  Node* val = find_transformed_input(1, vnode_idx_to_transformed_node);
170
  Node* n = new ConvI2LNode(val);
171
  register_new_node_from_vectorization(vloop_analyzer, n, val);
172
  return VTransformApplyResult::make_scalar(n);
173
}
174

175
VTransformApplyResult VTransformShiftCountNode::apply(const VLoopAnalyzer& vloop_analyzer,
176
                                                      const GrowableArray<Node*>& vnode_idx_to_transformed_node) const {
177
  PhaseIdealLoop* phase = vloop_analyzer.vloop().phase();
178
  Node* shift_count_in = find_transformed_input(1, vnode_idx_to_transformed_node);
179
  assert(shift_count_in->bottom_type()->isa_int(), "int type only for shift count");
180
  // The shift_count_in would be automatically truncated to the lowest _mask
181
  // bits in a scalar shift operation. But vector shift does not truncate, so
182
  // we must apply the mask now.
183
  Node* shift_count_masked = new AndINode(shift_count_in, phase->igvn().intcon(_mask));
184
  register_new_node_from_vectorization(vloop_analyzer, shift_count_masked, shift_count_in);
185
  // Now that masked value is "boadcast" (some platforms only set the lowest element).
186
  VectorNode* vn = VectorNode::shift_count(_shift_opcode, shift_count_masked, _vlen, _element_bt);
187
  register_new_node_from_vectorization(vloop_analyzer, vn, shift_count_in);
188
  return VTransformApplyResult::make_vector(vn, _vlen, vn->length_in_bytes());
189
}
190

191

192
VTransformApplyResult VTransformPopulateIndexNode::apply(const VLoopAnalyzer& vloop_analyzer,
193
                                                         const GrowableArray<Node*>& vnode_idx_to_transformed_node) const {
194
  PhaseIdealLoop* phase = vloop_analyzer.vloop().phase();
195
  Node* val = find_transformed_input(1, vnode_idx_to_transformed_node);
196
  assert(val->is_Phi(), "expected to be iv");
197
  assert(VectorNode::is_populate_index_supported(_element_bt), "should support");
198
  const TypeVect* vt = TypeVect::make(_element_bt, _vlen);
199
  VectorNode* vn = new PopulateIndexNode(val, phase->igvn().intcon(1), vt);
200
  register_new_node_from_vectorization(vloop_analyzer, vn, val);
201
  return VTransformApplyResult::make_vector(vn, _vlen, vn->length_in_bytes());
202
}
203

204
VTransformApplyResult VTransformElementWiseVectorNode::apply(const VLoopAnalyzer& vloop_analyzer,
205
                                                             const GrowableArray<Node*>& vnode_idx_to_transformed_node) const {
206
  Node* first = nodes().at(0);
207
  uint  vlen = nodes().length();
208
  int   opc  = first->Opcode();
209
  BasicType bt = vloop_analyzer.types().velt_basic_type(first);
210

211
  if (first->is_Cmp()) {
212
    // Cmp + Bool -> VectorMaskCmp
213
    // Handled by Bool / VTransformBoolVectorNode, so we do not generate any nodes here.
214
    return VTransformApplyResult::make_empty();
215
  }
216

217
  assert(2 <= req() && req() <= 4, "Must have 1-3 inputs");
218
  VectorNode* vn = nullptr;
219
  Node* in1 =                find_transformed_input(1, vnode_idx_to_transformed_node);
220
  Node* in2 = (req() >= 3) ? find_transformed_input(2, vnode_idx_to_transformed_node) : nullptr;
221
  Node* in3 = (req() >= 4) ? find_transformed_input(3, vnode_idx_to_transformed_node) : nullptr;
222

223
  if (first->is_CMove()) {
224
    assert(req() == 4, "three inputs expected: mask, blend1, blend2");
225
    vn = new VectorBlendNode(/* blend1 */ in2, /* blend2 */ in3, /* mask */ in1);
226
  } else if (VectorNode::is_convert_opcode(opc)) {
227
    assert(first->req() == 2 && req() == 2, "only one input expected");
228
    int vopc = VectorCastNode::opcode(opc, in1->bottom_type()->is_vect()->element_basic_type());
229
    vn = VectorCastNode::make(vopc, in1, bt, vlen);
230
  } else if (VectorNode::can_use_RShiftI_instead_of_URShiftI(first, bt)) {
231
    opc = Op_RShiftI;
232
    vn = VectorNode::make(opc, in1, in2, vlen, bt);
233
  } else if (VectorNode::is_scalar_op_that_returns_int_but_vector_op_returns_long(opc)) {
234
    // The scalar operation was a long -> int operation.
235
    // However, the vector operation is long -> long.
236
    VectorNode* long_vn = VectorNode::make(opc, in1, nullptr, vlen, T_LONG);
237
    register_new_node_from_vectorization(vloop_analyzer, long_vn, first);
238
    // Cast long -> int, to mimic the scalar long -> int operation.
239
    vn = VectorCastNode::make(Op_VectorCastL2X, long_vn, T_INT, vlen);
240
  } else if (req() == 3 ||
241
             VectorNode::is_scalar_unary_op_with_equal_input_and_output_types(opc)) {
242
    assert(!VectorNode::is_roundopD(first) || in2->is_Con(), "rounding mode must be constant");
243
    vn = VectorNode::make(opc, in1, in2, vlen, bt); // unary and binary
244
  } else {
245
    assert(req() == 4, "three inputs expected");
246
    assert(opc == Op_FmaD ||
247
           opc == Op_FmaF ||
248
           opc == Op_SignumF ||
249
           opc == Op_SignumD,
250
           "element wise operation must be from this list");
251
    vn = VectorNode::make(opc, in1, in2, in3, vlen, bt); // ternary
252
  }
253

254
  register_new_node_from_vectorization_and_replace_scalar_nodes(vloop_analyzer, vn);
255
  return VTransformApplyResult::make_vector(vn, vlen, vn->length_in_bytes());
256
}
257

258
VTransformApplyResult VTransformBoolVectorNode::apply(const VLoopAnalyzer& vloop_analyzer,
259
                                                      const GrowableArray<Node*>& vnode_idx_to_transformed_node) const {
260
  BoolNode* first = nodes().at(0)->as_Bool();
261
  uint  vlen = nodes().length();
262
  BasicType bt = vloop_analyzer.types().velt_basic_type(first);
263

264
  // Cmp + Bool -> VectorMaskCmp
265
  VTransformElementWiseVectorNode* vtn_cmp = in(1)->isa_ElementWiseVector();
266
  assert(vtn_cmp != nullptr && vtn_cmp->nodes().at(0)->is_Cmp(),
267
         "bool vtn expects cmp vtn as input");
268

269
  Node* cmp_in1 = vtn_cmp->find_transformed_input(1, vnode_idx_to_transformed_node);
270
  Node* cmp_in2 = vtn_cmp->find_transformed_input(2, vnode_idx_to_transformed_node);
271
  BoolTest::mask mask = test()._mask;
272

273
  PhaseIdealLoop* phase = vloop_analyzer.vloop().phase();
274
  ConINode* mask_node  = phase->igvn().intcon((int)mask);
275
  const TypeVect* vt = TypeVect::make(bt, vlen);
276
  VectorNode* vn = new VectorMaskCmpNode(mask, cmp_in1, cmp_in2, mask_node, vt);
277
  register_new_node_from_vectorization_and_replace_scalar_nodes(vloop_analyzer, vn);
278
  return VTransformApplyResult::make_vector(vn, vlen, vn->vect_type()->length_in_bytes());
279
}
280

281
VTransformApplyResult VTransformReductionVectorNode::apply(const VLoopAnalyzer& vloop_analyzer,
282
                                                           const GrowableArray<Node*>& vnode_idx_to_transformed_node) const {
283
  Node* first = nodes().at(0);
284
  uint  vlen = nodes().length();
285
  int   opc  = first->Opcode();
286
  BasicType bt = first->bottom_type()->basic_type();
287

288
  Node* init = find_transformed_input(1, vnode_idx_to_transformed_node);
289
  Node* vec  = find_transformed_input(2, vnode_idx_to_transformed_node);
290

291
  ReductionNode* vn = ReductionNode::make(opc, nullptr, init, vec, bt);
292
  register_new_node_from_vectorization_and_replace_scalar_nodes(vloop_analyzer, vn);
293
  return VTransformApplyResult::make_vector(vn, vlen, vn->vect_type()->length_in_bytes());
294
}
295

296
VTransformApplyResult VTransformLoadVectorNode::apply(const VLoopAnalyzer& vloop_analyzer,
297
                                                      const GrowableArray<Node*>& vnode_idx_to_transformed_node) const {
298
  LoadNode* first = nodes().at(0)->as_Load();
299
  uint  vlen = nodes().length();
300
  Node* ctrl = first->in(MemNode::Control);
301
  Node* mem  = first->in(MemNode::Memory);
302
  Node* adr  = first->in(MemNode::Address);
303
  int   opc  = first->Opcode();
304
  const TypePtr* adr_type = first->adr_type();
305
  BasicType bt = vloop_analyzer.types().velt_basic_type(first);
306

307
  // Set the memory dependency of the LoadVector as early as possible.
308
  // Walk up the memory chain, and ignore any StoreVector that provably
309
  // does not have any memory dependency.
310
  while (mem->is_StoreVector()) {
311
    VPointer p_store(mem->as_Mem(), vloop_analyzer.vloop());
312
    if (p_store.overlap_possible_with_any_in(nodes())) {
313
      break;
314
    } else {
315
      mem = mem->in(MemNode::Memory);
316
    }
317
  }
318

319
  LoadVectorNode* vn = LoadVectorNode::make(opc, ctrl, mem, adr, adr_type, vlen, bt,
320
                                            control_dependency());
321
  DEBUG_ONLY( if (VerifyAlignVector) { vn->set_must_verify_alignment(); } )
322
  register_new_node_from_vectorization_and_replace_scalar_nodes(vloop_analyzer, vn);
323
  return VTransformApplyResult::make_vector(vn, vlen, vn->memory_size());
324
}
325

326
VTransformApplyResult VTransformStoreVectorNode::apply(const VLoopAnalyzer& vloop_analyzer,
327
                                                       const GrowableArray<Node*>& vnode_idx_to_transformed_node) const {
328
  StoreNode* first = nodes().at(0)->as_Store();
329
  uint  vlen = nodes().length();
330
  Node* ctrl = first->in(MemNode::Control);
331
  Node* mem  = first->in(MemNode::Memory);
332
  Node* adr  = first->in(MemNode::Address);
333
  int   opc  = first->Opcode();
334
  const TypePtr* adr_type = first->adr_type();
335

336
  Node* value = find_transformed_input(MemNode::ValueIn, vnode_idx_to_transformed_node);
337
  StoreVectorNode* vn = StoreVectorNode::make(opc, ctrl, mem, adr, adr_type, value, vlen);
338
  DEBUG_ONLY( if (VerifyAlignVector) { vn->set_must_verify_alignment(); } )
339
  register_new_node_from_vectorization_and_replace_scalar_nodes(vloop_analyzer, vn);
340
  return VTransformApplyResult::make_vector(vn, vlen, vn->memory_size());
341
}
342

343
void VTransformVectorNode::register_new_node_from_vectorization_and_replace_scalar_nodes(const VLoopAnalyzer& vloop_analyzer, Node* vn) const {
344
  PhaseIdealLoop* phase = vloop_analyzer.vloop().phase();
345
  Node* first = nodes().at(0);
346

347
  register_new_node_from_vectorization(vloop_analyzer, vn, first);
348

349
  for (int i = 0; i < _nodes.length(); i++) {
350
    Node* n = _nodes.at(i);
351
    phase->igvn().replace_node(n, vn);
352
  }
353
}
354

355
void VTransformNode::register_new_node_from_vectorization(const VLoopAnalyzer& vloop_analyzer, Node* vn, Node* old_node) const {
356
  PhaseIdealLoop* phase = vloop_analyzer.vloop().phase();
357
  phase->register_new_node_with_ctrl_of(vn, old_node);
358
  phase->igvn()._worklist.push(vn);
359
  VectorNode::trace_new_vector(vn, "AutoVectorization");
360
}
361

362
#ifndef PRODUCT
363
void VTransformGraph::print_vtnodes() const {
364
  tty->print_cr("\nVTransformGraph::print_vtnodes:");
365
  for (int i = 0; i < _vtnodes.length(); i++) {
366
    _vtnodes.at(i)->print();
367
  }
368
}
369

370
void VTransformGraph::print_schedule() const {
371
  tty->print_cr("\nVTransformGraph::print_schedule:");
372
  for (int i = 0; i < _schedule.length(); i++) {
373
    tty->print(" %3d: ", i);
374
    VTransformNode* vtn = _schedule.at(i);
375
    if (vtn == nullptr) {
376
      tty->print_cr("nullptr");
377
    } else {
378
      vtn->print();
379
    }
380
  }
381
}
382

383
void VTransformGraph::print_memops_schedule() const {
384
  tty->print_cr("\nVTransformGraph::print_memops_schedule:");
385
  int i = 0;
386
  for_each_memop_in_schedule([&] (MemNode* mem) {
387
    tty->print(" %3d: ", i++);
388
    mem->dump();
389
  });
390
}
391

392
void VTransformNode::print() const {
393
  tty->print("%3d %s (", _idx, name());
394
  for (uint i = 0; i < _req; i++) {
395
    print_node_idx(_in.at(i));
396
  }
397
  if ((uint)_in.length() > _req) {
398
    tty->print(" |");
399
    for (int i = _req; i < _in.length(); i++) {
400
      print_node_idx(_in.at(i));
401
    }
402
  }
403
  tty->print(") [");
404
  for (int i = 0; i < _out.length(); i++) {
405
    print_node_idx(_out.at(i));
406
  }
407
  tty->print("] ");
408
  print_spec();
409
  tty->cr();
410
}
411

412
void VTransformNode::print_node_idx(const VTransformNode* vtn) {
413
  if (vtn == nullptr) {
414
    tty->print(" _");
415
  } else {
416
    tty->print(" %d", vtn->_idx);
417
  }
418
}
419

420
void VTransformScalarNode::print_spec() const {
421
  tty->print("node[%d %s]", _node->_idx, _node->Name());
422
}
423

424
void VTransformReplicateNode::print_spec() const {
425
  tty->print("vlen=%d element_type=", _vlen);
426
  _element_type->dump();
427
}
428

429
void VTransformShiftCountNode::print_spec() const {
430
  tty->print("vlen=%d element_bt=%s mask=%d shift_opcode=%s",
431
             _vlen, type2name(_element_bt), _mask,
432
             NodeClassNames[_shift_opcode]);
433
}
434

435
void VTransformPopulateIndexNode::print_spec() const {
436
  tty->print("vlen=%d element_bt=%s", _vlen, type2name(_element_bt));
437
}
438

439
void VTransformVectorNode::print_spec() const {
440
  tty->print("%d-pack[", _nodes.length());
441
  for (int i = 0; i < _nodes.length(); i++) {
442
    Node* n = _nodes.at(i);
443
    if (i > 0) {
444
      tty->print(", ");
445
    }
446
    tty->print("%d %s", n->_idx, n->Name());
447
  }
448
  tty->print("]");
449
}
450
#endif
451

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.