jdk

Форк
0
/
superwordVTransformBuilder.cpp 
307 строк · 13.7 Кб
1
/*
2
 * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
3
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
 *
5
 * This code is free software; you can redistribute it and/or modify it
6
 * under the terms of the GNU General Public License version 2 only, as
7
 * published by the Free Software Foundation.
8
 *
9
 * This code is distributed in the hope that it will be useful, but WITHOUT
10
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12
 * version 2 for more details (a copy is included in the LICENSE file that
13
 * accompanied this code).
14
 *
15
 * You should have received a copy of the GNU General Public License version
16
 * 2 along with this work; if not, write to the Free Software Foundation,
17
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18
 *
19
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20
 * or visit www.oracle.com if you need additional information or have any
21
 * questions.
22
 */
23

24
#include "precompiled.hpp"
25
#include "opto/superwordVTransformBuilder.hpp"
26
#include "opto/vectornode.hpp"
27

28
void SuperWordVTransformBuilder::build() {
29
  assert(!_packset.is_empty(), "must have non-empty packset");
30
  assert(!_vtransform.has_graph(), "start with empty vtransform");
31

32
  // Create vtnodes for all nodes in the loop.
33
  build_vector_vtnodes_for_packed_nodes();
34
  build_scalar_vtnodes_for_non_packed_nodes();
35

36
  // Connect all vtnodes with their inputs. Possibly create vtnodes for input
37
  // nodes that are outside the loop.
38
  VectorSet vtn_dependencies; // Shared, but cleared for every vtnode.
39
  build_inputs_for_vector_vtnodes(vtn_dependencies);
40
  build_inputs_for_scalar_vtnodes(vtn_dependencies);
41
}
42

43
void SuperWordVTransformBuilder::build_vector_vtnodes_for_packed_nodes() {
44
  for (int i = 0; i < _packset.length(); i++) {
45
    Node_List* pack = _packset.at(i);
46
    VTransformVectorNode* vtn = make_vector_vtnode_for_pack(pack);
47
    for (uint k = 0; k < pack->size(); k++) {
48
      map_node_to_vtnode(pack->at(k), vtn);
49
    }
50
  }
51
}
52

53
void SuperWordVTransformBuilder::build_scalar_vtnodes_for_non_packed_nodes() {
54
  for (int i = 0; i < _vloop_analyzer.body().body().length(); i++) {
55
    Node* n = _vloop_analyzer.body().body().at(i);
56
    if (_packset.get_pack(n) != nullptr) { continue; }
57
    VTransformScalarNode* vtn = new (_vtransform.arena()) VTransformScalarNode(_vtransform, n);
58
    map_node_to_vtnode(n, vtn);
59
  }
60
}
61

62
// Connect the vector vtnode of every pack with its inputs, then add the
// dependencies of all pack members to that vtnode.
//
// vtn_dependencies: scratch set provided by the caller. It is cleared for every
//                   vtnode, and collects the indices of the vtnodes that were
//                   already added as inputs / dependencies of the current vtnode,
//                   so that every dependency is added only once.
void SuperWordVTransformBuilder::build_inputs_for_vector_vtnodes(VectorSet& vtn_dependencies) {
  for (int i = 0; i < _packset.length(); i++) {
    Node_List* pack = _packset.at(i);
    Node* p0 = pack->at(0);

    VTransformVectorNode* vtn = get_vtnode(p0)->isa_Vector();
    assert(vtn != nullptr, "all packs must have vector vtnodes");
    vtn_dependencies.clear(); // Add every dependency only once per vtn.

    if (p0->is_Load()) {
      // Loads: only the address is an input, and it stays scalar.
      set_req_with_scalar(p0,   vtn, vtn_dependencies, MemNode::Address);
    } else if (p0->is_Store()) {
      // Stores: scalar address, vector value.
      set_req_with_scalar(p0,   vtn, vtn_dependencies, MemNode::Address);
      set_req_with_vector(pack, vtn, vtn_dependencies, MemNode::ValueIn);
    } else if (vtn->isa_ReductionVector() != nullptr) {
      set_req_with_scalar(p0,   vtn, vtn_dependencies, 1); // scalar init
      set_req_with_vector(pack, vtn, vtn_dependencies, 2); // vector
    } else {
      assert(vtn->isa_ElementWiseVector() != nullptr, "all other vtnodes are handled above");
      if (VectorNode::is_scalar_rotate(p0) &&
          p0->in(2)->is_Con() &&
          Matcher::supports_vector_constant_rotates(p0->in(2)->get_int())) {
        set_req_with_vector(pack, vtn, vtn_dependencies, 1);
        set_req_with_scalar(p0,   vtn, vtn_dependencies, 2); // constant rotation
      } else if (VectorNode::is_roundopD(p0)) {
        set_req_with_vector(pack, vtn, vtn_dependencies, 1);
        set_req_with_scalar(p0,   vtn, vtn_dependencies, 2); // constant rounding mode
      } else if (p0->is_CMove()) {
        // Cmp + Bool + CMove -> VectorMaskCmp + VectorBlend.
        set_all_req_with_vectors(pack, vtn, vtn_dependencies);
        VTransformBoolVectorNode* vtn_mask_cmp = vtn->in(1)->isa_BoolVector();
        // The test is read from the mask input right below, so the condition
        // input must be a bool vector vtnode. Check it, just like the result of
        // isa_Vector above, rather than dereferencing a potential nullptr.
        assert(vtn_mask_cmp != nullptr, "CMove pack must have a bool vector vtnode as condition input");
        if (vtn_mask_cmp->test()._is_negated) {
          vtn->swap_req(2, 3); // swap if test was negated.
        }
      } else {
        set_all_req_with_vectors(pack, vtn, vtn_dependencies);
      }
    }

    // The inputs are set. Now add the dependencies of every pack member.
    for (uint k = 0; k < pack->size(); k++) {
      add_dependencies_of_node_to_vtnode(pack->at(k), vtn, vtn_dependencies);
    }
  }
}
106

107
// Connect the scalar vtnode of every loop body node with its inputs, and add
// the dependencies of the node to the vtnode. Body nodes whose vtnode is not a
// scalar vtnode are skipped.
void SuperWordVTransformBuilder::build_inputs_for_scalar_vtnodes(VectorSet& vtn_dependencies) {
  for (int body_idx = 0; body_idx < _vloop_analyzer.body().body().length(); body_idx++) {
    Node* node = _vloop_analyzer.body().body().at(body_idx);
    VTransformScalarNode* scalar_vtn = get_vtnode(node)->isa_Scalar();
    if (scalar_vtn == nullptr) {
      continue; // Not a scalar vtnode, nothing to do here.
    }

    // Fresh start for this vtnode: every dependency is added only once per vtnode.
    vtn_dependencies.clear();

    if (node->is_Load()) {
      set_req_with_scalar(node, scalar_vtn, vtn_dependencies, MemNode::Address);
    } else if (node->is_Store()) {
      set_req_with_scalar(node, scalar_vtn, vtn_dependencies, MemNode::Address);
      set_req_with_scalar(node, scalar_vtn, vtn_dependencies, MemNode::ValueIn);
    } else if (node->is_CountedLoop()) {
      // The CountedLoop is the "root": no inputs and no dependencies.
      continue;
    } else if (node->is_Phi()) {
      // Phi of the CountedLoop: only the control input is connected. The
      // backedge and the entry value are ignored.
      assert(node->in(0) == _vloop.cl(), "only Phi's from the CountedLoop allowed");
      set_req_with_scalar(node, scalar_vtn, vtn_dependencies, 0);
      continue;
    } else {
      set_all_req_with_scalars(node, scalar_vtn, vtn_dependencies);
    }

    add_dependencies_of_node_to_vtnode(node, scalar_vtn, vtn_dependencies);
  }
}
133

134
// Create a vtnode for each pack. No in/out edges set yet.
135
VTransformVectorNode* SuperWordVTransformBuilder::make_vector_vtnode_for_pack(const Node_List* pack) const {
136
  uint pack_size = pack->size();
137
  Node* p0 = pack->at(0);
138
  int opc = p0->Opcode();
139
  VTransformVectorNode* vtn = nullptr;
140

141
  if (p0->is_Load()) {
142
    vtn = new (_vtransform.arena()) VTransformLoadVectorNode(_vtransform, pack_size);
143
  } else if (p0->is_Store()) {
144
    vtn = new (_vtransform.arena()) VTransformStoreVectorNode(_vtransform, pack_size);
145
  } else if (p0->is_Bool()) {
146
    VTransformBoolTest kind = _packset.get_bool_test(pack);
147
    vtn = new (_vtransform.arena()) VTransformBoolVectorNode(_vtransform, pack_size, kind);
148
  } else if (_vloop_analyzer.reductions().is_marked_reduction(p0)) {
149
    vtn = new (_vtransform.arena()) VTransformReductionVectorNode(_vtransform, pack_size);
150
  } else if (VectorNode::is_muladds2i(p0)) {
151
    // A special kind of binary element-wise vector op: the inputs are "ints" a and b,
152
    // but reinterpreted as two "shorts" [a0, a1] and [b0, b1]:
153
    //   v = MulAddS2I(a, b) = a0 * b0 + a1 + b1
154
    assert(p0->req() == 5, "MulAddS2I should have 4 operands");
155
    vtn = new (_vtransform.arena()) VTransformElementWiseVectorNode(_vtransform, 3, pack_size);
156
  } else {
157
    assert(p0->req() == 3 ||
158
           p0->is_CMove() ||
159
           VectorNode::is_scalar_op_that_returns_int_but_vector_op_returns_long(opc) ||
160
           VectorNode::is_convert_opcode(opc) ||
161
           VectorNode::is_scalar_unary_op_with_equal_input_and_output_types(opc) ||
162
           opc == Op_FmaD ||
163
           opc == Op_FmaF ||
164
           opc == Op_SignumF ||
165
           opc == Op_SignumD,
166
           "pack type must be in this list");
167
    vtn = new (_vtransform.arena()) VTransformElementWiseVectorNode(_vtransform, p0->req(), pack_size);
168
  }
169
  vtn->set_nodes(pack);
170
  return vtn;
171
}
172

173
// Set the input of vtn at index to the vtnode of the scalar input n->in(index),
// and record that vtnode in vtn_dependencies. If the input node has no vtnode
// yet, it is wrapped as an input scalar vtnode.
void SuperWordVTransformBuilder::set_req_with_scalar(Node* n, VTransformNode* vtn, VectorSet& vtn_dependencies, const int index) {
  Node* scalar_input = n->in(index);
  VTransformNode* input_vtn = get_vtnode_or_wrap_as_input_scalar(scalar_input);
  vtn->set_req(index, input_vtn);
  vtn_dependencies.set(input_vtn->_idx);
}
178

179
// Either get the existing vtnode vector input (when input is a pack), or else make a
180
// new vector vtnode for the input (e.g. for Replicate or PopulateIndex).
181
VTransformNode* SuperWordVTransformBuilder::get_or_make_vtnode_vector_input_at_index(const Node_List* pack, const int index) {
182
  Node* p0 = pack->at(0);
183

184
  Node_List* pack_in = _packset.pack_input_at_index_or_null(pack, index);
185
  if (pack_in != nullptr) {
186
    // Input is a matching pack -> vtnode already exists.
187
    assert(index != 2 || !VectorNode::is_shift(p0), "shift's count cannot be vector");
188
    return get_vtnode(pack_in->at(0));
189
  }
190

191
  if (VectorNode::is_muladds2i(p0)) {
192
    assert(_packset.is_muladds2i_pack_with_pack_inputs(pack), "inputs must all be packs");
193
    // All inputs are strided (stride = 2), either with offset 0 or 1.
194
    Node_List* pack_in0 = _packset.strided_pack_input_at_index_or_null(pack, index, 2, 0);
195
    if (pack_in0 != nullptr) {
196
      return get_vtnode(pack_in0->at(0));
197
    }
198
    Node_List* pack_in1 = _packset.strided_pack_input_at_index_or_null(pack, index, 2, 1);
199
    if (pack_in1 != nullptr) {
200
      return get_vtnode(pack_in1->at(0));
201
    }
202
  }
203

204
  Node* same_input = _packset.same_inputs_at_index_or_null(pack, index);
205
  if (same_input == nullptr && p0->in(index) == _vloop.iv()) {
206
    // PopulateIndex: [iv+0, iv+1, iv+2, ...]
207
    VTransformNode* iv_vtn = get_vtnode_or_wrap_as_input_scalar(_vloop.iv());
208
    BasicType p0_bt = _vloop_analyzer.types().velt_basic_type(p0);
209
    // If we have subword type, take that type directly. If p0 is some ConvI2L/F/D,
210
    // then the p0_bt can also be L/F/D but we need to produce ints for the input of
211
    // the ConvI2L/F/D.
212
    BasicType element_bt = is_subword_type(p0_bt) ? p0_bt : T_INT;
213
    VTransformNode* populate_index = new (_vtransform.arena()) VTransformPopulateIndexNode(_vtransform, pack->size(), element_bt);
214
    populate_index->set_req(1, iv_vtn);
215
    return populate_index;
216
  }
217

218
  if (same_input != nullptr) {
219
    VTransformNode* same_input_vtn = get_vtnode_or_wrap_as_input_scalar(same_input);
220
    if (index == 2 && VectorNode::is_shift(p0)) {
221
      // Scalar shift count for vector shift operation: vec2 = shiftV(vec1, scalar_count)
222
      // Scalar shift operations masks the shift count, but the vector shift does not, so
223
      // create a special ShiftCount node.
224
      BasicType element_bt = _vloop_analyzer.types().velt_basic_type(p0);
225
      juint mask = (p0->bottom_type() == TypeInt::INT) ? (BitsPerInt - 1) : (BitsPerLong - 1);
226
      VTransformNode* shift_count = new (_vtransform.arena()) VTransformShiftCountNode(_vtransform, pack->size(), element_bt, mask, p0->Opcode());
227
      shift_count->set_req(1, same_input_vtn);
228
      return shift_count;
229
    } else {
230
      // Replicate the scalar same_input to every vector element.
231
      const Type* element_type = _vloop_analyzer.types().velt_type(p0);
232
      if (index == 2 && VectorNode::is_scalar_rotate(p0) && element_type->isa_long()) {
233
        // Scalar rotate has int rotation value, but the scalar rotate expects longs.
234
        assert(same_input->bottom_type()->isa_int(), "scalar rotate expects int rotation");
235
        VTransformNode* conv = new (_vtransform.arena()) VTransformConvI2LNode(_vtransform);
236
        conv->set_req(1, same_input_vtn);
237
        same_input_vtn = conv;
238
      }
239
      VTransformNode* replicate = new (_vtransform.arena()) VTransformReplicateNode(_vtransform, pack->size(), element_type);
240
      replicate->set_req(1, same_input_vtn);
241
      return replicate;
242
    }
243
  }
244

245
  // The input is neither a pack not a same_input node. SuperWord::profitable does not allow
246
  // any other case. In the future, we could insert a PackNode.
247
#ifdef ASSERT
248
  tty->print_cr("\nSuperWordVTransformBuilder::get_or_make_vtnode_vector_input_at_index: index=%d", index);
249
  pack->dump();
250
  assert(false, "Pack input was neither a pack nor a same_input node");
251
#endif
252
  ShouldNotReachHere();
253
}
254

255
// Return the vtnode that n is mapped to. If there is none yet, create an input
// scalar vtnode for n, map n to it, and return it.
VTransformNode* SuperWordVTransformBuilder::get_vtnode_or_wrap_as_input_scalar(Node* n) {
  VTransformNode* result = get_vtnode_or_null(n);
  if (result == nullptr) {
    // No vtnode yet: n must be an input from outside the loop.
    assert(!_vloop.in_bb(n), "only nodes outside the loop can be input nodes to the loop");
    result = new (_vtransform.arena()) VTransformInputScalarNode(_vtransform, n);
    map_node_to_vtnode(n, result);
  }
  return result;
}
264

265
void SuperWordVTransformBuilder::set_req_with_vector(const Node_List* pack, VTransformNode* vtn, VectorSet& vtn_dependencies, int j) {
266
  VTransformNode* req = get_or_make_vtnode_vector_input_at_index(pack, j);
267
  vtn->set_req(j, req);
268
  vtn_dependencies.set(req->_idx);
269
}
270

271
// Set every non-null input of n as a scalar input of vtn, at the same index.
void SuperWordVTransformBuilder::set_all_req_with_scalars(Node* n, VTransformNode* vtn, VectorSet& vtn_dependencies) {
  assert(vtn->req() == n->req(), "scalars must have same number of reqs");
  for (uint input_idx = 0; input_idx < n->req(); input_idx++) {
    // Inputs that are not set on the node are left unset on the vtnode.
    if (n->in(input_idx) != nullptr) {
      set_req_with_scalar(n, vtn, vtn_dependencies, input_idx);
    }
  }
}
279

280
void SuperWordVTransformBuilder::set_all_req_with_vectors(const Node_List* pack, VTransformNode* vtn, VectorSet& vtn_dependencies) {
281
  Node* p0 = pack->at(0);
282
  assert(vtn->req() <= p0->req(), "must have at at most as many reqs");
283
  // Vectors have no ctrl, so ignore it.
284
  for (uint j = 1; j < vtn->req(); j++) {
285
    Node* def = p0->in(j);
286
    if (def == nullptr) { continue; }
287
    set_req_with_vector(pack, vtn, vtn_dependencies, j);
288
  }
289
}
290

291
// Walk over the predecessors of n in the dependency graph, and add the vtnodes
// of the relevant predecessors as dependencies to vtn. A vtnode that is already
// recorded in vtn_dependencies is not added again.
void SuperWordVTransformBuilder::add_dependencies_of_node_to_vtnode(Node*n, VTransformNode* vtn, VectorSet& vtn_dependencies) {
  const bool n_is_mem = n->is_Mem();

  for (VLoopDependencyGraph::PredsIterator it(_vloop_analyzer.dependency_graph(), n); !it.done(); it.next()) {
    Node* pred_node = it.current();

    // Predecessors outside the loop are skipped.
    if (!_vloop.in_bb(pred_node)) {
      continue;
    }

    // For memory nodes, only memory dependencies are added here. All other
    // dependencies are already taken care of with the req.
    if (n_is_mem && !pred_node->is_Mem()) {
      continue;
    }

    VTransformNode* pred_vtn = get_vtnode(pred_node);

    // Skip the self-cycle of a reduction.
    if (pred_vtn == vtn && _vloop_analyzer.reductions().is_marked_reduction(n)) {
      continue;
    }

    // test_set marks the dependency as seen, and tells us if it was already
    // seen before: every dependency is added only once per vtn.
    const bool already_added = vtn_dependencies.test_set(pred_vtn->_idx);
    if (!already_added) {
      vtn->add_dependency(pred_vtn);
    }
  }
}
308

309

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.