/*
 * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

#include "precompiled.hpp"
#include "opto/superwordVTransformBuilder.hpp"
#include "opto/vectornode.hpp"

28
void SuperWordVTransformBuilder::build() {
29
assert(!_packset.is_empty(), "must have non-empty packset");
30
assert(!_vtransform.has_graph(), "start with empty vtransform");
32
// Create vtnodes for all nodes in the loop.
33
build_vector_vtnodes_for_packed_nodes();
34
build_scalar_vtnodes_for_non_packed_nodes();
36
// Connect all vtnodes with their inputs. Possibly create vtnodes for input
37
// nodes that are outside the loop.
38
VectorSet vtn_dependencies; // Shared, but cleared for every vtnode.
39
build_inputs_for_vector_vtnodes(vtn_dependencies);
40
build_inputs_for_scalar_vtnodes(vtn_dependencies);
43
void SuperWordVTransformBuilder::build_vector_vtnodes_for_packed_nodes() {
44
for (int i = 0; i < _packset.length(); i++) {
45
Node_List* pack = _packset.at(i);
46
VTransformVectorNode* vtn = make_vector_vtnode_for_pack(pack);
47
for (uint k = 0; k < pack->size(); k++) {
48
map_node_to_vtnode(pack->at(k), vtn);
53
void SuperWordVTransformBuilder::build_scalar_vtnodes_for_non_packed_nodes() {
54
for (int i = 0; i < _vloop_analyzer.body().body().length(); i++) {
55
Node* n = _vloop_analyzer.body().body().at(i);
56
if (_packset.get_pack(n) != nullptr) { continue; }
57
VTransformScalarNode* vtn = new (_vtransform.arena()) VTransformScalarNode(_vtransform, n);
58
map_node_to_vtnode(n, vtn);
62
void SuperWordVTransformBuilder::build_inputs_for_vector_vtnodes(VectorSet& vtn_dependencies) {
63
for (int i = 0; i < _packset.length(); i++) {
64
Node_List* pack = _packset.at(i);
65
Node* p0 = pack->at(0);
67
VTransformVectorNode* vtn = get_vtnode(p0)->isa_Vector();
68
assert(vtn != nullptr, "all packs must have vector vtnodes");
69
vtn_dependencies.clear(); // Add every dependency only once per vtn.
72
set_req_with_scalar(p0, vtn, vtn_dependencies, MemNode::Address);
73
} else if (p0->is_Store()) {
74
set_req_with_scalar(p0, vtn, vtn_dependencies, MemNode::Address);
75
set_req_with_vector(pack, vtn, vtn_dependencies, MemNode::ValueIn);
76
} else if (vtn->isa_ReductionVector() != nullptr) {
77
set_req_with_scalar(p0, vtn, vtn_dependencies, 1); // scalar init
78
set_req_with_vector(pack, vtn, vtn_dependencies, 2); // vector
80
assert(vtn->isa_ElementWiseVector() != nullptr, "all other vtnodes are handled above");
81
if (VectorNode::is_scalar_rotate(p0) &&
82
p0->in(2)->is_Con() &&
83
Matcher::supports_vector_constant_rotates(p0->in(2)->get_int())) {
84
set_req_with_vector(pack, vtn, vtn_dependencies, 1);
85
set_req_with_scalar(p0, vtn, vtn_dependencies, 2); // constant rotation
86
} else if (VectorNode::is_roundopD(p0)) {
87
set_req_with_vector(pack, vtn, vtn_dependencies, 1);
88
set_req_with_scalar(p0, vtn, vtn_dependencies, 2); // constant rounding mode
89
} else if (p0->is_CMove()) {
90
// Cmp + Bool + CMove -> VectorMaskCmp + VectorBlend.
91
set_all_req_with_vectors(pack, vtn, vtn_dependencies);
92
VTransformBoolVectorNode* vtn_mask_cmp = vtn->in(1)->isa_BoolVector();
93
if (vtn_mask_cmp->test()._is_negated) {
94
vtn->swap_req(2, 3); // swap if test was negated.
97
set_all_req_with_vectors(pack, vtn, vtn_dependencies);
101
for (uint k = 0; k < pack->size(); k++) {
102
add_dependencies_of_node_to_vtnode(pack->at(k), vtn, vtn_dependencies);
107
void SuperWordVTransformBuilder::build_inputs_for_scalar_vtnodes(VectorSet& vtn_dependencies) {
108
for (int i = 0; i < _vloop_analyzer.body().body().length(); i++) {
109
Node* n = _vloop_analyzer.body().body().at(i);
110
VTransformScalarNode* vtn = get_vtnode(n)->isa_Scalar();
111
if (vtn == nullptr) { continue; }
112
vtn_dependencies.clear(); // Add every dependency only once per vtn.
115
set_req_with_scalar(n, vtn, vtn_dependencies, MemNode::Address);
116
} else if (n->is_Store()) {
117
set_req_with_scalar(n, vtn, vtn_dependencies, MemNode::Address);
118
set_req_with_scalar(n, vtn, vtn_dependencies, MemNode::ValueIn);
119
} else if (n->is_CountedLoop()) {
120
continue; // Is "root", has no dependency.
121
} else if (n->is_Phi()) {
122
// CountedLoop Phi's: ignore backedge (and entry value).
123
assert(n->in(0) == _vloop.cl(), "only Phi's from the CountedLoop allowed");
124
set_req_with_scalar(n, vtn, vtn_dependencies, 0);
127
set_all_req_with_scalars(n, vtn, vtn_dependencies);
130
add_dependencies_of_node_to_vtnode(n, vtn, vtn_dependencies);
134
// Create a vtnode for each pack. No in/out edges set yet.
135
VTransformVectorNode* SuperWordVTransformBuilder::make_vector_vtnode_for_pack(const Node_List* pack) const {
136
uint pack_size = pack->size();
137
Node* p0 = pack->at(0);
138
int opc = p0->Opcode();
139
VTransformVectorNode* vtn = nullptr;
142
vtn = new (_vtransform.arena()) VTransformLoadVectorNode(_vtransform, pack_size);
143
} else if (p0->is_Store()) {
144
vtn = new (_vtransform.arena()) VTransformStoreVectorNode(_vtransform, pack_size);
145
} else if (p0->is_Bool()) {
146
VTransformBoolTest kind = _packset.get_bool_test(pack);
147
vtn = new (_vtransform.arena()) VTransformBoolVectorNode(_vtransform, pack_size, kind);
148
} else if (_vloop_analyzer.reductions().is_marked_reduction(p0)) {
149
vtn = new (_vtransform.arena()) VTransformReductionVectorNode(_vtransform, pack_size);
150
} else if (VectorNode::is_muladds2i(p0)) {
151
// A special kind of binary element-wise vector op: the inputs are "ints" a and b,
152
// but reinterpreted as two "shorts" [a0, a1] and [b0, b1]:
153
// v = MulAddS2I(a, b) = a0 * b0 + a1 + b1
154
assert(p0->req() == 5, "MulAddS2I should have 4 operands");
155
vtn = new (_vtransform.arena()) VTransformElementWiseVectorNode(_vtransform, 3, pack_size);
157
assert(p0->req() == 3 ||
159
VectorNode::is_scalar_op_that_returns_int_but_vector_op_returns_long(opc) ||
160
VectorNode::is_convert_opcode(opc) ||
161
VectorNode::is_scalar_unary_op_with_equal_input_and_output_types(opc) ||
166
"pack type must be in this list");
167
vtn = new (_vtransform.arena()) VTransformElementWiseVectorNode(_vtransform, p0->req(), pack_size);
169
vtn->set_nodes(pack);
173
void SuperWordVTransformBuilder::set_req_with_scalar(Node* n, VTransformNode* vtn, VectorSet& vtn_dependencies, const int index) {
174
VTransformNode* req = get_vtnode_or_wrap_as_input_scalar(n->in(index));
175
vtn->set_req(index, req);
176
vtn_dependencies.set(req->_idx);
179
// Either get the existing vtnode vector input (when input is a pack), or else make a
180
// new vector vtnode for the input (e.g. for Replicate or PopulateIndex).
181
VTransformNode* SuperWordVTransformBuilder::get_or_make_vtnode_vector_input_at_index(const Node_List* pack, const int index) {
182
Node* p0 = pack->at(0);
184
Node_List* pack_in = _packset.pack_input_at_index_or_null(pack, index);
185
if (pack_in != nullptr) {
186
// Input is a matching pack -> vtnode already exists.
187
assert(index != 2 || !VectorNode::is_shift(p0), "shift's count cannot be vector");
188
return get_vtnode(pack_in->at(0));
191
if (VectorNode::is_muladds2i(p0)) {
192
assert(_packset.is_muladds2i_pack_with_pack_inputs(pack), "inputs must all be packs");
193
// All inputs are strided (stride = 2), either with offset 0 or 1.
194
Node_List* pack_in0 = _packset.strided_pack_input_at_index_or_null(pack, index, 2, 0);
195
if (pack_in0 != nullptr) {
196
return get_vtnode(pack_in0->at(0));
198
Node_List* pack_in1 = _packset.strided_pack_input_at_index_or_null(pack, index, 2, 1);
199
if (pack_in1 != nullptr) {
200
return get_vtnode(pack_in1->at(0));
204
Node* same_input = _packset.same_inputs_at_index_or_null(pack, index);
205
if (same_input == nullptr && p0->in(index) == _vloop.iv()) {
206
// PopulateIndex: [iv+0, iv+1, iv+2, ...]
207
VTransformNode* iv_vtn = get_vtnode_or_wrap_as_input_scalar(_vloop.iv());
208
BasicType p0_bt = _vloop_analyzer.types().velt_basic_type(p0);
209
// If we have subword type, take that type directly. If p0 is some ConvI2L/F/D,
210
// then the p0_bt can also be L/F/D but we need to produce ints for the input of
212
BasicType element_bt = is_subword_type(p0_bt) ? p0_bt : T_INT;
213
VTransformNode* populate_index = new (_vtransform.arena()) VTransformPopulateIndexNode(_vtransform, pack->size(), element_bt);
214
populate_index->set_req(1, iv_vtn);
215
return populate_index;
218
if (same_input != nullptr) {
219
VTransformNode* same_input_vtn = get_vtnode_or_wrap_as_input_scalar(same_input);
220
if (index == 2 && VectorNode::is_shift(p0)) {
221
// Scalar shift count for vector shift operation: vec2 = shiftV(vec1, scalar_count)
222
// Scalar shift operations masks the shift count, but the vector shift does not, so
223
// create a special ShiftCount node.
224
BasicType element_bt = _vloop_analyzer.types().velt_basic_type(p0);
225
juint mask = (p0->bottom_type() == TypeInt::INT) ? (BitsPerInt - 1) : (BitsPerLong - 1);
226
VTransformNode* shift_count = new (_vtransform.arena()) VTransformShiftCountNode(_vtransform, pack->size(), element_bt, mask, p0->Opcode());
227
shift_count->set_req(1, same_input_vtn);
230
// Replicate the scalar same_input to every vector element.
231
const Type* element_type = _vloop_analyzer.types().velt_type(p0);
232
if (index == 2 && VectorNode::is_scalar_rotate(p0) && element_type->isa_long()) {
233
// Scalar rotate has int rotation value, but the scalar rotate expects longs.
234
assert(same_input->bottom_type()->isa_int(), "scalar rotate expects int rotation");
235
VTransformNode* conv = new (_vtransform.arena()) VTransformConvI2LNode(_vtransform);
236
conv->set_req(1, same_input_vtn);
237
same_input_vtn = conv;
239
VTransformNode* replicate = new (_vtransform.arena()) VTransformReplicateNode(_vtransform, pack->size(), element_type);
240
replicate->set_req(1, same_input_vtn);
245
// The input is neither a pack not a same_input node. SuperWord::profitable does not allow
246
// any other case. In the future, we could insert a PackNode.
248
tty->print_cr("\nSuperWordVTransformBuilder::get_or_make_vtnode_vector_input_at_index: index=%d", index);
250
assert(false, "Pack input was neither a pack nor a same_input node");
252
ShouldNotReachHere();
255
VTransformNode* SuperWordVTransformBuilder::get_vtnode_or_wrap_as_input_scalar(Node* n) {
256
VTransformNode* vtn = get_vtnode_or_null(n);
257
if (vtn != nullptr) { return vtn; }
259
assert(!_vloop.in_bb(n), "only nodes outside the loop can be input nodes to the loop");
260
vtn = new (_vtransform.arena()) VTransformInputScalarNode(_vtransform, n);
261
map_node_to_vtnode(n, vtn);
265
void SuperWordVTransformBuilder::set_req_with_vector(const Node_List* pack, VTransformNode* vtn, VectorSet& vtn_dependencies, int j) {
266
VTransformNode* req = get_or_make_vtnode_vector_input_at_index(pack, j);
267
vtn->set_req(j, req);
268
vtn_dependencies.set(req->_idx);
271
void SuperWordVTransformBuilder::set_all_req_with_scalars(Node* n, VTransformNode* vtn, VectorSet& vtn_dependencies) {
272
assert(vtn->req() == n->req(), "scalars must have same number of reqs");
273
for (uint j = 0; j < n->req(); j++) {
274
Node* def = n->in(j);
275
if (def == nullptr) { continue; }
276
set_req_with_scalar(n, vtn, vtn_dependencies, j);
280
void SuperWordVTransformBuilder::set_all_req_with_vectors(const Node_List* pack, VTransformNode* vtn, VectorSet& vtn_dependencies) {
281
Node* p0 = pack->at(0);
282
assert(vtn->req() <= p0->req(), "must have at at most as many reqs");
283
// Vectors have no ctrl, so ignore it.
284
for (uint j = 1; j < vtn->req(); j++) {
285
Node* def = p0->in(j);
286
if (def == nullptr) { continue; }
287
set_req_with_vector(pack, vtn, vtn_dependencies, j);
291
void SuperWordVTransformBuilder::add_dependencies_of_node_to_vtnode(Node*n, VTransformNode* vtn, VectorSet& vtn_dependencies) {
292
for (VLoopDependencyGraph::PredsIterator preds(_vloop_analyzer.dependency_graph(), n); !preds.done(); preds.next()) {
293
Node* pred = preds.current();
294
if (!_vloop.in_bb(pred)) { continue; }
296
// Only add memory dependencies to memory nodes. All others are taken care of with the req.
297
if (n->is_Mem() && !pred->is_Mem()) { continue; }
299
VTransformNode* dependency = get_vtnode(pred);
301
// Reduction self-cycle?
302
if (vtn == dependency && _vloop_analyzer.reductions().is_marked_reduction(n)) { continue; }
304
if (vtn_dependencies.test_set(dependency->_idx)) { continue; }
305
vtn->add_dependency(dependency); // Add every dependency only once per vtn.