24
#include "precompiled.hpp"
25
#include "opto/vtransform.hpp"
26
#include "opto/vectornode.hpp"
27
#include "opto/convertnode.hpp"
29
void VTransformGraph::add_vtnode(VTransformNode* vtnode) {
30
assert(vtnode->_idx == _vtnodes.length(), "position must match idx");
31
_vtnodes.push(vtnode);
46
bool VTransformGraph::schedule() {
47
assert(!is_scheduled(), "not yet scheduled");
50
if (_trace._verbose) {
56
GrowableArray<VTransformNode*> stack;
57
VectorSet pre_visited;
58
VectorSet post_visited;
60
collect_nodes_without_req_or_dependency(stack);
64
int rpo_idx = _vtnodes.length() - 1;
65
while (!stack.is_empty()) {
66
VTransformNode* vtn = stack.top();
67
if (!pre_visited.test_set(vtn->_idx)) {
69
} else if (!post_visited.test(vtn->_idx)) {
73
bool all_uses_already_visited = true;
75
for (int i = 0; i < vtn->outs(); i++) {
76
VTransformNode* use = vtn->out(i);
77
if (post_visited.test(use->_idx)) { continue; }
78
if (pre_visited.test(use->_idx)) {
85
NOT_PRODUCT(if (_trace._rejections) { trace_schedule_cycle(stack, pre_visited, post_visited); } )
89
all_uses_already_visited = false;
92
if (all_uses_already_visited) {
94
post_visited.set(vtn->_idx);
95
_schedule.at_put_grow(rpo_idx--, vtn);
103
if (_trace._verbose) {
108
assert(rpo_idx == -1, "used up all rpo_idx, rpo_idx=%d", rpo_idx);
113
void VTransformGraph::collect_nodes_without_req_or_dependency(GrowableArray<VTransformNode*>& stack) const {
114
for (int i = 0; i < _vtnodes.length(); i++) {
115
VTransformNode* vtn = _vtnodes.at(i);
116
if (!vtn->has_req_or_dependency()) {
123
void VTransformGraph::trace_schedule_cycle(const GrowableArray<VTransformNode*>& stack,
124
const VectorSet& pre_visited,
125
const VectorSet& post_visited) const {
126
tty->print_cr("\nVTransform::schedule found a cycle on path (P), vectorization attempt fails.");
127
for (int j = 0; j < stack.length(); j++) {
128
VTransformNode* n = stack.at(j);
129
bool on_path = pre_visited.test(n->_idx) && !post_visited.test(n->_idx);
130
tty->print(" %s ", on_path ? "P" : "_");
135
void VTransformApplyResult::trace(VTransformNode* vtnode) const {
136
tty->print(" apply: ");
139
if (_node == nullptr) {
140
tty->print_cr("nullptr");
147
Node* VTransformNode::find_transformed_input(int i, const GrowableArray<Node*>& vnode_idx_to_transformed_node) const {
148
Node* n = vnode_idx_to_transformed_node.at(in(i)->_idx);
149
assert(n != nullptr, "must find input IR node");
153
VTransformApplyResult VTransformScalarNode::apply(const VLoopAnalyzer& vloop_analyzer,
154
const GrowableArray<Node*>& vnode_idx_to_transformed_node) const {
156
return VTransformApplyResult::make_scalar(_node);
159
VTransformApplyResult VTransformReplicateNode::apply(const VLoopAnalyzer& vloop_analyzer,
160
const GrowableArray<Node*>& vnode_idx_to_transformed_node) const {
161
Node* val = find_transformed_input(1, vnode_idx_to_transformed_node);
162
VectorNode* vn = VectorNode::scalar2vector(val, _vlen, _element_type);
163
register_new_node_from_vectorization(vloop_analyzer, vn, val);
164
return VTransformApplyResult::make_vector(vn, _vlen, vn->length_in_bytes());
167
VTransformApplyResult VTransformConvI2LNode::apply(const VLoopAnalyzer& vloop_analyzer,
168
const GrowableArray<Node*>& vnode_idx_to_transformed_node) const {
169
Node* val = find_transformed_input(1, vnode_idx_to_transformed_node);
170
Node* n = new ConvI2LNode(val);
171
register_new_node_from_vectorization(vloop_analyzer, n, val);
172
return VTransformApplyResult::make_scalar(n);
175
VTransformApplyResult VTransformShiftCountNode::apply(const VLoopAnalyzer& vloop_analyzer,
176
const GrowableArray<Node*>& vnode_idx_to_transformed_node) const {
177
PhaseIdealLoop* phase = vloop_analyzer.vloop().phase();
178
Node* shift_count_in = find_transformed_input(1, vnode_idx_to_transformed_node);
179
assert(shift_count_in->bottom_type()->isa_int(), "int type only for shift count");
183
Node* shift_count_masked = new AndINode(shift_count_in, phase->igvn().intcon(_mask));
184
register_new_node_from_vectorization(vloop_analyzer, shift_count_masked, shift_count_in);
186
VectorNode* vn = VectorNode::shift_count(_shift_opcode, shift_count_masked, _vlen, _element_bt);
187
register_new_node_from_vectorization(vloop_analyzer, vn, shift_count_in);
188
return VTransformApplyResult::make_vector(vn, _vlen, vn->length_in_bytes());
192
VTransformApplyResult VTransformPopulateIndexNode::apply(const VLoopAnalyzer& vloop_analyzer,
193
const GrowableArray<Node*>& vnode_idx_to_transformed_node) const {
194
PhaseIdealLoop* phase = vloop_analyzer.vloop().phase();
195
Node* val = find_transformed_input(1, vnode_idx_to_transformed_node);
196
assert(val->is_Phi(), "expected to be iv");
197
assert(VectorNode::is_populate_index_supported(_element_bt), "should support");
198
const TypeVect* vt = TypeVect::make(_element_bt, _vlen);
199
VectorNode* vn = new PopulateIndexNode(val, phase->igvn().intcon(1), vt);
200
register_new_node_from_vectorization(vloop_analyzer, vn, val);
201
return VTransformApplyResult::make_vector(vn, _vlen, vn->length_in_bytes());
204
VTransformApplyResult VTransformElementWiseVectorNode::apply(const VLoopAnalyzer& vloop_analyzer,
205
const GrowableArray<Node*>& vnode_idx_to_transformed_node) const {
206
Node* first = nodes().at(0);
207
uint vlen = nodes().length();
208
int opc = first->Opcode();
209
BasicType bt = vloop_analyzer.types().velt_basic_type(first);
211
if (first->is_Cmp()) {
214
return VTransformApplyResult::make_empty();
217
assert(2 <= req() && req() <= 4, "Must have 1-3 inputs");
218
VectorNode* vn = nullptr;
219
Node* in1 = find_transformed_input(1, vnode_idx_to_transformed_node);
220
Node* in2 = (req() >= 3) ? find_transformed_input(2, vnode_idx_to_transformed_node) : nullptr;
221
Node* in3 = (req() >= 4) ? find_transformed_input(3, vnode_idx_to_transformed_node) : nullptr;
223
if (first->is_CMove()) {
224
assert(req() == 4, "three inputs expected: mask, blend1, blend2");
225
vn = new VectorBlendNode( in2, in3, in1);
226
} else if (VectorNode::is_convert_opcode(opc)) {
227
assert(first->req() == 2 && req() == 2, "only one input expected");
228
int vopc = VectorCastNode::opcode(opc, in1->bottom_type()->is_vect()->element_basic_type());
229
vn = VectorCastNode::make(vopc, in1, bt, vlen);
230
} else if (VectorNode::can_use_RShiftI_instead_of_URShiftI(first, bt)) {
232
vn = VectorNode::make(opc, in1, in2, vlen, bt);
233
} else if (VectorNode::is_scalar_op_that_returns_int_but_vector_op_returns_long(opc)) {
236
VectorNode* long_vn = VectorNode::make(opc, in1, nullptr, vlen, T_LONG);
237
register_new_node_from_vectorization(vloop_analyzer, long_vn, first);
239
vn = VectorCastNode::make(Op_VectorCastL2X, long_vn, T_INT, vlen);
240
} else if (req() == 3 ||
241
VectorNode::is_scalar_unary_op_with_equal_input_and_output_types(opc)) {
242
assert(!VectorNode::is_roundopD(first) || in2->is_Con(), "rounding mode must be constant");
243
vn = VectorNode::make(opc, in1, in2, vlen, bt);
245
assert(req() == 4, "three inputs expected");
246
assert(opc == Op_FmaD ||
250
"element wise operation must be from this list");
251
vn = VectorNode::make(opc, in1, in2, in3, vlen, bt);
254
register_new_node_from_vectorization_and_replace_scalar_nodes(vloop_analyzer, vn);
255
return VTransformApplyResult::make_vector(vn, vlen, vn->length_in_bytes());
258
VTransformApplyResult VTransformBoolVectorNode::apply(const VLoopAnalyzer& vloop_analyzer,
259
const GrowableArray<Node*>& vnode_idx_to_transformed_node) const {
260
BoolNode* first = nodes().at(0)->as_Bool();
261
uint vlen = nodes().length();
262
BasicType bt = vloop_analyzer.types().velt_basic_type(first);
265
VTransformElementWiseVectorNode* vtn_cmp = in(1)->isa_ElementWiseVector();
266
assert(vtn_cmp != nullptr && vtn_cmp->nodes().at(0)->is_Cmp(),
267
"bool vtn expects cmp vtn as input");
269
Node* cmp_in1 = vtn_cmp->find_transformed_input(1, vnode_idx_to_transformed_node);
270
Node* cmp_in2 = vtn_cmp->find_transformed_input(2, vnode_idx_to_transformed_node);
271
BoolTest::mask mask = test()._mask;
273
PhaseIdealLoop* phase = vloop_analyzer.vloop().phase();
274
ConINode* mask_node = phase->igvn().intcon((int)mask);
275
const TypeVect* vt = TypeVect::make(bt, vlen);
276
VectorNode* vn = new VectorMaskCmpNode(mask, cmp_in1, cmp_in2, mask_node, vt);
277
register_new_node_from_vectorization_and_replace_scalar_nodes(vloop_analyzer, vn);
278
return VTransformApplyResult::make_vector(vn, vlen, vn->vect_type()->length_in_bytes());
281
VTransformApplyResult VTransformReductionVectorNode::apply(const VLoopAnalyzer& vloop_analyzer,
282
const GrowableArray<Node*>& vnode_idx_to_transformed_node) const {
283
Node* first = nodes().at(0);
284
uint vlen = nodes().length();
285
int opc = first->Opcode();
286
BasicType bt = first->bottom_type()->basic_type();
288
Node* init = find_transformed_input(1, vnode_idx_to_transformed_node);
289
Node* vec = find_transformed_input(2, vnode_idx_to_transformed_node);
291
ReductionNode* vn = ReductionNode::make(opc, nullptr, init, vec, bt);
292
register_new_node_from_vectorization_and_replace_scalar_nodes(vloop_analyzer, vn);
293
return VTransformApplyResult::make_vector(vn, vlen, vn->vect_type()->length_in_bytes());
296
VTransformApplyResult VTransformLoadVectorNode::apply(const VLoopAnalyzer& vloop_analyzer,
297
const GrowableArray<Node*>& vnode_idx_to_transformed_node) const {
298
LoadNode* first = nodes().at(0)->as_Load();
299
uint vlen = nodes().length();
300
Node* ctrl = first->in(MemNode::Control);
301
Node* mem = first->in(MemNode::Memory);
302
Node* adr = first->in(MemNode::Address);
303
int opc = first->Opcode();
304
const TypePtr* adr_type = first->adr_type();
305
BasicType bt = vloop_analyzer.types().velt_basic_type(first);
310
while (mem->is_StoreVector()) {
311
VPointer p_store(mem->as_Mem(), vloop_analyzer.vloop());
312
if (p_store.overlap_possible_with_any_in(nodes())) {
315
mem = mem->in(MemNode::Memory);
319
LoadVectorNode* vn = LoadVectorNode::make(opc, ctrl, mem, adr, adr_type, vlen, bt,
320
control_dependency());
321
DEBUG_ONLY( if (VerifyAlignVector) { vn->set_must_verify_alignment(); } )
322
register_new_node_from_vectorization_and_replace_scalar_nodes(vloop_analyzer, vn);
323
return VTransformApplyResult::make_vector(vn, vlen, vn->memory_size());
326
VTransformApplyResult VTransformStoreVectorNode::apply(const VLoopAnalyzer& vloop_analyzer,
327
const GrowableArray<Node*>& vnode_idx_to_transformed_node) const {
328
StoreNode* first = nodes().at(0)->as_Store();
329
uint vlen = nodes().length();
330
Node* ctrl = first->in(MemNode::Control);
331
Node* mem = first->in(MemNode::Memory);
332
Node* adr = first->in(MemNode::Address);
333
int opc = first->Opcode();
334
const TypePtr* adr_type = first->adr_type();
336
Node* value = find_transformed_input(MemNode::ValueIn, vnode_idx_to_transformed_node);
337
StoreVectorNode* vn = StoreVectorNode::make(opc, ctrl, mem, adr, adr_type, value, vlen);
338
DEBUG_ONLY( if (VerifyAlignVector) { vn->set_must_verify_alignment(); } )
339
register_new_node_from_vectorization_and_replace_scalar_nodes(vloop_analyzer, vn);
340
return VTransformApplyResult::make_vector(vn, vlen, vn->memory_size());
343
void VTransformVectorNode::register_new_node_from_vectorization_and_replace_scalar_nodes(const VLoopAnalyzer& vloop_analyzer, Node* vn) const {
344
PhaseIdealLoop* phase = vloop_analyzer.vloop().phase();
345
Node* first = nodes().at(0);
347
register_new_node_from_vectorization(vloop_analyzer, vn, first);
349
for (int i = 0; i < _nodes.length(); i++) {
350
Node* n = _nodes.at(i);
351
phase->igvn().replace_node(n, vn);
355
void VTransformNode::register_new_node_from_vectorization(const VLoopAnalyzer& vloop_analyzer, Node* vn, Node* old_node) const {
356
PhaseIdealLoop* phase = vloop_analyzer.vloop().phase();
357
phase->register_new_node_with_ctrl_of(vn, old_node);
358
phase->igvn()._worklist.push(vn);
359
VectorNode::trace_new_vector(vn, "AutoVectorization");
363
void VTransformGraph::print_vtnodes() const {
364
tty->print_cr("\nVTransformGraph::print_vtnodes:");
365
for (int i = 0; i < _vtnodes.length(); i++) {
366
_vtnodes.at(i)->print();
370
void VTransformGraph::print_schedule() const {
371
tty->print_cr("\nVTransformGraph::print_schedule:");
372
for (int i = 0; i < _schedule.length(); i++) {
373
tty->print(" %3d: ", i);
374
VTransformNode* vtn = _schedule.at(i);
375
if (vtn == nullptr) {
376
tty->print_cr("nullptr");
383
void VTransformGraph::print_memops_schedule() const {
384
tty->print_cr("\nVTransformGraph::print_memops_schedule:");
386
for_each_memop_in_schedule([&] (MemNode* mem) {
387
tty->print(" %3d: ", i++);
392
void VTransformNode::print() const {
393
tty->print("%3d %s (", _idx, name());
394
for (uint i = 0; i < _req; i++) {
395
print_node_idx(_in.at(i));
397
if ((uint)_in.length() > _req) {
399
for (int i = _req; i < _in.length(); i++) {
400
print_node_idx(_in.at(i));
404
for (int i = 0; i < _out.length(); i++) {
405
print_node_idx(_out.at(i));
412
void VTransformNode::print_node_idx(const VTransformNode* vtn) {
413
if (vtn == nullptr) {
416
tty->print(" %d", vtn->_idx);
420
void VTransformScalarNode::print_spec() const {
421
tty->print("node[%d %s]", _node->_idx, _node->Name());
424
void VTransformReplicateNode::print_spec() const {
425
tty->print("vlen=%d element_type=", _vlen);
426
_element_type->dump();
429
void VTransformShiftCountNode::print_spec() const {
430
tty->print("vlen=%d element_bt=%s mask=%d shift_opcode=%s",
431
_vlen, type2name(_element_bt), _mask,
432
NodeClassNames[_shift_opcode]);
435
void VTransformPopulateIndexNode::print_spec() const {
436
tty->print("vlen=%d element_bt=%s", _vlen, type2name(_element_bt));
439
void VTransformVectorNode::print_spec() const {
440
tty->print("%d-pack[", _nodes.length());
441
for (int i = 0; i < _nodes.length(); i++) {
442
Node* n = _nodes.at(i);
446
tty->print("%d %s", n->_idx, n->Name());