jdk

Форк
0
953 строки · 33.9 Кб
1
/*
2
 * Copyright (c) 1998, 2024, Oracle and/or its affiliates. All rights reserved.
3
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
 *
5
 * This code is free software; you can redistribute it and/or modify it
6
 * under the terms of the GNU General Public License version 2 only, as
7
 * published by the Free Software Foundation.
8
 *
9
 * This code is distributed in the hope that it will be useful, but WITHOUT
10
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12
 * version 2 for more details (a copy is included in the LICENSE file that
13
 * accompanied this code).
14
 *
15
 * You should have received a copy of the GNU General Public License version
16
 * 2 along with this work; if not, write to the Free Software Foundation,
17
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18
 *
19
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20
 * or visit www.oracle.com if you need additional information or have any
21
 * questions.
22
 *
23
 */
24

25
#include "precompiled.hpp"
26
#include "compiler/oopMap.hpp"
27
#include "memory/allocation.inline.hpp"
28
#include "memory/resourceArea.hpp"
29
#include "opto/addnode.hpp"
30
#include "opto/block.hpp"
31
#include "opto/callnode.hpp"
32
#include "opto/cfgnode.hpp"
33
#include "opto/chaitin.hpp"
34
#include "opto/coalesce.hpp"
35
#include "opto/indexSet.hpp"
36
#include "opto/machnode.hpp"
37
#include "opto/memnode.hpp"
38
#include "opto/opcodes.hpp"
39

40
#include <fenv.h>
41

42
// Create the interference-graph phase.  Only the arena is remembered here;
// init() later allocates the per-live-range tables from it.
PhaseIFG::PhaseIFG(Arena* arena)
  : Phase(Interference_Graph),
    _arena(arena) {
}
44

45
// Size the graph for 'maxlrg' live ranges: allocate the yanked set, one
// adjacency set per live range and one LRG per live range from the arena,
// and start out as a triangular (not yet squared-up) graph.
void PhaseIFG::init(uint maxlrg) {
  const size_t adjs_bytes = sizeof(IndexSet) * maxlrg;
  const size_t lrgs_bytes = sizeof(LRG) * maxlrg;

  _maxlrg    = maxlrg;
  _yanked    = new (_arena) VectorSet(_arena);
  _is_square = false;

  // Raw storage for the adjacency lists; each entry is initialized below.
  _adjs = (IndexSet*)_arena->Amalloc(adjs_bytes);

  // Live range structures start out zero-filled.
  _lrgs = (LRG*)_arena->Amalloc(lrgs_bytes);
  memset((void*)_lrgs, 0, lrgs_bytes);

  // Bring every adjacency list and live range into its initial state.
  for (uint lidx = 0; lidx < maxlrg; lidx++) {
    _adjs[lidx].initialize(maxlrg);
    _lrgs[lidx].Set_All();
  }
}
60

61
// Add edge between vertices a & b.  These are sorted (triangular matrix),
62
// then the smaller number is inserted in the larger numbered array.
63
int PhaseIFG::add_edge( uint a, uint b ) {
64
  lrgs(a).invalid_degree();
65
  lrgs(b).invalid_degree();
66
  // Sort a and b, so that a is bigger
67
  assert( !_is_square, "only on triangular" );
68
  if( a < b ) { uint tmp = a; a = b; b = tmp; }
69
  return _adjs[a].insert( b );
70
}
71

72
// Is there an edge between a and b?
73
int PhaseIFG::test_edge( uint a, uint b ) const {
74
  // Sort a and b, so that a is larger
75
  assert( !_is_square, "only on triangular" );
76
  if( a < b ) { uint tmp = a; a = b; b = tmp; }
77
  return _adjs[a].member(b);
78
}
79

80
// Convert triangular matrix to square matrix
81
void PhaseIFG::SquareUp() {
82
  assert( !_is_square, "only on triangular" );
83

84
  // Simple transpose
85
  for(uint i = 0; i < _maxlrg; i++ ) {
86
    if (!_adjs[i].is_empty()) {
87
      IndexSetIterator elements(&_adjs[i]);
88
      uint datum;
89
      while ((datum = elements.next()) != 0) {
90
        _adjs[datum].insert(i);
91
      }
92
    }
93
  }
94
  _is_square = true;
95
}
96

97
// Compute effective degree in bulk
98
void PhaseIFG::Compute_Effective_Degree() {
99
  assert( _is_square, "only on square" );
100

101
  for( uint i = 0; i < _maxlrg; i++ )
102
    lrgs(i).set_degree(effective_degree(i));
103
}
104

105
// Query the squared-up graph for an edge between a and b.  The membership
// test is made against the adjacency set of whichever endpoint has fewer
// neighbors (a's set when the counts are equal).
int PhaseIFG::test_edge_sq(uint a, uint b) const {
  assert(_is_square, "only on square");
  const bool a_has_more = neighbor_cnt(a) > neighbor_cnt(b);
  const uint owner  = a_has_more ? b : a;
  const uint member = a_has_more ? a : b;
  return _adjs[owner].member(member);
}
113

114
// Union edges of B into A
115
void PhaseIFG::Union(uint a, uint b) {
116
  assert( _is_square, "only on square" );
117
  IndexSet *A = &_adjs[a];
118
  if (!_adjs[b].is_empty()) {
119
    IndexSetIterator b_elements(&_adjs[b]);
120
    uint datum;
121
    while ((datum = b_elements.next()) != 0) {
122
      if (A->insert(datum)) {
123
        _adjs[datum].insert(a);
124
        lrgs(a).invalid_degree();
125
        lrgs(datum).invalid_degree();
126
      }
127
    }
128
  }
129
}
130

131
// Yank a Node and all connected edges from the IFG.  Return a
132
// list of neighbors (edges) yanked.
133
IndexSet *PhaseIFG::remove_node( uint a ) {
134
  assert( _is_square, "only on square" );
135
  assert( !_yanked->test(a), "" );
136
  _yanked->set(a);
137

138
  // I remove the LRG from all neighbors.
139
  LRG &lrg_a = lrgs(a);
140

141
  if (!_adjs[a].is_empty()) {
142
    IndexSetIterator elements(&_adjs[a]);
143
    uint datum;
144
    while ((datum = elements.next()) != 0) {
145
      _adjs[datum].remove(a);
146
      lrgs(datum).inc_degree(-lrg_a.compute_degree(lrgs(datum)));
147
    }
148
  }
149
  return neighbors(a);
150
}
151

152
// Re-insert a yanked Node.
153
void PhaseIFG::re_insert(uint a) {
154
  assert( _is_square, "only on square" );
155
  assert( _yanked->test(a), "" );
156
  _yanked->remove(a);
157

158
  if (_adjs[a].is_empty()) return;
159

160
  IndexSetIterator elements(&_adjs[a]);
161
  uint datum;
162
  while ((datum = elements.next()) != 0) {
163
    _adjs[datum].insert(a);
164
    lrgs(datum).invalid_degree();
165
  }
166
}
167

168
// Compute the degree between 2 live ranges.  If both live ranges are
169
// aligned-adjacent powers-of-2 then we use the MAX size.  If either is
170
// mis-aligned (or for Fat-Projections, not-adjacent) then we have to
171
// MULTIPLY the sizes.  Inspect Brigg's thesis on register pairs to see why
172
// this is so.
173
int LRG::compute_degree(LRG &l) const {
174
  int tmp;
175
  int num_regs = _num_regs;
176
  int nregs = l.num_regs();
177
  tmp =  (_fat_proj || l._fat_proj)     // either is a fat-proj?
178
    ? (num_regs * nregs)                // then use product
179
    : MAX2(num_regs,nregs);             // else use max
180
  return tmp;
181
}
182

183
// Compute effective degree for this live range.  If both live ranges are
184
// aligned-adjacent powers-of-2 then we use the MAX size.  If either is
185
// mis-aligned (or for Fat-Projections, not-adjacent) then we have to
186
// MULTIPLY the sizes.  Inspect Brigg's thesis on register pairs to see why
187
// this is so.
188
int PhaseIFG::effective_degree(uint lidx) const {
189
  IndexSet *s = neighbors(lidx);
190
  if (s->is_empty()) return 0;
191
  int eff = 0;
192
  int num_regs = lrgs(lidx).num_regs();
193
  int fat_proj = lrgs(lidx)._fat_proj;
194
  IndexSetIterator elements(s);
195
  uint nidx;
196
  while ((nidx = elements.next()) != 0) {
197
    LRG &lrgn = lrgs(nidx);
198
    int nregs = lrgn.num_regs();
199
    eff += (fat_proj || lrgn._fat_proj) // either is a fat-proj?
200
      ? (num_regs * nregs)              // then use product
201
      : MAX2(num_regs,nregs);           // else use max
202
  }
203
  return eff;
204
}
205

206

207
#ifndef PRODUCT
208
void PhaseIFG::dump() const {
209
  tty->print_cr("-- Interference Graph --%s--",
210
                _is_square ? "square" : "triangular" );
211
  if (_is_square) {
212
    for (uint i = 0; i < _maxlrg; i++) {
213
      tty->print(_yanked->test(i) ? "XX " : "  ");
214
      tty->print("L%d: { ",i);
215
      if (!_adjs[i].is_empty()) {
216
        IndexSetIterator elements(&_adjs[i]);
217
        uint datum;
218
        while ((datum = elements.next()) != 0) {
219
          tty->print("L%d ", datum);
220
        }
221
      }
222
      tty->print_cr("}");
223

224
    }
225
    return;
226
  }
227

228
  // Triangular
229
  for( uint i = 0; i < _maxlrg; i++ ) {
230
    uint j;
231
    tty->print(_yanked->test(i) ? "XX " : "  ");
232
    tty->print("L%d: { ",i);
233
    for( j = _maxlrg; j > i; j-- )
234
      if( test_edge(j - 1,i) ) {
235
        tty->print("L%d ",j - 1);
236
      }
237
    tty->print("| ");
238
    if (!_adjs[i].is_empty()) {
239
      IndexSetIterator elements(&_adjs[i]);
240
      uint datum;
241
      while ((datum = elements.next()) != 0) {
242
        tty->print("L%d ", datum);
243
      }
244
    }
245
    tty->print("}\n");
246
  }
247
  tty->print("\n");
248
}
249

250
void PhaseIFG::stats() const {
251
  ResourceMark rm;
252
  int *h_cnt = NEW_RESOURCE_ARRAY(int,_maxlrg*2);
253
  memset( h_cnt, 0, sizeof(int)*_maxlrg*2 );
254
  uint i;
255
  for( i = 0; i < _maxlrg; i++ ) {
256
    h_cnt[neighbor_cnt(i)]++;
257
  }
258
  tty->print_cr("--Histogram of counts--");
259
  for( i = 0; i < _maxlrg*2; i++ )
260
    if( h_cnt[i] )
261
      tty->print("%d/%d ",i,h_cnt[i]);
262
  tty->cr();
263
}
264

265
void PhaseIFG::verify( const PhaseChaitin *pc ) const {
266
  // IFG is square, sorted and no need for Find
267
  for( uint i = 0; i < _maxlrg; i++ ) {
268
    assert(!_yanked->test(i) || !neighbor_cnt(i), "Is removed completely" );
269
    IndexSet *set = &_adjs[i];
270
    if (!set->is_empty()) {
271
      IndexSetIterator elements(set);
272
      uint idx;
273
      uint last = 0;
274
      while ((idx = elements.next()) != 0) {
275
        assert(idx != i, "Must have empty diagonal");
276
        assert(pc->_lrg_map.find_const(idx) == idx, "Must not need Find");
277
        assert(_adjs[idx].member(i), "IFG not square");
278
        assert(!_yanked->test(idx), "No yanked neighbors");
279
        assert(last < idx, "not sorted increasing");
280
        last = idx;
281
      }
282
    }
283
    assert(!lrgs(i)._degree_valid || effective_degree(i) == lrgs(i).degree(), "degree is valid but wrong");
284
  }
285
}
286
#endif
287

288
/*
289
 * Interfere this register with everything currently live.
290
 * Check for interference by checking overlap of regmasks.
291
 * Only interfere if acceptable register masks overlap.
292
 */
293
void PhaseChaitin::interfere_with_live(uint lid, IndexSet* liveout) {
294
  if (!liveout->is_empty()) {
295
    LRG& lrg = lrgs(lid);
296
    const RegMask &rm = lrg.mask();
297
    IndexSetIterator elements(liveout);
298
    uint interfering_lid = elements.next();
299
    while (interfering_lid != 0) {
300
      LRG& interfering_lrg = lrgs(interfering_lid);
301
      if (rm.overlap(interfering_lrg.mask())) {
302
        _ifg->add_edge(lid, interfering_lid);
303
      }
304
      interfering_lid = elements.next();
305
    }
306
  }
307
}
308

309
// Actually build the interference graph.  Uses virtual registers only, no
310
// physical register masks.  This allows me to be very aggressive when
311
// coalescing copies.  Some of this aggressiveness will have to be undone
312
// later, but I'd rather get all the copies I can now (since unremoved copies
313
// at this point can end up in bad places).  Copies I re-insert later I have
314
// more opportunity to insert them in low-frequency locations.
315
void PhaseChaitin::build_ifg_virtual( ) {
316
  Compile::TracePhase tp("buildIFG_virt", &timers[_t_buildIFGvirtual]);
317

318
  // For all blocks (in any order) do...
319
  for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
320
    Block* block = _cfg.get_block(i);
321
    IndexSet* liveout = _live->live(block);
322

323
    // The IFG is built by a single reverse pass over each basic block.
324
    // Starting with the known live-out set, we remove things that get
325
    // defined and add things that become live (essentially executing one
326
    // pass of a standard LIVE analysis). Just before a Node defines a value
327
    // (and removes it from the live-ness set) that value is certainly live.
328
    // The defined value interferes with everything currently live.  The
329
    // value is then removed from the live-ness set and it's inputs are
330
    // added to the live-ness set.
331
    for (uint j = block->end_idx() + 1; j > 1; j--) {
332
      Node* n = block->get_node(j - 1);
333

334
      // Get value being defined
335
      uint r = _lrg_map.live_range_id(n);
336

337
      // Some special values do not allocate
338
      if (r) {
339

340
        // Remove from live-out set
341
        liveout->remove(r);
342

343
        // Copies do not define a new value and so do not interfere.
344
        // Remove the copies source from the liveout set before interfering.
345
        uint idx = n->is_Copy();
346
        if (idx != 0) {
347
          liveout->remove(_lrg_map.live_range_id(n->in(idx)));
348
        }
349

350
        // Interfere with everything live
351
        interfere_with_live(r, liveout);
352
      }
353

354
      // Make all inputs live
355
      if (!n->is_Phi()) {      // Phi function uses come from prior block
356
        for(uint k = 1; k < n->req(); k++) {
357
          liveout->insert(_lrg_map.live_range_id(n->in(k)));
358
        }
359
      }
360

361
      // 2-address instructions always have the defined value live
362
      // on entry to the instruction, even though it is being defined
363
      // by the instruction.  We pretend a virtual copy sits just prior
364
      // to the instruction and kills the src-def'd register.
365
      // In other words, for 2-address instructions the defined value
366
      // interferes with all inputs.
367
      uint idx;
368
      if( n->is_Mach() && (idx = n->as_Mach()->two_adr()) ) {
369
        const MachNode *mach = n->as_Mach();
370
        // Sometimes my 2-address ADDs are commuted in a bad way.
371
        // We generally want the USE-DEF register to refer to the
372
        // loop-varying quantity, to avoid a copy.
373
        uint op = mach->ideal_Opcode();
374
        // Check that mach->num_opnds() == 3 to ensure instruction is
375
        // not subsuming constants, effectively excludes addI_cin_imm
376
        // Can NOT swap for instructions like addI_cin_imm since it
377
        // is adding zero to yhi + carry and the second ideal-input
378
        // points to the result of adding low-halves.
379
        // Checking req() and num_opnds() does NOT distinguish addI_cout from addI_cout_imm
380
        if( (op == Op_AddI && mach->req() == 3 && mach->num_opnds() == 3) &&
381
            n->in(1)->bottom_type()->base() == Type::Int &&
382
            // See if the ADD is involved in a tight data loop the wrong way
383
            n->in(2)->is_Phi() &&
384
            n->in(2)->in(2) == n ) {
385
          Node *tmp = n->in(1);
386
          n->set_req( 1, n->in(2) );
387
          n->set_req( 2, tmp );
388
        }
389
        // Defined value interferes with all inputs
390
        uint lidx = _lrg_map.live_range_id(n->in(idx));
391
        for (uint k = 1; k < n->req(); k++) {
392
          uint kidx = _lrg_map.live_range_id(n->in(k));
393
          if (kidx != lidx) {
394
            _ifg->add_edge(r, kidx);
395
          }
396
        }
397
      }
398
    } // End of forall instructions in block
399
  } // End of forall blocks
400
}
401

402
#ifdef ASSERT
403
// Debug helper: sum reg_pressure() over the members of 'liveout' that are
// non-empty/UP, not float-or-vector, and whose mask overlaps the RegI class
// (or the RegVectMask class when the platform has predicated vectors).
uint PhaseChaitin::count_int_pressure(IndexSet* liveout) {
  uint total = 0;
  if (!liveout->is_empty()) {
    IndexSetIterator it(liveout);
    for (uint lidx = it.next(); lidx != 0; lidx = it.next()) {
      LRG& lrg = lrgs(lidx);
      if (!lrg.mask_is_nonempty_and_up() || lrg.is_float_or_vector()) {
        continue;
      }
      if (lrg.mask().overlap(*Matcher::idealreg2regmask[Op_RegI]) ||
          (Matcher::has_predicated_vectors() &&
           lrg.mask().overlap(*Matcher::idealreg2regmask[Op_RegVectMask]))) {
        total += lrg.reg_pressure();
      }
    }
  }
  return total;
}
423

424
// Debug helper: sum reg_pressure() over the members of 'liveout' that are
// non-empty/UP and float-or-vector.
uint PhaseChaitin::count_float_pressure(IndexSet* liveout) {
  uint total = 0;
  if (!liveout->is_empty()) {
    IndexSetIterator it(liveout);
    for (uint lidx = it.next(); lidx != 0; lidx = it.next()) {
      LRG& lrg = lrgs(lidx);
      if (lrg.mask_is_nonempty_and_up() && lrg.is_float_or_vector()) {
        total += lrg.reg_pressure();
      }
    }
  }
  return total;
}
440
#endif
441

442
/*
443
 * Adjust register pressure down by 1.  Capture last hi-to-low transition,
444
 */
445
void PhaseChaitin::lower_pressure(Block* b, uint location, LRG& lrg, IndexSet* liveout, Pressure& int_pressure, Pressure& float_pressure) {
446
  if (lrg.mask_is_nonempty_and_up()) {
447
    if (lrg.is_float_or_vector()) {
448
      float_pressure.lower(lrg, location);
449
    } else {
450
      // Do not count the SP and flag registers
451
      const RegMask& r = lrg.mask();
452
      if (r.overlap(*Matcher::idealreg2regmask[Op_RegI]) ||
453
           (Matcher::has_predicated_vectors() &&
454
            r.overlap(*Matcher::idealreg2regmask[Op_RegVectMask]))) {
455
        int_pressure.lower(lrg, location);
456
      }
457
    }
458
  }
459
  if (_scheduling_info_generated == false) {
460
    assert(int_pressure.current_pressure() == count_int_pressure(liveout), "the int pressure is incorrect");
461
    assert(float_pressure.current_pressure() == count_float_pressure(liveout), "the float pressure is incorrect");
462
  }
463
}
464

465
/* Go to the first non-phi index in a block */
466
static uint first_nonphi_index(Block* b) {
467
  uint i;
468
  uint end_idx = b->end_idx();
469
  for (i = 1; i < end_idx; i++) {
470
    Node* n = b->get_node(i);
471
    if (!n->is_Phi()) {
472
      break;
473
    }
474
  }
475
  return i;
476
}
477

478
/*
479
 * Spills could be inserted before a CreateEx node which should be the first
480
 * instruction in a block after Phi nodes. If so, move the CreateEx node up.
481
 */
482
static void move_exception_node_up(Block* b, uint first_inst, uint last_inst) {
483
  for (uint i = first_inst; i < last_inst; i++) {
484
    Node* ex = b->get_node(i);
485
    if (ex->is_SpillCopy()) {
486
      continue;
487
    }
488

489
    if (i > first_inst &&
490
        ex->is_Mach() && ex->as_Mach()->ideal_Opcode() == Op_CreateEx) {
491
      b->remove_node(i);
492
      b->insert_node(ex, first_inst);
493
    }
494
    // Stop once a CreateEx or any other node is found
495
    break;
496
  }
497
}
498

499
/*
500
 * When new live ranges are live, we raise the register pressure
501
 */
502
void PhaseChaitin::raise_pressure(Block* b, LRG& lrg, Pressure& int_pressure, Pressure& float_pressure) {
503
  if (lrg.mask_is_nonempty_and_up()) {
504
    if (lrg.is_float_or_vector()) {
505
      float_pressure.raise(lrg);
506
    } else {
507
      // Do not count the SP and flag registers
508
      const RegMask& rm = lrg.mask();
509
      if (rm.overlap(*Matcher::idealreg2regmask[Op_RegI]) ||
510
           (Matcher::has_predicated_vectors() &&
511
            rm.overlap(*Matcher::idealreg2regmask[Op_RegVectMask]))) {
512
        int_pressure.raise(lrg);
513
      }
514
    }
515
  }
516
}
517

518

519
/*
520
 * Computes the initial register pressure of a block, looking at all live
521
 * ranges in the liveout. The register pressure is computed for both float
522
 * and int/pointer registers.
523
 * Live ranges in the liveout are presumed live for the whole block.
524
 * We add the cost for the whole block to the area of the live ranges initially.
525
 * If a live range gets killed in the block, we'll subtract the unused part of
526
 * the block from the area.
527
 */
528
void PhaseChaitin::compute_initial_block_pressure(Block* b, IndexSet* liveout, Pressure& int_pressure, Pressure& float_pressure, double cost) {
529
  if (!liveout->is_empty()) {
530
    IndexSetIterator elements(liveout);
531
    uint lid = elements.next();
532
    while (lid != 0) {
533
      LRG &lrg = lrgs(lid);
534
      lrg._area += cost;
535
      raise_pressure(b, lrg, int_pressure, float_pressure);
536
      lid = elements.next();
537
    }
538
  }
539
  assert(int_pressure.current_pressure() == count_int_pressure(liveout), "the int pressure is incorrect");
540
  assert(float_pressure.current_pressure() == count_float_pressure(liveout), "the float pressure is incorrect");
541
}
542

543
/*
544
* Computes the entry register pressure of a block, looking at all live
545
* ranges in the livein. The register pressure is computed for both float
546
* and int/pointer registers.
547
*/
548
void PhaseChaitin::compute_entry_block_pressure(Block* b) {
549
  IndexSet *livein = _live->livein(b);
550
  if (!livein->is_empty()) {
551
    IndexSetIterator elements(livein);
552
    uint lid = elements.next();
553
    while (lid != 0) {
554
      LRG &lrg = lrgs(lid);
555
      raise_pressure(b, lrg, _sched_int_pressure, _sched_float_pressure);
556
      lid = elements.next();
557
    }
558
  }
559
  // Now check phis for locally defined inputs
560
  for (uint j = 0; j < b->number_of_nodes(); j++) {
561
    Node* n = b->get_node(j);
562
    if (n->is_Phi()) {
563
      for (uint k = 1; k < n->req(); k++) {
564
        Node* phi_in = n->in(k);
565
        // Because we are talking about phis, raise register pressure once for each
566
        // instance of a phi to account for a single value
567
        if (_cfg.get_block_for_node(phi_in) == b) {
568
          LRG& lrg = lrgs(phi_in->_idx);
569
          raise_pressure(b, lrg, _sched_int_pressure, _sched_float_pressure);
570
          break;
571
        }
572
      }
573
    }
574
  }
575
  _sched_int_pressure.set_start_pressure(_sched_int_pressure.current_pressure());
576
  _sched_float_pressure.set_start_pressure(_sched_float_pressure.current_pressure());
577
}
578

579
/*
580
* Computes the exit register pressure of a block, looking at all live
581
* ranges in the liveout. The register pressure is computed for both float
582
* and int/pointer registers.
583
*/
584
void PhaseChaitin::compute_exit_block_pressure(Block* b) {
585

586
  IndexSet* livein = _live->live(b);
587
  _sched_int_pressure.set_current_pressure(0);
588
  _sched_float_pressure.set_current_pressure(0);
589
  if (!livein->is_empty()) {
590
    IndexSetIterator elements(livein);
591
    uint lid = elements.next();
592
    while (lid != 0) {
593
      LRG &lrg = lrgs(lid);
594
      raise_pressure(b, lrg, _sched_int_pressure, _sched_float_pressure);
595
      lid = elements.next();
596
    }
597
  }
598
}
599

600
/*
601
 * Remove dead node if it's not used.
602
 * We only remove projection nodes if the node "defining" the projection is
603
 * dead, for example on x86, if we have a dead Add node we remove its
604
 * RFLAGS node.
605
 */
606
bool PhaseChaitin::remove_node_if_not_used(Block* b, uint location, Node* n, uint lid, IndexSet* liveout) {
607
  Node* def = n->in(0);
608
  if (!n->is_Proj() ||
609
      (_lrg_map.live_range_id(def) && !liveout->member(_lrg_map.live_range_id(def)))) {
610
    if (n->is_MachProj()) {
611
      // Don't remove KILL projections if their "defining" nodes have
612
      // memory effects (have SCMemProj projection node) -
613
      // they are not dead even when their result is not used.
614
      // For example, compareAndSwapL (and other CAS) and EncodeISOArray nodes.
615
      // The method add_input_to_liveout() keeps such nodes alive (put them on liveout list)
616
      // when it sees SCMemProj node in a block. Unfortunately SCMemProj node could be placed
617
      // in block in such order that KILL MachProj nodes are processed first.
618
      if (def->has_out_with(Op_SCMemProj)) {
619
        return false;
620
      }
621
    }
622
    b->remove_node(location);
623
    LRG& lrg = lrgs(lid);
624
    if (lrg._def == n) {
625
      lrg._def = nullptr;
626
    }
627
    n->disconnect_inputs(C);
628
    _cfg.unmap_node_from_block(n);
629
    n->replace_by(C->top());
630
    return true;
631
  }
632
  return false;
633
}
634

635
/*
636
 * When encountering a fat projection, we might go from a low to high to low
637
 * (since the fat proj only lives at this instruction) going backwards in the
638
 * block. If we find a low to high transition, we record it.
639
 */
640
void PhaseChaitin::check_for_high_pressure_transition_at_fatproj(uint& block_reg_pressure, uint location, LRG& lrg, Pressure& pressure, const int op_regtype) {
641
  RegMask mask_tmp = lrg.mask();
642
  mask_tmp.AND(*Matcher::idealreg2regmask[op_regtype]);
643
  pressure.check_pressure_at_fatproj(location, mask_tmp);
644
}
645

646
/*
647
 * Insure high score for immediate-use spill copies so they get a color.
648
 * All single-use MachSpillCopy(s) that immediately precede their
649
 * use must color early.  If a longer live range steals their
650
 * color, the spill copy will split and may push another spill copy
651
 * further away resulting in an infinite spill-split-retry cycle.
652
 * Assigning a zero area results in a high score() and a good
653
 * location in the simplify list.
654
 */
655
void PhaseChaitin::assign_high_score_to_immediate_copies(Block* b, Node* n, LRG& lrg, uint next_inst, uint last_inst) {
656
  if (n->is_SpillCopy() &&
657
      lrg.is_singledef() && // A multi defined live range can still split
658
      n->outcnt() == 1 &&   // and use must be in this block
659
      _cfg.get_block_for_node(n->unique_out()) == b) {
660

661
    Node* single_use = n->unique_out();
662
    assert(b->find_node(single_use) >= next_inst, "Use must be later in block");
663
    // Use can be earlier in block if it is a Phi, but then I should be a MultiDef
664

665
    // Find first non SpillCopy 'm' that follows the current instruction
666
    // (current_inst - 1) is index for current instruction 'n'
667
    Node* m = n;
668
    for (uint i = next_inst; i <= last_inst && m->is_SpillCopy(); ++i) {
669
      m = b->get_node(i);
670
    }
671
    if (m == single_use) {
672
      lrg._area = 0.0;
673
    }
674
  }
675
}
676

677
/*
678
 * Copies do not define a new value and so do not interfere.
679
 * Remove the copies source from the liveout set before interfering.
680
 */
681
void PhaseChaitin::remove_interference_from_copy(Block* b, uint location, uint lid_copy, IndexSet* liveout, double cost, Pressure& int_pressure, Pressure& float_pressure) {
682
  if (liveout->remove(lid_copy)) {
683
    LRG& lrg_copy = lrgs(lid_copy);
684
    lrg_copy._area -= cost;
685

686
    // Lower register pressure since copy and definition can share the same register
687
    lower_pressure(b, location, lrg_copy, liveout, int_pressure, float_pressure);
688
  }
689
}
690

691
/*
692
 * The defined value must go in a particular register. Remove that register from
693
 * all conflicting parties and avoid the interference.
694
 */
695
void PhaseChaitin::remove_bound_register_from_interfering_live_ranges(LRG& lrg, IndexSet* liveout, uint& must_spill) {
696
  if (liveout->is_empty()) return;
697
  // Check for common case
698
  const RegMask& rm = lrg.mask();
699
  int r_size = lrg.num_regs();
700
  // Smear odd bits
701
  IndexSetIterator elements(liveout);
702
  uint l = elements.next();
703
  while (l != 0) {
704
    LRG& interfering_lrg = lrgs(l);
705
    // If 'l' must spill already, do not further hack his bits.
706
    // He'll get some interferences and be forced to spill later.
707
    if (interfering_lrg._must_spill) {
708
      l = elements.next();
709
      continue;
710
    }
711

712
    // Remove bound register(s) from 'l's choices
713
    RegMask old = interfering_lrg.mask();
714
    uint old_size = interfering_lrg.mask_size();
715

716
    // Remove the bits from LRG 'rm' from LRG 'l' so 'l' no
717
    // longer interferes with 'rm'.  If 'l' requires aligned
718
    // adjacent pairs, subtract out bit pairs.
719
    assert(!interfering_lrg._is_vector || !interfering_lrg._fat_proj, "sanity");
720

721
    if (interfering_lrg.num_regs() > 1 && !interfering_lrg._fat_proj) {
722
      RegMask r2mask = rm;
723
      // Leave only aligned set of bits.
724
      r2mask.smear_to_sets(interfering_lrg.num_regs());
725
      // It includes vector case.
726
      interfering_lrg.SUBTRACT(r2mask);
727
      interfering_lrg.compute_set_mask_size();
728
    } else if (r_size != 1) {
729
      // fat proj
730
      interfering_lrg.SUBTRACT(rm);
731
      interfering_lrg.compute_set_mask_size();
732
    } else {
733
      // Common case: size 1 bound removal
734
      OptoReg::Name r_reg = rm.find_first_elem();
735
      if (interfering_lrg.mask().Member(r_reg)) {
736
        interfering_lrg.Remove(r_reg);
737
        interfering_lrg.set_mask_size(interfering_lrg.mask().is_AllStack() ? LRG::AllStack_size : old_size - 1);
738
      }
739
    }
740

741
    // If 'l' goes completely dry, it must spill.
742
    if (interfering_lrg.not_free()) {
743
      // Give 'l' some kind of reasonable mask, so it picks up
744
      // interferences (and will spill later).
745
      interfering_lrg.set_mask(old);
746
      interfering_lrg.set_mask_size(old_size);
747
      must_spill++;
748
      interfering_lrg._must_spill = 1;
749
      interfering_lrg.set_reg(OptoReg::Name(LRG::SPILL_REG));
750
    }
751
    l = elements.next();
752
  }
753
}
754

755
/*
756
 * Start loop at 1 (skip control edge) for most Nodes. SCMemProj's might be the
757
 * sole use of a StoreLConditional. While StoreLConditionals set memory (the
758
 * SCMemProj use) they also def flags; if that flag def is unused the allocator
759
 * sees a flag-setting instruction with no use of the flags and assumes it's
760
 * dead.  This keeps the (useless) flag-setting behavior alive while also
761
 * keeping the (useful) memory update effect.
762
 */
763
void PhaseChaitin::add_input_to_liveout(Block* b, Node* n, IndexSet* liveout, double cost, Pressure& int_pressure, Pressure& float_pressure) {
764
  JVMState* jvms = n->jvms();
765
  uint debug_start = jvms ? jvms->debug_start() : 999999;
766

767
  for (uint k = ((n->Opcode() == Op_SCMemProj) ? 0:1); k < n->req(); k++) {
768
    Node* def = n->in(k);
769
    uint lid = _lrg_map.live_range_id(def);
770
    if (!lid) {
771
      continue;
772
    }
773
    LRG& lrg = lrgs(lid);
774

775
    // No use-side cost for spilling debug info
776
    if (k < debug_start) {
777
      // A USE costs twice block frequency (once for the Load, once
778
      // for a Load-delay).  Rematerialized uses only cost once.
779
      lrg._cost += (def->rematerialize() ? b->_freq : (b->_freq * 2));
780
    }
781

782
    if (liveout->insert(lid)) {
783
      // Newly live things assumed live from here to top of block
784
      lrg._area += cost;
785
      raise_pressure(b, lrg, int_pressure, float_pressure);
786
      assert(int_pressure.current_pressure() == count_int_pressure(liveout), "the int pressure is incorrect");
787
      assert(float_pressure.current_pressure() == count_float_pressure(liveout), "the float pressure is incorrect");
788
    }
789
    assert(lrg._area >= 0.0, "unexpected spill area value %g (rounding mode %x)", lrg._area, fegetround());
790
  }
791
}
792

793
/*
794
 * If we run off the top of the block with high pressure just record that the
795
 * whole block is high pressure. (Even though we might have a transition
796
 * later down in the block)
797
 */
798
void PhaseChaitin::check_for_high_pressure_block(Pressure& pressure) {
799
  // current pressure now means the pressure before the first instruction in the block
800
  // (since we have stepped through all instructions backwards)
801
  if (pressure.current_pressure() > pressure.high_pressure_limit()) {
802
    pressure.set_high_pressure_index_to_block_start();
803
  }
804
}
805

806
/*
807
 * Compute high pressure indice; avoid landing in the middle of projnodes
808
 * and set the high pressure index for the block
809
 */
810
void PhaseChaitin::adjust_high_pressure_index(Block* b, uint& block_hrp_index, Pressure& pressure) {
  uint idx = pressure.high_pressure_index();

  // Only adjust an index that names a node inside the block, no later than
  // the block's end node.
  const bool inside_block = (idx < b->number_of_nodes()) && (idx < b->end_idx() + 1);
  if (inside_block) {
    // Step backwards past projections, null checks and catches so the
    // recorded index does not point at one of them.
    for (Node* node = b->get_node(idx);
         node->is_Proj() || node->is_MachNullCheck() || node->is_Catch();
         node = b->get_node(--idx)) {
      // all work happens in the loop header
    }
  }

  block_hrp_index = idx;
}
820

821
void PhaseChaitin::print_pressure_info(Pressure& pressure, const char *str) {
  // The banner line is printed only when a title string is supplied.
  const bool has_banner = (str != nullptr);
  if (has_banner) {
    tty->print_cr("#  *** %s ***", str);
  }

  // Start, maximum (final) and current pressure, followed by a separator line.
  tty->print_cr("#     start pressure is = %d", pressure.start_pressure());
  tty->print_cr("#     max pressure is = %d", pressure.final_pressure());
  tty->print_cr("#     end pressure is = %d", pressure.current_pressure());
  tty->print_cr("#");
}
830

831
/* Build an interference graph:
832
 *   That is, if 2 live ranges are simultaneously alive but their acceptable
833
 *   register sets do not overlap, then they do not interfere. The IFG is built
834
 *   by a single reverse pass over each basic block. Starting with the known
835
 *   live-out set, we remove things that get defined and add things that become
836
 *   live (essentially executing one pass of a standard LIVE analysis). Just
837
 *   before a Node defines a value (and removes it from the live-ness set) that
838
 *   value is certainly live. The defined value interferes with everything
839
 *   currently live. The value is then removed from the live-ness set and its
840
 *   inputs are added to the live-ness set.
841
 * Compute register pressure for each block:
842
 *   We store the biggest register pressure for each block and also the first
843
 *   low to high register pressure transition within the block (if any).
844
 */
845
uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) {
  Compile::TracePhase tp("buildIFG", &timers[_t_buildIFGphysical]);

  uint must_spill = 0;
  for (uint blk_idx = 0; blk_idx < _cfg.number_of_blocks(); blk_idx++) {
    Block* blk = _cfg.get_block(blk_idx);

    // Work on a private copy of the block's live-out set; the original has to
    // survive for the later "collect_gc_info" phase.
    IndexSet live_set(_live->live(blk));

    uint first_inst = first_nonphi_index(blk);
    uint last_inst = blk->end_idx();

    move_exception_node_up(blk, first_inst, last_inst);

    Pressure int_pressure(last_inst + 1, Matcher::int_pressure_limit());
    Pressure float_pressure(last_inst + 1, Matcher::float_pressure_limit());
    blk->_reg_pressure = 0;
    blk->_freg_pressure = 0;

    // Area still ahead of the reverse walk: block frequency times the number
    // of instructions left, or zero once none are left.
    int insts_left = last_inst - first_inst;
    double cost = 0.0;
    if (insts_left > 0) {
      cost = blk->_freq * double(insts_left);
    }
    assert(cost >= 0.0, "negative spill cost" );

    compute_initial_block_pressure(blk, &live_set, int_pressure, float_pressure, cost);

    // Reverse pass over the block, from its end node down to index 1.
    for (uint pos = last_inst; pos > 0; pos--) {
      Node* inst = blk->get_node(pos);
      uint def_id = _lrg_map.live_range_id(inst);

      if (def_id != 0) {
        LRG& def_lrg = lrgs(def_id);

        // A DEF is normally charged the block frequency. Rematerialized
        // values are removed from the DEF site, so they are charged nothing.
        const double def_cost = inst->rematerialize() ? 0.0 : blk->_freq;
        def_lrg._cost += def_cost;

        const bool live_or_safepoint = live_set.member(def_id) || inst->Opcode() == Op_SafePoint;
        if (live_or_safepoint) {
          // The live range ends at its definition, so give back the area that
          // was charged for the rest of the block. When the cost is +Inf
          // (possible in extreme cases) the area is +Inf as well, and
          // +Inf - +Inf would be NaN, hence the finiteness guard.
          if (g_isfinite(cost)) {
            def_lrg._area -= cost;
          }
          assert(def_lrg._area >= 0.0, "unexpected spill area value %g (rounding mode %x)", def_lrg._area, fegetround());

          assign_high_score_to_immediate_copies(blk, inst, def_lrg, pos + 1, last_inst);

          if (live_set.remove(def_id)) {
            lower_pressure(blk, pos, def_lrg, &live_set, int_pressure, float_pressure);
          }

          uint copy_input = inst->is_Copy();
          if (copy_input != 0) {
            uint copy_src_id = _lrg_map.live_range_id(inst->in(copy_input));
            remove_interference_from_copy(blk, pos, copy_src_id, &live_set, cost, int_pressure, float_pressure);
          }
        } else {
          // The value is not live below this point and the node is not a
          // SafePoint.
          if (remove_node_if_not_used(blk, pos, inst, def_id, &live_set)) {
            float_pressure.lower_high_pressure_index();
            int_pressure.lower_high_pressure_index();
            // Node was removed: skip the remaining bookkeeping for this slot.
            continue;
          }
          if (def_lrg._fat_proj) {
            check_for_high_pressure_transition_at_fatproj(blk->_reg_pressure, pos, def_lrg, int_pressure, Op_RegI);
            check_for_high_pressure_transition_at_fatproj(blk->_freg_pressure, pos, def_lrg, float_pressure, Op_RegD);
          }
        }

        // Rematerializable DEFs are not bound even though the live range is,
        // which means some uses are bound. Spilling live range 'r' lets it
        // rematerialize at each use site according to that site's bindings.
        if (def_lrg.is_bound() && !inst->rematerialize() && def_lrg.mask().is_NotEmpty()) {
          remove_bound_register_from_interfering_live_ranges(def_lrg, &live_set, must_spill);
        }
        interfere_with_live(def_id, &live_set);
      }

      // One instruction fewer remains; recompute the area left in the block.
      insts_left--;
      if (insts_left <= 0) {
        cost = 0.0;
      } else {
        cost = blk->_freq * double(insts_left);
      }

      if (!inst->is_Phi()) {
        add_input_to_liveout(blk, inst, &live_set, cost, int_pressure, float_pressure);
      }
    }

    check_for_high_pressure_block(int_pressure);
    check_for_high_pressure_block(float_pressure);
    adjust_high_pressure_index(blk, blk->_ihrp_index, int_pressure);
    adjust_high_pressure_index(blk, blk->_fhrp_index, float_pressure);
    // The block's register pressure is the final pressure of each tracker.
    blk->_reg_pressure = int_pressure.final_pressure();
    blk->_freg_pressure = float_pressure.final_pressure();

#ifndef PRODUCT
    // Register pressure statistics
    if (PrintOptoStatistics) {
      const bool over_limit = blk->_reg_pressure > int_pressure.high_pressure_limit() ||
                              blk->_freg_pressure > float_pressure.high_pressure_limit();
      if (over_limit) {
        _high_pressure++;
      } else {
        _low_pressure++;
      }
    }
#endif
  }

  return must_spill;
}
954

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.