llvm-project

Форк
0
/
BypassSlowDivision.cpp 
480 строк · 17.9 Кб
1
//===- BypassSlowDivision.cpp - Bypass slow division ----------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file contains an optimization for div and rem on architectures that
10
// execute short instructions significantly faster than longer instructions.
11
// For example, on Intel Atom 32-bit divides are slow enough that during
12
// runtime it is profitable to check the value of the operands, and if they are
13
// positive and less than 256 use an unsigned 8-bit divide.
14
//
15
//===----------------------------------------------------------------------===//
16

17
#include "llvm/Transforms/Utils/BypassSlowDivision.h"
18
#include "llvm/ADT/DenseMap.h"
19
#include "llvm/ADT/STLExtras.h"
20
#include "llvm/ADT/SmallPtrSet.h"
21
#include "llvm/Transforms/Utils/Local.h"
22
#include "llvm/Analysis/ValueTracking.h"
23
#include "llvm/IR/BasicBlock.h"
24
#include "llvm/IR/Constants.h"
25
#include "llvm/IR/DerivedTypes.h"
26
#include "llvm/IR/Function.h"
27
#include "llvm/IR/IRBuilder.h"
28
#include "llvm/IR/Instruction.h"
29
#include "llvm/IR/Instructions.h"
30
#include "llvm/IR/Module.h"
31
#include "llvm/IR/Type.h"
32
#include "llvm/IR/Value.h"
33
#include "llvm/Support/Casting.h"
34
#include "llvm/Support/KnownBits.h"
35
#include <cassert>
36
#include <cstdint>
37

38
using namespace llvm;
39

40
#define DEBUG_TYPE "bypass-slow-division"
41

42
namespace {
43

44
  struct QuotRemPair {
45
    Value *Quotient;
46
    Value *Remainder;
47

48
    QuotRemPair(Value *InQuotient, Value *InRemainder)
49
        : Quotient(InQuotient), Remainder(InRemainder) {}
50
  };
51

52
  /// A quotient and remainder, plus a BB from which they logically "originate".
53
  /// If you use Quotient or Remainder in a Phi node, you should use BB as its
54
  /// corresponding predecessor.
55
  struct QuotRemWithBB {
56
    BasicBlock *BB = nullptr;
57
    Value *Quotient = nullptr;
58
    Value *Remainder = nullptr;
59
  };
60

61
using DivCacheTy = DenseMap<DivRemMapKey, QuotRemPair>;
62
using BypassWidthsTy = DenseMap<unsigned, unsigned>;
63
using VisitedSetTy = SmallPtrSet<Instruction *, 4>;
64

65
enum ValueRange {
66
  /// Operand definitely fits into BypassType. No runtime checks are needed.
67
  VALRNG_KNOWN_SHORT,
68
  /// A runtime check is required, as value range is unknown.
69
  VALRNG_UNKNOWN,
70
  /// Operand is unlikely to fit into BypassType. The bypassing should be
71
  /// disabled.
72
  VALRNG_LIKELY_LONG
73
};
74

75
class FastDivInsertionTask {
76
  bool IsValidTask = false;
77
  Instruction *SlowDivOrRem = nullptr;
78
  IntegerType *BypassType = nullptr;
79
  BasicBlock *MainBB = nullptr;
80

81
  bool isHashLikeValue(Value *V, VisitedSetTy &Visited);
82
  ValueRange getValueRange(Value *Op, VisitedSetTy &Visited);
83
  QuotRemWithBB createSlowBB(BasicBlock *Successor);
84
  QuotRemWithBB createFastBB(BasicBlock *Successor);
85
  QuotRemPair createDivRemPhiNodes(QuotRemWithBB &LHS, QuotRemWithBB &RHS,
86
                                   BasicBlock *PhiBB);
87
  Value *insertOperandRuntimeCheck(Value *Op1, Value *Op2);
88
  std::optional<QuotRemPair> insertFastDivAndRem();
89

90
  bool isSignedOp() {
91
    return SlowDivOrRem->getOpcode() == Instruction::SDiv ||
92
           SlowDivOrRem->getOpcode() == Instruction::SRem;
93
  }
94

95
  bool isDivisionOp() {
96
    return SlowDivOrRem->getOpcode() == Instruction::SDiv ||
97
           SlowDivOrRem->getOpcode() == Instruction::UDiv;
98
  }
99

100
  Type *getSlowType() { return SlowDivOrRem->getType(); }
101

102
public:
103
  FastDivInsertionTask(Instruction *I, const BypassWidthsTy &BypassWidths);
104

105
  Value *getReplacement(DivCacheTy &Cache);
106
};
107

108
} // end anonymous namespace
109

110
FastDivInsertionTask::FastDivInsertionTask(Instruction *I,
111
                                           const BypassWidthsTy &BypassWidths) {
112
  switch (I->getOpcode()) {
113
  case Instruction::UDiv:
114
  case Instruction::SDiv:
115
  case Instruction::URem:
116
  case Instruction::SRem:
117
    SlowDivOrRem = I;
118
    break;
119
  default:
120
    // I is not a div/rem operation.
121
    return;
122
  }
123

124
  // Skip division on vector types. Only optimize integer instructions.
125
  IntegerType *SlowType = dyn_cast<IntegerType>(SlowDivOrRem->getType());
126
  if (!SlowType)
127
    return;
128

129
  // Skip if this bitwidth is not bypassed.
130
  auto BI = BypassWidths.find(SlowType->getBitWidth());
131
  if (BI == BypassWidths.end())
132
    return;
133

134
  // Get type for div/rem instruction with bypass bitwidth.
135
  IntegerType *BT = IntegerType::get(I->getContext(), BI->second);
136
  BypassType = BT;
137

138
  // The original basic block.
139
  MainBB = I->getParent();
140

141
  // The instruction is indeed a slow div or rem operation.
142
  IsValidTask = true;
143
}
144

145
/// Reuses previously-computed dividend or remainder from the current BB if
146
/// operands and operation are identical. Otherwise calls insertFastDivAndRem to
147
/// perform the optimization and caches the resulting dividend and remainder.
148
/// If no replacement can be generated, nullptr is returned.
149
Value *FastDivInsertionTask::getReplacement(DivCacheTy &Cache) {
150
  // First, make sure that the task is valid.
151
  if (!IsValidTask)
152
    return nullptr;
153

154
  // Then, look for a value in Cache.
155
  Value *Dividend = SlowDivOrRem->getOperand(0);
156
  Value *Divisor = SlowDivOrRem->getOperand(1);
157
  DivRemMapKey Key(isSignedOp(), Dividend, Divisor);
158
  auto CacheI = Cache.find(Key);
159

160
  if (CacheI == Cache.end()) {
161
    // If previous instance does not exist, try to insert fast div.
162
    std::optional<QuotRemPair> OptResult = insertFastDivAndRem();
163
    // Bail out if insertFastDivAndRem has failed.
164
    if (!OptResult)
165
      return nullptr;
166
    CacheI = Cache.insert({Key, *OptResult}).first;
167
  }
168

169
  QuotRemPair &Value = CacheI->second;
170
  return isDivisionOp() ? Value.Quotient : Value.Remainder;
171
}
172

173
/// Check if a value looks like a hash.
174
///
175
/// The routine is expected to detect values computed using the most common hash
176
/// algorithms. Typically, hash computations end with one of the following
177
/// instructions:
178
///
179
/// 1) MUL with a constant wider than BypassType
180
/// 2) XOR instruction
181
///
182
/// And even if we are wrong and the value is not a hash, it is still quite
183
/// unlikely that such values will fit into BypassType.
184
///
185
/// To detect string hash algorithms like FNV we have to look through PHI-nodes.
186
/// It is implemented as a depth-first search for values that look neither long
187
/// nor hash-like.
188
bool FastDivInsertionTask::isHashLikeValue(Value *V, VisitedSetTy &Visited) {
189
  Instruction *I = dyn_cast<Instruction>(V);
190
  if (!I)
191
    return false;
192

193
  switch (I->getOpcode()) {
194
  case Instruction::Xor:
195
    return true;
196
  case Instruction::Mul: {
197
    // After Constant Hoisting pass, long constants may be represented as
198
    // bitcast instructions. As a result, some constants may look like an
199
    // instruction at first, and an additional check is necessary to find out if
200
    // an operand is actually a constant.
201
    Value *Op1 = I->getOperand(1);
202
    ConstantInt *C = dyn_cast<ConstantInt>(Op1);
203
    if (!C && isa<BitCastInst>(Op1))
204
      C = dyn_cast<ConstantInt>(cast<BitCastInst>(Op1)->getOperand(0));
205
    return C && C->getValue().getSignificantBits() > BypassType->getBitWidth();
206
  }
207
  case Instruction::PHI:
208
    // Stop IR traversal in case of a crazy input code. This limits recursion
209
    // depth.
210
    if (Visited.size() >= 16)
211
      return false;
212
    // Do not visit nodes that have been visited already. We return true because
213
    // it means that we couldn't find any value that doesn't look hash-like.
214
    if (!Visited.insert(I).second)
215
      return true;
216
    return llvm::all_of(cast<PHINode>(I)->incoming_values(), [&](Value *V) {
217
      // Ignore undef values as they probably don't affect the division
218
      // operands.
219
      return getValueRange(V, Visited) == VALRNG_LIKELY_LONG ||
220
             isa<UndefValue>(V);
221
    });
222
  default:
223
    return false;
224
  }
225
}
226

227
/// Check if an integer value fits into our bypass type.
228
ValueRange FastDivInsertionTask::getValueRange(Value *V,
229
                                               VisitedSetTy &Visited) {
230
  unsigned ShortLen = BypassType->getBitWidth();
231
  unsigned LongLen = V->getType()->getIntegerBitWidth();
232

233
  assert(LongLen > ShortLen && "Value type must be wider than BypassType");
234
  unsigned HiBits = LongLen - ShortLen;
235

236
  const DataLayout &DL = SlowDivOrRem->getDataLayout();
237
  KnownBits Known(LongLen);
238

239
  computeKnownBits(V, Known, DL);
240

241
  if (Known.countMinLeadingZeros() >= HiBits)
242
    return VALRNG_KNOWN_SHORT;
243

244
  if (Known.countMaxLeadingZeros() < HiBits)
245
    return VALRNG_LIKELY_LONG;
246

247
  // Long integer divisions are often used in hashtable implementations. It's
248
  // not worth bypassing such divisions because hash values are extremely
249
  // unlikely to have enough leading zeros. The call below tries to detect
250
  // values that are unlikely to fit BypassType (including hashes).
251
  if (isHashLikeValue(V, Visited))
252
    return VALRNG_LIKELY_LONG;
253

254
  return VALRNG_UNKNOWN;
255
}
256

257
/// Add new basic block for slow div and rem operations and put it before
258
/// SuccessorBB.
259
QuotRemWithBB FastDivInsertionTask::createSlowBB(BasicBlock *SuccessorBB) {
260
  QuotRemWithBB DivRemPair;
261
  DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "",
262
                                     MainBB->getParent(), SuccessorBB);
263
  IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin());
264
  Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());
265

266
  Value *Dividend = SlowDivOrRem->getOperand(0);
267
  Value *Divisor = SlowDivOrRem->getOperand(1);
268

269
  if (isSignedOp()) {
270
    DivRemPair.Quotient = Builder.CreateSDiv(Dividend, Divisor);
271
    DivRemPair.Remainder = Builder.CreateSRem(Dividend, Divisor);
272
  } else {
273
    DivRemPair.Quotient = Builder.CreateUDiv(Dividend, Divisor);
274
    DivRemPair.Remainder = Builder.CreateURem(Dividend, Divisor);
275
  }
276

277
  Builder.CreateBr(SuccessorBB);
278
  return DivRemPair;
279
}
280

281
/// Add new basic block for fast div and rem operations and put it before
282
/// SuccessorBB.
283
QuotRemWithBB FastDivInsertionTask::createFastBB(BasicBlock *SuccessorBB) {
284
  QuotRemWithBB DivRemPair;
285
  DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "",
286
                                     MainBB->getParent(), SuccessorBB);
287
  IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin());
288
  Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());
289

290
  Value *Dividend = SlowDivOrRem->getOperand(0);
291
  Value *Divisor = SlowDivOrRem->getOperand(1);
292
  Value *ShortDivisorV =
293
      Builder.CreateCast(Instruction::Trunc, Divisor, BypassType);
294
  Value *ShortDividendV =
295
      Builder.CreateCast(Instruction::Trunc, Dividend, BypassType);
296

297
  // udiv/urem because this optimization only handles positive numbers.
298
  Value *ShortQV = Builder.CreateUDiv(ShortDividendV, ShortDivisorV);
299
  Value *ShortRV = Builder.CreateURem(ShortDividendV, ShortDivisorV);
300
  DivRemPair.Quotient =
301
      Builder.CreateCast(Instruction::ZExt, ShortQV, getSlowType());
302
  DivRemPair.Remainder =
303
      Builder.CreateCast(Instruction::ZExt, ShortRV, getSlowType());
304
  Builder.CreateBr(SuccessorBB);
305

306
  return DivRemPair;
307
}
308

309
/// Creates Phi nodes for result of Div and Rem.
310
QuotRemPair FastDivInsertionTask::createDivRemPhiNodes(QuotRemWithBB &LHS,
311
                                                       QuotRemWithBB &RHS,
312
                                                       BasicBlock *PhiBB) {
313
  IRBuilder<> Builder(PhiBB, PhiBB->begin());
314
  Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());
315
  PHINode *QuoPhi = Builder.CreatePHI(getSlowType(), 2);
316
  QuoPhi->addIncoming(LHS.Quotient, LHS.BB);
317
  QuoPhi->addIncoming(RHS.Quotient, RHS.BB);
318
  PHINode *RemPhi = Builder.CreatePHI(getSlowType(), 2);
319
  RemPhi->addIncoming(LHS.Remainder, LHS.BB);
320
  RemPhi->addIncoming(RHS.Remainder, RHS.BB);
321
  return QuotRemPair(QuoPhi, RemPhi);
322
}
323

324
/// Creates a runtime check to test whether both the divisor and dividend fit
325
/// into BypassType. The check is inserted at the end of MainBB. True return
326
/// value means that the operands fit. Either of the operands may be NULL if it
327
/// doesn't need a runtime check.
328
Value *FastDivInsertionTask::insertOperandRuntimeCheck(Value *Op1, Value *Op2) {
329
  assert((Op1 || Op2) && "Nothing to check");
330
  IRBuilder<> Builder(MainBB, MainBB->end());
331
  Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());
332

333
  Value *OrV;
334
  if (Op1 && Op2)
335
    OrV = Builder.CreateOr(Op1, Op2);
336
  else
337
    OrV = Op1 ? Op1 : Op2;
338

339
  // BitMask is inverted to check if the operands are
340
  // larger than the bypass type
341
  uint64_t BitMask = ~BypassType->getBitMask();
342
  Value *AndV = Builder.CreateAnd(OrV, BitMask);
343

344
  // Compare operand values
345
  Value *ZeroV = ConstantInt::getSigned(getSlowType(), 0);
346
  return Builder.CreateICmpEQ(AndV, ZeroV);
347
}
348

349
/// Substitutes the div/rem instruction with code that checks the value of the
350
/// operands and uses a shorter-faster div/rem instruction when possible.
351
std::optional<QuotRemPair> FastDivInsertionTask::insertFastDivAndRem() {
352
  Value *Dividend = SlowDivOrRem->getOperand(0);
353
  Value *Divisor = SlowDivOrRem->getOperand(1);
354

355
  VisitedSetTy SetL;
356
  ValueRange DividendRange = getValueRange(Dividend, SetL);
357
  if (DividendRange == VALRNG_LIKELY_LONG)
358
    return std::nullopt;
359

360
  VisitedSetTy SetR;
361
  ValueRange DivisorRange = getValueRange(Divisor, SetR);
362
  if (DivisorRange == VALRNG_LIKELY_LONG)
363
    return std::nullopt;
364

365
  bool DividendShort = (DividendRange == VALRNG_KNOWN_SHORT);
366
  bool DivisorShort = (DivisorRange == VALRNG_KNOWN_SHORT);
367

368
  if (DividendShort && DivisorShort) {
369
    // If both operands are known to be short then just replace the long
370
    // division with a short one in-place.  Since we're not introducing control
371
    // flow in this case, narrowing the division is always a win, even if the
372
    // divisor is a constant (and will later get replaced by a multiplication).
373

374
    IRBuilder<> Builder(SlowDivOrRem);
375
    Value *TruncDividend = Builder.CreateTrunc(Dividend, BypassType);
376
    Value *TruncDivisor = Builder.CreateTrunc(Divisor, BypassType);
377
    Value *TruncDiv = Builder.CreateUDiv(TruncDividend, TruncDivisor);
378
    Value *TruncRem = Builder.CreateURem(TruncDividend, TruncDivisor);
379
    Value *ExtDiv = Builder.CreateZExt(TruncDiv, getSlowType());
380
    Value *ExtRem = Builder.CreateZExt(TruncRem, getSlowType());
381
    return QuotRemPair(ExtDiv, ExtRem);
382
  }
383

384
  if (isa<ConstantInt>(Divisor)) {
385
    // If the divisor is not a constant, DAGCombiner will convert it to a
386
    // multiplication by a magic constant.  It isn't clear if it is worth
387
    // introducing control flow to get a narrower multiply.
388
    return std::nullopt;
389
  }
390

391
  // After Constant Hoisting pass, long constants may be represented as
392
  // bitcast instructions. As a result, some constants may look like an
393
  // instruction at first, and an additional check is necessary to find out if
394
  // an operand is actually a constant.
395
  if (auto *BCI = dyn_cast<BitCastInst>(Divisor))
396
    if (BCI->getParent() == SlowDivOrRem->getParent() &&
397
        isa<ConstantInt>(BCI->getOperand(0)))
398
      return std::nullopt;
399

400
  IRBuilder<> Builder(MainBB, MainBB->end());
401
  Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());
402

403
  if (DividendShort && !isSignedOp()) {
404
    // If the division is unsigned and Dividend is known to be short, then
405
    // either
406
    // 1) Divisor is less or equal to Dividend, and the result can be computed
407
    //    with a short division.
408
    // 2) Divisor is greater than Dividend. In this case, no division is needed
409
    //    at all: The quotient is 0 and the remainder is equal to Dividend.
410
    //
411
    // So instead of checking at runtime whether Divisor fits into BypassType,
412
    // we emit a runtime check to differentiate between these two cases. This
413
    // lets us entirely avoid a long div.
414

415
    // Split the basic block before the div/rem.
416
    BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem);
417
    // Remove the unconditional branch from MainBB to SuccessorBB.
418
    MainBB->back().eraseFromParent();
419
    QuotRemWithBB Long;
420
    Long.BB = MainBB;
421
    Long.Quotient = ConstantInt::get(getSlowType(), 0);
422
    Long.Remainder = Dividend;
423
    QuotRemWithBB Fast = createFastBB(SuccessorBB);
424
    QuotRemPair Result = createDivRemPhiNodes(Fast, Long, SuccessorBB);
425
    Value *CmpV = Builder.CreateICmpUGE(Dividend, Divisor);
426
    Builder.CreateCondBr(CmpV, Fast.BB, SuccessorBB);
427
    return Result;
428
  } else {
429
    // General case. Create both slow and fast div/rem pairs and choose one of
430
    // them at runtime.
431

432
    // Split the basic block before the div/rem.
433
    BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem);
434
    // Remove the unconditional branch from MainBB to SuccessorBB.
435
    MainBB->back().eraseFromParent();
436
    QuotRemWithBB Fast = createFastBB(SuccessorBB);
437
    QuotRemWithBB Slow = createSlowBB(SuccessorBB);
438
    QuotRemPair Result = createDivRemPhiNodes(Fast, Slow, SuccessorBB);
439
    Value *CmpV = insertOperandRuntimeCheck(DividendShort ? nullptr : Dividend,
440
                                            DivisorShort ? nullptr : Divisor);
441
    Builder.CreateCondBr(CmpV, Fast.BB, Slow.BB);
442
    return Result;
443
  }
444
}
445

446
/// This optimization identifies DIV/REM instructions in a BB that can be
447
/// profitably bypassed and carried out with a shorter, faster divide.
448
bool llvm::bypassSlowDivision(BasicBlock *BB,
449
                              const BypassWidthsTy &BypassWidths) {
450
  DivCacheTy PerBBDivCache;
451

452
  bool MadeChange = false;
453
  Instruction *Next = &*BB->begin();
454
  while (Next != nullptr) {
455
    // We may add instructions immediately after I, but we want to skip over
456
    // them.
457
    Instruction *I = Next;
458
    Next = Next->getNextNode();
459

460
    // Ignore dead code to save time and avoid bugs.
461
    if (I->hasNUses(0))
462
      continue;
463

464
    FastDivInsertionTask Task(I, BypassWidths);
465
    if (Value *Replacement = Task.getReplacement(PerBBDivCache)) {
466
      I->replaceAllUsesWith(Replacement);
467
      I->eraseFromParent();
468
      MadeChange = true;
469
    }
470
  }
471

472
  // Above we eagerly create divs and rems, as pairs, so that we can efficiently
473
  // create divrem machine instructions.  Now erase any unused divs / rems so we
474
  // don't leave extra instructions sitting around.
475
  for (auto &KV : PerBBDivCache)
476
    for (Value *V : {KV.second.Quotient, KV.second.Remainder})
477
      RecursivelyDeleteTriviallyDeadInstructions(V);
478

479
  return MadeChange;
480
}
481

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.