llvm-project
585 строк · 19.7 Кб
1//===- DemandedBits.cpp - Determine demanded bits -------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass implements a demanded bits analysis. A demanded bit is one that
10// contributes to a result; bits that are not demanded can be either zero or
11// one without affecting control or data flow. For example in this sequence:
12//
13// %1 = add i32 %x, %y
14// %2 = trunc i32 %1 to i16
15//
16// Only the lowest 16 bits of %1 are demanded; the rest are removed by the
17// trunc.
18//
19//===----------------------------------------------------------------------===//
20
21#include "llvm/Analysis/DemandedBits.h"
22#include "llvm/ADT/APInt.h"
23#include "llvm/ADT/SetVector.h"
24#include "llvm/Analysis/AssumptionCache.h"
25#include "llvm/Analysis/ValueTracking.h"
26#include "llvm/IR/DataLayout.h"
27#include "llvm/IR/Dominators.h"
28#include "llvm/IR/InstIterator.h"
29#include "llvm/IR/Instruction.h"
30#include "llvm/IR/IntrinsicInst.h"
31#include "llvm/IR/Module.h"
32#include "llvm/IR/Operator.h"
33#include "llvm/IR/PassManager.h"
34#include "llvm/IR/PatternMatch.h"
35#include "llvm/IR/Type.h"
36#include "llvm/IR/Use.h"
37#include "llvm/Support/Casting.h"
38#include "llvm/Support/Debug.h"
39#include "llvm/Support/KnownBits.h"
40#include "llvm/Support/raw_ostream.h"
41#include <algorithm>
42#include <cstdint>
43
44using namespace llvm;
45using namespace llvm::PatternMatch;
46
47#define DEBUG_TYPE "demanded-bits"
48
49static bool isAlwaysLive(Instruction *I) {
50return I->isTerminator() || isa<DbgInfoIntrinsic>(I) || I->isEHPad() ||
51I->mayHaveSideEffects();
52}
53
54void DemandedBits::determineLiveOperandBits(
55const Instruction *UserI, const Value *Val, unsigned OperandNo,
56const APInt &AOut, APInt &AB, KnownBits &Known, KnownBits &Known2,
57bool &KnownBitsComputed) {
58unsigned BitWidth = AB.getBitWidth();
59
60// We're called once per operand, but for some instructions, we need to
61// compute known bits of both operands in order to determine the live bits of
62// either (when both operands are instructions themselves). We don't,
63// however, want to do this twice, so we cache the result in APInts that live
64// in the caller. For the two-relevant-operands case, both operand values are
65// provided here.
66auto ComputeKnownBits =
67[&](unsigned BitWidth, const Value *V1, const Value *V2) {
68if (KnownBitsComputed)
69return;
70KnownBitsComputed = true;
71
72const DataLayout &DL = UserI->getDataLayout();
73Known = KnownBits(BitWidth);
74computeKnownBits(V1, Known, DL, 0, &AC, UserI, &DT);
75
76if (V2) {
77Known2 = KnownBits(BitWidth);
78computeKnownBits(V2, Known2, DL, 0, &AC, UserI, &DT);
79}
80};
81
82switch (UserI->getOpcode()) {
83default: break;
84case Instruction::Call:
85case Instruction::Invoke:
86if (const auto *II = dyn_cast<IntrinsicInst>(UserI)) {
87switch (II->getIntrinsicID()) {
88default: break;
89case Intrinsic::bswap:
90// The alive bits of the input are the swapped alive bits of
91// the output.
92AB = AOut.byteSwap();
93break;
94case Intrinsic::bitreverse:
95// The alive bits of the input are the reversed alive bits of
96// the output.
97AB = AOut.reverseBits();
98break;
99case Intrinsic::ctlz:
100if (OperandNo == 0) {
101// We need some output bits, so we need all bits of the
102// input to the left of, and including, the leftmost bit
103// known to be one.
104ComputeKnownBits(BitWidth, Val, nullptr);
105AB = APInt::getHighBitsSet(BitWidth,
106std::min(BitWidth, Known.countMaxLeadingZeros()+1));
107}
108break;
109case Intrinsic::cttz:
110if (OperandNo == 0) {
111// We need some output bits, so we need all bits of the
112// input to the right of, and including, the rightmost bit
113// known to be one.
114ComputeKnownBits(BitWidth, Val, nullptr);
115AB = APInt::getLowBitsSet(BitWidth,
116std::min(BitWidth, Known.countMaxTrailingZeros()+1));
117}
118break;
119case Intrinsic::fshl:
120case Intrinsic::fshr: {
121const APInt *SA;
122if (OperandNo == 2) {
123// Shift amount is modulo the bitwidth. For powers of two we have
124// SA % BW == SA & (BW - 1).
125if (isPowerOf2_32(BitWidth))
126AB = BitWidth - 1;
127} else if (match(II->getOperand(2), m_APInt(SA))) {
128// Normalize to funnel shift left. APInt shifts of BitWidth are well-
129// defined, so no need to special-case zero shifts here.
130uint64_t ShiftAmt = SA->urem(BitWidth);
131if (II->getIntrinsicID() == Intrinsic::fshr)
132ShiftAmt = BitWidth - ShiftAmt;
133
134if (OperandNo == 0)
135AB = AOut.lshr(ShiftAmt);
136else if (OperandNo == 1)
137AB = AOut.shl(BitWidth - ShiftAmt);
138}
139break;
140}
141case Intrinsic::umax:
142case Intrinsic::umin:
143case Intrinsic::smax:
144case Intrinsic::smin:
145// If low bits of result are not demanded, they are also not demanded
146// for the min/max operands.
147AB = APInt::getBitsSetFrom(BitWidth, AOut.countr_zero());
148break;
149}
150}
151break;
152case Instruction::Add:
153if (AOut.isMask()) {
154AB = AOut;
155} else {
156ComputeKnownBits(BitWidth, UserI->getOperand(0), UserI->getOperand(1));
157AB = determineLiveOperandBitsAdd(OperandNo, AOut, Known, Known2);
158}
159break;
160case Instruction::Sub:
161if (AOut.isMask()) {
162AB = AOut;
163} else {
164ComputeKnownBits(BitWidth, UserI->getOperand(0), UserI->getOperand(1));
165AB = determineLiveOperandBitsSub(OperandNo, AOut, Known, Known2);
166}
167break;
168case Instruction::Mul:
169// Find the highest live output bit. We don't need any more input
170// bits than that (adds, and thus subtracts, ripple only to the
171// left).
172AB = APInt::getLowBitsSet(BitWidth, AOut.getActiveBits());
173break;
174case Instruction::Shl:
175if (OperandNo == 0) {
176const APInt *ShiftAmtC;
177if (match(UserI->getOperand(1), m_APInt(ShiftAmtC))) {
178uint64_t ShiftAmt = ShiftAmtC->getLimitedValue(BitWidth - 1);
179AB = AOut.lshr(ShiftAmt);
180
181// If the shift is nuw/nsw, then the high bits are not dead
182// (because we've promised that they *must* be zero).
183const auto *S = cast<ShlOperator>(UserI);
184if (S->hasNoSignedWrap())
185AB |= APInt::getHighBitsSet(BitWidth, ShiftAmt+1);
186else if (S->hasNoUnsignedWrap())
187AB |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
188}
189}
190break;
191case Instruction::LShr:
192if (OperandNo == 0) {
193const APInt *ShiftAmtC;
194if (match(UserI->getOperand(1), m_APInt(ShiftAmtC))) {
195uint64_t ShiftAmt = ShiftAmtC->getLimitedValue(BitWidth - 1);
196AB = AOut.shl(ShiftAmt);
197
198// If the shift is exact, then the low bits are not dead
199// (they must be zero).
200if (cast<LShrOperator>(UserI)->isExact())
201AB |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
202}
203}
204break;
205case Instruction::AShr:
206if (OperandNo == 0) {
207const APInt *ShiftAmtC;
208if (match(UserI->getOperand(1), m_APInt(ShiftAmtC))) {
209uint64_t ShiftAmt = ShiftAmtC->getLimitedValue(BitWidth - 1);
210AB = AOut.shl(ShiftAmt);
211// Because the high input bit is replicated into the
212// high-order bits of the result, if we need any of those
213// bits, then we must keep the highest input bit.
214if ((AOut & APInt::getHighBitsSet(BitWidth, ShiftAmt))
215.getBoolValue())
216AB.setSignBit();
217
218// If the shift is exact, then the low bits are not dead
219// (they must be zero).
220if (cast<AShrOperator>(UserI)->isExact())
221AB |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
222}
223}
224break;
225case Instruction::And:
226AB = AOut;
227
228// For bits that are known zero, the corresponding bits in the
229// other operand are dead (unless they're both zero, in which
230// case they can't both be dead, so just mark the LHS bits as
231// dead).
232ComputeKnownBits(BitWidth, UserI->getOperand(0), UserI->getOperand(1));
233if (OperandNo == 0)
234AB &= ~Known2.Zero;
235else
236AB &= ~(Known.Zero & ~Known2.Zero);
237break;
238case Instruction::Or:
239AB = AOut;
240
241// For bits that are known one, the corresponding bits in the
242// other operand are dead (unless they're both one, in which
243// case they can't both be dead, so just mark the LHS bits as
244// dead).
245ComputeKnownBits(BitWidth, UserI->getOperand(0), UserI->getOperand(1));
246if (OperandNo == 0)
247AB &= ~Known2.One;
248else
249AB &= ~(Known.One & ~Known2.One);
250break;
251case Instruction::Xor:
252case Instruction::PHI:
253AB = AOut;
254break;
255case Instruction::Trunc:
256AB = AOut.zext(BitWidth);
257break;
258case Instruction::ZExt:
259AB = AOut.trunc(BitWidth);
260break;
261case Instruction::SExt:
262AB = AOut.trunc(BitWidth);
263// Because the high input bit is replicated into the
264// high-order bits of the result, if we need any of those
265// bits, then we must keep the highest input bit.
266if ((AOut & APInt::getHighBitsSet(AOut.getBitWidth(),
267AOut.getBitWidth() - BitWidth))
268.getBoolValue())
269AB.setSignBit();
270break;
271case Instruction::Select:
272if (OperandNo != 0)
273AB = AOut;
274break;
275case Instruction::ExtractElement:
276if (OperandNo == 0)
277AB = AOut;
278break;
279case Instruction::InsertElement:
280case Instruction::ShuffleVector:
281if (OperandNo == 0 || OperandNo == 1)
282AB = AOut;
283break;
284}
285}
286
287void DemandedBits::performAnalysis() {
288if (Analyzed)
289// Analysis already completed for this function.
290return;
291Analyzed = true;
292
293Visited.clear();
294AliveBits.clear();
295DeadUses.clear();
296
297SmallSetVector<Instruction*, 16> Worklist;
298
299// Collect the set of "root" instructions that are known live.
300for (Instruction &I : instructions(F)) {
301if (!isAlwaysLive(&I))
302continue;
303
304LLVM_DEBUG(dbgs() << "DemandedBits: Root: " << I << "\n");
305// For integer-valued instructions, set up an initial empty set of alive
306// bits and add the instruction to the work list. For other instructions
307// add their operands to the work list (for integer values operands, mark
308// all bits as live).
309Type *T = I.getType();
310if (T->isIntOrIntVectorTy()) {
311if (AliveBits.try_emplace(&I, T->getScalarSizeInBits(), 0).second)
312Worklist.insert(&I);
313
314continue;
315}
316
317// Non-integer-typed instructions...
318for (Use &OI : I.operands()) {
319if (auto *J = dyn_cast<Instruction>(OI)) {
320Type *T = J->getType();
321if (T->isIntOrIntVectorTy())
322AliveBits[J] = APInt::getAllOnes(T->getScalarSizeInBits());
323else
324Visited.insert(J);
325Worklist.insert(J);
326}
327}
328// To save memory, we don't add I to the Visited set here. Instead, we
329// check isAlwaysLive on every instruction when searching for dead
330// instructions later (we need to check isAlwaysLive for the
331// integer-typed instructions anyway).
332}
333
334// Propagate liveness backwards to operands.
335while (!Worklist.empty()) {
336Instruction *UserI = Worklist.pop_back_val();
337
338LLVM_DEBUG(dbgs() << "DemandedBits: Visiting: " << *UserI);
339APInt AOut;
340bool InputIsKnownDead = false;
341if (UserI->getType()->isIntOrIntVectorTy()) {
342AOut = AliveBits[UserI];
343LLVM_DEBUG(dbgs() << " Alive Out: 0x"
344<< Twine::utohexstr(AOut.getLimitedValue()));
345
346// If all bits of the output are dead, then all bits of the input
347// are also dead.
348InputIsKnownDead = !AOut && !isAlwaysLive(UserI);
349}
350LLVM_DEBUG(dbgs() << "\n");
351
352KnownBits Known, Known2;
353bool KnownBitsComputed = false;
354// Compute the set of alive bits for each operand. These are anded into the
355// existing set, if any, and if that changes the set of alive bits, the
356// operand is added to the work-list.
357for (Use &OI : UserI->operands()) {
358// We also want to detect dead uses of arguments, but will only store
359// demanded bits for instructions.
360auto *I = dyn_cast<Instruction>(OI);
361if (!I && !isa<Argument>(OI))
362continue;
363
364Type *T = OI->getType();
365if (T->isIntOrIntVectorTy()) {
366unsigned BitWidth = T->getScalarSizeInBits();
367APInt AB = APInt::getAllOnes(BitWidth);
368if (InputIsKnownDead) {
369AB = APInt(BitWidth, 0);
370} else {
371// Bits of each operand that are used to compute alive bits of the
372// output are alive, all others are dead.
373determineLiveOperandBits(UserI, OI, OI.getOperandNo(), AOut, AB,
374Known, Known2, KnownBitsComputed);
375
376// Keep track of uses which have no demanded bits.
377if (AB.isZero())
378DeadUses.insert(&OI);
379else
380DeadUses.erase(&OI);
381}
382
383if (I) {
384// If we've added to the set of alive bits (or the operand has not
385// been previously visited), then re-queue the operand to be visited
386// again.
387auto Res = AliveBits.try_emplace(I);
388if (Res.second || (AB |= Res.first->second) != Res.first->second) {
389Res.first->second = std::move(AB);
390Worklist.insert(I);
391}
392}
393} else if (I && Visited.insert(I).second) {
394Worklist.insert(I);
395}
396}
397}
398}
399
400APInt DemandedBits::getDemandedBits(Instruction *I) {
401performAnalysis();
402
403auto Found = AliveBits.find(I);
404if (Found != AliveBits.end())
405return Found->second;
406
407const DataLayout &DL = I->getDataLayout();
408return APInt::getAllOnes(DL.getTypeSizeInBits(I->getType()->getScalarType()));
409}
410
411APInt DemandedBits::getDemandedBits(Use *U) {
412Type *T = (*U)->getType();
413auto *UserI = cast<Instruction>(U->getUser());
414const DataLayout &DL = UserI->getDataLayout();
415unsigned BitWidth = DL.getTypeSizeInBits(T->getScalarType());
416
417// We only track integer uses, everything else produces a mask with all bits
418// set
419if (!T->isIntOrIntVectorTy())
420return APInt::getAllOnes(BitWidth);
421
422if (isUseDead(U))
423return APInt(BitWidth, 0);
424
425performAnalysis();
426
427APInt AOut = getDemandedBits(UserI);
428APInt AB = APInt::getAllOnes(BitWidth);
429KnownBits Known, Known2;
430bool KnownBitsComputed = false;
431
432determineLiveOperandBits(UserI, *U, U->getOperandNo(), AOut, AB, Known,
433Known2, KnownBitsComputed);
434
435return AB;
436}
437
438bool DemandedBits::isInstructionDead(Instruction *I) {
439performAnalysis();
440
441return !Visited.count(I) && !AliveBits.contains(I) && !isAlwaysLive(I);
442}
443
444bool DemandedBits::isUseDead(Use *U) {
445// We only track integer uses, everything else is assumed live.
446if (!(*U)->getType()->isIntOrIntVectorTy())
447return false;
448
449// Uses by always-live instructions are never dead.
450auto *UserI = cast<Instruction>(U->getUser());
451if (isAlwaysLive(UserI))
452return false;
453
454performAnalysis();
455if (DeadUses.count(U))
456return true;
457
458// If no output bits are demanded, no input bits are demanded and the use
459// is dead. These uses might not be explicitly present in the DeadUses map.
460if (UserI->getType()->isIntOrIntVectorTy()) {
461auto Found = AliveBits.find(UserI);
462if (Found != AliveBits.end() && Found->second.isZero())
463return true;
464}
465
466return false;
467}
468
469void DemandedBits::print(raw_ostream &OS) {
470auto PrintDB = [&](const Instruction *I, const APInt &A, Value *V = nullptr) {
471OS << "DemandedBits: 0x" << Twine::utohexstr(A.getLimitedValue())
472<< " for ";
473if (V) {
474V->printAsOperand(OS, false);
475OS << " in ";
476}
477OS << *I << '\n';
478};
479
480OS << "Printing analysis 'Demanded Bits Analysis' for function '" << F.getName() << "':\n";
481performAnalysis();
482for (auto &KV : AliveBits) {
483Instruction *I = KV.first;
484PrintDB(I, KV.second);
485
486for (Use &OI : I->operands()) {
487PrintDB(I, getDemandedBits(&OI), OI);
488}
489}
490}
491
492static APInt determineLiveOperandBitsAddCarry(unsigned OperandNo,
493const APInt &AOut,
494const KnownBits &LHS,
495const KnownBits &RHS,
496bool CarryZero, bool CarryOne) {
497assert(!(CarryZero && CarryOne) &&
498"Carry can't be zero and one at the same time");
499
500// The following check should be done by the caller, as it also indicates
501// that LHS and RHS don't need to be computed.
502//
503// if (AOut.isMask())
504// return AOut;
505
506// Boundary bits' carry out is unaffected by their carry in.
507APInt Bound = (LHS.Zero & RHS.Zero) | (LHS.One & RHS.One);
508
509// First, the alive carry bits are determined from the alive output bits:
510// Let demand ripple to the right but only up to any set bit in Bound.
511// AOut = -1----
512// Bound = ----1-
513// ACarry&~AOut = --111-
514APInt RBound = Bound.reverseBits();
515APInt RAOut = AOut.reverseBits();
516APInt RProp = RAOut + (RAOut | ~RBound);
517APInt RACarry = RProp ^ ~RBound;
518APInt ACarry = RACarry.reverseBits();
519
520// Then, the alive input bits are determined from the alive carry bits:
521APInt NeededToMaintainCarryZero;
522APInt NeededToMaintainCarryOne;
523if (OperandNo == 0) {
524NeededToMaintainCarryZero = LHS.Zero | ~RHS.Zero;
525NeededToMaintainCarryOne = LHS.One | ~RHS.One;
526} else {
527NeededToMaintainCarryZero = RHS.Zero | ~LHS.Zero;
528NeededToMaintainCarryOne = RHS.One | ~LHS.One;
529}
530
531// As in computeForAddCarry
532APInt PossibleSumZero = ~LHS.Zero + ~RHS.Zero + !CarryZero;
533APInt PossibleSumOne = LHS.One + RHS.One + CarryOne;
534
535// The below is simplified from
536//
537// APInt CarryKnownZero = ~(PossibleSumZero ^ LHS.Zero ^ RHS.Zero);
538// APInt CarryKnownOne = PossibleSumOne ^ LHS.One ^ RHS.One;
539// APInt CarryUnknown = ~(CarryKnownZero | CarryKnownOne);
540//
541// APInt NeededToMaintainCarry =
542// (CarryKnownZero & NeededToMaintainCarryZero) |
543// (CarryKnownOne & NeededToMaintainCarryOne) |
544// CarryUnknown;
545
546APInt NeededToMaintainCarry = (~PossibleSumZero | NeededToMaintainCarryZero) &
547(PossibleSumOne | NeededToMaintainCarryOne);
548
549APInt AB = AOut | (ACarry & NeededToMaintainCarry);
550return AB;
551}
552
553APInt DemandedBits::determineLiveOperandBitsAdd(unsigned OperandNo,
554const APInt &AOut,
555const KnownBits &LHS,
556const KnownBits &RHS) {
557return determineLiveOperandBitsAddCarry(OperandNo, AOut, LHS, RHS, true,
558false);
559}
560
561APInt DemandedBits::determineLiveOperandBitsSub(unsigned OperandNo,
562const APInt &AOut,
563const KnownBits &LHS,
564const KnownBits &RHS) {
565KnownBits NRHS;
566NRHS.Zero = RHS.One;
567NRHS.One = RHS.Zero;
568return determineLiveOperandBitsAddCarry(OperandNo, AOut, LHS, NRHS, false,
569true);
570}
571
572AnalysisKey DemandedBitsAnalysis::Key;
573
574DemandedBits DemandedBitsAnalysis::run(Function &F,
575FunctionAnalysisManager &AM) {
576auto &AC = AM.getResult<AssumptionAnalysis>(F);
577auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
578return DemandedBits(F, AC, DT);
579}
580
581PreservedAnalyses DemandedBitsPrinterPass::run(Function &F,
582FunctionAnalysisManager &AM) {
583AM.getResult<DemandedBitsAnalysis>(F).print(OS);
584return PreservedAnalyses::all();
585}
586