//===- bolt/Passes/ShrinkWrapping.cpp -------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the ShrinkWrapping class.
//
//===----------------------------------------------------------------------===//

#include "bolt/Passes/ShrinkWrapping.h"
#include "bolt/Passes/DataflowInfoManager.h"
#include "bolt/Passes/MCF.h"
#include "bolt/Utils/CommandLineOpts.h"
#include <numeric>
#include <optional>
#include <stack>

#define DEBUG_TYPE "shrinkwrapping"

using namespace llvm;

namespace opts {

extern cl::opt<bool> TimeOpts;
extern cl::OptionCategory BoltOptCategory;

static cl::opt<unsigned> ShrinkWrappingThreshold(
    "shrink-wrapping-threshold",
    cl::desc("Percentage of prologue execution count to use as threshold when"
             " evaluating whether a block is cold enough to be profitable to"
             " move eligible spills there"),
    cl::init(30), cl::ZeroOrMore, cl::cat(BoltOptCategory));
} // namespace opts

namespace llvm {
namespace bolt {

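/// Scan the function for stores that look like callee-saved register spills.
/// A candidate save must be a simple store from a register into the local
/// stack area, with no local definition of the register reaching it, no use
/// of the slot by another function, no reload of the slot into a different
/// register, and a single save site; anything else blacklists the register.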
void CalleeSavedAnalysis::analyzeSaves() {
  ReachingDefOrUse</*Def=*/true> &RD = Info.getReachingDefs();
  StackReachingUses &SRU = Info.getStackReachingUses();
  auto &InsnToBB = Info.getInsnToBBMap();
  BitVector BlacklistedRegs(BC.MRI->getNumRegs(), false);

  LLVM_DEBUG(dbgs() << "Checking spill locations\n");
  for (BinaryBasicBlock &BB : BF) {
    LLVM_DEBUG(dbgs() << "\tNow at BB " << BB.getName() << "\n");
    const MCInst *Prev = nullptr;
    for (MCInst &Inst : BB) {
      if (ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst)) {
        // Blacklist weird stores we don't understand
        if ((!FIE->IsSimple || FIE->StackOffset >= 0) && FIE->IsStore &&
            FIE->IsStoreFromReg) {
          BlacklistedRegs.set(FIE->RegOrImm);
          CalleeSaved.reset(FIE->RegOrImm);
          Prev = &Inst;
          continue;
        }

        if (!FIE->IsStore || !FIE->IsStoreFromReg ||
            BlacklistedRegs[FIE->RegOrImm]) {
          Prev = &Inst;
          continue;
        }

        // If this reg is defined locally, it is not a callee-saved reg
        if (RD.isReachedBy(FIE->RegOrImm,
                           Prev ? RD.expr_begin(*Prev) : RD.expr_begin(BB))) {
          BlacklistedRegs.set(FIE->RegOrImm);
          CalleeSaved.reset(FIE->RegOrImm);
          Prev = &Inst;
          continue;
        }

        // If this stack position is accessed in another function, we are
        // probably dealing with a parameter passed in a stack -- do not mess
        // with it
        if (SRU.isStoreUsed(*FIE,
                            Prev ? SRU.expr_begin(*Prev) : SRU.expr_begin(BB),
                            /*IncludeLocalAccesses=*/false)) {
          BlacklistedRegs.set(FIE->RegOrImm);
          CalleeSaved.reset(FIE->RegOrImm);
          Prev = &Inst;
          continue;
        }

        // If this stack position is loaded elsewhere in another reg, we can't
        // update it, so blacklist it.
        if (SRU.isLoadedInDifferentReg(*FIE, Prev ? SRU.expr_begin(*Prev)
                                                  : SRU.expr_begin(BB))) {
          BlacklistedRegs.set(FIE->RegOrImm);
          CalleeSaved.reset(FIE->RegOrImm);
          Prev = &Inst;
          continue;
        }

        // Ignore regs with multiple saves
        if (CalleeSaved[FIE->RegOrImm]) {
          BlacklistedRegs.set(FIE->RegOrImm);
          CalleeSaved.reset(FIE->RegOrImm);
          Prev = &Inst;
          continue;
        }

        CalleeSaved.set(FIE->RegOrImm);
        SaveFIEByReg[FIE->RegOrImm] = &*FIE;
        SavingCost[FIE->RegOrImm] += InsnToBB[&Inst]->getKnownExecutionCount();
        BC.MIB->addAnnotation(Inst, getSaveTag(), FIE->RegOrImm, AllocatorId);
        OffsetsByReg[FIE->RegOrImm] = FIE->StackOffset;
        LLVM_DEBUG(dbgs() << "Logging new candidate for Callee-Saved Reg: "
                          << FIE->RegOrImm << "\n");
      }
      Prev = &Inst;
    }
  }
}

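/// Match each candidate callee-saved register with its restores: simple
/// loads from the same stack offset the save used, whose reloaded value is
/// not used locally afterwards. A mismatching offset or a non-simple load
/// dismisses the register.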
void CalleeSavedAnalysis::analyzeRestores() {
  ReachingDefOrUse</*Def=*/false> &RU = Info.getReachingUses();

  // Now compute all restores of these callee-saved regs
  for (BinaryBasicBlock &BB : BF) {
    const MCInst *Prev = nullptr;
    for (MCInst &Inst : llvm::reverse(BB)) {
      if (ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst)) {
        if (!FIE->IsLoad || !CalleeSaved[FIE->RegOrImm]) {
          Prev = &Inst;
          continue;
        }

        // If this reg is used locally after a restore, then we are probably
        // not dealing with a callee-saved reg. Except if this use is by
        // another store, but we don't cover this case yet.
        // Also not callee-saved if this load accesses caller stack or isn't
        // simple.
        if (!FIE->IsSimple || FIE->StackOffset >= 0 ||
            RU.isReachedBy(FIE->RegOrImm,
                           Prev ? RU.expr_begin(*Prev) : RU.expr_begin(BB))) {
          CalleeSaved.reset(FIE->RegOrImm);
          Prev = &Inst;
          continue;
        }
        // If stack offsets between saves/restores don't agree with each
        // other, we don't completely understand what's happening here
        if (FIE->StackOffset != OffsetsByReg[FIE->RegOrImm]) {
          CalleeSaved.reset(FIE->RegOrImm);
          LLVM_DEBUG(dbgs() << "Dismissing Callee-Saved Reg because we found a "
                               "mismatching restore: "
                            << FIE->RegOrImm << "\n");
          Prev = &Inst;
          continue;
        }

        LLVM_DEBUG(dbgs() << "Adding matching restore for: " << FIE->RegOrImm
                          << "\n");
        if (LoadFIEByReg[FIE->RegOrImm] == nullptr)
          LoadFIEByReg[FIE->RegOrImm] = &*FIE;
        BC.MIB->addAnnotation(Inst, getRestoreTag(), FIE->RegOrImm,
                              AllocatorId);
        HasRestores.set(FIE->RegOrImm);
      }
      Prev = &Inst;
    }
  }
}

std::vector<MCInst *> CalleeSavedAnalysis::getSavesByReg(uint16_t Reg) {
  std::vector<MCInst *> Results;
  for (BinaryBasicBlock &BB : BF)
    for (MCInst &Inst : BB)
      if (getSavedReg(Inst) == Reg)
        Results.push_back(&Inst);
  return Results;
}

std::vector<MCInst *> CalleeSavedAnalysis::getRestoresByReg(uint16_t Reg) {
  std::vector<MCInst *> Results;
  for (BinaryBasicBlock &BB : BF)
    for (MCInst &Inst : BB)
      if (getRestoredReg(Inst) == Reg)
        Results.push_back(&Inst);
  return Results;
}

CalleeSavedAnalysis::~CalleeSavedAnalysis() {
  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : BB) {
      BC.MIB->removeAnnotation(Inst, getSaveTag());
      BC.MIB->removeAnnotation(Inst, getRestoreTag());
    }
  }
}

void StackLayoutModifier::blacklistRegion(int64_t Offset, int64_t Size) {
  if (BlacklistedRegions[Offset] < Size)
    BlacklistedRegions[Offset] = Size;
}

bool StackLayoutModifier::isRegionBlacklisted(int64_t Offset, int64_t Size) {
  for (std::pair<const int64_t, int64_t> Elem : BlacklistedRegions)
    if (Offset + Size > Elem.first && Offset < Elem.first + Elem.second)
      return true;
  return false;
}

bool StackLayoutModifier::blacklistAllInConflictWith(int64_t Offset,
                                                     int64_t Size) {
  bool HasConflict = false;
  for (auto Iter = AvailableRegions.begin(); Iter != AvailableRegions.end();) {
    std::pair<const int64_t, int64_t> &Elem = *Iter;
    if (Offset + Size > Elem.first && Offset < Elem.first + Elem.second &&
        (Offset != Elem.first || Size != Elem.second)) {
      Iter = AvailableRegions.erase(Iter);
      HasConflict = true;
      continue;
    }
    ++Iter;
  }
  if (HasConflict) {
    blacklistRegion(Offset, Size);
    return true;
  }
  return false;
}

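/// Make sure the frame pointer, if set, is a plain copy of the incoming
/// stack pointer value. Any fancier FP arithmetic is not understood by this
/// pass, so conservatively blacklist the layout (a non-empty blacklist
/// disables region collapsing and insertion).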
void StackLayoutModifier::checkFramePointerInitialization(MCInst &Point) {
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  if (!BC.MII->get(Point.getOpcode())
           .hasDefOfPhysReg(Point, BC.MIB->getFramePointer(), *BC.MRI))
    return;

  int SPVal, FPVal;
  std::tie(SPVal, FPVal) = *SPT.getStateBefore(Point);
  std::pair<MCPhysReg, int64_t> FP;

  if (FPVal != SPT.EMPTY && FPVal != SPT.SUPERPOSITION)
    FP = std::make_pair(BC.MIB->getFramePointer(), FPVal);
  else
    FP = std::make_pair(0, 0);
  std::pair<MCPhysReg, int64_t> SP;

  if (SPVal != SPT.EMPTY && SPVal != SPT.SUPERPOSITION)
    SP = std::make_pair(BC.MIB->getStackPointer(), SPVal);
  else
    SP = std::make_pair(0, 0);

  int64_t Output;
  if (!BC.MIB->evaluateStackOffsetExpr(Point, Output, SP, FP))
    return;

  // Not your regular frame pointer initialization... bail
  if (Output != SPVal)
    blacklistRegion(0, 0);
}

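/// Detect instructions that redefine SP from a value derived from FP. If
/// the new SP equals the tracked FP value no fixup is needed; if the
/// expression allocates stack space through FP we give up; otherwise the
/// instruction restores SP to an older value and gets a slot annotation so
/// its offset can be fixed after layout changes.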
void StackLayoutModifier::checkStackPointerRestore(MCInst &Point) {
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  if (!BC.MII->get(Point.getOpcode())
           .hasDefOfPhysReg(Point, BC.MIB->getStackPointer(), *BC.MRI))
    return;
  // Check if the definition of SP comes from FP -- in this case, this
  // value may need to be updated depending on our stack layout changes
  bool UsesFP = llvm::any_of(BC.MIB->useOperands(Point), [&](MCOperand &Op) {
    return Op.isReg() && Op.getReg() == BC.MIB->getFramePointer();
  });
  if (!UsesFP)
    return;

  // Setting up evaluation
  int SPVal, FPVal;
  std::tie(SPVal, FPVal) = *SPT.getStateBefore(Point);
  std::pair<MCPhysReg, int64_t> FP;

  if (FPVal != SPT.EMPTY && FPVal != SPT.SUPERPOSITION)
    FP = std::make_pair(BC.MIB->getFramePointer(), FPVal);
  else
    FP = std::make_pair(0, 0);
  std::pair<MCPhysReg, int64_t> SP;

  if (SPVal != SPT.EMPTY && SPVal != SPT.SUPERPOSITION)
    SP = std::make_pair(BC.MIB->getStackPointer(), SPVal);
  else
    SP = std::make_pair(0, 0);

  int64_t Output;
  if (!BC.MIB->evaluateStackOffsetExpr(Point, Output, SP, FP))
    return;

  // If the value is the same as FP, no need to adjust it
  if (Output == FPVal)
    return;

  // If an allocation happened through FP, bail
  if (Output <= SPVal) {
    blacklistRegion(0, 0);
    return;
  }

  // We are restoring SP to an old value based on FP. Mark it as a stack
  // access to be fixed later.
  BC.MIB->addAnnotation(Point, getSlotTag(), Output, AllocatorId);
}

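/// Partition stack accesses into available regions (slots this pass knows
/// how to move) and blacklisted regions (non-simple accesses, stores of
/// immediates, and slots read by other functions, i.e. likely stack-passed
/// parameters).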
void StackLayoutModifier::classifyStackAccesses() {
  // Understand when stack slots are being used non-locally
  StackReachingUses &SRU = Info.getStackReachingUses();

  for (BinaryBasicBlock &BB : BF) {
    const MCInst *Prev = nullptr;
    for (MCInst &Inst : llvm::reverse(BB)) {
      checkFramePointerInitialization(Inst);
      checkStackPointerRestore(Inst);
      ErrorOr<const FrameIndexEntry &> FIEX = FA.getFIEFor(Inst);
      if (!FIEX) {
        Prev = &Inst;
        continue;
      }
      if (!FIEX->IsSimple || (FIEX->IsStore && !FIEX->IsStoreFromReg)) {
        blacklistRegion(FIEX->StackOffset, FIEX->Size);
        Prev = &Inst;
        continue;
      }
      // If this stack position is accessed in another function, we are
      // probably dealing with a parameter passed in a stack -- do not mess
      // with it
      if (SRU.isStoreUsed(*FIEX,
                          Prev ? SRU.expr_begin(*Prev) : SRU.expr_begin(BB),
                          /*IncludeLocalAccesses=*/false)) {
        blacklistRegion(FIEX->StackOffset, FIEX->Size);
        Prev = &Inst;
        continue;
      }
      // Now we have a clear stack slot access. Check if it's blacklisted or
      // if it conflicts with another chunk.
      if (isRegionBlacklisted(FIEX->StackOffset, FIEX->Size) ||
          blacklistAllInConflictWith(FIEX->StackOffset, FIEX->Size)) {
        Prev = &Inst;
        continue;
      }
      // We are free to go. Add it as an available stack slot that we know
      // how to move.
      AvailableRegions[FIEX->StackOffset] = FIEX->Size;
      BC.MIB->addAnnotation(Inst, getSlotTag(), FIEX->StackOffset, AllocatorId);
      RegionToRegMap[FIEX->StackOffset].insert(FIEX->RegOrImm);
      RegToRegionMap[FIEX->RegOrImm].insert(FIEX->StackOffset);
      LLVM_DEBUG(dbgs() << "Adding region " << FIEX->StackOffset << " size "
                        << (int)FIEX->Size << "\n");
    }
  }
}

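/// Track the CFA register and offset across the function's CFI stream,
/// annotating CFA-relative CFIs with the stack slot they describe so they
/// can be adjusted together with the accesses they cover. A CFA based on a
/// register other than SP or FP makes the layout non-simple.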
void StackLayoutModifier::classifyCFIs() {
  std::stack<std::pair<int64_t, uint16_t>> CFIStack;
  int64_t CfaOffset = -8;
  uint16_t CfaReg = 7;

  auto recordAccess = [&](MCInst *Inst, int64_t Offset) {
    const uint16_t Reg = *BC.MRI->getLLVMRegNum(CfaReg, /*isEH=*/false);
    if (Reg == BC.MIB->getStackPointer() || Reg == BC.MIB->getFramePointer()) {
      BC.MIB->addAnnotation(*Inst, getSlotTag(), Offset, AllocatorId);
      LLVM_DEBUG(dbgs() << "Recording CFI " << Offset << "\n");
    } else {
      IsSimple = false;
      return;
    }
  };

  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    for (MCInst &Inst : *BB) {
      if (!BC.MIB->isCFI(Inst))
        continue;
      const MCCFIInstruction *CFI = BF.getCFIFor(Inst);
      switch (CFI->getOperation()) {
      case MCCFIInstruction::OpDefCfa:
        CfaOffset = -CFI->getOffset();
        recordAccess(&Inst, CfaOffset);
        [[fallthrough]];
      case MCCFIInstruction::OpDefCfaRegister:
        CfaReg = CFI->getRegister();
        break;
      case MCCFIInstruction::OpDefCfaOffset:
        CfaOffset = -CFI->getOffset();
        recordAccess(&Inst, CfaOffset);
        break;
      case MCCFIInstruction::OpOffset:
        recordAccess(&Inst, CFI->getOffset());
        BC.MIB->addAnnotation(Inst, getOffsetCFIRegTag(),
                              BC.MRI->getLLVMRegNum(CFI->getRegister(),
                                                    /*isEH=*/false),
                              AllocatorId);
        break;
      case MCCFIInstruction::OpSameValue:
        BC.MIB->addAnnotation(Inst, getOffsetCFIRegTag(),
                              BC.MRI->getLLVMRegNum(CFI->getRegister(),
                                                    /*isEH=*/false),
                              AllocatorId);
        break;
      case MCCFIInstruction::OpRememberState:
        CFIStack.push(std::make_pair(CfaOffset, CfaReg));
        break;
      case MCCFIInstruction::OpRestoreState: {
        assert(!CFIStack.empty() && "Corrupt CFI stack");
        std::pair<int64_t, uint16_t> &Elem = CFIStack.top();
        CFIStack.pop();
        CfaOffset = Elem.first;
        CfaReg = Elem.second;
        break;
      }
      case MCCFIInstruction::OpRelOffset:
      case MCCFIInstruction::OpAdjustCfaOffset:
        llvm_unreachable("Unhandled AdjustCfaOffset");
        break;
      default:
        break;
      }
    }
  }
}

void StackLayoutModifier::scheduleChange(
    MCInst &Inst, StackLayoutModifier::WorklistItem Item) {
  auto &WList = BC.MIB->getOrCreateAnnotationAs<std::vector<WorklistItem>>(
      Inst, getTodoTag(), AllocatorId);
  WList.push_back(Item);
}

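/// Deleting a push frees its stack slot and shifts part of the frame by the
/// push size. collapseRegion() schedules offset adjustments for every access
/// and CFI affected by the removed allocation; accesses that hit the deleted
/// slot itself are tagged "AccessesDeletedPos" and retargeted later via
/// setOffsetForCollapsedAccesses().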
bool StackLayoutModifier::canCollapseRegion(MCInst *DeletedPush) {
  if (!IsSimple || !BC.MIB->isPush(*DeletedPush))
    return false;

  ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(*DeletedPush);
  if (!FIE)
    return false;

  return canCollapseRegion(FIE->StackOffset);
}

bool StackLayoutModifier::canCollapseRegion(int64_t RegionAddr) {
  if (!IsInitialized)
    initialize();
  if (!IsSimple)
    return false;

  if (CollapsedRegions.count(RegionAddr))
    return true;

  // Check if it is possible to readjust all accesses below RegionAddr
  if (!BlacklistedRegions.empty())
    return false;

  return true;
}

bool StackLayoutModifier::collapseRegion(MCInst *DeletedPush) {
  ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(*DeletedPush);
  if (!FIE)
    return false;
  int64_t RegionAddr = FIE->StackOffset;
  int64_t RegionSz = FIE->Size;
  return collapseRegion(DeletedPush, RegionAddr, RegionSz);
}

bool StackLayoutModifier::collapseRegion(MCInst *Alloc, int64_t RegionAddr,
                                         int64_t RegionSz) {
  if (!canCollapseRegion(RegionAddr))
    return false;

  assert(IsInitialized);
  StackAllocationAnalysis &SAA = Info.getStackAllocationAnalysis();

  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : BB) {
      if (!BC.MIB->hasAnnotation(Inst, getSlotTag()))
        continue;
      auto Slot =
          BC.MIB->getAnnotationAs<decltype(FrameIndexEntry::StackOffset)>(
              Inst, getSlotTag());
      if (!AvailableRegions.count(Slot))
        continue;
      // We need to ensure this access is affected by the deleted push
      if (!(*SAA.getStateBefore(Inst))[SAA.ExprToIdx[Alloc]])
        continue;

      if (BC.MIB->isCFI(Inst)) {
        if (Slot > RegionAddr)
          continue;
        scheduleChange(Inst, WorklistItem(WorklistItem::AdjustCFI, RegionSz));
        continue;
      }
      ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst);
      if (!FIE) {
        if (Slot > RegionAddr)
          continue;
        // SP update based on frame pointer
        scheduleChange(
            Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, RegionSz));
        continue;
      }

      if (Slot == RegionAddr) {
        BC.MIB->addAnnotation(Inst, "AccessesDeletedPos", 0U, AllocatorId);
        continue;
      }
      if (BC.MIB->isPush(Inst) || BC.MIB->isPop(Inst))
        continue;

      if (FIE->StackPtrReg == BC.MIB->getStackPointer() && Slot < RegionAddr)
        continue;

      if (FIE->StackPtrReg == BC.MIB->getFramePointer() && Slot > RegionAddr)
        continue;

      scheduleChange(
          Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, RegionSz));
    }
  }

  CollapsedRegions.insert(RegionAddr);
  return true;
}

void StackLayoutModifier::setOffsetForCollapsedAccesses(int64_t NewOffset) {
  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : BB) {
      if (!BC.MIB->hasAnnotation(Inst, "AccessesDeletedPos"))
        continue;
      BC.MIB->removeAnnotation(Inst, "AccessesDeletedPos");
      scheduleChange(
          Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, NewOffset));
    }
  }
}

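/// Inserting a new allocation at program point P shifts every slot access
/// dominated by P. This requires a known SP offset at P and no blacklisted
/// regions; insertRegion() then schedules negative offset adjustments for
/// the dominated accesses and CFIs.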
bool StackLayoutModifier::canInsertRegion(ProgramPoint P) {
  if (!IsInitialized)
    initialize();
  if (!IsSimple)
    return false;

  StackPointerTracking &SPT = Info.getStackPointerTracking();
  int64_t RegionAddr = SPT.getStateBefore(P)->first;
  if (RegionAddr == SPT.SUPERPOSITION || RegionAddr == SPT.EMPTY)
    return false;

  if (InsertedRegions.count(RegionAddr))
    return true;

  // Check if we are going to screw up stack accesses at call sites that
  // pass parameters via stack
  if (!BlacklistedRegions.empty())
    return false;

  return true;
}

bool StackLayoutModifier::insertRegion(ProgramPoint P, int64_t RegionSz) {
  if (!canInsertRegion(P))
    return false;

  assert(IsInitialized);
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  // This RegionAddr is slightly different from the one seen in collapseRegion
  // This is the value of SP before the allocation the user wants to make.
  int64_t RegionAddr = SPT.getStateBefore(P)->first;
  if (RegionAddr == SPT.SUPERPOSITION || RegionAddr == SPT.EMPTY)
    return false;

  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();

  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : BB) {
      if (!BC.MIB->hasAnnotation(Inst, getSlotTag()))
        continue;
      auto Slot =
          BC.MIB->getAnnotationAs<decltype(FrameIndexEntry::StackOffset)>(
              Inst, getSlotTag());
      if (!AvailableRegions.count(Slot))
        continue;

      if (!(DA.doesADominateB(P, Inst)))
        continue;

      if (BC.MIB->isCFI(Inst)) {
        if (Slot >= RegionAddr)
          continue;
        scheduleChange(Inst, WorklistItem(WorklistItem::AdjustCFI, -RegionSz));
        continue;
      }
      ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst);
      if (!FIE) {
        if (Slot >= RegionAddr)
          continue;
        scheduleChange(
            Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, -RegionSz));
        continue;
      }

      if (FIE->StackPtrReg == BC.MIB->getStackPointer() && Slot < RegionAddr)
        continue;
      if (FIE->StackPtrReg == BC.MIB->getFramePointer() && Slot >= RegionAddr)
        continue;
      if (BC.MIB->isPush(Inst) || BC.MIB->isPop(Inst))
        continue;
      scheduleChange(
          Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, -RegionSz));
    }
  }

  InsertedRegions.insert(RegionAddr);
  return true;
}

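/// Apply all scheduled adjustments: CFI offsets are rewritten (negating the
/// delta for CFA-defining CFIs), while loads and stores are re-created with
/// their displaced stack offsets. SP-relative and FP-relative accesses move
/// in opposite directions, hence the sign flip on Adjustment.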
void StackLayoutModifier::performChanges() {
  std::set<uint32_t> ModifiedCFIIndices;
  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : llvm::reverse(BB)) {
      if (BC.MIB->hasAnnotation(Inst, "AccessesDeletedPos")) {
        assert(BC.MIB->isPop(Inst) || BC.MIB->isPush(Inst));
        BC.MIB->removeAnnotation(Inst, "AccessesDeletedPos");
      }
      if (!BC.MIB->hasAnnotation(Inst, getTodoTag()))
        continue;
      auto &WList = BC.MIB->getAnnotationAs<std::vector<WorklistItem>>(
          Inst, getTodoTag());
      int64_t Adjustment = 0;
      WorklistItem::ActionType AdjustmentType = WorklistItem::None;
      for (WorklistItem &WI : WList) {
        if (WI.Action == WorklistItem::None)
          continue;
        assert(WI.Action == WorklistItem::AdjustLoadStoreOffset ||
               WI.Action == WorklistItem::AdjustCFI);
        assert((AdjustmentType == WorklistItem::None ||
                AdjustmentType == WI.Action) &&
               "Conflicting actions requested at the same program point");
        AdjustmentType = WI.Action;
        Adjustment += WI.OffsetUpdate;
      }
      if (!Adjustment)
        continue;
      if (AdjustmentType != WorklistItem::AdjustLoadStoreOffset) {
        assert(BC.MIB->isCFI(Inst));
        uint32_t CFINum = Inst.getOperand(0).getImm();
        if (ModifiedCFIIndices.count(CFINum))
          continue;
        ModifiedCFIIndices.insert(CFINum);
        const MCCFIInstruction *CFI = BF.getCFIFor(Inst);
        const MCCFIInstruction::OpType Operation = CFI->getOperation();
        if (Operation == MCCFIInstruction::OpDefCfa ||
            Operation == MCCFIInstruction::OpDefCfaOffset)
          Adjustment = 0 - Adjustment;
        LLVM_DEBUG(dbgs() << "Changing CFI offset from " << CFI->getOffset()
                          << " to " << (CFI->getOffset() + Adjustment) << "\n");
        BF.mutateCFIOffsetFor(Inst, CFI->getOffset() + Adjustment);
        continue;
      }
      int32_t SrcImm = 0;
      MCPhysReg Reg = 0;
      MCPhysReg StackPtrReg = 0;
      int64_t StackOffset = 0;
      bool IsIndexed = false;
      bool IsLoad = false;
      bool IsStore = false;
      bool IsSimple = false;
      bool IsStoreFromReg = false;
      uint8_t Size = 0;
      bool Success = false;
      Success = BC.MIB->isStackAccess(Inst, IsLoad, IsStore, IsStoreFromReg,
                                      Reg, SrcImm, StackPtrReg, StackOffset,
                                      Size, IsSimple, IsIndexed);
      if (!Success) {
        // SP update based on FP value
        Success = BC.MIB->addToImm(Inst, Adjustment, &*BC.Ctx);
        assert(Success);
        continue;
      }
      assert(Success && IsSimple && !IsIndexed && (!IsStore || IsStoreFromReg));
      if (StackPtrReg != BC.MIB->getFramePointer())
        Adjustment = -Adjustment;
      if (IsLoad)
        BC.MIB->createRestoreFromStack(Inst, StackPtrReg,
                                       StackOffset + Adjustment, Reg, Size);
      else if (IsStore)
        BC.MIB->createSaveToStack(Inst, StackPtrReg, StackOffset + Adjustment,
                                  Reg, Size);
      LLVM_DEBUG({
        dbgs() << "Adjusted instruction: ";
        Inst.dump();
      });
    }
  }
}

void StackLayoutModifier::initialize() {
  classifyStackAccesses();
  classifyCFIs();
  IsInitialized = true;
}

std::atomic<std::uint64_t> ShrinkWrapping::SpillsMovedRegularMode{0};
std::atomic<std::uint64_t> ShrinkWrapping::SpillsMovedPushPopMode{0};
std::atomic<std::uint64_t> ShrinkWrapping::SpillsMovedDynamicCount{0};
std::atomic<std::uint64_t> ShrinkWrapping::SpillsFailedDynamicCount{0};
std::atomic<std::uint64_t> ShrinkWrapping::InstrDynamicCount{0};
std::atomic<std::uint64_t> ShrinkWrapping::StoreDynamicCount{0};

using BBIterTy = BinaryBasicBlock::iterator;

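/// Record, for every callee-saved register, the set of instructions that
/// use it, indexed by the dominator analysis' expression numbering. When
/// the function keeps a frame pointer, calls conservatively count as uses
/// of FP and its aliases.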
void ShrinkWrapping::classifyCSRUses() {
  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  UsesByReg = std::vector<BitVector>(BC.MRI->getNumRegs(),
                                     BitVector(DA.NumInstrs, false));

  const BitVector &FPAliases = BC.MIB->getAliases(BC.MIB->getFramePointer());
  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : BB) {
      if (BC.MIB->isCFI(Inst))
        continue;
      BitVector BV = BitVector(BC.MRI->getNumRegs(), false);
      BC.MIB->getTouchedRegs(Inst, BV);
      BV &= CSA.CalleeSaved;
      for (int I : BV.set_bits()) {
        if (I == 0)
          continue;
        if (CSA.getSavedReg(Inst) != I && CSA.getRestoredReg(Inst) != I)
          UsesByReg[I].set(DA.ExprToIdx[&Inst]);
      }
      if (!SPT.HasFramePointer || !BC.MIB->isCall(Inst))
        continue;
      BV = CSA.CalleeSaved;
      BV &= FPAliases;
      for (int I : BV.set_bits())
        UsesByReg[I].set(DA.ExprToIdx[&Inst]);
    }
  }
}

void ShrinkWrapping::pruneUnwantedCSRs() {
  BitVector ParamRegs = BC.MIB->getRegsUsedAsParams();
  for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
    if (!CSA.CalleeSaved[I])
      continue;
    if (ParamRegs[I]) {
      CSA.CalleeSaved.reset(I);
      continue;
    }
    if (UsesByReg[I].empty()) {
      LLVM_DEBUG(
          dbgs()
          << "Dismissing Callee-Saved Reg because we found no uses of it:" << I
          << "\n");
      CSA.CalleeSaved.reset(I);
      continue;
    }
    if (!CSA.HasRestores[I]) {
      LLVM_DEBUG(
          dbgs() << "Dismissing Callee-Saved Reg because it does not have "
                    "restores:"
                 << I << "\n");
      CSA.CalleeSaved.reset(I);
    }
  }
}

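/// Collect candidate save positions for each CSR: the first instruction of
/// every block that is not inside a loop, has a known stack state, and
/// dominates all uses of the register. Candidates are kept sorted by
/// descending execution count, so the cheapest block sits at the back of
/// the vector.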
void ShrinkWrapping::computeSaveLocations() {
  BestSavePos = std::vector<std::vector<MCInst *>>(BC.MRI->getNumRegs());
  ReachingInsns<true> &RI = Info.getReachingInsnsBackwards();
  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
  StackPointerTracking &SPT = Info.getStackPointerTracking();

  LLVM_DEBUG(dbgs() << "Checking save/restore possibilities\n");
  for (BinaryBasicBlock &BB : BF) {
    LLVM_DEBUG(dbgs() << "\tNow at BB " << BB.getName() << "\n");

    MCInst *First = BB.begin() != BB.end() ? &*BB.begin() : nullptr;
    if (!First)
      continue;

    // Use reaching instructions to detect if we are inside a loop - if we
    // are, do not consider this BB as valid placement for saves.
    if (RI.isInLoop(BB))
      continue;

    const std::pair<int, int> SPFP = *SPT.getStateBefore(*First);
    // If we don't know stack state at this point, bail
    if ((SPFP.first == SPT.SUPERPOSITION || SPFP.first == SPT.EMPTY) &&
        (SPFP.second == SPT.SUPERPOSITION || SPFP.second == SPT.EMPTY))
      continue;

    for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
      if (!CSA.CalleeSaved[I])
        continue;

      BitVector BBDominatedUses = BitVector(DA.NumInstrs, false);
      for (int J : UsesByReg[I].set_bits())
        if (DA.doesADominateB(*First, J))
          BBDominatedUses.set(J);
      LLVM_DEBUG(dbgs() << "\t\tBB " << BB.getName() << " dominates "
                        << BBDominatedUses.count() << " uses for reg " << I
                        << ". Total uses for reg is " << UsesByReg[I].count()
                        << "\n");
      BBDominatedUses &= UsesByReg[I];
      if (BBDominatedUses == UsesByReg[I]) {
        LLVM_DEBUG(dbgs() << "\t\t\tAdded " << BB.getName()
                          << " as a save pos for " << I << "\n");
        BestSavePos[I].push_back(First);
        LLVM_DEBUG({
          dbgs() << "Dominated uses are:\n";
          for (int J : UsesByReg[I].set_bits()) {
            dbgs() << "Idx " << J << ": ";
            BC.printInstruction(dbgs(), *DA.Expressions[J]);
            DA.Expressions[J]->dump();
          }
        });
      }
    }
  }

  BestSaveCount = std::vector<std::vector<uint64_t>>(BC.MRI->getNumRegs());

  auto &InsnToBB = Info.getInsnToBBMap();
  for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
    if (!CSA.CalleeSaved[I])
      continue;

    std::stable_sort(BestSavePos[I].begin(), BestSavePos[I].end(),
                     [&](const MCInst *A, const MCInst *B) {
                       const BinaryBasicBlock *BBA = InsnToBB[A];
                       const BinaryBasicBlock *BBB = InsnToBB[B];
                       const uint64_t CountA = BBA->getKnownExecutionCount();
                       const uint64_t CountB = BBB->getKnownExecutionCount();
                       return CountB < CountA;
                     });

    for (MCInst *Pos : BestSavePos[I]) {
      const BinaryBasicBlock *BB = InsnToBB[Pos];
      const uint64_t Count = BB->getKnownExecutionCount();
      BestSaveCount[I].push_back(Count);
    }
  }
}

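/// Build a total order of CSRs consistent with the dominance relation
/// between their best save positions, so later insertions can emit pushes
/// and pops in a mutually compatible order.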
void ShrinkWrapping::computeDomOrder() {
  DomOrder = std::vector<MCPhysReg>(BC.MRI->getNumRegs(), 0);
  std::vector<MCPhysReg> Order;
  for (MCPhysReg I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
    Order.push_back(I);
  }

  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
  auto &InsnToBB = Info.getInsnToBBMap();
  llvm::sort(Order, [&](const MCPhysReg &A, const MCPhysReg &B) {
    BinaryBasicBlock *BBA =
        BestSavePos[A].size() ? InsnToBB[BestSavePos[A].back()] : nullptr;
    BinaryBasicBlock *BBB =
        BestSavePos[B].size() ? InsnToBB[BestSavePos[B].back()] : nullptr;
    if (BBA == BBB)
      return A < B;
    if (!BBA && BBB)
      return false;
    if (BBA && !BBB)
      return true;
    if (DA.doesADominateB(*BestSavePos[A].back(), *BestSavePos[B].back()))
      return true;
    if (DA.doesADominateB(*BestSavePos[B].back(), *BestSavePos[A].back()))
      return false;
    return A < B;
  });

  for (MCPhysReg I = 0, E = BC.MRI->getNumRegs(); I != E; ++I)
    DomOrder[Order[I]] = I;
}

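/// A save position is worth using only if its execution count is below
/// ShrinkWrappingThreshold percent of the current save's dynamic cost;
/// TotalEstimatedWin receives the estimated dynamic count reduction.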
bool ShrinkWrapping::isBestSavePosCold(unsigned CSR, MCInst *&BestPosSave,
                                       uint64_t &TotalEstimatedWin) {
  const uint64_t CurSavingCost = CSA.SavingCost[CSR];
  if (!CSA.CalleeSaved[CSR])
    return false;

  assert(BestSaveCount[CSR].size() == BestSavePos[CSR].size() &&
         "save position vectors out of sync");
  if (BestSaveCount[CSR].empty())
    return false;

  const uint64_t BestCount = BestSaveCount[CSR].back();
  BestPosSave = BestSavePos[CSR].back();
  if (BestCount >= (opts::ShrinkWrappingThreshold / 100.0) * CurSavingCost)
    return false;

  LLVM_DEBUG({
    auto &InsnToBB = Info.getInsnToBBMap();
    dbgs() << "Better position for saves found in func " << BF.getPrintName()
           << " count = " << BF.getKnownExecutionCount() << "\n";
    dbgs() << "Reg: " << CSR << "; New BB: " << InsnToBB[BestPosSave]->getName()
           << " Freq reduction: " << (CurSavingCost - BestCount) << "\n";
  });

  TotalEstimatedWin = CurSavingCost - BestCount;
  return true;
}

/// Auxiliary function used to create basic blocks for critical edges and
/// update the dominance frontier with these new locations
void ShrinkWrapping::splitFrontierCritEdges(
    BinaryFunction *Func, SmallVector<ProgramPoint, 4> &Frontier,
    const SmallVector<bool, 4> &IsCritEdge,
    const SmallVector<BinaryBasicBlock *, 4> &From,
    const SmallVector<SmallVector<BinaryBasicBlock *, 4>, 4> &To) {
  LLVM_DEBUG(dbgs() << "splitFrontierCritEdges: Now handling func "
                    << BF.getPrintName() << "\n");
  // For every FromBB, there might be one or more critical edges, with
  // To[I] containing destination BBs. It's important to memorize
  // the original size of the Frontier as we may append to it while splitting
  // critical edges originating from blocks with multiple destinations.
  for (size_t I = 0, IE = Frontier.size(); I < IE; ++I) {
    if (!IsCritEdge[I])
      continue;
    if (To[I].empty())
      continue;
    BinaryBasicBlock *FromBB = From[I];
    LLVM_DEBUG(dbgs() << " - Now handling FrontierBB " << FromBB->getName()
                      << "\n");
    // Split an edge for every destination BB
    for (size_t DI = 0, DIE = To[I].size(); DI < DIE; ++DI) {
      BinaryBasicBlock *DestinationBB = To[I][DI];
      LLVM_DEBUG(dbgs() << " - Dest : " << DestinationBB->getName() << "\n");
      BinaryBasicBlock *NewBB = Func->splitEdge(FromBB, DestinationBB);
      // Insert a dummy instruction so this BB is never empty (we need this
      // for PredictiveStackPointerTracking to work, since it annotates
      // instructions and not BBs).
      if (NewBB->empty()) {
        MCInst NewInst;
        BC.MIB->createNoop(NewInst);
        NewBB->addInstruction(std::move(NewInst));
        scheduleChange(&*NewBB->begin(), WorklistItem(WorklistItem::Erase, 0));
      }

      // Update the frontier
      ProgramPoint NewFrontierPP = ProgramPoint::getLastPointAt(*NewBB);
      if (DI == 0) {
        // Update the frontier in place
        Frontier[I] = NewFrontierPP;
        LLVM_DEBUG(dbgs() << " - Update frontier with " << NewBB->getName()
                          << '\n');
      } else {
        // Append the new frontier to the end of the list
        Frontier.push_back(NewFrontierPP);
        LLVM_DEBUG(dbgs() << " - Append frontier " << NewBB->getName()
                          << '\n');
      }
    }
  }
}

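/// Place restores on the dominance frontier of the new save position.
/// Returns an empty vector to signal failure when the frontier contains a
/// terminator that uses the CSR or an invoke (whose landing pad is not
/// dominated); critical edges found on the frontier are split so the
/// restore can live on the edge.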
SmallVector<ProgramPoint, 4>
ShrinkWrapping::doRestorePlacement(MCInst *BestPosSave, unsigned CSR,
                                   uint64_t TotalEstimatedWin) {
  SmallVector<ProgramPoint, 4> Frontier;
  SmallVector<bool, 4> IsCritEdge;
  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();

  SmallVector<BinaryBasicBlock *, 4> CritEdgesFrom;
  SmallVector<SmallVector<BinaryBasicBlock *, 4>, 4> CritEdgesTo;
  // In case of a critical edge, we need to create extra BBs to host restores
  // into edges transitioning to the dominance frontier, otherwise we pull
  // these restores to inside the dominated area.
  Frontier = DA.getDominanceFrontierFor(*BestPosSave).takeVector();
  LLVM_DEBUG({
    dbgs() << "Dumping dominance frontier for ";
    BC.printInstruction(dbgs(), *BestPosSave);
    for (ProgramPoint &PP : Frontier)
      if (PP.isInst())
        BC.printInstruction(dbgs(), *PP.getInst());
      else
        dbgs() << PP.getBB()->getName() << "\n";
  });
  for (ProgramPoint &PP : Frontier) {
    bool HasCritEdges = false;
    if (PP.isInst() && BC.MIB->isTerminator(*PP.getInst()) &&
        doesInstUsesCSR(*PP.getInst(), CSR)) {
      Frontier.clear();
      return Frontier;
    }
    BinaryBasicBlock *FrontierBB = Info.getParentBB(PP);
    CritEdgesFrom.emplace_back(FrontierBB);
    CritEdgesTo.emplace_back(0);
    SmallVector<BinaryBasicBlock *, 4> &Dests = CritEdgesTo.back();
    // Check for invoke instructions at the dominance frontier, which
    // indicates the landing pad is not dominated.
    if (PP.isInst() && BC.MIB->isInvoke(*PP.getInst())) {
      Frontier.clear();
      return Frontier;
    }
    doForAllSuccs(*FrontierBB, [&](ProgramPoint P) {
      if (!DA.doesADominateB(*BestPosSave, P)) {
        Dests.emplace_back(Info.getParentBB(P));
        return;
      }
      HasCritEdges = true;
    });
    IsCritEdge.push_back(HasCritEdges);
  }
  // Restores cannot be placed in empty BBs because we have a dataflow
  // analysis that depends on insertions happening before real instructions
  // (PredictiveStackPointerTracking). Detect empty BBs now and add a
  // dummy nop that is scheduled to be removed later.
  bool InvalidateRequired = false;
  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    if (BB->size() != 0)
      continue;
    MCInst NewInst;
    BC.MIB->createNoop(NewInst);
    auto II = BB->addInstruction(std::move(NewInst));
    scheduleChange(&*II, WorklistItem(WorklistItem::Erase, 0));
    InvalidateRequired = true;
  }
  if (std::accumulate(IsCritEdge.begin(), IsCritEdge.end(), 0)) {
    LLVM_DEBUG({
      dbgs() << "Now detected critical edges in the following frontier:\n";
      for (ProgramPoint &PP : Frontier) {
        if (PP.isBB()) {
          dbgs() << "  BB: " << PP.getBB()->getName() << "\n";
        } else {
          dbgs() << "  Inst: ";
          PP.getInst()->dump();
        }
      }
    });
    splitFrontierCritEdges(&BF, Frontier, IsCritEdge, CritEdgesFrom,
                           CritEdgesTo);
    InvalidateRequired = true;
  }
  if (InvalidateRequired) {
    // BitVectors that represent all insns of the function are invalid now
    // since we changed BBs/Insts. Re-run steps that depend on pointers being
    // valid
    Info.invalidateAll();
    classifyCSRUses();
  }
  return Frontier;
}

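/// Push/pop mode re-encodes the spill as a push and the reload as a pop,
/// which changes the stack layout. It is only allowed when the frame needs
/// no alignment, no code takes the address of a stack position, every save
/// region can be collapsed, every existing restore is already a pop, and we
/// are not pushing into the entry block of a frame-pointer-based function.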
bool ShrinkWrapping::validatePushPopsMode(unsigned CSR, MCInst *BestPosSave,
                                          int64_t SaveOffset) {
  if (FA.requiresAlignment(BF)) {
    LLVM_DEBUG({
      dbgs() << "Reg " << CSR
             << " is not using push/pops due to function "
                "alignment requirements.\n";
    });
    return false;
  }
  if (FA.hasStackArithmetic(BF)) {
    LLVM_DEBUG({
      dbgs() << "Reg " << CSR
             << " is not using push/pops due to function "
                "taking the address of a stack position.\n";
    });
    return false;
  }
  for (MCInst *Save : CSA.getSavesByReg(CSR)) {
    if (!SLM.canCollapseRegion(Save)) {
      LLVM_DEBUG(dbgs() << "Reg " << CSR << " cannot collapse region.\n");
      return false;
    }
  }
  // Abort if one of the restores for this CSR is not a POP.
  for (MCInst *Load : CSA.getRestoresByReg(CSR)) {
    if (!BC.MIB->isPop(*Load)) {
      LLVM_DEBUG(dbgs() << "Reg " << CSR << " has a mismatching restore.\n");
      return false;
    }
  }

  StackPointerTracking &SPT = Info.getStackPointerTracking();
  // Abort if we are inserting a push into an entry BB (offset -8) and this
  // func sets up a frame pointer.
  if (!SLM.canInsertRegion(BestPosSave) || SaveOffset == SPT.SUPERPOSITION ||
      SaveOffset == SPT.EMPTY || (SaveOffset == -8 && SPT.HasFramePointer)) {
    LLVM_DEBUG({
      dbgs() << "Reg " << CSR
             << " cannot insert region or we are "
                "trying to insert a push into entry bb.\n";
    });
    return false;
  }
  return true;
}

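/// A pop only works where SP sits exactly at the save offset, so walk each
/// restore block backwards for the latest point where SP matches SaveOffset
/// and no use of the CSR is still reachable; an empty result makes the
/// caller fall back to load/store mode.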
SmallVector<ProgramPoint, 4> ShrinkWrapping::fixPopsPlacements(
    const SmallVector<ProgramPoint, 4> &RestorePoints, int64_t SaveOffset,
    unsigned CSR) {
  SmallVector<ProgramPoint, 4> FixedRestorePoints = RestorePoints;
  // Moving pop locations to the correct sp offset
  ReachingInsns<true> &RI = Info.getReachingInsnsBackwards();
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  for (ProgramPoint &PP : FixedRestorePoints) {
    BinaryBasicBlock *BB = Info.getParentBB(PP);
    bool Found = false;
    if (SPT.getStateAt(ProgramPoint::getLastPointAt(*BB))->first ==
        SaveOffset) {
      BitVector BV = *RI.getStateAt(ProgramPoint::getLastPointAt(*BB));
      BV &= UsesByReg[CSR];
      if (!BV.any()) {
        Found = true;
        PP = BB;
        continue;
      }
    }
    for (MCInst &Inst : llvm::reverse(*BB)) {
      if (SPT.getStateBefore(Inst)->first == SaveOffset) {
        BitVector BV = *RI.getStateAt(Inst);
        BV &= UsesByReg[CSR];
        if (!BV.any()) {
          Found = true;
          PP = &Inst;
          break;
        }
      }
    }
    if (!Found) {
      LLVM_DEBUG({
        dbgs() << "Could not find restore insertion point for " << CSR
               << ", falling back to load/store mode\n";
      });
      FixedRestorePoints.clear();
      return FixedRestorePoints;
    }
  }
  return FixedRestorePoints;
}

void ShrinkWrapping::scheduleOldSaveRestoresRemoval(unsigned CSR,
                                                    bool UsePushPops) {

  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    std::vector<MCInst *> CFIs;
    for (MCInst &Inst : llvm::reverse(*BB)) {
      if (BC.MIB->isCFI(Inst)) {
        // Delete all offset CFIs related to this CSR
        if (SLM.getOffsetCFIReg(Inst) == CSR) {
          HasDeletedOffsetCFIs[CSR] = true;
          scheduleChange(&Inst, WorklistItem(WorklistItem::Erase, CSR));
          continue;
        }
        CFIs.push_back(&Inst);
        continue;
      }

      uint16_t SavedReg = CSA.getSavedReg(Inst);
      uint16_t RestoredReg = CSA.getRestoredReg(Inst);
      if (SavedReg != CSR && RestoredReg != CSR) {
        CFIs.clear();
        continue;
      }

      scheduleChange(&Inst, WorklistItem(UsePushPops
                                             ? WorklistItem::Erase
                                             : WorklistItem::ChangeToAdjustment,
                                         CSR));

      // Delete associated CFIs
      const bool RecordDeletedPushCFIs =
          SavedReg == CSR && DeletedPushCFIs[CSR].empty();
      const bool RecordDeletedPopCFIs =
          RestoredReg == CSR && DeletedPopCFIs[CSR].empty();
      for (MCInst *CFI : CFIs) {
        const MCCFIInstruction *MCCFI = BF.getCFIFor(*CFI);
        // Do not touch these...
        if (MCCFI->getOperation() == MCCFIInstruction::OpRestoreState ||
            MCCFI->getOperation() == MCCFIInstruction::OpRememberState)
          continue;
        scheduleChange(CFI, WorklistItem(WorklistItem::Erase, CSR));
        if (RecordDeletedPushCFIs) {
          // Do not record this to be replayed later because we are going to
          // rebuild it.
          if (MCCFI->getOperation() == MCCFIInstruction::OpDefCfaOffset)
            continue;
          DeletedPushCFIs[CSR].push_back(CFI->getOperand(0).getImm());
        }
        if (RecordDeletedPopCFIs) {
          if (MCCFI->getOperation() == MCCFIInstruction::OpDefCfaOffset)
            continue;
          DeletedPopCFIs[CSR].push_back(CFI->getOperand(0).getImm());
        }
      }
      CFIs.clear();
    }
  }
}

bool ShrinkWrapping::doesInstUsesCSR(const MCInst &Inst, uint16_t CSR) {
  if (BC.MIB->isCFI(Inst) || CSA.getSavedReg(Inst) == CSR ||
      CSA.getRestoredReg(Inst) == CSR)
    return false;
  BitVector BV = BitVector(BC.MRI->getNumRegs(), false);
  BC.MIB->getTouchedRegs(Inst, BV);
  return BV[CSR];
}

void ShrinkWrapping::scheduleSaveRestoreInsertions(
    unsigned CSR, MCInst *BestPosSave,
    SmallVector<ProgramPoint, 4> &RestorePoints, bool UsePushPops) {
  auto &InsnToBB = Info.getInsnToBBMap();
  const FrameIndexEntry *FIESave = CSA.SaveFIEByReg[CSR];
  const FrameIndexEntry *FIELoad = CSA.LoadFIEByReg[CSR];
  assert(FIESave && FIELoad && "Invalid CSR");

  LLVM_DEBUG({
    dbgs() << "Scheduling save insertion at: ";
    BestPosSave->dump();
  });

  scheduleChange(BestPosSave,
                 UsePushPops ? WorklistItem::InsertPushOrPop
                             : WorklistItem::InsertLoadOrStore,
                 *FIESave, CSR);

  for (ProgramPoint &PP : RestorePoints) {
    BinaryBasicBlock *FrontierBB = Info.getParentBB(PP);
    LLVM_DEBUG({
      dbgs() << "Scheduling restore insertion at: ";
      if (PP.isInst())
        PP.getInst()->dump();
      else
        dbgs() << PP.getBB()->getName() << "\n";
    });
    MCInst *Term =
        FrontierBB->getTerminatorBefore(PP.isInst() ? PP.getInst() : nullptr);
    if (Term)
      PP = Term;
    bool PrecededByPrefix = false;
    if (PP.isInst()) {
      auto Iter = FrontierBB->findInstruction(PP.getInst());
      if (Iter != FrontierBB->end() && Iter != FrontierBB->begin()) {
        --Iter;
        PrecededByPrefix = BC.MIB->isPrefix(*Iter);
      }
    }
    if (PP.isInst() &&
        (doesInstUsesCSR(*PP.getInst(), CSR) || PrecededByPrefix)) {
      assert(!InsnToBB[PP.getInst()]->hasTerminatorAfter(PP.getInst()) &&
             "cannot move to end of bb");
      scheduleChange(InsnToBB[PP.getInst()],
                     UsePushPops ? WorklistItem::InsertPushOrPop
                                 : WorklistItem::InsertLoadOrStore,
                     *FIELoad, CSR);
      continue;
    }
    scheduleChange(PP,
                   UsePushPops ? WorklistItem::InsertPushOrPop
                               : WorklistItem::InsertLoadOrStore,
                   *FIELoad, CSR);
  }
}

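/// Driver that moves one CSR at a time: pick a colder save position, place
/// restores on its dominance frontier, choose between push/pop and
/// load/store mode, then schedule removal of the old save/restore pairs and
/// insertion of the new ones. Push/pop mode is all-or-nothing per function:
/// if any CSR rejects it, scheduled push/pop insertions are downgraded to
/// loads/stores and erasures become SP adjustments.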
void ShrinkWrapping::moveSaveRestores() {
  bool DisablePushPopMode = false;
  bool UsedPushPopMode = false;
  // Keeps info about successfully moved regs: reg index, save position and
  // save size
  std::vector<std::tuple<unsigned, MCInst *, size_t>> MovedRegs;
  uint64_t TotalEstimatedWin = 0;

  computeDomOrder();
  for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
    MCInst *BestPosSave = nullptr;
    uint64_t EstimatedWin = 0;
    SmallVector<ProgramPoint, 4> RestorePoints;
    while (RestorePoints.empty() &&
           isBestSavePosCold(I, BestPosSave, EstimatedWin)) {
      RestorePoints = doRestorePlacement(BestPosSave, I, EstimatedWin);
      if (RestorePoints.empty()) {
        LLVM_DEBUG({
          dbgs() << "Dropping opportunity because restore placement failed"
                    " -- total est. freq reduc: "
                 << EstimatedWin << ". Will try "
                 << (BestSaveCount[I].size() - 1) << " more times.\n";
        });
        BestSaveCount[I].pop_back();
        BestSavePos[I].pop_back();
        computeDomOrder();
      }
    }
    if (RestorePoints.empty()) {
      SpillsFailedDynamicCount += EstimatedWin;
      continue;
    }

    const FrameIndexEntry *FIESave = CSA.SaveFIEByReg[I];
    const FrameIndexEntry *FIELoad = CSA.LoadFIEByReg[I];
    (void)FIELoad;
    assert(FIESave && FIELoad);
    StackPointerTracking &SPT = Info.getStackPointerTracking();
    const std::pair<int, int> SPFP = *SPT.getStateBefore(*BestPosSave);
    int SaveOffset = SPFP.first;
    uint8_t SaveSize = FIESave->Size;

    // If we don't know stack state at this point, bail
    if ((SPFP.first == SPT.SUPERPOSITION || SPFP.first == SPT.EMPTY) &&
        (SPFP.second == SPT.SUPERPOSITION || SPFP.second == SPT.EMPTY)) {
      SpillsFailedDynamicCount += EstimatedWin;
      continue;
    }

    // Operation mode: if true, will insert push/pops instead of loads/restores
    bool UsePushPops = validatePushPopsMode(I, BestPosSave, SaveOffset);

    if (UsePushPops) {
      SmallVector<ProgramPoint, 4> FixedRestorePoints =
          fixPopsPlacements(RestorePoints, SaveOffset, I);
      if (FixedRestorePoints.empty())
        UsePushPops = false;
      else
        RestorePoints = FixedRestorePoints;
    }

    // Disable push-pop mode for all CSRs in this function
    if (!UsePushPops)
      DisablePushPopMode = true;
    else
      UsedPushPopMode = true;

    scheduleOldSaveRestoresRemoval(I, UsePushPops);
    scheduleSaveRestoreInsertions(I, BestPosSave, RestorePoints, UsePushPops);
    MovedRegs.emplace_back(std::make_tuple(I, BestPosSave, SaveSize));
    TotalEstimatedWin += EstimatedWin;
  }

  // Revert push-pop mode if it failed for a single CSR
  if (DisablePushPopMode && UsedPushPopMode) {
    UsedPushPopMode = false;
    for (BinaryBasicBlock &BB : BF) {
      auto WRI = Todo.find(&BB);
      if (WRI != Todo.end()) {
        std::vector<WorklistItem> &TodoList = WRI->second;
        for (WorklistItem &Item : TodoList)
          if (Item.Action == WorklistItem::InsertPushOrPop)
            Item.Action = WorklistItem::InsertLoadOrStore;
      }
      for (MCInst &Inst : llvm::reverse(BB)) {
        auto TodoList = BC.MIB->tryGetAnnotationAs<std::vector<WorklistItem>>(
            Inst, getAnnotationIndex());
        if (!TodoList)
          continue;
        bool isCFI = BC.MIB->isCFI(Inst);
        for (WorklistItem &Item : *TodoList) {
          if (Item.Action == WorklistItem::InsertPushOrPop)
            Item.Action = WorklistItem::InsertLoadOrStore;
          if (!isCFI && Item.Action == WorklistItem::Erase)
            Item.Action = WorklistItem::ChangeToAdjustment;
        }
      }
    }
  }
  SpillsMovedDynamicCount += TotalEstimatedWin;

  // Update statistics
  if (!UsedPushPopMode) {
    SpillsMovedRegularMode += MovedRegs.size();
    return;
  }

  // Schedule modifications to stack-accessing instructions via
  // StackLayoutModifier.
  SpillsMovedPushPopMode += MovedRegs.size();
  for (std::tuple<unsigned, MCInst *, size_t> &I : MovedRegs) {
    unsigned RegNdx;
    MCInst *SavePos;
    size_t SaveSize;
    std::tie(RegNdx, SavePos, SaveSize) = I;
    for (MCInst *Save : CSA.getSavesByReg(RegNdx))
      SLM.collapseRegion(Save);
    SLM.insertRegion(SavePos, SaveSize);
  }
}

namespace {
/// Helper function to identify whether two basic blocks created by splitting
/// a critical edge have the same contents.
bool isIdenticalSplitEdgeBB(const BinaryContext &BC, const BinaryBasicBlock &A,
                            const BinaryBasicBlock &B) {
  if (A.succ_size() != B.succ_size())
    return false;
  if (A.succ_size() != 1)
    return false;

  if (*A.succ_begin() != *B.succ_begin())
    return false;

  if (A.size() != B.size())
    return false;

  // Compare instructions
  auto I = A.begin(), E = A.end();
  auto OtherI = B.begin(), OtherE = B.end();
  while (I != E && OtherI != OtherE) {
    if (I->getOpcode() != OtherI->getOpcode())
      return false;
    if (!BC.MIB->equals(*I, *OtherI, [](const MCSymbol *A, const MCSymbol *B) {
          return true;
        }))
      return false;
    ++I;
    ++OtherI;
  }
  return true;
}
} // namespace

bool ShrinkWrapping::foldIdenticalSplitEdges() {
  bool Changed = false;
  for (auto Iter = BF.begin(); Iter != BF.end(); ++Iter) {
    BinaryBasicBlock &BB = *Iter;
    if (!BB.getName().starts_with(".LSplitEdge"))
      continue;
    for (BinaryBasicBlock &RBB : llvm::reverse(BF)) {
      if (&RBB == &BB)
        break;
      if (!RBB.getName().starts_with(".LSplitEdge") || !RBB.isValid() ||
          !isIdenticalSplitEdgeBB(BC, *Iter, RBB))
        continue;
      assert(RBB.pred_size() == 1 && "Invalid split edge BB");
      BinaryBasicBlock *Pred = *RBB.pred_begin();
      uint64_t OrigCount = Pred->branch_info_begin()->Count;
      uint64_t OrigMispreds = Pred->branch_info_begin()->MispredictedCount;
      BF.replaceJumpTableEntryIn(Pred, &RBB, &BB);
      Pred->replaceSuccessor(&RBB, &BB, OrigCount, OrigMispreds);
      Changed = true;
      // Remove the block from CFG
      RBB.markValid(false);
    }
  }

  return Changed;
}

namespace {

// A special StackPointerTracking that compensates for our future plans of
// removing/adding instructions.
class PredictiveStackPointerTracking
    : public StackPointerTrackingBase<PredictiveStackPointerTracking> {
  friend class DataflowAnalysis<PredictiveStackPointerTracking,
                                std::pair<int, int>>;
  decltype(ShrinkWrapping::Todo) &TodoMap;
  DataflowInfoManager &Info;

  std::optional<unsigned> AnnotationIndex;

protected:
  void compNextAux(const MCInst &Point,
                   const std::vector<ShrinkWrapping::WorklistItem> &TodoItems,
                   std::pair<int, int> &Res) {
    for (const ShrinkWrapping::WorklistItem &Item : TodoItems) {
      if (Item.Action == ShrinkWrapping::WorklistItem::Erase &&
          BC.MIB->isPush(Point)) {
        Res.first += BC.MIB->getPushSize(Point);
        continue;
      }
      if (Item.Action == ShrinkWrapping::WorklistItem::Erase &&
          BC.MIB->isPop(Point)) {
        Res.first -= BC.MIB->getPopSize(Point);
        continue;
      }
      if (Item.Action == ShrinkWrapping::WorklistItem::InsertPushOrPop &&
          Item.FIEToInsert.IsStore) {
        Res.first -= Item.FIEToInsert.Size;
        continue;
      }
      if (Item.Action == ShrinkWrapping::WorklistItem::InsertPushOrPop &&
          Item.FIEToInsert.IsLoad) {
        Res.first += Item.FIEToInsert.Size;
        continue;
      }
    }
  }

  std::pair<int, int> computeNext(const MCInst &Point,
                                  const std::pair<int, int> &Cur) {
    std::pair<int, int> Res =
        StackPointerTrackingBase<PredictiveStackPointerTracking>::computeNext(
            Point, Cur);
    if (Res.first == StackPointerTracking::SUPERPOSITION ||
        Res.first == StackPointerTracking::EMPTY)
      return Res;
    auto TodoItems =
        BC.MIB->tryGetAnnotationAs<std::vector<ShrinkWrapping::WorklistItem>>(
            Point, ShrinkWrapping::getAnnotationName());
    if (TodoItems)
      compNextAux(Point, *TodoItems, Res);
    auto &InsnToBBMap = Info.getInsnToBBMap();
    if (&*InsnToBBMap[&Point]->rbegin() != &Point)
      return Res;
    auto WRI = TodoMap.find(InsnToBBMap[&Point]);
    if (WRI == TodoMap.end())
      return Res;
    compNextAux(Point, WRI->second, Res);
    return Res;
  }

  StringRef getAnnotationName() const {
    return StringRef("PredictiveStackPointerTracking");
  }

public:
  PredictiveStackPointerTracking(BinaryFunction &BF,
                                 decltype(ShrinkWrapping::Todo) &TodoMap,
                                 DataflowInfoManager &Info,
                                 MCPlusBuilder::AllocatorIdTy AllocatorId = 0)
      : StackPointerTrackingBase<PredictiveStackPointerTracking>(BF,
                                                                 AllocatorId),
        TodoMap(TodoMap), Info(Info) {}

  void run() {
    StackPointerTrackingBase<PredictiveStackPointerTracking>::run();
  }
};

} // end anonymous namespace

void ShrinkWrapping::insertUpdatedCFI(unsigned CSR, int SPValPush,
                                      int SPValPop) {
  MCInst *SavePoint = nullptr;
  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : llvm::reverse(BB)) {
      int32_t SrcImm = 0;
      MCPhysReg Reg = 0;
      MCPhysReg StackPtrReg = 0;
      int64_t StackOffset = 0;
      bool IsIndexed = false;
      bool IsLoad = false;
      bool IsStore = false;
      bool IsSimple = false;
      bool IsStoreFromReg = false;
      uint8_t Size = 0;
      if (!BC.MIB->isStackAccess(Inst, IsLoad, IsStore, IsStoreFromReg, Reg,
                                 SrcImm, StackPtrReg, StackOffset, Size,
                                 IsSimple, IsIndexed))
        continue;
      if (Reg != CSR || !IsStore || !IsSimple)
        continue;
      SavePoint = &Inst;
      break;
    }
    if (SavePoint)
      break;
  }
  assert(SavePoint);
  LLVM_DEBUG({
    dbgs() << "Now using as save point for reg " << CSR << " :";
    SavePoint->dump();
  });
  bool PrevAffectedZone = false;
  BinaryBasicBlock *PrevBB = nullptr;
  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    if (BB->size() == 0)
      continue;
    const bool InAffectedZoneAtEnd = DA.count(*BB->rbegin(), *SavePoint);
    const bool InAffectedZoneAtBegin =
        (*DA.getStateBefore(*BB->begin()))[DA.ExprToIdx[SavePoint]];
    bool InAffectedZone = InAffectedZoneAtBegin;
    for (auto InstIter = BB->begin(); InstIter != BB->end(); ++InstIter) {
      const bool CurZone = DA.count(*InstIter, *SavePoint);
      if (InAffectedZone != CurZone) {
        auto InsertionIter = InstIter;
        ++InsertionIter;
        InAffectedZone = CurZone;
        if (InAffectedZone)
          InstIter = insertCFIsForPushOrPop(*BB, InsertionIter, CSR, true, 0,
                                            SPValPop);
        else
          InstIter = insertCFIsForPushOrPop(*BB, InsertionIter, CSR, false, 0,
                                            SPValPush);
        --InstIter;
      }
    }
    // Are we at the first basic block or hot-cold split point?
    if (!PrevBB || (BF.isSplit() && BB->isCold() != PrevBB->isCold())) {
      if (InAffectedZoneAtBegin)
        insertCFIsForPushOrPop(*BB, BB->begin(), CSR, true, 0, SPValPush);
    } else if (InAffectedZoneAtBegin != PrevAffectedZone) {
      if (InAffectedZoneAtBegin)
        insertCFIsForPushOrPop(*PrevBB, PrevBB->end(), CSR, true, 0, SPValPush);
      else
        insertCFIsForPushOrPop(*PrevBB, PrevBB->end(), CSR, false, 0, SPValPop);
    }
    PrevAffectedZone = InAffectedZoneAtEnd;
    PrevBB = BB;
  }
}

void ShrinkWrapping::rebuildCFIForSP() {
  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : BB) {
      if (!BC.MIB->isCFI(Inst))
        continue;
      const MCCFIInstruction *CFI = BF.getCFIFor(Inst);
      if (CFI->getOperation() == MCCFIInstruction::OpDefCfaOffset)
        BC.MIB->addAnnotation(Inst, "DeleteMe", 0U, AllocatorId);
    }
  }

  int PrevSPVal = -8;
  BinaryBasicBlock *PrevBB = nullptr;
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    if (BB->size() == 0)
      continue;
    const int SPValAtEnd = SPT.getStateAt(*BB->rbegin())->first;
    const int SPValAtBegin = SPT.getStateBefore(*BB->begin())->first;
    int SPVal = SPValAtBegin;
    for (auto Iter = BB->begin(); Iter != BB->end(); ++Iter) {
      const int CurVal = SPT.getStateAt(*Iter)->first;
      if (SPVal != CurVal) {
        auto InsertionIter = Iter;
        ++InsertionIter;
        Iter = BF.addCFIInstruction(
            BB, InsertionIter,
            MCCFIInstruction::cfiDefCfaOffset(nullptr, -CurVal));
        SPVal = CurVal;
      }
    }
    if (BF.isSplit() && PrevBB && BB->isCold() != PrevBB->isCold())
      BF.addCFIInstruction(
          BB, BB->begin(),
          MCCFIInstruction::cfiDefCfaOffset(nullptr, -SPValAtBegin));
    else if (SPValAtBegin != PrevSPVal)
      BF.addCFIInstruction(
          PrevBB, PrevBB->end(),
          MCCFIInstruction::cfiDefCfaOffset(nullptr, -SPValAtBegin));
    PrevSPVal = SPValAtEnd;
    PrevBB = BB;
  }

  for (BinaryBasicBlock &BB : BF)
    for (auto I = BB.begin(); I != BB.end();)
      if (BC.MIB->hasAnnotation(*I, "DeleteMe"))
        I = BB.eraseInstruction(I);
      else
        ++I;
}

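/// Build the load/store (or push/pop when CreatePushOrPop is set) for a
/// FrameIndexEntry, addressing relative to SP when its value is known and
/// falling back to FP otherwise; the encoded offset is rebased against the
/// chosen base register's tracked value.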
Expected<MCInst> ShrinkWrapping::createStackAccess(int SPVal, int FPVal,
                                                   const FrameIndexEntry &FIE,
                                                   bool CreatePushOrPop) {
  MCInst NewInst;
  if (SPVal != StackPointerTracking::SUPERPOSITION &&
      SPVal != StackPointerTracking::EMPTY) {
    if (FIE.IsLoad) {
      BC.MIB->createRestoreFromStack(NewInst, BC.MIB->getStackPointer(),
                                     FIE.StackOffset - SPVal, FIE.RegOrImm,
                                     FIE.Size);
    } else {
      BC.MIB->createSaveToStack(NewInst, BC.MIB->getStackPointer(),
                                FIE.StackOffset - SPVal, FIE.RegOrImm,
                                FIE.Size);
    }
    if (CreatePushOrPop)
      BC.MIB->changeToPushOrPop(NewInst);
    return NewInst;
  }
  assert(FPVal != StackPointerTracking::SUPERPOSITION &&
         FPVal != StackPointerTracking::EMPTY);

  if (FIE.IsLoad) {
    BC.MIB->createRestoreFromStack(NewInst, BC.MIB->getFramePointer(),
                                   FIE.StackOffset - FPVal, FIE.RegOrImm,
                                   FIE.Size);
  } else {
    BC.MIB->createSaveToStack(NewInst, BC.MIB->getFramePointer(),
                              FIE.StackOffset - FPVal, FIE.RegOrImm, FIE.Size);
  }
  return NewInst;
}

void ShrinkWrapping::updateCFIInstOffset(MCInst &Inst, int64_t NewOffset) {
  const MCCFIInstruction *CFI = BF.getCFIFor(Inst);
  if (UpdatedCFIs.count(CFI))
    return;

  switch (CFI->getOperation()) {
  case MCCFIInstruction::OpDefCfa:
  case MCCFIInstruction::OpDefCfaRegister:
  case MCCFIInstruction::OpDefCfaOffset:
    CFI = BF.mutateCFIOffsetFor(Inst, -NewOffset);
    break;
  case MCCFIInstruction::OpOffset:
  default:
    break;
  }

  UpdatedCFIs.insert(CFI);
}

BBIterTy ShrinkWrapping::insertCFIsForPushOrPop(BinaryBasicBlock &BB,
                                                BBIterTy Pos, unsigned Reg,
                                                bool isPush, int Sz,
                                                int64_t NewOffset) {
  if (isPush) {
    for (uint32_t Idx : DeletedPushCFIs[Reg]) {
      Pos = BF.addCFIPseudo(&BB, Pos, Idx);
      updateCFIInstOffset(*Pos++, NewOffset);
    }
    if (HasDeletedOffsetCFIs[Reg]) {
      Pos = BF.addCFIInstruction(
          &BB, Pos,
          MCCFIInstruction::createOffset(
              nullptr, BC.MRI->getDwarfRegNum(Reg, false), NewOffset));
      ++Pos;
    }
  } else {
    for (uint32_t Idx : DeletedPopCFIs[Reg]) {
      Pos = BF.addCFIPseudo(&BB, Pos, Idx);
      updateCFIInstOffset(*Pos++, NewOffset);
    }
    if (HasDeletedOffsetCFIs[Reg]) {
      Pos = BF.addCFIInstruction(
          &BB, Pos,
          MCCFIInstruction::createSameValue(
              nullptr, BC.MRI->getDwarfRegNum(Reg, false)));
      ++Pos;
    }
  }
  return Pos;
}

Expected<BBIterTy> ShrinkWrapping::processInsertion(BBIterTy InsertionPoint,
                                                    BinaryBasicBlock *CurBB,
                                                    const WorklistItem &Item,
                                                    int64_t SPVal,
                                                    int64_t FPVal) {
  // Trigger CFI reconstruction for this CSR if necessary - writing to
  // PushOffsetByReg/PopOffsetByReg *will* trigger CFI update
  if ((Item.FIEToInsert.IsStore &&
       !DeletedPushCFIs[Item.AffectedReg].empty()) ||
      (Item.FIEToInsert.IsLoad && !DeletedPopCFIs[Item.AffectedReg].empty()) ||
      HasDeletedOffsetCFIs[Item.AffectedReg]) {
    if (Item.Action == WorklistItem::InsertPushOrPop) {
      if (Item.FIEToInsert.IsStore)
        PushOffsetByReg[Item.AffectedReg] = SPVal - Item.FIEToInsert.Size;
      else
        PopOffsetByReg[Item.AffectedReg] = SPVal;
    } else {
      if (Item.FIEToInsert.IsStore)
        PushOffsetByReg[Item.AffectedReg] = Item.FIEToInsert.StackOffset;
      else
        PopOffsetByReg[Item.AffectedReg] = Item.FIEToInsert.StackOffset;
    }
  }

  LLVM_DEBUG({
    dbgs() << "Creating stack access with SPVal = " << SPVal
           << "; stack offset = " << Item.FIEToInsert.StackOffset
           << " Is push = " << (Item.Action == WorklistItem::InsertPushOrPop)
           << "\n";
  });
  Expected<MCInst> NewInstOrErr =
      createStackAccess(SPVal, FPVal, Item.FIEToInsert,
                        Item.Action == WorklistItem::InsertPushOrPop);
  if (auto E = NewInstOrErr.takeError())
    return Error(std::move(E));
  MCInst &NewInst = *NewInstOrErr;
  if (InsertionPoint != CurBB->end()) {
    LLVM_DEBUG({
      dbgs() << "Adding before Inst: ";
      InsertionPoint->dump();
      dbgs() << "the following inst: ";
      NewInst.dump();
    });
    BBIterTy Iter =
        CurBB->insertInstruction(InsertionPoint, std::move(NewInst));
    return ++Iter;
  }
  CurBB->addInstruction(std::move(NewInst));
  LLVM_DEBUG(dbgs() << "Adding to BB!\n");
  return CurBB->end();
}

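// Apply every insertion scheduled for this program point. The incoming
// SPVal/FPVal come from PredictiveStackPointerTracking, which already
// simulates the scheduled pushes/pops, so for end-of-block insertions their
// effect is undone first to recover the SP value before the insertions.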
Expected<BBIterTy> ShrinkWrapping::processInsertionsList(
    BBIterTy InsertionPoint, BinaryBasicBlock *CurBB,
    std::vector<WorklistItem> &TodoList, int64_t SPVal, int64_t FPVal) {
  bool HasInsertions = llvm::any_of(TodoList, [&](WorklistItem &Item) {
    return Item.Action == WorklistItem::InsertLoadOrStore ||
           Item.Action == WorklistItem::InsertPushOrPop;
  });

  if (!HasInsertions)
    return InsertionPoint;

  assert(((SPVal != StackPointerTracking::SUPERPOSITION &&
           SPVal != StackPointerTracking::EMPTY) ||
          (FPVal != StackPointerTracking::SUPERPOSITION &&
           FPVal != StackPointerTracking::EMPTY)) &&
         "Cannot insert if we have no idea of the stack state here");

  // Revert the effect of PSPT for this location: we want the SP value before
  // the insertions.
  if (InsertionPoint == CurBB->end()) {
    for (WorklistItem &Item : TodoList) {
      if (Item.Action != WorklistItem::InsertPushOrPop)
        continue;
      if (Item.FIEToInsert.IsStore)
        SPVal += Item.FIEToInsert.Size;
      if (Item.FIEToInsert.IsLoad)
        SPVal -= Item.FIEToInsert.Size;
    }
  }

  // Reorder POPs to obey the correct dominance relation between them
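  // (non-pop items keep their relative order and sort before the pops; the
  // pops themselves are ordered by the dominance order of their saves so
  // that restores mirror the save order, LIFO-style).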
  llvm::stable_sort(TodoList, [&](const WorklistItem &A,
                                  const WorklistItem &B) {
    if ((A.Action != WorklistItem::InsertPushOrPop || !A.FIEToInsert.IsLoad) &&
        (B.Action != WorklistItem::InsertPushOrPop || !B.FIEToInsert.IsLoad))
      return false;
    if ((A.Action != WorklistItem::InsertPushOrPop || !A.FIEToInsert.IsLoad))
      return true;
    if ((B.Action != WorklistItem::InsertPushOrPop || !B.FIEToInsert.IsLoad))
      return false;
    return DomOrder[B.AffectedReg] < DomOrder[A.AffectedReg];
  });

  // Process insertions
  for (WorklistItem &Item : TodoList) {
    if (Item.Action == WorklistItem::Erase ||
        Item.Action == WorklistItem::ChangeToAdjustment)
      continue;

    auto InsertionPointOrErr =
        processInsertion(InsertionPoint, CurBB, Item, SPVal, FPVal);
    if (auto E = InsertionPointOrErr.takeError())
      return Error(std::move(E));
    InsertionPoint = *InsertionPointOrErr;
    if (Item.Action == WorklistItem::InsertPushOrPop &&
        Item.FIEToInsert.IsStore)
      SPVal -= Item.FIEToInsert.Size;
    if (Item.Action == WorklistItem::InsertPushOrPop &&
        Item.FIEToInsert.IsLoad)
      SPVal += Item.FIEToInsert.Size;
  }
  return InsertionPoint;
}

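// Walk the function and materialize all scheduled insertions, both those
// attached to an instruction and those scheduled at the end of a block. The
// SP/FP state at each point comes from PredictiveStackPointerTracking, which
// simulates the stack effects of the insertions themselves. Returns true if
// anything was inserted.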
Expected<bool> ShrinkWrapping::processInsertions() {
  PredictiveStackPointerTracking PSPT(BF, Todo, Info, AllocatorId);
  PSPT.run();

  bool Changes = false;
  for (BinaryBasicBlock &BB : BF) {
    // Process insertions scheduled before an instruction.
    for (auto I = BB.begin(); I != BB.end(); ++I) {
      MCInst &Inst = *I;
      auto TodoList = BC.MIB->tryGetAnnotationAs<std::vector<WorklistItem>>(
          Inst, getAnnotationIndex());
      if (!TodoList)
        continue;
      Changes = true;
      std::vector<WorklistItem> List = *TodoList;
      LLVM_DEBUG({
        dbgs() << "Now processing insertions in " << BB.getName()
               << " before inst: ";
        Inst.dump();
      });
      auto Iter = I;
      std::pair<int, int> SPTState =
          *PSPT.getStateAt(Iter == BB.begin() ? (ProgramPoint)&BB : &*(--Iter));
      auto IterOrErr =
          processInsertionsList(I, &BB, List, SPTState.first, SPTState.second);
      if (auto E = IterOrErr.takeError())
        return Error(std::move(E));
      I = *IterOrErr;
    }
    // Process insertions scheduled at the end of the block.
    auto WRI = Todo.find(&BB);
    if (WRI != Todo.end()) {
      std::pair<int, int> SPTState = *PSPT.getStateAt(*BB.rbegin());
      if (auto E = processInsertionsList(BB.end(), &BB, WRI->second,
                                         SPTState.first, SPTState.second)
                       .takeError())
        return Error(std::move(E));
      Changes = true;
    }
  }
  return Changes;
}

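// Apply all scheduled deletions. A push/pop whose stack-adjustment effect is
// still needed is rewritten as an explicit SP adjustment instead of being
// erased (ChangeToAdjustment); the flags-preserving variant of the
// adjustment is requested when the flags register is live at that point.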
void ShrinkWrapping::processDeletions() {
  LivenessAnalysis &LA = Info.getLivenessAnalysis();
  for (BinaryBasicBlock &BB : BF) {
    for (auto II = BB.begin(); II != BB.end(); ++II) {
      MCInst &Inst = *II;
      auto TodoList = BC.MIB->tryGetAnnotationAs<std::vector<WorklistItem>>(
          Inst, getAnnotationIndex());
      if (!TodoList)
        continue;
      // Process all deletions
      for (WorklistItem &Item : *TodoList) {
        if (Item.Action != WorklistItem::Erase &&
            Item.Action != WorklistItem::ChangeToAdjustment)
          continue;

        if (Item.Action == WorklistItem::ChangeToAdjustment) {
          // Is the flags register alive at this point? If so, avoid
          // clobbering it when adjusting SP.
          bool DontClobberFlags = LA.isAlive(&Inst, BC.MIB->getFlagsReg());
          if (int Sz = BC.MIB->getPushSize(Inst)) {
            BC.MIB->createStackPointerIncrement(Inst, Sz, DontClobberFlags);
            continue;
          }
          if (int Sz = BC.MIB->getPopSize(Inst)) {
            BC.MIB->createStackPointerDecrement(Inst, Sz, DontClobberFlags);
            continue;
          }
        }

        LLVM_DEBUG({
          dbgs() << "Erasing: ";
          BC.printInstruction(dbgs(), Inst);
        });
        II = std::prev(BB.eraseInstruction(II));
        break;
      }
    }
  }
}

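// Rebuild the unwind info invalidated by moved saves and restores: when the
// function has no frame pointer, all CFA offset rules are re-synthesized
// from scratch; then, for each register whose save/restore moved, fresh CFIs
// are emitted at the new push/pop locations.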
void ShrinkWrapping::rebuildCFI() {
  const bool FP = Info.getStackPointerTracking().HasFramePointer;
  Info.invalidateAll();
  if (!FP) {
    rebuildCFIForSP();
    Info.invalidateAll();
  }
  for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
    if (PushOffsetByReg[I] == 0 || PopOffsetByReg[I] == 0)
      continue;
    const int64_t SPValPush = PushOffsetByReg[I];
    const int64_t SPValPop = PopOffsetByReg[I];
    insertUpdatedCFI(I, SPValPush, SPValPop);
    Info.invalidateAll();
  }
}

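// Driver for one function: gather dynamic instruction/store counts for the
// statistics, bail out early on functions that cannot be handled safely,
// then run the analysis and transformation pipeline, fixing up branches and
// CFI afterwards. Returns true if the function was modified.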
Expected<bool> ShrinkWrapping::perform(bool HotOnly) {
  HasDeletedOffsetCFIs = BitVector(BC.MRI->getNumRegs(), false);
  PushOffsetByReg = std::vector<int64_t>(BC.MRI->getNumRegs(), 0LL);
  PopOffsetByReg = std::vector<int64_t>(BC.MRI->getNumRegs(), 0LL);

  // Update pass statistics
  uint64_t TotalInstrs = 0ULL;
  uint64_t TotalStoreInstrs = 0ULL;
  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    uint64_t BBExecCount = BB->getExecutionCount();
    if (!BBExecCount || BBExecCount == BinaryBasicBlock::COUNT_NO_PROFILE)
      continue;
    for (const auto &Instr : *BB) {
      if (BC.MIB->isPseudo(Instr))
        continue;
      if (BC.MIB->mayStore(Instr))
        TotalStoreInstrs += BBExecCount;
      TotalInstrs += BBExecCount;
    }
  }
  InstrDynamicCount += TotalInstrs;
  StoreDynamicCount += TotalStoreInstrs;

  if (!FA.hasFrameInfo(BF))
    return false;

  if (HotOnly && (BF.getKnownExecutionCount() < BC.getHotThreshold()))
    return false;

  if (opts::EqualizeBBCounts)
    equalizeBBCounts(Info, BF);

  if (BF.checkForAmbiguousJumpTables()) {
    LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ambiguous JTs in " << BF.getPrintName()
                      << ".\n");
    // We could call disambiguateJumpTables here, but it is probably not
    // worth the cost (of duplicating potentially large jump tables that
    // could regress dcache misses). Moreover, ambiguous JTs are rare and
    // usually come from code written in assembly. Just bail.
    return false;
  }
  SLM.initialize();
  CSA.compute();
  classifyCSRUses();
  pruneUnwantedCSRs();
  computeSaveLocations();
  moveSaveRestores();
  LLVM_DEBUG({
    dbgs() << "Func before shrink-wrapping: \n";
    BF.dump();
  });
  SLM.performChanges();
  // Early exit if processInsertions doesn't detect any todo items
  auto ModifiedOrErr = processInsertions();
  if (auto E = ModifiedOrErr.takeError())
    return Error(std::move(E));
  const bool Modified = *ModifiedOrErr;
  if (!Modified)
    return false;
  processDeletions();
  if (foldIdenticalSplitEdges()) {
    const std::pair<unsigned, uint64_t> Stats = BF.eraseInvalidBBs();
    (void)Stats;
    LLVM_DEBUG(dbgs() << "Deleted " << Stats.first
                      << " redundant split edge BBs (" << Stats.second
                      << " bytes) for " << BF.getPrintName() << "\n");
  }
  rebuildCFI();
  // We may have split edges, creating BBs that need correct branching
  BF.fixBranches();
  LLVM_DEBUG({
    dbgs() << "Func after shrink-wrapping: \n";
    BF.dump();
  });
  return true;
}

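// Report aggregate statistics for the pass. All counts are dynamic, i.e.
// weighted by the basic block execution counts from the profile.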
void ShrinkWrapping::printStats(BinaryContext &BC) {
  BC.outs() << "BOLT-INFO: Shrink wrapping moved " << SpillsMovedRegularMode
            << " spills inserting load/stores and " << SpillsMovedPushPopMode
            << " spills inserting push/pops\n";
  if (!InstrDynamicCount || !StoreDynamicCount)
    return;
  BC.outs() << "BOLT-INFO: Shrink wrapping reduced " << SpillsMovedDynamicCount
            << " store executions ("
            << format("%.1lf%%",
                      (100.0 * SpillsMovedDynamicCount / InstrDynamicCount))
            << " total instructions executed, "
            << format("%.1lf%%",
                      (100.0 * SpillsMovedDynamicCount / StoreDynamicCount))
            << " store instructions)\n";
  BC.outs() << "BOLT-INFO: Shrink wrapping failed at reducing "
            << SpillsFailedDynamicCount << " store executions ("
            << format("%.1lf%%",
                      (100.0 * SpillsFailedDynamicCount / InstrDynamicCount))
            << " total instructions executed, "
            << format("%.1lf%%",
                      (100.0 * SpillsFailedDynamicCount / StoreDynamicCount))
            << " store instructions)\n";
}

// Operators necessary as a result of using MCAnnotation
raw_ostream &operator<<(raw_ostream &OS,
                        const std::vector<ShrinkWrapping::WorklistItem> &Vec) {
  OS << "SWTodo[";
  const char *Sep = "";
  for (const ShrinkWrapping::WorklistItem &Item : Vec) {
    OS << Sep;
    switch (Item.Action) {
    case ShrinkWrapping::WorklistItem::Erase:
      OS << "Erase";
      break;
    case ShrinkWrapping::WorklistItem::ChangeToAdjustment:
      OS << "ChangeToAdjustment";
      break;
    case ShrinkWrapping::WorklistItem::InsertLoadOrStore:
      OS << "InsertLoadOrStore";
      break;
    case ShrinkWrapping::WorklistItem::InsertPushOrPop:
      OS << "InsertPushOrPop";
      break;
    }
    Sep = ", ";
  }
  OS << "]";
  return OS;
}

raw_ostream &
operator<<(raw_ostream &OS,
           const std::vector<StackLayoutModifier::WorklistItem> &Vec) {
  OS << "SLMTodo[";
  const char *Sep = "";
  for (const StackLayoutModifier::WorklistItem &Item : Vec) {
    OS << Sep;
    switch (Item.Action) {
    case StackLayoutModifier::WorklistItem::None:
      OS << "None";
      break;
    case StackLayoutModifier::WorklistItem::AdjustLoadStoreOffset:
      OS << "AdjustLoadStoreOffset";
      break;
    case StackLayoutModifier::WorklistItem::AdjustCFI:
      OS << "AdjustCFI";
      break;
    }
    Sep = ", ";
  }
  OS << "]";
  return OS;
}

bool operator==(const ShrinkWrapping::WorklistItem &A,
                const ShrinkWrapping::WorklistItem &B) {
  return (A.Action == B.Action && A.AffectedReg == B.AffectedReg &&
          A.Adjustment == B.Adjustment &&
          A.FIEToInsert.IsLoad == B.FIEToInsert.IsLoad &&
          A.FIEToInsert.IsStore == B.FIEToInsert.IsStore &&
          A.FIEToInsert.RegOrImm == B.FIEToInsert.RegOrImm &&
          A.FIEToInsert.Size == B.FIEToInsert.Size &&
          A.FIEToInsert.IsSimple == B.FIEToInsert.IsSimple &&
          A.FIEToInsert.StackOffset == B.FIEToInsert.StackOffset);
}

bool operator==(const StackLayoutModifier::WorklistItem &A,
                const StackLayoutModifier::WorklistItem &B) {
  return (A.Action == B.Action && A.OffsetUpdate == B.OffsetUpdate);
}

} // end namespace bolt
} // end namespace llvm