llvm-project
500 строк · 17.0 Кб
1//===- bolt/Passes/RegReAssign.cpp ----------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the RegReAssign class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "bolt/Passes/RegReAssign.h"14#include "bolt/Core/BinaryFunctionCallGraph.h"15#include "bolt/Core/MCPlus.h"16#include "bolt/Passes/DataflowAnalysis.h"17#include "bolt/Passes/DataflowInfoManager.h"18#include "bolt/Utils/Utils.h"19#include <numeric>20
21#define DEBUG_TYPE "regreassign"22
23using namespace llvm;24
25namespace opts {26extern cl::OptionCategory BoltOptCategory;27extern cl::opt<bool> UpdateDebugSections;28
29static cl::opt<bool> AggressiveReAssign(30"use-aggr-reg-reassign",31cl::desc("use register liveness analysis to try to find more opportunities "32"for -reg-reassign optimization"),33cl::cat(BoltOptCategory));34}
35
36namespace llvm {37namespace bolt {38
39void RegReAssign::swap(BinaryFunction &Function, MCPhysReg A, MCPhysReg B) {40BinaryContext &BC = Function.getBinaryContext();41const BitVector &AliasA = BC.MIB->getAliases(A, false);42const BitVector &AliasB = BC.MIB->getAliases(B, false);43
44// Regular instructions45for (BinaryBasicBlock &BB : Function) {46for (MCInst &Inst : BB) {47for (MCOperand &Operand : MCPlus::primeOperands(Inst)) {48if (!Operand.isReg())49continue;50
51unsigned Reg = Operand.getReg();52if (AliasA.test(Reg)) {53Operand.setReg(BC.MIB->getAliasSized(B, BC.MIB->getRegSize(Reg)));54--StaticBytesSaved;55DynBytesSaved -= BB.getKnownExecutionCount();56continue;57}58if (!AliasB.test(Reg))59continue;60Operand.setReg(BC.MIB->getAliasSized(A, BC.MIB->getRegSize(Reg)));61++StaticBytesSaved;62DynBytesSaved += BB.getKnownExecutionCount();63}64}65}66
67// CFI68DenseSet<const MCCFIInstruction *> Changed;69for (BinaryBasicBlock &BB : Function) {70for (MCInst &Inst : BB) {71if (!BC.MIB->isCFI(Inst))72continue;73const MCCFIInstruction *CFI = Function.getCFIFor(Inst);74if (Changed.count(CFI))75continue;76Changed.insert(CFI);77
78switch (CFI->getOperation()) {79case MCCFIInstruction::OpRegister: {80const unsigned CFIReg2 = CFI->getRegister2();81const MCPhysReg Reg2 = *BC.MRI->getLLVMRegNum(CFIReg2, /*isEH=*/false);82if (AliasA.test(Reg2)) {83Function.setCFIFor(84Inst, MCCFIInstruction::createRegister(85nullptr, CFI->getRegister(),86BC.MRI->getDwarfRegNum(87BC.MIB->getAliasSized(B, BC.MIB->getRegSize(Reg2)),88false)));89} else if (AliasB.test(Reg2)) {90Function.setCFIFor(91Inst, MCCFIInstruction::createRegister(92nullptr, CFI->getRegister(),93BC.MRI->getDwarfRegNum(94BC.MIB->getAliasSized(A, BC.MIB->getRegSize(Reg2)),95false)));96}97}98[[fallthrough]];99case MCCFIInstruction::OpUndefined:100case MCCFIInstruction::OpDefCfa:101case MCCFIInstruction::OpOffset:102case MCCFIInstruction::OpRestore:103case MCCFIInstruction::OpSameValue:104case MCCFIInstruction::OpDefCfaRegister:105case MCCFIInstruction::OpRelOffset:106case MCCFIInstruction::OpEscape: {107unsigned CFIReg;108if (CFI->getOperation() != MCCFIInstruction::OpEscape) {109CFIReg = CFI->getRegister();110} else {111std::optional<uint8_t> Reg =112readDWARFExpressionTargetReg(CFI->getValues());113// Handle DW_CFA_def_cfa_expression114if (!Reg)115break;116CFIReg = *Reg;117}118const MCPhysReg Reg = *BC.MRI->getLLVMRegNum(CFIReg, /*isEH=*/false);119if (AliasA.test(Reg))120Function.mutateCFIRegisterFor(121Inst,122BC.MRI->getDwarfRegNum(123BC.MIB->getAliasSized(B, BC.MIB->getRegSize(Reg)), false));124else if (AliasB.test(Reg))125Function.mutateCFIRegisterFor(126Inst,127BC.MRI->getDwarfRegNum(128BC.MIB->getAliasSized(A, BC.MIB->getRegSize(Reg)), false));129break;130}131default:132break;133}134}135}136}
137
138void RegReAssign::rankRegisters(BinaryFunction &Function) {139BinaryContext &BC = Function.getBinaryContext();140std::fill(RegScore.begin(), RegScore.end(), 0);141std::fill(RankedRegs.begin(), RankedRegs.end(), 0);142
143auto countRegScore = [&](BinaryBasicBlock &BB) {144for (MCInst &Inst : BB) {145const bool CannotUseREX = BC.MIB->cannotUseREX(Inst);146const MCInstrDesc &Desc = BC.MII->get(Inst.getOpcode());147
148// Disallow substituitions involving regs in implicit uses lists149for (MCPhysReg ImplicitUse : Desc.implicit_uses()) {150const size_t RegEC =151BC.MIB->getAliases(ImplicitUse, false).find_first();152RegScore[RegEC] =153std::numeric_limits<decltype(RegScore)::value_type>::min();154}155
156// Disallow substituitions involving regs in implicit defs lists157for (MCPhysReg ImplicitDef : Desc.implicit_defs()) {158const size_t RegEC =159BC.MIB->getAliases(ImplicitDef, false).find_first();160RegScore[RegEC] =161std::numeric_limits<decltype(RegScore)::value_type>::min();162}163
164for (int I = 0, E = MCPlus::getNumPrimeOperands(Inst); I != E; ++I) {165const MCOperand &Operand = Inst.getOperand(I);166if (!Operand.isReg())167continue;168
169if (Desc.getOperandConstraint(I, MCOI::TIED_TO) != -1)170continue;171
172unsigned Reg = Operand.getReg();173size_t RegEC = BC.MIB->getAliases(Reg, false).find_first();174if (RegEC == 0)175continue;176
177// Disallow substituitions involving regs in instrs that cannot use REX178// The relationship of X86 registers is shown in the diagram. BL and BH179// do not have a direct alias relationship. However, if the BH register180// cannot be swapped, then the BX/EBX/RBX registers cannot be swapped as181// well, which means that BL register also cannot be swapped. Therefore,182// in the presence of BX/EBX/RBX registers, BL and BH have an alias183// relationship.184// ┌─────────────────┐185// │ RBX │186// ├─────┬───────────┤187// │ │ EBX │188// ├─────┴──┬────────┤189// │ │ BX │190// ├────────┼───┬────┤191// │ │BH │BL │192// └────────┴───┴────┘193if (CannotUseREX) {194RegScore[RegEC] =195std::numeric_limits<decltype(RegScore)::value_type>::min();196RegScore[BC.MIB->getAliasSized(Reg, 1)] = RegScore[RegEC];197continue;198}199
200// Unsupported substitution, cannot swap BH with R* regs, bail201if (BC.MIB->isUpper8BitReg(Reg) && ClassicCSR.test(Reg)) {202RegScore[RegEC] =203std::numeric_limits<decltype(RegScore)::value_type>::min();204RegScore[BC.MIB->getAliasSized(Reg, 1)] = RegScore[RegEC];205continue;206}207
208RegScore[RegEC] += BB.getKnownExecutionCount();209}210}211};212for (BinaryBasicBlock &BB : Function)213countRegScore(BB);214
215for (BinaryFunction *ChildFrag : Function.getFragments()) {216for (BinaryBasicBlock &BB : *ChildFrag)217countRegScore(BB);218}219
220std::iota(RankedRegs.begin(), RankedRegs.end(), 0); // 0, 1, 2, 3...221llvm::sort(RankedRegs,222[&](size_t A, size_t B) { return RegScore[A] > RegScore[B]; });223
224LLVM_DEBUG({225for (size_t Reg : RankedRegs) {226if (RegScore[Reg] == 0)227continue;228dbgs() << Reg << " ";229if (RegScore[Reg] > 0)230dbgs() << BC.MRI->getName(Reg) << ": " << RegScore[Reg] << "\n";231else232dbgs() << BC.MRI->getName(Reg) << ": (blacklisted)\n";233}234});235}
236
237void RegReAssign::aggressivePassOverFunction(BinaryFunction &Function) {238BinaryContext &BC = Function.getBinaryContext();239rankRegisters(Function);240
241// If there is a situation where function:242// A() -> A.cold()243// A.localalias() -> A.cold()244// simply swapping these two calls can cause issues.245for (BinaryFunction *ChildFrag : Function.getFragments()) {246if (ChildFrag->getParentFragments()->size() > 1)247return;248if (ChildFrag->empty())249return;250}251
252// Bail early if our registers are all black listed, before running expensive253// analysis passes254bool Bail = true;255int64_t LowScoreClassic = std::numeric_limits<int64_t>::max();256for (int J : ClassicRegs.set_bits()) {257if (RegScore[J] <= 0)258continue;259Bail = false;260if (RegScore[J] < LowScoreClassic)261LowScoreClassic = RegScore[J];262}263if (Bail)264return;265BitVector Extended = ClassicRegs;266Extended.flip();267Extended &= GPRegs;268Bail = true;269int64_t HighScoreExtended = 0;270for (int J : Extended.set_bits()) {271if (RegScore[J] <= 0)272continue;273Bail = false;274if (RegScore[J] > HighScoreExtended)275HighScoreExtended = RegScore[J];276}277// Also bail early if there is no profitable substitution even if we assume278// all registers can be exchanged279if (Bail || (LowScoreClassic << 1) >= HighScoreExtended)280return;281
282// -- expensive pass -- determine all regs alive during func start283DataflowInfoManager Info(Function, RA.get(), nullptr);284BitVector AliveAtStart = *Info.getLivenessAnalysis().getStateAt(285ProgramPoint::getFirstPointAt(*Function.begin()));286for (BinaryBasicBlock &BB : Function)287if (BB.pred_size() == 0)288AliveAtStart |= *Info.getLivenessAnalysis().getStateAt(289ProgramPoint::getFirstPointAt(BB));290
291// Mark frame pointer alive because of CFI292AliveAtStart |= BC.MIB->getAliases(BC.MIB->getFramePointer(), false);293// Never touch return registers294BC.MIB->getDefaultLiveOut(AliveAtStart);295
296// Try swapping more profitable options first297auto Begin = RankedRegs.begin();298auto End = std::prev(RankedRegs.end());299while (Begin != End) {300MCPhysReg ClassicReg = *End;301if (!ClassicRegs[ClassicReg] || RegScore[ClassicReg] <= 0) {302--End;303continue;304}305
306MCPhysReg ExtReg = *Begin;307if (!Extended[ExtReg] || RegScore[ExtReg] <= 0) {308++Begin;309continue;310}311
312if (RegScore[ClassicReg] << 1 >= RegScore[ExtReg]) {313LLVM_DEBUG(dbgs() << " Ending at " << BC.MRI->getName(ClassicReg)314<< " with " << BC.MRI->getName(ExtReg)315<< " because exchange is not profitable\n");316break;317}318
319BitVector AnyAliasAlive = AliveAtStart;320AnyAliasAlive &= BC.MIB->getAliases(ClassicReg);321if (AnyAliasAlive.any()) {322LLVM_DEBUG(dbgs() << " Bailed on " << BC.MRI->getName(ClassicReg)323<< " with " << BC.MRI->getName(ExtReg)324<< " because classic reg is alive\n");325--End;326continue;327}328AnyAliasAlive = AliveAtStart;329AnyAliasAlive &= BC.MIB->getAliases(ExtReg);330if (AnyAliasAlive.any()) {331LLVM_DEBUG(dbgs() << " Bailed on " << BC.MRI->getName(ClassicReg)332<< " with " << BC.MRI->getName(ExtReg)333<< " because extended reg is alive\n");334++Begin;335continue;336}337
338// Opportunity detected. Swap.339LLVM_DEBUG(dbgs() << "\n ** Swapping " << BC.MRI->getName(ClassicReg)340<< " with " << BC.MRI->getName(ExtReg) << "\n\n");341swap(Function, ClassicReg, ExtReg);342FuncsChanged.insert(&Function);343for (BinaryFunction *ChildFrag : Function.getFragments()) {344swap(*ChildFrag, ClassicReg, ExtReg);345FuncsChanged.insert(ChildFrag);346}347++Begin;348if (Begin == End)349break;350--End;351}352}
353
354bool RegReAssign::conservativePassOverFunction(BinaryFunction &Function) {355BinaryContext &BC = Function.getBinaryContext();356rankRegisters(Function);357
358for (BinaryFunction *ChildFrag : Function.getFragments()) {359if (ChildFrag->getParentFragments()->size() > 1)360return false;361if (ChildFrag->empty())362return false;363}364
365// Try swapping R12, R13, R14 or R15 with RBX (we work with all callee-saved366// regs except RBP)367MCPhysReg Candidate = 0;368for (int J : ExtendedCSR.set_bits())369if (RegScore[J] > RegScore[Candidate])370Candidate = J;371
372if (!Candidate || RegScore[Candidate] < 0)373return false;374
375// Check if our classic callee-saved reg (RBX is the only one) has lower376// score / utilization rate377MCPhysReg RBX = 0;378for (int I : ClassicCSR.set_bits()) {379int64_t ScoreRBX = RegScore[I];380if (ScoreRBX <= 0)381continue;382
383if (RegScore[Candidate] > (ScoreRBX + 10))384RBX = I;385}386
387if (!RBX)388return false;389
390// The high 8 bits of the register will never be swapped. To prevent the high391// 8 bits from being swapped incorrectly, we should switched to swapping the392// low 8 bits of the register instead.393if (BC.MIB->isUpper8BitReg(RBX)) {394RBX = BC.MIB->getAliasSized(RBX, 1);395if (RegScore[RBX] < 0 || RegScore[RBX] > RegScore[Candidate])396return false;397}398
399LLVM_DEBUG(dbgs() << "\n ** Swapping " << BC.MRI->getName(RBX) << " with "400<< BC.MRI->getName(Candidate) << "\n\n");401(void)BC;402swap(Function, RBX, Candidate);403FuncsChanged.insert(&Function);404for (BinaryFunction *ChildFrag : Function.getFragments()) {405swap(*ChildFrag, RBX, Candidate);406FuncsChanged.insert(ChildFrag);407}408return true;409}
410
411void RegReAssign::setupAggressivePass(BinaryContext &BC,412std::map<uint64_t, BinaryFunction> &BFs) {413setupConservativePass(BC, BFs);414CG.reset(new BinaryFunctionCallGraph(buildCallGraph(BC)));415RA.reset(new RegAnalysis(BC, &BFs, &*CG));416
417GPRegs = BitVector(BC.MRI->getNumRegs(), false);418BC.MIB->getGPRegs(GPRegs);419}
420
421void RegReAssign::setupConservativePass(422BinaryContext &BC, std::map<uint64_t, BinaryFunction> &BFs) {423// Set up constant bitvectors used throughout this analysis424ClassicRegs = BitVector(BC.MRI->getNumRegs(), false);425CalleeSaved = BitVector(BC.MRI->getNumRegs(), false);426ClassicCSR = BitVector(BC.MRI->getNumRegs(), false);427ExtendedCSR = BitVector(BC.MRI->getNumRegs(), false);428// Never consider the frame pointer429BC.MIB->getClassicGPRegs(ClassicRegs);430ClassicRegs.flip();431ClassicRegs |= BC.MIB->getAliases(BC.MIB->getFramePointer(), false);432ClassicRegs.flip();433BC.MIB->getCalleeSavedRegs(CalleeSaved);434ClassicCSR |= ClassicRegs;435ClassicCSR &= CalleeSaved;436BC.MIB->getClassicGPRegs(ClassicRegs);437ExtendedCSR |= ClassicRegs;438ExtendedCSR.flip();439ExtendedCSR &= CalleeSaved;440
441LLVM_DEBUG({442RegStatePrinter P(BC);443dbgs() << "Starting register reassignment\nClassicRegs: ";444P.print(dbgs(), ClassicRegs);445dbgs() << "\nCalleeSaved: ";446P.print(dbgs(), CalleeSaved);447dbgs() << "\nClassicCSR: ";448P.print(dbgs(), ClassicCSR);449dbgs() << "\nExtendedCSR: ";450P.print(dbgs(), ExtendedCSR);451dbgs() << "\n";452});453}
454
455Error RegReAssign::runOnFunctions(BinaryContext &BC) {456RegScore = std::vector<int64_t>(BC.MRI->getNumRegs(), 0);457RankedRegs = std::vector<size_t>(BC.MRI->getNumRegs(), 0);458
459if (opts::AggressiveReAssign)460setupAggressivePass(BC, BC.getBinaryFunctions());461else462setupConservativePass(BC, BC.getBinaryFunctions());463
464for (auto &I : BC.getBinaryFunctions()) {465BinaryFunction &Function = I.second;466
467if (!Function.isSimple() || Function.isIgnored() || Function.isFragment())468continue;469
470LLVM_DEBUG(dbgs() << "====================================\n");471LLVM_DEBUG(dbgs() << " - " << Function.getPrintName() << "\n");472if (!conservativePassOverFunction(Function) && opts::AggressiveReAssign) {473aggressivePassOverFunction(Function);474LLVM_DEBUG({475if (FuncsChanged.count(&Function))476dbgs() << "Aggressive pass successful on " << Function.getPrintName()477<< "\n";478});479}480}481
482if (FuncsChanged.empty()) {483BC.outs() << "BOLT-INFO: Reg Reassignment Pass: no changes were made.\n";484return Error::success();485}486if (opts::UpdateDebugSections)487BC.outs()488<< "BOLT-WARNING: You used -reg-reassign and -update-debug-sections."489<< " Some registers were changed but associated AT_LOCATION for "490<< "impacted variables were NOT updated! This operation is "491<< "currently unsupported by BOLT.\n";492BC.outs() << "BOLT-INFO: Reg Reassignment Pass Stats:\n";493BC.outs() << "\t " << FuncsChanged.size() << " functions affected.\n";494BC.outs() << "\t " << StaticBytesSaved << " static bytes saved.\n";495BC.outs() << "\t " << DynBytesSaved << " dynamic bytes saved.\n";496return Error::success();497}
498
499} // namespace bolt500} // namespace llvm501