llvm-project
280 строк · 10.4 Кб
1//===- InlineSizeEstimatorAnalysis.cpp - IR to native size from ML model --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This implements feature and label extraction for offline supervised learning
10// of a IR to native size model.
11//
12//===----------------------------------------------------------------------===//
13#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h"14
15#ifdef LLVM_HAVE_TFLITE16#include "llvm/Analysis/Utils/TFUtils.h"17#endif18#include "llvm/IR/Function.h"19#include "llvm/IR/PassManager.h"20#include "llvm/Support/raw_ostream.h"21
22using namespace llvm;23
24AnalysisKey InlineSizeEstimatorAnalysis::Key;25
26#ifdef LLVM_HAVE_TFLITE27#include "llvm/Analysis/LoopInfo.h"28#include "llvm/Analysis/TargetLibraryInfo.h"29#include "llvm/Analysis/TargetTransformInfo.h"30#include "llvm/IR/BasicBlock.h"31#include "llvm/IR/Dominators.h"32#include "llvm/IR/Instructions.h"33#include "llvm/Support/Casting.h"34#include "llvm/Support/CommandLine.h"35#include <algorithm>36#include <deque>37#include <optional>38
39cl::opt<std::string> TFIR2NativeModelPath(40"ml-inliner-ir2native-model", cl::Hidden,41cl::desc("Path to saved model evaluating native size from IR."));42
43#define DEBUG_TYPE "inline-size-estimator"44namespace {45unsigned getMaxInstructionID() {46#define LAST_OTHER_INST(NR) return NR;47#include "llvm/IR/Instruction.def"48}
49
50class IRToNativeSizeLearning {51public:52enum class NamedFeatureIndex : size_t {53InitialSize,54Blocks,55Calls,56IsLocal,57IsLinkOnceODR,58IsLinkOnce,59Loops,60MaxLoopDepth,61MaxDomTreeLevel,62
63NumNamedFeatures
64};65static const size_t NumNamedFeatures =66static_cast<size_t>(NamedFeatureIndex::NumNamedFeatures);67struct FunctionFeatures {68static const size_t FeatureCount;69
70std::array<int32_t, NumNamedFeatures> NamedFeatures = {0};71std::vector<int32_t> InstructionHistogram;72std::vector<int32_t> InstructionPairHistogram;73
74void fillTensor(int32_t *Ptr) const;75int32_t &operator[](NamedFeatureIndex Pos) {76return NamedFeatures[static_cast<size_t>(Pos)];77}78};79IRToNativeSizeLearning() = default;80
81static FunctionFeatures getFunctionFeatures(Function &F,82FunctionAnalysisManager &FAM);83};84
85// This is a point in time - we determined including these pairs of
86// consecutive instructions (in the IR layout available at inline time) as
87// features improves the model performance. We want to move away from manual
88// feature selection.
89// The array is given in opcode pairs rather than labels because 1) labels
90// weren't readily available, and 2) the successions were hand - extracted.
91//
92// This array must be sorted.
93static const std::array<std::pair<size_t, size_t>, 137>94ImportantInstructionSuccessions{95{{1, 1}, {1, 4}, {1, 5}, {1, 7}, {1, 8}, {1, 9}, {1, 11},96{1, 12}, {1, 13}, {1, 14}, {1, 18}, {1, 20}, {1, 22}, {1, 24},97{1, 25}, {1, 26}, {1, 27}, {1, 28}, {1, 29}, {1, 30}, {1, 31},98{1, 32}, {1, 33}, {1, 34}, {1, 39}, {1, 40}, {1, 42}, {1, 45},99{2, 1}, {2, 2}, {2, 13}, {2, 28}, {2, 29}, {2, 32}, {2, 33},100{2, 34}, {2, 38}, {2, 48}, {2, 49}, {2, 53}, {2, 55}, {2, 56},101{13, 2}, {13, 13}, {13, 26}, {13, 33}, {13, 34}, {13, 56}, {15, 27},102{28, 2}, {28, 48}, {28, 53}, {29, 2}, {29, 33}, {29, 56}, {31, 31},103{31, 33}, {31, 34}, {31, 49}, {32, 1}, {32, 2}, {32, 13}, {32, 15},104{32, 28}, {32, 29}, {32, 32}, {32, 33}, {32, 34}, {32, 39}, {32, 40},105{32, 48}, {32, 49}, {32, 53}, {32, 56}, {33, 1}, {33, 2}, {33, 32},106{33, 33}, {33, 34}, {33, 49}, {33, 53}, {33, 56}, {34, 1}, {34, 2},107{34, 32}, {34, 33}, {34, 34}, {34, 49}, {34, 53}, {34, 56}, {38, 34},108{39, 57}, {40, 34}, {47, 15}, {47, 49}, {48, 2}, {48, 34}, {48, 56},109{49, 1}, {49, 2}, {49, 28}, {49, 32}, {49, 33}, {49, 34}, {49, 39},110{49, 49}, {49, 56}, {53, 1}, {53, 2}, {53, 28}, {53, 34}, {53, 53},111{53, 57}, {55, 1}, {55, 28}, {55, 34}, {55, 53}, {55, 55}, {55, 56},112{56, 1}, {56, 2}, {56, 7}, {56, 13}, {56, 32}, {56, 33}, {56, 34},113{56, 49}, {56, 53}, {56, 56}, {56, 64}, {57, 34}, {57, 56}, {57, 57},114{64, 1}, {64, 64}, {65, 1}, {65, 65}}};115
116// We have: 9 calculated features (the features here); 1 feature for each
117// instruction opcode; and 1 feature for each manually-identified sequence.
118// For the latter 2, we build a histogram: we count the number of
119// occurrences of each instruction opcode or succession of instructions,
120// respectively.
121// Note that instruction opcodes start from 1. For convenience, we also have an
122// always 0 feature for the '0' opcode, hence the extra 1.
123const size_t IRToNativeSizeLearning::FunctionFeatures::FeatureCount =124ImportantInstructionSuccessions.size() + getMaxInstructionID() + 1 +125IRToNativeSizeLearning::NumNamedFeatures;126
127size_t getSize(Function &F, TargetTransformInfo &TTI) {128size_t Ret = 0;129for (const auto &BB : F)130for (const auto &I : BB)131Ret += *(TTI.getInstructionCost(132&I, TargetTransformInfo::TargetCostKind::TCK_CodeSize).getValue());133return Ret;134}
135
136size_t getSize(Function &F, FunctionAnalysisManager &FAM) {137auto &TTI = FAM.getResult<TargetIRAnalysis>(F);138return getSize(F, TTI);139}
140
141unsigned getMaxDominatorTreeDepth(const Function &F,142const DominatorTree &Tree) {143unsigned Ret = 0;144for (const auto &BB : F)145if (const auto *TN = Tree.getNode(&BB))146Ret = std::max(Ret, TN->getLevel());147return Ret;148}
149} // namespace150
151IRToNativeSizeLearning::FunctionFeatures152IRToNativeSizeLearning::getFunctionFeatures(Function &F,153FunctionAnalysisManager &FAM) {154assert(llvm::is_sorted(ImportantInstructionSuccessions) &&155"expected function features are sorted");156
157auto &DomTree = FAM.getResult<DominatorTreeAnalysis>(F);158FunctionFeatures FF;159size_t InstrCount = getMaxInstructionID() + 1;160FF.InstructionHistogram.resize(InstrCount);161
162FF.InstructionPairHistogram.resize(ImportantInstructionSuccessions.size());163
164int StartID = 0;165int LastID = StartID;166auto getPairIndex = [](size_t a, size_t b) {167auto I = llvm::find(ImportantInstructionSuccessions, std::make_pair(a, b));168if (I == ImportantInstructionSuccessions.end())169return -1;170return static_cast<int>(171std::distance(ImportantInstructionSuccessions.begin(), I));172};173
174// We don't want debug calls, because they'd just add noise.175for (const auto &BB : F) {176for (const auto &I : BB.instructionsWithoutDebug()) {177auto ID = I.getOpcode();178
179++FF.InstructionHistogram[ID];180int PairIndex = getPairIndex(LastID, ID);181if (PairIndex >= 0)182++FF.InstructionPairHistogram[PairIndex];183LastID = ID;184if (isa<CallBase>(I))185++FF[NamedFeatureIndex::Calls];186}187}188
189FF[NamedFeatureIndex::InitialSize] = getSize(F, FAM);190FF[NamedFeatureIndex::IsLocal] = F.hasLocalLinkage();191FF[NamedFeatureIndex::IsLinkOnceODR] = F.hasLinkOnceODRLinkage();192FF[NamedFeatureIndex::IsLinkOnce] = F.hasLinkOnceLinkage();193FF[NamedFeatureIndex::Blocks] = F.size();194auto &LI = FAM.getResult<LoopAnalysis>(F);195FF[NamedFeatureIndex::Loops] = std::distance(LI.begin(), LI.end());196for (auto &L : LI)197FF[NamedFeatureIndex::MaxLoopDepth] =198std::max(FF[NamedFeatureIndex::MaxLoopDepth],199static_cast<int32_t>(L->getLoopDepth()));200FF[NamedFeatureIndex::MaxDomTreeLevel] = getMaxDominatorTreeDepth(F, DomTree);201return FF;202}
203
204void IRToNativeSizeLearning::FunctionFeatures::fillTensor(int32_t *Ptr) const {205std::copy(NamedFeatures.begin(), NamedFeatures.end(), Ptr);206Ptr += NamedFeatures.size();207std::copy(InstructionHistogram.begin(), InstructionHistogram.end(), Ptr);208Ptr += InstructionHistogram.size();209std::copy(InstructionPairHistogram.begin(), InstructionPairHistogram.end(),210Ptr);211}
212
213bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() {214return !TFIR2NativeModelPath.empty();215}
216
217InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() {218if (!isEvaluatorRequested()) {219return;220}221std::vector<TensorSpec> InputSpecs{TensorSpec::createSpec<int32_t>(222"serving_default_input_1",223{1, static_cast<int64_t>(224IRToNativeSizeLearning::FunctionFeatures::FeatureCount)})};225std::vector<TensorSpec> OutputSpecs{226TensorSpec::createSpec<float>("StatefulPartitionedCall", {1})};227Evaluator = std::make_unique<TFModelEvaluator>(228TFIR2NativeModelPath.getValue().c_str(), InputSpecs, OutputSpecs);229if (!Evaluator || !Evaluator->isValid()) {230Evaluator.reset();231return;232}233}
234
235InlineSizeEstimatorAnalysis::Result236InlineSizeEstimatorAnalysis::run(const Function &F,237FunctionAnalysisManager &FAM) {238if (!Evaluator)239return std::nullopt;240auto Features = IRToNativeSizeLearning::getFunctionFeatures(241const_cast<Function &>(F), FAM);242int32_t *V = Evaluator->getInput<int32_t>(0);243Features.fillTensor(V);244auto ER = Evaluator->evaluate();245if (!ER)246return std::nullopt;247float Ret = *ER->getTensorValue<float>(0);248if (Ret < 0.0)249Ret = 0.0;250return static_cast<size_t>(Ret);251}
252
253InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() {}254InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis(255InlineSizeEstimatorAnalysis &&Other)256: Evaluator(std::move(Other.Evaluator)) {}257
258#else259namespace llvm {260class TFModelEvaluator {};261} // namespace llvm262InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() = default;263InlineSizeEstimatorAnalysis ::InlineSizeEstimatorAnalysis(264InlineSizeEstimatorAnalysis &&) {}265InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() = default;266InlineSizeEstimatorAnalysis::Result267InlineSizeEstimatorAnalysis::run(const Function &F,268FunctionAnalysisManager &FAM) {269return std::nullopt;270}
271bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() { return false; }272#endif273
274PreservedAnalyses
275InlineSizeEstimatorAnalysisPrinterPass::run(Function &F,276FunctionAnalysisManager &AM) {277OS << "[InlineSizeEstimatorAnalysis] size estimate for " << F.getName()278<< ": " << AM.getResult<InlineSizeEstimatorAnalysis>(F) << "\n";279return PreservedAnalyses::all();280}
281