llvm-project

InlineSizeEstimatorAnalysis.cpp
280 строк · 10.4 Кб
Перенос по словам
1
//===- InlineSizeEstimatorAnalysis.cpp - IR to native size from ML model --===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This implements feature and label extraction for offline supervised learning
10
// of an IR to native size model.
11
//
12
//===----------------------------------------------------------------------===//
13
#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h"
14

15
#ifdef LLVM_HAVE_TFLITE
16
#include "llvm/Analysis/Utils/TFUtils.h"
17
#endif
18
#include "llvm/IR/Function.h"
19
#include "llvm/IR/PassManager.h"
20
#include "llvm/Support/raw_ostream.h"
21

22
using namespace llvm;
23

24
// Definition of the analysis' `Key` member. It sits outside the
// LLVM_HAVE_TFLITE guards, so it is emitted in both build configurations.
AnalysisKey InlineSizeEstimatorAnalysis::Key;
25

26
#ifdef LLVM_HAVE_TFLITE
27
#include "llvm/Analysis/LoopInfo.h"
28
#include "llvm/Analysis/TargetLibraryInfo.h"
29
#include "llvm/Analysis/TargetTransformInfo.h"
30
#include "llvm/IR/BasicBlock.h"
31
#include "llvm/IR/Dominators.h"
32
#include "llvm/IR/Instructions.h"
33
#include "llvm/Support/Casting.h"
34
#include "llvm/Support/CommandLine.h"
35
#include <algorithm>
36
#include <deque>
37
#include <optional>
38

39
// Hidden command-line option naming the saved model used to estimate native
// size from IR. An empty value (the default) means no evaluator is requested;
// see InlineSizeEstimatorAnalysis::isEvaluatorRequested().
cl::opt<std::string> TFIR2NativeModelPath(
    "ml-inliner-ir2native-model",
    cl::desc("Path to saved model evaluating native size from IR."),
    cl::Hidden);
42

43
#define DEBUG_TYPE "inline-size-estimator"
44
namespace {
45
/// Returns the opcode number that Instruction.def passes to LAST_OTHER_INST.
unsigned getMaxInstructionID() {
  // Capture the macro argument in a local constant instead of returning from
  // inside the expansion. This relies on Instruction.def expanding
  // LAST_OTHER_INST exactly once.
#define LAST_OTHER_INST(NR) constexpr unsigned MaxInstructionID = (NR);
#include "llvm/IR/Instruction.def"
  return MaxInstructionID;
}
49

50
class IRToNativeSizeLearning {
51
public:
52
  enum class NamedFeatureIndex : size_t {
53
    InitialSize,
54
    Blocks,
55
    Calls,
56
    IsLocal,
57
    IsLinkOnceODR,
58
    IsLinkOnce,
59
    Loops,
60
    MaxLoopDepth,
61
    MaxDomTreeLevel,
62

63
    NumNamedFeatures
64
  };
65
  static const size_t NumNamedFeatures =
66
      static_cast<size_t>(NamedFeatureIndex::NumNamedFeatures);
67
  struct FunctionFeatures {
68
    static const size_t FeatureCount;
69

70
    std::array<int32_t, NumNamedFeatures> NamedFeatures = {0};
71
    std::vector<int32_t> InstructionHistogram;
72
    std::vector<int32_t> InstructionPairHistogram;
73

74
    void fillTensor(int32_t *Ptr) const;
75
    int32_t &operator[](NamedFeatureIndex Pos) {
76
      return NamedFeatures[static_cast<size_t>(Pos)];
77
    }
78
  };
79
  IRToNativeSizeLearning() = default;
80

81
  static FunctionFeatures getFunctionFeatures(Function &F,
82
                                              FunctionAnalysisManager &FAM);
83
};
84

85
// Hand-picked pairs of consecutive instruction opcodes (in the IR layout
// available at inline time) whose occurrence counts were found to improve the
// model's performance. This is a point-in-time decision: the intent is to move
// away from manual feature selection.
// Pairs are spelled as raw opcode numbers rather than labels because 1) labels
// weren't readily available, and 2) the successions were hand-extracted. Each
// entry is {opcode of the earlier instruction, opcode of the one after it}.
//
// This array must be sorted. Entries are grouped by their first opcode below.
static const std::array<std::pair<size_t, size_t>, 137>
    ImportantInstructionSuccessions{{
        // First opcode 1.
        {1, 1},   {1, 4},   {1, 5},   {1, 7},   {1, 8},   {1, 9},   {1, 11},
        {1, 12},  {1, 13},  {1, 14},  {1, 18},  {1, 20},  {1, 22},  {1, 24},
        {1, 25},  {1, 26},  {1, 27},  {1, 28},  {1, 29},  {1, 30},  {1, 31},
        {1, 32},  {1, 33},  {1, 34},  {1, 39},  {1, 40},  {1, 42},  {1, 45},
        // First opcode 2.
        {2, 1},   {2, 2},   {2, 13},  {2, 28},  {2, 29},  {2, 32},  {2, 33},
        {2, 34},  {2, 38},  {2, 48},  {2, 49},  {2, 53},  {2, 55},  {2, 56},
        // First opcode 13.
        {13, 2},  {13, 13}, {13, 26}, {13, 33}, {13, 34}, {13, 56},
        // First opcode 15.
        {15, 27},
        // First opcode 28.
        {28, 2},  {28, 48}, {28, 53},
        // First opcode 29.
        {29, 2},  {29, 33}, {29, 56},
        // First opcode 31.
        {31, 31}, {31, 33}, {31, 34}, {31, 49},
        // First opcode 32.
        {32, 1},  {32, 2},  {32, 13}, {32, 15}, {32, 28}, {32, 29}, {32, 32},
        {32, 33}, {32, 34}, {32, 39}, {32, 40}, {32, 48}, {32, 49}, {32, 53},
        {32, 56},
        // First opcode 33.
        {33, 1},  {33, 2},  {33, 32}, {33, 33}, {33, 34}, {33, 49}, {33, 53},
        {33, 56},
        // First opcode 34.
        {34, 1},  {34, 2},  {34, 32}, {34, 33}, {34, 34}, {34, 49}, {34, 53},
        {34, 56},
        // First opcodes 38, 39 and 40.
        {38, 34}, {39, 57}, {40, 34},
        // First opcode 47.
        {47, 15}, {47, 49},
        // First opcode 48.
        {48, 2},  {48, 34}, {48, 56},
        // First opcode 49.
        {49, 1},  {49, 2},  {49, 28}, {49, 32}, {49, 33}, {49, 34}, {49, 39},
        {49, 49}, {49, 56},
        // First opcode 53.
        {53, 1},  {53, 2},  {53, 28}, {53, 34}, {53, 53}, {53, 57},
        // First opcode 55.
        {55, 1},  {55, 28}, {55, 34}, {55, 53}, {55, 55}, {55, 56},
        // First opcode 56.
        {56, 1},  {56, 2},  {56, 7},  {56, 13}, {56, 32}, {56, 33}, {56, 34},
        {56, 49}, {56, 53}, {56, 56}, {56, 64},
        // First opcode 57.
        {57, 34}, {57, 56}, {57, 57},
        // First opcodes 64 and 65.
        {64, 1},  {64, 64}, {65, 1},  {65, 65},
    }};
115

116
// We have: 9 calculated features (the features here); 1 feature for each
117
// instruction opcode; and 1 feature for each manually-identified sequence.
118
// For the latter 2, we build a histogram: we count the number of
119
// occurrences of each instruction opcode or succession of instructions,
120
// respectively.
121
// Note that instruction opcodes start from 1. For convenience, we also have an
122
// always 0 feature for the '0' opcode, hence the extra 1.
123
const size_t IRToNativeSizeLearning::FunctionFeatures::FeatureCount =
124
    ImportantInstructionSuccessions.size() + getMaxInstructionID() + 1 +
125
    IRToNativeSizeLearning::NumNamedFeatures;
126

127
/// Sums the TCK_CodeSize cost that \p TTI reports for every instruction of
/// \p F.
///
/// Instructions whose cost carries no value contribute nothing. The previous
/// code dereferenced the optional returned by getValue() unconditionally,
/// which is undefined behaviour when the cost is invalid.
size_t getSize(Function &F, TargetTransformInfo &TTI) {
  size_t Ret = 0;
  for (const auto &BB : F) {
    for (const auto &I : BB) {
      // getValue() yields an optional; only accumulate when it is engaged.
      if (const auto Cost =
              TTI.getInstructionCost(
                     &I, TargetTransformInfo::TargetCostKind::TCK_CodeSize)
                  .getValue())
        Ret += *Cost;
    }
  }
  return Ret;
}
135

136
/// Convenience overload: fetches the TargetIRAnalysis result for \p F from
/// \p FAM and forwards to the TargetTransformInfo-based getSize().
size_t getSize(Function &F, FunctionAnalysisManager &FAM) {
  return getSize(F, FAM.getResult<TargetIRAnalysis>(F));
}
140

141
unsigned getMaxDominatorTreeDepth(const Function &F,
142
                                  const DominatorTree &Tree) {
143
  unsigned Ret = 0;
144
  for (const auto &BB : F)
145
    if (const auto *TN = Tree.getNode(&BB))
146
      Ret = std::max(Ret, TN->getLevel());
147
  return Ret;
148
}
149
} // namespace
150

151
/// Computes the feature vector of \p F.
///
/// Fills the per-opcode histogram, the histogram of the hand-picked opcode
/// successions, and the named features (size, linkage, block/call/loop counts
/// and dominator tree depth). Debug instructions are ignored for the
/// histograms and the call count.
///
/// \param F   function to analyze.
/// \param FAM analysis manager queried for DominatorTreeAnalysis,
///            TargetIRAnalysis (through getSize) and LoopAnalysis.
/// \returns the populated FunctionFeatures.
IRToNativeSizeLearning::FunctionFeatures
IRToNativeSizeLearning::getFunctionFeatures(Function &F,
                                            FunctionAnalysisManager &FAM) {
  // The binary search below is only correct on a sorted table.
  assert(llvm::is_sorted(ImportantInstructionSuccessions) &&
         "expected function features are sorted");

  auto &DomTree = FAM.getResult<DominatorTreeAnalysis>(F);
  FunctionFeatures FF;
  // One slot per opcode, plus the unused slot for opcode 0.
  FF.InstructionHistogram.resize(getMaxInstructionID() + 1);
  FF.InstructionPairHistogram.resize(ImportantInstructionSuccessions.size());

  // Maps an opcode pair to its position in ImportantInstructionSuccessions,
  // or -1 if the pair is not one of the tracked successions. The table is
  // sorted, so this is a binary search rather than a linear scan per
  // instruction.
  auto getPairIndex = [](size_t a, size_t b) {
    const auto Key = std::make_pair(a, b);
    const auto I =
        std::lower_bound(ImportantInstructionSuccessions.begin(),
                         ImportantInstructionSuccessions.end(), Key);
    if (I == ImportantInstructionSuccessions.end() || *I != Key)
      return -1;
    return static_cast<int>(
        std::distance(ImportantInstructionSuccessions.begin(), I));
  };

  // Opcode of the previously visited instruction; 0 until the first one is
  // seen. It is deliberately not reset between basic blocks, so a succession
  // may span a block boundary (this matches the original behaviour).
  unsigned LastID = 0;

  // We don't want debug calls, because they'd just add noise.
  for (const auto &BB : F) {
    for (const auto &I : BB.instructionsWithoutDebug()) {
      const unsigned ID = I.getOpcode();

      ++FF.InstructionHistogram[ID];
      const int PairIndex = getPairIndex(LastID, ID);
      if (PairIndex >= 0)
        ++FF.InstructionPairHistogram[PairIndex];
      LastID = ID;
      if (isa<CallBase>(I))
        ++FF[NamedFeatureIndex::Calls];
    }
  }

  FF[NamedFeatureIndex::InitialSize] = getSize(F, FAM);
  FF[NamedFeatureIndex::IsLocal] = F.hasLocalLinkage();
  FF[NamedFeatureIndex::IsLinkOnceODR] = F.hasLinkOnceODRLinkage();
  FF[NamedFeatureIndex::IsLinkOnce] = F.hasLinkOnceLinkage();
  FF[NamedFeatureIndex::Blocks] = F.size();
  auto &LI = FAM.getResult<LoopAnalysis>(F);
  // NOTE(review): iterating LoopInfo presumably visits top-level loops only,
  // in which case Loops and MaxLoopDepth ignore nested loops - confirm. Left
  // unchanged because altering feature semantics would alter what the model
  // is fed.
  FF[NamedFeatureIndex::Loops] = std::distance(LI.begin(), LI.end());
  for (auto &L : LI)
    FF[NamedFeatureIndex::MaxLoopDepth] =
        std::max(FF[NamedFeatureIndex::MaxLoopDepth],
                 static_cast<int32_t>(L->getLoopDepth()));
  FF[NamedFeatureIndex::MaxDomTreeLevel] = getMaxDominatorTreeDepth(F, DomTree);
  return FF;
}
203

204
void IRToNativeSizeLearning::FunctionFeatures::fillTensor(int32_t *Ptr) const {
205
  std::copy(NamedFeatures.begin(), NamedFeatures.end(), Ptr);
206
  Ptr += NamedFeatures.size();
207
  std::copy(InstructionHistogram.begin(), InstructionHistogram.end(), Ptr);
208
  Ptr += InstructionHistogram.size();
209
  std::copy(InstructionPairHistogram.begin(), InstructionPairHistogram.end(),
210
            Ptr);
211
}
212

213
bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() {
214
  return !TFIR2NativeModelPath.empty();
215
}
216

217
/// Creates the model evaluator when one was requested on the command line.
/// If no model was requested, or the evaluator turns out to be invalid,
/// Evaluator is left empty.
InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() {
  if (!isEvaluatorRequested())
    return;

  // Single input: a 1 x FeatureCount tensor of int32 features.
  const auto NumFeatures = static_cast<int64_t>(
      IRToNativeSizeLearning::FunctionFeatures::FeatureCount);
  std::vector<TensorSpec> InputSpecs{TensorSpec::createSpec<int32_t>(
      "serving_default_input_1", {1, NumFeatures})};

  // Single output: one float.
  std::vector<TensorSpec> OutputSpecs{
      TensorSpec::createSpec<float>("StatefulPartitionedCall", {1})};

  Evaluator = std::make_unique<TFModelEvaluator>(
      TFIR2NativeModelPath.getValue().c_str(), InputSpecs, OutputSpecs);

  // Drop an evaluator that could not be set up, so that run() reports
  // "no estimate" instead of using it.
  const bool Usable = Evaluator && Evaluator->isValid();
  if (!Usable)
    Evaluator.reset();
}
234

235
/// Estimates the native size of \p F with the loaded model.
///
/// \returns std::nullopt when no evaluator is available or the evaluation
/// fails; otherwise the model output converted to size_t. Outputs that are
/// negative or NaN yield 0, and outputs too large for size_t (including
/// +infinity) saturate at the maximum size_t value. The previous code only
/// clamped negatives, so those other cases reached a float-to-integer
/// conversion with undefined behaviour.
InlineSizeEstimatorAnalysis::Result
InlineSizeEstimatorAnalysis::run(const Function &F,
                                 FunctionAnalysisManager &FAM) {
  if (!Evaluator)
    return std::nullopt;
  // Feature extraction takes a non-const Function&, hence the const_cast.
  auto Features = IRToNativeSizeLearning::getFunctionFeatures(
      const_cast<Function &>(F), FAM);
  int32_t *V = Evaluator->getInput<int32_t>(0);
  Features.fillTensor(V);
  auto ER = Evaluator->evaluate();
  if (!ER)
    return std::nullopt;
  const float Estimate = *ER->getTensorValue<float>(0);

  // Written as a negated comparison so that NaN (for which every ordered
  // comparison is false) is clamped to 0 together with negative values.
  if (!(Estimate > 0.0f))
    return static_cast<size_t>(0);

  // Converting a float that does not fit the destination integer type is
  // undefined, so saturate instead. The bound is the float nearest to the
  // largest size_t; anything at or above it is treated as "maximum".
  constexpr size_t MaxSize = ~static_cast<size_t>(0);
  if (Estimate >= static_cast<float>(MaxSize))
    return MaxSize;

  return static_cast<size_t>(Estimate);
}
252

253
// Defined in this file, where TFUtils.h has been included.
InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() = default;

// Moving the analysis hands the evaluator over to the new object.
InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis(
    InlineSizeEstimatorAnalysis &&Source)
    : Evaluator(std::move(Source.Evaluator)) {}
257

258
#else
259
namespace llvm {
260
class TFModelEvaluator {};
261
} // namespace llvm
262
InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() = default;
263
InlineSizeEstimatorAnalysis ::InlineSizeEstimatorAnalysis(
264
    InlineSizeEstimatorAnalysis &&) {}
265
InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() = default;
266
InlineSizeEstimatorAnalysis::Result
267
InlineSizeEstimatorAnalysis::run(const Function &F,
268
                                 FunctionAnalysisManager &FAM) {
269
  return std::nullopt;
270
}
271
bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() { return false; }
272
#endif
273

274
/// Prints the size estimate computed for \p F to OS, on a single line, and
/// reports every analysis as preserved.
PreservedAnalyses
InlineSizeEstimatorAnalysisPrinterPass::run(Function &F,
                                            FunctionAnalysisManager &AM) {
  const auto &Estimate = AM.getResult<InlineSizeEstimatorAnalysis>(F);
  OS << "[InlineSizeEstimatorAnalysis] size estimate for " << F.getName();
  OS << ": " << Estimate << "\n";
  return PreservedAnalyses::all();
}
281
llvm-project

Использование cookies