//===- InlineSizeEstimatorAnalysis.cpp - IR to native size from ML model --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This implements feature and label extraction for offline supervised learning
// of an IR to native size model.
//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h"

#ifdef LLVM_HAVE_TF_API
#include "llvm/Analysis/Utils/TFUtils.h"
#endif
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PassManager.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"

#include <algorithm>
#include <deque>

using namespace llvm;

AnalysisKey InlineSizeEstimatorAnalysis::Key;

#define DEBUG_TYPE "inline-size-estimator"

#ifdef LLVM_HAVE_TF_API
cl::opt<std::string> TFIR2NativeModelPath(
    "ml-inliner-ir2native-model", cl::Hidden,
    cl::desc("Path to saved model evaluating native size from IR."));

namespace {
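// The largest instruction opcode: Instruction.def's LAST_OTHER_INST macro
// expands here to a 'return NR;' for the highest-numbered opcode.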
unsigned getMaxInstructionID() {
#define LAST_OTHER_INST(NR) return NR;
#include "llvm/IR/Instruction.def"
}
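
// Gathers the features we feed to the IR-to-native-size model: a handful of
// named scalar features, a histogram of instruction opcodes, and a histogram
// of the "important" opcode successions listed below.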
class IRToNativeSizeLearning {
public:
  enum class NamedFeatureIndex : size_t {
    InitialSize,
    Blocks,
    Calls,
    IsLocal,
    IsLinkOnceODR,
    IsLinkOnce,
    Loops,
    MaxLoopDepth,
    MaxDomTreeLevel,

    NumNamedFeatures
  };
  static const size_t NumNamedFeatures =
      static_cast<size_t>(NamedFeatureIndex::NumNamedFeatures);

  struct FunctionFeatures {
    static const size_t FeatureCount;

    std::array<int32_t, NumNamedFeatures> NamedFeatures = {0};
    std::vector<int32_t> InstructionHistogram;
    std::vector<int32_t> InstructionPairHistogram;

    void fillTensor(int32_t *Ptr) const;
    int32_t &operator[](NamedFeatureIndex Pos) {
      return NamedFeatures[static_cast<size_t>(Pos)];
    }
  };

  IRToNativeSizeLearning() = default;

  static FunctionFeatures getFunctionFeatures(Function &F,
                                              FunctionAnalysisManager &FAM);
};
// This is a point-in-time choice: we determined that including these pairs of
// consecutive instructions (in the IR layout available at inline time) as
// features improves the model's performance. We want to move away from manual
// feature selection.
// The array is given in opcode pairs rather than labels because 1) labels
// weren't readily available, and 2) the successions were hand-extracted.
//
// This array must be sorted.
static const std::array<std::pair<size_t, size_t>, 137>
    ImportantInstructionSuccessions{
        {{1, 1}, {1, 4}, {1, 5}, {1, 7}, {1, 8}, {1, 9}, {1, 11},
         {1, 12}, {1, 13}, {1, 14}, {1, 18}, {1, 20}, {1, 22}, {1, 24},
         {1, 25}, {1, 26}, {1, 27}, {1, 28}, {1, 29}, {1, 30}, {1, 31},
         {1, 32}, {1, 33}, {1, 34}, {1, 39}, {1, 40}, {1, 42}, {1, 45},
         {2, 1}, {2, 2}, {2, 13}, {2, 28}, {2, 29}, {2, 32}, {2, 33},
         {2, 34}, {2, 38}, {2, 48}, {2, 49}, {2, 53}, {2, 55}, {2, 56},
         {13, 2}, {13, 13}, {13, 26}, {13, 33}, {13, 34}, {13, 56}, {15, 27},
         {28, 2}, {28, 48}, {28, 53}, {29, 2}, {29, 33}, {29, 56}, {31, 31},
         {31, 33}, {31, 34}, {31, 49}, {32, 1}, {32, 2}, {32, 13}, {32, 15},
         {32, 28}, {32, 29}, {32, 32}, {32, 33}, {32, 34}, {32, 39}, {32, 40},
         {32, 48}, {32, 49}, {32, 53}, {32, 56}, {33, 1}, {33, 2}, {33, 32},
         {33, 33}, {33, 34}, {33, 49}, {33, 53}, {33, 56}, {34, 1}, {34, 2},
         {34, 32}, {34, 33}, {34, 34}, {34, 49}, {34, 53}, {34, 56}, {38, 34},
         {39, 57}, {40, 34}, {47, 15}, {47, 49}, {48, 2}, {48, 34}, {48, 56},
         {49, 1}, {49, 2}, {49, 28}, {49, 32}, {49, 33}, {49, 34}, {49, 39},
         {49, 49}, {49, 56}, {53, 1}, {53, 2}, {53, 28}, {53, 34}, {53, 53},
         {53, 57}, {55, 1}, {55, 28}, {55, 34}, {55, 53}, {55, 55}, {55, 56},
         {56, 1}, {56, 2}, {56, 7}, {56, 13}, {56, 32}, {56, 33}, {56, 34},
         {56, 49}, {56, 53}, {56, 56}, {56, 64}, {57, 34}, {57, 56}, {57, 57},
         {64, 1}, {64, 64}, {65, 1}, {65, 65}}};
// We have: 9 calculated features (the features here); 1 feature for each
// instruction opcode; and 1 feature for each manually-identified sequence.
// For the latter two, we build a histogram: we count the number of
// occurrences of each instruction opcode or succession of instructions,
// respectively.
// Note that instruction opcodes start from 1. For convenience, we also have an
// always-zero feature for the '0' opcode, hence the extra 1.
const size_t IRToNativeSizeLearning::FunctionFeatures::FeatureCount =
    ImportantInstructionSuccessions.size() + getMaxInstructionID() + 1 +
    IRToNativeSizeLearning::NumNamedFeatures;
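
// Sum TTI's code-size cost over all instructions in F.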
size_t getSize(Function &F, TargetTransformInfo &TTI) {
  size_t Ret = 0;
  for (const auto &BB : F)
    for (const auto &I : BB)
      Ret += *(TTI.getInstructionCost(
                      &I, TargetTransformInfo::TargetCostKind::TCK_CodeSize)
                   .getValue());
  return Ret;
}

size_t getSize(Function &F, FunctionAnalysisManager &FAM) {
  auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
  return getSize(F, TTI);
}
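
// The deepest dominator-tree level of any block in F.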
unsigned getMaxDominatorTreeDepth(const Function &F,
                                  const DominatorTree &Tree) {
  unsigned Ret = 0;
  for (const auto &BB : F)
    if (const auto *TN = Tree.getNode(&BB))
      Ret = std::max(Ret, TN->getLevel());
  return Ret;
}
} // namespace
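
// Single pass over F: count opcodes, the important opcode successions, and
// call sites, then fill the remaining named features from FAM's analyses.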
IRToNativeSizeLearning::FunctionFeatures
IRToNativeSizeLearning::getFunctionFeatures(Function &F,
                                            FunctionAnalysisManager &FAM) {
  assert(llvm::is_sorted(ImportantInstructionSuccessions) &&
         "expected instruction successions to be sorted");
  auto &DomTree = FAM.getResult<DominatorTreeAnalysis>(F);
  FunctionFeatures FF;
  size_t InstrCount = getMaxInstructionID() + 1;
  FF.InstructionHistogram.resize(InstrCount);
  FF.InstructionPairHistogram.resize(ImportantInstructionSuccessions.size());

  int StartID = 0;
  int LastID = StartID;
  auto getPairIndex = [](size_t a, size_t b) {
    auto I = llvm::find(ImportantInstructionSuccessions, std::make_pair(a, b));
    if (I == ImportantInstructionSuccessions.end())
      return -1;
    return static_cast<int>(
        std::distance(ImportantInstructionSuccessions.begin(), I));
  };

  // We don't want debug calls, because they'd just add noise.
  for (const auto &BB : F) {
    for (const auto &I : BB.instructionsWithoutDebug()) {
      auto ID = I.getOpcode();

      ++FF.InstructionHistogram[ID];
      int PairIndex = getPairIndex(LastID, ID);
      if (PairIndex >= 0)
        ++FF.InstructionPairHistogram[PairIndex];
      LastID = ID;
      if (isa<CallBase>(I))
        ++FF[NamedFeatureIndex::Calls];
    }
  }

  FF[NamedFeatureIndex::InitialSize] = getSize(F, FAM);
  FF[NamedFeatureIndex::IsLocal] = F.hasLocalLinkage();
  FF[NamedFeatureIndex::IsLinkOnceODR] = F.hasLinkOnceODRLinkage();
  FF[NamedFeatureIndex::IsLinkOnce] = F.hasLinkOnceLinkage();
  FF[NamedFeatureIndex::Blocks] =
      std::distance(F.getBasicBlockList().begin(), F.getBasicBlockList().end());
  auto &LI = FAM.getResult<LoopAnalysis>(F);
  FF[NamedFeatureIndex::Loops] = std::distance(LI.begin(), LI.end());
  for (auto &L : LI)
    FF[NamedFeatureIndex::MaxLoopDepth] =
        std::max(FF[NamedFeatureIndex::MaxLoopDepth],
                 static_cast<int32_t>(L->getLoopDepth()));
  FF[NamedFeatureIndex::MaxDomTreeLevel] = getMaxDominatorTreeDepth(F, DomTree);
  return FF;
}
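
// Serialize the features into the flat int32 input buffer in the order assumed
// by FeatureCount: named features, opcode histogram, then opcode-pair
// histogram.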
void IRToNativeSizeLearning::FunctionFeatures::fillTensor(int32_t *Ptr) const {
  std::copy(NamedFeatures.begin(), NamedFeatures.end(), Ptr);
  Ptr += NamedFeatures.size();
  std::copy(InstructionHistogram.begin(), InstructionHistogram.end(), Ptr);
  Ptr += InstructionHistogram.size();
  std::copy(InstructionPairHistogram.begin(), InstructionPairHistogram.end(),
            Ptr);
}
bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() {
  return !TFIR2NativeModelPath.empty();
}
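
// If a model path was provided, load it; on failure, drop the evaluator so
// run() returns None.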
InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() {
  if (!isEvaluatorRequested()) {
    return;
  }
  std::vector<TensorSpec> InputSpecs{TensorSpec::createSpec<int32_t>(
      "serving_default_input_1",
      {1, static_cast<int64_t>(
              IRToNativeSizeLearning::FunctionFeatures::FeatureCount)})};
  std::vector<TensorSpec> OutputSpecs{
      TensorSpec::createSpec<float>("StatefulPartitionedCall", {1})};
  Evaluator = std::make_unique<TFModelEvaluator>(
      TFIR2NativeModelPath.getValue().c_str(), InputSpecs, OutputSpecs);
  if (!Evaluator || !Evaluator->isValid()) {
    Evaluator.reset();
    return;
  }
}
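
// Returns the model's native-size estimate for F, clamped at zero, or None if
// no valid evaluator is available or evaluation fails.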
InlineSizeEstimatorAnalysis::Result
InlineSizeEstimatorAnalysis::run(const Function &F,
                                 FunctionAnalysisManager &FAM) {
  if (!Evaluator)
    return None;
  auto Features = IRToNativeSizeLearning::getFunctionFeatures(
      const_cast<Function &>(F), FAM);

  int32_t *V = Evaluator->getInput<int32_t>(0);
  Features.fillTensor(V);
  auto ER = Evaluator->evaluate();
  if (!ER)
    return None;
  float Ret = *ER->getTensorValue<float>(0);
  if (Ret < 0.0)
    Ret = 0.0;
  return static_cast<size_t>(Ret);
}
InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() {}
InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis(
    InlineSizeEstimatorAnalysis &&Other)
    : Evaluator(std::move(Other.Evaluator)) {}

#else
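// Stubs used when LLVM is built without the TensorFlow API
// (LLVM_HAVE_TF_API not defined); the analysis then always returns None.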
namespace llvm {
class TFModelEvaluator {};
} // namespace llvm

InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() {}
InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis(
    InlineSizeEstimatorAnalysis &&) {}
InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() {}
InlineSizeEstimatorAnalysis::Result
InlineSizeEstimatorAnalysis::run(const Function &F,
                                 FunctionAnalysisManager &FAM) {
  return None;
}
bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() { return false; }
#endif
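
// Printer pass: report the size estimate for F (used for testing).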
PreservedAnalyses
InlineSizeEstimatorAnalysisPrinterPass::run(Function &F,
                                            FunctionAnalysisManager &AM) {
  OS << "[InlineSizeEstimatorAnalysis] size estimate for " << F.getName()
     << ": " << AM.getResult<InlineSizeEstimatorAnalysis>(F) << "\n";
  return PreservedAnalyses::all();
}