//===- DevelopmentModeInlineAdvisor.cpp - runtime-loadable model runner --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a model runner using the TensorFlow C APIs, allowing
// the loading of a model from a command line option.
//
//===----------------------------------------------------------------------===//
#include "llvm/Config/config.h"
#include "llvm/Support/Casting.h"
#if defined(LLVM_HAVE_TF_API)
#include "llvm/ADT/BitVector.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h"
#include "llvm/Analysis/MLInlineAdvisor.h"
#include "llvm/Analysis/ModelUnderTrainingRunner.h"
#include "llvm/Analysis/NoInferenceModelRunner.h"
#include "llvm/Analysis/Utils/TFUtils.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ManagedStatic.h"

#include <vector>

using namespace llvm;
static cl::opt<std::string> TrainingLog(
    "training-log", cl::Hidden,
    cl::desc("Path where the development-mode inlining log is saved."));
static cl::opt<std::string> TFModelUnderTrainingPath(
    "ml-inliner-model-under-training", cl::Hidden,
    cl::desc(R"(Path to SavedModel from the previous training iteration.
The directory is also expected to contain a JSON specification of the
outputs expected to be logged, where the first entry must be the
inlining decision. The file containing the specification should be
called output_spec.json. The expected JSON value is an array of
dictionaries. Each dictionary should have 2 keys:

- "tensor_spec", followed by the TensorSpec description of the
  output; and
- "logging_name", a string indicating the name to use when
  logging the output values.

Example:
[
  {
    "logging_name" : "some_name",
    "tensor_spec" : {
      "name" : "model_name",
      "port" : 0,
      "shape" : [2, 3],
      "type" : "float"
    }
  }
]
The first value must always correspond to the decision.)"));
static cl::opt<std::string> TFOutputSpecOverride(
    "ml-inliner-output-spec-override", cl::Hidden,
    cl::desc("Override the path to the output spec json file. See "
             "-ml-inliner-model-under-training documentation for the "
             "specification of that file."));
static cl::opt<std::string> TFFeedPrefix("ml-inliner-trained-model-feed-prefix",
                                         cl::Hidden, cl::init("action_"),
                                         cl::desc("Prefix for feature names."));
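
// Note: with the default prefix, a feature named for example
// "callee_basic_block_count" (used here purely for illustration) would be fed
// to the model under training as "action_callee_basic_block_count".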

namespace {
/// An InlineEvent, used by TrainingLogger.
struct InlineEvent {
  /// What the default policy's decision would have been.
  int64_t DefaultDecision = 0;

  /// What we advised. When training off the default policy, this is the same
  /// as DefaultDecision.
  int64_t AdvisedDecision = 0;

  /// What actually happened. This would be 'false' in the case of an inline
  /// error, even if AdvisedDecision were true; otherwise, it agrees with
  /// AdvisedDecision.
  bool Effect = false;

  /// The change in size: size_after - size_before.
  int64_t Reward = 0;
};

/// Collect data we may use for training a model, and write it as a textual
/// TensorFlow SequenceExample
/// (https://www.tensorflow.org/api_docs/python/tf/train/SequenceExample)
/// protobuf (https://developers.google.com/protocol-buffers).
/// Because this is a protobuf, we cannot just stream the events as they come.
/// Internally, TrainingLogger stores data in column-major format, because that
/// lines up with how TF SequenceExample represents it.
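///
/// For illustration only (a sketch following the SequenceExample textproto
/// shape; "inlining_decision" is assumed here as an example logging name):
/// each logged tensor becomes one feature_list, keyed by its logging name and
/// holding that tensor's value for every event, e.g.:
///
///   feature_lists {
///     feature_list {
///       key: "inlining_decision"
///       value { feature { int64_list { value: 0 } }
///               feature { int64_list { value: 1 } } }
///     }
///   }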
class TrainingLogger final {
public:
  TrainingLogger(StringRef LogFileName, const ModelUnderTrainingRunner *MUTR);

  /// Log one inlining event.
  void logInlineEvent(const InlineEvent &Event,
                      const MLModelRunner &ModelRunner);

  /// Print the stored tensors.
  void print();

private:
  StringRef LogFileName;
  const ModelUnderTrainingRunner *const MUTR;
  std::unique_ptr<Logger> L;
  BitVector Effects;
  /// There's at least one output. We'll set this to a different value if MUTR
  /// is available.
  size_t OutputCount = 1;
  /// Set these two clearly out of bounds, to make sure we set them later.
  size_t DefaultDecisionPos = std::numeric_limits<size_t>::max();
  size_t DecisionPos = std::numeric_limits<size_t>::max();
};

/// An extension of the MLInlineAdvisor for the 'development' mode, targeting
/// the offline training scenario. Note that training happens outside of the
/// compiler; this facility is concerned with producing training data ("logs").
/// This InlineAdvisor can operate in the following modes:
///
/// 1) collect logs for the default policy. This is useful for bootstrapping
/// training, which will be considerably faster by starting from a reasonable
/// policy.
///
/// 2) collect logs for the ML policy, using a model from a previous
/// training. Potentially, that model uses internally some small random
/// perturbation of its weights, to induce exploration (setting this up is the
/// responsibility of the training algorithm). The logs would then be used to
/// retrain and improve on this model.
///
/// 3) use the provided model, with no logging. This is useful for end-to-end
/// validation - the model, in this case, is a release candidate and shouldn't
/// have random perturbations. It is a convenience feature: rather than needing
/// to take the release candidate model and compile it in 'release' mode,
/// validate it, then potentially discard it, it's easier to just pass the model
/// to the compiler, albeit compilation would be slower, as a one-off. Once the
/// model behaves satisfactorily, it can be compiled AOT, for efficiency, in
/// release mode. The expectation is that a well-trained model provides a good
/// policy over a sufficiently diverse codebase, over many changes (i.e.
/// training happens seldom).
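///
/// As a rough sketch (assuming this advisor is selected via the
/// -enable-ml-inliner=development flag, which is defined elsewhere), the modes
/// correspond to invocations along these lines:
///
/// 1) -mllvm -enable-ml-inliner=development -mllvm -training-log=<path>
/// 2) the above, plus
///    -mllvm -ml-inliner-model-under-training=<saved_model_dir>
/// 3) the model path as in 2), but with -training-log omitted.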
class DevelopmentModeMLInlineAdvisor : public MLInlineAdvisor {
public:
  DevelopmentModeMLInlineAdvisor(
      Module &M, ModuleAnalysisManager &MAM,
      std::unique_ptr<MLModelRunner> ModelRunner,
      std::function<bool(CallBase &)> GetDefaultAdvice,
      std::unique_ptr<TrainingLogger> Logger);

  size_t getTotalSizeEstimate();

  virtual ~DevelopmentModeMLInlineAdvisor();

  void updateNativeSizeEstimate(int64_t Change) {
    *CurrentNativeSize += Change;
  }
  void resetNativeSize(Function *F) {
    PreservedAnalyses PA = PreservedAnalyses::all();
    PA.abandon<InlineSizeEstimatorAnalysis>();
    FAM.invalidate(*F, PA);
  }

  std::unique_ptr<MLInlineAdvice>
  getAdviceFromModel(CallBase &CB, OptimizationRemarkEmitter &ORE) override;

  Optional<size_t> getNativeSizeEstimate(const Function &F) const;

private:
  bool isLogging() const { return !!Logger; }
  std::unique_ptr<MLInlineAdvice> getMandatoryAdviceImpl(CallBase &CB) override;

  std::function<bool(CallBase &)> GetDefaultAdvice;
  const bool IsDoingInference;
  std::unique_ptr<TrainingLogger> Logger;

  const Optional<int32_t> InitialNativeSize;
  Optional<int32_t> CurrentNativeSize;
};

/// A variant of MLInlineAdvice that tracks all non-trivial inlining
/// decisions, for training/logging.
class LoggingMLInlineAdvice : public MLInlineAdvice {
public:
  LoggingMLInlineAdvice(DevelopmentModeMLInlineAdvisor *Advisor, CallBase &CB,
                        OptimizationRemarkEmitter &ORE, bool Recommendation,
                        TrainingLogger &Logger,
                        Optional<size_t> CallerSizeEstimateBefore,
                        Optional<size_t> CalleeSizeEstimateBefore,
                        bool DefaultDecision, bool Mandatory = false)
      : MLInlineAdvice(Advisor, CB, ORE, Recommendation), Logger(Logger),
        CallerSizeEstimateBefore(CallerSizeEstimateBefore),
        CalleeSizeEstimateBefore(CalleeSizeEstimateBefore),
        DefaultDecision(DefaultDecision), Mandatory(Mandatory) {}

  virtual ~LoggingMLInlineAdvice() = default;

private:
  DevelopmentModeMLInlineAdvisor *getAdvisor() const {
    return static_cast<DevelopmentModeMLInlineAdvisor *>(Advisor);
  }
  void recordInliningImpl() override {
    MLInlineAdvice::recordInliningImpl();
    getAdvisor()->resetNativeSize(Caller);
    int Reward = std::numeric_limits<int>::max();
    if (InlineSizeEstimatorAnalysis::isEvaluatorRequested() &&
        !getAdvisor()->isForcedToStop()) {
      int NativeSizeAfter = *getAdvisor()->getNativeSizeEstimate(*Caller) +
                            *CalleeSizeEstimateBefore;
      Reward = NativeSizeAfter -
               (*CallerSizeEstimateBefore + *CalleeSizeEstimateBefore);
      getAdvisor()->updateNativeSizeEstimate(Reward);
    }
    log(Reward, /*Success=*/true);
  }

  void recordInliningWithCalleeDeletedImpl() override {
    MLInlineAdvice::recordInliningWithCalleeDeletedImpl();
    getAdvisor()->resetNativeSize(Caller);
    if (InlineSizeEstimatorAnalysis::isEvaluatorRequested() &&
        !getAdvisor()->isForcedToStop()) {
      int NativeSizeAfter = *getAdvisor()->getNativeSizeEstimate(*Caller);
      int Reward = NativeSizeAfter -
                   (*CallerSizeEstimateBefore + *CalleeSizeEstimateBefore);
      getAdvisor()->updateNativeSizeEstimate(Reward);
      log(Reward, /*Success=*/true);
    } else {
      log(NoReward, /*Success=*/true);
    }
  }

  void recordUnsuccessfulInliningImpl(const InlineResult &Result) override {
    MLInlineAdvice::recordUnsuccessfulInliningImpl(Result);
    log(NoReward, /*Success=*/false);
  }

  void recordUnattemptedInliningImpl() override {
    MLInlineAdvice::recordUnattemptedInliningImpl();
    log(NoReward, /*Success=*/false);
  }

  void log(int64_t Reward, bool Success) {
    if (Mandatory)
      return;
    InlineEvent Event;
    Event.AdvisedDecision = isInliningRecommended();
    Event.DefaultDecision = DefaultDecision;
    Event.Effect = Success;
    Event.Reward = Reward;
    Logger.logInlineEvent(Event, getAdvisor()->getModelRunner());
  }

  static const int64_t NoReward = 0;
  TrainingLogger &Logger;
  const Optional<size_t> CallerSizeEstimateBefore;
  const Optional<size_t> CalleeSizeEstimateBefore;
  const int64_t DefaultDecision;
  const int64_t Mandatory;
};

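// Extra feeds supplied only to the model under training, on top of the
// release feature set. An assumption for context, not stated in this file:
// "discount", "reward", and "step_type" mirror the fields TF-Agents expects
// in a time step.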
static const std::vector<TensorSpec> TrainingOnlyFeatures{
    TensorSpec::createSpec<int64_t>(TFFeedPrefix + "inlining_default", {1}),
    TensorSpec::createSpec<float>(TFFeedPrefix + "discount", {1}),
    TensorSpec::createSpec<float>(TFFeedPrefix + "reward", {1}),
    TensorSpec::createSpec<int32_t>(TFFeedPrefix + "step_type", {1})};

static const std::vector<TensorSpec> getInputFeatures() {
  std::vector<TensorSpec> InputSpecs;
  for (size_t I = 0; I < NumberOfFeatures; ++I)
    InputSpecs.push_back(
        TensorSpec::createSpec<int64_t>(TFFeedPrefix + FeatureNameMap[I], {1}));
  append_range(InputSpecs, TrainingOnlyFeatures);
  return InputSpecs;
}
} // namespace

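// The log columns, in order, as set up by the constructor below: the
// NumberOfFeatures input features, any extra outputs of the model under
// training (beyond the first, which is the decision itself), the default
// policy's decision, and finally the advised decision. The reward is logged
// separately, and only when the size estimator is available.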
TrainingLogger::TrainingLogger(StringRef LogFileName,
                               const ModelUnderTrainingRunner *MUTR)
    : LogFileName(LogFileName), MUTR(MUTR) {
  // The first output is the inlining decision.
  if (MUTR)
    OutputCount = MUTR->outputLoggedFeatureSpecs().size();
  std::vector<LoggedFeatureSpec> FT;

  for (size_t I = 0; I < NumberOfFeatures; ++I)
    FT.push_back(
        {TensorSpec::createSpec<int64_t>(FeatureNameMap.at(I), {1}), None});
  if (MUTR && MUTR->outputLoggedFeatureSpecs().size() > 1)
    append_range(FT, drop_begin(MUTR->outputLoggedFeatureSpecs()));

  DefaultDecisionPos = FT.size();
  FT.push_back(
      {TensorSpec::createSpec<int64_t>(DefaultDecisionName, {1}), None});

  DecisionPos = FT.size();
  FT.push_back({TensorSpec::createSpec<int64_t>(DecisionName, {1}), None});

  L = std::make_unique<Logger>(
      FT, TensorSpec::createSpec<int64_t>(RewardName, {1}),
      InlineSizeEstimatorAnalysis::isEvaluatorRequested());
}

/// Log one inlining event.
void TrainingLogger::logInlineEvent(const InlineEvent &Event,
                                    const MLModelRunner &ModelRunner) {
  size_t CurrentFeature = 0;
  for (; CurrentFeature < NumberOfFeatures; ++CurrentFeature) {
    int64_t F = *ModelRunner.getTensor<int64_t>(CurrentFeature);
    L->logInt64Value(CurrentFeature, &F);
  }

  for (size_t I = 1; I < OutputCount; ++I) {
    const auto &Result = *MUTR->lastEvaluationResult();
    const char *RawData =
        reinterpret_cast<const char *>(Result.getUntypedTensorValue(I));
    L->logSpecifiedTensorValue(CurrentFeature, RawData);
    ++CurrentFeature;
  }

  assert(CurrentFeature == DefaultDecisionPos);
  L->logInt64Value(DefaultDecisionPos, &Event.DefaultDecision);
  L->logInt64Value(DecisionPos, &Event.AdvisedDecision);
  if (InlineSizeEstimatorAnalysis::isEvaluatorRequested())
    L->logInt64Reward(Event.Reward);

  // For debugging / later use.
  Effects.push_back(Event.Effect);
}

void TrainingLogger::print() {
  std::error_code EC;
  raw_fd_ostream OutFile(LogFileName, EC);
  L->flush(OutFile);
}

DevelopmentModeMLInlineAdvisor::DevelopmentModeMLInlineAdvisor(
    Module &M, ModuleAnalysisManager &MAM,
    std::unique_ptr<MLModelRunner> ModelRunner,
    std::function<bool(CallBase &)> GetDefaultAdvice,
    std::unique_ptr<TrainingLogger> Logger)
    : MLInlineAdvisor(M, MAM, std::move(ModelRunner)),
      GetDefaultAdvice(GetDefaultAdvice),
      IsDoingInference(isa<ModelUnderTrainingRunner>(getModelRunner())),
      Logger(std::move(Logger)),
      InitialNativeSize(isLogging() ? getTotalSizeEstimate() : 0),
      CurrentNativeSize(InitialNativeSize) {
  // We cannot have the case of neither inference nor logging.
  assert(IsDoingInference || isLogging());
}

DevelopmentModeMLInlineAdvisor::~DevelopmentModeMLInlineAdvisor() {
  if (isLogging())
    Logger->print();
}

Optional<size_t>
DevelopmentModeMLInlineAdvisor::getNativeSizeEstimate(const Function &F) const {
  if (!InlineSizeEstimatorAnalysis::isEvaluatorRequested())
    return None;
  auto &R =
      FAM.getResult<InlineSizeEstimatorAnalysis>(const_cast<Function &>(F));
  if (!R) {
    F.getParent()->getContext().emitError(
        "Native size estimator is not present.");
    return 0;
  }
  return *R;
}

std::unique_ptr<MLInlineAdvice>
DevelopmentModeMLInlineAdvisor::getMandatoryAdviceImpl(CallBase &CB) {
  return std::make_unique<LoggingMLInlineAdvice>(
      /*Advisor=*/this,
      /*CB=*/CB, /*ORE=*/getCallerORE(CB), /*Recommendation=*/true,
      /*Logger=*/*Logger,
      /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()),
      /*CalleeSizeEstimateBefore=*/
      getNativeSizeEstimate(*CB.getCalledFunction()),
      /*DefaultDecision=*/true, /*Mandatory=*/true);
}

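// If we are doing inference without logging (mode 3 above), defer to the base
// advisor. Otherwise, compute the default policy's decision and, when a model
// is available, its recommendation, so both can be logged.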
std::unique_ptr<MLInlineAdvice>
DevelopmentModeMLInlineAdvisor::getAdviceFromModel(
    CallBase &CB, OptimizationRemarkEmitter &ORE) {
  if (IsDoingInference && !isLogging())
    return MLInlineAdvisor::getAdviceFromModel(CB, ORE);

  bool DefaultAdvice = GetDefaultAdvice(CB);
  auto Recommendation =
      IsDoingInference ? static_cast<bool>(ModelRunner->evaluate<int64_t>())
                       : DefaultAdvice;
  return std::make_unique<LoggingMLInlineAdvice>(
      /*Advisor=*/this,
      /*CB=*/CB, /*ORE=*/ORE, /*Recommendation=*/Recommendation,
      /*Logger=*/*Logger,
      /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()),
      /*CalleeSizeEstimateBefore=*/
      getNativeSizeEstimate(*CB.getCalledFunction()),
      /*DefaultDecision=*/DefaultAdvice);
}

size_t DevelopmentModeMLInlineAdvisor::getTotalSizeEstimate() {
  if (!InlineSizeEstimatorAnalysis::isEvaluatorRequested())
    return 0;
  size_t Ret = 0;
  for (auto &F : M) {
    if (F.isDeclaration())
      continue;
    Ret += *getNativeSizeEstimate(F);
  }
  return Ret;
}

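// Factory: with no model path we build a NoInferenceModelRunner, which only
// supports logging the default policy; with a path, we load the model under
// training. A null runner (e.g. a model that failed to load) disables the
// advisor entirely.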
std::unique_ptr<InlineAdvisor> llvm::getDevelopmentModeAdvisor(
    Module &M, ModuleAnalysisManager &MAM,
    std::function<bool(CallBase &)> GetDefaultAdvice) {
  auto &Ctx = M.getContext();
  std::unique_ptr<MLModelRunner> Runner;
  if (TFModelUnderTrainingPath.empty())
    Runner.reset(new NoInferenceModelRunner(Ctx, getInputFeatures()));
  else
    Runner = ModelUnderTrainingRunner::createAndEnsureValid(
        Ctx, TFModelUnderTrainingPath, DecisionName, getInputFeatures(),
        TFOutputSpecOverride);
  if (!Runner)
    return nullptr;
  std::unique_ptr<TrainingLogger> Logger;
  if (!TrainingLog.empty())
    Logger = std::make_unique<TrainingLogger>(
        TrainingLog, dyn_cast<ModelUnderTrainingRunner>(Runner.get()));
  return std::make_unique<DevelopmentModeMLInlineAdvisor>(
      M, MAM, std::move(Runner), GetDefaultAdvice, std::move(Logger));
}
#endif // defined(LLVM_HAVE_TF_API)