SanitizerBinaryMetadata.cpp 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408
  1. //===- SanitizerBinaryMetadata.cpp - binary analysis sanitizers metadata --===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file is a part of SanitizerBinaryMetadata.
  10. //
  11. //===----------------------------------------------------------------------===//
  12. #include "llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h"
  13. #include "llvm/ADT/SetVector.h"
  14. #include "llvm/ADT/SmallVector.h"
  15. #include "llvm/ADT/Statistic.h"
  16. #include "llvm/ADT/StringRef.h"
  17. #include "llvm/ADT/Triple.h"
  18. #include "llvm/ADT/Twine.h"
  19. #include "llvm/IR/Constant.h"
  20. #include "llvm/IR/DerivedTypes.h"
  21. #include "llvm/IR/Function.h"
  22. #include "llvm/IR/GlobalValue.h"
  23. #include "llvm/IR/GlobalVariable.h"
  24. #include "llvm/IR/IRBuilder.h"
  25. #include "llvm/IR/Instruction.h"
  26. #include "llvm/IR/Instructions.h"
  27. #include "llvm/IR/LLVMContext.h"
  28. #include "llvm/IR/MDBuilder.h"
  29. #include "llvm/IR/Metadata.h"
  30. #include "llvm/IR/Module.h"
  31. #include "llvm/IR/Type.h"
  32. #include "llvm/IR/Value.h"
  33. #include "llvm/InitializePasses.h"
  34. #include "llvm/Pass.h"
  35. #include "llvm/Support/CommandLine.h"
  36. #include "llvm/Support/Debug.h"
  37. #include "llvm/Transforms/Instrumentation.h"
  38. #include "llvm/Transforms/Utils/ModuleUtils.h"
  39. #include <array>
  40. #include <cstdint>
  41. using namespace llvm;
  42. #define DEBUG_TYPE "sanmd"
  43. namespace {
  44. //===--- Constants --------------------------------------------------------===//
  // Base version number of the metadata format; occupies the lower 16 bits of
  // the version word.
  constexpr uint32_t kVersionBase = 1;
  // Version flag (bit 16): offsets are pointer-sized.
  constexpr uint32_t kVersionPtrSizeRel = uint32_t{1} << 16;
  // Priority used when registering the module constructors/destructors.
  constexpr int kCtorDtorPriority = 2;
  48. // Pairs of names of initialization callback functions and which section
  49. // contains the relevant metadata.
  50. class MetadataInfo {
  51. public:
  52. const StringRef FunctionPrefix;
  53. const StringRef SectionSuffix;
  54. const uint32_t FeatureMask;
  55. static const MetadataInfo Covered;
  56. static const MetadataInfo Atomics;
  57. private:
  58. // Forbid construction elsewhere.
  59. explicit constexpr MetadataInfo(StringRef FunctionPrefix,
  60. StringRef SectionSuffix, uint32_t Feature)
  61. : FunctionPrefix(FunctionPrefix), SectionSuffix(SectionSuffix),
  62. FeatureMask(Feature) {}
  63. };
  64. const MetadataInfo MetadataInfo::Covered{"__sanitizer_metadata_covered",
  65. kSanitizerBinaryMetadataCoveredSection,
  66. kSanitizerBinaryMetadataNone};
  67. const MetadataInfo MetadataInfo::Atomics{"__sanitizer_metadata_atomics",
  68. kSanitizerBinaryMetadataAtomicsSection,
  69. kSanitizerBinaryMetadataAtomics};
  70. // The only instances of MetadataInfo are the constants above, so a set of
  71. // them may simply store pointers to them. To deterministically generate code,
  72. // we need to use a set with stable iteration order, such as SetVector.
  73. using MetadataInfoSet = SetVector<const MetadataInfo *>;
  74. //===--- Command-line options ---------------------------------------------===//
  75. cl::opt<bool> ClWeakCallbacks(
  76. "sanitizer-metadata-weak-callbacks",
  77. cl::desc("Declare callbacks extern weak, and only call if non-null."),
  78. cl::Hidden, cl::init(true));
  79. cl::opt<bool> ClEmitCovered("sanitizer-metadata-covered",
  80. cl::desc("Emit PCs for covered functions."),
  81. cl::Hidden, cl::init(false));
  82. cl::opt<bool> ClEmitAtomics("sanitizer-metadata-atomics",
  83. cl::desc("Emit PCs for atomic operations."),
  84. cl::Hidden, cl::init(false));
  85. cl::opt<bool> ClEmitUAR("sanitizer-metadata-uar",
  86. cl::desc("Emit PCs for start of functions that are "
  87. "subject for use-after-return checking"),
  88. cl::Hidden, cl::init(false));
  89. //===--- Statistics -------------------------------------------------------===//
  90. STATISTIC(NumMetadataCovered, "Metadata attached to covered functions");
  91. STATISTIC(NumMetadataAtomics, "Metadata attached to atomics");
  92. STATISTIC(NumMetadataUAR, "Metadata attached to UAR functions");
  93. //===----------------------------------------------------------------------===//
  94. // Apply opt overrides.
  95. SanitizerBinaryMetadataOptions &&
  96. transformOptionsFromCl(SanitizerBinaryMetadataOptions &&Opts) {
  97. Opts.Covered |= ClEmitCovered;
  98. Opts.Atomics |= ClEmitAtomics;
  99. Opts.UAR |= ClEmitUAR;
  100. return std::move(Opts);
  101. }
  102. class SanitizerBinaryMetadata {
  103. public:
  104. SanitizerBinaryMetadata(Module &M, SanitizerBinaryMetadataOptions Opts)
  105. : Mod(M), Options(transformOptionsFromCl(std::move(Opts))),
  106. TargetTriple(M.getTargetTriple()), IRB(M.getContext()) {
  107. // FIXME: Make it work with other formats.
  108. assert(TargetTriple.isOSBinFormatELF() && "ELF only");
  109. }
  110. bool run();
  111. private:
  112. // Return enabled feature mask of per-instruction metadata.
  113. uint32_t getEnabledPerInstructionFeature() const {
  114. uint32_t FeatureMask = 0;
  115. if (Options.Atomics)
  116. FeatureMask |= MetadataInfo::Atomics.FeatureMask;
  117. return FeatureMask;
  118. }
  119. uint32_t getVersion() const {
  120. uint32_t Version = kVersionBase;
  121. const auto CM = Mod.getCodeModel();
  122. if (CM.has_value() && (*CM == CodeModel::Medium || *CM == CodeModel::Large))
  123. Version |= kVersionPtrSizeRel;
  124. return Version;
  125. }
  126. void runOn(Function &F, MetadataInfoSet &MIS);
  127. // Determines which set of metadata to collect for this instruction.
  128. //
  129. // Returns true if covered metadata is required to unambiguously interpret
  130. // other metadata. For example, if we are interested in atomics metadata, any
  131. // function with memory operations (atomic or not) requires covered metadata
  132. // to determine if a memory operation is atomic or not in modules compiled
  133. // with SanitizerBinaryMetadata.
  134. bool runOn(Instruction &I, MetadataInfoSet &MIS, MDBuilder &MDB,
  135. uint32_t &FeatureMask);
  136. // Get start/end section marker pointer.
  137. GlobalVariable *getSectionMarker(const Twine &MarkerName, Type *Ty);
  138. // Returns the target-dependent section name.
  139. StringRef getSectionName(StringRef SectionSuffix);
  140. // Returns the section start marker name.
  141. Twine getSectionStart(StringRef SectionSuffix);
  142. // Returns the section end marker name.
  143. Twine getSectionEnd(StringRef SectionSuffix);
  144. Module &Mod;
  145. const SanitizerBinaryMetadataOptions Options;
  146. const Triple TargetTriple;
  147. IRBuilder<> IRB;
  148. };
  149. bool SanitizerBinaryMetadata::run() {
  150. MetadataInfoSet MIS;
  151. for (Function &F : Mod)
  152. runOn(F, MIS);
  153. if (MIS.empty())
  154. return false;
  155. //
  156. // Setup constructors and call all initialization functions for requested
  157. // metadata features.
  158. //
  159. auto *Int8PtrTy = IRB.getInt8PtrTy();
  160. auto *Int8PtrPtrTy = PointerType::getUnqual(Int8PtrTy);
  161. auto *Int32Ty = IRB.getInt32Ty();
  162. const std::array<Type *, 3> InitTypes = {Int32Ty, Int8PtrPtrTy, Int8PtrPtrTy};
  163. auto *Version = ConstantInt::get(Int32Ty, getVersion());
  164. for (const MetadataInfo *MI : MIS) {
  165. const std::array<Value *, InitTypes.size()> InitArgs = {
  166. Version,
  167. getSectionMarker(getSectionStart(MI->SectionSuffix), Int8PtrTy),
  168. getSectionMarker(getSectionEnd(MI->SectionSuffix), Int8PtrTy),
  169. };
  170. // We declare the _add and _del functions as weak, and only call them if
  171. // there is a valid symbol linked. This allows building binaries with
  172. // semantic metadata, but without having callbacks. When a tool that wants
  173. // the metadata is linked which provides the callbacks, they will be called.
  174. Function *Ctor =
  175. createSanitizerCtorAndInitFunctions(
  176. Mod, (MI->FunctionPrefix + ".module_ctor").str(),
  177. (MI->FunctionPrefix + "_add").str(), InitTypes, InitArgs,
  178. /*VersionCheckName=*/StringRef(), /*Weak=*/ClWeakCallbacks)
  179. .first;
  180. Function *Dtor =
  181. createSanitizerCtorAndInitFunctions(
  182. Mod, (MI->FunctionPrefix + ".module_dtor").str(),
  183. (MI->FunctionPrefix + "_del").str(), InitTypes, InitArgs,
  184. /*VersionCheckName=*/StringRef(), /*Weak=*/ClWeakCallbacks)
  185. .first;
  186. Constant *CtorData = nullptr;
  187. Constant *DtorData = nullptr;
  188. if (TargetTriple.supportsCOMDAT()) {
  189. // Use COMDAT to deduplicate constructor/destructor function.
  190. Ctor->setComdat(Mod.getOrInsertComdat(Ctor->getName()));
  191. Dtor->setComdat(Mod.getOrInsertComdat(Dtor->getName()));
  192. CtorData = Ctor;
  193. DtorData = Dtor;
  194. }
  195. appendToGlobalCtors(Mod, Ctor, kCtorDtorPriority, CtorData);
  196. appendToGlobalDtors(Mod, Dtor, kCtorDtorPriority, DtorData);
  197. }
  198. return true;
  199. }
  200. void SanitizerBinaryMetadata::runOn(Function &F, MetadataInfoSet &MIS) {
  201. if (F.empty())
  202. return;
  203. if (F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation))
  204. return;
  205. // Don't touch available_externally functions, their actual body is elsewhere.
  206. if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage)
  207. return;
  208. MDBuilder MDB(F.getContext());
  209. // The metadata features enabled for this function, stored along covered
  210. // metadata (if enabled).
  211. uint32_t FeatureMask = getEnabledPerInstructionFeature();
  212. // Don't emit unnecessary covered metadata for all functions to save space.
  213. bool RequiresCovered = false;
  214. // We can only understand if we need to set UAR feature after looking
  215. // at the instructions. So we need to check instructions even if FeatureMask
  216. // is empty.
  217. if (FeatureMask || Options.UAR) {
  218. for (BasicBlock &BB : F)
  219. for (Instruction &I : BB)
  220. RequiresCovered |= runOn(I, MIS, MDB, FeatureMask);
  221. }
  222. if (F.isVarArg())
  223. FeatureMask &= ~kSanitizerBinaryMetadataUAR;
  224. if (FeatureMask & kSanitizerBinaryMetadataUAR) {
  225. RequiresCovered = true;
  226. NumMetadataUAR++;
  227. }
  228. // Covered metadata is always emitted if explicitly requested, otherwise only
  229. // if some other metadata requires it to unambiguously interpret it for
  230. // modules compiled with SanitizerBinaryMetadata.
  231. if (Options.Covered || (FeatureMask && RequiresCovered)) {
  232. NumMetadataCovered++;
  233. const auto *MI = &MetadataInfo::Covered;
  234. MIS.insert(MI);
  235. const StringRef Section = getSectionName(MI->SectionSuffix);
  236. // The feature mask will be placed after the size (32 bit) of the function,
  237. // so in total one covered entry will use `sizeof(void*) + 4 + 4`.
  238. Constant *CFM = IRB.getInt32(FeatureMask);
  239. F.setMetadata(LLVMContext::MD_pcsections,
  240. MDB.createPCSections({{Section, {CFM}}}));
  241. }
  242. }
  243. bool isUARSafeCall(CallInst *CI) {
  244. auto *F = CI->getCalledFunction();
  245. // There are no intrinsic functions that leak arguments.
  246. // If the called function does not return, the current function
  247. // does not return as well, so no possibility of use-after-return.
  248. // Sanitizer function also don't leak or don't return.
  249. // It's safe to both pass pointers to local variables to them
  250. // and to tail-call them.
  251. return F && (F->isIntrinsic() || F->doesNotReturn() ||
  252. F->getName().startswith("__asan_") ||
  253. F->getName().startswith("__hwsan_") ||
  254. F->getName().startswith("__ubsan_") ||
  255. F->getName().startswith("__msan_") ||
  256. F->getName().startswith("__tsan_"));
  257. }
  258. bool hasUseAfterReturnUnsafeUses(Value &V) {
  259. for (User *U : V.users()) {
  260. if (auto *I = dyn_cast<Instruction>(U)) {
  261. if (I->isLifetimeStartOrEnd() || I->isDroppable())
  262. continue;
  263. if (auto *CI = dyn_cast<CallInst>(U)) {
  264. if (isUARSafeCall(CI))
  265. continue;
  266. }
  267. if (isa<LoadInst>(U))
  268. continue;
  269. if (auto *SI = dyn_cast<StoreInst>(U)) {
  270. // If storing TO the alloca, then the address isn't taken.
  271. if (SI->getOperand(1) == &V)
  272. continue;
  273. }
  274. if (auto *GEPI = dyn_cast<GetElementPtrInst>(U)) {
  275. if (!hasUseAfterReturnUnsafeUses(*GEPI))
  276. continue;
  277. } else if (auto *BCI = dyn_cast<BitCastInst>(U)) {
  278. if (!hasUseAfterReturnUnsafeUses(*BCI))
  279. continue;
  280. }
  281. }
  282. return true;
  283. }
  284. return false;
  285. }
  286. bool useAfterReturnUnsafe(Instruction &I) {
  287. if (isa<AllocaInst>(I))
  288. return hasUseAfterReturnUnsafeUses(I);
  289. // Tail-called functions are not necessary intercepted
  290. // at runtime because there is no call instruction.
  291. // So conservatively mark the caller as requiring checking.
  292. else if (auto *CI = dyn_cast<CallInst>(&I))
  293. return CI->isTailCall() && !isUARSafeCall(CI);
  294. return false;
  295. }
  296. bool SanitizerBinaryMetadata::runOn(Instruction &I, MetadataInfoSet &MIS,
  297. MDBuilder &MDB, uint32_t &FeatureMask) {
  298. SmallVector<const MetadataInfo *, 1> InstMetadata;
  299. bool RequiresCovered = false;
  300. if (Options.UAR && !(FeatureMask & kSanitizerBinaryMetadataUAR)) {
  301. if (useAfterReturnUnsafe(I))
  302. FeatureMask |= kSanitizerBinaryMetadataUAR;
  303. }
  304. if (Options.Atomics && I.mayReadOrWriteMemory()) {
  305. auto SSID = getAtomicSyncScopeID(&I);
  306. if (SSID.has_value() && *SSID != SyncScope::SingleThread) {
  307. NumMetadataAtomics++;
  308. InstMetadata.push_back(&MetadataInfo::Atomics);
  309. }
  310. RequiresCovered = true;
  311. }
  312. // Attach MD_pcsections to instruction.
  313. if (!InstMetadata.empty()) {
  314. MIS.insert(InstMetadata.begin(), InstMetadata.end());
  315. SmallVector<MDBuilder::PCSection, 1> Sections;
  316. for (const auto &MI : InstMetadata)
  317. Sections.push_back({getSectionName(MI->SectionSuffix), {}});
  318. I.setMetadata(LLVMContext::MD_pcsections, MDB.createPCSections(Sections));
  319. }
  320. return RequiresCovered;
  321. }
  322. GlobalVariable *
  323. SanitizerBinaryMetadata::getSectionMarker(const Twine &MarkerName, Type *Ty) {
  324. // Use ExternalWeak so that if all sections are discarded due to section
  325. // garbage collection, the linker will not report undefined symbol errors.
  326. auto *Marker = new GlobalVariable(Mod, Ty, /*isConstant=*/false,
  327. GlobalVariable::ExternalWeakLinkage,
  328. /*Initializer=*/nullptr, MarkerName);
  329. Marker->setVisibility(GlobalValue::HiddenVisibility);
  330. return Marker;
  331. }
  332. StringRef SanitizerBinaryMetadata::getSectionName(StringRef SectionSuffix) {
  333. // FIXME: Other TargetTriple (req. string pool)
  334. return SectionSuffix;
  335. }
  336. Twine SanitizerBinaryMetadata::getSectionStart(StringRef SectionSuffix) {
  337. return "__start_" + SectionSuffix;
  338. }
  339. Twine SanitizerBinaryMetadata::getSectionEnd(StringRef SectionSuffix) {
  340. return "__stop_" + SectionSuffix;
  341. }
  342. } // namespace
  343. SanitizerBinaryMetadataPass::SanitizerBinaryMetadataPass(
  344. SanitizerBinaryMetadataOptions Opts)
  345. : Options(std::move(Opts)) {}
  346. PreservedAnalyses
  347. SanitizerBinaryMetadataPass::run(Module &M, AnalysisManager<Module> &AM) {
  348. SanitizerBinaryMetadata Pass(M, Options);
  349. if (Pass.run())
  350. return PreservedAnalyses::none();
  351. return PreservedAnalyses::all();
  352. }