123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408 |
- //===- SanitizerBinaryMetadata.cpp - binary analysis sanitizers metadata --===//
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
- //===----------------------------------------------------------------------===//
- //
- // This file is a part of SanitizerBinaryMetadata.
- //
- //===----------------------------------------------------------------------===//
- #include "llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h"
- #include "llvm/ADT/SetVector.h"
- #include "llvm/ADT/SmallVector.h"
- #include "llvm/ADT/Statistic.h"
- #include "llvm/ADT/StringRef.h"
- #include "llvm/ADT/Triple.h"
- #include "llvm/ADT/Twine.h"
- #include "llvm/IR/Constant.h"
- #include "llvm/IR/DerivedTypes.h"
- #include "llvm/IR/Function.h"
- #include "llvm/IR/GlobalValue.h"
- #include "llvm/IR/GlobalVariable.h"
- #include "llvm/IR/IRBuilder.h"
- #include "llvm/IR/Instruction.h"
- #include "llvm/IR/Instructions.h"
- #include "llvm/IR/LLVMContext.h"
- #include "llvm/IR/MDBuilder.h"
- #include "llvm/IR/Metadata.h"
- #include "llvm/IR/Module.h"
- #include "llvm/IR/Type.h"
- #include "llvm/IR/Value.h"
- #include "llvm/InitializePasses.h"
- #include "llvm/Pass.h"
- #include "llvm/Support/CommandLine.h"
- #include "llvm/Support/Debug.h"
- #include "llvm/Transforms/Instrumentation.h"
- #include "llvm/Transforms/Utils/ModuleUtils.h"
- #include <array>
- #include <cstdint>
- using namespace llvm;
- #define DEBUG_TYPE "sanmd"
- namespace {
//===--- Constants --------------------------------------------------------===//

// Base version of the metadata format; it occupies the lower 16 bits of the
// version word.
constexpr uint32_t kVersionBase = 1;

// Version flag (bit 16): offsets are pointer-sized.
constexpr uint32_t kVersionPtrSizeRel = uint32_t{1} << 16;

// Priority used when registering the module constructors and destructors.
constexpr int kCtorDtorPriority = 2;
- // Pairs of names of initialization callback functions and which section
- // contains the relevant metadata.
- class MetadataInfo {
- public:
- const StringRef FunctionPrefix;
- const StringRef SectionSuffix;
- const uint32_t FeatureMask;
- static const MetadataInfo Covered;
- static const MetadataInfo Atomics;
- private:
- // Forbid construction elsewhere.
- explicit constexpr MetadataInfo(StringRef FunctionPrefix,
- StringRef SectionSuffix, uint32_t Feature)
- : FunctionPrefix(FunctionPrefix), SectionSuffix(SectionSuffix),
- FeatureMask(Feature) {}
- };
- const MetadataInfo MetadataInfo::Covered{"__sanitizer_metadata_covered",
- kSanitizerBinaryMetadataCoveredSection,
- kSanitizerBinaryMetadataNone};
- const MetadataInfo MetadataInfo::Atomics{"__sanitizer_metadata_atomics",
- kSanitizerBinaryMetadataAtomicsSection,
- kSanitizerBinaryMetadataAtomics};
- // The only instances of MetadataInfo are the constants above, so a set of
- // them may simply store pointers to them. To deterministically generate code,
- // we need to use a set with stable iteration order, such as SetVector.
- using MetadataInfoSet = SetVector<const MetadataInfo *>;
- //===--- Command-line options ---------------------------------------------===//
- cl::opt<bool> ClWeakCallbacks(
- "sanitizer-metadata-weak-callbacks",
- cl::desc("Declare callbacks extern weak, and only call if non-null."),
- cl::Hidden, cl::init(true));
- cl::opt<bool> ClEmitCovered("sanitizer-metadata-covered",
- cl::desc("Emit PCs for covered functions."),
- cl::Hidden, cl::init(false));
- cl::opt<bool> ClEmitAtomics("sanitizer-metadata-atomics",
- cl::desc("Emit PCs for atomic operations."),
- cl::Hidden, cl::init(false));
- cl::opt<bool> ClEmitUAR("sanitizer-metadata-uar",
- cl::desc("Emit PCs for start of functions that are "
- "subject for use-after-return checking"),
- cl::Hidden, cl::init(false));
- //===--- Statistics -------------------------------------------------------===//
- STATISTIC(NumMetadataCovered, "Metadata attached to covered functions");
- STATISTIC(NumMetadataAtomics, "Metadata attached to atomics");
- STATISTIC(NumMetadataUAR, "Metadata attached to UAR functions");
- //===----------------------------------------------------------------------===//
- // Apply opt overrides.
- SanitizerBinaryMetadataOptions &&
- transformOptionsFromCl(SanitizerBinaryMetadataOptions &&Opts) {
- Opts.Covered |= ClEmitCovered;
- Opts.Atomics |= ClEmitAtomics;
- Opts.UAR |= ClEmitUAR;
- return std::move(Opts);
- }
- class SanitizerBinaryMetadata {
- public:
- SanitizerBinaryMetadata(Module &M, SanitizerBinaryMetadataOptions Opts)
- : Mod(M), Options(transformOptionsFromCl(std::move(Opts))),
- TargetTriple(M.getTargetTriple()), IRB(M.getContext()) {
- // FIXME: Make it work with other formats.
- assert(TargetTriple.isOSBinFormatELF() && "ELF only");
- }
- bool run();
- private:
- // Return enabled feature mask of per-instruction metadata.
- uint32_t getEnabledPerInstructionFeature() const {
- uint32_t FeatureMask = 0;
- if (Options.Atomics)
- FeatureMask |= MetadataInfo::Atomics.FeatureMask;
- return FeatureMask;
- }
- uint32_t getVersion() const {
- uint32_t Version = kVersionBase;
- const auto CM = Mod.getCodeModel();
- if (CM.has_value() && (*CM == CodeModel::Medium || *CM == CodeModel::Large))
- Version |= kVersionPtrSizeRel;
- return Version;
- }
- void runOn(Function &F, MetadataInfoSet &MIS);
- // Determines which set of metadata to collect for this instruction.
- //
- // Returns true if covered metadata is required to unambiguously interpret
- // other metadata. For example, if we are interested in atomics metadata, any
- // function with memory operations (atomic or not) requires covered metadata
- // to determine if a memory operation is atomic or not in modules compiled
- // with SanitizerBinaryMetadata.
- bool runOn(Instruction &I, MetadataInfoSet &MIS, MDBuilder &MDB,
- uint32_t &FeatureMask);
- // Get start/end section marker pointer.
- GlobalVariable *getSectionMarker(const Twine &MarkerName, Type *Ty);
- // Returns the target-dependent section name.
- StringRef getSectionName(StringRef SectionSuffix);
- // Returns the section start marker name.
- Twine getSectionStart(StringRef SectionSuffix);
- // Returns the section end marker name.
- Twine getSectionEnd(StringRef SectionSuffix);
- Module &Mod;
- const SanitizerBinaryMetadataOptions Options;
- const Triple TargetTriple;
- IRBuilder<> IRB;
- };
- bool SanitizerBinaryMetadata::run() {
- MetadataInfoSet MIS;
- for (Function &F : Mod)
- runOn(F, MIS);
- if (MIS.empty())
- return false;
- //
- // Setup constructors and call all initialization functions for requested
- // metadata features.
- //
- auto *Int8PtrTy = IRB.getInt8PtrTy();
- auto *Int8PtrPtrTy = PointerType::getUnqual(Int8PtrTy);
- auto *Int32Ty = IRB.getInt32Ty();
- const std::array<Type *, 3> InitTypes = {Int32Ty, Int8PtrPtrTy, Int8PtrPtrTy};
- auto *Version = ConstantInt::get(Int32Ty, getVersion());
- for (const MetadataInfo *MI : MIS) {
- const std::array<Value *, InitTypes.size()> InitArgs = {
- Version,
- getSectionMarker(getSectionStart(MI->SectionSuffix), Int8PtrTy),
- getSectionMarker(getSectionEnd(MI->SectionSuffix), Int8PtrTy),
- };
- // We declare the _add and _del functions as weak, and only call them if
- // there is a valid symbol linked. This allows building binaries with
- // semantic metadata, but without having callbacks. When a tool that wants
- // the metadata is linked which provides the callbacks, they will be called.
- Function *Ctor =
- createSanitizerCtorAndInitFunctions(
- Mod, (MI->FunctionPrefix + ".module_ctor").str(),
- (MI->FunctionPrefix + "_add").str(), InitTypes, InitArgs,
- /*VersionCheckName=*/StringRef(), /*Weak=*/ClWeakCallbacks)
- .first;
- Function *Dtor =
- createSanitizerCtorAndInitFunctions(
- Mod, (MI->FunctionPrefix + ".module_dtor").str(),
- (MI->FunctionPrefix + "_del").str(), InitTypes, InitArgs,
- /*VersionCheckName=*/StringRef(), /*Weak=*/ClWeakCallbacks)
- .first;
- Constant *CtorData = nullptr;
- Constant *DtorData = nullptr;
- if (TargetTriple.supportsCOMDAT()) {
- // Use COMDAT to deduplicate constructor/destructor function.
- Ctor->setComdat(Mod.getOrInsertComdat(Ctor->getName()));
- Dtor->setComdat(Mod.getOrInsertComdat(Dtor->getName()));
- CtorData = Ctor;
- DtorData = Dtor;
- }
- appendToGlobalCtors(Mod, Ctor, kCtorDtorPriority, CtorData);
- appendToGlobalDtors(Mod, Dtor, kCtorDtorPriority, DtorData);
- }
- return true;
- }
- void SanitizerBinaryMetadata::runOn(Function &F, MetadataInfoSet &MIS) {
- if (F.empty())
- return;
- if (F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation))
- return;
- // Don't touch available_externally functions, their actual body is elsewhere.
- if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage)
- return;
- MDBuilder MDB(F.getContext());
- // The metadata features enabled for this function, stored along covered
- // metadata (if enabled).
- uint32_t FeatureMask = getEnabledPerInstructionFeature();
- // Don't emit unnecessary covered metadata for all functions to save space.
- bool RequiresCovered = false;
- // We can only understand if we need to set UAR feature after looking
- // at the instructions. So we need to check instructions even if FeatureMask
- // is empty.
- if (FeatureMask || Options.UAR) {
- for (BasicBlock &BB : F)
- for (Instruction &I : BB)
- RequiresCovered |= runOn(I, MIS, MDB, FeatureMask);
- }
- if (F.isVarArg())
- FeatureMask &= ~kSanitizerBinaryMetadataUAR;
- if (FeatureMask & kSanitizerBinaryMetadataUAR) {
- RequiresCovered = true;
- NumMetadataUAR++;
- }
- // Covered metadata is always emitted if explicitly requested, otherwise only
- // if some other metadata requires it to unambiguously interpret it for
- // modules compiled with SanitizerBinaryMetadata.
- if (Options.Covered || (FeatureMask && RequiresCovered)) {
- NumMetadataCovered++;
- const auto *MI = &MetadataInfo::Covered;
- MIS.insert(MI);
- const StringRef Section = getSectionName(MI->SectionSuffix);
- // The feature mask will be placed after the size (32 bit) of the function,
- // so in total one covered entry will use `sizeof(void*) + 4 + 4`.
- Constant *CFM = IRB.getInt32(FeatureMask);
- F.setMetadata(LLVMContext::MD_pcsections,
- MDB.createPCSections({{Section, {CFM}}}));
- }
- }
- bool isUARSafeCall(CallInst *CI) {
- auto *F = CI->getCalledFunction();
- // There are no intrinsic functions that leak arguments.
- // If the called function does not return, the current function
- // does not return as well, so no possibility of use-after-return.
- // Sanitizer function also don't leak or don't return.
- // It's safe to both pass pointers to local variables to them
- // and to tail-call them.
- return F && (F->isIntrinsic() || F->doesNotReturn() ||
- F->getName().startswith("__asan_") ||
- F->getName().startswith("__hwsan_") ||
- F->getName().startswith("__ubsan_") ||
- F->getName().startswith("__msan_") ||
- F->getName().startswith("__tsan_"));
- }
- bool hasUseAfterReturnUnsafeUses(Value &V) {
- for (User *U : V.users()) {
- if (auto *I = dyn_cast<Instruction>(U)) {
- if (I->isLifetimeStartOrEnd() || I->isDroppable())
- continue;
- if (auto *CI = dyn_cast<CallInst>(U)) {
- if (isUARSafeCall(CI))
- continue;
- }
- if (isa<LoadInst>(U))
- continue;
- if (auto *SI = dyn_cast<StoreInst>(U)) {
- // If storing TO the alloca, then the address isn't taken.
- if (SI->getOperand(1) == &V)
- continue;
- }
- if (auto *GEPI = dyn_cast<GetElementPtrInst>(U)) {
- if (!hasUseAfterReturnUnsafeUses(*GEPI))
- continue;
- } else if (auto *BCI = dyn_cast<BitCastInst>(U)) {
- if (!hasUseAfterReturnUnsafeUses(*BCI))
- continue;
- }
- }
- return true;
- }
- return false;
- }
- bool useAfterReturnUnsafe(Instruction &I) {
- if (isa<AllocaInst>(I))
- return hasUseAfterReturnUnsafeUses(I);
- // Tail-called functions are not necessary intercepted
- // at runtime because there is no call instruction.
- // So conservatively mark the caller as requiring checking.
- else if (auto *CI = dyn_cast<CallInst>(&I))
- return CI->isTailCall() && !isUARSafeCall(CI);
- return false;
- }
- bool SanitizerBinaryMetadata::runOn(Instruction &I, MetadataInfoSet &MIS,
- MDBuilder &MDB, uint32_t &FeatureMask) {
- SmallVector<const MetadataInfo *, 1> InstMetadata;
- bool RequiresCovered = false;
- if (Options.UAR && !(FeatureMask & kSanitizerBinaryMetadataUAR)) {
- if (useAfterReturnUnsafe(I))
- FeatureMask |= kSanitizerBinaryMetadataUAR;
- }
- if (Options.Atomics && I.mayReadOrWriteMemory()) {
- auto SSID = getAtomicSyncScopeID(&I);
- if (SSID.has_value() && *SSID != SyncScope::SingleThread) {
- NumMetadataAtomics++;
- InstMetadata.push_back(&MetadataInfo::Atomics);
- }
- RequiresCovered = true;
- }
- // Attach MD_pcsections to instruction.
- if (!InstMetadata.empty()) {
- MIS.insert(InstMetadata.begin(), InstMetadata.end());
- SmallVector<MDBuilder::PCSection, 1> Sections;
- for (const auto &MI : InstMetadata)
- Sections.push_back({getSectionName(MI->SectionSuffix), {}});
- I.setMetadata(LLVMContext::MD_pcsections, MDB.createPCSections(Sections));
- }
- return RequiresCovered;
- }
- GlobalVariable *
- SanitizerBinaryMetadata::getSectionMarker(const Twine &MarkerName, Type *Ty) {
- // Use ExternalWeak so that if all sections are discarded due to section
- // garbage collection, the linker will not report undefined symbol errors.
- auto *Marker = new GlobalVariable(Mod, Ty, /*isConstant=*/false,
- GlobalVariable::ExternalWeakLinkage,
- /*Initializer=*/nullptr, MarkerName);
- Marker->setVisibility(GlobalValue::HiddenVisibility);
- return Marker;
- }
- StringRef SanitizerBinaryMetadata::getSectionName(StringRef SectionSuffix) {
- // FIXME: Other TargetTriple (req. string pool)
- return SectionSuffix;
- }
- Twine SanitizerBinaryMetadata::getSectionStart(StringRef SectionSuffix) {
- return "__start_" + SectionSuffix;
- }
- Twine SanitizerBinaryMetadata::getSectionEnd(StringRef SectionSuffix) {
- return "__stop_" + SectionSuffix;
- }
- } // namespace
- SanitizerBinaryMetadataPass::SanitizerBinaryMetadataPass(
- SanitizerBinaryMetadataOptions Opts)
- : Options(std::move(Opts)) {}
- PreservedAnalyses
- SanitizerBinaryMetadataPass::run(Module &M, AnalysisManager<Module> &AM) {
- SanitizerBinaryMetadata Pass(M, Options);
- if (Pass.run())
- return PreservedAnalyses::none();
- return PreservedAnalyses::all();
- }
|