//===- FileAnalysis.h -------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CFI_VERIFY_FILE_ANALYSIS_H
#define LLVM_CFI_VERIFY_FILE_ANALYSIS_H

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/DebugInfo/Symbolize/Symbolize.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/COFF.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/raw_ostream.h"

#include <cstdint>
#include <functional>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <system_error>
#include <unordered_map>
#include <vector>
  40. namespace llvm {
  41. namespace cfi_verify {
  42. struct GraphResult;
  43. extern bool IgnoreDWARFFlag;
  44. enum class CFIProtectionStatus {
  45. // This instruction is protected by CFI.
  46. PROTECTED,
  47. // The instruction is not an indirect control flow instruction, and thus
  48. // shouldn't be protected.
  49. FAIL_NOT_INDIRECT_CF,
  50. // There is a path to the instruction that was unexpected.
  51. FAIL_ORPHANS,
  52. // There is a path to the instruction from a conditional branch that does not
  53. // properly check the destination for this vcall/icall.
  54. FAIL_BAD_CONDITIONAL_BRANCH,
  55. // One of the operands of the indirect CF instruction is modified between the
  56. // CFI-check and execution.
  57. FAIL_REGISTER_CLOBBERED,
  58. // The instruction referenced does not exist. This normally indicates an
  59. // error in the program, where you try and validate a graph that was created
  60. // in a different FileAnalysis object.
  61. FAIL_INVALID_INSTRUCTION,
  62. };
  63. StringRef stringCFIProtectionStatus(CFIProtectionStatus Status);
  64. // Disassembler and analysis tool for machine code files. Keeps track of non-
  65. // sequential control flows, including indirect control flow instructions.
  66. class FileAnalysis {
  67. public:
  68. // A metadata struct for an instruction.
  69. struct Instr {
  70. uint64_t VMAddress; // Virtual memory address of this instruction.
  71. MCInst Instruction; // Instruction.
  72. uint64_t InstructionSize; // Size of this instruction.
  73. bool Valid; // Is this a valid instruction? If false, Instr::Instruction is
  74. // undefined.
  75. };
  76. // Construct a FileAnalysis from a file path.
  77. static Expected<FileAnalysis> Create(StringRef Filename);
  78. // Construct and take ownership of the supplied object. Do not use this
  79. // constructor, prefer to use FileAnalysis::Create instead.
  80. FileAnalysis(object::OwningBinary<object::Binary> Binary);
  81. FileAnalysis() = delete;
  82. FileAnalysis(const FileAnalysis &) = delete;
  83. FileAnalysis(FileAnalysis &&Other) = default;
  84. // Returns the instruction at the provided address. Returns nullptr if there
  85. // is no instruction at the provided address.
  86. const Instr *getInstruction(uint64_t Address) const;
  87. // Returns the instruction at the provided adress, dying if the instruction is
  88. // not found.
  89. const Instr &getInstructionOrDie(uint64_t Address) const;
  90. // Returns a pointer to the previous/next instruction in sequence,
  91. // respectively. Returns nullptr if the next/prev instruction doesn't exist,
  92. // or if the provided instruction doesn't exist.
  93. const Instr *getPrevInstructionSequential(const Instr &InstrMeta) const;
  94. const Instr *getNextInstructionSequential(const Instr &InstrMeta) const;
  95. // Returns whether this instruction is used by CFI to trap the program.
  96. bool isCFITrap(const Instr &InstrMeta) const;
  97. // Returns whether this instruction is a call to a function that will trap on
  98. // CFI violations (i.e., it serves as a trap in this instance).
  99. bool willTrapOnCFIViolation(const Instr &InstrMeta) const;
  100. // Returns whether this function can fall through to the next instruction.
  101. // Undefined (and bad) instructions cannot fall through, and instruction that
  102. // modify the control flow can only fall through if they are conditional
  103. // branches or calls.
  104. bool canFallThrough(const Instr &InstrMeta) const;
  105. // Returns the definitive next instruction. This is different from the next
  106. // instruction sequentially as it will follow unconditional branches (assuming
  107. // they can be resolved at compile time, i.e. not indirect). This method
  108. // returns nullptr if the provided instruction does not transfer control flow
  109. // to exactly one instruction that is known deterministically at compile time.
  110. // Also returns nullptr if the deterministic target does not exist in this
  111. // file.
  112. const Instr *getDefiniteNextInstruction(const Instr &InstrMeta) const;
  113. // Get a list of deterministic control flows that lead to the provided
  114. // instruction. This list includes all static control flow cross-references as
  115. // well as the previous instruction if it can fall through.
  116. std::set<const Instr *>
  117. getDirectControlFlowXRefs(const Instr &InstrMeta) const;
  118. // Returns whether this instruction uses a register operand.
  119. bool usesRegisterOperand(const Instr &InstrMeta) const;
  120. // Returns the list of indirect instructions.
  121. const std::set<object::SectionedAddress> &getIndirectInstructions() const;
  122. const MCRegisterInfo *getRegisterInfo() const;
  123. const MCInstrInfo *getMCInstrInfo() const;
  124. const MCInstrAnalysis *getMCInstrAnalysis() const;
  125. // Returns the inlining information for the provided address.
  126. Expected<DIInliningInfo>
  127. symbolizeInlinedCode(object::SectionedAddress Address);
  128. // Returns whether the provided Graph represents a protected indirect control
  129. // flow instruction in this file.
  130. CFIProtectionStatus validateCFIProtection(const GraphResult &Graph) const;
  131. // Returns the first place the operand register is clobbered between the CFI-
  132. // check and the indirect CF instruction execution. We do this by walking
  133. // backwards from the indirect CF and ensuring there is at most one load
  134. // involving the operand register (which is the indirect CF itself on x86).
  135. // If the register is not modified, returns the address of the indirect CF
  136. // instruction. The result is undefined if the provided graph does not fall
  137. // under either the FAIL_REGISTER_CLOBBERED or PROTECTED status (see
  138. // CFIProtectionStatus).
  139. uint64_t indirectCFOperandClobber(const GraphResult& Graph) const;
  140. // Prints an instruction to the provided stream using this object's pretty-
  141. // printers.
  142. void printInstruction(const Instr &InstrMeta, raw_ostream &OS) const;
  143. protected:
  144. // Construct a blank object with the provided triple and features. Used in
  145. // testing, where a sub class will dependency inject protected methods to
  146. // allow analysis of raw binary, without requiring a fully valid ELF file.
  147. FileAnalysis(const Triple &ObjectTriple, const SubtargetFeatures &Features);
  148. // Add an instruction to this object.
  149. void addInstruction(const Instr &Instruction);
  150. // Disassemble and parse the provided bytes into this object. Instruction
  151. // address calculation is done relative to the provided SectionAddress.
  152. void parseSectionContents(ArrayRef<uint8_t> SectionBytes,
  153. object::SectionedAddress Address);
  154. // Constructs and initialises members required for disassembly.
  155. Error initialiseDisassemblyMembers();
  156. // Parses code sections from the internal object file. Saves them into the
  157. // internal members. Should only be called once by Create().
  158. Error parseCodeSections();
  159. // Parses the symbol table to look for the addresses of functions that will
  160. // trap on CFI violations.
  161. Error parseSymbolTable();
  162. private:
  163. // Members that describe the input file.
  164. object::OwningBinary<object::Binary> Binary;
  165. const object::ObjectFile *Object = nullptr;
  166. Triple ObjectTriple;
  167. std::string ArchName;
  168. std::string MCPU;
  169. const Target *ObjectTarget = nullptr;
  170. SubtargetFeatures Features;
  171. // Members required for disassembly.
  172. std::unique_ptr<const MCRegisterInfo> RegisterInfo;
  173. std::unique_ptr<const MCAsmInfo> AsmInfo;
  174. std::unique_ptr<MCSubtargetInfo> SubtargetInfo;
  175. std::unique_ptr<const MCInstrInfo> MII;
  176. std::unique_ptr<MCContext> Context;
  177. std::unique_ptr<const MCDisassembler> Disassembler;
  178. std::unique_ptr<const MCInstrAnalysis> MIA;
  179. std::unique_ptr<MCInstPrinter> Printer;
  180. // Symbolizer used for debug information parsing.
  181. std::unique_ptr<symbolize::LLVMSymbolizer> Symbolizer;
  182. // A mapping between the virtual memory address to the instruction metadata
  183. // struct. TODO(hctim): Reimplement this as a sorted vector to avoid per-
  184. // insertion allocation.
  185. std::map<uint64_t, Instr> Instructions;
  186. // Contains a mapping between a specific address, and a list of instructions
  187. // that use this address as a branch target (including call instructions).
  188. DenseMap<uint64_t, std::vector<uint64_t>> StaticBranchTargetings;
  189. // A list of addresses of indirect control flow instructions.
  190. std::set<object::SectionedAddress> IndirectInstructions;
  191. // The addresses of functions that will trap on CFI violations.
  192. SmallSet<uint64_t, 4> TrapOnFailFunctionAddresses;
  193. };
  194. class UnsupportedDisassembly : public ErrorInfo<UnsupportedDisassembly> {
  195. public:
  196. static char ID;
  197. std::string Text;
  198. UnsupportedDisassembly(StringRef Text);
  199. void log(raw_ostream &OS) const override;
  200. std::error_code convertToErrorCode() const override;
  201. };
  202. } // namespace cfi_verify
  203. } // namespace llvm
  204. #endif // LLVM_CFI_VERIFY_FILE_ANALYSIS_H