MCDisassembler.h 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234
  1. #pragma once
  2. #ifdef __GNUC__
  3. #pragma GCC diagnostic push
  4. #pragma GCC diagnostic ignored "-Wunused-parameter"
  5. #endif
  6. //===- llvm/MC/MCDisassembler.h - Disassembler interface --------*- C++ -*-===//
  7. //
  8. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  9. // See https://llvm.org/LICENSE.txt for license information.
  10. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  11. //
  12. //===----------------------------------------------------------------------===//
  13. #ifndef LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H
  14. #define LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H
  15. #include "llvm/ADT/StringRef.h"
  16. #include "llvm/BinaryFormat/XCOFF.h"
  17. #include "llvm/MC/MCDisassembler/MCSymbolizer.h"
  18. #include <cstdint>
  19. #include <memory>
  20. #include <vector>
  21. namespace llvm {
  22. struct XCOFFSymbolInfoTy {
  23. std::optional<XCOFF::StorageMappingClass> StorageMappingClass;
  24. std::optional<uint32_t> Index;
  25. bool IsLabel = false;
  26. bool operator<(const XCOFFSymbolInfoTy &SymInfo) const;
  27. };
  28. struct SymbolInfoTy {
  29. uint64_t Addr;
  30. StringRef Name;
  31. // XCOFF uses XCOFFSymInfo. Other targets use Type.
  32. XCOFFSymbolInfoTy XCOFFSymInfo;
  33. uint8_t Type;
  34. private:
  35. bool IsXCOFF;
  36. bool HasType;
  37. public:
  38. SymbolInfoTy(uint64_t Addr, StringRef Name,
  39. std::optional<XCOFF::StorageMappingClass> Smc,
  40. std::optional<uint32_t> Idx, bool Label)
  41. : Addr(Addr), Name(Name), XCOFFSymInfo{Smc, Idx, Label}, Type(0),
  42. IsXCOFF(true), HasType(false) {}
  43. SymbolInfoTy(uint64_t Addr, StringRef Name, uint8_t Type,
  44. bool IsXCOFF = false)
  45. : Addr(Addr), Name(Name), Type(Type), IsXCOFF(IsXCOFF), HasType(true) {}
  46. bool isXCOFF() const { return IsXCOFF; }
  47. private:
  48. friend bool operator<(const SymbolInfoTy &P1, const SymbolInfoTy &P2) {
  49. assert((P1.IsXCOFF == P2.IsXCOFF && P1.HasType == P2.HasType) &&
  50. "The value of IsXCOFF and HasType in P1 and P2 should be the same "
  51. "respectively.");
  52. if (P1.IsXCOFF && P1.HasType)
  53. return std::tie(P1.Addr, P1.Type, P1.Name) <
  54. std::tie(P2.Addr, P2.Type, P2.Name);
  55. if (P1.IsXCOFF)
  56. return std::tie(P1.Addr, P1.XCOFFSymInfo, P1.Name) <
  57. std::tie(P2.Addr, P2.XCOFFSymInfo, P2.Name);
  58. return std::tie(P1.Addr, P1.Name, P1.Type) <
  59. std::tie(P2.Addr, P2.Name, P2.Type);
  60. }
  61. };
  62. using SectionSymbolsTy = std::vector<SymbolInfoTy>;
  63. template <typename T> class ArrayRef;
  64. class MCContext;
  65. class MCInst;
  66. class MCSubtargetInfo;
  67. class raw_ostream;
  68. /// Superclass for all disassemblers. Consumes a memory region and provides an
  69. /// array of assembly instructions.
  70. class MCDisassembler {
  71. public:
  72. /// Ternary decode status. Most backends will just use Fail and
  73. /// Success, however some have a concept of an instruction with
  74. /// understandable semantics but which is architecturally
  75. /// incorrect. An example of this is ARM UNPREDICTABLE instructions
  76. /// which are disassemblable but cause undefined behaviour.
  77. ///
  78. /// Because it makes sense to disassemble these instructions, there
  79. /// is a "soft fail" failure mode that indicates the MCInst& is
  80. /// valid but architecturally incorrect.
  81. ///
  82. /// The enum numbers are deliberately chosen such that reduction
  83. /// from Success->SoftFail ->Fail can be done with a simple
  84. /// bitwise-AND:
  85. ///
  86. /// LEFT & TOP = | Success Unpredictable Fail
  87. /// --------------+-----------------------------------
  88. /// Success | Success Unpredictable Fail
  89. /// Unpredictable | Unpredictable Unpredictable Fail
  90. /// Fail | Fail Fail Fail
  91. ///
  92. /// An easy way of encoding this is as 0b11, 0b01, 0b00 for
  93. /// Success, SoftFail, Fail respectively.
  94. enum DecodeStatus {
  95. Fail = 0,
  96. SoftFail = 1,
  97. Success = 3
  98. };
  99. MCDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
  100. : Ctx(Ctx), STI(STI) {}
  101. virtual ~MCDisassembler();
  102. /// Returns the disassembly of a single instruction.
  103. ///
  104. /// \param Instr - An MCInst to populate with the contents of the
  105. /// instruction.
  106. /// \param Size - A value to populate with the size of the instruction, or
  107. /// the number of bytes consumed while attempting to decode
  108. /// an invalid instruction.
  109. /// \param Address - The address, in the memory space of region, of the first
  110. /// byte of the instruction.
  111. /// \param Bytes - A reference to the actual bytes of the instruction.
  112. /// \param CStream - The stream to print comments and annotations on.
  113. /// \return - MCDisassembler::Success if the instruction is valid,
  114. /// MCDisassembler::SoftFail if the instruction was
  115. /// disassemblable but invalid,
  116. /// MCDisassembler::Fail if the instruction was invalid.
  117. virtual DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
  118. ArrayRef<uint8_t> Bytes, uint64_t Address,
  119. raw_ostream &CStream) const = 0;
  120. /// Used to perform separate target specific disassembly for a particular
  121. /// symbol. May parse any prelude that precedes instructions after the
  122. /// start of a symbol, or the entire symbol.
  123. /// This is used for example by WebAssembly to decode preludes.
  124. ///
  125. /// Base implementation returns std::nullopt. So all targets by default ignore
  126. /// to treat symbols separately.
  127. ///
  128. /// \param Symbol - The symbol.
  129. /// \param Size - The number of bytes consumed.
  130. /// \param Address - The address, in the memory space of region, of the first
  131. /// byte of the symbol.
  132. /// \param Bytes - A reference to the actual bytes at the symbol location.
  133. /// \param CStream - The stream to print comments and annotations on.
  134. /// \return - MCDisassembler::Success if bytes are decoded
  135. /// successfully. Size must hold the number of bytes that
  136. /// were decoded.
  137. /// - MCDisassembler::Fail if the bytes are invalid. Size
  138. /// must hold the number of bytes that were decoded before
  139. /// failing. The target must print nothing. This can be
  140. /// done by buffering the output if needed.
  141. /// - std::nullopt if the target doesn't want to handle the
  142. /// symbol separately. Value of Size is ignored in this
  143. /// case.
  144. virtual std::optional<DecodeStatus>
  145. onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef<uint8_t> Bytes,
  146. uint64_t Address, raw_ostream &CStream) const;
  147. // TODO:
  148. // Implement similar hooks that can be used at other points during
  149. // disassembly. Something along the following lines:
  150. // - onBeforeInstructionDecode()
  151. // - onAfterInstructionDecode()
  152. // - onSymbolEnd()
  153. // It should help move much of the target specific code from llvm-objdump to
  154. // respective target disassemblers.
  155. /// Suggest a distance to skip in a buffer of data to find the next
  156. /// place to look for the start of an instruction. For example, if
  157. /// all instructions have a fixed alignment, this might advance to
  158. /// the next multiple of that alignment.
  159. ///
  160. /// If not overridden, the default is 1.
  161. ///
  162. /// \param Address - The address, in the memory space of region, of the
  163. /// starting point (typically the first byte of something
  164. /// that did not decode as a valid instruction at all).
  165. /// \param Bytes - A reference to the actual bytes at Address. May be
  166. /// needed in order to determine the width of an
  167. /// unrecognized instruction (e.g. in Thumb this is a simple
  168. /// consistent criterion that doesn't require knowing the
  169. /// specific instruction). The caller can pass as much data
  170. /// as they have available, and the function is required to
  171. /// make a reasonable default choice if not enough data is
  172. /// available to make a better one.
  173. /// \return - A number of bytes to skip. Must always be greater than
  174. /// zero. May be greater than the size of Bytes.
  175. virtual uint64_t suggestBytesToSkip(ArrayRef<uint8_t> Bytes,
  176. uint64_t Address) const;
  177. private:
  178. MCContext &Ctx;
  179. protected:
  180. // Subtarget information, for instruction decoding predicates if required.
  181. const MCSubtargetInfo &STI;
  182. std::unique_ptr<MCSymbolizer> Symbolizer;
  183. public:
  184. // Helpers around MCSymbolizer
  185. bool tryAddingSymbolicOperand(MCInst &Inst, int64_t Value, uint64_t Address,
  186. bool IsBranch, uint64_t Offset, uint64_t OpSize,
  187. uint64_t InstSize) const;
  188. void tryAddingPcLoadReferenceComment(int64_t Value, uint64_t Address) const;
  189. /// Set \p Symzer as the current symbolizer.
  190. /// This takes ownership of \p Symzer, and deletes the previously set one.
  191. void setSymbolizer(std::unique_ptr<MCSymbolizer> Symzer);
  192. MCContext& getContext() const { return Ctx; }
  193. const MCSubtargetInfo& getSubtargetInfo() const { return STI; }
  194. // Marked mutable because we cache it inside the disassembler, rather than
  195. // having to pass it around as an argument through all the autogenerated code.
  196. mutable raw_ostream *CommentStream = nullptr;
  197. };
  198. } // end namespace llvm
  199. #endif // LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H
  200. #ifdef __GNUC__
  201. #pragma GCC diagnostic pop
  202. #endif