MCDisassembler.h 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208
  1. #pragma once
  2. #ifdef __GNUC__
  3. #pragma GCC diagnostic push
  4. #pragma GCC diagnostic ignored "-Wunused-parameter"
  5. #endif
  6. //===- llvm/MC/MCDisassembler.h - Disassembler interface --------*- C++ -*-===//
  7. //
  8. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  9. // See https://llvm.org/LICENSE.txt for license information.
  10. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  11. //
  12. //===----------------------------------------------------------------------===//
  13. #ifndef LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H
  14. #define LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H
  #include "llvm/ADT/Optional.h"
  #include "llvm/ADT/StringRef.h"
  #include "llvm/BinaryFormat/XCOFF.h"
  #include "llvm/MC/MCDisassembler/MCSymbolizer.h"
  #include <cassert>
  #include <cstdint>
  #include <memory>
  #include <tuple>
  #include <vector>
  22. namespace llvm {
  23. struct XCOFFSymbolInfo {
  24. Optional<XCOFF::StorageMappingClass> StorageMappingClass;
  25. Optional<uint32_t> Index;
  26. bool IsLabel;
  27. XCOFFSymbolInfo(Optional<XCOFF::StorageMappingClass> Smc,
  28. Optional<uint32_t> Idx, bool Label)
  29. : StorageMappingClass(Smc), Index(Idx), IsLabel(Label) {}
  30. bool operator<(const XCOFFSymbolInfo &SymInfo) const;
  31. };
  32. struct SymbolInfoTy {
  33. uint64_t Addr;
  34. StringRef Name;
  35. union {
  36. uint8_t Type;
  37. XCOFFSymbolInfo XCOFFSymInfo;
  38. };
  39. private:
  40. bool IsXCOFF;
  41. public:
  42. SymbolInfoTy(uint64_t Addr, StringRef Name,
  43. Optional<XCOFF::StorageMappingClass> Smc, Optional<uint32_t> Idx,
  44. bool Label)
  45. : Addr(Addr), Name(Name), XCOFFSymInfo(Smc, Idx, Label), IsXCOFF(true) {}
  46. SymbolInfoTy(uint64_t Addr, StringRef Name, uint8_t Type)
  47. : Addr(Addr), Name(Name), Type(Type), IsXCOFF(false) {}
  48. bool isXCOFF() const { return IsXCOFF; }
  49. private:
  50. friend bool operator<(const SymbolInfoTy &P1, const SymbolInfoTy &P2) {
  51. assert(P1.IsXCOFF == P2.IsXCOFF &&
  52. "P1.IsXCOFF should be equal to P2.IsXCOFF.");
  53. if (P1.IsXCOFF)
  54. return std::tie(P1.Addr, P1.XCOFFSymInfo, P1.Name) <
  55. std::tie(P2.Addr, P2.XCOFFSymInfo, P2.Name);
  56. return std::tie(P1.Addr, P1.Name, P1.Type) <
  57. std::tie(P2.Addr, P2.Name, P2.Type);
  58. }
  59. };
  60. using SectionSymbolsTy = std::vector<SymbolInfoTy>;
  61. template <typename T> class ArrayRef;
  62. class MCContext;
  63. class MCInst;
  64. class MCSubtargetInfo;
  65. class raw_ostream;
  66. /// Superclass for all disassemblers. Consumes a memory region and provides an
  67. /// array of assembly instructions.
  68. class MCDisassembler {
  69. public:
  70. /// Ternary decode status. Most backends will just use Fail and
  71. /// Success, however some have a concept of an instruction with
  72. /// understandable semantics but which is architecturally
  73. /// incorrect. An example of this is ARM UNPREDICTABLE instructions
  74. /// which are disassemblable but cause undefined behaviour.
  75. ///
  76. /// Because it makes sense to disassemble these instructions, there
  77. /// is a "soft fail" failure mode that indicates the MCInst& is
  78. /// valid but architecturally incorrect.
  79. ///
  80. /// The enum numbers are deliberately chosen such that reduction
  81. /// from Success->SoftFail ->Fail can be done with a simple
  82. /// bitwise-AND:
  83. ///
  84. /// LEFT & TOP = | Success Unpredictable Fail
  85. /// --------------+-----------------------------------
  86. /// Success | Success Unpredictable Fail
  87. /// Unpredictable | Unpredictable Unpredictable Fail
  88. /// Fail | Fail Fail Fail
  89. ///
  90. /// An easy way of encoding this is as 0b11, 0b01, 0b00 for
  91. /// Success, SoftFail, Fail respectively.
  92. enum DecodeStatus {
  93. Fail = 0,
  94. SoftFail = 1,
  95. Success = 3
  96. };
  97. MCDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
  98. : Ctx(Ctx), STI(STI) {}
  99. virtual ~MCDisassembler();
  100. /// Returns the disassembly of a single instruction.
  101. ///
  102. /// \param Instr - An MCInst to populate with the contents of the
  103. /// instruction.
  104. /// \param Size - A value to populate with the size of the instruction, or
  105. /// the number of bytes consumed while attempting to decode
  106. /// an invalid instruction.
  107. /// \param Address - The address, in the memory space of region, of the first
  108. /// byte of the instruction.
  109. /// \param Bytes - A reference to the actual bytes of the instruction.
  110. /// \param CStream - The stream to print comments and annotations on.
  111. /// \return - MCDisassembler::Success if the instruction is valid,
  112. /// MCDisassembler::SoftFail if the instruction was
  113. /// disassemblable but invalid,
  114. /// MCDisassembler::Fail if the instruction was invalid.
  115. virtual DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
  116. ArrayRef<uint8_t> Bytes, uint64_t Address,
  117. raw_ostream &CStream) const = 0;
  118. /// Used to perform separate target specific disassembly for a particular
  119. /// symbol. May parse any prelude that precedes instructions after the
  120. /// start of a symbol, or the entire symbol.
  121. /// This is used for example by WebAssembly to decode preludes.
  122. ///
  123. /// Base implementation returns None. So all targets by default ignore to
  124. /// treat symbols separately.
  125. ///
  126. /// \param Symbol - The symbol.
  127. /// \param Size - The number of bytes consumed.
  128. /// \param Address - The address, in the memory space of region, of the first
  129. /// byte of the symbol.
  130. /// \param Bytes - A reference to the actual bytes at the symbol location.
  131. /// \param CStream - The stream to print comments and annotations on.
  132. /// \return - MCDisassembler::Success if bytes are decoded
  133. /// successfully. Size must hold the number of bytes that
  134. /// were decoded.
  135. /// - MCDisassembler::Fail if the bytes are invalid. Size
  136. /// must hold the number of bytes that were decoded before
  137. /// failing. The target must print nothing. This can be
  138. /// done by buffering the output if needed.
  139. /// - None if the target doesn't want to handle the symbol
  140. /// separately. Value of Size is ignored in this case.
  141. virtual Optional<DecodeStatus>
  142. onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef<uint8_t> Bytes,
  143. uint64_t Address, raw_ostream &CStream) const;
  144. // TODO:
  145. // Implement similar hooks that can be used at other points during
  146. // disassembly. Something along the following lines:
  147. // - onBeforeInstructionDecode()
  148. // - onAfterInstructionDecode()
  149. // - onSymbolEnd()
  150. // It should help move much of the target specific code from llvm-objdump to
  151. // respective target disassemblers.
  152. private:
  153. MCContext &Ctx;
  154. protected:
  155. // Subtarget information, for instruction decoding predicates if required.
  156. const MCSubtargetInfo &STI;
  157. std::unique_ptr<MCSymbolizer> Symbolizer;
  158. public:
  159. // Helpers around MCSymbolizer
  160. bool tryAddingSymbolicOperand(MCInst &Inst,
  161. int64_t Value,
  162. uint64_t Address, bool IsBranch,
  163. uint64_t Offset, uint64_t InstSize) const;
  164. void tryAddingPcLoadReferenceComment(int64_t Value, uint64_t Address) const;
  165. /// Set \p Symzer as the current symbolizer.
  166. /// This takes ownership of \p Symzer, and deletes the previously set one.
  167. void setSymbolizer(std::unique_ptr<MCSymbolizer> Symzer);
  168. MCContext& getContext() const { return Ctx; }
  169. const MCSubtargetInfo& getSubtargetInfo() const { return STI; }
  170. // Marked mutable because we cache it inside the disassembler, rather than
  171. // having to pass it around as an argument through all the autogenerated code.
  172. mutable raw_ostream *CommentStream = nullptr;
  173. };
  174. } // end namespace llvm
  175. #endif // LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H
  176. #ifdef __GNUC__
  177. #pragma GCC diagnostic pop
  178. #endif