MCExternalSymbolizer.cpp 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199
  1. //===-- MCExternalSymbolizer.cpp - External symbolizer --------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. #include "llvm/MC/MCDisassembler/MCExternalSymbolizer.h"
  9. #include "llvm/MC/MCContext.h"
  10. #include "llvm/MC/MCExpr.h"
  11. #include "llvm/MC/MCInst.h"
  12. #include "llvm/Support/raw_ostream.h"
  13. #include <cstring>
  14. using namespace llvm;
// Forward declaration only: in the code visible in this file, Triple appears
// solely as a reference parameter of createMCSymbolizer(), so its full
// definition is not needed here.
namespace llvm {
class Triple;
}
  18. // This function tries to add a symbolic operand in place of the immediate
  19. // Value in the MCInst. The immediate Value has had any PC adjustment made by
  20. // the caller. If the instruction is a branch instruction then IsBranch is true,
  21. // else false. If the getOpInfo() function was set as part of the
  22. // setupForSymbolicDisassembly() call then that function is called to get any
  23. // symbolic information at the Address for this instruction. If that returns
  24. // non-zero then the symbolic information it returns is used to create an MCExpr
  25. // and that is added as an operand to the MCInst. If getOpInfo() returns zero
  26. // and IsBranch is true then a symbol look up for Value is done and if a symbol
  27. // is found an MCExpr is created with that, else an MCExpr with Value is
  28. // created. This function returns true if it adds an operand to the MCInst and
  29. // false otherwise.
  30. bool MCExternalSymbolizer::tryAddingSymbolicOperand(MCInst &MI,
  31. raw_ostream &cStream,
  32. int64_t Value,
  33. uint64_t Address,
  34. bool IsBranch,
  35. uint64_t Offset,
  36. uint64_t InstSize) {
  37. struct LLVMOpInfo1 SymbolicOp;
  38. std::memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
  39. SymbolicOp.Value = Value;
  40. if (!GetOpInfo ||
  41. !GetOpInfo(DisInfo, Address, Offset, InstSize, 1, &SymbolicOp)) {
  42. // Clear SymbolicOp.Value from above and also all other fields.
  43. std::memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
  44. // At this point, GetOpInfo() did not find any relocation information about
  45. // this operand and we are left to use the SymbolLookUp() call back to guess
  46. // if the Value is the address of a symbol. In the case this is a branch
  47. // that always makes sense to guess. But in the case of an immediate it is
  48. // a bit more questionable if it is an address of a symbol or some other
  49. // reference. So if the immediate Value comes from a width of 1 byte,
  50. // InstSize, we will not guess it is an address of a symbol. Because in
  51. // object files assembled starting at address 0 this usually leads to
  52. // incorrect symbolication.
  53. if (!SymbolLookUp || (InstSize == 1 && !IsBranch))
  54. return false;
  55. uint64_t ReferenceType;
  56. if (IsBranch)
  57. ReferenceType = LLVMDisassembler_ReferenceType_In_Branch;
  58. else
  59. ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;
  60. const char *ReferenceName;
  61. const char *Name = SymbolLookUp(DisInfo, Value, &ReferenceType, Address,
  62. &ReferenceName);
  63. if (Name) {
  64. SymbolicOp.AddSymbol.Name = Name;
  65. SymbolicOp.AddSymbol.Present = true;
  66. // If Name is a C++ symbol name put the human readable name in a comment.
  67. if(ReferenceType == LLVMDisassembler_ReferenceType_DeMangled_Name)
  68. cStream << ReferenceName;
  69. }
  70. // For branches always create an MCExpr so it gets printed as hex address.
  71. else if (IsBranch) {
  72. SymbolicOp.Value = Value;
  73. }
  74. if(ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub)
  75. cStream << "symbol stub for: " << ReferenceName;
  76. else if(ReferenceType == LLVMDisassembler_ReferenceType_Out_Objc_Message)
  77. cStream << "Objc message: " << ReferenceName;
  78. if (!Name && !IsBranch)
  79. return false;
  80. }
  81. const MCExpr *Add = nullptr;
  82. if (SymbolicOp.AddSymbol.Present) {
  83. if (SymbolicOp.AddSymbol.Name) {
  84. StringRef Name(SymbolicOp.AddSymbol.Name);
  85. MCSymbol *Sym = Ctx.getOrCreateSymbol(Name);
  86. Add = MCSymbolRefExpr::create(Sym, Ctx);
  87. } else {
  88. Add = MCConstantExpr::create((int)SymbolicOp.AddSymbol.Value, Ctx);
  89. }
  90. }
  91. const MCExpr *Sub = nullptr;
  92. if (SymbolicOp.SubtractSymbol.Present) {
  93. if (SymbolicOp.SubtractSymbol.Name) {
  94. StringRef Name(SymbolicOp.SubtractSymbol.Name);
  95. MCSymbol *Sym = Ctx.getOrCreateSymbol(Name);
  96. Sub = MCSymbolRefExpr::create(Sym, Ctx);
  97. } else {
  98. Sub = MCConstantExpr::create((int)SymbolicOp.SubtractSymbol.Value, Ctx);
  99. }
  100. }
  101. const MCExpr *Off = nullptr;
  102. if (SymbolicOp.Value != 0)
  103. Off = MCConstantExpr::create(SymbolicOp.Value, Ctx);
  104. const MCExpr *Expr;
  105. if (Sub) {
  106. const MCExpr *LHS;
  107. if (Add)
  108. LHS = MCBinaryExpr::createSub(Add, Sub, Ctx);
  109. else
  110. LHS = MCUnaryExpr::createMinus(Sub, Ctx);
  111. if (Off)
  112. Expr = MCBinaryExpr::createAdd(LHS, Off, Ctx);
  113. else
  114. Expr = LHS;
  115. } else if (Add) {
  116. if (Off)
  117. Expr = MCBinaryExpr::createAdd(Add, Off, Ctx);
  118. else
  119. Expr = Add;
  120. } else {
  121. if (Off)
  122. Expr = Off;
  123. else
  124. Expr = MCConstantExpr::create(0, Ctx);
  125. }
  126. Expr = RelInfo->createExprForCAPIVariantKind(Expr, SymbolicOp.VariantKind);
  127. if (!Expr)
  128. return false;
  129. MI.addOperand(MCOperand::createExpr(Expr));
  130. return true;
  131. }
  132. // This function tries to add a comment as to what is being referenced by a load
  133. // instruction with the base register that is the Pc. These can often be values
  134. // in a literal pool near the Address of the instruction. The Address of the
  135. // instruction and its immediate Value are used as a possible literal pool entry.
  136. // The SymbolLookUp call back will return the name of a symbol referenced by the
  137. // literal pool's entry if the referenced address is that of a symbol. Or it
  138. // will return a pointer to a literal 'C' string if the referenced address of
  139. // the literal pool's entry is an address into a section with C string literals.
  140. // Or if the reference is to an Objective-C data structure it will return a
  141. // specific reference type for it and a string.
  142. void MCExternalSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream,
  143. int64_t Value,
  144. uint64_t Address) {
  145. if (SymbolLookUp) {
  146. uint64_t ReferenceType = LLVMDisassembler_ReferenceType_In_PCrel_Load;
  147. const char *ReferenceName;
  148. (void)SymbolLookUp(DisInfo, Value, &ReferenceType, Address, &ReferenceName);
  149. if(ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr)
  150. cStream << "literal pool symbol address: " << ReferenceName;
  151. else if(ReferenceType ==
  152. LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr) {
  153. cStream << "literal pool for: \"";
  154. cStream.write_escaped(ReferenceName);
  155. cStream << "\"";
  156. }
  157. else if(ReferenceType ==
  158. LLVMDisassembler_ReferenceType_Out_Objc_CFString_Ref)
  159. cStream << "Objc cfstring ref: @\"" << ReferenceName << "\"";
  160. else if(ReferenceType ==
  161. LLVMDisassembler_ReferenceType_Out_Objc_Message)
  162. cStream << "Objc message: " << ReferenceName;
  163. else if(ReferenceType ==
  164. LLVMDisassembler_ReferenceType_Out_Objc_Message_Ref)
  165. cStream << "Objc message ref: " << ReferenceName;
  166. else if(ReferenceType ==
  167. LLVMDisassembler_ReferenceType_Out_Objc_Selector_Ref)
  168. cStream << "Objc selector ref: " << ReferenceName;
  169. else if(ReferenceType ==
  170. LLVMDisassembler_ReferenceType_Out_Objc_Class_Ref)
  171. cStream << "Objc class ref: " << ReferenceName;
  172. }
  173. }
  174. namespace llvm {
  175. MCSymbolizer *createMCSymbolizer(const Triple &TT, LLVMOpInfoCallback GetOpInfo,
  176. LLVMSymbolLookupCallback SymbolLookUp,
  177. void *DisInfo, MCContext *Ctx,
  178. std::unique_ptr<MCRelocationInfo> &&RelInfo) {
  179. assert(Ctx && "No MCContext given for symbolic disassembly");
  180. return new MCExternalSymbolizer(*Ctx, std::move(RelInfo), GetOpInfo,
  181. SymbolLookUp, DisInfo);
  182. }
  183. }