MCExternalSymbolizer.cpp 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195
  1. //===-- MCExternalSymbolizer.cpp - External symbolizer --------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. #include "llvm/MC/MCDisassembler/MCExternalSymbolizer.h"
  9. #include "llvm/MC/MCContext.h"
  10. #include "llvm/MC/MCExpr.h"
  11. #include "llvm/MC/MCInst.h"
  12. #include "llvm/Support/raw_ostream.h"
  13. #include <cstring>
  14. using namespace llvm;
  15. namespace llvm {
  16. class Triple;
  17. }
  18. // This function tries to add a symbolic operand in place of the immediate
  19. // Value in the MCInst. The immediate Value has had any PC adjustment made by
  20. // the caller. If the instruction is a branch instruction then IsBranch is true,
  21. // else false. If the getOpInfo() function was set as part of the
  22. // setupForSymbolicDisassembly() call then that function is called to get any
  23. // symbolic information at the Address for this instruction. If that returns
  24. // non-zero then the symbolic information it returns is used to create an MCExpr
  25. // and that is added as an operand to the MCInst. If getOpInfo() returns zero
  26. // and IsBranch is true then a symbol look up for Value is done and if a symbol
  27. // is found an MCExpr is created with that, else an MCExpr with Value is
  28. // created. This function returns true if it adds an operand to the MCInst and
  29. // false otherwise.
  30. bool MCExternalSymbolizer::tryAddingSymbolicOperand(
  31. MCInst &MI, raw_ostream &cStream, int64_t Value, uint64_t Address,
  32. bool IsBranch, uint64_t Offset, uint64_t OpSize, uint64_t InstSize) {
  33. struct LLVMOpInfo1 SymbolicOp;
  34. std::memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
  35. SymbolicOp.Value = Value;
  36. if (!GetOpInfo ||
  37. !GetOpInfo(DisInfo, Address, Offset, OpSize, InstSize, 1, &SymbolicOp)) {
  38. // Clear SymbolicOp.Value from above and also all other fields.
  39. std::memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
  40. // At this point, GetOpInfo() did not find any relocation information about
  41. // this operand and we are left to use the SymbolLookUp() call back to guess
  42. // if the Value is the address of a symbol. In the case this is a branch
  43. // that always makes sense to guess. But in the case of an immediate it is
  44. // a bit more questionable if it is an address of a symbol or some other
  45. // reference. So if the immediate Value comes from a width of 1 byte,
  46. // OpSize, we will not guess it is an address of a symbol. Because in
  47. // object files assembled starting at address 0 this usually leads to
  48. // incorrect symbolication.
  49. if (!SymbolLookUp || (OpSize == 1 && !IsBranch))
  50. return false;
  51. uint64_t ReferenceType;
  52. if (IsBranch)
  53. ReferenceType = LLVMDisassembler_ReferenceType_In_Branch;
  54. else
  55. ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;
  56. const char *ReferenceName;
  57. const char *Name = SymbolLookUp(DisInfo, Value, &ReferenceType, Address,
  58. &ReferenceName);
  59. if (Name) {
  60. SymbolicOp.AddSymbol.Name = Name;
  61. SymbolicOp.AddSymbol.Present = true;
  62. // If Name is a C++ symbol name put the human readable name in a comment.
  63. if(ReferenceType == LLVMDisassembler_ReferenceType_DeMangled_Name)
  64. cStream << ReferenceName;
  65. }
  66. // For branches always create an MCExpr so it gets printed as hex address.
  67. else if (IsBranch) {
  68. SymbolicOp.Value = Value;
  69. }
  70. if(ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub)
  71. cStream << "symbol stub for: " << ReferenceName;
  72. else if(ReferenceType == LLVMDisassembler_ReferenceType_Out_Objc_Message)
  73. cStream << "Objc message: " << ReferenceName;
  74. if (!Name && !IsBranch)
  75. return false;
  76. }
  77. const MCExpr *Add = nullptr;
  78. if (SymbolicOp.AddSymbol.Present) {
  79. if (SymbolicOp.AddSymbol.Name) {
  80. StringRef Name(SymbolicOp.AddSymbol.Name);
  81. MCSymbol *Sym = Ctx.getOrCreateSymbol(Name);
  82. Add = MCSymbolRefExpr::create(Sym, Ctx);
  83. } else {
  84. Add = MCConstantExpr::create((int)SymbolicOp.AddSymbol.Value, Ctx);
  85. }
  86. }
  87. const MCExpr *Sub = nullptr;
  88. if (SymbolicOp.SubtractSymbol.Present) {
  89. if (SymbolicOp.SubtractSymbol.Name) {
  90. StringRef Name(SymbolicOp.SubtractSymbol.Name);
  91. MCSymbol *Sym = Ctx.getOrCreateSymbol(Name);
  92. Sub = MCSymbolRefExpr::create(Sym, Ctx);
  93. } else {
  94. Sub = MCConstantExpr::create((int)SymbolicOp.SubtractSymbol.Value, Ctx);
  95. }
  96. }
  97. const MCExpr *Off = nullptr;
  98. if (SymbolicOp.Value != 0)
  99. Off = MCConstantExpr::create(SymbolicOp.Value, Ctx);
  100. const MCExpr *Expr;
  101. if (Sub) {
  102. const MCExpr *LHS;
  103. if (Add)
  104. LHS = MCBinaryExpr::createSub(Add, Sub, Ctx);
  105. else
  106. LHS = MCUnaryExpr::createMinus(Sub, Ctx);
  107. if (Off)
  108. Expr = MCBinaryExpr::createAdd(LHS, Off, Ctx);
  109. else
  110. Expr = LHS;
  111. } else if (Add) {
  112. if (Off)
  113. Expr = MCBinaryExpr::createAdd(Add, Off, Ctx);
  114. else
  115. Expr = Add;
  116. } else {
  117. if (Off)
  118. Expr = Off;
  119. else
  120. Expr = MCConstantExpr::create(0, Ctx);
  121. }
  122. Expr = RelInfo->createExprForCAPIVariantKind(Expr, SymbolicOp.VariantKind);
  123. if (!Expr)
  124. return false;
  125. MI.addOperand(MCOperand::createExpr(Expr));
  126. return true;
  127. }
  128. // This function tries to add a comment as to what is being referenced by a load
  129. // instruction with the base register that is the Pc. These can often be values
  130. // in a literal pool near the Address of the instruction. The Address of the
  131. // instruction and its immediate Value are used as a possible literal pool entry.
  132. // The SymbolLookUp call back will return the name of a symbol referenced by the
  133. // literal pool's entry if the referenced address is that of a symbol. Or it
  134. // will return a pointer to a literal 'C' string if the referenced address of
  135. // the literal pool's entry is an address into a section with C string literals.
  136. // Or if the reference is to an Objective-C data structure it will return a
  137. // specific reference type for it and a string.
  138. void MCExternalSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream,
  139. int64_t Value,
  140. uint64_t Address) {
  141. if (SymbolLookUp) {
  142. uint64_t ReferenceType = LLVMDisassembler_ReferenceType_In_PCrel_Load;
  143. const char *ReferenceName;
  144. (void)SymbolLookUp(DisInfo, Value, &ReferenceType, Address, &ReferenceName);
  145. if(ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr)
  146. cStream << "literal pool symbol address: " << ReferenceName;
  147. else if(ReferenceType ==
  148. LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr) {
  149. cStream << "literal pool for: \"";
  150. cStream.write_escaped(ReferenceName);
  151. cStream << "\"";
  152. }
  153. else if(ReferenceType ==
  154. LLVMDisassembler_ReferenceType_Out_Objc_CFString_Ref)
  155. cStream << "Objc cfstring ref: @\"" << ReferenceName << "\"";
  156. else if(ReferenceType ==
  157. LLVMDisassembler_ReferenceType_Out_Objc_Message)
  158. cStream << "Objc message: " << ReferenceName;
  159. else if(ReferenceType ==
  160. LLVMDisassembler_ReferenceType_Out_Objc_Message_Ref)
  161. cStream << "Objc message ref: " << ReferenceName;
  162. else if(ReferenceType ==
  163. LLVMDisassembler_ReferenceType_Out_Objc_Selector_Ref)
  164. cStream << "Objc selector ref: " << ReferenceName;
  165. else if(ReferenceType ==
  166. LLVMDisassembler_ReferenceType_Out_Objc_Class_Ref)
  167. cStream << "Objc class ref: " << ReferenceName;
  168. }
  169. }
  170. namespace llvm {
  171. MCSymbolizer *createMCSymbolizer(const Triple &TT, LLVMOpInfoCallback GetOpInfo,
  172. LLVMSymbolLookupCallback SymbolLookUp,
  173. void *DisInfo, MCContext *Ctx,
  174. std::unique_ptr<MCRelocationInfo> &&RelInfo) {
  175. assert(Ctx && "No MCContext given for symbolic disassembly");
  176. return new MCExternalSymbolizer(*Ctx, std::move(RelInfo), GetOpInfo,
  177. SymbolLookUp, DisInfo);
  178. }
  179. }