macho2yaml.cpp 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647
  1. //===------ macho2yaml.cpp - obj2yaml conversion tool -----------*- C++ -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. #include "obj2yaml.h"
  9. #include "llvm/DebugInfo/DWARF/DWARFContext.h"
  10. #include "llvm/Object/MachOUniversal.h"
  11. #include "llvm/ObjectYAML/DWARFYAML.h"
  12. #include "llvm/ObjectYAML/ObjectYAML.h"
  13. #include "llvm/Support/Error.h"
  14. #include "llvm/Support/ErrorHandling.h"
  15. #include "llvm/Support/LEB128.h"
  16. #include <string.h> // for memcpy
  17. using namespace llvm;
  18. class MachODumper {
  19. template <typename StructType>
  20. Expected<const char *> processLoadCommandData(
  21. MachOYAML::LoadCommand &LC,
  22. const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
  23. MachOYAML::Object &Y);
  24. const object::MachOObjectFile &Obj;
  25. std::unique_ptr<DWARFContext> DWARFCtx;
  26. void dumpHeader(std::unique_ptr<MachOYAML::Object> &Y);
  27. Error dumpLoadCommands(std::unique_ptr<MachOYAML::Object> &Y);
  28. void dumpLinkEdit(std::unique_ptr<MachOYAML::Object> &Y);
  29. void dumpRebaseOpcodes(std::unique_ptr<MachOYAML::Object> &Y);
  30. void dumpBindOpcodes(std::vector<MachOYAML::BindOpcode> &BindOpcodes,
  31. ArrayRef<uint8_t> OpcodeBuffer, bool Lazy = false);
  32. void dumpExportTrie(std::unique_ptr<MachOYAML::Object> &Y);
  33. void dumpSymbols(std::unique_ptr<MachOYAML::Object> &Y);
  34. template <typename SectionType>
  35. Expected<MachOYAML::Section> constructSectionCommon(SectionType Sec,
  36. size_t SecIndex);
  37. template <typename SectionType>
  38. Expected<MachOYAML::Section> constructSection(SectionType Sec,
  39. size_t SecIndex);
  40. template <typename SectionType, typename SegmentType>
  41. Expected<const char *>
  42. extractSections(const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
  43. std::vector<MachOYAML::Section> &Sections,
  44. MachOYAML::Object &Y);
  45. public:
  46. MachODumper(const object::MachOObjectFile &O,
  47. std::unique_ptr<DWARFContext> DCtx)
  48. : Obj(O), DWARFCtx(std::move(DCtx)) {}
  49. Expected<std::unique_ptr<MachOYAML::Object>> dump();
  50. };
  // Expands to one `case` of the switch in MachODumper::dumpLoadCommands (the
  // cases are generated by including llvm/BinaryFormat/MachO.def). Each case:
  //   1. copies the fixed-size MachO::LCStruct out of the file into LC.Data,
  //   2. byte-swaps it when the file and host endianness differ,
  //   3. lets processLoadCommandData<MachO::LCStruct> consume trailing data and
  //      stores the resulting end pointer in EndPtr, or returns its error.
  // The expansion relies on LC, LoadCmd, EndPtr, Y and Obj being in scope.
  #define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \
    case MachO::LCName: { \
      memcpy(static_cast<void *>(&LC.Data.LCStruct##_data), LoadCmd.Ptr, \
             sizeof(MachO::LCStruct)); \
      if (Obj.isLittleEndian() != sys::IsLittleEndianHost) \
        MachO::swapStruct(LC.Data.LCStruct##_data); \
      Expected<const char *> ExpectedEndPtr = \
          processLoadCommandData<MachO::LCStruct>(LC, LoadCmd, *Y); \
      if (!ExpectedEndPtr) \
        return ExpectedEndPtr.takeError(); \
      EndPtr = *ExpectedEndPtr; \
      break; \
    }
  63. template <typename SectionType>
  64. Expected<MachOYAML::Section>
  65. MachODumper::constructSectionCommon(SectionType Sec, size_t SecIndex) {
  66. MachOYAML::Section TempSec;
  67. memcpy(reinterpret_cast<void *>(&TempSec.sectname[0]), &Sec.sectname[0], 16);
  68. memcpy(reinterpret_cast<void *>(&TempSec.segname[0]), &Sec.segname[0], 16);
  69. TempSec.addr = Sec.addr;
  70. TempSec.size = Sec.size;
  71. TempSec.offset = Sec.offset;
  72. TempSec.align = Sec.align;
  73. TempSec.reloff = Sec.reloff;
  74. TempSec.nreloc = Sec.nreloc;
  75. TempSec.flags = Sec.flags;
  76. TempSec.reserved1 = Sec.reserved1;
  77. TempSec.reserved2 = Sec.reserved2;
  78. TempSec.reserved3 = 0;
  79. if (!MachO::isVirtualSection(Sec.flags & MachO::SECTION_TYPE))
  80. TempSec.content =
  81. yaml::BinaryRef(Obj.getSectionContents(Sec.offset, Sec.size));
  82. if (Expected<object::SectionRef> SecRef = Obj.getSection(SecIndex)) {
  83. TempSec.relocations.reserve(TempSec.nreloc);
  84. for (const object::RelocationRef &Reloc : SecRef->relocations()) {
  85. const object::DataRefImpl Rel = Reloc.getRawDataRefImpl();
  86. const MachO::any_relocation_info RE = Obj.getRelocation(Rel);
  87. MachOYAML::Relocation R;
  88. R.address = Obj.getAnyRelocationAddress(RE);
  89. R.is_pcrel = Obj.getAnyRelocationPCRel(RE);
  90. R.length = Obj.getAnyRelocationLength(RE);
  91. R.type = Obj.getAnyRelocationType(RE);
  92. R.is_scattered = Obj.isRelocationScattered(RE);
  93. R.symbolnum = (R.is_scattered ? 0 : Obj.getPlainRelocationSymbolNum(RE));
  94. R.is_extern =
  95. (R.is_scattered ? false : Obj.getPlainRelocationExternal(RE));
  96. R.value = (R.is_scattered ? Obj.getScatteredRelocationValue(RE) : 0);
  97. TempSec.relocations.push_back(R);
  98. }
  99. } else {
  100. return SecRef.takeError();
  101. }
  102. return TempSec;
  103. }
  104. template <>
  105. Expected<MachOYAML::Section> MachODumper::constructSection(MachO::section Sec,
  106. size_t SecIndex) {
  107. Expected<MachOYAML::Section> TempSec = constructSectionCommon(Sec, SecIndex);
  108. if (TempSec)
  109. TempSec->reserved3 = 0;
  110. return TempSec;
  111. }
  112. template <>
  113. Expected<MachOYAML::Section>
  114. MachODumper::constructSection(MachO::section_64 Sec, size_t SecIndex) {
  115. Expected<MachOYAML::Section> TempSec = constructSectionCommon(Sec, SecIndex);
  116. if (TempSec)
  117. TempSec->reserved3 = Sec.reserved3;
  118. return TempSec;
  119. }
  120. static Error dumpDebugSection(StringRef SecName, DWARFContext &DCtx,
  121. DWARFYAML::Data &DWARF) {
  122. if (SecName == "__debug_abbrev") {
  123. dumpDebugAbbrev(DCtx, DWARF);
  124. return Error::success();
  125. }
  126. if (SecName == "__debug_aranges")
  127. return dumpDebugARanges(DCtx, DWARF);
  128. if (SecName == "__debug_info") {
  129. dumpDebugInfo(DCtx, DWARF);
  130. return Error::success();
  131. }
  132. if (SecName == "__debug_line") {
  133. dumpDebugLines(DCtx, DWARF);
  134. return Error::success();
  135. }
  136. if (SecName.startswith("__debug_pub")) {
  137. // FIXME: We should extract pub-section dumpers from this function.
  138. dumpDebugPubSections(DCtx, DWARF);
  139. return Error::success();
  140. }
  141. if (SecName == "__debug_ranges")
  142. return dumpDebugRanges(DCtx, DWARF);
  143. if (SecName == "__debug_str")
  144. return dumpDebugStrings(DCtx, DWARF);
  145. return createStringError(errc::not_supported,
  146. "dumping " + SecName + " section is not supported");
  147. }
  148. template <typename SectionType, typename SegmentType>
  149. Expected<const char *> MachODumper::extractSections(
  150. const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
  151. std::vector<MachOYAML::Section> &Sections, MachOYAML::Object &Y) {
  152. auto End = LoadCmd.Ptr + LoadCmd.C.cmdsize;
  153. const SectionType *Curr =
  154. reinterpret_cast<const SectionType *>(LoadCmd.Ptr + sizeof(SegmentType));
  155. for (; reinterpret_cast<const void *>(Curr) < End; Curr++) {
  156. SectionType Sec;
  157. memcpy((void *)&Sec, Curr, sizeof(SectionType));
  158. if (Obj.isLittleEndian() != sys::IsLittleEndianHost)
  159. MachO::swapStruct(Sec);
  160. // For MachO section indices start from 1.
  161. if (Expected<MachOYAML::Section> S =
  162. constructSection(Sec, Sections.size() + 1)) {
  163. StringRef SecName(S->sectname);
  164. if (SecName.startswith("__debug_")) {
  165. // If the DWARF section cannot be successfully parsed, emit raw content
  166. // instead of an entry in the DWARF section of the YAML.
  167. if (Error Err = dumpDebugSection(SecName, *DWARFCtx.get(), Y.DWARF))
  168. consumeError(std::move(Err));
  169. else
  170. S->content.reset();
  171. }
  172. Sections.push_back(std::move(*S));
  173. } else
  174. return S.takeError();
  175. }
  176. return reinterpret_cast<const char *>(Curr);
  177. }
  178. template <typename StructType>
  179. Expected<const char *> MachODumper::processLoadCommandData(
  180. MachOYAML::LoadCommand &LC,
  181. const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
  182. MachOYAML::Object &Y) {
  183. return LoadCmd.Ptr + sizeof(StructType);
  184. }
  185. template <>
  186. Expected<const char *>
  187. MachODumper::processLoadCommandData<MachO::segment_command>(
  188. MachOYAML::LoadCommand &LC,
  189. const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
  190. MachOYAML::Object &Y) {
  191. return extractSections<MachO::section, MachO::segment_command>(
  192. LoadCmd, LC.Sections, Y);
  193. }
  194. template <>
  195. Expected<const char *>
  196. MachODumper::processLoadCommandData<MachO::segment_command_64>(
  197. MachOYAML::LoadCommand &LC,
  198. const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
  199. MachOYAML::Object &Y) {
  200. return extractSections<MachO::section_64, MachO::segment_command_64>(
  201. LoadCmd, LC.Sections, Y);
  202. }
  203. template <typename StructType>
  204. const char *
  205. readString(MachOYAML::LoadCommand &LC,
  206. const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd) {
  207. auto Start = LoadCmd.Ptr + sizeof(StructType);
  208. auto MaxSize = LoadCmd.C.cmdsize - sizeof(StructType);
  209. auto Size = strnlen(Start, MaxSize);
  210. LC.PayloadString = StringRef(Start, Size).str();
  211. return Start + Size;
  212. }
  213. template <>
  214. Expected<const char *>
  215. MachODumper::processLoadCommandData<MachO::dylib_command>(
  216. MachOYAML::LoadCommand &LC,
  217. const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
  218. MachOYAML::Object &Y) {
  219. return readString<MachO::dylib_command>(LC, LoadCmd);
  220. }
  221. template <>
  222. Expected<const char *>
  223. MachODumper::processLoadCommandData<MachO::dylinker_command>(
  224. MachOYAML::LoadCommand &LC,
  225. const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
  226. MachOYAML::Object &Y) {
  227. return readString<MachO::dylinker_command>(LC, LoadCmd);
  228. }
  229. template <>
  230. Expected<const char *>
  231. MachODumper::processLoadCommandData<MachO::rpath_command>(
  232. MachOYAML::LoadCommand &LC,
  233. const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
  234. MachOYAML::Object &Y) {
  235. return readString<MachO::rpath_command>(LC, LoadCmd);
  236. }
  237. template <>
  238. Expected<const char *>
  239. MachODumper::processLoadCommandData<MachO::build_version_command>(
  240. MachOYAML::LoadCommand &LC,
  241. const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
  242. MachOYAML::Object &Y) {
  243. auto Start = LoadCmd.Ptr + sizeof(MachO::build_version_command);
  244. auto NTools = LC.Data.build_version_command_data.ntools;
  245. for (unsigned i = 0; i < NTools; ++i) {
  246. auto Curr = Start + i * sizeof(MachO::build_tool_version);
  247. MachO::build_tool_version BV;
  248. memcpy((void *)&BV, Curr, sizeof(MachO::build_tool_version));
  249. if (Obj.isLittleEndian() != sys::IsLittleEndianHost)
  250. MachO::swapStruct(BV);
  251. LC.Tools.push_back(BV);
  252. }
  253. return Start + NTools * sizeof(MachO::build_tool_version);
  254. }
  255. Expected<std::unique_ptr<MachOYAML::Object>> MachODumper::dump() {
  256. auto Y = std::make_unique<MachOYAML::Object>();
  257. Y->IsLittleEndian = Obj.isLittleEndian();
  258. dumpHeader(Y);
  259. if (Error Err = dumpLoadCommands(Y))
  260. return std::move(Err);
  261. dumpLinkEdit(Y);
  262. return std::move(Y);
  263. }
  264. void MachODumper::dumpHeader(std::unique_ptr<MachOYAML::Object> &Y) {
  265. Y->Header.magic = Obj.getHeader().magic;
  266. Y->Header.cputype = Obj.getHeader().cputype;
  267. Y->Header.cpusubtype = Obj.getHeader().cpusubtype;
  268. Y->Header.filetype = Obj.getHeader().filetype;
  269. Y->Header.ncmds = Obj.getHeader().ncmds;
  270. Y->Header.sizeofcmds = Obj.getHeader().sizeofcmds;
  271. Y->Header.flags = Obj.getHeader().flags;
  272. Y->Header.reserved = 0;
  273. }
  274. Error MachODumper::dumpLoadCommands(std::unique_ptr<MachOYAML::Object> &Y) {
  275. for (auto LoadCmd : Obj.load_commands()) {
  276. MachOYAML::LoadCommand LC;
  277. const char *EndPtr = LoadCmd.Ptr;
  278. switch (LoadCmd.C.cmd) {
  279. default:
  280. memcpy((void *)&(LC.Data.load_command_data), LoadCmd.Ptr,
  281. sizeof(MachO::load_command));
  282. if (Obj.isLittleEndian() != sys::IsLittleEndianHost)
  283. MachO::swapStruct(LC.Data.load_command_data);
  284. if (Expected<const char *> ExpectedEndPtr =
  285. processLoadCommandData<MachO::load_command>(LC, LoadCmd,
  286. *Y.get()))
  287. EndPtr = *ExpectedEndPtr;
  288. else
  289. return ExpectedEndPtr.takeError();
  290. break;
  291. #include "llvm/BinaryFormat/MachO.def"
  292. }
  293. auto RemainingBytes = LoadCmd.C.cmdsize - (EndPtr - LoadCmd.Ptr);
  294. if (!std::all_of(EndPtr, &EndPtr[RemainingBytes],
  295. [](const char C) { return C == 0; })) {
  296. LC.PayloadBytes.insert(LC.PayloadBytes.end(), EndPtr,
  297. &EndPtr[RemainingBytes]);
  298. RemainingBytes = 0;
  299. }
  300. LC.ZeroPadBytes = RemainingBytes;
  301. Y->LoadCommands.push_back(std::move(LC));
  302. }
  303. return Error::success();
  304. }
  305. void MachODumper::dumpLinkEdit(std::unique_ptr<MachOYAML::Object> &Y) {
  306. dumpRebaseOpcodes(Y);
  307. dumpBindOpcodes(Y->LinkEdit.BindOpcodes, Obj.getDyldInfoBindOpcodes());
  308. dumpBindOpcodes(Y->LinkEdit.WeakBindOpcodes,
  309. Obj.getDyldInfoWeakBindOpcodes());
  310. dumpBindOpcodes(Y->LinkEdit.LazyBindOpcodes, Obj.getDyldInfoLazyBindOpcodes(),
  311. true);
  312. dumpExportTrie(Y);
  313. dumpSymbols(Y);
  314. }
  315. void MachODumper::dumpRebaseOpcodes(std::unique_ptr<MachOYAML::Object> &Y) {
  316. MachOYAML::LinkEditData &LEData = Y->LinkEdit;
  317. auto RebaseOpcodes = Obj.getDyldInfoRebaseOpcodes();
  318. for (auto OpCode = RebaseOpcodes.begin(); OpCode != RebaseOpcodes.end();
  319. ++OpCode) {
  320. MachOYAML::RebaseOpcode RebaseOp;
  321. RebaseOp.Opcode =
  322. static_cast<MachO::RebaseOpcode>(*OpCode & MachO::REBASE_OPCODE_MASK);
  323. RebaseOp.Imm = *OpCode & MachO::REBASE_IMMEDIATE_MASK;
  324. unsigned Count;
  325. uint64_t ULEB = 0;
  326. switch (RebaseOp.Opcode) {
  327. case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB:
  328. ULEB = decodeULEB128(OpCode + 1, &Count);
  329. RebaseOp.ExtraData.push_back(ULEB);
  330. OpCode += Count;
  331. LLVM_FALLTHROUGH;
  332. // Intentionally no break here -- This opcode has two ULEB values
  333. case MachO::REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
  334. case MachO::REBASE_OPCODE_ADD_ADDR_ULEB:
  335. case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES:
  336. case MachO::REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB:
  337. ULEB = decodeULEB128(OpCode + 1, &Count);
  338. RebaseOp.ExtraData.push_back(ULEB);
  339. OpCode += Count;
  340. break;
  341. default:
  342. break;
  343. }
  344. LEData.RebaseOpcodes.push_back(RebaseOp);
  345. if (RebaseOp.Opcode == MachO::REBASE_OPCODE_DONE)
  346. break;
  347. }
  348. }
  349. StringRef ReadStringRef(const uint8_t *Start) {
  350. const uint8_t *Itr = Start;
  351. for (; *Itr; ++Itr)
  352. ;
  353. return StringRef(reinterpret_cast<const char *>(Start), Itr - Start);
  354. }
  355. void MachODumper::dumpBindOpcodes(
  356. std::vector<MachOYAML::BindOpcode> &BindOpcodes,
  357. ArrayRef<uint8_t> OpcodeBuffer, bool Lazy) {
  358. for (auto OpCode = OpcodeBuffer.begin(); OpCode != OpcodeBuffer.end();
  359. ++OpCode) {
  360. MachOYAML::BindOpcode BindOp;
  361. BindOp.Opcode =
  362. static_cast<MachO::BindOpcode>(*OpCode & MachO::BIND_OPCODE_MASK);
  363. BindOp.Imm = *OpCode & MachO::BIND_IMMEDIATE_MASK;
  364. unsigned Count;
  365. uint64_t ULEB = 0;
  366. int64_t SLEB = 0;
  367. switch (BindOp.Opcode) {
  368. case MachO::BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB:
  369. ULEB = decodeULEB128(OpCode + 1, &Count);
  370. BindOp.ULEBExtraData.push_back(ULEB);
  371. OpCode += Count;
  372. LLVM_FALLTHROUGH;
  373. // Intentionally no break here -- this opcode has two ULEB values
  374. case MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB:
  375. case MachO::BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
  376. case MachO::BIND_OPCODE_ADD_ADDR_ULEB:
  377. case MachO::BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB:
  378. ULEB = decodeULEB128(OpCode + 1, &Count);
  379. BindOp.ULEBExtraData.push_back(ULEB);
  380. OpCode += Count;
  381. break;
  382. case MachO::BIND_OPCODE_SET_ADDEND_SLEB:
  383. SLEB = decodeSLEB128(OpCode + 1, &Count);
  384. BindOp.SLEBExtraData.push_back(SLEB);
  385. OpCode += Count;
  386. break;
  387. case MachO::BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM:
  388. BindOp.Symbol = ReadStringRef(OpCode + 1);
  389. OpCode += BindOp.Symbol.size() + 1;
  390. break;
  391. default:
  392. break;
  393. }
  394. BindOpcodes.push_back(BindOp);
  395. // Lazy bindings have DONE opcodes between operations, so we need to keep
  396. // processing after a DONE.
  397. if (!Lazy && BindOp.Opcode == MachO::BIND_OPCODE_DONE)
  398. break;
  399. }
  400. }
  401. /*!
  402. * /brief processes a node from the export trie, and its children.
  403. *
  404. * To my knowledge there is no documentation of the encoded format of this data
  405. * other than in the heads of the Apple linker engineers. To that end hopefully
  406. * this comment and the implementation below can serve to light the way for
  407. * anyone crazy enough to come down this path in the future.
  408. *
  409. * This function reads and preserves the trie structure of the export trie. To
  410. * my knowledge there is no code anywhere else that reads the data and preserves
  411. * the Trie. LD64 (sources available at opensource.apple.com) has a similar
  412. * implementation that parses the export trie into a vector. That code as well
  413. * as LLVM's libObject MachO implementation were the basis for this.
  414. *
  415. * The export trie is an encoded trie. The node serialization is a bit awkward.
  416. * The below pseudo-code is the best description I've come up with for it.
  417. *
  418. * struct SerializedNode {
  419. * ULEB128 TerminalSize;
  420. * struct TerminalData { <-- This is only present if TerminalSize > 0
  421. * ULEB128 Flags;
  422. * ULEB128 Address; <-- Present if (! Flags & REEXPORT )
  423. * ULEB128 Other; <-- Present if ( Flags & REEXPORT ||
  424. * Flags & STUB_AND_RESOLVER )
  425. * char[] ImportName; <-- Present if ( Flags & REEXPORT )
  426. * }
  427. * uint8_t ChildrenCount;
  428. * Pair<char[], ULEB128> ChildNameOffsetPair[ChildrenCount];
  429. * SerializedNode Children[ChildrenCount]
  430. * }
  431. *
  432. * Terminal nodes are nodes that represent actual exports. They can appear
  433. * anywhere in the tree other than at the root; they do not need to be leaf
  434. * nodes. When reading the data out of the trie this routine reads it in-order,
  435. * but it puts the child names and offsets directly into the child nodes. This
  436. * results in looping over the children twice during serialization and
  437. * de-serialization, but it makes the YAML representation more human readable.
  438. *
  439. * Below is an example of the graph from a "Hello World" executable:
  440. *
  441. * -------
  442. * | '' |
  443. * -------
  444. * |
  445. * -------
  446. * | '_' |
  447. * -------
  448. * |
  449. * |----------------------------------------|
  450. * | |
  451. * ------------------------ ---------------------
  452. * | '_mh_execute_header' | | 'main' |
  453. * | Flags: 0x00000000 | | Flags: 0x00000000 |
  454. * | Addr: 0x00000000 | | Addr: 0x00001160 |
  455. * ------------------------ ---------------------
  456. *
  457. * This graph represents the trie for the exports "__mh_execute_header" and
  458. * "_main". In the graph only the "_main" and "__mh_execute_header" nodes are
  459. * terminal.
  460. */
  461. const uint8_t *processExportNode(const uint8_t *CurrPtr,
  462. const uint8_t *const End,
  463. MachOYAML::ExportEntry &Entry) {
  464. if (CurrPtr >= End)
  465. return CurrPtr;
  466. unsigned Count = 0;
  467. Entry.TerminalSize = decodeULEB128(CurrPtr, &Count);
  468. CurrPtr += Count;
  469. if (Entry.TerminalSize != 0) {
  470. Entry.Flags = decodeULEB128(CurrPtr, &Count);
  471. CurrPtr += Count;
  472. if (Entry.Flags & MachO::EXPORT_SYMBOL_FLAGS_REEXPORT) {
  473. Entry.Address = 0;
  474. Entry.Other = decodeULEB128(CurrPtr, &Count);
  475. CurrPtr += Count;
  476. Entry.ImportName = std::string(reinterpret_cast<const char *>(CurrPtr));
  477. } else {
  478. Entry.Address = decodeULEB128(CurrPtr, &Count);
  479. CurrPtr += Count;
  480. if (Entry.Flags & MachO::EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) {
  481. Entry.Other = decodeULEB128(CurrPtr, &Count);
  482. CurrPtr += Count;
  483. } else
  484. Entry.Other = 0;
  485. }
  486. }
  487. uint8_t childrenCount = *CurrPtr++;
  488. if (childrenCount == 0)
  489. return CurrPtr;
  490. Entry.Children.insert(Entry.Children.begin(), (size_t)childrenCount,
  491. MachOYAML::ExportEntry());
  492. for (auto &Child : Entry.Children) {
  493. Child.Name = std::string(reinterpret_cast<const char *>(CurrPtr));
  494. CurrPtr += Child.Name.length() + 1;
  495. Child.NodeOffset = decodeULEB128(CurrPtr, &Count);
  496. CurrPtr += Count;
  497. }
  498. for (auto &Child : Entry.Children) {
  499. CurrPtr = processExportNode(CurrPtr, End, Child);
  500. }
  501. return CurrPtr;
  502. }
  503. void MachODumper::dumpExportTrie(std::unique_ptr<MachOYAML::Object> &Y) {
  504. MachOYAML::LinkEditData &LEData = Y->LinkEdit;
  505. auto ExportsTrie = Obj.getDyldInfoExportsTrie();
  506. processExportNode(ExportsTrie.begin(), ExportsTrie.end(), LEData.ExportTrie);
  507. }
  508. template <typename nlist_t>
  509. MachOYAML::NListEntry constructNameList(const nlist_t &nlist) {
  510. MachOYAML::NListEntry NL;
  511. NL.n_strx = nlist.n_strx;
  512. NL.n_type = nlist.n_type;
  513. NL.n_sect = nlist.n_sect;
  514. NL.n_desc = nlist.n_desc;
  515. NL.n_value = nlist.n_value;
  516. return NL;
  517. }
  518. void MachODumper::dumpSymbols(std::unique_ptr<MachOYAML::Object> &Y) {
  519. MachOYAML::LinkEditData &LEData = Y->LinkEdit;
  520. for (auto Symbol : Obj.symbols()) {
  521. MachOYAML::NListEntry NLE =
  522. Obj.is64Bit()
  523. ? constructNameList<MachO::nlist_64>(
  524. Obj.getSymbol64TableEntry(Symbol.getRawDataRefImpl()))
  525. : constructNameList<MachO::nlist>(
  526. Obj.getSymbolTableEntry(Symbol.getRawDataRefImpl()));
  527. LEData.NameList.push_back(NLE);
  528. }
  529. StringRef RemainingTable = Obj.getStringTableData();
  530. while (RemainingTable.size() > 0) {
  531. auto SymbolPair = RemainingTable.split('\0');
  532. RemainingTable = SymbolPair.second;
  533. LEData.StringTable.push_back(SymbolPair.first);
  534. }
  535. }
  536. Error macho2yaml(raw_ostream &Out, const object::MachOObjectFile &Obj) {
  537. std::unique_ptr<DWARFContext> DCtx = DWARFContext::create(Obj);
  538. MachODumper Dumper(Obj, std::move(DCtx));
  539. Expected<std::unique_ptr<MachOYAML::Object>> YAML = Dumper.dump();
  540. if (!YAML)
  541. return YAML.takeError();
  542. yaml::YamlObjectFile YAMLFile;
  543. YAMLFile.MachO = std::move(YAML.get());
  544. yaml::Output Yout(Out);
  545. Yout << YAMLFile;
  546. return Error::success();
  547. }
  548. Error macho2yaml(raw_ostream &Out, const object::MachOUniversalBinary &Obj) {
  549. yaml::YamlObjectFile YAMLFile;
  550. YAMLFile.FatMachO.reset(new MachOYAML::UniversalBinary());
  551. MachOYAML::UniversalBinary &YAML = *YAMLFile.FatMachO;
  552. YAML.Header.magic = Obj.getMagic();
  553. YAML.Header.nfat_arch = Obj.getNumberOfObjects();
  554. for (auto Slice : Obj.objects()) {
  555. MachOYAML::FatArch arch;
  556. arch.cputype = Slice.getCPUType();
  557. arch.cpusubtype = Slice.getCPUSubType();
  558. arch.offset = Slice.getOffset();
  559. arch.size = Slice.getSize();
  560. arch.align = Slice.getAlign();
  561. arch.reserved = Slice.getReserved();
  562. YAML.FatArchs.push_back(arch);
  563. auto SliceObj = Slice.getAsObjectFile();
  564. if (!SliceObj)
  565. return SliceObj.takeError();
  566. std::unique_ptr<DWARFContext> DCtx = DWARFContext::create(*SliceObj.get());
  567. MachODumper Dumper(*SliceObj.get(), std::move(DCtx));
  568. Expected<std::unique_ptr<MachOYAML::Object>> YAMLObj = Dumper.dump();
  569. if (!YAMLObj)
  570. return YAMLObj.takeError();
  571. YAML.Slices.push_back(*YAMLObj.get());
  572. }
  573. yaml::Output Yout(Out);
  574. Yout << YAML;
  575. return Error::success();
  576. }
  577. Error macho2yaml(raw_ostream &Out, const object::Binary &Binary) {
  578. if (const auto *MachOObj = dyn_cast<object::MachOUniversalBinary>(&Binary))
  579. return macho2yaml(Out, *MachOObj);
  580. if (const auto *MachOObj = dyn_cast<object::MachOObjectFile>(&Binary))
  581. return macho2yaml(Out, *MachOObj);
  582. llvm_unreachable("unexpected Mach-O file format");
  583. }