macho2yaml.cpp 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672
  1. //===------ macho2yaml.cpp - obj2yaml conversion tool -----------*- C++ -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. #include "obj2yaml.h"
  9. #include "llvm/DebugInfo/DWARF/DWARFContext.h"
  10. #include "llvm/Object/MachOUniversal.h"
  11. #include "llvm/ObjectYAML/DWARFYAML.h"
  12. #include "llvm/ObjectYAML/ObjectYAML.h"
  13. #include "llvm/Support/Error.h"
  14. #include "llvm/Support/ErrorHandling.h"
  15. #include "llvm/Support/LEB128.h"
  16. #include <string.h> // for memcpy
  17. using namespace llvm;
  18. class MachODumper {
  19. template <typename StructType>
  20. Expected<const char *> processLoadCommandData(
  21. MachOYAML::LoadCommand &LC,
  22. const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
  23. MachOYAML::Object &Y);
  24. const object::MachOObjectFile &Obj;
  25. std::unique_ptr<DWARFContext> DWARFCtx;
  26. unsigned RawSegment;
  27. void dumpHeader(std::unique_ptr<MachOYAML::Object> &Y);
  28. Error dumpLoadCommands(std::unique_ptr<MachOYAML::Object> &Y);
  29. void dumpLinkEdit(std::unique_ptr<MachOYAML::Object> &Y);
  30. void dumpRebaseOpcodes(std::unique_ptr<MachOYAML::Object> &Y);
  31. void dumpBindOpcodes(std::vector<MachOYAML::BindOpcode> &BindOpcodes,
  32. ArrayRef<uint8_t> OpcodeBuffer, bool Lazy = false);
  33. void dumpExportTrie(std::unique_ptr<MachOYAML::Object> &Y);
  34. void dumpSymbols(std::unique_ptr<MachOYAML::Object> &Y);
  35. void dumpIndirectSymbols(std::unique_ptr<MachOYAML::Object> &Y);
  36. template <typename SectionType>
  37. Expected<MachOYAML::Section> constructSectionCommon(SectionType Sec,
  38. size_t SecIndex);
  39. template <typename SectionType>
  40. Expected<MachOYAML::Section> constructSection(SectionType Sec,
  41. size_t SecIndex);
  42. template <typename SectionType, typename SegmentType>
  43. Expected<const char *>
  44. extractSections(const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
  45. std::vector<MachOYAML::Section> &Sections,
  46. MachOYAML::Object &Y);
  47. public:
  48. MachODumper(const object::MachOObjectFile &O,
  49. std::unique_ptr<DWARFContext> DCtx, unsigned RawSegments)
  50. : Obj(O), DWARFCtx(std::move(DCtx)), RawSegment(RawSegments) {}
  51. Expected<std::unique_ptr<MachOYAML::Object>> dump();
  52. };
  // Emits the `case` for one load command kind. It is meant to be expanded by
  // the `#include "llvm/BinaryFormat/MachO.def"` that sits inside the switch in
  // MachODumper::dumpLoadCommands (presumably once per known load command --
  // the .def file is not part of this source).
  //
  // Each expansion copies the fixed-size MachO::LCStruct from LoadCmd.Ptr into
  // LC.Data, byte-swaps it when the object's endianness differs from the
  // host's, and then calls processLoadCommandData<MachO::LCStruct>. On success
  // the returned pointer is stored in EndPtr; on failure the error is returned
  // from the enclosing function.
  //
  // The expansion relies on LC, LoadCmd, Y, EndPtr and Obj being in scope under
  // exactly those names.
  #define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \
  case MachO::LCName: \
  memcpy((void *)&(LC.Data.LCStruct##_data), LoadCmd.Ptr, \
  sizeof(MachO::LCStruct)); \
  if (Obj.isLittleEndian() != sys::IsLittleEndianHost) \
  MachO::swapStruct(LC.Data.LCStruct##_data); \
  if (Expected<const char *> ExpectedEndPtr = \
  processLoadCommandData<MachO::LCStruct>(LC, LoadCmd, *Y.get())) \
  EndPtr = *ExpectedEndPtr; \
  else \
  return ExpectedEndPtr.takeError(); \
  break;
  65. template <typename SectionType>
  66. Expected<MachOYAML::Section>
  67. MachODumper::constructSectionCommon(SectionType Sec, size_t SecIndex) {
  68. MachOYAML::Section TempSec;
  69. memcpy(reinterpret_cast<void *>(&TempSec.sectname[0]), &Sec.sectname[0], 16);
  70. memcpy(reinterpret_cast<void *>(&TempSec.segname[0]), &Sec.segname[0], 16);
  71. TempSec.addr = Sec.addr;
  72. TempSec.size = Sec.size;
  73. TempSec.offset = Sec.offset;
  74. TempSec.align = Sec.align;
  75. TempSec.reloff = Sec.reloff;
  76. TempSec.nreloc = Sec.nreloc;
  77. TempSec.flags = Sec.flags;
  78. TempSec.reserved1 = Sec.reserved1;
  79. TempSec.reserved2 = Sec.reserved2;
  80. TempSec.reserved3 = 0;
  81. if (!MachO::isVirtualSection(Sec.flags & MachO::SECTION_TYPE))
  82. TempSec.content =
  83. yaml::BinaryRef(Obj.getSectionContents(Sec.offset, Sec.size));
  84. if (Expected<object::SectionRef> SecRef = Obj.getSection(SecIndex)) {
  85. TempSec.relocations.reserve(TempSec.nreloc);
  86. for (const object::RelocationRef &Reloc : SecRef->relocations()) {
  87. const object::DataRefImpl Rel = Reloc.getRawDataRefImpl();
  88. const MachO::any_relocation_info RE = Obj.getRelocation(Rel);
  89. MachOYAML::Relocation R;
  90. R.address = Obj.getAnyRelocationAddress(RE);
  91. R.is_pcrel = Obj.getAnyRelocationPCRel(RE);
  92. R.length = Obj.getAnyRelocationLength(RE);
  93. R.type = Obj.getAnyRelocationType(RE);
  94. R.is_scattered = Obj.isRelocationScattered(RE);
  95. R.symbolnum = (R.is_scattered ? 0 : Obj.getPlainRelocationSymbolNum(RE));
  96. R.is_extern =
  97. (R.is_scattered ? false : Obj.getPlainRelocationExternal(RE));
  98. R.value = (R.is_scattered ? Obj.getScatteredRelocationValue(RE) : 0);
  99. TempSec.relocations.push_back(R);
  100. }
  101. } else {
  102. return SecRef.takeError();
  103. }
  104. return TempSec;
  105. }
  106. template <>
  107. Expected<MachOYAML::Section> MachODumper::constructSection(MachO::section Sec,
  108. size_t SecIndex) {
  109. Expected<MachOYAML::Section> TempSec = constructSectionCommon(Sec, SecIndex);
  110. if (TempSec)
  111. TempSec->reserved3 = 0;
  112. return TempSec;
  113. }
  114. template <>
  115. Expected<MachOYAML::Section>
  116. MachODumper::constructSection(MachO::section_64 Sec, size_t SecIndex) {
  117. Expected<MachOYAML::Section> TempSec = constructSectionCommon(Sec, SecIndex);
  118. if (TempSec)
  119. TempSec->reserved3 = Sec.reserved3;
  120. return TempSec;
  121. }
  122. static Error dumpDebugSection(StringRef SecName, DWARFContext &DCtx,
  123. DWARFYAML::Data &DWARF) {
  124. if (SecName == "__debug_abbrev") {
  125. dumpDebugAbbrev(DCtx, DWARF);
  126. return Error::success();
  127. }
  128. if (SecName == "__debug_aranges")
  129. return dumpDebugARanges(DCtx, DWARF);
  130. if (SecName == "__debug_info") {
  131. dumpDebugInfo(DCtx, DWARF);
  132. return Error::success();
  133. }
  134. if (SecName == "__debug_line") {
  135. dumpDebugLines(DCtx, DWARF);
  136. return Error::success();
  137. }
  138. if (SecName.startswith("__debug_pub")) {
  139. // FIXME: We should extract pub-section dumpers from this function.
  140. dumpDebugPubSections(DCtx, DWARF);
  141. return Error::success();
  142. }
  143. if (SecName == "__debug_ranges")
  144. return dumpDebugRanges(DCtx, DWARF);
  145. if (SecName == "__debug_str")
  146. return dumpDebugStrings(DCtx, DWARF);
  147. return createStringError(errc::not_supported,
  148. "dumping " + SecName + " section is not supported");
  149. }
  150. template <typename SectionType, typename SegmentType>
  151. Expected<const char *> MachODumper::extractSections(
  152. const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
  153. std::vector<MachOYAML::Section> &Sections, MachOYAML::Object &Y) {
  154. auto End = LoadCmd.Ptr + LoadCmd.C.cmdsize;
  155. const SectionType *Curr =
  156. reinterpret_cast<const SectionType *>(LoadCmd.Ptr + sizeof(SegmentType));
  157. for (; reinterpret_cast<const void *>(Curr) < End; Curr++) {
  158. SectionType Sec;
  159. memcpy((void *)&Sec, Curr, sizeof(SectionType));
  160. if (Obj.isLittleEndian() != sys::IsLittleEndianHost)
  161. MachO::swapStruct(Sec);
  162. // For MachO section indices start from 1.
  163. if (Expected<MachOYAML::Section> S =
  164. constructSection(Sec, Sections.size() + 1)) {
  165. StringRef SecName(S->sectname);
  166. // Copy data sections if requested.
  167. if ((RawSegment & ::RawSegments::data) &&
  168. StringRef(S->segname).startswith("__DATA"))
  169. S->content =
  170. yaml::BinaryRef(Obj.getSectionContents(Sec.offset, Sec.size));
  171. if (SecName.startswith("__debug_")) {
  172. // If the DWARF section cannot be successfully parsed, emit raw content
  173. // instead of an entry in the DWARF section of the YAML.
  174. if (Error Err = dumpDebugSection(SecName, *DWARFCtx.get(), Y.DWARF))
  175. consumeError(std::move(Err));
  176. else
  177. S->content.reset();
  178. }
  179. Sections.push_back(std::move(*S));
  180. } else
  181. return S.takeError();
  182. }
  183. return reinterpret_cast<const char *>(Curr);
  184. }
  185. template <typename StructType>
  186. Expected<const char *> MachODumper::processLoadCommandData(
  187. MachOYAML::LoadCommand &LC,
  188. const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
  189. MachOYAML::Object &Y) {
  190. return LoadCmd.Ptr + sizeof(StructType);
  191. }
  192. template <>
  193. Expected<const char *>
  194. MachODumper::processLoadCommandData<MachO::segment_command>(
  195. MachOYAML::LoadCommand &LC,
  196. const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
  197. MachOYAML::Object &Y) {
  198. return extractSections<MachO::section, MachO::segment_command>(
  199. LoadCmd, LC.Sections, Y);
  200. }
  201. template <>
  202. Expected<const char *>
  203. MachODumper::processLoadCommandData<MachO::segment_command_64>(
  204. MachOYAML::LoadCommand &LC,
  205. const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
  206. MachOYAML::Object &Y) {
  207. return extractSections<MachO::section_64, MachO::segment_command_64>(
  208. LoadCmd, LC.Sections, Y);
  209. }
  210. template <typename StructType>
  211. const char *
  212. readString(MachOYAML::LoadCommand &LC,
  213. const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd) {
  214. auto Start = LoadCmd.Ptr + sizeof(StructType);
  215. auto MaxSize = LoadCmd.C.cmdsize - sizeof(StructType);
  216. auto Size = strnlen(Start, MaxSize);
  217. LC.Content = StringRef(Start, Size).str();
  218. return Start + Size;
  219. }
  220. template <>
  221. Expected<const char *>
  222. MachODumper::processLoadCommandData<MachO::dylib_command>(
  223. MachOYAML::LoadCommand &LC,
  224. const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
  225. MachOYAML::Object &Y) {
  226. return readString<MachO::dylib_command>(LC, LoadCmd);
  227. }
  228. template <>
  229. Expected<const char *>
  230. MachODumper::processLoadCommandData<MachO::dylinker_command>(
  231. MachOYAML::LoadCommand &LC,
  232. const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
  233. MachOYAML::Object &Y) {
  234. return readString<MachO::dylinker_command>(LC, LoadCmd);
  235. }
  236. template <>
  237. Expected<const char *>
  238. MachODumper::processLoadCommandData<MachO::rpath_command>(
  239. MachOYAML::LoadCommand &LC,
  240. const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
  241. MachOYAML::Object &Y) {
  242. return readString<MachO::rpath_command>(LC, LoadCmd);
  243. }
  244. template <>
  245. Expected<const char *>
  246. MachODumper::processLoadCommandData<MachO::build_version_command>(
  247. MachOYAML::LoadCommand &LC,
  248. const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
  249. MachOYAML::Object &Y) {
  250. auto Start = LoadCmd.Ptr + sizeof(MachO::build_version_command);
  251. auto NTools = LC.Data.build_version_command_data.ntools;
  252. for (unsigned i = 0; i < NTools; ++i) {
  253. auto Curr = Start + i * sizeof(MachO::build_tool_version);
  254. MachO::build_tool_version BV;
  255. memcpy((void *)&BV, Curr, sizeof(MachO::build_tool_version));
  256. if (Obj.isLittleEndian() != sys::IsLittleEndianHost)
  257. MachO::swapStruct(BV);
  258. LC.Tools.push_back(BV);
  259. }
  260. return Start + NTools * sizeof(MachO::build_tool_version);
  261. }
  262. Expected<std::unique_ptr<MachOYAML::Object>> MachODumper::dump() {
  263. auto Y = std::make_unique<MachOYAML::Object>();
  264. Y->IsLittleEndian = Obj.isLittleEndian();
  265. dumpHeader(Y);
  266. if (Error Err = dumpLoadCommands(Y))
  267. return std::move(Err);
  268. if (RawSegment & ::RawSegments::linkedit)
  269. Y->RawLinkEditSegment =
  270. yaml::BinaryRef(Obj.getSegmentContents("__LINKEDIT"));
  271. else
  272. dumpLinkEdit(Y);
  273. return std::move(Y);
  274. }
  275. void MachODumper::dumpHeader(std::unique_ptr<MachOYAML::Object> &Y) {
  276. Y->Header.magic = Obj.getHeader().magic;
  277. Y->Header.cputype = Obj.getHeader().cputype;
  278. Y->Header.cpusubtype = Obj.getHeader().cpusubtype;
  279. Y->Header.filetype = Obj.getHeader().filetype;
  280. Y->Header.ncmds = Obj.getHeader().ncmds;
  281. Y->Header.sizeofcmds = Obj.getHeader().sizeofcmds;
  282. Y->Header.flags = Obj.getHeader().flags;
  283. Y->Header.reserved = 0;
  284. }
  285. Error MachODumper::dumpLoadCommands(std::unique_ptr<MachOYAML::Object> &Y) {
  286. for (auto LoadCmd : Obj.load_commands()) {
  287. MachOYAML::LoadCommand LC;
  288. const char *EndPtr = LoadCmd.Ptr;
  289. switch (LoadCmd.C.cmd) {
  290. default:
  291. memcpy((void *)&(LC.Data.load_command_data), LoadCmd.Ptr,
  292. sizeof(MachO::load_command));
  293. if (Obj.isLittleEndian() != sys::IsLittleEndianHost)
  294. MachO::swapStruct(LC.Data.load_command_data);
  295. if (Expected<const char *> ExpectedEndPtr =
  296. processLoadCommandData<MachO::load_command>(LC, LoadCmd,
  297. *Y.get()))
  298. EndPtr = *ExpectedEndPtr;
  299. else
  300. return ExpectedEndPtr.takeError();
  301. break;
  302. #include "llvm/BinaryFormat/MachO.def"
  303. }
  304. auto RemainingBytes = LoadCmd.C.cmdsize - (EndPtr - LoadCmd.Ptr);
  305. if (!std::all_of(EndPtr, &EndPtr[RemainingBytes],
  306. [](const char C) { return C == 0; })) {
  307. LC.PayloadBytes.insert(LC.PayloadBytes.end(), EndPtr,
  308. &EndPtr[RemainingBytes]);
  309. RemainingBytes = 0;
  310. }
  311. LC.ZeroPadBytes = RemainingBytes;
  312. Y->LoadCommands.push_back(std::move(LC));
  313. }
  314. return Error::success();
  315. }
  316. void MachODumper::dumpLinkEdit(std::unique_ptr<MachOYAML::Object> &Y) {
  317. dumpRebaseOpcodes(Y);
  318. dumpBindOpcodes(Y->LinkEdit.BindOpcodes, Obj.getDyldInfoBindOpcodes());
  319. dumpBindOpcodes(Y->LinkEdit.WeakBindOpcodes,
  320. Obj.getDyldInfoWeakBindOpcodes());
  321. dumpBindOpcodes(Y->LinkEdit.LazyBindOpcodes, Obj.getDyldInfoLazyBindOpcodes(),
  322. true);
  323. dumpExportTrie(Y);
  324. dumpSymbols(Y);
  325. dumpIndirectSymbols(Y);
  326. }
  327. void MachODumper::dumpRebaseOpcodes(std::unique_ptr<MachOYAML::Object> &Y) {
  328. MachOYAML::LinkEditData &LEData = Y->LinkEdit;
  329. auto RebaseOpcodes = Obj.getDyldInfoRebaseOpcodes();
  330. for (auto OpCode = RebaseOpcodes.begin(); OpCode != RebaseOpcodes.end();
  331. ++OpCode) {
  332. MachOYAML::RebaseOpcode RebaseOp;
  333. RebaseOp.Opcode =
  334. static_cast<MachO::RebaseOpcode>(*OpCode & MachO::REBASE_OPCODE_MASK);
  335. RebaseOp.Imm = *OpCode & MachO::REBASE_IMMEDIATE_MASK;
  336. unsigned Count;
  337. uint64_t ULEB = 0;
  338. switch (RebaseOp.Opcode) {
  339. case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB:
  340. ULEB = decodeULEB128(OpCode + 1, &Count);
  341. RebaseOp.ExtraData.push_back(ULEB);
  342. OpCode += Count;
  343. LLVM_FALLTHROUGH;
  344. // Intentionally no break here -- This opcode has two ULEB values
  345. case MachO::REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
  346. case MachO::REBASE_OPCODE_ADD_ADDR_ULEB:
  347. case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES:
  348. case MachO::REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB:
  349. ULEB = decodeULEB128(OpCode + 1, &Count);
  350. RebaseOp.ExtraData.push_back(ULEB);
  351. OpCode += Count;
  352. break;
  353. default:
  354. break;
  355. }
  356. LEData.RebaseOpcodes.push_back(RebaseOp);
  357. if (RebaseOp.Opcode == MachO::REBASE_OPCODE_DONE)
  358. break;
  359. }
  360. }
  361. StringRef ReadStringRef(const uint8_t *Start) {
  362. const uint8_t *Itr = Start;
  363. for (; *Itr; ++Itr)
  364. ;
  365. return StringRef(reinterpret_cast<const char *>(Start), Itr - Start);
  366. }
  367. void MachODumper::dumpBindOpcodes(
  368. std::vector<MachOYAML::BindOpcode> &BindOpcodes,
  369. ArrayRef<uint8_t> OpcodeBuffer, bool Lazy) {
  370. for (auto OpCode = OpcodeBuffer.begin(); OpCode != OpcodeBuffer.end();
  371. ++OpCode) {
  372. MachOYAML::BindOpcode BindOp;
  373. BindOp.Opcode =
  374. static_cast<MachO::BindOpcode>(*OpCode & MachO::BIND_OPCODE_MASK);
  375. BindOp.Imm = *OpCode & MachO::BIND_IMMEDIATE_MASK;
  376. unsigned Count;
  377. uint64_t ULEB = 0;
  378. int64_t SLEB = 0;
  379. switch (BindOp.Opcode) {
  380. case MachO::BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB:
  381. ULEB = decodeULEB128(OpCode + 1, &Count);
  382. BindOp.ULEBExtraData.push_back(ULEB);
  383. OpCode += Count;
  384. LLVM_FALLTHROUGH;
  385. // Intentionally no break here -- this opcode has two ULEB values
  386. case MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB:
  387. case MachO::BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
  388. case MachO::BIND_OPCODE_ADD_ADDR_ULEB:
  389. case MachO::BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB:
  390. ULEB = decodeULEB128(OpCode + 1, &Count);
  391. BindOp.ULEBExtraData.push_back(ULEB);
  392. OpCode += Count;
  393. break;
  394. case MachO::BIND_OPCODE_SET_ADDEND_SLEB:
  395. SLEB = decodeSLEB128(OpCode + 1, &Count);
  396. BindOp.SLEBExtraData.push_back(SLEB);
  397. OpCode += Count;
  398. break;
  399. case MachO::BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM:
  400. BindOp.Symbol = ReadStringRef(OpCode + 1);
  401. OpCode += BindOp.Symbol.size() + 1;
  402. break;
  403. default:
  404. break;
  405. }
  406. BindOpcodes.push_back(BindOp);
  407. // Lazy bindings have DONE opcodes between operations, so we need to keep
  408. // processing after a DONE.
  409. if (!Lazy && BindOp.Opcode == MachO::BIND_OPCODE_DONE)
  410. break;
  411. }
  412. }
  413. /*!
  414. * /brief processes a node from the export trie, and its children.
  415. *
  416. * To my knowledge there is no documentation of the encoded format of this data
  417. * other than in the heads of the Apple linker engineers. To that end hopefully
  418. * this comment and the implementation below can serve to light the way for
  419. * anyone crazy enough to come down this path in the future.
  420. *
  421. * This function reads and preserves the trie structure of the export trie. To
  422. * my knowledge there is no code anywhere else that reads the data and preserves
  423. * the Trie. LD64 (sources available at opensource.apple.com) has a similar
  424. * implementation that parses the export trie into a vector. That code as well
  425. * as LLVM's libObject MachO implementation were the basis for this.
  426. *
  427. * The export trie is an encoded trie. The node serialization is a bit awkward.
  428. * The below pseudo-code is the best description I've come up with for it.
  429. *
  430. * struct SerializedNode {
  431. * ULEB128 TerminalSize;
  432. * struct TerminalData { <-- This is only present if TerminalSize > 0
  433. * ULEB128 Flags;
  434. * ULEB128 Address; <-- Present if (! Flags & REEXPORT )
  435. * ULEB128 Other; <-- Present if ( Flags & REEXPORT ||
  436. * Flags & STUB_AND_RESOLVER )
  437. * char[] ImportName; <-- Present if ( Flags & REEXPORT )
  438. * }
  439. * uint8_t ChildrenCount;
  440. * Pair<char[], ULEB128> ChildNameOffsetPair[ChildrenCount];
  441. * SerializedNode Children[ChildrenCount]
  442. * }
  443. *
  444. * Terminal nodes are nodes that represent actual exports. They can appear
  445. * anywhere in the tree other than at the root; they do not need to be leaf
  446. * nodes. When reading the data out of the trie this routine reads it in-order,
  447. * but it puts the child names and offsets directly into the child nodes. This
  448. * results in looping over the children twice during serialization and
  449. * de-serialization, but it makes the YAML representation more human readable.
  450. *
  451. * Below is an example of the graph from a "Hello World" executable:
  452. *
  453. * -------
  454. * | '' |
  455. * -------
  456. * |
  457. * -------
  458. * | '_' |
  459. * -------
  460. * |
  461. * |----------------------------------------|
  462. * | |
  463. * ------------------------ ---------------------
  464. * | '_mh_execute_header' | | 'main' |
  465. * | Flags: 0x00000000 | | Flags: 0x00000000 |
  466. * | Addr: 0x00000000 | | Addr: 0x00001160 |
  467. * ------------------------ ---------------------
  468. *
  469. * This graph represents the trie for the exports "__mh_execute_header" and
  470. * "_main". In the graph only the "_main" and "__mh_execute_header" nodes are
  471. * terminal.
  472. */
  473. const uint8_t *processExportNode(const uint8_t *CurrPtr,
  474. const uint8_t *const End,
  475. MachOYAML::ExportEntry &Entry) {
  476. if (CurrPtr >= End)
  477. return CurrPtr;
  478. unsigned Count = 0;
  479. Entry.TerminalSize = decodeULEB128(CurrPtr, &Count);
  480. CurrPtr += Count;
  481. if (Entry.TerminalSize != 0) {
  482. Entry.Flags = decodeULEB128(CurrPtr, &Count);
  483. CurrPtr += Count;
  484. if (Entry.Flags & MachO::EXPORT_SYMBOL_FLAGS_REEXPORT) {
  485. Entry.Address = 0;
  486. Entry.Other = decodeULEB128(CurrPtr, &Count);
  487. CurrPtr += Count;
  488. Entry.ImportName = std::string(reinterpret_cast<const char *>(CurrPtr));
  489. } else {
  490. Entry.Address = decodeULEB128(CurrPtr, &Count);
  491. CurrPtr += Count;
  492. if (Entry.Flags & MachO::EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) {
  493. Entry.Other = decodeULEB128(CurrPtr, &Count);
  494. CurrPtr += Count;
  495. } else
  496. Entry.Other = 0;
  497. }
  498. }
  499. uint8_t childrenCount = *CurrPtr++;
  500. if (childrenCount == 0)
  501. return CurrPtr;
  502. Entry.Children.insert(Entry.Children.begin(), (size_t)childrenCount,
  503. MachOYAML::ExportEntry());
  504. for (auto &Child : Entry.Children) {
  505. Child.Name = std::string(reinterpret_cast<const char *>(CurrPtr));
  506. CurrPtr += Child.Name.length() + 1;
  507. Child.NodeOffset = decodeULEB128(CurrPtr, &Count);
  508. CurrPtr += Count;
  509. }
  510. for (auto &Child : Entry.Children) {
  511. CurrPtr = processExportNode(CurrPtr, End, Child);
  512. }
  513. return CurrPtr;
  514. }
  515. void MachODumper::dumpExportTrie(std::unique_ptr<MachOYAML::Object> &Y) {
  516. MachOYAML::LinkEditData &LEData = Y->LinkEdit;
  517. auto ExportsTrie = Obj.getDyldInfoExportsTrie();
  518. processExportNode(ExportsTrie.begin(), ExportsTrie.end(), LEData.ExportTrie);
  519. }
  520. template <typename nlist_t>
  521. MachOYAML::NListEntry constructNameList(const nlist_t &nlist) {
  522. MachOYAML::NListEntry NL;
  523. NL.n_strx = nlist.n_strx;
  524. NL.n_type = nlist.n_type;
  525. NL.n_sect = nlist.n_sect;
  526. NL.n_desc = nlist.n_desc;
  527. NL.n_value = nlist.n_value;
  528. return NL;
  529. }
  530. void MachODumper::dumpSymbols(std::unique_ptr<MachOYAML::Object> &Y) {
  531. MachOYAML::LinkEditData &LEData = Y->LinkEdit;
  532. for (auto Symbol : Obj.symbols()) {
  533. MachOYAML::NListEntry NLE =
  534. Obj.is64Bit()
  535. ? constructNameList<MachO::nlist_64>(
  536. Obj.getSymbol64TableEntry(Symbol.getRawDataRefImpl()))
  537. : constructNameList<MachO::nlist>(
  538. Obj.getSymbolTableEntry(Symbol.getRawDataRefImpl()));
  539. LEData.NameList.push_back(NLE);
  540. }
  541. StringRef RemainingTable = Obj.getStringTableData();
  542. while (RemainingTable.size() > 0) {
  543. auto SymbolPair = RemainingTable.split('\0');
  544. RemainingTable = SymbolPair.second;
  545. LEData.StringTable.push_back(SymbolPair.first);
  546. }
  547. }
  548. void MachODumper::dumpIndirectSymbols(std::unique_ptr<MachOYAML::Object> &Y) {
  549. MachOYAML::LinkEditData &LEData = Y->LinkEdit;
  550. MachO::dysymtab_command DLC = Obj.getDysymtabLoadCommand();
  551. for (unsigned i = 0; i < DLC.nindirectsyms; ++i)
  552. LEData.IndirectSymbols.push_back(Obj.getIndirectSymbolTableEntry(DLC, i));
  553. }
  554. Error macho2yaml(raw_ostream &Out, const object::MachOObjectFile &Obj,
  555. unsigned RawSegments) {
  556. std::unique_ptr<DWARFContext> DCtx = DWARFContext::create(Obj);
  557. MachODumper Dumper(Obj, std::move(DCtx), RawSegments);
  558. Expected<std::unique_ptr<MachOYAML::Object>> YAML = Dumper.dump();
  559. if (!YAML)
  560. return YAML.takeError();
  561. yaml::YamlObjectFile YAMLFile;
  562. YAMLFile.MachO = std::move(YAML.get());
  563. yaml::Output Yout(Out);
  564. Yout << YAMLFile;
  565. return Error::success();
  566. }
  567. Error macho2yaml(raw_ostream &Out, const object::MachOUniversalBinary &Obj,
  568. unsigned RawSegments) {
  569. yaml::YamlObjectFile YAMLFile;
  570. YAMLFile.FatMachO.reset(new MachOYAML::UniversalBinary());
  571. MachOYAML::UniversalBinary &YAML = *YAMLFile.FatMachO;
  572. YAML.Header.magic = Obj.getMagic();
  573. YAML.Header.nfat_arch = Obj.getNumberOfObjects();
  574. for (auto Slice : Obj.objects()) {
  575. MachOYAML::FatArch arch;
  576. arch.cputype = Slice.getCPUType();
  577. arch.cpusubtype = Slice.getCPUSubType();
  578. arch.offset = Slice.getOffset();
  579. arch.size = Slice.getSize();
  580. arch.align = Slice.getAlign();
  581. arch.reserved = Slice.getReserved();
  582. YAML.FatArchs.push_back(arch);
  583. auto SliceObj = Slice.getAsObjectFile();
  584. if (!SliceObj)
  585. return SliceObj.takeError();
  586. std::unique_ptr<DWARFContext> DCtx = DWARFContext::create(*SliceObj.get());
  587. MachODumper Dumper(*SliceObj.get(), std::move(DCtx), RawSegments);
  588. Expected<std::unique_ptr<MachOYAML::Object>> YAMLObj = Dumper.dump();
  589. if (!YAMLObj)
  590. return YAMLObj.takeError();
  591. YAML.Slices.push_back(*YAMLObj.get());
  592. }
  593. yaml::Output Yout(Out);
  594. Yout << YAML;
  595. return Error::success();
  596. }
  597. Error macho2yaml(raw_ostream &Out, const object::Binary &Binary,
  598. unsigned RawSegments) {
  599. if (const auto *MachOObj = dyn_cast<object::MachOUniversalBinary>(&Binary))
  600. return macho2yaml(Out, *MachOObj, RawSegments);
  601. if (const auto *MachOObj = dyn_cast<object::MachOObjectFile>(&Binary))
  602. return macho2yaml(Out, *MachOObj, RawSegments);
  603. llvm_unreachable("unexpected Mach-O file format");
  604. }