macho2yaml.cpp 26 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722
  1. //===------ macho2yaml.cpp - obj2yaml conversion tool -----------*- C++ -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. #include "obj2yaml.h"
  9. #include "llvm/DebugInfo/DWARF/DWARFContext.h"
  10. #include "llvm/Object/MachOUniversal.h"
  11. #include "llvm/ObjectYAML/DWARFYAML.h"
  12. #include "llvm/ObjectYAML/ObjectYAML.h"
  13. #include "llvm/Support/Errc.h"
  14. #include "llvm/Support/Error.h"
  15. #include "llvm/Support/ErrorHandling.h"
  16. #include "llvm/Support/LEB128.h"
  17. #include <string.h> // for memcpy
  18. using namespace llvm;
  19. class MachODumper {
  20. template <typename StructType>
  21. Expected<const char *> processLoadCommandData(
  22. MachOYAML::LoadCommand &LC,
  23. const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
  24. MachOYAML::Object &Y);
  25. const object::MachOObjectFile &Obj;
  26. std::unique_ptr<DWARFContext> DWARFCtx;
  27. unsigned RawSegment;
  28. void dumpHeader(std::unique_ptr<MachOYAML::Object> &Y);
  29. Error dumpLoadCommands(std::unique_ptr<MachOYAML::Object> &Y);
  30. void dumpLinkEdit(std::unique_ptr<MachOYAML::Object> &Y);
  31. void dumpRebaseOpcodes(std::unique_ptr<MachOYAML::Object> &Y);
  32. void dumpFunctionStarts(std::unique_ptr<MachOYAML::Object> &Y);
  33. void dumpBindOpcodes(std::vector<MachOYAML::BindOpcode> &BindOpcodes,
  34. ArrayRef<uint8_t> OpcodeBuffer, bool Lazy = false);
  35. void dumpExportTrie(std::unique_ptr<MachOYAML::Object> &Y);
  36. void dumpSymbols(std::unique_ptr<MachOYAML::Object> &Y);
  37. void dumpIndirectSymbols(std::unique_ptr<MachOYAML::Object> &Y);
  38. void dumpChainedFixups(std::unique_ptr<MachOYAML::Object> &Y);
  39. void dumpDataInCode(std::unique_ptr<MachOYAML::Object> &Y);
  40. template <typename SectionType>
  41. Expected<MachOYAML::Section> constructSectionCommon(SectionType Sec,
  42. size_t SecIndex);
  43. template <typename SectionType>
  44. Expected<MachOYAML::Section> constructSection(SectionType Sec,
  45. size_t SecIndex);
  46. template <typename SectionType, typename SegmentType>
  47. Expected<const char *>
  48. extractSections(const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
  49. std::vector<MachOYAML::Section> &Sections,
  50. MachOYAML::Object &Y);
  51. public:
  52. MachODumper(const object::MachOObjectFile &O,
  53. std::unique_ptr<DWARFContext> DCtx, unsigned RawSegments)
  54. : Obj(O), DWARFCtx(std::move(DCtx)), RawSegment(RawSegments) {}
  55. Expected<std::unique_ptr<MachOYAML::Object>> dump();
  56. };
  // Expanded for each HANDLE_LOAD_COMMAND entry pulled in by the #include of
  // MachO.def inside the switch in dumpLoadCommands. The expansion relies on
  // the names LC, LoadCmd, Y, EndPtr and Obj being visible at that point.
  // It copies the fixed-size command struct into the matching union member,
  // byte-swaps it when the file and host endianness differ, then lets
  // processLoadCommandData consume the command-specific payload.
  #define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct)                         \
    case MachO::LCName: {                                                        \
      memcpy(static_cast<void *>(&LC.Data.LCStruct##_data), LoadCmd.Ptr,         \
             sizeof(MachO::LCStruct));                                           \
      if (Obj.isLittleEndian() != sys::IsLittleEndianHost)                       \
        MachO::swapStruct(LC.Data.LCStruct##_data);                              \
      Expected<const char *> ExpectedEndPtr =                                    \
          processLoadCommandData<MachO::LCStruct>(LC, LoadCmd, *Y);              \
      if (!ExpectedEndPtr)                                                       \
        return ExpectedEndPtr.takeError();                                       \
      EndPtr = *ExpectedEndPtr;                                                  \
      break;                                                                     \
    }
  69. template <typename SectionType>
  70. Expected<MachOYAML::Section>
  71. MachODumper::constructSectionCommon(SectionType Sec, size_t SecIndex) {
  72. MachOYAML::Section TempSec;
  73. memcpy(reinterpret_cast<void *>(&TempSec.sectname[0]), &Sec.sectname[0], 16);
  74. memcpy(reinterpret_cast<void *>(&TempSec.segname[0]), &Sec.segname[0], 16);
  75. TempSec.addr = Sec.addr;
  76. TempSec.size = Sec.size;
  77. TempSec.offset = Sec.offset;
  78. TempSec.align = Sec.align;
  79. TempSec.reloff = Sec.reloff;
  80. TempSec.nreloc = Sec.nreloc;
  81. TempSec.flags = Sec.flags;
  82. TempSec.reserved1 = Sec.reserved1;
  83. TempSec.reserved2 = Sec.reserved2;
  84. TempSec.reserved3 = 0;
  85. if (!MachO::isVirtualSection(Sec.flags & MachO::SECTION_TYPE))
  86. TempSec.content =
  87. yaml::BinaryRef(Obj.getSectionContents(Sec.offset, Sec.size));
  88. if (Expected<object::SectionRef> SecRef = Obj.getSection(SecIndex)) {
  89. TempSec.relocations.reserve(TempSec.nreloc);
  90. for (const object::RelocationRef &Reloc : SecRef->relocations()) {
  91. const object::DataRefImpl Rel = Reloc.getRawDataRefImpl();
  92. const MachO::any_relocation_info RE = Obj.getRelocation(Rel);
  93. MachOYAML::Relocation R;
  94. R.address = Obj.getAnyRelocationAddress(RE);
  95. R.is_pcrel = Obj.getAnyRelocationPCRel(RE);
  96. R.length = Obj.getAnyRelocationLength(RE);
  97. R.type = Obj.getAnyRelocationType(RE);
  98. R.is_scattered = Obj.isRelocationScattered(RE);
  99. R.symbolnum = (R.is_scattered ? 0 : Obj.getPlainRelocationSymbolNum(RE));
  100. R.is_extern =
  101. (R.is_scattered ? false : Obj.getPlainRelocationExternal(RE));
  102. R.value = (R.is_scattered ? Obj.getScatteredRelocationValue(RE) : 0);
  103. TempSec.relocations.push_back(R);
  104. }
  105. } else {
  106. return SecRef.takeError();
  107. }
  108. return TempSec;
  109. }
  110. template <>
  111. Expected<MachOYAML::Section> MachODumper::constructSection(MachO::section Sec,
  112. size_t SecIndex) {
  113. Expected<MachOYAML::Section> TempSec = constructSectionCommon(Sec, SecIndex);
  114. if (TempSec)
  115. TempSec->reserved3 = 0;
  116. return TempSec;
  117. }
  118. template <>
  119. Expected<MachOYAML::Section>
  120. MachODumper::constructSection(MachO::section_64 Sec, size_t SecIndex) {
  121. Expected<MachOYAML::Section> TempSec = constructSectionCommon(Sec, SecIndex);
  122. if (TempSec)
  123. TempSec->reserved3 = Sec.reserved3;
  124. return TempSec;
  125. }
  126. static Error dumpDebugSection(StringRef SecName, DWARFContext &DCtx,
  127. DWARFYAML::Data &DWARF) {
  128. if (SecName == "__debug_abbrev") {
  129. dumpDebugAbbrev(DCtx, DWARF);
  130. return Error::success();
  131. }
  132. if (SecName == "__debug_aranges")
  133. return dumpDebugARanges(DCtx, DWARF);
  134. if (SecName == "__debug_info") {
  135. dumpDebugInfo(DCtx, DWARF);
  136. return Error::success();
  137. }
  138. if (SecName == "__debug_line") {
  139. dumpDebugLines(DCtx, DWARF);
  140. return Error::success();
  141. }
  142. if (SecName.startswith("__debug_pub")) {
  143. // FIXME: We should extract pub-section dumpers from this function.
  144. dumpDebugPubSections(DCtx, DWARF);
  145. return Error::success();
  146. }
  147. if (SecName == "__debug_ranges")
  148. return dumpDebugRanges(DCtx, DWARF);
  149. if (SecName == "__debug_str")
  150. return dumpDebugStrings(DCtx, DWARF);
  151. return createStringError(errc::not_supported,
  152. "dumping " + SecName + " section is not supported");
  153. }
  154. template <typename SectionType, typename SegmentType>
  155. Expected<const char *> MachODumper::extractSections(
  156. const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
  157. std::vector<MachOYAML::Section> &Sections, MachOYAML::Object &Y) {
  158. auto End = LoadCmd.Ptr + LoadCmd.C.cmdsize;
  159. const SectionType *Curr =
  160. reinterpret_cast<const SectionType *>(LoadCmd.Ptr + sizeof(SegmentType));
  161. for (; reinterpret_cast<const void *>(Curr) < End; Curr++) {
  162. SectionType Sec;
  163. memcpy((void *)&Sec, Curr, sizeof(SectionType));
  164. if (Obj.isLittleEndian() != sys::IsLittleEndianHost)
  165. MachO::swapStruct(Sec);
  166. // For MachO section indices start from 1.
  167. if (Expected<MachOYAML::Section> S =
  168. constructSection(Sec, Sections.size() + 1)) {
  169. StringRef SecName(S->sectname);
  170. // Copy data sections if requested.
  171. if ((RawSegment & ::RawSegments::data) &&
  172. StringRef(S->segname).startswith("__DATA"))
  173. S->content =
  174. yaml::BinaryRef(Obj.getSectionContents(Sec.offset, Sec.size));
  175. if (SecName.startswith("__debug_")) {
  176. // If the DWARF section cannot be successfully parsed, emit raw content
  177. // instead of an entry in the DWARF section of the YAML.
  178. if (Error Err = dumpDebugSection(SecName, *DWARFCtx, Y.DWARF))
  179. consumeError(std::move(Err));
  180. else
  181. S->content.reset();
  182. }
  183. Sections.push_back(std::move(*S));
  184. } else
  185. return S.takeError();
  186. }
  187. return reinterpret_cast<const char *>(Curr);
  188. }
  189. template <typename StructType>
  190. Expected<const char *> MachODumper::processLoadCommandData(
  191. MachOYAML::LoadCommand &LC,
  192. const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
  193. MachOYAML::Object &Y) {
  194. return LoadCmd.Ptr + sizeof(StructType);
  195. }
  196. template <>
  197. Expected<const char *>
  198. MachODumper::processLoadCommandData<MachO::segment_command>(
  199. MachOYAML::LoadCommand &LC,
  200. const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
  201. MachOYAML::Object &Y) {
  202. return extractSections<MachO::section, MachO::segment_command>(
  203. LoadCmd, LC.Sections, Y);
  204. }
  205. template <>
  206. Expected<const char *>
  207. MachODumper::processLoadCommandData<MachO::segment_command_64>(
  208. MachOYAML::LoadCommand &LC,
  209. const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
  210. MachOYAML::Object &Y) {
  211. return extractSections<MachO::section_64, MachO::segment_command_64>(
  212. LoadCmd, LC.Sections, Y);
  213. }
  214. template <typename StructType>
  215. const char *
  216. readString(MachOYAML::LoadCommand &LC,
  217. const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd) {
  218. auto Start = LoadCmd.Ptr + sizeof(StructType);
  219. auto MaxSize = LoadCmd.C.cmdsize - sizeof(StructType);
  220. auto Size = strnlen(Start, MaxSize);
  221. LC.Content = StringRef(Start, Size).str();
  222. return Start + Size;
  223. }
  224. template <>
  225. Expected<const char *>
  226. MachODumper::processLoadCommandData<MachO::dylib_command>(
  227. MachOYAML::LoadCommand &LC,
  228. const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
  229. MachOYAML::Object &Y) {
  230. return readString<MachO::dylib_command>(LC, LoadCmd);
  231. }
  232. template <>
  233. Expected<const char *>
  234. MachODumper::processLoadCommandData<MachO::dylinker_command>(
  235. MachOYAML::LoadCommand &LC,
  236. const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
  237. MachOYAML::Object &Y) {
  238. return readString<MachO::dylinker_command>(LC, LoadCmd);
  239. }
  240. template <>
  241. Expected<const char *>
  242. MachODumper::processLoadCommandData<MachO::rpath_command>(
  243. MachOYAML::LoadCommand &LC,
  244. const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
  245. MachOYAML::Object &Y) {
  246. return readString<MachO::rpath_command>(LC, LoadCmd);
  247. }
  248. template <>
  249. Expected<const char *>
  250. MachODumper::processLoadCommandData<MachO::build_version_command>(
  251. MachOYAML::LoadCommand &LC,
  252. const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
  253. MachOYAML::Object &Y) {
  254. auto Start = LoadCmd.Ptr + sizeof(MachO::build_version_command);
  255. auto NTools = LC.Data.build_version_command_data.ntools;
  256. for (unsigned i = 0; i < NTools; ++i) {
  257. auto Curr = Start + i * sizeof(MachO::build_tool_version);
  258. MachO::build_tool_version BV;
  259. memcpy((void *)&BV, Curr, sizeof(MachO::build_tool_version));
  260. if (Obj.isLittleEndian() != sys::IsLittleEndianHost)
  261. MachO::swapStruct(BV);
  262. LC.Tools.push_back(BV);
  263. }
  264. return Start + NTools * sizeof(MachO::build_tool_version);
  265. }
  266. Expected<std::unique_ptr<MachOYAML::Object>> MachODumper::dump() {
  267. auto Y = std::make_unique<MachOYAML::Object>();
  268. Y->IsLittleEndian = Obj.isLittleEndian();
  269. dumpHeader(Y);
  270. if (Error Err = dumpLoadCommands(Y))
  271. return std::move(Err);
  272. if (RawSegment & ::RawSegments::linkedit)
  273. Y->RawLinkEditSegment =
  274. yaml::BinaryRef(Obj.getSegmentContents("__LINKEDIT"));
  275. else
  276. dumpLinkEdit(Y);
  277. return std::move(Y);
  278. }
  279. void MachODumper::dumpHeader(std::unique_ptr<MachOYAML::Object> &Y) {
  280. Y->Header.magic = Obj.getHeader().magic;
  281. Y->Header.cputype = Obj.getHeader().cputype;
  282. Y->Header.cpusubtype = Obj.getHeader().cpusubtype;
  283. Y->Header.filetype = Obj.getHeader().filetype;
  284. Y->Header.ncmds = Obj.getHeader().ncmds;
  285. Y->Header.sizeofcmds = Obj.getHeader().sizeofcmds;
  286. Y->Header.flags = Obj.getHeader().flags;
  287. Y->Header.reserved = 0;
  288. }
  289. Error MachODumper::dumpLoadCommands(std::unique_ptr<MachOYAML::Object> &Y) {
  290. for (auto LoadCmd : Obj.load_commands()) {
  291. MachOYAML::LoadCommand LC;
  292. const char *EndPtr = LoadCmd.Ptr;
  293. switch (LoadCmd.C.cmd) {
  294. default:
  295. memcpy((void *)&(LC.Data.load_command_data), LoadCmd.Ptr,
  296. sizeof(MachO::load_command));
  297. if (Obj.isLittleEndian() != sys::IsLittleEndianHost)
  298. MachO::swapStruct(LC.Data.load_command_data);
  299. if (Expected<const char *> ExpectedEndPtr =
  300. processLoadCommandData<MachO::load_command>(LC, LoadCmd, *Y))
  301. EndPtr = *ExpectedEndPtr;
  302. else
  303. return ExpectedEndPtr.takeError();
  304. break;
  305. #include "llvm/BinaryFormat/MachO.def"
  306. }
  307. auto RemainingBytes = LoadCmd.C.cmdsize - (EndPtr - LoadCmd.Ptr);
  308. if (!std::all_of(EndPtr, &EndPtr[RemainingBytes],
  309. [](const char C) { return C == 0; })) {
  310. LC.PayloadBytes.insert(LC.PayloadBytes.end(), EndPtr,
  311. &EndPtr[RemainingBytes]);
  312. RemainingBytes = 0;
  313. }
  314. LC.ZeroPadBytes = RemainingBytes;
  315. Y->LoadCommands.push_back(std::move(LC));
  316. }
  317. return Error::success();
  318. }
  319. void MachODumper::dumpLinkEdit(std::unique_ptr<MachOYAML::Object> &Y) {
  320. dumpRebaseOpcodes(Y);
  321. dumpBindOpcodes(Y->LinkEdit.BindOpcodes, Obj.getDyldInfoBindOpcodes());
  322. dumpBindOpcodes(Y->LinkEdit.WeakBindOpcodes,
  323. Obj.getDyldInfoWeakBindOpcodes());
  324. dumpBindOpcodes(Y->LinkEdit.LazyBindOpcodes, Obj.getDyldInfoLazyBindOpcodes(),
  325. true);
  326. dumpExportTrie(Y);
  327. dumpSymbols(Y);
  328. dumpIndirectSymbols(Y);
  329. dumpFunctionStarts(Y);
  330. dumpChainedFixups(Y);
  331. dumpDataInCode(Y);
  332. }
  333. void MachODumper::dumpFunctionStarts(std::unique_ptr<MachOYAML::Object> &Y) {
  334. MachOYAML::LinkEditData &LEData = Y->LinkEdit;
  335. auto FunctionStarts = Obj.getFunctionStarts();
  336. for (auto Addr : FunctionStarts)
  337. LEData.FunctionStarts.push_back(Addr);
  338. }
  339. void MachODumper::dumpRebaseOpcodes(std::unique_ptr<MachOYAML::Object> &Y) {
  340. MachOYAML::LinkEditData &LEData = Y->LinkEdit;
  341. auto RebaseOpcodes = Obj.getDyldInfoRebaseOpcodes();
  342. for (auto OpCode = RebaseOpcodes.begin(); OpCode != RebaseOpcodes.end();
  343. ++OpCode) {
  344. MachOYAML::RebaseOpcode RebaseOp;
  345. RebaseOp.Opcode =
  346. static_cast<MachO::RebaseOpcode>(*OpCode & MachO::REBASE_OPCODE_MASK);
  347. RebaseOp.Imm = *OpCode & MachO::REBASE_IMMEDIATE_MASK;
  348. unsigned Count;
  349. uint64_t ULEB = 0;
  350. switch (RebaseOp.Opcode) {
  351. case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB:
  352. ULEB = decodeULEB128(OpCode + 1, &Count);
  353. RebaseOp.ExtraData.push_back(ULEB);
  354. OpCode += Count;
  355. [[fallthrough]];
  356. // Intentionally no break here -- This opcode has two ULEB values
  357. case MachO::REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
  358. case MachO::REBASE_OPCODE_ADD_ADDR_ULEB:
  359. case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES:
  360. case MachO::REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB:
  361. ULEB = decodeULEB128(OpCode + 1, &Count);
  362. RebaseOp.ExtraData.push_back(ULEB);
  363. OpCode += Count;
  364. break;
  365. default:
  366. break;
  367. }
  368. LEData.RebaseOpcodes.push_back(RebaseOp);
  369. if (RebaseOp.Opcode == MachO::REBASE_OPCODE_DONE)
  370. break;
  371. }
  372. }
  373. StringRef ReadStringRef(const uint8_t *Start) {
  374. const uint8_t *Itr = Start;
  375. for (; *Itr; ++Itr)
  376. ;
  377. return StringRef(reinterpret_cast<const char *>(Start), Itr - Start);
  378. }
  379. void MachODumper::dumpBindOpcodes(
  380. std::vector<MachOYAML::BindOpcode> &BindOpcodes,
  381. ArrayRef<uint8_t> OpcodeBuffer, bool Lazy) {
  382. for (auto OpCode = OpcodeBuffer.begin(); OpCode != OpcodeBuffer.end();
  383. ++OpCode) {
  384. MachOYAML::BindOpcode BindOp;
  385. BindOp.Opcode =
  386. static_cast<MachO::BindOpcode>(*OpCode & MachO::BIND_OPCODE_MASK);
  387. BindOp.Imm = *OpCode & MachO::BIND_IMMEDIATE_MASK;
  388. unsigned Count;
  389. uint64_t ULEB = 0;
  390. int64_t SLEB = 0;
  391. switch (BindOp.Opcode) {
  392. case MachO::BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB:
  393. ULEB = decodeULEB128(OpCode + 1, &Count);
  394. BindOp.ULEBExtraData.push_back(ULEB);
  395. OpCode += Count;
  396. [[fallthrough]];
  397. // Intentionally no break here -- this opcode has two ULEB values
  398. case MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB:
  399. case MachO::BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
  400. case MachO::BIND_OPCODE_ADD_ADDR_ULEB:
  401. case MachO::BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB:
  402. ULEB = decodeULEB128(OpCode + 1, &Count);
  403. BindOp.ULEBExtraData.push_back(ULEB);
  404. OpCode += Count;
  405. break;
  406. case MachO::BIND_OPCODE_SET_ADDEND_SLEB:
  407. SLEB = decodeSLEB128(OpCode + 1, &Count);
  408. BindOp.SLEBExtraData.push_back(SLEB);
  409. OpCode += Count;
  410. break;
  411. case MachO::BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM:
  412. BindOp.Symbol = ReadStringRef(OpCode + 1);
  413. OpCode += BindOp.Symbol.size() + 1;
  414. break;
  415. default:
  416. break;
  417. }
  418. BindOpcodes.push_back(BindOp);
  419. // Lazy bindings have DONE opcodes between operations, so we need to keep
  420. // processing after a DONE.
  421. if (!Lazy && BindOp.Opcode == MachO::BIND_OPCODE_DONE)
  422. break;
  423. }
  424. }
  425. /*!
  426. * /brief processes a node from the export trie, and its children.
  427. *
  428. * To my knowledge there is no documentation of the encoded format of this data
  429. * other than in the heads of the Apple linker engineers. To that end hopefully
  430. * this comment and the implementation below can serve to light the way for
  431. * anyone crazy enough to come down this path in the future.
  432. *
  433. * This function reads and preserves the trie structure of the export trie. To
  434. * my knowledge there is no code anywhere else that reads the data and preserves
  435. * the Trie. LD64 (sources available at opensource.apple.com) has a similar
  436. * implementation that parses the export trie into a vector. That code as well
  437. * as LLVM's libObject MachO implementation were the basis for this.
  438. *
  439. * The export trie is an encoded trie. The node serialization is a bit awkward.
  440. * The below pseudo-code is the best description I've come up with for it.
  441. *
  442. * struct SerializedNode {
  443. * ULEB128 TerminalSize;
  444. * struct TerminalData { <-- This is only present if TerminalSize > 0
  445. * ULEB128 Flags;
  446. * ULEB128 Address; <-- Present if (! Flags & REEXPORT )
  447. * ULEB128 Other; <-- Present if ( Flags & REEXPORT ||
  448. * Flags & STUB_AND_RESOLVER )
  449. * char[] ImportName; <-- Present if ( Flags & REEXPORT )
  450. * }
  451. * uint8_t ChildrenCount;
  452. * Pair<char[], ULEB128> ChildNameOffsetPair[ChildrenCount];
  453. * SerializedNode Children[ChildrenCount]
  454. * }
  455. *
  456. * Terminal nodes are nodes that represent actual exports. They can appear
  457. * anywhere in the tree other than at the root; they do not need to be leaf
  458. * nodes. When reading the data out of the trie this routine reads it in-order,
  459. * but it puts the child names and offsets directly into the child nodes. This
  460. * results in looping over the children twice during serialization and
  461. * de-serialization, but it makes the YAML representation more human readable.
  462. *
  463. * Below is an example of the graph from a "Hello World" executable:
  464. *
  465. * -------
  466. * | '' |
  467. * -------
  468. * |
  469. * -------
  470. * | '_' |
  471. * -------
  472. * |
  473. * |----------------------------------------|
  474. * | |
  475. * ------------------------ ---------------------
  476. * | '_mh_execute_header' | | 'main' |
  477. * | Flags: 0x00000000 | | Flags: 0x00000000 |
  478. * | Addr: 0x00000000 | | Addr: 0x00001160 |
  479. * ------------------------ ---------------------
  480. *
  481. * This graph represents the trie for the exports "__mh_execute_header" and
  482. * "_main". In the graph only the "_main" and "__mh_execute_header" nodes are
  483. * terminal.
  484. */
  485. const uint8_t *processExportNode(const uint8_t *CurrPtr,
  486. const uint8_t *const End,
  487. MachOYAML::ExportEntry &Entry) {
  488. if (CurrPtr >= End)
  489. return CurrPtr;
  490. unsigned Count = 0;
  491. Entry.TerminalSize = decodeULEB128(CurrPtr, &Count);
  492. CurrPtr += Count;
  493. if (Entry.TerminalSize != 0) {
  494. Entry.Flags = decodeULEB128(CurrPtr, &Count);
  495. CurrPtr += Count;
  496. if (Entry.Flags & MachO::EXPORT_SYMBOL_FLAGS_REEXPORT) {
  497. Entry.Address = 0;
  498. Entry.Other = decodeULEB128(CurrPtr, &Count);
  499. CurrPtr += Count;
  500. Entry.ImportName = std::string(reinterpret_cast<const char *>(CurrPtr));
  501. } else {
  502. Entry.Address = decodeULEB128(CurrPtr, &Count);
  503. CurrPtr += Count;
  504. if (Entry.Flags & MachO::EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) {
  505. Entry.Other = decodeULEB128(CurrPtr, &Count);
  506. CurrPtr += Count;
  507. } else
  508. Entry.Other = 0;
  509. }
  510. }
  511. uint8_t childrenCount = *CurrPtr++;
  512. if (childrenCount == 0)
  513. return CurrPtr;
  514. Entry.Children.insert(Entry.Children.begin(), (size_t)childrenCount,
  515. MachOYAML::ExportEntry());
  516. for (auto &Child : Entry.Children) {
  517. Child.Name = std::string(reinterpret_cast<const char *>(CurrPtr));
  518. CurrPtr += Child.Name.length() + 1;
  519. Child.NodeOffset = decodeULEB128(CurrPtr, &Count);
  520. CurrPtr += Count;
  521. }
  522. for (auto &Child : Entry.Children) {
  523. CurrPtr = processExportNode(CurrPtr, End, Child);
  524. }
  525. return CurrPtr;
  526. }
  527. void MachODumper::dumpExportTrie(std::unique_ptr<MachOYAML::Object> &Y) {
  528. MachOYAML::LinkEditData &LEData = Y->LinkEdit;
  529. // The exports trie can be in LC_DYLD_INFO or LC_DYLD_EXPORTS_TRIE
  530. auto ExportsTrie = Obj.getDyldInfoExportsTrie();
  531. if (ExportsTrie.empty())
  532. ExportsTrie = Obj.getDyldExportsTrie();
  533. processExportNode(ExportsTrie.begin(), ExportsTrie.end(), LEData.ExportTrie);
  534. }
  535. template <typename nlist_t>
  536. MachOYAML::NListEntry constructNameList(const nlist_t &nlist) {
  537. MachOYAML::NListEntry NL;
  538. NL.n_strx = nlist.n_strx;
  539. NL.n_type = nlist.n_type;
  540. NL.n_sect = nlist.n_sect;
  541. NL.n_desc = nlist.n_desc;
  542. NL.n_value = nlist.n_value;
  543. return NL;
  544. }
  545. void MachODumper::dumpSymbols(std::unique_ptr<MachOYAML::Object> &Y) {
  546. MachOYAML::LinkEditData &LEData = Y->LinkEdit;
  547. for (auto Symbol : Obj.symbols()) {
  548. MachOYAML::NListEntry NLE =
  549. Obj.is64Bit()
  550. ? constructNameList<MachO::nlist_64>(
  551. Obj.getSymbol64TableEntry(Symbol.getRawDataRefImpl()))
  552. : constructNameList<MachO::nlist>(
  553. Obj.getSymbolTableEntry(Symbol.getRawDataRefImpl()));
  554. LEData.NameList.push_back(NLE);
  555. }
  556. StringRef RemainingTable = Obj.getStringTableData();
  557. while (RemainingTable.size() > 0) {
  558. auto SymbolPair = RemainingTable.split('\0');
  559. RemainingTable = SymbolPair.second;
  560. LEData.StringTable.push_back(SymbolPair.first);
  561. }
  562. }
  563. void MachODumper::dumpIndirectSymbols(std::unique_ptr<MachOYAML::Object> &Y) {
  564. MachOYAML::LinkEditData &LEData = Y->LinkEdit;
  565. MachO::dysymtab_command DLC = Obj.getDysymtabLoadCommand();
  566. for (unsigned i = 0; i < DLC.nindirectsyms; ++i)
  567. LEData.IndirectSymbols.push_back(Obj.getIndirectSymbolTableEntry(DLC, i));
  568. }
  569. void MachODumper::dumpChainedFixups(std::unique_ptr<MachOYAML::Object> &Y) {
  570. MachOYAML::LinkEditData &LEData = Y->LinkEdit;
  571. for (const auto &LC : Y->LoadCommands) {
  572. if (LC.Data.load_command_data.cmd == llvm::MachO::LC_DYLD_CHAINED_FIXUPS) {
  573. const MachO::linkedit_data_command &DC =
  574. LC.Data.linkedit_data_command_data;
  575. if (DC.dataoff) {
  576. assert(DC.dataoff < Obj.getData().size());
  577. assert(DC.dataoff + DC.datasize <= Obj.getData().size());
  578. const char *Bytes = Obj.getData().data() + DC.dataoff;
  579. for (size_t Idx = 0; Idx < DC.datasize; Idx++) {
  580. LEData.ChainedFixups.push_back(Bytes[Idx]);
  581. }
  582. }
  583. break;
  584. }
  585. }
  586. }
  587. void MachODumper::dumpDataInCode(std::unique_ptr<MachOYAML::Object> &Y) {
  588. MachOYAML::LinkEditData &LEData = Y->LinkEdit;
  589. MachO::linkedit_data_command DIC = Obj.getDataInCodeLoadCommand();
  590. uint32_t NumEntries = DIC.datasize / sizeof(MachO::data_in_code_entry);
  591. for (uint32_t Idx = 0; Idx < NumEntries; ++Idx) {
  592. MachO::data_in_code_entry DICE =
  593. Obj.getDataInCodeTableEntry(DIC.dataoff, Idx);
  594. MachOYAML::DataInCodeEntry Entry{DICE.offset, DICE.length, DICE.kind};
  595. LEData.DataInCode.emplace_back(Entry);
  596. }
  597. }
  598. Error macho2yaml(raw_ostream &Out, const object::MachOObjectFile &Obj,
  599. unsigned RawSegments) {
  600. std::unique_ptr<DWARFContext> DCtx = DWARFContext::create(Obj);
  601. MachODumper Dumper(Obj, std::move(DCtx), RawSegments);
  602. Expected<std::unique_ptr<MachOYAML::Object>> YAML = Dumper.dump();
  603. if (!YAML)
  604. return YAML.takeError();
  605. yaml::YamlObjectFile YAMLFile;
  606. YAMLFile.MachO = std::move(YAML.get());
  607. yaml::Output Yout(Out);
  608. Yout << YAMLFile;
  609. return Error::success();
  610. }
  611. Error macho2yaml(raw_ostream &Out, const object::MachOUniversalBinary &Obj,
  612. unsigned RawSegments) {
  613. yaml::YamlObjectFile YAMLFile;
  614. YAMLFile.FatMachO.reset(new MachOYAML::UniversalBinary());
  615. MachOYAML::UniversalBinary &YAML = *YAMLFile.FatMachO;
  616. YAML.Header.magic = Obj.getMagic();
  617. YAML.Header.nfat_arch = Obj.getNumberOfObjects();
  618. for (auto Slice : Obj.objects()) {
  619. MachOYAML::FatArch arch;
  620. arch.cputype = Slice.getCPUType();
  621. arch.cpusubtype = Slice.getCPUSubType();
  622. arch.offset = Slice.getOffset();
  623. arch.size = Slice.getSize();
  624. arch.align = Slice.getAlign();
  625. arch.reserved = Slice.getReserved();
  626. YAML.FatArchs.push_back(arch);
  627. auto SliceObj = Slice.getAsObjectFile();
  628. if (!SliceObj)
  629. return SliceObj.takeError();
  630. std::unique_ptr<DWARFContext> DCtx = DWARFContext::create(*SliceObj.get());
  631. MachODumper Dumper(*SliceObj.get(), std::move(DCtx), RawSegments);
  632. Expected<std::unique_ptr<MachOYAML::Object>> YAMLObj = Dumper.dump();
  633. if (!YAMLObj)
  634. return YAMLObj.takeError();
  635. YAML.Slices.push_back(*YAMLObj.get());
  636. }
  637. yaml::Output Yout(Out);
  638. Yout << YAML;
  639. return Error::success();
  640. }
  641. Error macho2yaml(raw_ostream &Out, const object::Binary &Binary,
  642. unsigned RawSegments) {
  643. if (const auto *MachOObj = dyn_cast<object::MachOUniversalBinary>(&Binary))
  644. return macho2yaml(Out, *MachOObj, RawSegments);
  645. if (const auto *MachOObj = dyn_cast<object::MachOObjectFile>(&Binary))
  646. return macho2yaml(Out, *MachOObj, RawSegments);
  647. llvm_unreachable("unexpected Mach-O file format");
  648. }