MachOReader.cpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386
  1. //===- MachOReader.cpp ------------------------------------------*- C++ -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. #include "MachOReader.h"
  9. #include "MachOObject.h"
  10. #include "llvm/BinaryFormat/MachO.h"
  11. #include "llvm/Object/MachO.h"
  12. #include "llvm/Support/Errc.h"
  13. #include <memory>
  14. using namespace llvm;
  15. using namespace llvm::objcopy;
  16. using namespace llvm::objcopy::macho;
  17. void MachOReader::readHeader(Object &O) const {
  18. O.Header.Magic = MachOObj.getHeader().magic;
  19. O.Header.CPUType = MachOObj.getHeader().cputype;
  20. O.Header.CPUSubType = MachOObj.getHeader().cpusubtype;
  21. O.Header.FileType = MachOObj.getHeader().filetype;
  22. O.Header.NCmds = MachOObj.getHeader().ncmds;
  23. O.Header.SizeOfCmds = MachOObj.getHeader().sizeofcmds;
  24. O.Header.Flags = MachOObj.getHeader().flags;
  25. }
  26. template <typename SectionType>
  27. static Section constructSectionCommon(const SectionType &Sec, uint32_t Index) {
  28. StringRef SegName(Sec.segname, strnlen(Sec.segname, sizeof(Sec.segname)));
  29. StringRef SectName(Sec.sectname, strnlen(Sec.sectname, sizeof(Sec.sectname)));
  30. Section S(SegName, SectName);
  31. S.Index = Index;
  32. S.Addr = Sec.addr;
  33. S.Size = Sec.size;
  34. S.OriginalOffset = Sec.offset;
  35. S.Align = Sec.align;
  36. S.RelOff = Sec.reloff;
  37. S.NReloc = Sec.nreloc;
  38. S.Flags = Sec.flags;
  39. S.Reserved1 = Sec.reserved1;
  40. S.Reserved2 = Sec.reserved2;
  41. S.Reserved3 = 0;
  42. return S;
  43. }
  44. Section constructSection(const MachO::section &Sec, uint32_t Index) {
  45. return constructSectionCommon(Sec, Index);
  46. }
  47. Section constructSection(const MachO::section_64 &Sec, uint32_t Index) {
  48. Section S = constructSectionCommon(Sec, Index);
  49. S.Reserved3 = Sec.reserved3;
  50. return S;
  51. }
  52. template <typename SectionType, typename SegmentType>
  53. Expected<std::vector<std::unique_ptr<Section>>> static extractSections(
  54. const object::MachOObjectFile::LoadCommandInfo &LoadCmd,
  55. const object::MachOObjectFile &MachOObj, uint32_t &NextSectionIndex) {
  56. std::vector<std::unique_ptr<Section>> Sections;
  57. for (auto Curr = reinterpret_cast<const SectionType *>(LoadCmd.Ptr +
  58. sizeof(SegmentType)),
  59. End = reinterpret_cast<const SectionType *>(LoadCmd.Ptr +
  60. LoadCmd.C.cmdsize);
  61. Curr < End; ++Curr) {
  62. SectionType Sec;
  63. memcpy((void *)&Sec, Curr, sizeof(SectionType));
  64. if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost)
  65. MachO::swapStruct(Sec);
  66. Sections.push_back(
  67. std::make_unique<Section>(constructSection(Sec, NextSectionIndex)));
  68. Section &S = *Sections.back();
  69. Expected<object::SectionRef> SecRef =
  70. MachOObj.getSection(NextSectionIndex++);
  71. if (!SecRef)
  72. return SecRef.takeError();
  73. Expected<ArrayRef<uint8_t>> Data =
  74. MachOObj.getSectionContents(SecRef->getRawDataRefImpl());
  75. if (!Data)
  76. return Data.takeError();
  77. S.Content =
  78. StringRef(reinterpret_cast<const char *>(Data->data()), Data->size());
  79. const uint32_t CPUType = MachOObj.getHeader().cputype;
  80. S.Relocations.reserve(S.NReloc);
  81. for (auto RI = MachOObj.section_rel_begin(SecRef->getRawDataRefImpl()),
  82. RE = MachOObj.section_rel_end(SecRef->getRawDataRefImpl());
  83. RI != RE; ++RI) {
  84. RelocationInfo R;
  85. R.Symbol = nullptr; // We'll fill this field later.
  86. R.Info = MachOObj.getRelocation(RI->getRawDataRefImpl());
  87. R.Scattered = MachOObj.isRelocationScattered(R.Info);
  88. unsigned Type = MachOObj.getAnyRelocationType(R.Info);
  89. // TODO Support CPU_TYPE_ARM.
  90. R.IsAddend = !R.Scattered && (CPUType == MachO::CPU_TYPE_ARM64 &&
  91. Type == MachO::ARM64_RELOC_ADDEND);
  92. R.Extern = !R.Scattered && MachOObj.getPlainRelocationExternal(R.Info);
  93. S.Relocations.push_back(R);
  94. }
  95. assert(S.NReloc == S.Relocations.size() &&
  96. "Incorrect number of relocations");
  97. }
  98. return std::move(Sections);
  99. }
  100. Error MachOReader::readLoadCommands(Object &O) const {
  101. // For MachO sections indices start from 1.
  102. uint32_t NextSectionIndex = 1;
  103. static constexpr char TextSegmentName[] = "__TEXT";
  104. for (auto LoadCmd : MachOObj.load_commands()) {
  105. LoadCommand LC;
  106. switch (LoadCmd.C.cmd) {
  107. case MachO::LC_CODE_SIGNATURE:
  108. O.CodeSignatureCommandIndex = O.LoadCommands.size();
  109. break;
  110. case MachO::LC_SEGMENT:
  111. // LoadCmd.Ptr might not be aligned temporarily as
  112. // MachO::segment_command requires, but the segname char pointer do not
  113. // have alignment restrictions.
  114. if (StringRef(reinterpret_cast<const char *>(
  115. LoadCmd.Ptr + offsetof(MachO::segment_command, segname))) ==
  116. TextSegmentName)
  117. O.TextSegmentCommandIndex = O.LoadCommands.size();
  118. if (Expected<std::vector<std::unique_ptr<Section>>> Sections =
  119. extractSections<MachO::section, MachO::segment_command>(
  120. LoadCmd, MachOObj, NextSectionIndex))
  121. LC.Sections = std::move(*Sections);
  122. else
  123. return Sections.takeError();
  124. break;
  125. case MachO::LC_SEGMENT_64:
  126. // LoadCmd.Ptr might not be aligned temporarily as
  127. // MachO::segment_command_64 requires, but the segname char pointer do
  128. // not have alignment restrictions.
  129. if (StringRef(reinterpret_cast<const char *>(
  130. LoadCmd.Ptr + offsetof(MachO::segment_command_64, segname))) ==
  131. TextSegmentName)
  132. O.TextSegmentCommandIndex = O.LoadCommands.size();
  133. if (Expected<std::vector<std::unique_ptr<Section>>> Sections =
  134. extractSections<MachO::section_64, MachO::segment_command_64>(
  135. LoadCmd, MachOObj, NextSectionIndex))
  136. LC.Sections = std::move(*Sections);
  137. else
  138. return Sections.takeError();
  139. break;
  140. case MachO::LC_SYMTAB:
  141. O.SymTabCommandIndex = O.LoadCommands.size();
  142. break;
  143. case MachO::LC_DYSYMTAB:
  144. O.DySymTabCommandIndex = O.LoadCommands.size();
  145. break;
  146. case MachO::LC_DYLD_INFO:
  147. case MachO::LC_DYLD_INFO_ONLY:
  148. O.DyLdInfoCommandIndex = O.LoadCommands.size();
  149. break;
  150. case MachO::LC_DATA_IN_CODE:
  151. O.DataInCodeCommandIndex = O.LoadCommands.size();
  152. break;
  153. case MachO::LC_LINKER_OPTIMIZATION_HINT:
  154. O.LinkerOptimizationHintCommandIndex = O.LoadCommands.size();
  155. break;
  156. case MachO::LC_FUNCTION_STARTS:
  157. O.FunctionStartsCommandIndex = O.LoadCommands.size();
  158. break;
  159. case MachO::LC_DYLIB_CODE_SIGN_DRS:
  160. O.DylibCodeSignDRsIndex = O.LoadCommands.size();
  161. break;
  162. case MachO::LC_DYLD_EXPORTS_TRIE:
  163. O.ExportsTrieCommandIndex = O.LoadCommands.size();
  164. break;
  165. case MachO::LC_DYLD_CHAINED_FIXUPS:
  166. O.ChainedFixupsCommandIndex = O.LoadCommands.size();
  167. break;
  168. }
  169. #define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \
  170. case MachO::LCName: \
  171. memcpy((void *)&(LC.MachOLoadCommand.LCStruct##_data), LoadCmd.Ptr, \
  172. sizeof(MachO::LCStruct)); \
  173. if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) \
  174. MachO::swapStruct(LC.MachOLoadCommand.LCStruct##_data); \
  175. if (LoadCmd.C.cmdsize > sizeof(MachO::LCStruct)) \
  176. LC.Payload = ArrayRef<uint8_t>( \
  177. reinterpret_cast<uint8_t *>(const_cast<char *>(LoadCmd.Ptr)) + \
  178. sizeof(MachO::LCStruct), \
  179. LoadCmd.C.cmdsize - sizeof(MachO::LCStruct)); \
  180. break;
  181. switch (LoadCmd.C.cmd) {
  182. default:
  183. memcpy((void *)&(LC.MachOLoadCommand.load_command_data), LoadCmd.Ptr,
  184. sizeof(MachO::load_command));
  185. if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost)
  186. MachO::swapStruct(LC.MachOLoadCommand.load_command_data);
  187. if (LoadCmd.C.cmdsize > sizeof(MachO::load_command))
  188. LC.Payload = ArrayRef<uint8_t>(
  189. reinterpret_cast<uint8_t *>(const_cast<char *>(LoadCmd.Ptr)) +
  190. sizeof(MachO::load_command),
  191. LoadCmd.C.cmdsize - sizeof(MachO::load_command));
  192. break;
  193. #include "llvm/BinaryFormat/MachO.def"
  194. }
  195. O.LoadCommands.push_back(std::move(LC));
  196. }
  197. return Error::success();
  198. }
  199. template <typename nlist_t>
  200. SymbolEntry constructSymbolEntry(StringRef StrTable, const nlist_t &nlist) {
  201. assert(nlist.n_strx < StrTable.size() &&
  202. "n_strx exceeds the size of the string table");
  203. SymbolEntry SE;
  204. SE.Name = StringRef(StrTable.data() + nlist.n_strx).str();
  205. SE.n_type = nlist.n_type;
  206. SE.n_sect = nlist.n_sect;
  207. SE.n_desc = nlist.n_desc;
  208. SE.n_value = nlist.n_value;
  209. return SE;
  210. }
  211. void MachOReader::readSymbolTable(Object &O) const {
  212. StringRef StrTable = MachOObj.getStringTableData();
  213. for (auto Symbol : MachOObj.symbols()) {
  214. SymbolEntry SE =
  215. (MachOObj.is64Bit()
  216. ? constructSymbolEntry(StrTable, MachOObj.getSymbol64TableEntry(
  217. Symbol.getRawDataRefImpl()))
  218. : constructSymbolEntry(StrTable, MachOObj.getSymbolTableEntry(
  219. Symbol.getRawDataRefImpl())));
  220. O.SymTable.Symbols.push_back(std::make_unique<SymbolEntry>(SE));
  221. }
  222. }
  223. void MachOReader::setSymbolInRelocationInfo(Object &O) const {
  224. std::vector<const Section *> Sections;
  225. for (auto &LC : O.LoadCommands)
  226. for (std::unique_ptr<Section> &Sec : LC.Sections)
  227. Sections.push_back(Sec.get());
  228. for (LoadCommand &LC : O.LoadCommands)
  229. for (std::unique_ptr<Section> &Sec : LC.Sections)
  230. for (auto &Reloc : Sec->Relocations)
  231. if (!Reloc.Scattered && !Reloc.IsAddend) {
  232. const uint32_t SymbolNum =
  233. Reloc.getPlainRelocationSymbolNum(MachOObj.isLittleEndian());
  234. if (Reloc.Extern) {
  235. Reloc.Symbol = O.SymTable.getSymbolByIndex(SymbolNum);
  236. } else {
  237. // FIXME: Refactor error handling in MachOReader and report an error
  238. // if we encounter an invalid relocation.
  239. assert(SymbolNum >= 1 && SymbolNum <= Sections.size() &&
  240. "Invalid section index.");
  241. Reloc.Sec = Sections[SymbolNum - 1];
  242. }
  243. }
  244. }
  245. void MachOReader::readRebaseInfo(Object &O) const {
  246. O.Rebases.Opcodes = MachOObj.getDyldInfoRebaseOpcodes();
  247. }
  248. void MachOReader::readBindInfo(Object &O) const {
  249. O.Binds.Opcodes = MachOObj.getDyldInfoBindOpcodes();
  250. }
  251. void MachOReader::readWeakBindInfo(Object &O) const {
  252. O.WeakBinds.Opcodes = MachOObj.getDyldInfoWeakBindOpcodes();
  253. }
  254. void MachOReader::readLazyBindInfo(Object &O) const {
  255. O.LazyBinds.Opcodes = MachOObj.getDyldInfoLazyBindOpcodes();
  256. }
  257. void MachOReader::readExportInfo(Object &O) const {
  258. // This information can be in LC_DYLD_INFO or in LC_DYLD_EXPORTS_TRIE
  259. ArrayRef<uint8_t> Trie = MachOObj.getDyldInfoExportsTrie();
  260. if (Trie.empty())
  261. Trie = MachOObj.getDyldExportsTrie();
  262. O.Exports.Trie = Trie;
  263. }
  264. void MachOReader::readLinkData(Object &O, std::optional<size_t> LCIndex,
  265. LinkData &LD) const {
  266. if (!LCIndex)
  267. return;
  268. const MachO::linkedit_data_command &LC =
  269. O.LoadCommands[*LCIndex].MachOLoadCommand.linkedit_data_command_data;
  270. LD.Data =
  271. arrayRefFromStringRef(MachOObj.getData().substr(LC.dataoff, LC.datasize));
  272. }
  273. void MachOReader::readDataInCodeData(Object &O) const {
  274. return readLinkData(O, O.DataInCodeCommandIndex, O.DataInCode);
  275. }
  276. void MachOReader::readLinkerOptimizationHint(Object &O) const {
  277. return readLinkData(O, O.LinkerOptimizationHintCommandIndex,
  278. O.LinkerOptimizationHint);
  279. }
  280. void MachOReader::readFunctionStartsData(Object &O) const {
  281. return readLinkData(O, O.FunctionStartsCommandIndex, O.FunctionStarts);
  282. }
  283. void MachOReader::readDylibCodeSignDRs(Object &O) const {
  284. return readLinkData(O, O.DylibCodeSignDRsIndex, O.DylibCodeSignDRs);
  285. }
  286. void MachOReader::readExportsTrie(Object &O) const {
  287. return readLinkData(O, O.ExportsTrieCommandIndex, O.ExportsTrie);
  288. }
  289. void MachOReader::readChainedFixups(Object &O) const {
  290. return readLinkData(O, O.ChainedFixupsCommandIndex, O.ChainedFixups);
  291. }
  292. void MachOReader::readIndirectSymbolTable(Object &O) const {
  293. MachO::dysymtab_command DySymTab = MachOObj.getDysymtabLoadCommand();
  294. constexpr uint32_t AbsOrLocalMask =
  295. MachO::INDIRECT_SYMBOL_LOCAL | MachO::INDIRECT_SYMBOL_ABS;
  296. for (uint32_t i = 0; i < DySymTab.nindirectsyms; ++i) {
  297. uint32_t Index = MachOObj.getIndirectSymbolTableEntry(DySymTab, i);
  298. if ((Index & AbsOrLocalMask) != 0)
  299. O.IndirectSymTable.Symbols.emplace_back(Index, std::nullopt);
  300. else
  301. O.IndirectSymTable.Symbols.emplace_back(
  302. Index, O.SymTable.getSymbolByIndex(Index));
  303. }
  304. }
  305. void MachOReader::readSwiftVersion(Object &O) const {
  306. struct ObjCImageInfo {
  307. uint32_t Version;
  308. uint32_t Flags;
  309. } ImageInfo;
  310. for (const LoadCommand &LC : O.LoadCommands)
  311. for (const std::unique_ptr<Section> &Sec : LC.Sections)
  312. if (Sec->Sectname == "__objc_imageinfo" &&
  313. (Sec->Segname == "__DATA" || Sec->Segname == "__DATA_CONST" ||
  314. Sec->Segname == "__DATA_DIRTY") &&
  315. Sec->Content.size() >= sizeof(ObjCImageInfo)) {
  316. memcpy(&ImageInfo, Sec->Content.data(), sizeof(ObjCImageInfo));
  317. if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) {
  318. sys::swapByteOrder(ImageInfo.Version);
  319. sys::swapByteOrder(ImageInfo.Flags);
  320. }
  321. O.SwiftVersion = (ImageInfo.Flags >> 8) & 0xff;
  322. return;
  323. }
  324. }
  325. Expected<std::unique_ptr<Object>> MachOReader::create() const {
  326. auto Obj = std::make_unique<Object>();
  327. readHeader(*Obj);
  328. if (Error E = readLoadCommands(*Obj))
  329. return std::move(E);
  330. readSymbolTable(*Obj);
  331. setSymbolInRelocationInfo(*Obj);
  332. readRebaseInfo(*Obj);
  333. readBindInfo(*Obj);
  334. readWeakBindInfo(*Obj);
  335. readLazyBindInfo(*Obj);
  336. readExportInfo(*Obj);
  337. readDataInCodeData(*Obj);
  338. readLinkerOptimizationHint(*Obj);
  339. readFunctionStartsData(*Obj);
  340. readDylibCodeSignDRs(*Obj);
  341. readExportsTrie(*Obj);
  342. readChainedFixups(*Obj);
  343. readIndirectSymbolTable(*Obj);
  344. readSwiftVersion(*Obj);
  345. return std::move(Obj);
  346. }