MachOLayoutBuilder.cpp 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441
  1. //===- MachOLayoutBuilder.cpp -----------------------------------*- C++ -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. #include "MachOLayoutBuilder.h"
  9. #include "llvm/Support/Alignment.h"
  10. #include "llvm/Support/Errc.h"
  11. #include "llvm/Support/ErrorHandling.h"
  12. using namespace llvm;
  13. using namespace llvm::objcopy::macho;
  14. StringTableBuilder::Kind
  15. MachOLayoutBuilder::getStringTableBuilderKind(const Object &O, bool Is64Bit) {
  16. if (O.Header.FileType == MachO::HeaderFileType::MH_OBJECT)
  17. return Is64Bit ? StringTableBuilder::MachO64 : StringTableBuilder::MachO;
  18. return Is64Bit ? StringTableBuilder::MachO64Linked
  19. : StringTableBuilder::MachOLinked;
  20. }
  21. uint32_t MachOLayoutBuilder::computeSizeOfCmds() const {
  22. uint32_t Size = 0;
  23. for (const LoadCommand &LC : O.LoadCommands) {
  24. const MachO::macho_load_command &MLC = LC.MachOLoadCommand;
  25. auto cmd = MLC.load_command_data.cmd;
  26. switch (cmd) {
  27. case MachO::LC_SEGMENT:
  28. Size += sizeof(MachO::segment_command) +
  29. sizeof(MachO::section) * LC.Sections.size();
  30. continue;
  31. case MachO::LC_SEGMENT_64:
  32. Size += sizeof(MachO::segment_command_64) +
  33. sizeof(MachO::section_64) * LC.Sections.size();
  34. continue;
  35. }
  36. switch (cmd) {
  37. #define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \
  38. case MachO::LCName: \
  39. Size += sizeof(MachO::LCStruct) + LC.Payload.size(); \
  40. break;
  41. #include "llvm/BinaryFormat/MachO.def"
  42. #undef HANDLE_LOAD_COMMAND
  43. }
  44. }
  45. return Size;
  46. }
  47. void MachOLayoutBuilder::constructStringTable() {
  48. for (std::unique_ptr<SymbolEntry> &Sym : O.SymTable.Symbols)
  49. StrTableBuilder.add(Sym->Name);
  50. StrTableBuilder.finalize();
  51. }
  52. void MachOLayoutBuilder::updateSymbolIndexes() {
  53. uint32_t Index = 0;
  54. for (auto &Symbol : O.SymTable.Symbols)
  55. Symbol->Index = Index++;
  56. }
  57. // Updates the index and the number of local/external/undefined symbols.
  58. void MachOLayoutBuilder::updateDySymTab(MachO::macho_load_command &MLC) {
  59. assert(MLC.load_command_data.cmd == MachO::LC_DYSYMTAB);
  60. // Make sure that nlist entries in the symbol table are sorted by the those
  61. // types. The order is: local < defined external < undefined external.
  62. assert(llvm::is_sorted(O.SymTable.Symbols,
  63. [](const std::unique_ptr<SymbolEntry> &A,
  64. const std::unique_ptr<SymbolEntry> &B) {
  65. bool AL = A->isLocalSymbol(),
  66. BL = B->isLocalSymbol();
  67. if (AL != BL)
  68. return AL;
  69. return !AL && !A->isUndefinedSymbol() &&
  70. B->isUndefinedSymbol();
  71. }) &&
  72. "Symbols are not sorted by their types.");
  73. uint32_t NumLocalSymbols = 0;
  74. auto Iter = O.SymTable.Symbols.begin();
  75. auto End = O.SymTable.Symbols.end();
  76. for (; Iter != End; ++Iter) {
  77. if ((*Iter)->isExternalSymbol())
  78. break;
  79. ++NumLocalSymbols;
  80. }
  81. uint32_t NumExtDefSymbols = 0;
  82. for (; Iter != End; ++Iter) {
  83. if ((*Iter)->isUndefinedSymbol())
  84. break;
  85. ++NumExtDefSymbols;
  86. }
  87. MLC.dysymtab_command_data.ilocalsym = 0;
  88. MLC.dysymtab_command_data.nlocalsym = NumLocalSymbols;
  89. MLC.dysymtab_command_data.iextdefsym = NumLocalSymbols;
  90. MLC.dysymtab_command_data.nextdefsym = NumExtDefSymbols;
  91. MLC.dysymtab_command_data.iundefsym = NumLocalSymbols + NumExtDefSymbols;
  92. MLC.dysymtab_command_data.nundefsym =
  93. O.SymTable.Symbols.size() - (NumLocalSymbols + NumExtDefSymbols);
  94. }
  95. // Recomputes and updates offset and size fields in load commands and sections
  96. // since they could be modified.
  97. uint64_t MachOLayoutBuilder::layoutSegments() {
  98. auto HeaderSize =
  99. Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
  100. const bool IsObjectFile =
  101. O.Header.FileType == MachO::HeaderFileType::MH_OBJECT;
  102. uint64_t Offset = IsObjectFile ? (HeaderSize + O.Header.SizeOfCmds) : 0;
  103. for (LoadCommand &LC : O.LoadCommands) {
  104. auto &MLC = LC.MachOLoadCommand;
  105. StringRef Segname;
  106. uint64_t SegmentVmAddr;
  107. uint64_t SegmentVmSize;
  108. switch (MLC.load_command_data.cmd) {
  109. case MachO::LC_SEGMENT:
  110. SegmentVmAddr = MLC.segment_command_data.vmaddr;
  111. SegmentVmSize = MLC.segment_command_data.vmsize;
  112. Segname = StringRef(MLC.segment_command_data.segname,
  113. strnlen(MLC.segment_command_data.segname,
  114. sizeof(MLC.segment_command_data.segname)));
  115. break;
  116. case MachO::LC_SEGMENT_64:
  117. SegmentVmAddr = MLC.segment_command_64_data.vmaddr;
  118. SegmentVmSize = MLC.segment_command_64_data.vmsize;
  119. Segname = StringRef(MLC.segment_command_64_data.segname,
  120. strnlen(MLC.segment_command_64_data.segname,
  121. sizeof(MLC.segment_command_64_data.segname)));
  122. break;
  123. default:
  124. continue;
  125. }
  126. if (Segname == "__LINKEDIT") {
  127. // We update the __LINKEDIT segment later (in layoutTail).
  128. assert(LC.Sections.empty() && "__LINKEDIT segment has sections");
  129. LinkEditLoadCommand = &MLC;
  130. continue;
  131. }
  132. // Update file offsets and sizes of sections.
  133. uint64_t SegOffset = Offset;
  134. uint64_t SegFileSize = 0;
  135. uint64_t VMSize = 0;
  136. for (std::unique_ptr<Section> &Sec : LC.Sections) {
  137. assert(SegmentVmAddr <= Sec->Addr &&
  138. "Section's address cannot be smaller than Segment's one");
  139. uint32_t SectOffset = Sec->Addr - SegmentVmAddr;
  140. if (IsObjectFile) {
  141. if (!Sec->hasValidOffset()) {
  142. Sec->Offset = 0;
  143. } else {
  144. uint64_t PaddingSize =
  145. offsetToAlignment(SegFileSize, Align(1ull << Sec->Align));
  146. Sec->Offset = SegOffset + SegFileSize + PaddingSize;
  147. Sec->Size = Sec->Content.size();
  148. SegFileSize += PaddingSize + Sec->Size;
  149. }
  150. } else {
  151. if (!Sec->hasValidOffset()) {
  152. Sec->Offset = 0;
  153. } else {
  154. Sec->Offset = SegOffset + SectOffset;
  155. Sec->Size = Sec->Content.size();
  156. SegFileSize = std::max(SegFileSize, SectOffset + Sec->Size);
  157. }
  158. }
  159. VMSize = std::max(VMSize, SectOffset + Sec->Size);
  160. }
  161. if (IsObjectFile) {
  162. Offset += SegFileSize;
  163. } else {
  164. Offset = alignTo(Offset + SegFileSize, PageSize);
  165. SegFileSize = alignTo(SegFileSize, PageSize);
  166. // Use the original vmsize if the segment is __PAGEZERO.
  167. VMSize =
  168. Segname == "__PAGEZERO" ? SegmentVmSize : alignTo(VMSize, PageSize);
  169. }
  170. switch (MLC.load_command_data.cmd) {
  171. case MachO::LC_SEGMENT:
  172. MLC.segment_command_data.cmdsize =
  173. sizeof(MachO::segment_command) +
  174. sizeof(MachO::section) * LC.Sections.size();
  175. MLC.segment_command_data.nsects = LC.Sections.size();
  176. MLC.segment_command_data.fileoff = SegOffset;
  177. MLC.segment_command_data.vmsize = VMSize;
  178. MLC.segment_command_data.filesize = SegFileSize;
  179. break;
  180. case MachO::LC_SEGMENT_64:
  181. MLC.segment_command_64_data.cmdsize =
  182. sizeof(MachO::segment_command_64) +
  183. sizeof(MachO::section_64) * LC.Sections.size();
  184. MLC.segment_command_64_data.nsects = LC.Sections.size();
  185. MLC.segment_command_64_data.fileoff = SegOffset;
  186. MLC.segment_command_64_data.vmsize = VMSize;
  187. MLC.segment_command_64_data.filesize = SegFileSize;
  188. break;
  189. }
  190. }
  191. return Offset;
  192. }
  193. uint64_t MachOLayoutBuilder::layoutRelocations(uint64_t Offset) {
  194. for (LoadCommand &LC : O.LoadCommands)
  195. for (std::unique_ptr<Section> &Sec : LC.Sections) {
  196. Sec->RelOff = Sec->Relocations.empty() ? 0 : Offset;
  197. Sec->NReloc = Sec->Relocations.size();
  198. Offset += sizeof(MachO::any_relocation_info) * Sec->NReloc;
  199. }
  200. return Offset;
  201. }
  202. Error MachOLayoutBuilder::layoutTail(uint64_t Offset) {
  203. // If we are building the layout of an executable or dynamic library
  204. // which does not have any segments other than __LINKEDIT,
  205. // the Offset can be equal to zero by this time. It happens because of the
  206. // convention that in such cases the file offsets specified by LC_SEGMENT
  207. // start with zero (unlike the case of a relocatable object file).
  208. const uint64_t HeaderSize =
  209. Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
  210. assert((!(O.Header.FileType == MachO::HeaderFileType::MH_OBJECT) ||
  211. Offset >= HeaderSize + O.Header.SizeOfCmds) &&
  212. "Incorrect tail offset");
  213. Offset = std::max(Offset, HeaderSize + O.Header.SizeOfCmds);
  214. // The order of LINKEDIT elements is as follows:
  215. // rebase info, binding info, weak binding info, lazy binding info, export
  216. // trie, data-in-code, symbol table, indirect symbol table, symbol table
  217. // strings, code signature.
  218. uint64_t NListSize = Is64Bit ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist);
  219. uint64_t StartOfLinkEdit = Offset;
  220. uint64_t StartOfRebaseInfo = StartOfLinkEdit;
  221. uint64_t StartOfBindingInfo = StartOfRebaseInfo + O.Rebases.Opcodes.size();
  222. uint64_t StartOfWeakBindingInfo = StartOfBindingInfo + O.Binds.Opcodes.size();
  223. uint64_t StartOfLazyBindingInfo =
  224. StartOfWeakBindingInfo + O.WeakBinds.Opcodes.size();
  225. uint64_t StartOfExportTrie =
  226. StartOfLazyBindingInfo + O.LazyBinds.Opcodes.size();
  227. uint64_t StartOfFunctionStarts = StartOfExportTrie + O.Exports.Trie.size();
  228. uint64_t StartOfDyldExportsTrie =
  229. StartOfFunctionStarts + O.FunctionStarts.Data.size();
  230. uint64_t StartOfChainedFixups =
  231. StartOfDyldExportsTrie + O.ExportsTrie.Data.size();
  232. uint64_t StartOfDataInCode =
  233. StartOfChainedFixups + O.ChainedFixups.Data.size();
  234. uint64_t StartOfLinkerOptimizationHint =
  235. StartOfDataInCode + O.DataInCode.Data.size();
  236. uint64_t StartOfSymbols =
  237. StartOfLinkerOptimizationHint + O.LinkerOptimizationHint.Data.size();
  238. uint64_t StartOfIndirectSymbols =
  239. StartOfSymbols + NListSize * O.SymTable.Symbols.size();
  240. uint64_t StartOfSymbolStrings =
  241. StartOfIndirectSymbols +
  242. sizeof(uint32_t) * O.IndirectSymTable.Symbols.size();
  243. uint64_t StartOfCodeSignature =
  244. StartOfSymbolStrings + StrTableBuilder.getSize();
  245. uint32_t CodeSignatureSize = 0;
  246. if (O.CodeSignatureCommandIndex) {
  247. StartOfCodeSignature = alignTo(StartOfCodeSignature, 16);
  248. // Note: These calculations are to be kept in sync with the same
  249. // calculations performed in LLD's CodeSignatureSection.
  250. const uint32_t AllHeadersSize =
  251. alignTo(CodeSignature.FixedHeadersSize + OutputFileName.size() + 1,
  252. CodeSignature.Align);
  253. const uint32_t BlockCount =
  254. (StartOfCodeSignature + CodeSignature.BlockSize - 1) /
  255. CodeSignature.BlockSize;
  256. const uint32_t Size =
  257. alignTo(AllHeadersSize + BlockCount * CodeSignature.HashSize,
  258. CodeSignature.Align);
  259. CodeSignature.StartOffset = StartOfCodeSignature;
  260. CodeSignature.AllHeadersSize = AllHeadersSize;
  261. CodeSignature.BlockCount = BlockCount;
  262. CodeSignature.OutputFileName = OutputFileName;
  263. CodeSignature.Size = Size;
  264. CodeSignatureSize = Size;
  265. }
  266. uint64_t LinkEditSize =
  267. StartOfCodeSignature + CodeSignatureSize - StartOfLinkEdit;
  268. // Now we have determined the layout of the contents of the __LINKEDIT
  269. // segment. Update its load command.
  270. if (LinkEditLoadCommand) {
  271. MachO::macho_load_command *MLC = LinkEditLoadCommand;
  272. switch (LinkEditLoadCommand->load_command_data.cmd) {
  273. case MachO::LC_SEGMENT:
  274. MLC->segment_command_data.cmdsize = sizeof(MachO::segment_command);
  275. MLC->segment_command_data.fileoff = StartOfLinkEdit;
  276. MLC->segment_command_data.vmsize = alignTo(LinkEditSize, PageSize);
  277. MLC->segment_command_data.filesize = LinkEditSize;
  278. break;
  279. case MachO::LC_SEGMENT_64:
  280. MLC->segment_command_64_data.cmdsize = sizeof(MachO::segment_command_64);
  281. MLC->segment_command_64_data.fileoff = StartOfLinkEdit;
  282. MLC->segment_command_64_data.vmsize = alignTo(LinkEditSize, PageSize);
  283. MLC->segment_command_64_data.filesize = LinkEditSize;
  284. break;
  285. }
  286. }
  287. for (LoadCommand &LC : O.LoadCommands) {
  288. auto &MLC = LC.MachOLoadCommand;
  289. auto cmd = MLC.load_command_data.cmd;
  290. switch (cmd) {
  291. case MachO::LC_CODE_SIGNATURE:
  292. MLC.linkedit_data_command_data.dataoff = StartOfCodeSignature;
  293. MLC.linkedit_data_command_data.datasize = CodeSignatureSize;
  294. break;
  295. case MachO::LC_SYMTAB:
  296. MLC.symtab_command_data.symoff = StartOfSymbols;
  297. MLC.symtab_command_data.nsyms = O.SymTable.Symbols.size();
  298. MLC.symtab_command_data.stroff = StartOfSymbolStrings;
  299. MLC.symtab_command_data.strsize = StrTableBuilder.getSize();
  300. break;
  301. case MachO::LC_DYSYMTAB: {
  302. if (MLC.dysymtab_command_data.ntoc != 0 ||
  303. MLC.dysymtab_command_data.nmodtab != 0 ||
  304. MLC.dysymtab_command_data.nextrefsyms != 0 ||
  305. MLC.dysymtab_command_data.nlocrel != 0 ||
  306. MLC.dysymtab_command_data.nextrel != 0)
  307. return createStringError(llvm::errc::not_supported,
  308. "shared library is not yet supported");
  309. if (!O.IndirectSymTable.Symbols.empty()) {
  310. MLC.dysymtab_command_data.indirectsymoff = StartOfIndirectSymbols;
  311. MLC.dysymtab_command_data.nindirectsyms =
  312. O.IndirectSymTable.Symbols.size();
  313. }
  314. updateDySymTab(MLC);
  315. break;
  316. }
  317. case MachO::LC_DATA_IN_CODE:
  318. MLC.linkedit_data_command_data.dataoff = StartOfDataInCode;
  319. MLC.linkedit_data_command_data.datasize = O.DataInCode.Data.size();
  320. break;
  321. case MachO::LC_LINKER_OPTIMIZATION_HINT:
  322. MLC.linkedit_data_command_data.dataoff = StartOfLinkerOptimizationHint;
  323. MLC.linkedit_data_command_data.datasize =
  324. O.LinkerOptimizationHint.Data.size();
  325. break;
  326. case MachO::LC_FUNCTION_STARTS:
  327. MLC.linkedit_data_command_data.dataoff = StartOfFunctionStarts;
  328. MLC.linkedit_data_command_data.datasize = O.FunctionStarts.Data.size();
  329. break;
  330. case MachO::LC_DYLD_CHAINED_FIXUPS:
  331. MLC.linkedit_data_command_data.dataoff = StartOfChainedFixups;
  332. MLC.linkedit_data_command_data.datasize = O.ChainedFixups.Data.size();
  333. break;
  334. case MachO::LC_DYLD_EXPORTS_TRIE:
  335. MLC.linkedit_data_command_data.dataoff = StartOfDyldExportsTrie;
  336. MLC.linkedit_data_command_data.datasize = O.ExportsTrie.Data.size();
  337. break;
  338. case MachO::LC_DYLD_INFO:
  339. case MachO::LC_DYLD_INFO_ONLY:
  340. MLC.dyld_info_command_data.rebase_off =
  341. O.Rebases.Opcodes.empty() ? 0 : StartOfRebaseInfo;
  342. MLC.dyld_info_command_data.rebase_size = O.Rebases.Opcodes.size();
  343. MLC.dyld_info_command_data.bind_off =
  344. O.Binds.Opcodes.empty() ? 0 : StartOfBindingInfo;
  345. MLC.dyld_info_command_data.bind_size = O.Binds.Opcodes.size();
  346. MLC.dyld_info_command_data.weak_bind_off =
  347. O.WeakBinds.Opcodes.empty() ? 0 : StartOfWeakBindingInfo;
  348. MLC.dyld_info_command_data.weak_bind_size = O.WeakBinds.Opcodes.size();
  349. MLC.dyld_info_command_data.lazy_bind_off =
  350. O.LazyBinds.Opcodes.empty() ? 0 : StartOfLazyBindingInfo;
  351. MLC.dyld_info_command_data.lazy_bind_size = O.LazyBinds.Opcodes.size();
  352. MLC.dyld_info_command_data.export_off =
  353. O.Exports.Trie.empty() ? 0 : StartOfExportTrie;
  354. MLC.dyld_info_command_data.export_size = O.Exports.Trie.size();
  355. break;
  356. // Note that LC_ENCRYPTION_INFO.cryptoff despite its name and the comment in
  357. // <mach-o/loader.h> is not an offset in the binary file, instead, it is a
  358. // relative virtual address. At the moment modification of the __TEXT
  359. // segment of executables isn't supported anyway (e.g. data in code entries
  360. // are not recalculated). Moreover, in general
  361. // LC_ENCRYPT_INFO/LC_ENCRYPTION_INFO_64 are nontrivial to update because
  362. // without making additional assumptions (e.g. that the entire __TEXT
  363. // segment should be encrypted) we do not know how to recalculate the
  364. // boundaries of the encrypted part. For now just copy over these load
  365. // commands until we encounter a real world usecase where
  366. // LC_ENCRYPT_INFO/LC_ENCRYPTION_INFO_64 need to be adjusted.
  367. case MachO::LC_ENCRYPTION_INFO:
  368. case MachO::LC_ENCRYPTION_INFO_64:
  369. case MachO::LC_LOAD_DYLINKER:
  370. case MachO::LC_MAIN:
  371. case MachO::LC_RPATH:
  372. case MachO::LC_SEGMENT:
  373. case MachO::LC_SEGMENT_64:
  374. case MachO::LC_VERSION_MIN_MACOSX:
  375. case MachO::LC_VERSION_MIN_IPHONEOS:
  376. case MachO::LC_VERSION_MIN_TVOS:
  377. case MachO::LC_VERSION_MIN_WATCHOS:
  378. case MachO::LC_BUILD_VERSION:
  379. case MachO::LC_ID_DYLIB:
  380. case MachO::LC_LOAD_DYLIB:
  381. case MachO::LC_LOAD_WEAK_DYLIB:
  382. case MachO::LC_UUID:
  383. case MachO::LC_SOURCE_VERSION:
  384. case MachO::LC_THREAD:
  385. case MachO::LC_UNIXTHREAD:
  386. case MachO::LC_SUB_FRAMEWORK:
  387. case MachO::LC_SUB_UMBRELLA:
  388. case MachO::LC_SUB_CLIENT:
  389. case MachO::LC_SUB_LIBRARY:
  390. case MachO::LC_LINKER_OPTION:
  391. // Nothing to update.
  392. break;
  393. default:
  394. // Abort if it's unsupported in order to prevent corrupting the object.
  395. return createStringError(llvm::errc::not_supported,
  396. "unsupported load command (cmd=0x%x)", cmd);
  397. }
  398. }
  399. return Error::success();
  400. }
  401. Error MachOLayoutBuilder::layout() {
  402. O.Header.NCmds = O.LoadCommands.size();
  403. O.Header.SizeOfCmds = computeSizeOfCmds();
  404. constructStringTable();
  405. updateSymbolIndexes();
  406. uint64_t Offset = layoutSegments();
  407. Offset = layoutRelocations(Offset);
  408. return layoutTail(Offset);
  409. }