Archive.cpp 34 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991
  1. //===- Archive.cpp - ar File Format implementation ------------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file implements the Archive class together with its
  9. // ArchiveMemberHeader, Child and Symbol helpers.
  10. //
  11. //===----------------------------------------------------------------------===//
  12. #include "llvm/Object/Archive.h"
  13. #include "llvm/ADT/Optional.h"
  14. #include "llvm/ADT/SmallString.h"
  15. #include "llvm/ADT/StringRef.h"
  16. #include "llvm/ADT/Twine.h"
  17. #include "llvm/Object/Binary.h"
  18. #include "llvm/Object/Error.h"
  19. #include "llvm/Support/Chrono.h"
  20. #include "llvm/Support/Endian.h"
  21. #include "llvm/Support/Error.h"
  22. #include "llvm/Support/ErrorOr.h"
  23. #include "llvm/Support/FileSystem.h"
  24. #include "llvm/Support/MemoryBuffer.h"
  25. #include "llvm/Support/Path.h"
  26. #include "llvm/Support/raw_ostream.h"
  27. #include <algorithm>
  28. #include <cassert>
  29. #include <cstddef>
  30. #include <cstdint>
  31. #include <cstring>
  32. #include <memory>
  33. #include <string>
  34. #include <system_error>
  35. using namespace llvm;
  36. using namespace object;
  37. using namespace llvm::support::endian;
  // Signatures expected at the very start of an archive buffer: "!<arch>\n"
  // for a regular archive and "!<thin>\n" for a thin one. Both are eight
  // characters long (plus the implicit terminating NUL).
  constexpr char Magic[] = "!<arch>\n";
  constexpr char ThinMagic[] = "!<thin>\n";
  40. void Archive::anchor() {}
  41. static Error
  42. malformedError(Twine Msg) {
  43. std::string StringMsg = "truncated or malformed archive (" + Msg.str() + ")";
  44. return make_error<GenericBinaryError>(std::move(StringMsg),
  45. object_error::parse_failed);
  46. }
  47. ArchiveMemberHeader::ArchiveMemberHeader(const Archive *Parent,
  48. const char *RawHeaderPtr,
  49. uint64_t Size, Error *Err)
  50. : Parent(Parent),
  51. ArMemHdr(reinterpret_cast<const ArMemHdrType *>(RawHeaderPtr)) {
  52. if (RawHeaderPtr == nullptr)
  53. return;
  54. ErrorAsOutParameter ErrAsOutParam(Err);
  55. if (Size < sizeof(ArMemHdrType)) {
  56. if (Err) {
  57. std::string Msg("remaining size of archive too small for next archive "
  58. "member header ");
  59. Expected<StringRef> NameOrErr = getName(Size);
  60. if (!NameOrErr) {
  61. consumeError(NameOrErr.takeError());
  62. uint64_t Offset = RawHeaderPtr - Parent->getData().data();
  63. *Err = malformedError(Msg + "at offset " + Twine(Offset));
  64. } else
  65. *Err = malformedError(Msg + "for " + NameOrErr.get());
  66. }
  67. return;
  68. }
  69. if (ArMemHdr->Terminator[0] != '`' || ArMemHdr->Terminator[1] != '\n') {
  70. if (Err) {
  71. std::string Buf;
  72. raw_string_ostream OS(Buf);
  73. OS.write_escaped(StringRef(ArMemHdr->Terminator,
  74. sizeof(ArMemHdr->Terminator)));
  75. OS.flush();
  76. std::string Msg("terminator characters in archive member \"" + Buf +
  77. "\" not the correct \"`\\n\" values for the archive "
  78. "member header ");
  79. Expected<StringRef> NameOrErr = getName(Size);
  80. if (!NameOrErr) {
  81. consumeError(NameOrErr.takeError());
  82. uint64_t Offset = RawHeaderPtr - Parent->getData().data();
  83. *Err = malformedError(Msg + "at offset " + Twine(Offset));
  84. } else
  85. *Err = malformedError(Msg + "for " + NameOrErr.get());
  86. }
  87. return;
  88. }
  89. }
  90. // This gets the raw name from the ArMemHdr->Name field and checks that it is
  91. // valid for the kind of archive. If it is not valid it returns an Error.
  92. Expected<StringRef> ArchiveMemberHeader::getRawName() const {
  93. char EndCond;
  94. auto Kind = Parent->kind();
  95. if (Kind == Archive::K_BSD || Kind == Archive::K_DARWIN64) {
  96. if (ArMemHdr->Name[0] == ' ') {
  97. uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) -
  98. Parent->getData().data();
  99. return malformedError("name contains a leading space for archive member "
  100. "header at offset " + Twine(Offset));
  101. }
  102. EndCond = ' ';
  103. }
  104. else if (ArMemHdr->Name[0] == '/' || ArMemHdr->Name[0] == '#')
  105. EndCond = ' ';
  106. else
  107. EndCond = '/';
  108. StringRef::size_type end =
  109. StringRef(ArMemHdr->Name, sizeof(ArMemHdr->Name)).find(EndCond);
  110. if (end == StringRef::npos)
  111. end = sizeof(ArMemHdr->Name);
  112. assert(end <= sizeof(ArMemHdr->Name) && end > 0);
  113. // Don't include the EndCond if there is one.
  114. return StringRef(ArMemHdr->Name, end);
  115. }
  116. // This gets the name looking up long names. Size is the size of the archive
  117. // member including the header, so the size of any name following the header
  118. // is checked to make sure it does not overflow.
  119. Expected<StringRef> ArchiveMemberHeader::getName(uint64_t Size) const {
  120. // This can be called from the ArchiveMemberHeader constructor when the
  121. // archive header is truncated to produce an error message with the name.
  122. // Make sure the name field is not truncated.
  123. if (Size < offsetof(ArMemHdrType, Name) + sizeof(ArMemHdr->Name)) {
  124. uint64_t ArchiveOffset = reinterpret_cast<const char *>(ArMemHdr) -
  125. Parent->getData().data();
  126. return malformedError("archive header truncated before the name field "
  127. "for archive member header at offset " +
  128. Twine(ArchiveOffset));
  129. }
  130. // The raw name itself can be invalid.
  131. Expected<StringRef> NameOrErr = getRawName();
  132. if (!NameOrErr)
  133. return NameOrErr.takeError();
  134. StringRef Name = NameOrErr.get();
  135. // Check if it's a special name.
  136. if (Name[0] == '/') {
  137. if (Name.size() == 1) // Linker member.
  138. return Name;
  139. if (Name.size() == 2 && Name[1] == '/') // String table.
  140. return Name;
  141. // It's a long name.
  142. // Get the string table offset.
  143. std::size_t StringOffset;
  144. if (Name.substr(1).rtrim(' ').getAsInteger(10, StringOffset)) {
  145. std::string Buf;
  146. raw_string_ostream OS(Buf);
  147. OS.write_escaped(Name.substr(1).rtrim(' '));
  148. OS.flush();
  149. uint64_t ArchiveOffset = reinterpret_cast<const char *>(ArMemHdr) -
  150. Parent->getData().data();
  151. return malformedError("long name offset characters after the '/' are "
  152. "not all decimal numbers: '" + Buf + "' for "
  153. "archive member header at offset " +
  154. Twine(ArchiveOffset));
  155. }
  156. // Verify it.
  157. if (StringOffset >= Parent->getStringTable().size()) {
  158. uint64_t ArchiveOffset = reinterpret_cast<const char *>(ArMemHdr) -
  159. Parent->getData().data();
  160. return malformedError("long name offset " + Twine(StringOffset) + " past "
  161. "the end of the string table for archive member "
  162. "header at offset " + Twine(ArchiveOffset));
  163. }
  164. // GNU long file names end with a "/\n".
  165. if (Parent->kind() == Archive::K_GNU ||
  166. Parent->kind() == Archive::K_GNU64) {
  167. size_t End = Parent->getStringTable().find('\n', /*From=*/StringOffset);
  168. if (End == StringRef::npos || End < 1 ||
  169. Parent->getStringTable()[End - 1] != '/') {
  170. return malformedError("string table at long name offset " +
  171. Twine(StringOffset) + "not terminated");
  172. }
  173. return Parent->getStringTable().slice(StringOffset, End - 1);
  174. }
  175. return Parent->getStringTable().begin() + StringOffset;
  176. }
  177. if (Name.startswith("#1/")) {
  178. uint64_t NameLength;
  179. if (Name.substr(3).rtrim(' ').getAsInteger(10, NameLength)) {
  180. std::string Buf;
  181. raw_string_ostream OS(Buf);
  182. OS.write_escaped(Name.substr(3).rtrim(' '));
  183. OS.flush();
  184. uint64_t ArchiveOffset = reinterpret_cast<const char *>(ArMemHdr) -
  185. Parent->getData().data();
  186. return malformedError("long name length characters after the #1/ are "
  187. "not all decimal numbers: '" + Buf + "' for "
  188. "archive member header at offset " +
  189. Twine(ArchiveOffset));
  190. }
  191. if (getSizeOf() + NameLength > Size) {
  192. uint64_t ArchiveOffset = reinterpret_cast<const char *>(ArMemHdr) -
  193. Parent->getData().data();
  194. return malformedError("long name length: " + Twine(NameLength) +
  195. " extends past the end of the member or archive "
  196. "for archive member header at offset " +
  197. Twine(ArchiveOffset));
  198. }
  199. return StringRef(reinterpret_cast<const char *>(ArMemHdr) + getSizeOf(),
  200. NameLength).rtrim('\0');
  201. }
  202. // It is not a long name so trim the blanks at the end of the name.
  203. if (Name[Name.size() - 1] != '/')
  204. return Name.rtrim(' ');
  205. // It's a simple name.
  206. return Name.drop_back(1);
  207. }
  208. Expected<uint64_t> ArchiveMemberHeader::getSize() const {
  209. uint64_t Ret;
  210. if (StringRef(ArMemHdr->Size,
  211. sizeof(ArMemHdr->Size)).rtrim(" ").getAsInteger(10, Ret)) {
  212. std::string Buf;
  213. raw_string_ostream OS(Buf);
  214. OS.write_escaped(StringRef(ArMemHdr->Size,
  215. sizeof(ArMemHdr->Size)).rtrim(" "));
  216. OS.flush();
  217. uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) -
  218. Parent->getData().data();
  219. return malformedError("characters in size field in archive header are not "
  220. "all decimal numbers: '" + Buf + "' for archive "
  221. "member header at offset " + Twine(Offset));
  222. }
  223. return Ret;
  224. }
  225. Expected<sys::fs::perms> ArchiveMemberHeader::getAccessMode() const {
  226. unsigned Ret;
  227. if (StringRef(ArMemHdr->AccessMode,
  228. sizeof(ArMemHdr->AccessMode)).rtrim(' ').getAsInteger(8, Ret)) {
  229. std::string Buf;
  230. raw_string_ostream OS(Buf);
  231. OS.write_escaped(StringRef(ArMemHdr->AccessMode,
  232. sizeof(ArMemHdr->AccessMode)).rtrim(" "));
  233. OS.flush();
  234. uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) -
  235. Parent->getData().data();
  236. return malformedError("characters in AccessMode field in archive header "
  237. "are not all decimal numbers: '" + Buf + "' for the "
  238. "archive member header at offset " + Twine(Offset));
  239. }
  240. return static_cast<sys::fs::perms>(Ret);
  241. }
  242. Expected<sys::TimePoint<std::chrono::seconds>>
  243. ArchiveMemberHeader::getLastModified() const {
  244. unsigned Seconds;
  245. if (StringRef(ArMemHdr->LastModified,
  246. sizeof(ArMemHdr->LastModified)).rtrim(' ')
  247. .getAsInteger(10, Seconds)) {
  248. std::string Buf;
  249. raw_string_ostream OS(Buf);
  250. OS.write_escaped(StringRef(ArMemHdr->LastModified,
  251. sizeof(ArMemHdr->LastModified)).rtrim(" "));
  252. OS.flush();
  253. uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) -
  254. Parent->getData().data();
  255. return malformedError("characters in LastModified field in archive header "
  256. "are not all decimal numbers: '" + Buf + "' for the "
  257. "archive member header at offset " + Twine(Offset));
  258. }
  259. return sys::toTimePoint(Seconds);
  260. }
  261. Expected<unsigned> ArchiveMemberHeader::getUID() const {
  262. unsigned Ret;
  263. StringRef User = StringRef(ArMemHdr->UID, sizeof(ArMemHdr->UID)).rtrim(' ');
  264. if (User.empty())
  265. return 0;
  266. if (User.getAsInteger(10, Ret)) {
  267. std::string Buf;
  268. raw_string_ostream OS(Buf);
  269. OS.write_escaped(User);
  270. OS.flush();
  271. uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) -
  272. Parent->getData().data();
  273. return malformedError("characters in UID field in archive header "
  274. "are not all decimal numbers: '" + Buf + "' for the "
  275. "archive member header at offset " + Twine(Offset));
  276. }
  277. return Ret;
  278. }
  279. Expected<unsigned> ArchiveMemberHeader::getGID() const {
  280. unsigned Ret;
  281. StringRef Group = StringRef(ArMemHdr->GID, sizeof(ArMemHdr->GID)).rtrim(' ');
  282. if (Group.empty())
  283. return 0;
  284. if (Group.getAsInteger(10, Ret)) {
  285. std::string Buf;
  286. raw_string_ostream OS(Buf);
  287. OS.write_escaped(Group);
  288. OS.flush();
  289. uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) -
  290. Parent->getData().data();
  291. return malformedError("characters in GID field in archive header "
  292. "are not all decimal numbers: '" + Buf + "' for the "
  293. "archive member header at offset " + Twine(Offset));
  294. }
  295. return Ret;
  296. }
  297. Archive::Child::Child(const Archive *Parent, StringRef Data,
  298. uint16_t StartOfFile)
  299. : Parent(Parent), Header(Parent, Data.data(), Data.size(), nullptr),
  300. Data(Data), StartOfFile(StartOfFile) {
  301. }
  302. Archive::Child::Child(const Archive *Parent, const char *Start, Error *Err)
  303. : Parent(Parent),
  304. Header(Parent, Start,
  305. Parent
  306. ? Parent->getData().size() - (Start - Parent->getData().data())
  307. : 0, Err) {
  308. if (!Start)
  309. return;
  310. // If we are pointed to real data, Start is not a nullptr, then there must be
  311. // a non-null Err pointer available to report malformed data on. Only in
  312. // the case sentinel value is being constructed is Err is permitted to be a
  313. // nullptr.
  314. assert(Err && "Err can't be nullptr if Start is not a nullptr");
  315. ErrorAsOutParameter ErrAsOutParam(Err);
  316. // If there was an error in the construction of the Header
  317. // then just return with the error now set.
  318. if (*Err)
  319. return;
  320. uint64_t Size = Header.getSizeOf();
  321. Data = StringRef(Start, Size);
  322. Expected<bool> isThinOrErr = isThinMember();
  323. if (!isThinOrErr) {
  324. *Err = isThinOrErr.takeError();
  325. return;
  326. }
  327. bool isThin = isThinOrErr.get();
  328. if (!isThin) {
  329. Expected<uint64_t> MemberSize = getRawSize();
  330. if (!MemberSize) {
  331. *Err = MemberSize.takeError();
  332. return;
  333. }
  334. Size += MemberSize.get();
  335. Data = StringRef(Start, Size);
  336. }
  337. // Setup StartOfFile and PaddingBytes.
  338. StartOfFile = Header.getSizeOf();
  339. // Don't include attached name.
  340. Expected<StringRef> NameOrErr = getRawName();
  341. if (!NameOrErr){
  342. *Err = NameOrErr.takeError();
  343. return;
  344. }
  345. StringRef Name = NameOrErr.get();
  346. if (Name.startswith("#1/")) {
  347. uint64_t NameSize;
  348. if (Name.substr(3).rtrim(' ').getAsInteger(10, NameSize)) {
  349. std::string Buf;
  350. raw_string_ostream OS(Buf);
  351. OS.write_escaped(Name.substr(3).rtrim(' '));
  352. OS.flush();
  353. uint64_t Offset = Start - Parent->getData().data();
  354. *Err = malformedError("long name length characters after the #1/ are "
  355. "not all decimal numbers: '" + Buf + "' for "
  356. "archive member header at offset " +
  357. Twine(Offset));
  358. return;
  359. }
  360. StartOfFile += NameSize;
  361. }
  362. }
  363. Expected<uint64_t> Archive::Child::getSize() const {
  364. if (Parent->IsThin)
  365. return Header.getSize();
  366. return Data.size() - StartOfFile;
  367. }
  368. Expected<uint64_t> Archive::Child::getRawSize() const {
  369. return Header.getSize();
  370. }
  371. Expected<bool> Archive::Child::isThinMember() const {
  372. Expected<StringRef> NameOrErr = Header.getRawName();
  373. if (!NameOrErr)
  374. return NameOrErr.takeError();
  375. StringRef Name = NameOrErr.get();
  376. return Parent->IsThin && Name != "/" && Name != "//";
  377. }
  378. Expected<std::string> Archive::Child::getFullName() const {
  379. Expected<bool> isThin = isThinMember();
  380. if (!isThin)
  381. return isThin.takeError();
  382. assert(isThin.get());
  383. Expected<StringRef> NameOrErr = getName();
  384. if (!NameOrErr)
  385. return NameOrErr.takeError();
  386. StringRef Name = *NameOrErr;
  387. if (sys::path::is_absolute(Name))
  388. return std::string(Name);
  389. SmallString<128> FullName = sys::path::parent_path(
  390. Parent->getMemoryBufferRef().getBufferIdentifier());
  391. sys::path::append(FullName, Name);
  392. return std::string(FullName.str());
  393. }
  394. Expected<StringRef> Archive::Child::getBuffer() const {
  395. Expected<bool> isThinOrErr = isThinMember();
  396. if (!isThinOrErr)
  397. return isThinOrErr.takeError();
  398. bool isThin = isThinOrErr.get();
  399. if (!isThin) {
  400. Expected<uint64_t> Size = getSize();
  401. if (!Size)
  402. return Size.takeError();
  403. return StringRef(Data.data() + StartOfFile, Size.get());
  404. }
  405. Expected<std::string> FullNameOrErr = getFullName();
  406. if (!FullNameOrErr)
  407. return FullNameOrErr.takeError();
  408. const std::string &FullName = *FullNameOrErr;
  409. ErrorOr<std::unique_ptr<MemoryBuffer>> Buf = MemoryBuffer::getFile(FullName);
  410. if (std::error_code EC = Buf.getError())
  411. return errorCodeToError(EC);
  412. Parent->ThinBuffers.push_back(std::move(*Buf));
  413. return Parent->ThinBuffers.back()->getBuffer();
  414. }
  415. Expected<Archive::Child> Archive::Child::getNext() const {
  416. size_t SpaceToSkip = Data.size();
  417. // If it's odd, add 1 to make it even.
  418. if (SpaceToSkip & 1)
  419. ++SpaceToSkip;
  420. const char *NextLoc = Data.data() + SpaceToSkip;
  421. // Check to see if this is at the end of the archive.
  422. if (NextLoc == Parent->Data.getBufferEnd())
  423. return Child(nullptr, nullptr, nullptr);
  424. // Check to see if this is past the end of the archive.
  425. if (NextLoc > Parent->Data.getBufferEnd()) {
  426. std::string Msg("offset to next archive member past the end of the archive "
  427. "after member ");
  428. Expected<StringRef> NameOrErr = getName();
  429. if (!NameOrErr) {
  430. consumeError(NameOrErr.takeError());
  431. uint64_t Offset = Data.data() - Parent->getData().data();
  432. return malformedError(Msg + "at offset " + Twine(Offset));
  433. } else
  434. return malformedError(Msg + NameOrErr.get());
  435. }
  436. Error Err = Error::success();
  437. Child Ret(Parent, NextLoc, &Err);
  438. if (Err)
  439. return std::move(Err);
  440. return Ret;
  441. }
  442. uint64_t Archive::Child::getChildOffset() const {
  443. const char *a = Parent->Data.getBuffer().data();
  444. const char *c = Data.data();
  445. uint64_t offset = c - a;
  446. return offset;
  447. }
  448. Expected<StringRef> Archive::Child::getName() const {
  449. Expected<uint64_t> RawSizeOrErr = getRawSize();
  450. if (!RawSizeOrErr)
  451. return RawSizeOrErr.takeError();
  452. uint64_t RawSize = RawSizeOrErr.get();
  453. Expected<StringRef> NameOrErr = Header.getName(Header.getSizeOf() + RawSize);
  454. if (!NameOrErr)
  455. return NameOrErr.takeError();
  456. StringRef Name = NameOrErr.get();
  457. return Name;
  458. }
  459. Expected<MemoryBufferRef> Archive::Child::getMemoryBufferRef() const {
  460. Expected<StringRef> NameOrErr = getName();
  461. if (!NameOrErr)
  462. return NameOrErr.takeError();
  463. StringRef Name = NameOrErr.get();
  464. Expected<StringRef> Buf = getBuffer();
  465. if (!Buf)
  466. return createFileError(Name, Buf.takeError());
  467. return MemoryBufferRef(*Buf, Name);
  468. }
  469. Expected<std::unique_ptr<Binary>>
  470. Archive::Child::getAsBinary(LLVMContext *Context) const {
  471. Expected<MemoryBufferRef> BuffOrErr = getMemoryBufferRef();
  472. if (!BuffOrErr)
  473. return BuffOrErr.takeError();
  474. auto BinaryOrErr = createBinary(BuffOrErr.get(), Context);
  475. if (BinaryOrErr)
  476. return std::move(*BinaryOrErr);
  477. return BinaryOrErr.takeError();
  478. }
  479. Expected<std::unique_ptr<Archive>> Archive::create(MemoryBufferRef Source) {
  480. Error Err = Error::success();
  481. std::unique_ptr<Archive> Ret(new Archive(Source, Err));
  482. if (Err)
  483. return std::move(Err);
  484. return std::move(Ret);
  485. }
  486. void Archive::setFirstRegular(const Child &C) {
  487. FirstRegularData = C.Data;
  488. FirstRegularStartOfFile = C.StartOfFile;
  489. }
  490. Archive::Archive(MemoryBufferRef Source, Error &Err)
  491. : Binary(Binary::ID_Archive, Source) {
  492. ErrorAsOutParameter ErrAsOutParam(&Err);
  493. StringRef Buffer = Data.getBuffer();
  494. // Check for sufficient magic.
  495. if (Buffer.startswith(ThinMagic)) {
  496. IsThin = true;
  497. } else if (Buffer.startswith(Magic)) {
  498. IsThin = false;
  499. } else {
  500. Err = make_error<GenericBinaryError>("file too small to be an archive",
  501. object_error::invalid_file_type);
  502. return;
  503. }
  504. // Make sure Format is initialized before any call to
  505. // ArchiveMemberHeader::getName() is made. This could be a valid empty
  506. // archive which is the same in all formats. So claiming it to be gnu to is
  507. // fine if not totally correct before we look for a string table or table of
  508. // contents.
  509. Format = K_GNU;
  510. // Get the special members.
  511. child_iterator I = child_begin(Err, false);
  512. if (Err)
  513. return;
  514. child_iterator E = child_end();
  515. // See if this is a valid empty archive and if so return.
  516. if (I == E) {
  517. Err = Error::success();
  518. return;
  519. }
  520. const Child *C = &*I;
  521. auto Increment = [&]() {
  522. ++I;
  523. if (Err)
  524. return true;
  525. C = &*I;
  526. return false;
  527. };
  528. Expected<StringRef> NameOrErr = C->getRawName();
  529. if (!NameOrErr) {
  530. Err = NameOrErr.takeError();
  531. return;
  532. }
  533. StringRef Name = NameOrErr.get();
  534. // Below is the pattern that is used to figure out the archive format
  535. // GNU archive format
  536. // First member : / (may exist, if it exists, points to the symbol table )
  537. // Second member : // (may exist, if it exists, points to the string table)
  538. // Note : The string table is used if the filename exceeds 15 characters
  539. // BSD archive format
  540. // First member : __.SYMDEF or "__.SYMDEF SORTED" (the symbol table)
  541. // There is no string table, if the filename exceeds 15 characters or has a
  542. // embedded space, the filename has #1/<size>, The size represents the size
  543. // of the filename that needs to be read after the archive header
  544. // COFF archive format
  545. // First member : /
  546. // Second member : / (provides a directory of symbols)
  547. // Third member : // (may exist, if it exists, contains the string table)
  548. // Note: Microsoft PE/COFF Spec 8.3 says that the third member is present
  549. // even if the string table is empty. However, lib.exe does not in fact
  550. // seem to create the third member if there's no member whose filename
  551. // exceeds 15 characters. So the third member is optional.
  552. if (Name == "__.SYMDEF" || Name == "__.SYMDEF_64") {
  553. if (Name == "__.SYMDEF")
  554. Format = K_BSD;
  555. else // Name == "__.SYMDEF_64"
  556. Format = K_DARWIN64;
  557. // We know that the symbol table is not an external file, but we still must
  558. // check any Expected<> return value.
  559. Expected<StringRef> BufOrErr = C->getBuffer();
  560. if (!BufOrErr) {
  561. Err = BufOrErr.takeError();
  562. return;
  563. }
  564. SymbolTable = BufOrErr.get();
  565. if (Increment())
  566. return;
  567. setFirstRegular(*C);
  568. Err = Error::success();
  569. return;
  570. }
  571. if (Name.startswith("#1/")) {
  572. Format = K_BSD;
  573. // We know this is BSD, so getName will work since there is no string table.
  574. Expected<StringRef> NameOrErr = C->getName();
  575. if (!NameOrErr) {
  576. Err = NameOrErr.takeError();
  577. return;
  578. }
  579. Name = NameOrErr.get();
  580. if (Name == "__.SYMDEF SORTED" || Name == "__.SYMDEF") {
  581. // We know that the symbol table is not an external file, but we still
  582. // must check any Expected<> return value.
  583. Expected<StringRef> BufOrErr = C->getBuffer();
  584. if (!BufOrErr) {
  585. Err = BufOrErr.takeError();
  586. return;
  587. }
  588. SymbolTable = BufOrErr.get();
  589. if (Increment())
  590. return;
  591. }
  592. else if (Name == "__.SYMDEF_64 SORTED" || Name == "__.SYMDEF_64") {
  593. Format = K_DARWIN64;
  594. // We know that the symbol table is not an external file, but we still
  595. // must check any Expected<> return value.
  596. Expected<StringRef> BufOrErr = C->getBuffer();
  597. if (!BufOrErr) {
  598. Err = BufOrErr.takeError();
  599. return;
  600. }
  601. SymbolTable = BufOrErr.get();
  602. if (Increment())
  603. return;
  604. }
  605. setFirstRegular(*C);
  606. return;
  607. }
  608. // MIPS 64-bit ELF archives use a special format of a symbol table.
  609. // This format is marked by `ar_name` field equals to "/SYM64/".
  610. // For detailed description see page 96 in the following document:
  611. // http://techpubs.sgi.com/library/manuals/4000/007-4658-001/pdf/007-4658-001.pdf
  612. bool has64SymTable = false;
  613. if (Name == "/" || Name == "/SYM64/") {
  614. // We know that the symbol table is not an external file, but we still
  615. // must check any Expected<> return value.
  616. Expected<StringRef> BufOrErr = C->getBuffer();
  617. if (!BufOrErr) {
  618. Err = BufOrErr.takeError();
  619. return;
  620. }
  621. SymbolTable = BufOrErr.get();
  622. if (Name == "/SYM64/")
  623. has64SymTable = true;
  624. if (Increment())
  625. return;
  626. if (I == E) {
  627. Err = Error::success();
  628. return;
  629. }
  630. Expected<StringRef> NameOrErr = C->getRawName();
  631. if (!NameOrErr) {
  632. Err = NameOrErr.takeError();
  633. return;
  634. }
  635. Name = NameOrErr.get();
  636. }
  637. if (Name == "//") {
  638. Format = has64SymTable ? K_GNU64 : K_GNU;
  639. // The string table is never an external member, but we still
  640. // must check any Expected<> return value.
  641. Expected<StringRef> BufOrErr = C->getBuffer();
  642. if (!BufOrErr) {
  643. Err = BufOrErr.takeError();
  644. return;
  645. }
  646. StringTable = BufOrErr.get();
  647. if (Increment())
  648. return;
  649. setFirstRegular(*C);
  650. Err = Error::success();
  651. return;
  652. }
  653. if (Name[0] != '/') {
  654. Format = has64SymTable ? K_GNU64 : K_GNU;
  655. setFirstRegular(*C);
  656. Err = Error::success();
  657. return;
  658. }
  659. if (Name != "/") {
  660. Err = errorCodeToError(object_error::parse_failed);
  661. return;
  662. }
  663. Format = K_COFF;
  664. // We know that the symbol table is not an external file, but we still
  665. // must check any Expected<> return value.
  666. Expected<StringRef> BufOrErr = C->getBuffer();
  667. if (!BufOrErr) {
  668. Err = BufOrErr.takeError();
  669. return;
  670. }
  671. SymbolTable = BufOrErr.get();
  672. if (Increment())
  673. return;
  674. if (I == E) {
  675. setFirstRegular(*C);
  676. Err = Error::success();
  677. return;
  678. }
  679. NameOrErr = C->getRawName();
  680. if (!NameOrErr) {
  681. Err = NameOrErr.takeError();
  682. return;
  683. }
  684. Name = NameOrErr.get();
  685. if (Name == "//") {
  686. // The string table is never an external member, but we still
  687. // must check any Expected<> return value.
  688. Expected<StringRef> BufOrErr = C->getBuffer();
  689. if (!BufOrErr) {
  690. Err = BufOrErr.takeError();
  691. return;
  692. }
  693. StringTable = BufOrErr.get();
  694. if (Increment())
  695. return;
  696. }
  697. setFirstRegular(*C);
  698. Err = Error::success();
  699. }
  700. Archive::child_iterator Archive::child_begin(Error &Err,
  701. bool SkipInternal) const {
  702. if (isEmpty())
  703. return child_end();
  704. if (SkipInternal)
  705. return child_iterator::itr(
  706. Child(this, FirstRegularData, FirstRegularStartOfFile), Err);
  707. const char *Loc = Data.getBufferStart() + strlen(Magic);
  708. Child C(this, Loc, &Err);
  709. if (Err)
  710. return child_end();
  711. return child_iterator::itr(C, Err);
  712. }
  713. Archive::child_iterator Archive::child_end() const {
  714. return child_iterator::end(Child(nullptr, nullptr, nullptr));
  715. }
  716. StringRef Archive::Symbol::getName() const {
  717. return Parent->getSymbolTable().begin() + StringIndex;
  718. }
  719. Expected<Archive::Child> Archive::Symbol::getMember() const {
  720. const char *Buf = Parent->getSymbolTable().begin();
  721. const char *Offsets = Buf;
  722. if (Parent->kind() == K_GNU64 || Parent->kind() == K_DARWIN64)
  723. Offsets += sizeof(uint64_t);
  724. else
  725. Offsets += sizeof(uint32_t);
  726. uint64_t Offset = 0;
  727. if (Parent->kind() == K_GNU) {
  728. Offset = read32be(Offsets + SymbolIndex * 4);
  729. } else if (Parent->kind() == K_GNU64) {
  730. Offset = read64be(Offsets + SymbolIndex * 8);
  731. } else if (Parent->kind() == K_BSD) {
  732. // The SymbolIndex is an index into the ranlib structs that start at
  733. // Offsets (the first uint32_t is the number of bytes of the ranlib
  734. // structs). The ranlib structs are a pair of uint32_t's the first
  735. // being a string table offset and the second being the offset into
  736. // the archive of the member that defines the symbol. Which is what
  737. // is needed here.
  738. Offset = read32le(Offsets + SymbolIndex * 8 + 4);
  739. } else if (Parent->kind() == K_DARWIN64) {
  740. // The SymbolIndex is an index into the ranlib_64 structs that start at
  741. // Offsets (the first uint64_t is the number of bytes of the ranlib_64
  742. // structs). The ranlib_64 structs are a pair of uint64_t's the first
  743. // being a string table offset and the second being the offset into
  744. // the archive of the member that defines the symbol. Which is what
  745. // is needed here.
  746. Offset = read64le(Offsets + SymbolIndex * 16 + 8);
  747. } else {
  748. // Skip offsets.
  749. uint32_t MemberCount = read32le(Buf);
  750. Buf += MemberCount * 4 + 4;
  751. uint32_t SymbolCount = read32le(Buf);
  752. if (SymbolIndex >= SymbolCount)
  753. return errorCodeToError(object_error::parse_failed);
  754. // Skip SymbolCount to get to the indices table.
  755. const char *Indices = Buf + 4;
  756. // Get the index of the offset in the file member offset table for this
  757. // symbol.
  758. uint16_t OffsetIndex = read16le(Indices + SymbolIndex * 2);
  759. // Subtract 1 since OffsetIndex is 1 based.
  760. --OffsetIndex;
  761. if (OffsetIndex >= MemberCount)
  762. return errorCodeToError(object_error::parse_failed);
  763. Offset = read32le(Offsets + OffsetIndex * 4);
  764. }
  765. const char *Loc = Parent->getData().begin() + Offset;
  766. Error Err = Error::success();
  767. Child C(Parent, Loc, &Err);
  768. if (Err)
  769. return std::move(Err);
  770. return C;
  771. }
  772. Archive::Symbol Archive::Symbol::getNext() const {
  773. Symbol t(*this);
  774. if (Parent->kind() == K_BSD) {
  775. // t.StringIndex is an offset from the start of the __.SYMDEF or
  776. // "__.SYMDEF SORTED" member into the string table for the ranlib
  777. // struct indexed by t.SymbolIndex . To change t.StringIndex to the
  778. // offset in the string table for t.SymbolIndex+1 we subtract the
  779. // its offset from the start of the string table for t.SymbolIndex
  780. // and add the offset of the string table for t.SymbolIndex+1.
  781. // The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t
  782. // which is the number of bytes of ranlib structs that follow. The ranlib
  783. // structs are a pair of uint32_t's the first being a string table offset
  784. // and the second being the offset into the archive of the member that
  785. // define the symbol. After that the next uint32_t is the byte count of
  786. // the string table followed by the string table.
  787. const char *Buf = Parent->getSymbolTable().begin();
  788. uint32_t RanlibCount = 0;
  789. RanlibCount = read32le(Buf) / 8;
  790. // If t.SymbolIndex + 1 will be past the count of symbols (the RanlibCount)
  791. // don't change the t.StringIndex as we don't want to reference a ranlib
  792. // past RanlibCount.
  793. if (t.SymbolIndex + 1 < RanlibCount) {
  794. const char *Ranlibs = Buf + 4;
  795. uint32_t CurRanStrx = 0;
  796. uint32_t NextRanStrx = 0;
  797. CurRanStrx = read32le(Ranlibs + t.SymbolIndex * 8);
  798. NextRanStrx = read32le(Ranlibs + (t.SymbolIndex + 1) * 8);
  799. t.StringIndex -= CurRanStrx;
  800. t.StringIndex += NextRanStrx;
  801. }
  802. } else {
  803. // Go to one past next null.
  804. t.StringIndex = Parent->getSymbolTable().find('\0', t.StringIndex) + 1;
  805. }
  806. ++t.SymbolIndex;
  807. return t;
  808. }
  809. Archive::symbol_iterator Archive::symbol_begin() const {
  810. if (!hasSymbolTable())
  811. return symbol_iterator(Symbol(this, 0, 0));
  812. const char *buf = getSymbolTable().begin();
  813. if (kind() == K_GNU) {
  814. uint32_t symbol_count = 0;
  815. symbol_count = read32be(buf);
  816. buf += sizeof(uint32_t) + (symbol_count * (sizeof(uint32_t)));
  817. } else if (kind() == K_GNU64) {
  818. uint64_t symbol_count = read64be(buf);
  819. buf += sizeof(uint64_t) + (symbol_count * (sizeof(uint64_t)));
  820. } else if (kind() == K_BSD) {
  821. // The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t
  822. // which is the number of bytes of ranlib structs that follow. The ranlib
  823. // structs are a pair of uint32_t's the first being a string table offset
  824. // and the second being the offset into the archive of the member that
  825. // define the symbol. After that the next uint32_t is the byte count of
  826. // the string table followed by the string table.
  827. uint32_t ranlib_count = 0;
  828. ranlib_count = read32le(buf) / 8;
  829. const char *ranlibs = buf + 4;
  830. uint32_t ran_strx = 0;
  831. ran_strx = read32le(ranlibs);
  832. buf += sizeof(uint32_t) + (ranlib_count * (2 * (sizeof(uint32_t))));
  833. // Skip the byte count of the string table.
  834. buf += sizeof(uint32_t);
  835. buf += ran_strx;
  836. } else if (kind() == K_DARWIN64) {
  837. // The __.SYMDEF_64 or "__.SYMDEF_64 SORTED" member starts with a uint64_t
  838. // which is the number of bytes of ranlib_64 structs that follow. The
  839. // ranlib_64 structs are a pair of uint64_t's the first being a string
  840. // table offset and the second being the offset into the archive of the
  841. // member that define the symbol. After that the next uint64_t is the byte
  842. // count of the string table followed by the string table.
  843. uint64_t ranlib_count = 0;
  844. ranlib_count = read64le(buf) / 16;
  845. const char *ranlibs = buf + 8;
  846. uint64_t ran_strx = 0;
  847. ran_strx = read64le(ranlibs);
  848. buf += sizeof(uint64_t) + (ranlib_count * (2 * (sizeof(uint64_t))));
  849. // Skip the byte count of the string table.
  850. buf += sizeof(uint64_t);
  851. buf += ran_strx;
  852. } else {
  853. uint32_t member_count = 0;
  854. uint32_t symbol_count = 0;
  855. member_count = read32le(buf);
  856. buf += 4 + (member_count * 4); // Skip offsets.
  857. symbol_count = read32le(buf);
  858. buf += 4 + (symbol_count * 2); // Skip indices.
  859. }
  860. uint32_t string_start_offset = buf - getSymbolTable().begin();
  861. return symbol_iterator(Symbol(this, 0, string_start_offset));
  862. }
  863. Archive::symbol_iterator Archive::symbol_end() const {
  864. return symbol_iterator(Symbol(this, getNumberOfSymbols(), 0));
  865. }
  866. uint32_t Archive::getNumberOfSymbols() const {
  867. if (!hasSymbolTable())
  868. return 0;
  869. const char *buf = getSymbolTable().begin();
  870. if (kind() == K_GNU)
  871. return read32be(buf);
  872. if (kind() == K_GNU64)
  873. return read64be(buf);
  874. if (kind() == K_BSD)
  875. return read32le(buf) / 8;
  876. if (kind() == K_DARWIN64)
  877. return read64le(buf) / 16;
  878. uint32_t member_count = 0;
  879. member_count = read32le(buf);
  880. buf += 4 + (member_count * 4); // Skip offsets.
  881. return read32le(buf);
  882. }
  883. Expected<Optional<Archive::Child>> Archive::findSym(StringRef name) const {
  884. Archive::symbol_iterator bs = symbol_begin();
  885. Archive::symbol_iterator es = symbol_end();
  886. for (; bs != es; ++bs) {
  887. StringRef SymName = bs->getName();
  888. if (SymName == name) {
  889. if (auto MemberOrErr = bs->getMember())
  890. return Child(*MemberOrErr);
  891. else
  892. return MemberOrErr.takeError();
  893. }
  894. }
  895. return Optional<Child>();
  896. }
  897. // Returns true if archive file contains no member file.
  898. bool Archive::isEmpty() const { return Data.getBufferSize() == 8; }
  899. bool Archive::hasSymbolTable() const { return !SymbolTable.empty(); }